Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig | 8
-rw-r--r--  arch/alpha/Makefile | 2
-rw-r--r--  arch/alpha/configs/defconfig | 2
-rw-r--r--  arch/alpha/include/asm/processor.h | 2
-rw-r--r--  arch/alpha/include/uapi/asm/mman.h | 2
-rw-r--r--  arch/alpha/kernel/Makefile | 4
-rw-r--r--  arch/alpha/kernel/osf_sys.c | 12
-rw-r--r--  arch/alpha/kernel/process.c | 5
-rw-r--r--  arch/alpha/kernel/setup.c | 4
-rw-r--r--  arch/alpha/kernel/syscalls/syscall.tbl | 4
-rw-r--r--  arch/arc/Kconfig | 2
-rw-r--r--  arch/arc/Makefile | 2
-rw-r--r--  arch/arc/configs/tb10x_defconfig | 2
-rw-r--r--  arch/arc/include/asm/processor.h | 3
-rw-r--r--  arch/arc/kernel/Makefile | 4
-rw-r--r--  arch/arm/Kconfig | 6
-rw-r--r--  arch/arm/Makefile | 3
-rw-r--r--  arch/arm/boot/dts/aspeed-g5.dtsi | 8
-rw-r--r--  arch/arm/boot/dts/aspeed-g6.dtsi | 8
-rw-r--r--  arch/arm/configs/exynos_defconfig | 1
-rw-r--r--  arch/arm/configs/imx_v6_v7_defconfig | 2
-rw-r--r--  arch/arm/configs/milbeaut_m10v_defconfig | 3
-rw-r--r--  arch/arm/configs/multi_v7_defconfig | 1
-rw-r--r--  arch/arm/configs/omap2plus_defconfig | 1
-rw-r--r--  arch/arm/configs/oxnas_v6_defconfig | 2
-rw-r--r--  arch/arm/configs/pxa_defconfig | 3
-rw-r--r--  arch/arm/configs/sama7_defconfig | 2
-rw-r--r--  arch/arm/configs/sp7021_defconfig | 2
-rw-r--r--  arch/arm/crypto/Kconfig | 238
-rw-r--r--  arch/arm/include/asm/device.h | 1
-rw-r--r--  arch/arm/include/asm/processor.h | 3
-rw-r--r--  arch/arm/kernel/Makefile | 4
-rw-r--r--  arch/arm/kernel/process.c | 4
-rw-r--r--  arch/arm/mach-at91/pm.c | 15
-rw-r--r--  arch/arm/mm/dma-mapping.c | 10
-rw-r--r--  arch/arm64/Kconfig | 6
-rw-r--r--  arch/arm64/Makefile | 3
-rw-r--r--  arch/arm64/configs/defconfig | 1
-rw-r--r--  arch/arm64/crypto/Kconfig | 279
-rw-r--r--  arch/arm64/hyperv/mshyperv.c | 2
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 4
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 15
-rw-r--r--  arch/arm64/include/asm/processor.h | 3
-rw-r--r--  arch/arm64/kernel/Makefile | 4
-rw-r--r--  arch/arm64/kernel/elfcore.c | 16
-rw-r--r--  arch/arm64/kernel/process.c | 4
-rw-r--r--  arch/arm64/kernel/vdso.c | 3
-rw-r--r--  arch/arm64/kvm/arm.c | 15
-rw-r--r--  arch/arm64/kvm/debug.c | 34
-rw-r--r--  arch/arm64/kvm/guest.c | 1
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 8
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/switch.c | 2
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 154
-rw-r--r--  arch/arm64/kvm/sys_regs.h | 24
-rw-r--r--  arch/arm64/kvm/vgic/vgic-its.c | 2
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 2
-rw-r--r--  arch/arm64/mm/hugetlbpage.c | 2
-rw-r--r--  arch/csky/Kconfig | 2
-rw-r--r--  arch/csky/Makefile | 2
-rw-r--r--  arch/csky/include/asm/processor.h | 5
-rw-r--r--  arch/csky/kernel/Makefile | 4
-rw-r--r--  arch/hexagon/Makefile | 2
-rw-r--r--  arch/hexagon/include/asm/processor.h | 4
-rw-r--r--  arch/hexagon/kernel/Makefile | 3
-rw-r--r--  arch/hexagon/kernel/process.c | 7
-rw-r--r--  arch/ia64/Kconfig | 2
-rw-r--r--  arch/ia64/Makefile | 1
-rw-r--r--  arch/ia64/configs/bigsur_defconfig | 2
-rw-r--r--  arch/ia64/configs/generic_defconfig | 2
-rw-r--r--  arch/ia64/configs/gensparse_defconfig | 3
-rw-r--r--  arch/ia64/configs/tiger_defconfig | 2
-rw-r--r--  arch/ia64/configs/zx1_defconfig | 1
-rw-r--r--  arch/ia64/include/asm/processor.h | 7
-rw-r--r--  arch/ia64/include/asm/sparsemem.h | 6
-rw-r--r--  arch/ia64/kernel/Makefile | 9
-rw-r--r--  arch/ia64/kernel/mca.c | 2
-rw-r--r--  arch/ia64/kernel/setup.c | 2
-rw-r--r--  arch/ia64/kernel/sys_ia64.c | 26
-rw-r--r--  arch/ia64/kernel/syscalls/syscall.tbl | 2
-rw-r--r--  arch/loongarch/Kbuild | 1
-rw-r--r--  arch/loongarch/Kconfig | 64
-rw-r--r--  arch/loongarch/Makefile | 24
-rw-r--r--  arch/loongarch/configs/loongson3_defconfig | 63
-rw-r--r--  arch/loongarch/include/asm/Kbuild | 5
-rw-r--r--  arch/loongarch/include/asm/bootinfo.h | 5
-rw-r--r--  arch/loongarch/include/asm/bug.h | 58
-rw-r--r--  arch/loongarch/include/asm/cacheflush.h | 87
-rw-r--r--  arch/loongarch/include/asm/cacheops.h | 36
-rw-r--r--  arch/loongarch/include/asm/cmpxchg.h | 8
-rw-r--r--  arch/loongarch/include/asm/cpu-features.h | 5
-rw-r--r--  arch/loongarch/include/asm/cpu-info.h | 21
-rw-r--r--  arch/loongarch/include/asm/elf.h | 37
-rw-r--r--  arch/loongarch/include/asm/fixmap.h | 15
-rw-r--r--  arch/loongarch/include/asm/inst.h | 410
-rw-r--r--  arch/loongarch/include/asm/io.h | 73
-rw-r--r--  arch/loongarch/include/asm/kexec.h | 60
-rw-r--r--  arch/loongarch/include/asm/loongarch.h | 33
-rw-r--r--  arch/loongarch/include/asm/module.h | 27
-rw-r--r--  arch/loongarch/include/asm/module.lds.h | 1
-rw-r--r--  arch/loongarch/include/asm/percpu.h | 9
-rw-r--r--  arch/loongarch/include/asm/perf_event.h | 4
-rw-r--r--  arch/loongarch/include/asm/pgtable-bits.h | 3
-rw-r--r--  arch/loongarch/include/asm/processor.h | 3
-rw-r--r--  arch/loongarch/include/asm/setup.h | 2
-rw-r--r--  arch/loongarch/include/asm/spinlock.h | 12
-rw-r--r--  arch/loongarch/include/asm/spinlock_types.h | 11
-rw-r--r--  arch/loongarch/include/uapi/asm/bpf_perf_event.h | 9
-rw-r--r--  arch/loongarch/include/uapi/asm/perf_regs.h | 40
-rw-r--r--  arch/loongarch/kernel/Makefile | 11
-rw-r--r--  arch/loongarch/kernel/cacheinfo.c | 98
-rw-r--r--  arch/loongarch/kernel/cpu-probe.c | 4
-rw-r--r--  arch/loongarch/kernel/crash_dump.c | 23
-rw-r--r--  arch/loongarch/kernel/head.S | 22
-rw-r--r--  arch/loongarch/kernel/machine_kexec.c | 304
-rw-r--r--  arch/loongarch/kernel/mem.c | 3
-rw-r--r--  arch/loongarch/kernel/module-sections.c | 61
-rw-r--r--  arch/loongarch/kernel/module.c | 105
-rw-r--r--  arch/loongarch/kernel/perf_event.c | 887
-rw-r--r--  arch/loongarch/kernel/perf_regs.c | 53
-rw-r--r--  arch/loongarch/kernel/relocate_kernel.S | 112
-rw-r--r--  arch/loongarch/kernel/setup.c | 78
-rw-r--r--  arch/loongarch/kernel/smp.c | 5
-rw-r--r--  arch/loongarch/kernel/sysrq.c | 65
-rw-r--r--  arch/loongarch/kernel/topology.c | 3
-rw-r--r--  arch/loongarch/kernel/traps.c | 33
-rw-r--r--  arch/loongarch/kernel/vmlinux.lds.S | 4
-rw-r--r--  arch/loongarch/mm/cache.c | 211
-rw-r--r--  arch/loongarch/mm/init.c | 64
-rw-r--r--  arch/loongarch/mm/mmap.c | 29
-rw-r--r--  arch/loongarch/mm/tlb.c | 5
-rw-r--r--  arch/loongarch/mm/tlbex.S | 537
-rw-r--r--  arch/loongarch/net/Makefile | 7
-rw-r--r--  arch/loongarch/net/bpf_jit.c | 1179
-rw-r--r--  arch/loongarch/net/bpf_jit.h | 282
-rw-r--r--  arch/loongarch/pci/acpi.c | 76
-rw-r--r--  arch/loongarch/pci/pci.c | 7
-rw-r--r--  arch/m68k/68000/Makefile | 2
-rw-r--r--  arch/m68k/68000/ints.c | 6
-rw-r--r--  arch/m68k/Kconfig.cpu | 6
-rw-r--r--  arch/m68k/Makefile | 9
-rw-r--r--  arch/m68k/coldfire/Makefile | 2
-rw-r--r--  arch/m68k/configs/amcore_defconfig | 4
-rw-r--r--  arch/m68k/configs/m5208evb_defconfig | 3
-rw-r--r--  arch/m68k/configs/m5249evb_defconfig | 3
-rw-r--r--  arch/m68k/configs/m5272c3_defconfig | 3
-rw-r--r--  arch/m68k/configs/m5275evb_defconfig | 3
-rw-r--r--  arch/m68k/configs/m5307c3_defconfig | 3
-rw-r--r--  arch/m68k/configs/m5407c3_defconfig | 3
-rw-r--r--  arch/m68k/include/asm/processor.h | 5
-rw-r--r--  arch/m68k/kernel/Makefile | 23
-rw-r--r--  arch/microblaze/Makefile | 1
-rw-r--r--  arch/microblaze/configs/mmu_defconfig | 2
-rw-r--r--  arch/microblaze/include/asm/processor.h | 5
-rw-r--r--  arch/microblaze/kernel/Makefile | 4
-rw-r--r--  arch/mips/Kconfig | 2
-rw-r--r--  arch/mips/Makefile | 2
-rw-r--r--  arch/mips/configs/bcm47xx_defconfig | 2
-rw-r--r--  arch/mips/configs/cavium_octeon_defconfig | 2
-rw-r--r--  arch/mips/configs/ci20_defconfig | 2
-rw-r--r--  arch/mips/configs/cu1000-neo_defconfig | 2
-rw-r--r--  arch/mips/configs/cu1830-neo_defconfig | 2
-rw-r--r--  arch/mips/configs/db1xxx_defconfig | 1
-rw-r--r--  arch/mips/configs/generic_defconfig | 3
-rw-r--r--  arch/mips/configs/omega2p_defconfig | 2
-rw-r--r--  arch/mips/configs/qi_lb60_defconfig | 2
-rw-r--r--  arch/mips/configs/vocore2_defconfig | 2
-rw-r--r--  arch/mips/crypto/Kconfig | 74
-rw-r--r--  arch/mips/include/asm/processor.h | 3
-rw-r--r--  arch/mips/include/uapi/asm/mman.h | 2
-rw-r--r--  arch/mips/kernel/Makefile | 4
-rw-r--r--  arch/mips/kernel/setup.c | 2
-rw-r--r--  arch/nios2/Kbuild | 2
-rw-r--r--  arch/nios2/Kconfig | 2
-rw-r--r--  arch/nios2/Makefile | 5
-rw-r--r--  arch/nios2/configs/10m50_defconfig | 2
-rw-r--r--  arch/nios2/configs/3c120_defconfig | 2
-rw-r--r--  arch/nios2/include/asm/processor.h | 5
-rw-r--r--  arch/nios2/kernel/Makefile | 2
-rw-r--r--  arch/openrisc/Makefile | 2
-rw-r--r--  arch/openrisc/include/asm/processor.h | 1
-rw-r--r--  arch/openrisc/kernel/Makefile | 4
-rw-r--r--  arch/openrisc/kernel/process.c | 4
-rw-r--r--  arch/parisc/Makefile | 2
-rw-r--r--  arch/parisc/include/asm/processor.h | 3
-rw-r--r--  arch/parisc/include/uapi/asm/mman.h | 2
-rw-r--r--  arch/parisc/kernel/Makefile | 4
-rw-r--r--  arch/parisc/kernel/cache.c | 9
-rw-r--r--  arch/parisc/kernel/process.c | 4
-rw-r--r--  arch/powerpc/Kconfig | 37
-rw-r--r--  arch/powerpc/Kconfig.debug | 15
-rw-r--r--  arch/powerpc/Makefile | 28
-rw-r--r--  arch/powerpc/boot/44x.h | 5
-rw-r--r--  arch/powerpc/boot/4xx.h | 5
-rw-r--r--  arch/powerpc/boot/Makefile | 1
-rw-r--r--  arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi | 51
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8540ads.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8541cds.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8555cds.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/fsl/mpc8560ads.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/ksi8560.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/mgcoge.dts | 7
-rw-r--r--  arch/powerpc/boot/dts/stx_gp3_8560.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/stxssa8555.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/tqm8540.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/tqm8541.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/tqm8555.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/tqm8560.dts | 2
-rw-r--r--  arch/powerpc/boot/dts/turris1x.dts | 14
-rw-r--r--  arch/powerpc/boot/dummy.c | 4
-rw-r--r--  arch/powerpc/boot/opal-calls.S | 6
-rw-r--r--  arch/powerpc/boot/ops.h | 6
-rw-r--r--  arch/powerpc/boot/ppc_asm.h | 10
-rw-r--r--  arch/powerpc/boot/serial.c | 6
-rw-r--r--  arch/powerpc/boot/simple_alloc.c | 6
-rwxr-xr-x  arch/powerpc/boot/wrapper | 2
-rw-r--r--  arch/powerpc/configs/83xx/mpc837x_rdb_defconfig | 1
-rw-r--r--  arch/powerpc/configs/85xx/ge_imp3a_defconfig | 3
-rw-r--r--  arch/powerpc/configs/85xx/ppa8548_defconfig | 2
-rw-r--r--  arch/powerpc/configs/cell_defconfig | 1
-rw-r--r--  arch/powerpc/configs/fsl-emb-nonhw.config | 2
-rw-r--r--  arch/powerpc/configs/g5_defconfig | 1
-rw-r--r--  arch/powerpc/configs/microwatt_defconfig | 5
-rw-r--r--  arch/powerpc/configs/mpc512x_defconfig | 1
-rw-r--r--  arch/powerpc/configs/mpc885_ads_defconfig | 2
-rw-r--r--  arch/powerpc/configs/pasemi_defconfig | 1
-rw-r--r--  arch/powerpc/configs/pmac32_defconfig | 1
-rw-r--r--  arch/powerpc/configs/powernv_defconfig | 7
-rw-r--r--  arch/powerpc/configs/ppc64_defconfig | 5
-rw-r--r--  arch/powerpc/configs/ppc64e_defconfig | 3
-rw-r--r--  arch/powerpc/configs/ppc6xx_defconfig | 7
-rw-r--r--  arch/powerpc/configs/ps3_defconfig | 1
-rw-r--r--  arch/powerpc/configs/pseries_defconfig | 7
-rw-r--r--  arch/powerpc/configs/skiroot_defconfig | 2
-rw-r--r--  arch/powerpc/configs/storcenter_defconfig | 1
-rw-r--r--  arch/powerpc/crypto/Kconfig | 97
-rw-r--r--  arch/powerpc/include/asm/asm-prototypes.h | 14
-rw-r--r--  arch/powerpc/include/asm/barrier.h | 2
-rw-r--r--  arch/powerpc/include/asm/book3s/32/pgtable.h | 22
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgalloc.h | 6
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable-4k.h | 10
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 9
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 42
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 4
-rw-r--r--  arch/powerpc/include/asm/book3s/64/tlbflush.h | 56
-rw-r--r--  arch/powerpc/include/asm/book3s/pgtable.h | 15
-rw-r--r--  arch/powerpc/include/asm/cpu_setup.h | 49
-rw-r--r--  arch/powerpc/include/asm/cpu_setup_power.h | 12
-rw-r--r--  arch/powerpc/include/asm/cputable.h | 8
-rw-r--r--  arch/powerpc/include/asm/cputime.h | 2
-rw-r--r--  arch/powerpc/include/asm/dtl.h | 8
-rw-r--r--  arch/powerpc/include/asm/hugetlb.h | 4
-rw-r--r--  arch/powerpc/include/asm/hw_irq.h | 50
-rw-r--r--  arch/powerpc/include/asm/interrupt.h | 41
-rw-r--r--  arch/powerpc/include/asm/kexec.h | 2
-rw-r--r--  arch/powerpc/include/asm/kfence.h | 15
-rw-r--r--  arch/powerpc/include/asm/kgdb.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 4
-rw-r--r--  arch/powerpc/include/asm/lppaca.h | 10
-rw-r--r--  arch/powerpc/include/asm/machdep.h | 1
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 11
-rw-r--r--  arch/powerpc/include/asm/mmu_context.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pgtable.h | 8
-rw-r--r--  arch/powerpc/include/asm/nohash/32/pte-85xx.h (renamed from arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h) | 6
-rw-r--r--  arch/powerpc/include/asm/nohash/64/pgtable.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/hugetlb-e500.h (renamed from arch/powerpc/include/asm/nohash/hugetlb-book3e.h) | 8
-rw-r--r--  arch/powerpc/include/asm/nohash/mmu-e500.h (renamed from arch/powerpc/include/asm/nohash/mmu-book3e.h) | 0
-rw-r--r--  arch/powerpc/include/asm/nohash/mmu.h | 4
-rw-r--r--  arch/powerpc/include/asm/nohash/pgalloc.h | 2
-rw-r--r--  arch/powerpc/include/asm/nohash/pgtable.h | 30
-rw-r--r--  arch/powerpc/include/asm/nohash/pte-e500.h (renamed from arch/powerpc/include/asm/nohash/pte-book3e.h) | 6
-rw-r--r--  arch/powerpc/include/asm/nohash/tlbflush.h | 2
-rw-r--r--  arch/powerpc/include/asm/opal.h | 6
-rw-r--r--  arch/powerpc/include/asm/paca.h | 9
-rw-r--r--  arch/powerpc/include/asm/page.h | 8
-rw-r--r--  arch/powerpc/include/asm/paravirt.h | 12
-rw-r--r--  arch/powerpc/include/asm/paravirt_api_clock.h | 2
-rw-r--r--  arch/powerpc/include/asm/pgtable-be-types.h | 2
-rw-r--r--  arch/powerpc/include/asm/pgtable-types.h | 2
-rw-r--r--  arch/powerpc/include/asm/pgtable.h | 19
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 7
-rw-r--r--  arch/powerpc/include/asm/ppc_asm.h | 52
-rw-r--r--  arch/powerpc/include/asm/processor.h | 21
-rw-r--r--  arch/powerpc/include/asm/ps3av.h | 2
-rw-r--r--  arch/powerpc/include/asm/ptrace.h | 9
-rw-r--r--  arch/powerpc/include/asm/reg_booke.h | 6
-rw-r--r--  arch/powerpc/include/asm/rtas.h | 1
-rw-r--r--  arch/powerpc/include/asm/runlatch.h | 6
-rw-r--r--  arch/powerpc/include/asm/sections.h | 17
-rw-r--r--  arch/powerpc/include/asm/setup.h | 5
-rw-r--r--  arch/powerpc/include/asm/synch.h | 2
-rw-r--r--  arch/powerpc/include/asm/syscall.h | 11
-rw-r--r--  arch/powerpc/include/asm/syscall_wrapper.h | 49
-rw-r--r--  arch/powerpc/include/asm/syscalls.h | 158
-rw-r--r--  arch/powerpc/include/asm/syscalls_32.h (renamed from arch/powerpc/kernel/ppc32.h) | 6
-rw-r--r--  arch/powerpc/include/asm/time.h | 5
-rw-r--r--  arch/powerpc/include/asm/udbg.h | 54
-rw-r--r--  arch/powerpc/include/asm/unistd.h | 1
-rw-r--r--  arch/powerpc/include/asm/vdso.h | 3
-rw-r--r--  arch/powerpc/include/asm/vdso/processor.h | 8
-rw-r--r--  arch/powerpc/include/asm/vdso/timebase.h | 2
-rw-r--r--  arch/powerpc/include/asm/xics.h | 1
-rw-r--r--  arch/powerpc/kernel/85xx_entry_mapping.S (renamed from arch/powerpc/kernel/fsl_booke_entry_mapping.S) | 0
-rw-r--r--  arch/powerpc/kernel/Makefile | 33
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 10
-rw-r--r--  arch/powerpc/kernel/cpu_setup_e500.S (renamed from arch/powerpc/kernel/cpu_setup_fsl_booke.S) | 6
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.c | 2
-rw-r--r--  arch/powerpc/kernel/cpu_specs.h | 29
-rw-r--r--  arch/powerpc/kernel/cpu_specs_40x.h | 280
-rw-r--r--  arch/powerpc/kernel/cpu_specs_44x.h | 304
-rw-r--r--  arch/powerpc/kernel/cpu_specs_47x.h | 74
-rw-r--r--  arch/powerpc/kernel/cpu_specs_85xx.h | 57
-rw-r--r--  arch/powerpc/kernel/cpu_specs_8xx.h | 23
-rw-r--r--  arch/powerpc/kernel/cpu_specs_book3s_32.h | 605
-rw-r--r--  arch/powerpc/kernel/cpu_specs_book3s_64.h | 481
-rw-r--r--  arch/powerpc/kernel/cpu_specs_e500mc.h | 75
-rw-r--r--  arch/powerpc/kernel/cputable.c | 1920
-rw-r--r--  arch/powerpc/kernel/dt_cpu_ftrs.c | 2
-rw-r--r--  arch/powerpc/kernel/entry_32.S | 52
-rw-r--r--  arch/powerpc/kernel/entry_64.S | 6
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S | 70
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 159
-rw-r--r--  arch/powerpc/kernel/head_64.S | 58
-rw-r--r--  arch/powerpc/kernel/head_85xx.S (renamed from arch/powerpc/kernel/head_fsl_booke.S) | 8
-rw-r--r--  arch/powerpc/kernel/head_booke.h | 8
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 53
-rw-r--r--  arch/powerpc/kernel/idle_64e.S (renamed from arch/powerpc/kernel/idle_book3e.S) | 10
-rw-r--r--  arch/powerpc/kernel/idle_85xx.S (renamed from arch/powerpc/kernel/idle_e500.S) | 0
-rw-r--r--  arch/powerpc/kernel/interrupt.c | 10
-rw-r--r--  arch/powerpc/kernel/interrupt_64.S | 140
-rw-r--r--  arch/powerpc/kernel/irq_64.c | 93
-rw-r--r--  arch/powerpc/kernel/kgdb.c | 12
-rw-r--r--  arch/powerpc/kernel/kprobes.c | 8
-rw-r--r--  arch/powerpc/kernel/kvm.c | 8
-rw-r--r--  arch/powerpc/kernel/legacy_serial.c | 2
-rw-r--r--  arch/powerpc/kernel/misc_64.S | 6
-rw-r--r--  arch/powerpc/kernel/optprobes_head.S | 2
-rw-r--r--  arch/powerpc/kernel/paca.c | 38
-rw-r--r--  arch/powerpc/kernel/pci-common.c | 4
-rw-r--r--  arch/powerpc/kernel/pci_dn.c | 1
-rw-r--r--  arch/powerpc/kernel/process.c | 9
-rw-r--r--  arch/powerpc/kernel/prom.c | 63
-rw-r--r--  arch/powerpc/kernel/prom_init.c | 6
-rwxr-xr-x  arch/powerpc/kernel/prom_init_check.sh | 3
-rw-r--r--  arch/powerpc/kernel/reloc_64.S | 14
-rw-r--r--  arch/powerpc/kernel/rtas.c | 79
-rw-r--r--  arch/powerpc/kernel/security.c | 11
-rw-r--r--  arch/powerpc/kernel/setup-common.c | 19
-rw-r--r--  arch/powerpc/kernel/setup.h | 2
-rw-r--r--  arch/powerpc/kernel/setup_32.c | 2
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 43
-rw-r--r--  arch/powerpc/kernel/signal.h | 3
-rw-r--r--  arch/powerpc/kernel/signal_32.c | 2
-rw-r--r--  arch/powerpc/kernel/smp.c | 40
-rw-r--r--  arch/powerpc/kernel/swsusp_85xx.S (renamed from arch/powerpc/kernel/swsusp_booke.S) | 0
-rw-r--r--  arch/powerpc/kernel/swsusp_asm64.S | 16
-rw-r--r--  arch/powerpc/kernel/sys_ppc32.c | 86
-rw-r--r--  arch/powerpc/kernel/syscall.c | 39
-rw-r--r--  arch/powerpc/kernel/syscalls.c | 60
-rw-r--r--  arch/powerpc/kernel/syscalls/syscall.tbl | 28
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 6
-rw-r--r--  arch/powerpc/kernel/systbl.c (renamed from arch/powerpc/kernel/systbl.S) | 33
-rw-r--r--  arch/powerpc/kernel/systbl_chk.sh | 30
-rw-r--r--  arch/powerpc/kernel/time.c | 121
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_low.S | 2
-rw-r--r--  arch/powerpc/kernel/trace/ftrace_mprofile.S | 6
-rw-r--r--  arch/powerpc/kernel/traps.c | 22
-rw-r--r--  arch/powerpc/kernel/udbg.c | 2
-rw-r--r--  arch/powerpc/kernel/udbg_16550.c | 52
-rw-r--r--  arch/powerpc/kernel/vdso.c | 25
-rw-r--r--  arch/powerpc/kernel/vdso/Makefile | 4
-rw-r--r--  arch/powerpc/kernel/vdso/vdso32.lds.S | 1
-rw-r--r--  arch/powerpc/kernel/vdso/vdso64.lds.S | 1
-rw-r--r--  arch/powerpc/kernel/vector.S | 15
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 97
-rw-r--r--  arch/powerpc/kexec/core.c | 6
-rw-r--r--  arch/powerpc/kexec/core_32.c | 2
-rw-r--r--  arch/powerpc/kexec/core_64.c | 2
-rw-r--r--  arch/powerpc/kexec/relocate_32.S | 4
-rw-r--r--  arch/powerpc/kvm/Kconfig | 4
-rw-r--r--  arch/powerpc/kvm/book3s_64_entry.S | 2
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 146
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6
-rw-r--r--  arch/powerpc/kvm/booke_interrupts.S | 4
-rw-r--r--  arch/powerpc/kvm/e500.h | 2
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 1
-rw-r--r--  arch/powerpc/kvm/tm.S | 2
-rw-r--r--  arch/powerpc/lib/code-patching.c | 30
-rw-r--r--  arch/powerpc/lib/copypage_64.S | 7
-rw-r--r--  arch/powerpc/lib/feature-fixups.c | 4
-rw-r--r--  arch/powerpc/lib/string_64.S | 7
-rw-r--r--  arch/powerpc/math-emu/Makefile | 7
-rw-r--r--  arch/powerpc/math-emu/math.c | 18
-rw-r--r--  arch/powerpc/math-emu/math_efp.c | 60
-rw-r--r--  arch/powerpc/mm/book3s32/mmu.c | 8
-rw-r--r--  arch/powerpc/mm/book3s32/tlb.c | 11
-rw-r--r--  arch/powerpc/mm/book3s64/hash_pgtable.c | 4
-rw-r--r--  arch/powerpc/mm/book3s64/hash_tlb.c | 2
-rw-r--r--  arch/powerpc/mm/book3s64/hash_utils.c | 32
-rw-r--r--  arch/powerpc/mm/book3s64/pgtable.c | 11
-rw-r--r--  arch/powerpc/mm/book3s64/radix_pgtable.c | 28
-rw-r--r--  arch/powerpc/mm/book3s64/subpage_prot.c | 13
-rw-r--r--  arch/powerpc/mm/fault.c | 23
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 8
-rw-r--r--  arch/powerpc/mm/init_32.c | 17
-rw-r--r--  arch/powerpc/mm/mem.c | 4
-rw-r--r--  arch/powerpc/mm/mmu_decl.h | 28
-rw-r--r--  arch/powerpc/mm/nohash/Makefile | 6
-rw-r--r--  arch/powerpc/mm/nohash/e500.c (renamed from arch/powerpc/mm/nohash/fsl_book3e.c) | 6
-rw-r--r--  arch/powerpc/mm/nohash/e500_hugetlbpage.c (renamed from arch/powerpc/mm/nohash/book3e_hugetlbpage.c) | 0
-rw-r--r--  arch/powerpc/mm/nohash/tlb.c | 80
-rw-r--r--  arch/powerpc/mm/nohash/tlb_low.S | 6
-rw-r--r--  arch/powerpc/mm/nohash/tlb_low_64e.S | 10
-rw-r--r--  arch/powerpc/mm/numa.c | 6
-rw-r--r--  arch/powerpc/mm/pgtable_32.c | 7
-rw-r--r--  arch/powerpc/mm/ptdump/Makefile | 2
-rw-r--r--  arch/powerpc/perf/bhrb.S | 2
-rw-r--r--  arch/powerpc/perf/callchain_32.c | 2
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 27
-rw-r--r--  arch/powerpc/perf/imc-pmu.c | 10
-rw-r--r--  arch/powerpc/platforms/44x/ppc476.c | 2
-rw-r--r--  arch/powerpc/platforms/512x/clock-commonclk.c | 8
-rw-r--r--  arch/powerpc/platforms/52xx/media5200.c | 3
-rw-r--r--  arch/powerpc/platforms/83xx/mpc832x_rdb.c | 6
-rw-r--r--  arch/powerpc/platforms/85xx/Kconfig | 5
-rw-r--r--  arch/powerpc/platforms/85xx/corenet_generic.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/ge_imp3a.c | 6
-rw-r--r--  arch/powerpc/platforms/85xx/ksi8560.c | 2
-rw-r--r--  arch/powerpc/platforms/85xx/mpc85xx_cds.c | 1
-rw-r--r--  arch/powerpc/platforms/85xx/qemu_e500.c | 4
-rw-r--r--  arch/powerpc/platforms/85xx/sgy_cts1000.c | 35
-rw-r--r--  arch/powerpc/platforms/8xx/tqm8xx_setup.c | 3
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype | 71
-rw-r--r--  arch/powerpc/platforms/book3s/vas-api.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/cbe_regs.c | 37
-rw-r--r--  arch/powerpc/platforms/cell/iommu.c | 4
-rw-r--r--  arch/powerpc/platforms/cell/setup.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spu_callbacks.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/spu_manage.c | 2
-rw-r--r--  arch/powerpc/platforms/cell/spufs/spufs.h | 2
-rw-r--r--  arch/powerpc/platforms/chrp/chrp.h | 1
-rw-r--r--  arch/powerpc/platforms/embedded6xx/holly.c | 6
-rw-r--r--  arch/powerpc/platforms/embedded6xx/ls_uart.c | 2
-rw-r--r--  arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c | 3
-rw-r--r--  arch/powerpc/platforms/embedded6xx/wii.c | 15
-rw-r--r--  arch/powerpc/platforms/maple/time.c | 1
-rw-r--r--  arch/powerpc/platforms/pasemi/misc.c | 3
-rw-r--r--  arch/powerpc/platforms/pasemi/pci.c | 10
-rw-r--r--  arch/powerpc/platforms/powermac/feature.c | 15
-rw-r--r--  arch/powerpc/platforms/powermac/low_i2c.c | 1
-rw-r--r--  arch/powerpc/platforms/powermac/pfunc_base.c | 2
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c | 1
-rw-r--r--  arch/powerpc/platforms/powermac/udbg_scc.c | 8
-rw-r--r--  arch/powerpc/platforms/powernv/idle.c | 3
-rw-r--r--  arch/powerpc/platforms/powernv/ocxl.c | 4
-rw-r--r--  arch/powerpc/platforms/powernv/opal-core.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-powercap.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal-psr.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal-sensor-groups.c | 6
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 3
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c | 22
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig | 8
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile | 3
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c | 88
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c | 15
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S | 4
-rw-r--r--  arch/powerpc/platforms/pseries/hvcserver.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/ibmebus.c | 6
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 11
-rw-r--r--  arch/powerpc/platforms/pseries/mobility.c | 17
-rw-r--r--  arch/powerpc/platforms/pseries/reconfig.c | 5
-rw-r--r--  arch/powerpc/platforms/pseries/setup.c | 49
-rw-r--r--  arch/powerpc/platforms/pseries/vas.c | 8
-rw-r--r--  arch/powerpc/sysdev/fsl_msi.c | 2
-rw-r--r--  arch/powerpc/sysdev/fsl_pci.c | 9
-rw-r--r--  arch/powerpc/sysdev/fsl_rio.c | 2
-rw-r--r--  arch/powerpc/sysdev/mpic_msgr.c | 9
-rw-r--r--  arch/powerpc/sysdev/xics/ics-rtas.c | 22
-rw-r--r--  arch/powerpc/sysdev/xive/common.c | 2
-rw-r--r--  arch/powerpc/sysdev/xive/native.c | 15
-rw-r--r--  arch/powerpc/xmon/ppc.h | 2
-rw-r--r--  arch/powerpc/xmon/spr_access.S | 4
-rw-r--r--  arch/powerpc/xmon/xmon.c | 16
-rw-r--r--  arch/riscv/Kconfig | 4
-rw-r--r--  arch/riscv/Makefile | 2
-rw-r--r--  arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts | 2
-rw-r--r--  arch/riscv/include/asm/gpr-num.h | 8
-rw-r--r--  arch/riscv/include/asm/hwcap.h | 4
-rw-r--r--  arch/riscv/include/asm/insn-def.h | 137
-rw-r--r--  arch/riscv/include/asm/kvm_host.h | 1
-rw-r--r--  arch/riscv/include/asm/kvm_vcpu_sbi.h | 4
-rw-r--r--  arch/riscv/include/asm/processor.h | 5
-rw-r--r--  arch/riscv/include/uapi/asm/kvm.h | 4
-rw-r--r--  arch/riscv/kernel/Makefile | 2
-rw-r--r--  arch/riscv/kernel/cpu.c | 1
-rw-r--r--  arch/riscv/kernel/cpufeature.c | 1
-rw-r--r--  arch/riscv/kernel/vdso.c | 3
-rw-r--r--  arch/riscv/kvm/Kconfig | 1
-rw-r--r--  arch/riscv/kvm/main.c | 2
-rw-r--r--  arch/riscv/kvm/tlb.c | 155
-rw-r--r--  arch/riscv/kvm/vcpu.c | 60
-rw-r--r--  arch/riscv/kvm/vcpu_exit.c | 39
-rw-r--r--  arch/riscv/mm/dma-noncoherent.c | 2
-rw-r--r--  arch/s390/Makefile | 2
-rw-r--r--  arch/s390/boot/startup.c | 8
-rw-r--r--  arch/s390/boot/version.c | 1
-rw-r--r--  arch/s390/crypto/Kconfig | 135
-rw-r--r--  arch/s390/include/asm/abs_lowcore.h | 17
-rw-r--r--  arch/s390/include/asm/ccwdev.h | 1
-rw-r--r--  arch/s390/include/asm/ctl_reg.h | 3
-rw-r--r--  arch/s390/include/asm/lowcore.h | 4
-rw-r--r--  arch/s390/include/asm/maccess.h | 17
-rw-r--r--  arch/s390/include/asm/os_info.h | 14
-rw-r--r--  arch/s390/include/asm/pai.h | 6
-rw-r--r--  arch/s390/include/asm/pci.h | 1
-rw-r--r--  arch/s390/include/asm/pgtable.h | 4
-rw-r--r--  arch/s390/include/asm/processor.h | 20
-rw-r--r--  arch/s390/include/asm/smp.h | 4
-rw-r--r--  arch/s390/kernel/Makefile | 8
-rw-r--r--  arch/s390/kernel/abs_lowcore.c | 95
-rw-r--r--  arch/s390/kernel/crash_dump.c | 40
-rw-r--r--  arch/s390/kernel/debug.c | 2
-rw-r--r--  arch/s390/kernel/early.c | 2
-rw-r--r--  arch/s390/kernel/ipl.c | 9
-rw-r--r--  arch/s390/kernel/machine_kexec.c | 8
-rw-r--r--  arch/s390/kernel/os_info.c | 10
-rw-r--r--  arch/s390/kernel/perf_cpum_cf.c | 1
-rw-r--r--  arch/s390/kernel/perf_pai_crypto.c | 1
-rw-r--r--  arch/s390/kernel/perf_pai_ext.c | 671
-rw-r--r--  arch/s390/kernel/setup.c | 47
-rw-r--r--  arch/s390/kernel/smp.c | 97
-rw-r--r--  arch/s390/kernel/vdso.c | 3
-rw-r--r--  arch/s390/lib/delay.c | 11
-rw-r--r--  arch/s390/lib/uaccess.c | 3
-rw-r--r--  arch/s390/mm/dump_pagetables.c | 20
-rw-r--r--  arch/s390/mm/fault.c | 15
-rw-r--r--  arch/s390/mm/gmap.c | 6
-rw-r--r--  arch/s390/mm/hugetlbpage.c | 10
-rw-r--r--  arch/s390/mm/init.c | 2
-rw-r--r--  arch/s390/mm/maccess.c | 175
-rw-r--r--  arch/s390/mm/vmem.c | 104
-rw-r--r--  arch/s390/pci/pci_dma.c | 2
-rw-r--r--  arch/sh/Makefile | 2
-rw-r--r--  arch/sh/configs/apsh4a3a_defconfig | 2
-rw-r--r--  arch/sh/configs/apsh4ad0a_defconfig | 2
-rw-r--r--  arch/sh/configs/ecovec24_defconfig | 2
-rw-r--r--  arch/sh/configs/edosk7760_defconfig | 2
-rw-r--r--  arch/sh/configs/magicpanelr2_defconfig | 2
-rw-r--r--  arch/sh/configs/polaris_defconfig | 2
-rw-r--r--  arch/sh/configs/r7780mp_defconfig | 2
-rw-r--r--  arch/sh/configs/r7785rp_defconfig | 2
-rw-r--r--  arch/sh/configs/rsk7203_defconfig | 2
-rw-r--r--  arch/sh/configs/sdk7780_defconfig | 2
-rw-r--r--  arch/sh/configs/sdk7786_defconfig | 1
-rw-r--r--  arch/sh/configs/se7712_defconfig | 2
-rw-r--r--  arch/sh/configs/se7721_defconfig | 2
-rw-r--r--  arch/sh/configs/sh2007_defconfig | 2
-rw-r--r--  arch/sh/configs/sh7757lcr_defconfig | 2
-rw-r--r--  arch/sh/configs/sh7785lcr_32bit_defconfig | 2
-rw-r--r--  arch/sh/configs/urquell_defconfig | 3
-rw-r--r--  arch/sh/include/asm/hw_breakpoint.h | 5
-rw-r--r--  arch/sh/include/asm/processor_32.h | 3
-rw-r--r--  arch/sh/kernel/Makefile | 4
-rw-r--r--  arch/sh/kernel/process_32.c | 5
-rw-r--r--  arch/sh/mm/Kconfig | 2
-rw-r--r--  arch/sparc/Kconfig | 2
-rw-r--r--  arch/sparc/Makefile | 2
-rw-r--r--  arch/sparc/crypto/Kconfig | 90
-rw-r--r--  arch/sparc/include/asm/processor_32.h | 3
-rw-r--r--  arch/sparc/include/asm/processor_64.h | 3
-rw-r--r--  arch/sparc/kernel/Makefile | 3
-rw-r--r--  arch/um/configs/i386_defconfig | 2
-rw-r--r--  arch/um/configs/x86_64_defconfig | 2
-rw-r--r--  arch/um/include/asm/processor-generic.h | 4
-rw-r--r--  arch/um/kernel/tlb.c | 14
-rw-r--r--  arch/x86/Kconfig | 10
-rw-r--r--  arch/x86/Makefile | 5
-rw-r--r--  arch/x86/boot/Makefile | 1
-rw-r--r--  arch/x86/boot/compressed/Makefile | 1
-rw-r--r--  arch/x86/boot/compressed/kaslr.c | 1
-rw-r--r--  arch/x86/boot/version.c | 1
-rw-r--r--  arch/x86/configs/i386_defconfig | 6
-rw-r--r--  arch/x86/configs/x86_64_defconfig | 5
-rw-r--r--  arch/x86/crypto/Kconfig | 484
-rw-r--r--  arch/x86/crypto/Makefile | 3
-rw-r--r--  arch/x86/crypto/aria-aesni-avx-asm_64.S | 1303
-rw-r--r--  arch/x86/crypto/aria-avx.h | 16
-rw-r--r--  arch/x86/crypto/aria_aesni_avx_glue.c | 213
-rw-r--r--  arch/x86/crypto/sha512_ssse3_glue.c | 10
-rw-r--r--  arch/x86/entry/vdso/Makefile | 3
-rw-r--r--  arch/x86/entry/vdso/vma.c | 9
-rw-r--r--  arch/x86/events/Makefile | 2
-rw-r--r--  arch/x86/events/amd/Makefile | 2
-rw-r--r--  arch/x86/events/amd/brs.c | 69
-rw-r--r--  arch/x86/events/amd/core.c | 210
-rw-r--r--  arch/x86/events/amd/ibs.c | 360
-rw-r--r--  arch/x86/events/amd/lbr.c | 439
-rw-r--r--  arch/x86/events/core.c | 61
-rw-r--r--  arch/x86/events/intel/core.c | 101
-rw-r--r--  arch/x86/events/intel/cstate.c | 1
-rw-r--r--  arch/x86/events/intel/ds.c | 55
-rw-r--r--  arch/x86/events/intel/lbr.c | 273
-rw-r--r--  arch/x86/events/intel/p4.c | 37
-rw-r--r--  arch/x86/events/intel/uncore.c | 1
-rw-r--r--  arch/x86/events/msr.c | 1
-rw-r--r--  arch/x86/events/perf_event.h | 130
-rw-r--r--  arch/x86/events/perf_event_flags.h | 22
-rw-r--r--  arch/x86/events/utils.c | 251
-rw-r--r--  arch/x86/hyperv/hv_init.c | 6
-rw-r--r--  arch/x86/include/asm/amd-ibs.h | 16
-rw-r--r--  arch/x86/include/asm/checksum.h | 16
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 2
-rw-r--r--  arch/x86/include/asm/ftrace.h | 1
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h | 5
-rw-r--r--  arch/x86/include/asm/kmsan.h | 87
-rw-r--r--  arch/x86/include/asm/kprobes.h | 2
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 12
-rw-r--r--  arch/x86/include/asm/msr-index.h | 5
-rw-r--r--  arch/x86/include/asm/page_64.h | 7
-rw-r--r--  arch/x86/include/asm/perf_event.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable.h | 9
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 47
-rw-r--r--  arch/x86/include/asm/processor.h | 3
-rw-r--r--  arch/x86/include/asm/qspinlock_paravirt.h | 13
-rw-r--r--  arch/x86/include/asm/sparsemem.h | 2
-rw-r--r--  arch/x86/include/asm/string_64.h | 23
-rw-r--r--  arch/x86/include/asm/uaccess.h | 22
-rw-r--r--  arch/x86/include/asm/vmx.h | 2
-rw-r--r--  arch/x86/kernel/Makefile | 12
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 1
-rw-r--r--  arch/x86/kernel/dumpstack.c | 6
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 2
-rw-r--r--  arch/x86/kernel/process_64.c | 1
-rw-r--r--  arch/x86/kernel/smpboot.c | 4
-rw-r--r--  arch/x86/kernel/tboot.c | 2
-rw-r--r--  arch/x86/kernel/unwind_frame.c | 11
-rw-r--r--  arch/x86/kvm/Kconfig | 3
-rw-r--r--  arch/x86/kvm/pmu.c | 20
-rw-r--r--  arch/x86/kvm/svm/pmu.c | 117
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 29
-rw-r--r--  arch/x86/lib/Makefile | 2
-rw-r--r--  arch/x86/lib/iomem.c | 5
-rw-r--r--  arch/x86/mm/Makefile | 5
-rw-r--r--  arch/x86/mm/fault.c | 23
-rw-r--r--  arch/x86/mm/init.c | 2
-rw-r--r--  arch/x86/mm/init_64.c | 21
-rw-r--r--  arch/x86/mm/ioremap.c | 3
-rw-r--r--  arch/x86/mm/kmsan_shadow.c | 20
-rw-r--r--  arch/x86/mm/pat/set_memory.c | 49
-rw-r--r--  arch/x86/mm/pgtable.c | 5
-rw-r--r--  arch/x86/realmode/rm/Makefile | 1
-rw-r--r--  arch/x86/xen/Kconfig | 9
-rw-r--r--  arch/x86/xen/enlighten_hvm.c | 2
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 101
-rw-r--r--  arch/x86/xen/pmu.c | 71
-rw-r--r--  arch/x86/xen/smp_pv.c | 2
-rw-r--r--  arch/xtensa/Kconfig | 2
-rw-r--r--  arch/xtensa/Makefile | 2
-rw-r--r--  arch/xtensa/configs/audio_kc705_defconfig | 2
-rw-r--r--  arch/xtensa/configs/cadence_csp_defconfig | 2
-rw-r--r--  arch/xtensa/configs/generic_kc705_defconfig | 2
-rw-r--r--  arch/xtensa/configs/nommu_kc705_defconfig | 2
-rw-r--r--  arch/xtensa/configs/smp_lx200_defconfig | 2
-rw-r--r--  arch/xtensa/configs/virt_defconfig | 2
-rw-r--r--  arch/xtensa/configs/xip_kc705_defconfig | 2
-rw-r--r--  arch/xtensa/include/asm/elf.h | 24
-rw-r--r--  arch/xtensa/include/asm/processor.h | 6
-rw-r--r--  arch/xtensa/include/uapi/asm/mman.h | 2
-rw-r--r--  arch/xtensa/include/uapi/asm/ptrace.h | 4
-rw-r--r--  arch/xtensa/kernel/Makefile | 4
-rw-r--r--  arch/xtensa/kernel/syscall.c | 18
675 files changed, 15787 insertions, 6310 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 266862428a84..8f138e580d1a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1416,6 +1416,14 @@ config DYNAMIC_SIGFRAME
 config HAVE_ARCH_NODE_DEV_GROUP
 	bool
 
+config ARCH_HAS_NONLEAF_PMD_YOUNG
+	bool
+	help
+	  Architectures that select this option are capable of setting the
+	  accessed bit in non-leaf PMD entries when using them as part of linear
+	  address translations. Page table walkers that clear the accessed bit
+	  may use this capability to reduce their search space.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile
index 881cb913e23a..45158024085e 100644
--- a/arch/alpha/Makefile
+++ b/arch/alpha/Makefile
@@ -36,8 +36,6 @@ cflags-y				+= $(cpuflags-y)
 # BWX is most important, but we don't really want any emulation ever.
 KBUILD_CFLAGS += $(cflags-y) -Wa,-mev6
 
-head-y := arch/alpha/kernel/head.o
-
 libs-y				+= arch/alpha/lib/
 
 # export what is needed by arch/alpha/boot/Makefile
diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index 7e9336930880..6a39fe8ce9e5 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -65,7 +65,7 @@ CONFIG_NFSD=m
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_ALPHA_LEGACY_START_ADDRESS=y
 CONFIG_MATHEMU=y
 CONFIG_CRYPTO_HMAC=y
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 43e234c518b1..714abe494e5f 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -36,8 +36,6 @@ extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 
 /* Free all resources held by a thread. */
 struct task_struct;
-extern void release_thread(struct task_struct *);
-
 unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 4aa996423b0d..763929e814e9 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -76,6 +76,8 @@
 
 #define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
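MADV_COLLAPSE is new userspace ABI, so a usage sketch may be useful: a process asks for a synchronous, best-effort collapse of a populated range into huge pages, and madvise() returns 0 on success or sets errno on failure. The fallback define covers libc headers that predate the flag (the value matches the hunk above); the rest is standard C:

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>

    #ifndef MADV_COLLAPSE
    #define MADV_COLLAPSE 25
    #endif

    int main(void)
    {
        size_t len = 4 * 1024 * 1024;
        char *buf = aligned_alloc(2 * 1024 * 1024, len);

        if (!buf)
            return 1;
        for (size_t i = 0; i < len; i += 4096)
            buf[i] = 1;    /* populate, so there is something to collapse */
        if (madvise(buf, len, MADV_COLLAPSE))
            perror("madvise(MADV_COLLAPSE)");
        free(buf);
        return 0;
    }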
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index bd82c8a12f0b..fb4efec7cbc7 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -3,11 +3,11 @@
 # Makefile for the linux kernel.
 #
 
-extra-y		:= head.o vmlinux.lds
+extra-y		:= vmlinux.lds
 asflags-y	:= $(KBUILD_CFLAGS)
 ccflags-y	:= -Wno-sign-compare
 
-obj-y    := entry.o traps.o process.o osf_sys.o irq.o \
+obj-y    := head.o entry.o traps.o process.o osf_sys.o irq.o \
 	    irq_alpha.o signal.o setup.o ptrace.o time.o \
 	    systbls.o err_common.o io.o bugs.o termios.o
 
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 68ec314d3fac..c54469b369cb 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1278,18 +1278,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	return addr;
 }
 
-SYSCALL_DEFINE3(osf_readv, unsigned long, fd,
-		const struct iovec __user *, vector, unsigned long, count)
-{
-	return sys_readv(fd, vector, count);
-}
-
-SYSCALL_DEFINE3(osf_writev, unsigned long, fd,
-		const struct iovec __user *, vector, unsigned long, count)
-{
-	return sys_writev(fd, vector, count);
-}
-
 SYSCALL_DEFINE2(osf_getpriority, int, which, int, who)
 {
 	int prio = sys_getpriority(which, who);
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index e2e25f8b5e76..dbf1bc5e2ad2 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -225,11 +225,6 @@ flush_thread(void)
 	current_thread_info()->pcb.unique = 0;
 }
 
-void
-release_thread(struct task_struct *dead_task)
-{
-}
-
 /*
  * Copy architecture-specific thread state
  */
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index b4fbbba30aa2..33bf3a627002 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -491,9 +491,9 @@ setup_arch(char **cmdline_p)
 	   boot flags depending on the boot mode, we need some shorthand.
 	   This should do for installation.  */
 	if (strcmp(COMMAND_LINE, "INSTALL") == 0) {
-		strlcpy(command_line, "root=/dev/fd0 load_ramdisk=1", sizeof command_line);
+		strscpy(command_line, "root=/dev/fd0 load_ramdisk=1", sizeof(command_line));
 	} else {
-		strlcpy(command_line, COMMAND_LINE, sizeof command_line);
+		strscpy(command_line, COMMAND_LINE, sizeof(command_line));
 	}
 	strcpy(boot_command_line, command_line);
 	*cmdline_p = command_line;
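The strlcpy() to strscpy() conversion above is more than a spelling change: strlcpy() returns strlen(src), so it must read the entire source even when the destination is smaller, while strscpy() never reads past what it copies and reports truncation explicitly. A plain-C model of the kernel semantics (the real implementation lives in lib/string.c and works a word at a time):

    #include <errno.h>
    #include <stddef.h>

    /* Copy up to size - 1 bytes, always NUL-terminate, and return the
     * number of bytes copied, or -E2BIG if src did not fit. */
    static long strscpy_model(char *dst, const char *src, size_t size)
    {
        size_t i;

        if (!size)
            return -E2BIG;
        for (i = 0; i < size - 1 && src[i]; i++)
            dst[i] = src[i];
        dst[i] = '\0';
        return src[i] ? -E2BIG : (long)i;
    }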
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 3515bc4f16a4..8ebacf37a8cf 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -125,8 +125,8 @@
 116	common	osf_gettimeofday		sys_osf_gettimeofday
 117	common	osf_getrusage			sys_osf_getrusage
 118	common	getsockopt			sys_getsockopt
-120	common	readv				sys_osf_readv
-121	common	writev				sys_osf_writev
+120	common	readv				sys_readv
+121	common	writev				sys_writev
 122	common	osf_settimeofday		sys_osf_settimeofday
 123	common	fchown				sys_fchown
 124	common	fchmod				sys_fchmod
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 9e3653253ef2..d9a13ccf89a3 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -554,7 +554,7 @@ config ARC_BUILTIN_DTB_NAME
 
 endmenu	 # "ARC Architecture Configuration"
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	default "12" if ARC_HUGEPAGE_16M
 	default "11"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index efc54f3e35e0..329400a1c355 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -82,8 +82,6 @@ KBUILD_CFLAGS	+= $(cflags-y)
 KBUILD_AFLAGS	+= $(KBUILD_CFLAGS)
 KBUILD_LDFLAGS	+= $(ldflags-y)
 
-head-y		:= arch/arc/kernel/head.o
-
 # w/o this dtb won't embed into kernel binary
 core-y		+= arch/arc/boot/dts/
 
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index d93b65008d4a..4a94d1684ed6 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -90,7 +90,7 @@ CONFIG_TMPFS=y
 CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
 CONFIG_HEADERS_INSTALL=y
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 54db9d7bb562..fb844fce1ab6 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -43,9 +43,6 @@ struct task_struct;
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1)
 
-/* Free all resources held by a thread */
-#define release_thread(thread) do { } while (0)
-
 /*
  * A lot of busy-wait loops in SMP are based off of non-volatile data otherwise
  * get optimised away by gcc
diff --git a/arch/arc/kernel/Makefile b/arch/arc/kernel/Makefile
index 8c4fc4b54c14..0723d888ac44 100644
--- a/arch/arc/kernel/Makefile
+++ b/arch/arc/kernel/Makefile
@@ -3,7 +3,7 @@
 # Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 #
 
-obj-y	:= arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
+obj-y	:= head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
 obj-y	+= signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
 obj-$(CONFIG_ISA_ARCOMPACT)		+= entry-compact.o intc-compact.o
 obj-$(CONFIG_ISA_ARCV2)			+= entry-arcv2.o intc-arcv2.o
@@ -31,4 +31,4 @@ else
 obj-y += ctx_sw_asm.o
 endif
 
-extra-y := vmlinux.lds head.o
+extra-y := vmlinux.lds
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 22dc1d6936bc..a08c9d092a33 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1362,7 +1362,7 @@ config ARM_MODULE_PLTS
 	  Disabling this is usually safe for small single-platform
 	  configurations. If unsure, say y.
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	default "12" if SOC_AM33XX
 	default "9" if SA1111
@@ -1850,8 +1850,4 @@ config ARCH_HIBERNATION_POSSIBLE
 
 endmenu
 
-if CRYPTO
-source "arch/arm/crypto/Kconfig"
-endif
-
 source "arch/arm/Kconfig.assembler"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 0e04bc6b2ad3..c846119c448f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -137,9 +137,6 @@ KBUILD_AFLAGS	+=$(CFLAGS_ABI) $(AFLAGS_ISA) $(arch-y) $(tune-y) -include asm/uni
 
 CHECKFLAGS	+= -D__arm__
 
-#Default value
-head-y		:= arch/arm/kernel/head$(MMUEXT).o
-
 # Text offset. This list is sorted numerically by address in order to
 # provide a means to avoid/resolve conflicts in multi-arch kernels.
 # Note: the 32kB below this value is reserved for use by the kernel
diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi
index c89092c3905b..04f98d1dbb97 100644
--- a/arch/arm/boot/dts/aspeed-g5.dtsi
+++ b/arch/arm/boot/dts/aspeed-g5.dtsi
@@ -262,6 +262,14 @@
 				quality = <100>;
 			};
 
+			hace: crypto@1e6e3000 {
+				compatible = "aspeed,ast2500-hace";
+				reg = <0x1e6e3000 0x100>;
+				interrupts = <4>;
+				clocks = <&syscon ASPEED_CLK_GATE_YCLK>;
+				resets = <&syscon ASPEED_RESET_HACE>;
+			};
+
 			gfx: display@1e6e6000 {
 				compatible = "aspeed,ast2500-gfx", "syscon";
 				reg = <0x1e6e6000 0x1000>;
diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi
index 1387a763a6a5..ebbcfe445d9c 100644
--- a/arch/arm/boot/dts/aspeed-g6.dtsi
+++ b/arch/arm/boot/dts/aspeed-g6.dtsi
@@ -323,6 +323,14 @@
 			#size-cells = <1>;
 			ranges;
 
+			hace: crypto@1e6d0000 {
+				compatible = "aspeed,ast2600-hace";
+				reg = <0x1e6d0000 0x200>;
+				interrupts = <GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&syscon ASPEED_CLK_GATE_YCLK>;
+				resets = <&syscon ASPEED_RESET_HACE>;
+			};
+
 			syscon: syscon@1e6e2000 {
 				compatible = "aspeed,ast2600-scu", "syscon", "simple-mfd";
 				reg = <0x1e6e2000 0x1000>;
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index deb24a4bd011..31e8e0c0ee1b 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -32,7 +32,6 @@ CONFIG_KERNEL_MODE_NEON=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_ADVANCED_DEBUG=y
 CONFIG_ENERGY_MODEL=y
-CONFIG_ARM_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM_NEON=m
 CONFIG_CRYPTO_SHA256_ARM=m
 CONFIG_CRYPTO_SHA512_ARM=m
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 6429c4106ab5..078d61b758a9 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -31,7 +31,7 @@ CONFIG_SOC_VF610=y
 CONFIG_SMP=y
 CONFIG_ARM_PSCI=y
 CONFIG_HIGHMEM=y
-CONFIG_FORCE_MAX_ZONEORDER=14
+CONFIG_ARCH_FORCE_MAX_ORDER=14
 CONFIG_CMDLINE="noinitrd console=ttymxc0,115200"
 CONFIG_KEXEC=y
 CONFIG_CPU_FREQ=y
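The FORCE_MAX_ZONEORDER to ARCH_FORCE_MAX_ORDER rename in these defconfigs (and in the Kconfig hunks elsewhere in this series) carries no semantic change: the value still bounds the buddy allocator, which at this point in the tree serves allocations up to order MAX_ORDER - 1. A quick sanity check of what the i.MX value of 14 buys, assuming 4 KiB pages:

    #include <stdio.h>

    int main(void)
    {
        unsigned int max_order = 14;    /* CONFIG_ARCH_FORCE_MAX_ORDER above */
        unsigned int page_shift = 12;   /* 4 KiB pages */
        unsigned long bytes = 1UL << (max_order - 1 + page_shift);

        printf("largest buddy block: %lu MiB\n", bytes >> 20);    /* 32 MiB */
        return 0;
    }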
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index 6f6b5d0918f7..a2e25bf843cc 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -26,7 +26,7 @@ CONFIG_THUMB2_KERNEL=y
 # CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11 is not set
 # CONFIG_ARM_PATCH_IDIV is not set
 CONFIG_HIGHMEM=y
-CONFIG_FORCE_MAX_ZONEORDER=12
+CONFIG_ARCH_FORCE_MAX_ORDER=12
 CONFIG_SECCOMP=y
 CONFIG_KEXEC=y
 CONFIG_EFI=y
@@ -44,7 +44,6 @@ CONFIG_ARM_CPUIDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_KERNEL_MODE_NEON=y
-CONFIG_ARM_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM_NEON=m
 CONFIG_CRYPTO_SHA1_ARM_CE=m
 CONFIG_CRYPTO_SHA2_ARM_CE=m
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 54a6dc0aa5a4..b61b2e3d116b 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -132,7 +132,6 @@ CONFIG_ARM_EXYNOS_CPUIDLE=y
 CONFIG_ARM_TEGRA_CPUIDLE=y
 CONFIG_ARM_QCOM_SPM_CPUIDLE=y
 CONFIG_KERNEL_MODE_NEON=y
-CONFIG_ARM_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM_NEON=m
 CONFIG_CRYPTO_SHA1_ARM_CE=m
 CONFIG_CRYPTO_SHA2_ARM_CE=m
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 965853c1c530..2a66850d3288 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -53,7 +53,6 @@ CONFIG_CPU_IDLE=y
 CONFIG_ARM_CPUIDLE=y
 CONFIG_KERNEL_MODE_NEON=y
 CONFIG_PM_DEBUG=y
-CONFIG_ARM_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM_NEON=m
 CONFIG_CRYPTO_SHA256_ARM=m
 CONFIG_CRYPTO_SHA512_ARM=m
diff --git a/arch/arm/configs/oxnas_v6_defconfig b/arch/arm/configs/oxnas_v6_defconfig
index d206c4f04490..70a67b3fc91b 100644
--- a/arch/arm/configs/oxnas_v6_defconfig
+++ b/arch/arm/configs/oxnas_v6_defconfig
@@ -12,7 +12,7 @@ CONFIG_ARCH_OXNAS=y
 CONFIG_MACH_OX820=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=16
-CONFIG_FORCE_MAX_ZONEORDER=12
+CONFIG_ARCH_FORCE_MAX_ORDER=12
 CONFIG_SECCOMP=y
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index ca6d0049362b..d60cc9cc4c21 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -21,7 +21,7 @@ CONFIG_MACH_AKITA=y
 CONFIG_MACH_BORZOI=y
 CONFIG_PXA_SYSTEMS_CPLDS=y
 CONFIG_AEABI=y
-CONFIG_FORCE_MAX_ZONEORDER=9
+CONFIG_ARCH_FORCE_MAX_ORDER=9
 CONFIG_CMDLINE="root=/dev/ram0 ro"
 CONFIG_KEXEC=y
 CONFIG_CPU_FREQ=y
@@ -34,7 +34,6 @@ CONFIG_CPUFREQ_DT=m
 CONFIG_ARM_PXA2xx_CPUFREQ=m
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_CPUIDLE=y
-CONFIG_ARM_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM=m
 CONFIG_CRYPTO_SHA256_ARM=m
 CONFIG_CRYPTO_SHA512_ARM=m
diff --git a/arch/arm/configs/sama7_defconfig b/arch/arm/configs/sama7_defconfig
index 72af50d9e48a..8f28c9d443f0 100644
--- a/arch/arm/configs/sama7_defconfig
+++ b/arch/arm/configs/sama7_defconfig
@@ -19,7 +19,7 @@ CONFIG_ATMEL_CLOCKSOURCE_TCB=y
 # CONFIG_CACHE_L2X0 is not set
 # CONFIG_ARM_PATCH_IDIV is not set
 # CONFIG_CPU_SW_DOMAIN_PAN is not set
-CONFIG_FORCE_MAX_ZONEORDER=15
+CONFIG_ARCH_FORCE_MAX_ORDER=15
 CONFIG_UACCESS_WITH_MEMCPY=y
 # CONFIG_ATAGS is not set
 CONFIG_CMDLINE="console=ttyS0,115200 earlyprintk ignore_loglevel"
diff --git a/arch/arm/configs/sp7021_defconfig b/arch/arm/configs/sp7021_defconfig
index aa7dfd670db5..5bca2eb59b86 100644
--- a/arch/arm/configs/sp7021_defconfig
+++ b/arch/arm/configs/sp7021_defconfig
@@ -17,7 +17,7 @@ CONFIG_ARCH_SUNPLUS=y
 # CONFIG_VDSO is not set
 CONFIG_SMP=y
 CONFIG_THUMB2_KERNEL=y
-CONFIG_FORCE_MAX_ZONEORDER=12
+CONFIG_ARCH_FORCE_MAX_ORDER=12
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_MODULES=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 149a5bd6b88c..3858c4d4cb98 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -1,92 +1,156 @@
 # SPDX-License-Identifier: GPL-2.0
 
-menuconfig ARM_CRYPTO
-	bool "ARM Accelerated Cryptographic Algorithms"
-	depends on ARM
+menu "Accelerated Cryptographic Algorithms for CPU (arm)"
+
+config CRYPTO_CURVE25519_NEON
+	tristate "Public key crypto: Curve25519 (NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_LIB_CURVE25519_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+	help
+	  Curve25519 algorithm
+
+	  Architecture: arm with
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_GHASH_ARM_CE
+	tristate "Hash functions: GHASH (PMULL/NEON/ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_CRYPTD
+	select CRYPTO_GF128MUL
 	help
-	  Say Y here to choose from a selection of cryptographic algorithms
-	  implemented using ARM specific CPU features or instructions.
+	  GCM GHASH function (NIST SP800-38D)
 
-if ARM_CRYPTO
+	  Architecture: arm using
+	  - PMULL (Polynomial Multiply Long) instructions
+	  - NEON (Advanced SIMD) extensions
+	  - ARMv8 Crypto Extensions
+
+	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
+	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
+	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
+	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
+
+config CRYPTO_NHPOLY1305_NEON
+	tristate "Hash functions: NHPoly1305 (NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_NHPOLY1305
+	help
+	  NHPoly1305 hash function (Adiantum)
+
+	  Architecture: arm using:
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_POLY1305_ARM
+	tristate "Hash functions: Poly1305 (NEON)"
+	select CRYPTO_HASH
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+	help
+	  Poly1305 authenticator algorithm (RFC7539)
+
+	  Architecture: arm optionally using
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_BLAKE2S_ARM
+	bool "Hash functions: BLAKE2s"
+	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+	help
+	  BLAKE2s cryptographic hash function (RFC 7693)
+
+	  Architecture: arm
+
+	  This is faster than the generic implementations of BLAKE2s and
+	  BLAKE2b, but slower than the NEON implementation of BLAKE2b.
+	  There is no NEON implementation of BLAKE2s, since NEON doesn't
+	  really help with it.
+
+config CRYPTO_BLAKE2B_NEON
+	tristate "Hash functions: BLAKE2b (NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_BLAKE2B
+	help
+	  BLAKE2b cryptographic hash function (RFC 7693)
+
+	  Architecture: arm using
+	  - NEON (Advanced SIMD) extensions
+
+	  BLAKE2b digest algorithm optimized with ARM NEON instructions.
+	  On ARM processors that have NEON support but not the ARMv8
+	  Crypto Extensions, typically this BLAKE2b implementation is
+	  much faster than the SHA-2 family and slightly faster than
+	  SHA-1.
 
 config CRYPTO_SHA1_ARM
-	tristate "SHA1 digest algorithm (ARM-asm)"
+	tristate "Hash functions: SHA-1"
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
 	help
-	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using optimized ARM assembler.
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: arm
 
 config CRYPTO_SHA1_ARM_NEON
-	tristate "SHA1 digest algorithm (ARM NEON)"
+	tristate "Hash functions: SHA-1 (NEON)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SHA1_ARM
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
 	help
-	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using optimized ARM NEON assembly, when NEON instructions are
-	  available.
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: arm using
+	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_SHA1_ARM_CE
-	tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
+	tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SHA1_ARM
 	select CRYPTO_HASH
 	help
-	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
-	  using special ARMv8 Crypto Extensions.
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: arm using ARMv8 Crypto Extensions
 
 config CRYPTO_SHA2_ARM_CE
-	tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
+	tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SHA256_ARM
 	select CRYPTO_HASH
 	help
-	  SHA-256 secure hash standard (DFIPS 180-2) implemented
-	  using special ARMv8 Crypto Extensions.
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: arm using
+	  - ARMv8 Crypto Extensions
 
 config CRYPTO_SHA256_ARM
-	tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+	tristate "Hash functions: SHA-224 and SHA-256 (NEON)"
 	select CRYPTO_HASH
 	depends on !CPU_V7M
 	help
-	  SHA-256 secure hash standard (DFIPS 180-2) implemented
-	  using optimized ARM assembler and NEON, when available.
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: arm using
+	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_SHA512_ARM
-	tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)"
+	tristate "Hash functions: SHA-384 and SHA-512 (NEON)"
 	select CRYPTO_HASH
 	depends on !CPU_V7M
 	help
-	  SHA-512 secure hash standard (DFIPS 180-2) implemented
-	  using optimized ARM assembler and NEON, when available.
-
-config CRYPTO_BLAKE2S_ARM
-	bool "BLAKE2s digest algorithm (ARM)"
-	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
-	help
-	  BLAKE2s digest algorithm optimized with ARM scalar instructions.  This
-	  is faster than the generic implementations of BLAKE2s and BLAKE2b, but
-	  slower than the NEON implementation of BLAKE2b.  (There is no NEON
-	  implementation of BLAKE2s, since NEON doesn't really help with it.)
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
 
-config CRYPTO_BLAKE2B_NEON
-	tristate "BLAKE2b digest algorithm (ARM NEON)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLAKE2B
-	help
-	  BLAKE2b digest algorithm optimized with ARM NEON instructions.
-	  On ARM processors that have NEON support but not the ARMv8
-	  Crypto Extensions, typically this BLAKE2b implementation is
-	  much faster than SHA-2 and slightly faster than SHA-1.
+	  Architecture: arm using
+	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_AES_ARM
-	tristate "Scalar AES cipher for ARM"
+	tristate "Ciphers: AES"
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES
 	help
-	  Use optimized AES assembler routines for ARM platforms.
+	  Block ciphers: AES cipher algorithms (FIPS-197)
+
+	  Architecture: arm
 
 	  On ARM processors without the Crypto Extensions, this is the
 	  fastest AES implementation for single blocks.  For multiple
@@ -98,7 +162,7 @@ config CRYPTO_AES_ARM
 	  such attacks very difficult.
 
 config CRYPTO_AES_ARM_BS
-	tristate "Bit sliced AES using NEON instructions"
+	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (bit-sliced NEON)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
@@ -106,8 +170,13 @@ config CRYPTO_AES_ARM_BS
 	select CRYPTO_CBC
 	select CRYPTO_SIMD
 	help
-	  Use a faster and more secure NEON based implementation of AES in CBC,
-	  CTR and XTS modes
+	  Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+	  with block cipher modes:
+	   - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	   - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	   - CTR (Counter) mode (NIST SP800-38A)
+	   - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+	     and IEEE 1619)
 
 	  Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
 	  and for XTS mode encryption, CBC and XTS mode decryption speedup is
@@ -116,58 +185,59 @@ config CRYPTO_AES_ARM_BS
 	  believed to be invulnerable to cache timing attacks.
 
 config CRYPTO_AES_ARM_CE
-	tristate "Accelerated AES using ARMv8 Crypto Extensions"
+	tristate "Ciphers: AES, modes: ECB/CBC/CTS/CTR/XTS (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
 	help
-	  Use an implementation of AES in CBC, CTR and XTS modes that uses
-	  ARMv8 Crypto Extensions
+	  Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+	   with block cipher modes:
+	   - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	   - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	   - CTR (Counter) mode (NIST SP800-38A)
+	   - CTS (Cipher Text Stealing) mode (NIST SP800-38A)
+	   - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+	     and IEEE 1619)
+
+	  Architecture: arm using:
+	  - ARMv8 Crypto Extensions
 
-config CRYPTO_GHASH_ARM_CE
-	tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_CRYPTD
-	select CRYPTO_GF128MUL
+config CRYPTO_CHACHA20_NEON
+	tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (NEON)"
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
 	help
-	  Use an implementation of GHASH (used by the GCM AEAD chaining mode)
-	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
-	  that is part of the ARMv8 Crypto Extensions, or a slower variant that
-	  uses the vmull.p8 instruction that is part of the basic NEON ISA.
+	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+	  stream cipher algorithms
 
-config CRYPTO_CRCT10DIF_ARM_CE
-	tristate "CRCT10DIF digest algorithm using PMULL instructions"
-	depends on KERNEL_MODE_NEON
-	depends on CRC_T10DIF
-	select CRYPTO_HASH
+	  Architecture: arm using:
+	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_CRC32_ARM_CE
-	tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
+	tristate "CRC32C and CRC32"
 	depends on KERNEL_MODE_NEON
 	depends on CRC32
 	select CRYPTO_HASH
+	help
+	  CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
+	  and CRC32 CRC algorithm (IEEE 802.3)
 
-config CRYPTO_CHACHA20_NEON
-	tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
-	select CRYPTO_SKCIPHER
-	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	  Architecture: arm using:
+	  - CRC and/or PMULL instructions
 
-config CRYPTO_POLY1305_ARM
-	tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
-	select CRYPTO_HASH
-	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+	  Drivers: crc32-arm-ce and crc32c-arm-ce
 
-config CRYPTO_NHPOLY1305_NEON
-	tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
+config CRYPTO_CRCT10DIF_ARM_CE
+	tristate "CRCT10DIF"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_NHPOLY1305
+	depends on CRC_T10DIF
+	select CRYPTO_HASH
+	help
+	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
 
-config CRYPTO_CURVE25519_NEON
-	tristate "NEON accelerated Curve25519 scalar multiplication library"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_LIB_CURVE25519_GENERIC
-	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+	  Architecture: arm using:
+	  - PMULL (Polynomial Multiply Long) instructions
+
+endmenu
 
-endif
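The restructured entries above change menu text and ordering, not which drivers exist or what they register. One way to check that an accelerated implementation is actually selected at runtime is the AF_ALG socket interface: the kernel binds the request to the highest-priority registered "sha256", which will be one of the CE or NEON drivers above when they are loaded (/proc/crypto shows the winner). A sketch with error handling omitted:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/if_alg.h>

    int main(void)
    {
        struct sockaddr_alg sa = {
            .salg_family = AF_ALG,
            .salg_type   = "hash",
            .salg_name   = "sha256",
        };
        unsigned char digest[32];
        int tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
        int op;

        bind(tfm, (struct sockaddr *)&sa, sizeof(sa));
        op = accept(tfm, NULL, 0);
        write(op, "hello", 5);               /* feed the message */
        read(op, digest, sizeof(digest));    /* collect the hash */
        for (int i = 0; i < 32; i++)
            printf("%02x", digest[i]);
        printf("\n");
        close(op);
        close(tfm);
        return 0;
    }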
diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h
index 8754c0f5fc90..c6beb1708c64 100644
--- a/arch/arm/include/asm/device.h
+++ b/arch/arm/include/asm/device.h
@@ -9,7 +9,6 @@ struct dev_archdata {
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 	struct dma_iommu_mapping	*mapping;
 #endif
-	unsigned int dma_coherent:1;
 	unsigned int dma_ops_setup:1;
 };
 
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index bdc35c0e8dfb..326864f79d18 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -81,9 +81,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 /* Forward declaration, a strange C thing */
 struct task_struct;
 
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
 unsigned long __get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 95034d32213c..48737ec800eb 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -88,7 +88,7 @@ obj-$(CONFIG_VDSO)		+= vdso.o
 obj-$(CONFIG_EFI)		+= efi.o
 obj-$(CONFIG_PARAVIRT)	+= paravirt.o
 
-head-y			:= head$(MMUEXT).o
+obj-y			+= head$(MMUEXT).o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_ARM_PATCH_PHYS_VIRT)	+= phys2virt.o
@@ -108,4 +108,4 @@ obj-$(CONFIG_HAVE_ARM_SMCCC)	+= smccc-call.o
 
 obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o
 
-extra-y := $(head-y) vmlinux.lds
+extra-y := vmlinux.lds
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 96f3fbd51764..fc30df88ffbe 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -232,10 +232,6 @@ void flush_thread(void)
 	thread_notify(THREAD_NOTIFY_FLUSH, thread);
 }
 
-void release_thread(struct task_struct *dead_task)
-{
-}
-
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index f4501dea98b0..60dc56d8acfb 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -19,8 +19,6 @@
 #include <linux/clk/at91_pmc.h>
 #include <linux/platform_data/atmel.h>
 
-#include <soc/at91/pm.h>
-
 #include <asm/cacheflush.h>
 #include <asm/fncpy.h>
 #include <asm/system_misc.h>
@@ -656,16 +654,6 @@ static int at91_pm_enter(suspend_state_t state)
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_PINCTRL_AT91
-	/*
-	 * FIXME: this is needed to communicate between the pinctrl driver and
-	 * the PM implementation in the machine. Possibly part of the PM
-	 * implementation should be moved down into the pinctrl driver and get
-	 * called as part of the generic suspend/resume path.
-	 */
-	at91_pinctrl_gpio_suspend();
-#endif
-
 	switch (state) {
 	case PM_SUSPEND_MEM:
 	case PM_SUSPEND_STANDBY:
@@ -690,9 +678,6 @@ static int at91_pm_enter(suspend_state_t state)
 	}
 
 error:
-#ifdef CONFIG_PINCTRL_AT91
-	at91_pinctrl_gpio_resume();
-#endif
 	at91_pm_config_quirks(false);
 	return 0;
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ef691a5720d2..d7909091cf97 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1769,8 +1769,14 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { }
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
 {
-	dev->archdata.dma_coherent = coherent;
-	dev->dma_coherent = coherent;
+	/*
+	 * Due to legacy code that sets the ->dma_coherent flag from a bus
+	 * notifier we can't just assign coherent to the ->dma_coherent flag
+	 * here, but instead have to make sure we only set but never clear it
+	 * for now.
+	 */
+	if (coherent)
+		dev->dma_coherent = true;
 
 	/*
 	 * Don't override the dma_ops if they have already been set. Ideally
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fb8463c028b2..f6737d2f37b2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -209,7 +209,6 @@ config ARM64
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_GENERIC_VDSO
-	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
@@ -1432,7 +1431,7 @@ config XEN
 	help
 	  Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int
 	default "14" if ARM64_64K_PAGES
 	default "12" if ARM64_16K_PAGES
@@ -2251,6 +2250,3 @@ source "drivers/acpi/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
 
-if CRYPTO
-source "arch/arm64/crypto/Kconfig"
-endif # CRYPTO
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index a82bb3599094..5e56d26a2239 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -133,9 +133,6 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
   CC_FLAGS_FTRACE := -fpatchable-function-entry=2
 endif
 
-# Default value
-head-y		:= arch/arm64/kernel/head.o
-
 ifeq ($(CONFIG_KASAN_SW_TAGS), y)
 KASAN_SHADOW_SCALE_SHIFT := 4
 else ifeq ($(CONFIG_KASAN_GENERIC), y)
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d31545cc145b..0b6af3348e79 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -112,7 +112,6 @@ CONFIG_ACPI_APEI_MEMORY_FAILURE=y
 CONFIG_ACPI_APEI_EINJ=y
 CONFIG_VIRTUALIZATION=y
 CONFIG_KVM=y
-CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_SHA512_ARM64_CE=m
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 60db5bb2ddda..8bd80508a710 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -1,141 +1,282 @@
 # SPDX-License-Identifier: GPL-2.0
 
-menuconfig ARM64_CRYPTO
-	bool "ARM64 Accelerated Cryptographic Algorithms"
-	depends on ARM64
+menu "Accelerated Cryptographic Algorithms for CPU (arm64)"
+
+config CRYPTO_GHASH_ARM64_CE
+	tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_GF128MUL
+	select CRYPTO_LIB_AES
+	select CRYPTO_AEAD
 	help
-	  Say Y here to choose from a selection of cryptographic algorithms
-	  implemented using ARM64 specific CPU features or instructions.
+	  GCM GHASH function (NIST SP800-38D)
 
-if ARM64_CRYPTO
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
 
-config CRYPTO_SHA256_ARM64
-	tristate "SHA-224/SHA-256 digest algorithm for arm64"
-	select CRYPTO_HASH
+config CRYPTO_NHPOLY1305_NEON
+	tristate "Hash functions: NHPoly1305 (NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_NHPOLY1305
+	help
+	  NHPoly1305 hash function (Adiantum)
 
-config CRYPTO_SHA512_ARM64
-	tristate "SHA-384/SHA-512 digest algorithm for arm64"
+	  Architecture: arm64 using:
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_POLY1305_NEON
+	tristate "Hash functions: Poly1305 (NEON)"
+	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+	help
+	  Poly1305 authenticator algorithm (RFC7539)
+
+	  Architecture: arm64 using:
+	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_SHA1_ARM64_CE
-	tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+	tristate "Hash functions: SHA-1 (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_SHA1
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+
+config CRYPTO_SHA256_ARM64
+	tristate "Hash functions: SHA-224 and SHA-256"
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: arm64
 
 config CRYPTO_SHA2_ARM64_CE
-	tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)"
+	tristate "Hash functions: SHA-224 and SHA-256 (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_SHA256_ARM64
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+
+config CRYPTO_SHA512_ARM64
+	tristate "Hash functions: SHA-384 and SHA-512"
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: arm64
 
 config CRYPTO_SHA512_ARM64_CE
-	tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
+	tristate "Hash functions: SHA-384 and SHA-512 (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_SHA512_ARM64
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
 
 config CRYPTO_SHA3_ARM64
-	tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
+	tristate "Hash functions: SHA-3 (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_SHA3
+	help
+	  SHA-3 secure hash algorithms (FIPS 202)
+
+	  Architecture: arm64 using:
+	  - ARMv8.2 Crypto Extensions
 
 config CRYPTO_SM3_ARM64_CE
-	tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
+	tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_SM3
+	help
+	  SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
 
-config CRYPTO_SM4_ARM64_CE
-	tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_ALGAPI
-	select CRYPTO_SM4
-
-config CRYPTO_SM4_ARM64_CE_BLK
-	tristate "SM4 in ECB/CBC/CFB/CTR modes using ARMv8 Crypto Extensions"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
-	select CRYPTO_SM4
-
-config CRYPTO_SM4_ARM64_NEON_BLK
-	tristate "SM4 in ECB/CBC/CFB/CTR modes using NEON instructions"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
-	select CRYPTO_SM4
-
-config CRYPTO_GHASH_ARM64_CE
-	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_GF128MUL
-	select CRYPTO_LIB_AES
-	select CRYPTO_AEAD
+	  Architecture: arm64 using:
+	  - ARMv8.2 Crypto Extensions
 
 config CRYPTO_POLYVAL_ARM64_CE
-	tristate "POLYVAL using ARMv8 Crypto Extensions (for HCTR2)"
+	tristate "Hash functions: POLYVAL (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_POLYVAL
+	help
+	  POLYVAL hash function for HCTR2
 
-config CRYPTO_CRCT10DIF_ARM64_CE
-	tristate "CRCT10DIF digest algorithm using PMULL instructions"
-	depends on KERNEL_MODE_NEON && CRC_T10DIF
-	select CRYPTO_HASH
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
 
 config CRYPTO_AES_ARM64
-	tristate "AES core cipher using scalar instructions"
+	tristate "Ciphers: AES, modes: ECB, CBC, CTR, CTS, XCTR, XTS"
 	select CRYPTO_AES
+	help
+	  Block ciphers: AES cipher algorithms (FIPS-197)
+	  Length-preserving ciphers: AES with ECB, CBC, CTR, CTS,
+	    XCTR, and XTS modes
+	  AEAD cipher: AES with CBC, ESSIV, and SHA-256
+	    for fscrypt and dm-crypt
+
+	  Architecture: arm64
 
 config CRYPTO_AES_ARM64_CE
-	tristate "AES core cipher using ARMv8 Crypto Extensions"
+	tristate "Ciphers: AES (ARMv8 Crypto Extensions)"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_LIB_AES
+	help
+	  Block ciphers: AES cipher algorithms (FIPS-197)
 
-config CRYPTO_AES_ARM64_CE_CCM
-	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
-	depends on ARM64 && KERNEL_MODE_NEON
-	select CRYPTO_ALGAPI
-	select CRYPTO_AES_ARM64_CE
-	select CRYPTO_AEAD
-	select CRYPTO_LIB_AES
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
 
 config CRYPTO_AES_ARM64_CE_BLK
-	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using ARMv8 Crypto Extensions"
+	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_AES_ARM64_CE
+	help
+	  Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+	  with block cipher modes:
+	  - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	  - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	  - CTR (Counter) mode (NIST SP800-38A)
+	  - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+	    and IEEE 1619)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
 
 config CRYPTO_AES_ARM64_NEON_BLK
-	tristate "AES in ECB/CBC/CTR/XTS/XCTR modes using NEON instructions"
+	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (NEON)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_AES
+	help
+	  Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+	  with block cipher modes:
+	  - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	  - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	  - CTR (Counter) mode (NIST SP800-38A)
+	  - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+	    and IEEE 1619)
+
+	  Architecture: arm64 using:
+	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_CHACHA20_NEON
-	tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
+	tristate "Ciphers: ChaCha (NEON)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_CHACHA_GENERIC
 	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+	  stream cipher algorithms
 
-config CRYPTO_POLY1305_NEON
-	tristate "Poly1305 hash function using scalar or NEON instructions"
+	  Architecture: arm64 using:
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_AES_ARM64_BS
+	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XCTR/XTS (bit-sliced NEON)"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+	select CRYPTO_SKCIPHER
+	select CRYPTO_AES_ARM64_NEON_BLK
+	select CRYPTO_LIB_AES
+	help
+	  Length-preserving ciphers: AES cipher algorithms (FIPS-197)
+	  with block cipher modes:
+	  - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	  - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	  - CTR (Counter) mode (NIST SP800-38A)
+	  - XCTR mode for HCTR2
+	  - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+	    and IEEE 1619)
 
-config CRYPTO_NHPOLY1305_NEON
-	tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
+	  Architecture: arm64 using:
+	  - bit-sliced algorithm
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_SM4_ARM64_CE
+	tristate "Ciphers: SM4 (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_NHPOLY1305
+	select CRYPTO_ALGAPI
+	select CRYPTO_SM4
+	help
+	  Block ciphers: SM4 cipher algorithms (OSCCA GB/T 32907-2016)
 
-config CRYPTO_AES_ARM64_BS
-	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
+	  Architecture: arm64 using:
+	  - ARMv8.2 Crypto Extensions
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_SM4_ARM64_CE_BLK
+	tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
-	select CRYPTO_AES_ARM64_NEON_BLK
+	select CRYPTO_SM4
+	help
+	  Length-preserving ciphers: SM4 cipher algorithms (OSCCA GB/T 32907-2016)
+	  with block cipher modes:
+	  - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	  - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	  - CFB (Cipher Feedback) mode (NIST SP800-38A)
+	  - CTR (Counter) mode (NIST SP800-38A)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_SM4_ARM64_NEON_BLK
+	tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SM4
+	help
+	  Length-preserving ciphers: SM4 cipher algorithms (OSCCA GB/T 32907-2016)
+	  with block cipher modes:
+	  - ECB (Electronic Codebook) mode (NIST SP800-38A)
+	  - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
+	  - CFB (Cipher Feedback) mode (NIST SP800-38A)
+	  - CTR (Counter) mode (NIST SP800-38A)
+
+	  Architecture: arm64 using:
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_AES_ARM64_CE_CCM
+	tristate "AEAD cipher: AES in CCM mode (ARMv8 Crypto Extensions)"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AEAD
 	select CRYPTO_LIB_AES
+	help
+	  AEAD cipher: AES cipher algorithms (FIPS-197) with
+	  CCM (Counter with Cipher Block Chaining-Message Authentication Code)
+	  authenticated encryption mode (NIST SP800-38C)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+	  - NEON (Advanced SIMD) extensions
+
+config CRYPTO_CRCT10DIF_ARM64_CE
+	tristate "CRCT10DIF (PMULL)"
+	depends on KERNEL_MODE_NEON && CRC_T10DIF
+	select CRYPTO_HASH
+	help
+	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
+
+	  Architecture: arm64 using:
+	  - PMULL (Polynomial Multiply Long) instructions
+
+endmenu
 
-endif
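
Note: the renamed entries above change only menu presentation; kernel code
still requests algorithms by name through the crypto API, and the
highest-priority registered implementation (Crypto Extensions, NEON, or
generic C) wins at allocation time. A minimal consumer sketch, assuming a
kernel context (not part of this patch):

#include <crypto/hash.h>
#include <linux/err.h>

/* Hash a buffer with whichever "sha256" provider won registration, e.g.
 * sha256-ce when CRYPTO_SHA2_ARM64_CE is enabled and the CPU implements
 * the ARMv8 Crypto Extensions.
 */
static int demo_sha256(const u8 *data, unsigned int len, u8 *digest)
{
	struct crypto_shash *tfm;
	int ret;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_shash_tfm_digest(tfm, data, len, digest);
	crypto_free_shash(tfm);
	return ret;
}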
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index bbbe351e9045..a406454578f0 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -38,7 +38,7 @@ static int __init hyperv_init(void)
 		return 0;
 
 	/* Setup the guest ID */
-	guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
+	guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
 	hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id);
 
 	/* Get the features and hints from Hyper-V */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e9c9388ccc02..45e2136322ba 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -393,6 +393,7 @@ struct kvm_vcpu_arch {
 	 */
 	struct {
 		u32	mdscr_el1;
+		bool	pstate_ss;
 	} guest_debug_preserved;
 
 	/* vcpu power state */
@@ -535,6 +536,9 @@ struct kvm_vcpu_arch {
 #define IN_WFIT			__vcpu_single_flag(sflags, BIT(3))
 /* vcpu system registers loaded on physical CPU */
 #define SYSREGS_ON_CPU		__vcpu_single_flag(sflags, BIT(4))
+/* Software step state is Active-pending */
+#define DBG_SS_ACTIVE_PENDING	__vcpu_single_flag(sflags, BIT(5))
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) +	\
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index b5df82aa99e6..71a1af42f0e8 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1082,24 +1082,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
  * page after fork() + CoW for pfn mappings. We don't always have a
  * hardware-managed access flag on arm64.
  */
-static inline bool arch_faults_on_old_pte(void)
-{
-	/* The register read below requires a stable CPU to make any sense */
-	cant_migrate();
-
-	return !cpu_has_hw_af();
-}
-#define arch_faults_on_old_pte		arch_faults_on_old_pte
+#define arch_has_hw_pte_young		cpu_has_hw_af
 
 /*
  * Experimentally, it's cheap to set the access flag in hardware and we
  * benefit from prefaulting mappings as 'old' to start with.
  */
-static inline bool arch_wants_old_prefaulted_pte(void)
-{
-	return !arch_faults_on_old_pte();
-}
-#define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
+#define arch_wants_old_prefaulted_pte	cpu_has_hw_af
 
 static inline bool pud_sect_supported(void)
 {
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61883518fc50..445aa3af3b76 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -323,9 +323,6 @@ static inline bool is_ttbr1_addr(unsigned long addr)
 /* Forward declaration, a strange C thing */
 struct task_struct;
 
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
 unsigned long __get_wchan(struct task_struct *p);
 
 void update_sctlr_el1(u64 sctlr);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 38a0b0291edb..2f361a883d8c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -86,8 +86,8 @@ $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so
 $(obj)/vdso32-wrap.o: $(obj)/vdso32/vdso.so
 
 obj-y					+= probes/
-head-y					:= head.o
-extra-y					+= $(head-y) vmlinux.lds
+obj-y					+= head.o
+extra-y					+= vmlinux.lds
 
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 98d67444a5b6..27ef7ad3ffd2 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -8,9 +8,9 @@
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
 
-#define for_each_mte_vma(tsk, vma)					\
+#define for_each_mte_vma(vmi, vma)					\
 	if (system_supports_mte())					\
-		for (vma = tsk->mm->mmap; vma; vma = vma->vm_next)	\
+		for_each_vma(vmi, vma)					\
 			if (vma->vm_flags & VM_MTE)
 
 static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma)
@@ -81,8 +81,9 @@ Elf_Half elf_core_extra_phdrs(void)
 {
 	struct vm_area_struct *vma;
 	int vma_count = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma)
+	for_each_mte_vma(vmi, vma)
 		vma_count++;
 
 	return vma_count;
@@ -91,8 +92,9 @@ Elf_Half elf_core_extra_phdrs(void)
 int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma) {
+	for_each_mte_vma(vmi, vma) {
 		struct elf_phdr phdr;
 
 		phdr.p_type = PT_AARCH64_MEMTAG_MTE;
@@ -116,8 +118,9 @@ size_t elf_core_extra_data_size(void)
 {
 	struct vm_area_struct *vma;
 	size_t data_size = 0;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma)
+	for_each_mte_vma(vmi, vma)
 		data_size += mte_vma_tag_dump_size(vma);
 
 	return data_size;
@@ -126,8 +129,9 @@ size_t elf_core_extra_data_size(void)
 int elf_core_write_extra_data(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, current->mm, 0);
 
-	for_each_mte_vma(current, vma) {
+	for_each_mte_vma(vmi, vma) {
 		if (vma->vm_flags & VM_DONTDUMP)
 			continue;
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 92bcc1768f0b..9015f49c206e 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -279,10 +279,6 @@ void flush_thread(void)
 	flush_tagged_addr_state();
 }
 
-void release_thread(struct task_struct *dead_task)
-{
-}
-
 void arch_release_task_struct(struct task_struct *tsk)
 {
 	fpsimd_release_task(tsk);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index ac93a2ee9c07..99ae81ab91a7 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -133,10 +133,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	mmap_read_lock(mm);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;
 
 		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 446f628a9de1..94d33e296e10 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2269,6 +2269,16 @@ static int __init early_kvm_mode_cfg(char *arg)
 	if (!arg)
 		return -EINVAL;
 
+	if (strcmp(arg, "none") == 0) {
+		kvm_mode = KVM_MODE_NONE;
+		return 0;
+	}
+
+	if (!is_hyp_mode_available()) {
+		pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n");
+		return 0;
+	}
+
 	if (strcmp(arg, "protected") == 0) {
 		if (!is_kernel_in_hyp_mode())
 			kvm_mode = KVM_MODE_PROTECTED;
@@ -2283,11 +2293,6 @@ static int __init early_kvm_mode_cfg(char *arg)
 		return 0;
 	}
 
-	if (strcmp(arg, "none") == 0) {
-		kvm_mode = KVM_MODE_NONE;
-		return 0;
-	}
-
 	return -EINVAL;
 }
 early_param("kvm-arm.mode", early_kvm_mode_cfg);
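
Note: moving the "none" check first gives the parser a fixed precedence; a
condensed view (sketch, not part of this patch):

	/*
	 * kvm-arm.mode handling order after this change:
	 *   1. "none"      -> KVM_MODE_NONE, honoured even without EL2
	 *   2. no hyp mode -> warn once and ignore any other value
	 *   3. "protected" and the remaining modes -> evaluated only when
	 *      KVM can actually run
	 */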
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 3f7563d768e2..fccf9ec01813 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -32,6 +32,10 @@ static DEFINE_PER_CPU(u64, mdcr_el2);
  *
  * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled
  * after we have restored the preserved value to the main context.
+ *
+ * When single-step is enabled by userspace, we tweak PSTATE.SS on every
+ * guest entry. Preserve PSTATE.SS so we can restore the original value
+ * for the vcpu after the single-step is disabled.
  */
 static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
 {
@@ -41,6 +45,9 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
 
 	trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
 				vcpu->arch.guest_debug_preserved.mdscr_el1);
+
+	vcpu->arch.guest_debug_preserved.pstate_ss =
+					(*vcpu_cpsr(vcpu) & DBG_SPSR_SS);
 }
 
 static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
@@ -51,6 +58,11 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
 
 	trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
 				vcpu_read_sys_reg(vcpu, MDSCR_EL1));
+
+	if (vcpu->arch.guest_debug_preserved.pstate_ss)
+		*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+	else
+		*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
 }
 
 /**
@@ -188,7 +200,18 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 		 * debugging the system.
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
-			*vcpu_cpsr(vcpu) |=  DBG_SPSR_SS;
+			/*
+			 * If the software step state at the last guest exit
+			 * was Active-pending, we don't set DBG_SPSR_SS so
+			 * that the state is maintained (to not run another
+			 * single-step until the pending Software Step
+			 * exception is taken).
+			 */
+			if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING))
+				*vcpu_cpsr(vcpu) |= DBG_SPSR_SS;
+			else
+				*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
+
 			mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
 			mdscr |= DBG_MDSCR_SS;
 			vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
@@ -262,6 +285,15 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
 	 * Restore the guest's debug registers if we were using them.
 	 */
 	if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
+		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+			if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS))
+				/*
+				 * Mark the vcpu as ACTIVE_PENDING
+				 * until Software Step exception is taken.
+				 */
+				vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+		}
+
 		restore_guest_debug_regs(vcpu);
 
 		/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index f802a3b3f8db..2ff13a3f8479 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -937,6 +937,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	} else {
 		/* If not enabled clear all flags */
 		vcpu->guest_debug = 0;
+		vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
 	}
 
 out:
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index bbe5b393d689..e778eefcf214 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -152,8 +152,14 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu)
 	run->debug.arch.hsr_high = upper_32_bits(esr);
 	run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID;
 
-	if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW)
+	switch (ESR_ELx_EC(esr)) {
+	case ESR_ELx_EC_WATCHPT_LOW:
 		run->debug.arch.far = vcpu->arch.fault.far_el2;
+		break;
+	case ESR_ELx_EC_SOFTSTP_LOW:
+		vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
+		break;
+	}
 
 	return 0;
 }
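
Note: debug.c, guest.c and handle_exit.c above together form a small state
machine for the new flag; a condensed sketch (not part of this patch):

	/*
	 * DBG_SS_ACTIVE_PENDING lifecycle with KVM_GUESTDBG_SINGLESTEP:
	 *
	 *   guest entry:  MDSCR_EL1.SS = 1;
	 *                 PSTATE.SS = flag set ? 0 : 1;
	 *
	 *   guest exit with PSTATE.SS clear:
	 *                 vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING);
	 *
	 *   ESR_ELx_EC_SOFTSTP_LOW exit, or debug disabled by userspace:
	 *                 vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING);
	 */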
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 9f6385702061..8e9d49a964be 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -143,7 +143,7 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 	}
 }
 
-/* Restore VGICv3 state on non_VEH systems */
+/* Restore VGICv3 state on non-VHE systems */
 static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 {
 	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2ef1121ab844..f4a7c5abcbca 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1063,13 +1063,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
 }
 
 /* Read a sanitised cpufeature ID register by sys_reg_desc */
-static u64 read_id_reg(const struct kvm_vcpu *vcpu,
-		struct sys_reg_desc const *r, bool raz)
+static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r)
 {
 	u32 id = reg_to_encoding(r);
 	u64 val;
 
-	if (raz)
+	if (sysreg_visible_as_raz(vcpu, r))
 		return 0;
 
 	val = read_sanitised_ftr_reg(id);
@@ -1145,34 +1144,37 @@ static unsigned int id_visibility(const struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-/* cpufeature ID register access trap handlers */
-
-static bool __access_id_reg(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *r,
-			    bool raz)
+static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu,
+				       const struct sys_reg_desc *r)
 {
-	if (p->is_write)
-		return write_to_read_only(vcpu, p, r);
+	/*
+	 * AArch32 ID registers are UNKNOWN if AArch32 isn't implemented at any
+	 * EL. Promote to RAZ/WI in order to guarantee consistency between
+	 * systems.
+	 */
+	if (!kvm_supports_32bit_el0())
+		return REG_RAZ | REG_USER_WI;
 
-	p->regval = read_id_reg(vcpu, r, raz);
-	return true;
+	return id_visibility(vcpu, r);
 }
 
+static unsigned int raz_visibility(const struct kvm_vcpu *vcpu,
+				   const struct sys_reg_desc *r)
+{
+	return REG_RAZ;
+}
+
+/* cpufeature ID register access trap handlers */
+
 static bool access_id_reg(struct kvm_vcpu *vcpu,
 			  struct sys_reg_params *p,
 			  const struct sys_reg_desc *r)
 {
-	bool raz = sysreg_visible_as_raz(vcpu, r);
-
-	return __access_id_reg(vcpu, p, r, raz);
-}
+	if (p->is_write)
+		return write_to_read_only(vcpu, p, r);
 
-static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
-			      struct sys_reg_params *p,
-			      const struct sys_reg_desc *r)
-{
-	return __access_id_reg(vcpu, p, r, true);
+	p->regval = read_id_reg(vcpu, r);
+	return true;
 }
 
 /* Visibility overrides for SVE-specific control registers */
@@ -1208,9 +1210,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	/* We can only differ with CSV[23], and anything else is an error */
-	val ^= read_id_reg(vcpu, rd, false);
-	val &= ~((0xFUL << ID_AA64PFR0_EL1_CSV2_SHIFT) |
-		 (0xFUL << ID_AA64PFR0_EL1_CSV3_SHIFT));
+	val ^= read_id_reg(vcpu, rd);
+	val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
+		 ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3));
 	if (val)
 		return -EINVAL;
 
@@ -1227,45 +1229,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
  * are stored, and for set_id_reg() we don't allow the effective value
  * to be changed.
  */
-static int __get_id_reg(const struct kvm_vcpu *vcpu,
-			const struct sys_reg_desc *rd, u64 *val,
-			bool raz)
-{
-	*val = read_id_reg(vcpu, rd, raz);
-	return 0;
-}
-
-static int __set_id_reg(const struct kvm_vcpu *vcpu,
-			const struct sys_reg_desc *rd, u64 val,
-			bool raz)
-{
-	/* This is what we mean by invariant: you can't change it. */
-	if (val != read_id_reg(vcpu, rd, raz))
-		return -EINVAL;
-
-	return 0;
-}
-
 static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		      u64 *val)
 {
-	bool raz = sysreg_visible_as_raz(vcpu, rd);
-
-	return __get_id_reg(vcpu, rd, val, raz);
+	*val = read_id_reg(vcpu, rd);
+	return 0;
 }
 
 static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		      u64 val)
 {
-	bool raz = sysreg_visible_as_raz(vcpu, rd);
-
-	return __set_id_reg(vcpu, rd, val, raz);
-}
+	/* This is what we mean by invariant: you can't change it. */
+	if (val != read_id_reg(vcpu, rd))
+		return -EINVAL;
 
-static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
-			  u64 val)
-{
-	return __set_id_reg(vcpu, rd, val, true);
+	return 0;
 }
 
 static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
@@ -1367,6 +1345,15 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
 	.visibility = id_visibility,		\
 }
 
+/* sys_reg_desc initialiser for known cpufeature ID registers */
+#define AA32_ID_SANITISED(name) {		\
+	SYS_DESC(SYS_##name),			\
+	.access	= access_id_reg,		\
+	.get_user = get_id_reg,			\
+	.set_user = set_id_reg,			\
+	.visibility = aa32_id_visibility,	\
+}
+
 /*
  * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
  * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
@@ -1374,9 +1361,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
  */
 #define ID_UNALLOCATED(crm, op2) {			\
 	Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2),	\
-	.access = access_raz_id_reg,			\
-	.get_user = get_raz_reg,			\
-	.set_user = set_raz_id_reg,			\
+	.access = access_id_reg,			\
+	.get_user = get_id_reg,				\
+	.set_user = set_id_reg,				\
+	.visibility = raz_visibility			\
 }
 
 /*
@@ -1386,9 +1374,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
  */
 #define ID_HIDDEN(name) {			\
 	SYS_DESC(SYS_##name),			\
-	.access = access_raz_id_reg,		\
-	.get_user = get_raz_reg,		\
-	.set_user = set_raz_id_reg,		\
+	.access = access_id_reg,		\
+	.get_user = get_id_reg,			\
+	.set_user = set_id_reg,			\
+	.visibility = raz_visibility,		\
 }
 
 /*
@@ -1452,33 +1441,33 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* AArch64 mappings of the AArch32 ID registers */
 	/* CRm=1 */
-	ID_SANITISED(ID_PFR0_EL1),
-	ID_SANITISED(ID_PFR1_EL1),
-	ID_SANITISED(ID_DFR0_EL1),
+	AA32_ID_SANITISED(ID_PFR0_EL1),
+	AA32_ID_SANITISED(ID_PFR1_EL1),
+	AA32_ID_SANITISED(ID_DFR0_EL1),
 	ID_HIDDEN(ID_AFR0_EL1),
-	ID_SANITISED(ID_MMFR0_EL1),
-	ID_SANITISED(ID_MMFR1_EL1),
-	ID_SANITISED(ID_MMFR2_EL1),
-	ID_SANITISED(ID_MMFR3_EL1),
+	AA32_ID_SANITISED(ID_MMFR0_EL1),
+	AA32_ID_SANITISED(ID_MMFR1_EL1),
+	AA32_ID_SANITISED(ID_MMFR2_EL1),
+	AA32_ID_SANITISED(ID_MMFR3_EL1),
 
 	/* CRm=2 */
-	ID_SANITISED(ID_ISAR0_EL1),
-	ID_SANITISED(ID_ISAR1_EL1),
-	ID_SANITISED(ID_ISAR2_EL1),
-	ID_SANITISED(ID_ISAR3_EL1),
-	ID_SANITISED(ID_ISAR4_EL1),
-	ID_SANITISED(ID_ISAR5_EL1),
-	ID_SANITISED(ID_MMFR4_EL1),
-	ID_SANITISED(ID_ISAR6_EL1),
+	AA32_ID_SANITISED(ID_ISAR0_EL1),
+	AA32_ID_SANITISED(ID_ISAR1_EL1),
+	AA32_ID_SANITISED(ID_ISAR2_EL1),
+	AA32_ID_SANITISED(ID_ISAR3_EL1),
+	AA32_ID_SANITISED(ID_ISAR4_EL1),
+	AA32_ID_SANITISED(ID_ISAR5_EL1),
+	AA32_ID_SANITISED(ID_MMFR4_EL1),
+	AA32_ID_SANITISED(ID_ISAR6_EL1),
 
 	/* CRm=3 */
-	ID_SANITISED(MVFR0_EL1),
-	ID_SANITISED(MVFR1_EL1),
-	ID_SANITISED(MVFR2_EL1),
+	AA32_ID_SANITISED(MVFR0_EL1),
+	AA32_ID_SANITISED(MVFR1_EL1),
+	AA32_ID_SANITISED(MVFR2_EL1),
 	ID_UNALLOCATED(3,3),
-	ID_SANITISED(ID_PFR2_EL1),
+	AA32_ID_SANITISED(ID_PFR2_EL1),
 	ID_HIDDEN(ID_DFR1_EL1),
-	ID_SANITISED(ID_MMFR5_EL1),
+	AA32_ID_SANITISED(ID_MMFR5_EL1),
 	ID_UNALLOCATED(3,7),
 
 	/* AArch64 ID registers */
@@ -2809,6 +2798,9 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg,
 	if (!r)
 		return -ENOENT;
 
+	if (sysreg_user_write_ignore(vcpu, r))
+		return 0;
+
 	if (r->set_user) {
 		ret = (r->set_user)(vcpu, r, val);
 	} else {
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index a8c4cc32eb9a..e4ebb3a379fd 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -86,6 +86,7 @@ struct sys_reg_desc {
 
 #define REG_HIDDEN		(1 << 0) /* hidden from userspace and guest */
 #define REG_RAZ			(1 << 1) /* RAZ from userspace and guest */
+#define REG_USER_WI		(1 << 2) /* WI from userspace only */
 
 static __printf(2, 3)
 inline void print_sys_reg_msg(const struct sys_reg_params *p,
@@ -136,22 +137,31 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
 	__vcpu_sys_reg(vcpu, r->reg) = r->val;
 }
 
-static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
-				 const struct sys_reg_desc *r)
+static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu,
+					     const struct sys_reg_desc *r)
 {
 	if (likely(!r->visibility))
-		return false;
+		return 0;
 
-	return r->visibility(vcpu, r) & REG_HIDDEN;
+	return r->visibility(vcpu, r);
+}
+
+static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu,
+				 const struct sys_reg_desc *r)
+{
+	return sysreg_visibility(vcpu, r) & REG_HIDDEN;
 }
 
 static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu,
 					 const struct sys_reg_desc *r)
 {
-	if (likely(!r->visibility))
-		return false;
+	return sysreg_visibility(vcpu, r) & REG_RAZ;
+}
 
-	return r->visibility(vcpu, r) & REG_RAZ;
+static inline bool sysreg_user_write_ignore(const struct kvm_vcpu *vcpu,
+					    const struct sys_reg_desc *r)
+{
+	return sysreg_visibility(vcpu, r) & REG_USER_WI;
 }
 
 static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
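
Note: after this refactoring all three visibility bits funnel through
sysreg_visibility(); roughly (sketch, not part of this patch):

	/*
	 *   REG_HIDDEN  -> register invisible to guest and userspace
	 *   REG_RAZ     -> read_id_reg() short-circuits to 0
	 *   REG_USER_WI -> kvm_sys_reg_set_user() returns 0 without
	 *                  writing, e.g. AArch32 ID registers on systems
	 *                  without 32-bit EL0 support
	 */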
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 9d3299a70242..24d7778d1ce6 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -406,7 +406,7 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its,
 	struct its_ite *ite;
 
 	for_each_lpi_its(device, ite, its) {
-		if (!ite->collection || coll != ite->collection)
+		if (ite->collection != coll)
 			continue;
 
 		update_affinity_ite(kvm, ite);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 83a512a6ff0d..3cb101e8cb29 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -7,7 +7,7 @@
 #include <linux/gfp.h>
 #include <linux/cache.h>
 #include <linux/dma-map-ops.h>
-#include <linux/dma-iommu.h>
+#include <linux/iommu.h>
 #include <xen/xen.h>
 
 #include <asm/cacheflush.h>
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 0795028f017c..35e9a468d13e 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -245,7 +245,7 @@ static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
 {
 	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
 
-	return page_folio(pfn_to_page(swp_offset(entry)));
+	return page_folio(pfn_to_page(swp_offset_pfn(entry)));
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 3cbc2dc62baf..adee6ab36862 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -332,7 +332,7 @@ config HIGHMEM
 	select KMAP_LOCAL
 	default y
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	default "11"
 
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 4e1d619fd5c6..0e4237e55758 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -59,8 +59,6 @@ LDFLAGS += -EL
 
 KBUILD_AFLAGS += $(KBUILD_CFLAGS)
 
-head-y := arch/csky/kernel/head.o
-
 core-y += arch/csky/$(CSKYABI)/
 
 libs-y += arch/csky/lib/ \
diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h
index 9638206bc44f..63ad71fab30d 100644
--- a/arch/csky/include/asm/processor.h
+++ b/arch/csky/include/asm/processor.h
@@ -69,11 +69,6 @@ do {									\
 /* Forward declaration, a strange C thing */
 struct task_struct;
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *dead_task)
-{
-}
-
 /* Prepare to copy thread state - unlazy all lazy status */
 #define prepare_to_copy(tsk)    do { } while (0)
 
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 6f14c924b20d..8a868316b912 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
-extra-y := head.o vmlinux.lds
+extra-y := vmlinux.lds
 
-obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
+obj-y += head.o entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
 obj-y += power.o syscall.o syscall_table.o setup.o io.o
 obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
 obj-y += probes/
diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile
index 44312bc147d8..92d005958dfb 100644
--- a/arch/hexagon/Makefile
+++ b/arch/hexagon/Makefile
@@ -32,5 +32,3 @@ KBUILD_LDFLAGS += $(ldflags-y)
 TIR_NAME := r19
 KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__
 KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
-
-head-y := arch/hexagon/kernel/head.o
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 615f7e49968e..0cd39c2cdf8f 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -60,10 +60,6 @@ struct thread_struct {
 #define KSTK_EIP(tsk) (pt_elr(task_pt_regs(tsk)))
 #define KSTK_ESP(tsk) (pt_psp(task_pt_regs(tsk)))
 
-/*  Free all resources held by a thread; defined in process.c  */
-extern void release_thread(struct task_struct *dead_task);
-
-/* Get wait channel for task P.  */
 extern unsigned long __get_wchan(struct task_struct *p);
 
 /*  The following stuff is pretty HEXAGON specific.  */
diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile
index fae3dce32fde..e73cb321630e 100644
--- a/arch/hexagon/kernel/Makefile
+++ b/arch/hexagon/kernel/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-extra-y := head.o vmlinux.lds
+extra-y := vmlinux.lds
 
+obj-y += head.o
 obj-$(CONFIG_SMP) += smp.o
 
 obj-y += setup.o irq_cpu.o traps.o syscalltab.o signal.o time.o
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index f0552f98a7ba..e15eeaebd785 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -113,13 +113,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 }
 
 /*
- * Release any architecture-specific resources locked by thread
- */
-void release_thread(struct task_struct *dead_task)
-{
-}
-
-/*
  * Some archs flush debug and FPU info here
  */
 void flush_thread(void)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 26ac8ea15a9e..c6e06cdc738f 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -200,7 +200,7 @@ config IA64_CYCLONE
 	  Say Y here to enable support for IBM EXA Cyclone time source.
 	  If you're unsure, answer N.
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "MAX_ORDER (11 - 17)"  if !HUGETLB_PAGE
 	range 11 17  if !HUGETLB_PAGE
 	default "17" if HUGETLB_PAGE
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index e55c2f138656..56c4bb276b6e 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -44,7 +44,6 @@ quiet_cmd_objcopy = OBJCOPY $@
 cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@
 
 KBUILD_CFLAGS += $(cflags-y)
-head-y := arch/ia64/kernel/head.o
 
 libs-y				+= arch/ia64/lib/
 
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
index a3724882295c..3e1337aceb37 100644
--- a/arch/ia64/configs/bigsur_defconfig
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -20,7 +20,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 # CONFIG_IPV6 is not set
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=m
 CONFIG_ATA=m
@@ -91,7 +90,6 @@ CONFIG_NFS_V4=m
 CONFIG_NFSD=m
 CONFIG_NFSD_V4=y
 CONFIG_CIFS=m
-CONFIG_CIFS_STATS=y
 CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
 CONFIG_NLS_CODEPAGE_437=y
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index a3dff482a3d7..f8033bacea89 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -39,7 +39,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_CONNECTOR=y
 # CONFIG_PNP_DEBUG_MESSAGES is not set
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_SGI_XP=m
@@ -91,7 +90,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
-CONFIG_RAW_DRIVER=m
 CONFIG_HPET=y
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index 4cd46105b020..ffebe6c503f5 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -31,11 +31,9 @@ CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
 # CONFIG_IPV6 is not set
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_ATA=y
-CONFIG_BLK_DEV_IDECD=y
 CONFIG_ATA_GENERIC=y
 CONFIG_PATA_CMD64X=y
 CONFIG_ATA_PIIX=y
@@ -81,7 +79,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
-CONFIG_RAW_DRIVER=m
 CONFIG_HPET=y
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index a2045d73adfa..45f5d6e2da0a 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -36,7 +36,6 @@ CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
 # CONFIG_IPV6 is not set
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_ATA=y
@@ -85,7 +84,6 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
-CONFIG_RAW_DRIVER=m
 CONFIG_HPET=y
 CONFIG_AGP=m
 CONFIG_AGP_I460=m
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
index 99f8b2a0332b..ed104550d0d5 100644
--- a/arch/ia64/configs/zx1_defconfig
+++ b/arch/ia64/configs/zx1_defconfig
@@ -30,7 +30,6 @@ CONFIG_PATA_CMD64X=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
-CONFIG_CHR_DEV_OSST=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_CHR_DEV_SG=y
 CONFIG_SCSI_CONSTANTS=y
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 757c2f6d8d4b..d1978e004054 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -318,13 +318,6 @@ struct thread_struct {
 struct mm_struct;
 struct task_struct;
 
-/*
- * Free all resources held by a thread. This is called after the
- * parent of DEAD_TASK has collected the exit status of the task via
- * wait().
- */
-#define release_thread(dead_task)
-
 /* Get wait channel for task P.  */
 extern unsigned long __get_wchan (struct task_struct *p);
 
diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h
index 42ed5248fae9..84e8ce387b69 100644
--- a/arch/ia64/include/asm/sparsemem.h
+++ b/arch/ia64/include/asm/sparsemem.h
@@ -11,10 +11,10 @@
 
 #define SECTION_SIZE_BITS	(30)
 #define MAX_PHYSMEM_BITS	(50)
-#ifdef CONFIG_FORCE_MAX_ZONEORDER
-#if ((CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
+#ifdef CONFIG_ARCH_FORCE_MAX_ORDER
+#if ((CONFIG_ARCH_FORCE_MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS)
 #undef SECTION_SIZE_BITS
-#define SECTION_SIZE_BITS (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT)
+#define SECTION_SIZE_BITS (CONFIG_ARCH_FORCE_MAX_ORDER - 1 + PAGE_SHIFT)
 #endif
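
Note: a worked example of the guard above, assuming 64KB pages
(PAGE_SHIFT = 16) and the HUGETLB_PAGE default of 17 from the ia64
Kconfig hunk earlier in this series:

	/*
	 *   CONFIG_ARCH_FORCE_MAX_ORDER - 1 + PAGE_SHIFT = 17 - 1 + 16 = 32
	 * 32 > 30, so SECTION_SIZE_BITS is raised from 30 to 32 and a
	 * maximal buddy allocation still fits in one sparsemem section.
	 */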
 #endif
 
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 08d4a2ba0652..ae9ff07de4ab 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -7,9 +7,9 @@ ifdef CONFIG_DYNAMIC_FTRACE
 CFLAGS_REMOVE_ftrace.o = -pg
 endif
 
-extra-y	:= head.o vmlinux.lds
+extra-y	:= vmlinux.lds
 
-obj-y := entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o	\
+obj-y := head.o entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o	\
 	 irq_lsapic.o ivt.o pal.o patch.o process.o ptrace.o sal.o		\
 	 salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
 	 unwind.o mca.o mca_asm.o topology.o dma-mapping.o iosapic.o acpi.o \
@@ -34,10 +34,7 @@ mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
-obj-$(CONFIG_IA64_ESI)		+= esi.o
-ifneq ($(CONFIG_IA64_ESI),)
-obj-y				+= esi_stub.o	# must be in kernel proper
-endif
+obj-$(CONFIG_IA64_ESI)		+= esi.o esi_stub.o # must be in kernel proper
 obj-$(CONFIG_INTEL_IOMMU)	+= pci-dma.o
 
 obj-$(CONFIG_ELF_CORE)		+= elfcore.o
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index c62a66710ad6..92ede80d17fe 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1793,7 +1793,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	p->parent = p->real_parent = p->group_leader = p;
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
-	strncpy(p->comm, type, sizeof(p->comm)-1);
+	strscpy(p->comm, type, sizeof(p->comm)-1);
 }
 
 /* Caller prevents this from being called after init */
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index fd6301eafa9d..c05728044272 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -552,7 +552,7 @@ setup_arch (char **cmdline_p)
 	ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
 
 	*cmdline_p = __va(ia64_boot_param->command_line);
-	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+	strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
 
 	efi_init();
 	io_port_init();
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index e14db25146c2..215bf3f8cb20 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -166,3 +166,29 @@ ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, u
 		force_successful_syscall_return();
 	return addr;
 }
+
+asmlinkage long
+ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp)
+{
+	/*
+	 * ia64's clock_gettime() syscall is implemented as a vdso call
+	 * fsys_clock_gettime(). Currently it handles only
+	 * CLOCK_REALTIME and CLOCK_MONOTONIC. Both are based on
+	 * 'ar.itc' counter which gets incremented at a constant
+	 * frequency. It's usually 400MHz, ~2.5x times slower than CPU
+	 * frequency. It's usually 400MHz, roughly 2.5x slower than the
+	 * CPU clock frequency, which makes it almost (but not quite) a
+	 * 1ns-resolution hrtimer.
+	 * Let's special-case these timers to report correct precision
+	 * based on ITC frequency and not HZ frequency for supported
+	 * clocks.
+	 */
+	switch (which_clock) {
+	case CLOCK_REALTIME:
+	case CLOCK_MONOTONIC: {
+		s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
+		struct timespec64 rtn_tp = ns_to_timespec64(tick_ns);
+
+		return put_timespec64(&rtn_tp, tp);
+	}
+	}
+
+	return sys_clock_getres(which_clock, tp);
+}
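
Note: for the 400MHz ITC mentioned in the comment above, the reported
precision works out as (illustrative only):

	/*
	 *   tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, itc_freq)
	 *           = DIV_ROUND_UP(1000000000, 400000000) = 3
	 * so clock_getres(CLOCK_MONOTONIC, ...) now returns a 3ns
	 * resolution instead of the generic value from sys_clock_getres().
	 */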
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 78b1d03e86e1..72c929d9902b 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -240,7 +240,7 @@
 228	common	timer_delete			sys_timer_delete
 229	common	clock_settime			sys_clock_settime
 230	common	clock_gettime			sys_clock_gettime
-231	common	clock_getres			sys_clock_getres
+231	common	clock_getres			ia64_clock_getres
 232	common	clock_nanosleep			sys_clock_nanosleep
 233	common	fstatfs64			sys_fstatfs64
 234	common	statfs64			sys_statfs64
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild
index ab5373d0a24f..b01f5cdb27e0 100644
--- a/arch/loongarch/Kbuild
+++ b/arch/loongarch/Kbuild
@@ -1,5 +1,6 @@
 obj-y += kernel/
 obj-y += mm/
+obj-y += net/
 obj-y += vdso/
 
 # for cleaning
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index cfd976065a0d..903096bd87f8 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -50,6 +50,7 @@ config LOONGARCH
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_USE_QUEUED_RWLOCKS
+	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_NO_INSTR
@@ -61,6 +62,7 @@ config LOONGARCH
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_ENTRY
 	select GENERIC_GETTIMEOFDAY
+	select GENERIC_IOREMAP if !ARCH_IOREMAP
 	select GENERIC_IRQ_MULTI_HANDLER
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
@@ -69,6 +71,7 @@ config LOONGARCH
 	select GENERIC_LIB_CMPDI2
 	select GENERIC_LIB_LSHRDI3
 	select GENERIC_LIB_UCMPDI2
+	select GENERIC_LIB_DEVMEM_IS_ALLOWED
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
@@ -83,6 +86,7 @@ config LOONGARCH
 	select HAVE_CONTEXT_TRACKING_USER
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
+	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
 	select HAVE_GENERIC_VDSO
@@ -93,6 +97,8 @@ config LOONGARCH
 	select HAVE_NMI
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
+	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
 	select HAVE_SETUP_PER_CPU_AREA if NUMA
@@ -136,6 +142,14 @@ config CPU_HAS_PREFETCH
 	bool
 	default y
 
+config GENERIC_BUG
+	def_bool y
+	depends on BUG
+
+config GENERIC_BUG_RELATIVE_POINTERS
+	def_bool y
+	depends on GENERIC_BUG
+
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
@@ -157,7 +171,7 @@ config STACKTRACE_SUPPORT
 	bool
 	default y
 
-# MACH_LOONGSON32 and MACH_LOONGSON64 are delibrately carried over from the
+# MACH_LOONGSON32 and MACH_LOONGSON64 are deliberately carried over from the
 # MIPS Loongson code, to preserve Loongson-specific code paths in drivers that
 # are shared between architectures, and specifically expecting the symbols.
 config MACH_LOONGSON32
@@ -166,6 +180,9 @@ config MACH_LOONGSON32
 config MACH_LOONGSON64
 	def_bool 64BIT
 
+config FIX_EARLYCON_MEM
+	def_bool y
+
 config PAGE_SIZE_4KB
 	bool
 
@@ -194,6 +211,9 @@ config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
+config AS_HAS_EXPLICIT_RELOCS
+	def_bool $(as-instr,x:pcalau12i \$t0$(comma)%pc_hi20(x))
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -377,7 +397,7 @@ config NODES_SHIFT
 	default "6"
 	depends on NUMA
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	range 14 64 if PAGE_SIZE_64KB
 	default "14" if PAGE_SIZE_64KB
@@ -399,6 +419,46 @@ config FORCE_MAX_ZONEORDER
 	  The page size is not necessarily 4KB.  Keep this in mind
 	  when choosing a value for this option.
 
+config ARCH_IOREMAP
+	bool "Enable LoongArch DMW-based ioremap()"
+	help
+	  We use generic TLB-based ioremap() by default since it has page
+	  protection support. However, you can enable LoongArch DMW-based
+	  ioremap() for better performance.
+
+config KEXEC
+	bool "Kexec system call"
+	select KEXEC_CORE
+	help
+	  kexec is a system call that implements the ability to shut down your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.  And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similarity to the exec system call.
+
+config CRASH_DUMP
+	bool "Build kdump crash kernel"
+	help
+	  Generate a crash dump after being started by kexec. This should
+	  normally only be set in special crash dump kernels which are
+	  loaded in the main kernel with kexec-tools into a specially
+	  reserved region and then later executed after a crash by
+	  kdump/kexec.
+
+	  For more details see Documentation/admin-guide/kdump/kdump.rst
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded"
+	default "0x90000000a0000000"
+	depends on CRASH_DUMP
+	help
+	  This gives the XKPRANGE address where the kernel is loaded.
+	  If you plan to use the kernel for capturing a crash dump, change
+	  this value to the start of the reserved region (the "X" value as
+	  specified in the "crashkernel=YM@XM" command line boot parameter
+	  passed to the panicked kernel).
+
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 84689c3ee3af..f4cb54d5afd6 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -43,15 +43,37 @@ endif
 
 cflags-y			+= -G0 -pipe -msoft-float
 LDFLAGS_vmlinux			+= -G0 -static -n -nostdlib
+
+# When the assembler supports explicit relocation hints, we must use them.
+# GCC may have -mexplicit-relocs off by default if it was built with an old
+# assembler, so we force it via an option.
+#
+# When the assembler does not support explicit relocation hints, we can't use
+# them.  Disable the option if the compiler supports it.
+#
+# If you've seen an "unknown reloc hint" message while building the kernel and
+# you are now wondering why "-mexplicit-relocs" is not wrapped with cc-option:
+# the combination of a "new" assembler and "old" compiler is not supported.
+# Either upgrade the compiler or downgrade the assembler.
+ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
+cflags-y			+= -mexplicit-relocs
+KBUILD_CFLAGS_KERNEL		+= -mdirect-extern-access
+else
+cflags-y			+= $(call cc-option,-mno-explicit-relocs)
 KBUILD_AFLAGS_KERNEL		+= -Wa,-mla-global-with-pcrel
 KBUILD_CFLAGS_KERNEL		+= -Wa,-mla-global-with-pcrel
 KBUILD_AFLAGS_MODULE		+= -Wa,-mla-global-with-abs
 KBUILD_CFLAGS_MODULE		+= -fplt -Wa,-mla-global-with-abs,-mla-local-with-abs
+endif
 
 cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
 
+ifndef CONFIG_PHYSICAL_START
 load-y		= 0x9000000000200000
+else
+load-y		= $(CONFIG_PHYSICAL_START)
+endif
 bootvars-y	= VMLINUX_LOAD_ADDRESS=$(load-y)
 
 drivers-$(CONFIG_PCI)		+= arch/loongarch/pci/
@@ -79,8 +101,6 @@ CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
 	sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
 endif
 
-head-y := arch/loongarch/kernel/head.o
-
 libs-y += arch/loongarch/lib/
 libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 3712552e18d3..3540e9c0a631 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -4,6 +4,7 @@ CONFIG_POSIX_MQUEUE=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
 CONFIG_PREEMPT=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -45,6 +46,7 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_KEXEC=y
 CONFIG_PAGE_SIZE_16KB=y
 CONFIG_HZ_250=y
 CONFIG_ACPI=y
@@ -55,6 +57,7 @@ CONFIG_ACPI_DOCK=y
 CONFIG_ACPI_IPMI=m
 CONFIG_ACPI_PCI_SLOT=y
 CONFIG_ACPI_HOTPLUG_MEMORY=y
+CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
@@ -65,6 +68,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_PARTITION_ADVANCED=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
 CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
@@ -82,8 +87,11 @@ CONFIG_ZSMALLOC=m
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
+CONFIG_TLS=m
+CONFIG_TLS_DEVICE=y
 CONFIG_XFRM_USER=y
 CONFIG_NET_KEY=y
+CONFIG_XDP_SOCKETS=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
@@ -95,6 +103,7 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_IP_MROUTE=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
@@ -102,6 +111,7 @@ CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_BBR=m
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_INET6_ESP=m
 CONFIG_IPV6_MROUTE=y
 CONFIG_NETWORK_PHY_TIMESTAMPING=y
 CONFIG_NETFILTER=y
@@ -112,10 +122,11 @@ CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_NETBIOS_NS=m
+CONFIG_NF_CONNTRACK_SNMP=m
+CONFIG_NF_CONNTRACK_PPTP=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_TABLES=m
-CONFIG_NFT_COUNTER=m
 CONFIG_NFT_CONNLIMIT=m
 CONFIG_NFT_LOG=m
 CONFIG_NFT_LIMIT=m
@@ -200,7 +211,6 @@ CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
-CONFIG_NF_LOG_ARP=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -254,10 +264,14 @@ CONFIG_BPFILTER=y
 CONFIG_IP_SCTP=m
 CONFIG_RDS=y
 CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_L2TP_IP=m
+CONFIG_L2TP_ETH=m
 CONFIG_BRIDGE=m
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_VLAN_8021Q_MVRP=y
+CONFIG_LLC2=m
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_PRIO=m
@@ -282,9 +296,33 @@ CONFIG_VSOCKETS=m
 CONFIG_VIRTIO_VSOCKETS=m
 CONFIG_NETLINK_DIAG=y
 CONFIG_CGROUP_NET_PRIO=y
+CONFIG_BPF_STREAM_PARSER=y
 CONFIG_BT=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_HIDP=m
+CONFIG_BT_HS=y
 CONFIG_BT_HCIBTUSB=m
-# CONFIG_BT_HCIBTUSB_BCM is not set
+CONFIG_BT_HCIBTUSB_AUTOSUSPEND=y
+CONFIG_BT_HCIBTUSB_MTK=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_ATH3K=y
+CONFIG_BT_HCIUART_INTEL=y
+CONFIG_BT_HCIUART_AG6XX=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBPA10X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_BT_MRVL=m
+CONFIG_BT_ATH3K=m
+CONFIG_BT_VIRTIO=m
 CONFIG_CFG80211=m
 CONFIG_CFG80211_WEXT=y
 CONFIG_MAC80211=m
@@ -329,7 +367,6 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=y
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -486,6 +523,7 @@ CONFIG_PPP_FILTER=y
 CONFIG_PPP_MPPE=m
 CONFIG_PPP_MULTILINK=y
 CONFIG_PPPOE=m
+CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
@@ -505,7 +543,6 @@ CONFIG_ATH9K_HTC=m
 CONFIG_IWLWIFI=m
 CONFIG_IWLDVM=m
 CONFIG_IWLMVM=m
-CONFIG_IWLWIFI_BCAST_FILTERING=y
 CONFIG_HOSTAP=m
 CONFIG_MT7601U=m
 CONFIG_RT2X00=m
@@ -521,6 +558,14 @@ CONFIG_RTL8821AE=m
 CONFIG_RTL8192CU=m
 # CONFIG_RTLWIFI_DEBUG is not set
 CONFIG_RTL8XXXU=m
+CONFIG_RTW88=m
+CONFIG_RTW88_8822BE=m
+CONFIG_RTW88_8822CE=m
+CONFIG_RTW88_8723DE=m
+CONFIG_RTW88_8821CE=m
+CONFIG_RTW89=m
+CONFIG_RTW89_8852AE=m
+CONFIG_RTW89_8852CE=m
 CONFIG_ZD1211RW=m
 CONFIG_USB_NET_RNDIS_WLAN=m
 CONFIG_INPUT_MOUSEDEV=y
@@ -651,6 +696,11 @@ CONFIG_USB_SERIAL_FTDI_SIO=m
 CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_SERIAL_OPTION=m
 CONFIG_USB_GADGET=y
+CONFIG_TYPEC=m
+CONFIG_TYPEC_TCPM=m
+CONFIG_TYPEC_TCPCI=m
+CONFIG_TYPEC_UCSI=m
+CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
@@ -688,7 +738,6 @@ CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
 CONFIG_R8188EU=m
-# CONFIG_88EU_AP_MODE is not set
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -772,14 +821,12 @@ CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index f2bcfcb4e311..77ad8e6f0906 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -1,12 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += dma-contiguous.h
 generic-y += export.h
+generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
-generic-y += qrwlock_types.h
-generic-y += spinlock.h
-generic-y += spinlock_types.h
+generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += user.h
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index 8e5881bc5ad1..ed0910e8b856 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -40,4 +40,9 @@ extern unsigned long fw_arg0, fw_arg1, fw_arg2;
 extern struct loongson_board_info b_info;
 extern struct loongson_system_configuration loongson_sysconf;
 
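+/* Whether @cpu is an I/O master core, per the firmware's cores_io_master mask. */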
+static inline bool io_master(int cpu)
+{
+	return test_bit(cpu, &loongson_sysconf.cores_io_master);
+}
+
 #endif /* _ASM_BOOTINFO_H */
diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h
index bda49108a76d..d4ca3ba25418 100644
--- a/arch/loongarch/include/asm/bug.h
+++ b/arch/loongarch/include/asm/bug.h
@@ -2,21 +2,59 @@
 #ifndef __ASM_BUG_H
 #define __ASM_BUG_H
 
-#include <linux/compiler.h>
+#include <asm/break.h>
+#include <linux/stringify.h>
+
+#ifndef CONFIG_DEBUG_BUGVERBOSE
+#define _BUGVERBOSE_LOCATION(file, line)
+#else
+#define __BUGVERBOSE_LOCATION(file, line)			\
+		.pushsection .rodata.str, "aMS", @progbits, 1;	\
+	10002:	.string file;					\
+		.popsection;					\
+								\
+		.long 10002b - .;				\
+		.short line;
+#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)
+#endif
 
-#ifdef CONFIG_BUG
+#ifndef CONFIG_GENERIC_BUG
+#define __BUG_ENTRY(flags)
+#else
+#define __BUG_ENTRY(flags)					\
+		.pushsection __bug_table, "aw";			\
+		.align 2;					\
+	10000:	.long 10001f - .;				\
+		_BUGVERBOSE_LOCATION(__FILE__, __LINE__)	\
+		.short flags;					\
+		.popsection;					\
+	10001:
+#endif
 
-#include <asm/break.h>
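+/*
+ * Emit a BUG: record a __bug_table entry (with file/line information when
+ * DEBUG_BUGVERBOSE is enabled), then trap via "break BRK_BUG".  ASM_BUG()
+ * is the assembly-side variant, used e.g. after calls that must never
+ * return in head.S.
+ */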
+#define ASM_BUG_FLAGS(flags)					\
+	__BUG_ENTRY(flags)					\
+	break		BRK_BUG
 
-static inline void __noreturn BUG(void)
-{
-	__asm__ __volatile__("break %0" : : "i" (BRK_BUG));
-	unreachable();
-}
+#define ASM_BUG()	ASM_BUG_FLAGS(0)
 
-#define HAVE_ARCH_BUG
+#define __BUG_FLAGS(flags)					\
+	asm_inline volatile (__stringify(ASM_BUG_FLAGS(flags)));
 
-#endif
+#define __WARN_FLAGS(flags)					\
+do {								\
+	instrumentation_begin();				\
+	__BUG_FLAGS(BUGFLAG_WARNING|(flags));			\
+	instrumentation_end();					\
+} while (0)
+
+#define BUG()							\
+do {								\
+	instrumentation_begin();				\
+	__BUG_FLAGS(0);						\
+	unreachable();						\
+} while (0)
+
+#define HAVE_ARCH_BUG
 
 #include <asm-generic/bug.h>
 
diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index 670900141b7c..0681788eb474 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -6,10 +6,33 @@
 #define _ASM_CACHEFLUSH_H
 
 #include <linux/mm.h>
-#include <asm/cpu-features.h>
+#include <asm/cpu-info.h>
 #include <asm/cacheops.h>
 
-extern void local_flush_icache_range(unsigned long start, unsigned long end);
+static inline bool cache_present(struct cache_desc *cdesc)
+{
+	return cdesc->flags & CACHE_PRESENT;
+}
+
+static inline bool cache_private(struct cache_desc *cdesc)
+{
+	return cdesc->flags & CACHE_PRIVATE;
+}
+
+static inline bool cache_inclusive(struct cache_desc *cdesc)
+{
+	return cdesc->flags & CACHE_INCLUSIVE;
+}
+
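+/* Line size of the outermost (last-level) cache leaf probed at boot. */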
+static inline unsigned int cpu_last_level_cache_line_size(void)
+{
+	int cache_present = boot_cpu_data.cache_leaves_present;
+
+	return boot_cpu_data.cache_leaves[cache_present - 1].linesz;
+}
+
+asmlinkage void __flush_cache_all(void);
+void local_flush_icache_range(unsigned long start, unsigned long end);
 
 #define flush_icache_range	local_flush_icache_range
 #define flush_icache_user_range	local_flush_icache_range
@@ -35,44 +58,30 @@ extern void local_flush_icache_range(unsigned long start, unsigned long end);
 	:								\
 	: "i" (op), "ZC" (*(unsigned char *)(addr)))
 
-static inline void flush_icache_line_indexed(unsigned long addr)
-{
-	cache_op(Index_Invalidate_I, addr);
-}
-
-static inline void flush_dcache_line_indexed(unsigned long addr)
-{
-	cache_op(Index_Writeback_Inv_D, addr);
-}
-
-static inline void flush_vcache_line_indexed(unsigned long addr)
-{
-	cache_op(Index_Writeback_Inv_V, addr);
-}
-
-static inline void flush_scache_line_indexed(unsigned long addr)
-{
-	cache_op(Index_Writeback_Inv_S, addr);
-}
-
-static inline void flush_icache_line(unsigned long addr)
-{
-	cache_op(Hit_Invalidate_I, addr);
-}
-
-static inline void flush_dcache_line(unsigned long addr)
-{
-	cache_op(Hit_Writeback_Inv_D, addr);
-}
-
-static inline void flush_vcache_line(unsigned long addr)
-{
-	cache_op(Hit_Writeback_Inv_V, addr);
-}
-
-static inline void flush_scache_line(unsigned long addr)
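+/* Index-writeback-invalidate one line of the given cache leaf. */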
+static inline void flush_cache_line(int leaf, unsigned long addr)
 {
-	cache_op(Hit_Writeback_Inv_S, addr);
+	switch (leaf) {
+	case Cache_LEAF0:
+		cache_op(Index_Writeback_Inv_LEAF0, addr);
+		break;
+	case Cache_LEAF1:
+		cache_op(Index_Writeback_Inv_LEAF1, addr);
+		break;
+	case Cache_LEAF2:
+		cache_op(Index_Writeback_Inv_LEAF2, addr);
+		break;
+	case Cache_LEAF3:
+		cache_op(Index_Writeback_Inv_LEAF3, addr);
+		break;
+	case Cache_LEAF4:
+		cache_op(Index_Writeback_Inv_LEAF4, addr);
+		break;
+	case Cache_LEAF5:
+		cache_op(Index_Writeback_Inv_LEAF5, addr);
+		break;
+	default:
+		break;
+	}
 }
 
 #include <asm-generic/cacheflush.h>
diff --git a/arch/loongarch/include/asm/cacheops.h b/arch/loongarch/include/asm/cacheops.h
index dc280efecebd..0f4a86f8e2be 100644
--- a/arch/loongarch/include/asm/cacheops.h
+++ b/arch/loongarch/include/asm/cacheops.h
@@ -8,16 +8,18 @@
 #define __ASM_CACHEOPS_H
 
 /*
- * Most cache ops are split into a 2 bit field identifying the cache, and a 3
+ * Most cache ops are split into a 3 bit field identifying the cache, and a 2
  * bit field identifying the cache operation.
  */
-#define CacheOp_Cache			0x03
-#define CacheOp_Op			0x1c
+#define CacheOp_Cache			0x07
+#define CacheOp_Op			0x18
 
-#define Cache_I				0x00
-#define Cache_D				0x01
-#define Cache_V				0x02
-#define Cache_S				0x03
+#define Cache_LEAF0			0x00
+#define Cache_LEAF1			0x01
+#define Cache_LEAF2			0x02
+#define Cache_LEAF3			0x03
+#define Cache_LEAF4			0x04
+#define Cache_LEAF5			0x05
 
 #define Index_Invalidate		0x08
 #define Index_Writeback_Inv		0x08
@@ -25,13 +27,17 @@
 #define Hit_Writeback_Inv		0x10
 #define CacheOp_User_Defined		0x18
 
-#define Index_Invalidate_I		(Cache_I | Index_Invalidate)
-#define Index_Writeback_Inv_D		(Cache_D | Index_Writeback_Inv)
-#define Index_Writeback_Inv_V		(Cache_V | Index_Writeback_Inv)
-#define Index_Writeback_Inv_S		(Cache_S | Index_Writeback_Inv)
-#define Hit_Invalidate_I		(Cache_I | Hit_Invalidate)
-#define Hit_Writeback_Inv_D		(Cache_D | Hit_Writeback_Inv)
-#define Hit_Writeback_Inv_V		(Cache_V | Hit_Writeback_Inv)
-#define Hit_Writeback_Inv_S		(Cache_S | Hit_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF0	(Cache_LEAF0 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF1	(Cache_LEAF1 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF2	(Cache_LEAF2 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF3	(Cache_LEAF3 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF4	(Cache_LEAF4 | Index_Writeback_Inv)
+#define Index_Writeback_Inv_LEAF5	(Cache_LEAF5 | Index_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF0		(Cache_LEAF0 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF1		(Cache_LEAF1 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF2		(Cache_LEAF2 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF3		(Cache_LEAF3 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF4		(Cache_LEAF4 | Hit_Writeback_Inv)
+#define Hit_Writeback_Inv_LEAF5		(Cache_LEAF5 | Hit_Writeback_Inv)
 
 #endif	/* __ASM_CACHEOPS_H */
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index ae19e33c7754..ecfa6cf79806 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -61,8 +61,8 @@ static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val,
 	return (old32 & mask) >> shift;
 }
 
-static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
-				   int size)
+static __always_inline unsigned long
+__xchg(volatile void *ptr, unsigned long x, int size)
 {
 	switch (size) {
 	case 1:
@@ -159,8 +159,8 @@ static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old,
 	return (old32 & mask) >> shift;
 }
 
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-				      unsigned long new, unsigned int size)
+static __always_inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size)
 {
 	switch (size) {
 	case 1:
diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h
index a8d87c40a0eb..b07974218393 100644
--- a/arch/loongarch/include/asm/cpu-features.h
+++ b/arch/loongarch/include/asm/cpu-features.h
@@ -19,11 +19,6 @@
 #define cpu_has_loongarch32		(cpu_data[0].isa_level & LOONGARCH_CPU_ISA_32BIT)
 #define cpu_has_loongarch64		(cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT)
 
-#define cpu_icache_line_size()		cpu_data[0].icache.linesz
-#define cpu_dcache_line_size()		cpu_data[0].dcache.linesz
-#define cpu_vcache_line_size()		cpu_data[0].vcache.linesz
-#define cpu_scache_line_size()		cpu_data[0].scache.linesz
-
 #ifdef CONFIG_32BIT
 # define cpu_has_64bits			(cpu_data[0].isa_level & LOONGARCH_CPU_ISA_64BIT)
 # define cpu_vabits			31
diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h
index b6c4f96079df..cd73a6f57fe3 100644
--- a/arch/loongarch/include/asm/cpu-info.h
+++ b/arch/loongarch/include/asm/cpu-info.h
@@ -10,18 +10,28 @@
 
 #include <asm/loongarch.h>
 
+/* cache_desc->flags */
+enum {
+	CACHE_PRESENT	= (1 << 0),
+	CACHE_PRIVATE	= (1 << 1),	/* core private cache */
+	CACHE_INCLUSIVE	= (1 << 2),	/* include the inner level caches */
+};
+
 /*
  * Descriptor for a cache
  */
 struct cache_desc {
-	unsigned int waysize;	/* Bytes per way */
+	unsigned char type;
+	unsigned char level;
 	unsigned short sets;	/* Number of lines per set */
 	unsigned char ways;	/* Number of ways */
 	unsigned char linesz;	/* Size of line in bytes */
-	unsigned char waybit;	/* Bits to select in a cache set */
 	unsigned char flags;	/* Flags describing cache properties */
 };
 
+#define CACHE_LEVEL_MAX		3
+#define CACHE_LEAVES_MAX	6
+
 struct cpuinfo_loongarch {
 	u64			asid_cache;
 	unsigned long		asid_mask;
@@ -40,11 +50,8 @@ struct cpuinfo_loongarch {
 	int			tlbsizemtlb;
 	int			tlbsizestlbsets;
 	int			tlbsizestlbways;
-	struct cache_desc	icache; /* Primary I-cache */
-	struct cache_desc	dcache; /* Primary D or combined I/D cache */
-	struct cache_desc	vcache; /* Victim cache, between pcache and scache */
-	struct cache_desc	scache; /* Secondary cache */
-	struct cache_desc	tcache; /* Tertiary/split secondary cache */
+	int			cache_leaves_present; /* number of cache_leaves[] elements */
+	struct cache_desc	cache_leaves[CACHE_LEAVES_MAX];
 	int			core;   /* physical core number in package */
 	int			package;/* physical package number */
 	int			vabits; /* Virtual Address size in bits */
diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h
index 5f3ff4781fda..7af0cebf28d7 100644
--- a/arch/loongarch/include/asm/elf.h
+++ b/arch/loongarch/include/asm/elf.h
@@ -74,6 +74,43 @@
 #define R_LARCH_SUB64				56
 #define R_LARCH_GNU_VTINHERIT			57
 #define R_LARCH_GNU_VTENTRY			58
+#define R_LARCH_B16				64
+#define R_LARCH_B21				65
+#define R_LARCH_B26				66
+#define R_LARCH_ABS_HI20			67
+#define R_LARCH_ABS_LO12			68
+#define R_LARCH_ABS64_LO20			69
+#define R_LARCH_ABS64_HI12			70
+#define R_LARCH_PCALA_HI20			71
+#define R_LARCH_PCALA_LO12			72
+#define R_LARCH_PCALA64_LO20			73
+#define R_LARCH_PCALA64_HI12			74
+#define R_LARCH_GOT_PC_HI20			75
+#define R_LARCH_GOT_PC_LO12			76
+#define R_LARCH_GOT64_PC_LO20			77
+#define R_LARCH_GOT64_PC_HI12			78
+#define R_LARCH_GOT_HI20			79
+#define R_LARCH_GOT_LO12			80
+#define R_LARCH_GOT64_LO20			81
+#define R_LARCH_GOT64_HI12			82
+#define R_LARCH_TLS_LE_HI20			83
+#define R_LARCH_TLS_LE_LO12			84
+#define R_LARCH_TLS_LE64_LO20			85
+#define R_LARCH_TLS_LE64_HI12			86
+#define R_LARCH_TLS_IE_PC_HI20			87
+#define R_LARCH_TLS_IE_PC_LO12			88
+#define R_LARCH_TLS_IE64_PC_LO20		89
+#define R_LARCH_TLS_IE64_PC_HI12		90
+#define R_LARCH_TLS_IE_HI20			91
+#define R_LARCH_TLS_IE_LO12			92
+#define R_LARCH_TLS_IE64_LO20			93
+#define R_LARCH_TLS_IE64_HI12			94
+#define R_LARCH_TLS_LD_PC_HI20			95
+#define R_LARCH_TLS_LD_HI20			96
+#define R_LARCH_TLS_GD_PC_HI20			97
+#define R_LARCH_TLS_GD_HI20			98
+#define R_LARCH_32_PCREL			99
+#define R_LARCH_RELAX				100
 
 #ifndef ELF_ARCH
 
diff --git a/arch/loongarch/include/asm/fixmap.h b/arch/loongarch/include/asm/fixmap.h
index b3541dfa2013..d2e55ae55bb9 100644
--- a/arch/loongarch/include/asm/fixmap.h
+++ b/arch/loongarch/include/asm/fixmap.h
@@ -10,4 +10,19 @@
 
 #define NR_FIX_BTMAPS 64
 
+enum fixed_addresses {
+	FIX_HOLE,
+	FIX_EARLYCON_MEM_BASE,
+	__end_of_fixed_addresses
+};
+
+#define FIXADDR_SIZE	(__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXMAP_PAGE_IO	PAGE_KERNEL_SUC
+
+extern void __set_fixmap(enum fixed_addresses idx,
+			 phys_addr_t phys, pgprot_t flags);
+
+#include <asm-generic/fixmap.h>
+
 #endif
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 7b07cbb3188c..fce1843ceebb 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -8,6 +8,8 @@
 #include <linux/types.h>
 #include <asm/asm.h>
 
+#define INSN_BREAK		0x002a0000
+
 #define ADDR_IMMMASK_LU52ID	0xFFF0000000000000
 #define ADDR_IMMMASK_LU32ID	0x000FFFFF00000000
 #define ADDR_IMMMASK_ADDU16ID	0x00000000FFFF0000
@@ -18,9 +20,16 @@
 
 #define ADDR_IMM(addr, INSN)	((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN)
 
+enum reg0i26_op {
+	b_op		= 0x14,
+	bl_op		= 0x15,
+};
+
 enum reg1i20_op {
 	lu12iw_op	= 0x0a,
 	lu32id_op	= 0x0b,
+	pcaddu12i_op	= 0x0e,
+	pcaddu18i_op	= 0x0f,
 };
 
 enum reg1i21_op {
@@ -28,10 +37,34 @@ enum reg1i21_op {
 	bnez_op		= 0x11,
 };
 
+enum reg2_op {
+	revb2h_op	= 0x0c,
+	revb4h_op	= 0x0d,
+	revb2w_op	= 0x0e,
+	revbd_op	= 0x0f,
+	revh2w_op	= 0x10,
+	revhd_op	= 0x11,
+};
+
+enum reg2i5_op {
+	slliw_op	= 0x81,
+	srliw_op	= 0x89,
+	sraiw_op	= 0x91,
+};
+
+enum reg2i6_op {
+	sllid_op	= 0x41,
+	srlid_op	= 0x45,
+	sraid_op	= 0x49,
+};
+
 enum reg2i12_op {
 	addiw_op	= 0x0a,
 	addid_op	= 0x0b,
 	lu52id_op	= 0x0c,
+	andi_op		= 0x0d,
+	ori_op		= 0x0e,
+	xori_op		= 0x0f,
 	ldb_op		= 0xa0,
 	ldh_op		= 0xa1,
 	ldw_op		= 0xa2,
@@ -40,6 +73,20 @@ enum reg2i12_op {
 	sth_op		= 0xa5,
 	stw_op		= 0xa6,
 	std_op		= 0xa7,
+	ldbu_op		= 0xa8,
+	ldhu_op		= 0xa9,
+	ldwu_op		= 0xaa,
+};
+
+enum reg2i14_op {
+	llw_op		= 0x20,
+	scw_op		= 0x21,
+	lld_op		= 0x22,
+	scd_op		= 0x23,
+	ldptrw_op	= 0x24,
+	stptrw_op	= 0x25,
+	ldptrd_op	= 0x26,
+	stptrd_op	= 0x27,
 };
 
 enum reg2i16_op {
@@ -52,6 +99,71 @@ enum reg2i16_op {
 	bgeu_op		= 0x1b,
 };
 
+enum reg2bstrd_op {
+	bstrinsd_op	= 0x2,
+	bstrpickd_op	= 0x3,
+};
+
+enum reg3_op {
+	addw_op		= 0x20,
+	addd_op		= 0x21,
+	subw_op		= 0x22,
+	subd_op		= 0x23,
+	nor_op		= 0x28,
+	and_op		= 0x29,
+	or_op		= 0x2a,
+	xor_op		= 0x2b,
+	orn_op		= 0x2c,
+	andn_op		= 0x2d,
+	sllw_op		= 0x2e,
+	srlw_op		= 0x2f,
+	sraw_op		= 0x30,
+	slld_op		= 0x31,
+	srld_op		= 0x32,
+	srad_op		= 0x33,
+	mulw_op		= 0x38,
+	mulhw_op	= 0x39,
+	mulhwu_op	= 0x3a,
+	muld_op		= 0x3b,
+	mulhd_op	= 0x3c,
+	mulhdu_op	= 0x3d,
+	divw_op		= 0x40,
+	modw_op		= 0x41,
+	divwu_op	= 0x42,
+	modwu_op	= 0x43,
+	divd_op		= 0x44,
+	modd_op		= 0x45,
+	divdu_op	= 0x46,
+	moddu_op	= 0x47,
+	ldxb_op		= 0x7000,
+	ldxh_op		= 0x7008,
+	ldxw_op		= 0x7010,
+	ldxd_op		= 0x7018,
+	stxb_op		= 0x7020,
+	stxh_op		= 0x7028,
+	stxw_op		= 0x7030,
+	stxd_op		= 0x7038,
+	ldxbu_op	= 0x7040,
+	ldxhu_op	= 0x7048,
+	ldxwu_op	= 0x7050,
+	amswapw_op	= 0x70c0,
+	amswapd_op	= 0x70c1,
+	amaddw_op	= 0x70c2,
+	amaddd_op	= 0x70c3,
+	amandw_op	= 0x70c4,
+	amandd_op	= 0x70c5,
+	amorw_op	= 0x70c6,
+	amord_op	= 0x70c7,
+	amxorw_op	= 0x70c8,
+	amxord_op	= 0x70c9,
+};
+
+enum reg3sa2_op {
+	alslw_op	= 0x02,
+	alslwu_op	= 0x03,
+	alsld_op	= 0x16,
+};
+
 struct reg0i26_format {
 	unsigned int immediate_h : 10;
 	unsigned int immediate_l : 16;
@@ -71,6 +183,26 @@ struct reg1i21_format {
 	unsigned int opcode : 6;
 };
 
+struct reg2_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int opcode : 22;
+};
+
+struct reg2i5_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int immediate : 5;
+	unsigned int opcode : 17;
+};
+
+struct reg2i6_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int immediate : 6;
+	unsigned int opcode : 16;
+};
+
 struct reg2i12_format {
 	unsigned int rd : 5;
 	unsigned int rj : 5;
@@ -78,6 +210,13 @@ struct reg2i12_format {
 	unsigned int opcode : 10;
 };
 
+struct reg2i14_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int immediate : 14;
+	unsigned int opcode : 8;
+};
+
 struct reg2i16_format {
 	unsigned int rd : 5;
 	unsigned int rj : 5;
@@ -85,13 +224,43 @@ struct reg2i16_format {
 	unsigned int opcode : 6;
 };
 
+struct reg2bstrd_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int lsbd : 6;
+	unsigned int msbd : 6;
+	unsigned int opcode : 10;
+};
+
+struct reg3_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int rk : 5;
+	unsigned int opcode : 17;
+};
+
+struct reg3sa2_format {
+	unsigned int rd : 5;
+	unsigned int rj : 5;
+	unsigned int rk : 5;
+	unsigned int immediate : 2;
+	unsigned int opcode : 15;
+};
+
 union loongarch_instruction {
 	unsigned int word;
-	struct reg0i26_format reg0i26_format;
-	struct reg1i20_format reg1i20_format;
-	struct reg1i21_format reg1i21_format;
-	struct reg2i12_format reg2i12_format;
-	struct reg2i16_format reg2i16_format;
+	struct reg0i26_format	reg0i26_format;
+	struct reg1i20_format	reg1i20_format;
+	struct reg1i21_format	reg1i21_format;
+	struct reg2_format	reg2_format;
+	struct reg2i5_format	reg2i5_format;
+	struct reg2i6_format	reg2i6_format;
+	struct reg2i12_format	reg2i12_format;
+	struct reg2i14_format	reg2i14_format;
+	struct reg2i16_format	reg2i16_format;
+	struct reg2bstrd_format	reg2bstrd_format;
+	struct reg3_format	reg3_format;
+	struct reg3sa2_format	reg3sa2_format;
 };
 
 #define LOONGARCH_INSN_SIZE	sizeof(union loongarch_instruction)
@@ -166,4 +335,235 @@ u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
 
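+/*
+ * Immediate range checks used when encoding instructions; e.g. a signed
+ * 12-bit field accepts [-2048, 2047], i.e. signed_imm_check(val, 12).
+ */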
+static inline bool signed_imm_check(long val, unsigned int bit)
+{
+	return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1));
+}
+
+static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
+{
+	return val < (1UL << bit);
+}
+
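+/*
+ * Each DEF_EMIT_* macro below expands to an emit_<insn>() helper that fills
+ * in the bitfields of the corresponding instruction format; e.g. emit_b()
+ * splits a 26-bit branch offset into its low 16-bit and high 10-bit halves.
+ */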
+#define DEF_EMIT_REG0I26_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       int offset)				\
+{									\
+	unsigned int immediate_l, immediate_h;				\
+									\
+	immediate_l = offset & 0xffff;					\
+	offset >>= 16;							\
+	immediate_h = offset & 0x3ff;					\
+									\
+	insn->reg0i26_format.opcode = OP;				\
+	insn->reg0i26_format.immediate_l = immediate_l;			\
+	insn->reg0i26_format.immediate_h = immediate_h;			\
+}
+
+DEF_EMIT_REG0I26_FORMAT(b, b_op)
+
+#define DEF_EMIT_REG1I20_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd, int imm)		\
+{									\
+	insn->reg1i20_format.opcode = OP;				\
+	insn->reg1i20_format.immediate = imm;				\
+	insn->reg1i20_format.rd = rd;					\
+}
+
+DEF_EMIT_REG1I20_FORMAT(lu12iw, lu12iw_op)
+DEF_EMIT_REG1I20_FORMAT(lu32id, lu32id_op)
+DEF_EMIT_REG1I20_FORMAT(pcaddu18i, pcaddu18i_op)
+
+#define DEF_EMIT_REG2_FORMAT(NAME, OP)					\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj)			\
+{									\
+	insn->reg2_format.opcode = OP;					\
+	insn->reg2_format.rd = rd;					\
+	insn->reg2_format.rj = rj;					\
+}
+
+DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op)
+DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op)
+DEF_EMIT_REG2_FORMAT(revbd, revbd_op)
+
+#define DEF_EMIT_REG2I5_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       int imm)					\
+{									\
+	insn->reg2i5_format.opcode = OP;				\
+	insn->reg2i5_format.immediate = imm;				\
+	insn->reg2i5_format.rd = rd;					\
+	insn->reg2i5_format.rj = rj;					\
+}
+
+DEF_EMIT_REG2I5_FORMAT(slliw, slliw_op)
+DEF_EMIT_REG2I5_FORMAT(srliw, srliw_op)
+DEF_EMIT_REG2I5_FORMAT(sraiw, sraiw_op)
+
+#define DEF_EMIT_REG2I6_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       int imm)					\
+{									\
+	insn->reg2i6_format.opcode = OP;				\
+	insn->reg2i6_format.immediate = imm;				\
+	insn->reg2i6_format.rd = rd;					\
+	insn->reg2i6_format.rj = rj;					\
+}
+
+DEF_EMIT_REG2I6_FORMAT(sllid, sllid_op)
+DEF_EMIT_REG2I6_FORMAT(srlid, srlid_op)
+DEF_EMIT_REG2I6_FORMAT(sraid, sraid_op)
+
+#define DEF_EMIT_REG2I12_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       int imm)					\
+{									\
+	insn->reg2i12_format.opcode = OP;				\
+	insn->reg2i12_format.immediate = imm;				\
+	insn->reg2i12_format.rd = rd;					\
+	insn->reg2i12_format.rj = rj;					\
+}
+
+DEF_EMIT_REG2I12_FORMAT(addiw, addiw_op)
+DEF_EMIT_REG2I12_FORMAT(addid, addid_op)
+DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op)
+DEF_EMIT_REG2I12_FORMAT(andi, andi_op)
+DEF_EMIT_REG2I12_FORMAT(ori, ori_op)
+DEF_EMIT_REG2I12_FORMAT(xori, xori_op)
+DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op)
+DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op)
+DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op)
+DEF_EMIT_REG2I12_FORMAT(ldd, ldd_op)
+DEF_EMIT_REG2I12_FORMAT(stb, stb_op)
+DEF_EMIT_REG2I12_FORMAT(sth, sth_op)
+DEF_EMIT_REG2I12_FORMAT(stw, stw_op)
+DEF_EMIT_REG2I12_FORMAT(std, std_op)
+
+#define DEF_EMIT_REG2I14_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       int imm)					\
+{									\
+	insn->reg2i14_format.opcode = OP;				\
+	insn->reg2i14_format.immediate = imm;				\
+	insn->reg2i14_format.rd = rd;					\
+	insn->reg2i14_format.rj = rj;					\
+}
+
+DEF_EMIT_REG2I14_FORMAT(llw, llw_op)
+DEF_EMIT_REG2I14_FORMAT(scw, scw_op)
+DEF_EMIT_REG2I14_FORMAT(lld, lld_op)
+DEF_EMIT_REG2I14_FORMAT(scd, scd_op)
+DEF_EMIT_REG2I14_FORMAT(ldptrw, ldptrw_op)
+DEF_EMIT_REG2I14_FORMAT(stptrw, stptrw_op)
+DEF_EMIT_REG2I14_FORMAT(ldptrd, ldptrd_op)
+DEF_EMIT_REG2I14_FORMAT(stptrd, stptrd_op)
+
+#define DEF_EMIT_REG2I16_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rj,			\
+			       enum loongarch_gpr rd,			\
+			       int offset)				\
+{									\
+	insn->reg2i16_format.opcode = OP;				\
+	insn->reg2i16_format.immediate = offset;			\
+	insn->reg2i16_format.rj = rj;					\
+	insn->reg2i16_format.rd = rd;					\
+}
+
+DEF_EMIT_REG2I16_FORMAT(beq, beq_op)
+DEF_EMIT_REG2I16_FORMAT(bne, bne_op)
+DEF_EMIT_REG2I16_FORMAT(blt, blt_op)
+DEF_EMIT_REG2I16_FORMAT(bge, bge_op)
+DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op)
+DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op)
+DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op)
+
+#define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       int msbd,				\
+			       int lsbd)				\
+{									\
+	insn->reg2bstrd_format.opcode = OP;				\
+	insn->reg2bstrd_format.msbd = msbd;				\
+	insn->reg2bstrd_format.lsbd = lsbd;				\
+	insn->reg2bstrd_format.rj = rj;					\
+	insn->reg2bstrd_format.rd = rd;					\
+}
+
+DEF_EMIT_REG2BSTRD_FORMAT(bstrpickd, bstrpickd_op)
+
+#define DEF_EMIT_REG3_FORMAT(NAME, OP)					\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       enum loongarch_gpr rk)			\
+{									\
+	insn->reg3_format.opcode = OP;					\
+	insn->reg3_format.rd = rd;					\
+	insn->reg3_format.rj = rj;					\
+	insn->reg3_format.rk = rk;					\
+}
+
+DEF_EMIT_REG3_FORMAT(addd, addd_op)
+DEF_EMIT_REG3_FORMAT(subd, subd_op)
+DEF_EMIT_REG3_FORMAT(muld, muld_op)
+DEF_EMIT_REG3_FORMAT(divdu, divdu_op)
+DEF_EMIT_REG3_FORMAT(moddu, moddu_op)
+DEF_EMIT_REG3_FORMAT(and, and_op)
+DEF_EMIT_REG3_FORMAT(or, or_op)
+DEF_EMIT_REG3_FORMAT(xor, xor_op)
+DEF_EMIT_REG3_FORMAT(sllw, sllw_op)
+DEF_EMIT_REG3_FORMAT(slld, slld_op)
+DEF_EMIT_REG3_FORMAT(srlw, srlw_op)
+DEF_EMIT_REG3_FORMAT(srld, srld_op)
+DEF_EMIT_REG3_FORMAT(sraw, sraw_op)
+DEF_EMIT_REG3_FORMAT(srad, srad_op)
+DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op)
+DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op)
+DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op)
+DEF_EMIT_REG3_FORMAT(ldxd, ldxd_op)
+DEF_EMIT_REG3_FORMAT(stxb, stxb_op)
+DEF_EMIT_REG3_FORMAT(stxh, stxh_op)
+DEF_EMIT_REG3_FORMAT(stxw, stxw_op)
+DEF_EMIT_REG3_FORMAT(stxd, stxd_op)
+DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op)
+DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op)
+DEF_EMIT_REG3_FORMAT(amandw, amandw_op)
+DEF_EMIT_REG3_FORMAT(amandd, amandd_op)
+DEF_EMIT_REG3_FORMAT(amorw, amorw_op)
+DEF_EMIT_REG3_FORMAT(amord, amord_op)
+DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op)
+DEF_EMIT_REG3_FORMAT(amxord, amxord_op)
+DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op)
+DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op)
+
+#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP)				\
+static inline void emit_##NAME(union loongarch_instruction *insn,	\
+			       enum loongarch_gpr rd,			\
+			       enum loongarch_gpr rj,			\
+			       enum loongarch_gpr rk,			\
+			       int imm)					\
+{									\
+	insn->reg3sa2_format.opcode = OP;				\
+	insn->reg3sa2_format.immediate = imm;				\
+	insn->reg3sa2_format.rd = rd;					\
+	insn->reg3sa2_format.rj = rj;					\
+	insn->reg3sa2_format.rk = rk;					\
+}
+
+DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
+
 #endif /* _ASM_INST_H */
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 999944ea1cea..402a7d9e3a53 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -27,71 +27,38 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size);
 #define early_memremap early_ioremap
 #define early_memunmap early_iounmap
 
+#ifdef CONFIG_ARCH_IOREMAP
+
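+/*
+ * With DMW-based ioremap(), "mapping" a physical range just means offsetting
+ * it into the fixed cached (CACHE_BASE) or uncached (UNCACHE_BASE) direct
+ * mapped window, which is why iounmap() below is a no-op.
+ */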
 static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
 					 unsigned long prot_val)
 {
-	if (prot_val == _CACHE_CC)
+	if (prot_val & _CACHE_CC)
 		return (void __iomem *)(unsigned long)(CACHE_BASE + offset);
 	else
 		return (void __iomem *)(unsigned long)(UNCACHE_BASE + offset);
 }
 
-/*
- * ioremap -   map bus memory into CPU space
- * @offset:    bus address of the memory
- * @size:      size of the resource to map
- *
- * ioremap performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- */
-#define ioremap(offset, size)					\
-	ioremap_prot((offset), (size), _CACHE_SUC)
+#define ioremap(offset, size)		\
+	ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_SUC))
 
-/*
- * ioremap_wc - map bus memory into CPU space
- * @offset:     bus address of the memory
- * @size:       size of the resource to map
- *
- * ioremap_wc performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked uncachable
- * but accelerated by means of write-combining feature. It is specifically
- * useful for PCIe prefetchable windows, which may vastly improve a
- * communications performance. If it was determined on boot stage, what
- * CPU CCA doesn't support WUC, the method shall fall-back to the
- * _CACHE_SUC option (see cpu_probe() method).
- */
-#define ioremap_wc(offset, size)				\
-	ioremap_prot((offset), (size), _CACHE_WUC)
+#define iounmap(addr)			((void)(addr))
+
+#endif
 
 /*
- * ioremap_cache -  map bus memory into CPU space
- * @offset:	    bus address of the memory
- * @size:	    size of the resource to map
+ * On LoongArch, ioremap() has two variants, ioremap_wc() and ioremap_cache().
+ * They map bus memory into CPU space; the mapped memory is marked uncachable
+ * (_CACHE_SUC), uncachable but accelerated by write-combining (_CACHE_WUC),
+ * and cachable (_CACHE_CC), respectively, for CPU access.
  *
- * ioremap_cache performs a platform specific sequence of operations to
- * make bus memory CPU accessible via the readb/readw/readl/writeb/
- * writew/writel functions and the other mmio helpers. The returned
- * address is not guaranteed to be usable directly as a virtual
- * address.
- *
- * This version of ioremap ensures that the memory is marked cachable by
- * the CPU.  Also enables full write-combining.	 Useful for some
- * memory-like regions on I/O busses.
+ * @offset:    bus address of the memory
+ * @size:      size of the resource to map
  */
-#define ioremap_cache(offset, size)				\
-	ioremap_prot((offset), (size), _CACHE_CC)
+#define ioremap_wc(offset, size)	\
+	ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL_WUC))
 
-static inline void iounmap(const volatile void __iomem *addr)
-{
-}
+#define ioremap_cache(offset, size)	\
+	ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
 
 #define mmiowb() asm volatile ("dbar 0" ::: "memory")
 
@@ -107,4 +74,8 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
 
 #include <asm-generic/io.h>
 
+#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern int valid_phys_addr_range(phys_addr_t addr, size_t size);
+extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
+
 #endif /* _ASM_IO_H */
diff --git a/arch/loongarch/include/asm/kexec.h b/arch/loongarch/include/asm/kexec.h
new file mode 100644
index 000000000000..cf95cd3eb2de
--- /dev/null
+++ b/arch/loongarch/include/asm/kexec.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kexec.h for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_KEXEC_H
+#define _ASM_KEXEC_H
+
+#include <asm/stacktrace.h>
+#include <asm/page.h>
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Reserve a page for the control code buffer */
+#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
+
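+/* Snapshot the CPU registers for kdump (the current frame if @oldregs is NULL). */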
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	if (oldregs)
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	else
+		prepare_frametrace(newregs);
+}
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	unsigned long efi_boot;
+	unsigned long cmdline_ptr;
+	unsigned long systable_ptr;
+};
+
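+/* Entry signature of the relocated relocate_new_kernel trampoline. */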
+typedef void (*do_kexec_t)(unsigned long efi_boot,
+			   unsigned long cmdline_ptr,
+			   unsigned long systable_ptr,
+			   unsigned long start_addr,
+			   unsigned long first_ind_entry);
+
+struct kimage;
+extern const unsigned char relocate_new_kernel[];
+extern const size_t relocate_new_kernel_size;
+extern void kexec_reboot(void);
+
+#ifdef CONFIG_SMP
+extern atomic_t kexec_ready_to_reboot;
+extern const unsigned char kexec_smp_wait[];
+#endif
+
+#endif /* !_ASM_KEXEC_H */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 3ba4f7e87cd2..7f8d57a61c8b 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -187,36 +187,15 @@ static inline u32 read_cpucfg(u32 reg)
 #define  CPUCFG16_L3_DINCL		BIT(16)
 
 #define LOONGARCH_CPUCFG17		0x11
-#define  CPUCFG17_L1I_WAYS_M		GENMASK(15, 0)
-#define  CPUCFG17_L1I_SETS_M		GENMASK(23, 16)
-#define  CPUCFG17_L1I_SIZE_M		GENMASK(30, 24)
-#define  CPUCFG17_L1I_WAYS		0
-#define  CPUCFG17_L1I_SETS		16
-#define  CPUCFG17_L1I_SIZE		24
-
 #define LOONGARCH_CPUCFG18		0x12
-#define  CPUCFG18_L1D_WAYS_M		GENMASK(15, 0)
-#define  CPUCFG18_L1D_SETS_M		GENMASK(23, 16)
-#define  CPUCFG18_L1D_SIZE_M		GENMASK(30, 24)
-#define  CPUCFG18_L1D_WAYS		0
-#define  CPUCFG18_L1D_SETS		16
-#define  CPUCFG18_L1D_SIZE		24
-
 #define LOONGARCH_CPUCFG19		0x13
-#define  CPUCFG19_L2_WAYS_M		GENMASK(15, 0)
-#define  CPUCFG19_L2_SETS_M		GENMASK(23, 16)
-#define  CPUCFG19_L2_SIZE_M		GENMASK(30, 24)
-#define  CPUCFG19_L2_WAYS		0
-#define  CPUCFG19_L2_SETS		16
-#define  CPUCFG19_L2_SIZE		24
-
 #define LOONGARCH_CPUCFG20		0x14
-#define  CPUCFG20_L3_WAYS_M		GENMASK(15, 0)
-#define  CPUCFG20_L3_SETS_M		GENMASK(23, 16)
-#define  CPUCFG20_L3_SIZE_M		GENMASK(30, 24)
-#define  CPUCFG20_L3_WAYS		0
-#define  CPUCFG20_L3_SETS		16
-#define  CPUCFG20_L3_SIZE		24
+#define  CPUCFG_CACHE_WAYS_M		GENMASK(15, 0)
+#define  CPUCFG_CACHE_SETS_M		GENMASK(23, 16)
+#define  CPUCFG_CACHE_LSIZE_M		GENMASK(30, 24)
+#define  CPUCFG_CACHE_WAYS		0
+#define  CPUCFG_CACHE_SETS		16
+#define  CPUCFG_CACHE_LSIZE		24
 
 #define LOONGARCH_CPUCFG48		0x30
 #define  CPUCFG48_MCSR_LCK		BIT(0)
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 9f6718df1854..b29b19a46f42 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -17,10 +17,15 @@ struct mod_section {
 };
 
 struct mod_arch_specific {
+	struct mod_section got;
 	struct mod_section plt;
 	struct mod_section plt_idx;
 };
 
+struct got_entry {
+	Elf_Addr symbol_addr;
+};
+
 struct plt_entry {
 	u32 inst_lu12iw;
 	u32 inst_lu32id;
@@ -29,10 +34,16 @@ struct plt_entry {
 };
 
 struct plt_idx_entry {
-	unsigned long symbol_addr;
+	Elf_Addr symbol_addr;
 };
 
-Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val);
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val);
+Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val);
+
+static inline struct got_entry emit_got_entry(Elf_Addr val)
+{
+	return (struct got_entry) { val };
+}
 
 static inline struct plt_entry emit_plt_entry(unsigned long val)
 {
@@ -77,4 +88,16 @@ static inline struct plt_entry *get_plt_entry(unsigned long val,
 	return plt + plt_idx;
 }
 
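+/* Linear search of the module's GOT; returns NULL if @val has no entry yet. */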
+static inline struct got_entry *get_got_entry(Elf_Addr val,
+					      const struct mod_section *sec)
+{
+	struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr;
+	int i;
+
+	for (i = 0; i < sec->num_entries; i++)
+		if (got[i].symbol_addr == val)
+			return &got[i];
+	return NULL;
+}
+
 #endif /* _ASM_MODULE_H */
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index 31c1c0db11a3..a3d1bc0fcc72 100644
--- a/arch/loongarch/include/asm/module.lds.h
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -2,6 +2,7 @@
 /* Copyright (C) 2020-2022 Loongson Technology Corporation Limited */
 SECTIONS {
 	. = ALIGN(4);
+	.got : { BYTE(0) }
 	.plt : { BYTE(0) }
 	.plt.idx : { BYTE(0) }
 }
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 0bd6b0110198..ad8d88494554 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -8,6 +8,15 @@
 #include <asm/cmpxchg.h>
 #include <asm/loongarch.h>
 
+/*
+ * The "address" (in fact, offset from $r21) of a per-CPU variable is close to
+ * the loading address of the main kernel image, but far from where modules are
+ * loaded. Tell the compiler this fact when using explicit relocs.
+ */
+#if defined(MODULE) && defined(CONFIG_AS_HAS_EXPLICIT_RELOCS)
+#define PER_CPU_ATTRIBUTES    __attribute__((model("extreme")))
+#endif
+
 /* Use r21 for fast access */
 register unsigned long __my_cpu_offset __asm__("$r21");
 
diff --git a/arch/loongarch/include/asm/perf_event.h b/arch/loongarch/include/asm/perf_event.h
index dcb3b17053a8..2a35a0bc2aaa 100644
--- a/arch/loongarch/include/asm/perf_event.h
+++ b/arch/loongarch/include/asm/perf_event.h
@@ -6,5 +6,7 @@
 
 #ifndef __LOONGARCH_PERF_EVENT_H__
 #define __LOONGARCH_PERF_EVENT_H__
-/* Nothing to show here; the file is required by linux/perf_event.h. */
+
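+/* BPF programs attached to perf events see the user-visible register set. */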
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_pt_regs *)regs
+
 #endif /* __LOONGARCH_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/asm/pgtable-bits.h b/arch/loongarch/include/asm/pgtable-bits.h
index 9ca147a29bab..3d1e0a69975a 100644
--- a/arch/loongarch/include/asm/pgtable-bits.h
+++ b/arch/loongarch/include/asm/pgtable-bits.h
@@ -83,8 +83,11 @@
 				 _PAGE_GLOBAL | _PAGE_KERN |  _CACHE_SUC)
 #define PAGE_KERNEL_WUC __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
 				 _PAGE_GLOBAL | _PAGE_KERN |  _CACHE_WUC)
+
 #ifndef __ASSEMBLY__
 
+#define _PAGE_IOREMAP		pgprot_val(PAGE_KERNEL_SUC)
+
 #define pgprot_noncached pgprot_noncached
 
 static inline pgprot_t pgprot_noncached(pgprot_t _prot)
diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index 1c4b4308378d..6954dc5d24e9 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -176,9 +176,6 @@ struct thread_struct {
 
 struct task_struct;
 
-/* Free all resources held by a thread. */
-#define release_thread(thread) do { } while (0)
-
 enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL};
 
 extern unsigned long		boot_option_idle_override;
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index 6d7d2a3e23dd..ca373f8e3c4d 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -13,7 +13,9 @@
 
 extern unsigned long eentry;
 extern unsigned long tlbrentry;
+extern void tlb_init(int cpu);
 extern void cpu_cache_init(void);
+extern void cache_error_setup(void);
 extern void per_cpu_trap_init(int cpu);
 extern void set_handler(unsigned long offset, void *addr, unsigned long len);
 extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len);
diff --git a/arch/loongarch/include/asm/spinlock.h b/arch/loongarch/include/asm/spinlock.h
new file mode 100644
index 000000000000..7cb3476999be
--- /dev/null
+++ b/arch/loongarch/include/asm/spinlock.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_SPINLOCK_H
+#define _ASM_SPINLOCK_H
+
+#include <asm/processor.h>
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
+
+#endif /* _ASM_SPINLOCK_H */
diff --git a/arch/loongarch/include/asm/spinlock_types.h b/arch/loongarch/include/asm/spinlock_types.h
new file mode 100644
index 000000000000..7458d036c161
--- /dev/null
+++ b/arch/loongarch/include/asm/spinlock_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_SPINLOCK_TYPES_H
+#define _ASM_SPINLOCK_TYPES_H
+
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+
+#endif
diff --git a/arch/loongarch/include/uapi/asm/bpf_perf_event.h b/arch/loongarch/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..eb6e2fd2a1f0
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <linux/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/loongarch/include/uapi/asm/perf_regs.h b/arch/loongarch/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..29d69c00fc7a
--- /dev/null
+++ b/arch/loongarch/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_LOONGARCH_PERF_REGS_H
+#define _ASM_LOONGARCH_PERF_REGS_H
+
+enum perf_event_loongarch_regs {
+	PERF_REG_LOONGARCH_PC,
+	PERF_REG_LOONGARCH_R1,
+	PERF_REG_LOONGARCH_R2,
+	PERF_REG_LOONGARCH_R3,
+	PERF_REG_LOONGARCH_R4,
+	PERF_REG_LOONGARCH_R5,
+	PERF_REG_LOONGARCH_R6,
+	PERF_REG_LOONGARCH_R7,
+	PERF_REG_LOONGARCH_R8,
+	PERF_REG_LOONGARCH_R9,
+	PERF_REG_LOONGARCH_R10,
+	PERF_REG_LOONGARCH_R11,
+	PERF_REG_LOONGARCH_R12,
+	PERF_REG_LOONGARCH_R13,
+	PERF_REG_LOONGARCH_R14,
+	PERF_REG_LOONGARCH_R15,
+	PERF_REG_LOONGARCH_R16,
+	PERF_REG_LOONGARCH_R17,
+	PERF_REG_LOONGARCH_R18,
+	PERF_REG_LOONGARCH_R19,
+	PERF_REG_LOONGARCH_R20,
+	PERF_REG_LOONGARCH_R21,
+	PERF_REG_LOONGARCH_R22,
+	PERF_REG_LOONGARCH_R23,
+	PERF_REG_LOONGARCH_R24,
+	PERF_REG_LOONGARCH_R25,
+	PERF_REG_LOONGARCH_R26,
+	PERF_REG_LOONGARCH_R27,
+	PERF_REG_LOONGARCH_R28,
+	PERF_REG_LOONGARCH_R29,
+	PERF_REG_LOONGARCH_R30,
+	PERF_REG_LOONGARCH_R31,
+	PERF_REG_LOONGARCH_MAX,
+};
+#endif /* _ASM_LOONGARCH_PERF_REGS_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index e5be17009fe8..42be564278fa 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -3,9 +3,9 @@
 # Makefile for the Linux/LoongArch kernel.
 #
 
-extra-y		:= head.o vmlinux.lds
+extra-y		:= vmlinux.lds
 
-obj-y		+= cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
+obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 		   traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
 		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
 
@@ -23,7 +23,14 @@ obj-$(CONFIG_SMP)		+= smp.o
 
 obj-$(CONFIG_NUMA)		+= numa.o
 
+obj-$(CONFIG_MAGIC_SYSRQ)	+= sysrq.o
+
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+
 obj-$(CONFIG_UNWINDER_GUESS)	+= unwind_guess.o
 obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 
+obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_regs.o
+
 CPPFLAGS_vmlinux.lds		:= $(KBUILD_CFLAGS)
diff --git a/arch/loongarch/kernel/cacheinfo.c b/arch/loongarch/kernel/cacheinfo.c
index 4662b06269f4..c7988f757281 100644
--- a/arch/loongarch/kernel/cacheinfo.c
+++ b/arch/loongarch/kernel/cacheinfo.c
@@ -5,73 +5,34 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <linux/cacheinfo.h>
+#include <linux/topology.h>
 #include <asm/bootinfo.h>
 #include <asm/cpu-info.h>
 
-/* Populates leaf and increments to next leaf */
-#define populate_cache(cache, leaf, c_level, c_type)		\
-do {								\
-	leaf->type = c_type;					\
-	leaf->level = c_level;					\
-	leaf->coherency_line_size = c->cache.linesz;		\
-	leaf->number_of_sets = c->cache.sets;			\
-	leaf->ways_of_associativity = c->cache.ways;		\
-	leaf->size = c->cache.linesz * c->cache.sets *		\
-		c->cache.ways;					\
-	if (leaf->level > 2)					\
-		leaf->size *= nodes_per_package;		\
-	leaf++;							\
-} while (0)
-
 int init_cache_level(unsigned int cpu)
 {
-	struct cpuinfo_loongarch *c = &current_cpu_data;
+	int cache_present = current_cpu_data.cache_leaves_present;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	int levels = 0, leaves = 0;
-
-	/*
-	 * If Dcache is not set, we assume the cache structures
-	 * are not properly initialized.
-	 */
-	if (c->dcache.waysize)
-		levels += 1;
-	else
-		return -ENOENT;
-
-
-	leaves += (c->icache.waysize) ? 2 : 1;
-
-	if (c->vcache.waysize) {
-		levels++;
-		leaves++;
-	}
 
-	if (c->scache.waysize) {
-		levels++;
-		leaves++;
-	}
+	this_cpu_ci->num_levels =
+		current_cpu_data.cache_leaves[cache_present - 1].level;
+	this_cpu_ci->num_leaves = cache_present;
 
-	if (c->tcache.waysize) {
-		levels++;
-		leaves++;
-	}
-
-	this_cpu_ci->num_levels = levels;
-	this_cpu_ci->num_leaves = leaves;
 	return 0;
 }
 
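+/* Leaves are shared between CPUs unless either cache is core-private. */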
 static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf,
 					   struct cacheinfo *sib_leaf)
 {
-	return !((this_leaf->level == 1) || (this_leaf->level == 2));
+	return (!(*(unsigned char *)(this_leaf->priv) & CACHE_PRIVATE)
+		&& !(*(unsigned char *)(sib_leaf->priv) & CACHE_PRIVATE));
 }
 
 static void cache_cpumap_setup(unsigned int cpu)
 {
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
-	struct cacheinfo *this_leaf, *sib_leaf;
 	unsigned int index;
+	struct cacheinfo *this_leaf, *sib_leaf;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
 	for (index = 0; index < this_cpu_ci->num_leaves; index++) {
 		unsigned int i;
@@ -85,8 +46,10 @@ static void cache_cpumap_setup(unsigned int cpu)
 		for_each_online_cpu(i) {
 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 
-			if (i == cpu || !sib_cpu_ci->info_list)
-				continue;/* skip if itself or no cacheinfo */
+			if (i == cpu || !sib_cpu_ci->info_list ||
+				(cpu_to_node(i) != cpu_to_node(cpu)))
+				continue;
+
 			sib_leaf = sib_cpu_ci->info_list + index;
 			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
 				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
@@ -98,31 +61,24 @@ static void cache_cpumap_setup(unsigned int cpu)
 
 int populate_cache_leaves(unsigned int cpu)
 {
-	int level = 1, nodes_per_package = 1;
-	struct cpuinfo_loongarch *c = &current_cpu_data;
+	int i, cache_present = current_cpu_data.cache_leaves_present;
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
-
-	if (loongson_sysconf.nr_nodes > 1)
-		nodes_per_package = loongson_sysconf.cores_per_package
-					/ loongson_sysconf.cores_per_node;
-
-	if (c->icache.waysize) {
-		populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
-		populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
-	} else {
-		populate_cache(dcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
+	struct cache_desc *cd, *cdesc = current_cpu_data.cache_leaves;
+
+	for (i = 0; i < cache_present; i++) {
+		cd = cdesc + i;
+
+		this_leaf->type = cd->type;
+		this_leaf->level = cd->level;
+		this_leaf->coherency_line_size = cd->linesz;
+		this_leaf->number_of_sets = cd->sets;
+		this_leaf->ways_of_associativity = cd->ways;
+		this_leaf->size = cd->linesz * cd->sets * cd->ways;
+		this_leaf->priv = &cd->flags;
+		this_leaf++;
 	}
 
-	if (c->vcache.waysize)
-		populate_cache(vcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
-
-	if (c->scache.waysize)
-		populate_cache(scache, this_leaf, level++, CACHE_TYPE_UNIFIED);
-
-	if (c->tcache.waysize)
-		populate_cache(tcache, this_leaf, level++, CACHE_TYPE_UNIFIED);
-
 	cache_cpumap_setup(cpu);
 	this_cpu_ci->cpu_map_populated = true;
 
diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c
index 529ab8f44ec6..255a09876ef2 100644
--- a/arch/loongarch/kernel/cpu-probe.c
+++ b/arch/loongarch/kernel/cpu-probe.c
@@ -187,7 +187,9 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int
 	uint64_t *vendor = (void *)(&cpu_full_name[VENDOR_OFFSET]);
 	uint64_t *cpuname = (void *)(&cpu_full_name[CPUNAME_OFFSET]);
 
-	__cpu_full_name[cpu] = cpu_full_name;
+	if (!__cpu_full_name[cpu])
+		__cpu_full_name[cpu] = cpu_full_name;
+
 	*vendor = iocsr_read64(LOONGARCH_IOCSR_VENDOR);
 	*cpuname = iocsr_read64(LOONGARCH_IOCSR_CPUNAME);
 
diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
new file mode 100644
index 000000000000..e559307c1092
--- /dev/null
+++ b/arch/loongarch/kernel/crash_dump.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+
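+/*
+ * Read one page of the panicked kernel's memory: temporarily map the old
+ * page (cacheable, MEMREMAP_WB) and copy @csize bytes at @offset into @iter.
+ */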
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+			 size_t csize, unsigned long offset)
+{
+	void *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+	if (!vaddr)
+		return -ENOMEM;
+
+	csize = copy_to_iter(vaddr + offset, csize, iter);
+
+	memunmap(vaddr);
+
+	return csize;
+}
diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index 7e57ae8741b1..97425779ce9f 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -8,6 +8,7 @@
 #include <asm/addrspace.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/bug.h>
 #include <asm/regdef.h>
 #include <asm/loongarch.h>
 #include <asm/stackframe.h>
@@ -20,7 +21,11 @@
 
 _head:
 	.word	MZ_MAGIC		/* "MZ", MS-DOS header */
-	.org	0x3c			/* 0x04 ~ 0x3b reserved */
+	.org	0x8
+	.dword	kernel_entry		/* Kernel entry point */
+	.dword	_end - _text		/* Kernel image effective size */
+	.quad	0			/* Kernel image load offset from start of RAM */
+	.org	0x3c			/* 0x20 ~ 0x3b reserved */
 	.long	pe_header - _head	/* Offset to the PE header */
 
 pe_header:
@@ -57,19 +62,19 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	li.w		t0, 0x00		# FPE=0, SXE=0, ASXE=0, BTE=0
 	csrwr		t0, LOONGARCH_CSR_EUEN
 
-	la		t0, __bss_start		# clear .bss
+	la.pcrel	t0, __bss_start		# clear .bss
 	st.d		zero, t0, 0
-	la		t1, __bss_stop - LONGSIZE
+	la.pcrel	t1, __bss_stop - LONGSIZE
 1:
 	addi.d		t0, t0, LONGSIZE
 	st.d		zero, t0, 0
 	bne		t0, t1, 1b
 
-	la		t0, fw_arg0
+	la.pcrel	t0, fw_arg0
 	st.d		a0, t0, 0		# firmware arguments
-	la		t0, fw_arg1
+	la.pcrel	t0, fw_arg1
 	st.d		a1, t0, 0
-	la		t0, fw_arg2
+	la.pcrel	t0, fw_arg2
 	st.d		a2, t0, 0
 
 	/* KSave3 used for percpu base, initialized as 0 */
@@ -77,7 +82,7 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	/* GPR21 used for percpu base (runtime), initialized as 0 */
 	move		u0, zero
 
-	la		tp, init_thread_union
+	la.pcrel	tp, init_thread_union
 	/* Set the SP after an empty pt_regs.  */
 	PTR_LI		sp, (_THREAD_SIZE - 32 - PT_SIZE)
 	PTR_ADD		sp, sp, tp
@@ -85,6 +90,7 @@ SYM_CODE_START(kernel_entry)			# kernel entry point
 	PTR_ADDI	sp, sp, -4 * SZREG	# init stack pointer
 
 	bl		start_kernel
+	ASM_BUG()
 
 SYM_CODE_END(kernel_entry)
 
@@ -116,6 +122,8 @@ SYM_CODE_START(smpboot_entry)
 	ld.d		tp, t0, CPU_BOOT_TINFO
 
 	bl		start_secondary
+	ASM_BUG()
+
 SYM_CODE_END(smpboot_entry)
 
 #endif /* CONFIG_SMP */
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
new file mode 100644
index 000000000000..2dcb9e003657
--- /dev/null
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * machine_kexec.c for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include <linux/compiler.h>
+#include <linux/cpu.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include <linux/of_fdt.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/bootinfo.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* The region 0x100000 ~ 0x200000 is a safe area for the kexec control code */
+#define KEXEC_CONTROL_CODE	TO_CACHE(0x100000UL)
+#define KEXEC_CMDLINE_ADDR	TO_CACHE(0x108000UL)
+
+static unsigned long reboot_code_buffer;
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
+
+#ifdef CONFIG_SMP
+static void (*relocated_kexec_smp_wait)(void *);
+atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
+#endif
+
+static unsigned long efi_boot;
+static unsigned long cmdline_ptr;
+static unsigned long systable_ptr;
+static unsigned long start_addr;
+static unsigned long first_ind_entry;
+
+static void kexec_image_info(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_debug("kexec kimage info:\n");
+	pr_debug("\ttype:        %d\n", kimage->type);
+	pr_debug("\tstart:       %lx\n", kimage->start);
+	pr_debug("\thead:        %lx\n", kimage->head);
+	pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
+
+	for (i = 0; i < kimage->nr_segments; i++) {
+		pr_debug("\t    segment[%lu]: %016lx - %016lx", i,
+			kimage->segment[i].mem,
+			kimage->segment[i].mem + kimage->segment[i].memsz);
+		pr_debug("\t\t0x%lx bytes, %lu pages\n",
+			(unsigned long)kimage->segment[i].memsz,
+			(unsigned long)kimage->segment[i].memsz /  PAGE_SIZE);
+	}
+}
+
+int machine_kexec_prepare(struct kimage *kimage)
+{
+	int i;
+	char *bootloader = "kexec";
+	void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR;
+
+	kexec_image_info(kimage);
+
+	kimage->arch.efi_boot = fw_arg0;
+	kimage->arch.systable_ptr = fw_arg2;
+
+	/* Find the command line */
+	for (i = 0; i < kimage->nr_segments; i++) {
+		if (!strncmp(bootloader, (char __user *)kimage->segment[i].buf, strlen(bootloader))) {
+			if (!copy_from_user(cmdline_ptr, kimage->segment[i].buf, COMMAND_LINE_SIZE))
+				kimage->arch.cmdline_ptr = (unsigned long)cmdline_ptr;
+			break;
+		}
+	}
+
+	if (!kimage->arch.cmdline_ptr) {
+		pr_err("Command line not included in the provided image\n");
+		return -EINVAL;
+	}
+
+	/* kexec/kdump need a safe page to save reboot_code_buffer */
+	kimage->control_code_page = virt_to_page((void *)KEXEC_CONTROL_CODE);
+
+	reboot_code_buffer = (unsigned long)page_address(kimage->control_code_page);
+	memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size);
+
+#ifdef CONFIG_SMP
+	/* All secondary CPUs may now jump to the kexec_smp_wait loop */
+	relocated_kexec_smp_wait = reboot_code_buffer + (void *)(kexec_smp_wait - relocate_new_kernel);
+#endif
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+}
+
+void kexec_reboot(void)
+{
+	do_kexec_t do_kexec = NULL;
+
+	/*
+	 * We know we were online, and there will be no incoming IPIs at
+	 * this point. Mark online again before rebooting so that the crash
+	 * analysis tool will see us correctly.
+	 */
+	set_cpu_online(smp_processor_id(), true);
+
+	/* Ensure remote CPUs observe that we're online before rebooting. */
+	smp_mb__after_atomic();
+
+	/*
+	 * Make sure we get correct instructions written by the
+	 * machine_kexec_prepare() CPU.
+	 */
+	__asm__ __volatile__ ("\tibar 0\n"::);
+
+#ifdef CONFIG_SMP
+	/* All secondary CPUs go to kexec_smp_wait */
+	if (smp_processor_id() > 0) {
+		relocated_kexec_smp_wait(NULL);
+		unreachable();
+	}
+#endif
+
+	do_kexec = (void *)reboot_code_buffer;
+	do_kexec(efi_boot, cmdline_ptr, systable_ptr, start_addr, first_ind_entry);
+
+	unreachable();
+}
+
+#ifdef CONFIG_SMP
+static void kexec_shutdown_secondary(void *regs)
+{
+	int cpu = smp_processor_id();
+
+	if (!cpu_online(cpu))
+		return;
+
+	/* We won't be sent IPIs any more. */
+	set_cpu_online(cpu, false);
+
+	local_irq_disable();
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+
+	kexec_reboot();
+}
+
+static void crash_shutdown_secondary(void *passed_regs)
+{
+	int cpu = smp_processor_id();
+	struct pt_regs *regs = passed_regs;
+
+	/*
+	 * If we are passed registers, use those. Otherwise get the
+	 * regs from the last interrupt, which should be correct, as
+	 * we are in an interrupt. But if the regs are not there,
+	 * pull them from the top of the stack. They are probably
+	 * wrong, but we need something to keep from crashing again.
+	 */
+	if (!regs)
+		regs = get_irq_regs();
+	if (!regs)
+		regs = task_pt_regs(current);
+
+	if (!cpu_online(cpu))
+		return;
+
+	/* We won't be sent IPIs any more. */
+	set_cpu_online(cpu, false);
+
+	local_irq_disable();
+	if (!cpumask_test_cpu(cpu, &cpus_in_crash))
+		crash_save_cpu(regs, cpu);
+	cpumask_set_cpu(cpu, &cpus_in_crash);
+
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+
+	kexec_reboot();
+}
+
+void crash_smp_send_stop(void)
+{
+	unsigned int ncpus;
+	unsigned long timeout;
+	static int cpus_stopped;
+
+	/*
+	 * This function can be called twice in the panic path, but obviously
+	 * we should execute this only once.
+	 */
+	if (cpus_stopped)
+		return;
+
+	cpus_stopped = 1;
+
+	/* Exclude the panic CPU */
+	ncpus = num_online_cpus() - 1;
+
+	smp_call_function(crash_shutdown_secondary, NULL, 0);
+	smp_wmb();
+
+	/*
+	 * The crash CPU sends an IPI and waits for other CPUs to
+	 * respond, allowing a delay of at least 10 seconds.
+	 */
+	timeout = MSEC_PER_SEC * 10;
+	pr_emerg("Sending IPI to other cpus...\n");
+	while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
+		mdelay(1);
+		cpu_relax();
+	}
+}
+#endif /* defined(CONFIG_SMP) */
+
+void machine_shutdown(void)
+{
+	int cpu;
+
+	/* Bring up offline CPUs so that all of them can reach reboot_code_buffer */
+	for_each_possible_cpu(cpu)
+		if (!cpu_online(cpu))
+			cpu_device_up(get_cpu_device(cpu));
+
+#ifdef CONFIG_SMP
+	smp_call_function(kexec_shutdown_secondary, NULL, 0);
+#endif
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	int crashing_cpu;
+
+	local_irq_disable();
+
+	crashing_cpu = smp_processor_id();
+	crash_save_cpu(regs, crashing_cpu);
+
+#ifdef CONFIG_SMP
+	crash_smp_send_stop();
+#endif
+	cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
+
+	pr_info("Starting crashdump kernel...\n");
+}
+
+void machine_kexec(struct kimage *image)
+{
+	unsigned long entry, *ptr;
+	struct kimage_arch *internal = &image->arch;
+
+	efi_boot = internal->efi_boot;
+	cmdline_ptr = internal->cmdline_ptr;
+	systable_ptr = internal->systable_ptr;
+
+	start_addr = (unsigned long)phys_to_virt(image->start);
+
+	first_ind_entry = (image->type == KEXEC_TYPE_DEFAULT) ?
+		(unsigned long)phys_to_virt(image->head & PAGE_MASK) : 0;
+
+	/*
+	 * The generic kexec code builds a page list with physical
+	 * addresses. They are directly accessible through XKPRANGE,
+	 * hence the phys_to_virt() call.
+	 */
+	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
+	     ptr = (entry & IND_INDIRECTION) ?
+	       phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
+		if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
+		    *ptr & IND_DESTINATION)
+			*ptr = (unsigned long) phys_to_virt(*ptr);
+	}
+
+	/* Mark offline before disabling local irq. */
+	set_cpu_online(smp_processor_id(), false);
+
+	/* We do not want to be bothered. */
+	local_irq_disable();
+
+	pr_notice("EFI boot flag 0x%lx\n", efi_boot);
+	pr_notice("Command line at 0x%lx\n", cmdline_ptr);
+	pr_notice("System table at 0x%lx\n", systable_ptr);
+	pr_notice("We will call new kernel at 0x%lx\n", start_addr);
+	pr_notice("Bye ...\n");
+
+	/* Make reboot code buffer available to the boot CPU. */
+	flush_cache_all();
+
+#ifdef CONFIG_SMP
+	atomic_set(&kexec_ready_to_reboot, 1);
+#endif
+
+	kexec_reboot();
+}
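
The loop in machine_kexec() rewrites the generic kexec page list in place so
that relocate_new_kernel can follow it using cached virtual addresses. Each
entry is a page address carrying flag bits in its low bits (IND_DESTINATION,
IND_INDIRECTION, IND_DONE and IND_SOURCE from include/linux/kexec.h). A
condensed C sketch of the walk that relocate_new_kernel performs in assembly:

	/* Illustrative only: mirrors the relocate_new_kernel page walk. */
	static void walk_ind_pages(unsigned long *ptr, void *dest)
	{
		unsigned long entry;

		while (!((entry = *ptr++) & IND_DONE)) {
			if (entry & IND_DESTINATION)
				dest = (void *)(entry & PAGE_MASK);
			else if (entry & IND_INDIRECTION)
				ptr = (unsigned long *)(entry & PAGE_MASK);
			else if (entry & IND_SOURCE) {
				memcpy(dest, (void *)(entry & PAGE_MASK), PAGE_SIZE);
				dest += PAGE_SIZE;
			}
		}
	}
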
diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
index 7423361b0ebc..4a4107a6a965 100644
--- a/arch/loongarch/kernel/mem.c
+++ b/arch/loongarch/kernel/mem.c
@@ -58,7 +58,4 @@ void __init memblock_init(void)
 	/* Reserve the kernel text/data/bss */
 	memblock_reserve(__pa_symbol(&_text),
 			 __pa_symbol(&_end) - __pa_symbol(&_text));
-
-	/* Reserve the initrd */
-	reserve_initrd_mem();
 }
diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
index 6d498288977d..d296a70b758f 100644
--- a/arch/loongarch/kernel/module-sections.c
+++ b/arch/loongarch/kernel/module-sections.c
@@ -7,7 +7,33 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-Elf_Addr module_emit_plt_entry(struct module *mod, unsigned long val)
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val)
+{
+	struct mod_section *got_sec = &mod->arch.got;
+	int i = got_sec->num_entries;
+	struct got_entry *got = get_got_entry(val, got_sec);
+
+	if (got)
+		return (Elf_Addr)got;
+
+	/* There is no GOT entry for val yet, create a new one. */
+	got = (struct got_entry *)got_sec->shdr->sh_addr;
+	got[i] = emit_got_entry(val);
+
+	got_sec->num_entries++;
+	if (got_sec->num_entries > got_sec->max_entries) {
+		/*
+		 * This may happen when the module contains a GOT_HI20 without
+		 * a paired GOT_LO12. Such a module is broken, reject it.
+		 */
+		pr_err("%s: module contains bad GOT relocation\n", mod->name);
+		return 0;
+	}
+
+	return (Elf_Addr)&got[i];
+}
+
+Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val)
 {
 	int nr;
 	struct mod_section *plt_sec = &mod->arch.plt;
@@ -50,15 +76,25 @@ static bool duplicate_rela(const Elf_Rela *rela, int idx)
 	return false;
 }
 
-static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
+static void count_max_entries(Elf_Rela *relas, int num,
+			      unsigned int *plts, unsigned int *gots)
 {
 	unsigned int i, type;
 
 	for (i = 0; i < num; i++) {
 		type = ELF_R_TYPE(relas[i].r_info);
-		if (type == R_LARCH_SOP_PUSH_PLT_PCREL) {
+		switch (type) {
+		case R_LARCH_SOP_PUSH_PLT_PCREL:
+		case R_LARCH_B26:
 			if (!duplicate_rela(relas, i))
 				(*plts)++;
+			break;
+		case R_LARCH_GOT_PC_HI20:
+			if (!duplicate_rela(relas, i))
+				(*gots)++;
+			break;
+		default:
+			break; /* Do nothing. */
 		}
 	}
 }
@@ -66,18 +102,24 @@ static void count_max_entries(Elf_Rela *relas, int num, unsigned int *plts)
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned int i, num_plts = 0;
+	unsigned int i, num_plts = 0, num_gots = 0;
 
 	/*
 	 * Find the empty .plt sections.
 	 */
 	for (i = 0; i < ehdr->e_shnum; i++) {
-		if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
+		if (!strcmp(secstrings + sechdrs[i].sh_name, ".got"))
+			mod->arch.got.shdr = sechdrs + i;
+		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
 			mod->arch.plt.shdr = sechdrs + i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx"))
 			mod->arch.plt_idx.shdr = sechdrs + i;
 	}
 
+	if (!mod->arch.got.shdr) {
+		pr_err("%s: module GOT section(s) missing\n", mod->name);
+		return -ENOEXEC;
+	}
 	if (!mod->arch.plt.shdr) {
 		pr_err("%s: module PLT section(s) missing\n", mod->name);
 		return -ENOEXEC;
@@ -100,9 +142,16 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		if (!(dst_sec->sh_flags & SHF_EXECINSTR))
 			continue;
 
-		count_max_entries(relas, num_rela, &num_plts);
+		count_max_entries(relas, num_rela, &num_plts, &num_gots);
 	}
 
+	mod->arch.got.shdr->sh_type = SHT_NOBITS;
+	mod->arch.got.shdr->sh_flags = SHF_ALLOC;
+	mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry);
+	mod->arch.got.num_entries = 0;
+	mod->arch.got.max_entries = num_gots;
+
 	mod->arch.plt.shdr->sh_type = SHT_NOBITS;
 	mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
 	mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES;
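
module_emit_got_entry() leans on two small helpers from the arch module
header. Assuming the usual one-address-per-slot layout (as on arm64 and
RISC-V), they reduce to something like the sketch below; the authoritative
definitions live in asm/module.h:

	struct got_entry {
		Elf_Addr symbol_addr;
	};

	static inline struct got_entry emit_got_entry(Elf_Addr val)
	{
		return (struct got_entry) { val };
	}
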
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 638427ff0d51..097595b2fc14 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -18,16 +18,6 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 
-static inline bool signed_imm_check(long val, unsigned int bit)
-{
-	return -(1L << (bit - 1)) <= val && val < (1L << (bit - 1));
-}
-
-static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
-{
-	return val < (1UL << bit);
-}
-
 static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
 {
 	if (*rela_stack_top >= RELA_STACK_DEPTH)
@@ -281,6 +271,96 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
 	}
 }
 
+static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v,
+			s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+	ptrdiff_t offset = (void *)v - (void *)location;
+	union loongarch_instruction *insn = (union loongarch_instruction *)location;
+
+	if (offset >= SZ_128M)
+		v = module_emit_plt_entry(mod, v);
+
+	if (offset < -SZ_128M)
+		v = module_emit_plt_entry(mod, v);
+
+	offset = (void *)v - (void *)location;
+
+	if (offset & 3) {
+		pr_err("module %s: jump offset = 0x%llx unaligned! dangerous R_LARCH_B26 (%u) relocation\n",
+				mod->name, (long long)offset, type);
+		return -ENOEXEC;
+	}
+
+	if (!signed_imm_check(offset, 28)) {
+		pr_err("module %s: jump offset = 0x%llx overflow! dangerous R_LARCH_B26 (%u) relocation\n",
+				mod->name, (long long)offset, type);
+		return -ENOEXEC;
+	}
+
+	offset >>= 2;
+	insn->reg0i26_format.immediate_l = offset & 0xffff;
+	insn->reg0i26_format.immediate_h = (offset >> 16) & 0x3ff;
+
+	return 0;
+}
+
+static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v,
+			s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+	union loongarch_instruction *insn = (union loongarch_instruction *)location;
+	/* Use s32 deliberately for sign extension. */
+	s32 offset_hi20 = (void *)((v + 0x800) & ~0xfff) -
+			  (void *)((Elf_Addr)location & ~0xfff);
+	Elf_Addr anchor = (((Elf_Addr)location) & ~0xfff) + offset_hi20;
+	ptrdiff_t offset_rem = (void *)v - (void *)anchor;
+
+	switch (type) {
+	case R_LARCH_PCALA_LO12:
+		insn->reg2i12_format.immediate = v & 0xfff;
+		break;
+	case R_LARCH_PCALA_HI20:
+		v = offset_hi20 >> 12;
+		insn->reg1i20_format.immediate = v & 0xfffff;
+		break;
+	case R_LARCH_PCALA64_LO20:
+		v = offset_rem >> 32;
+		insn->reg1i20_format.immediate = v & 0xfffff;
+		break;
+	case R_LARCH_PCALA64_HI12:
+		v = offset_rem >> 52;
+		insn->reg2i12_format.immediate = v & 0xfff;
+		break;
+	default:
+		pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v,
+			s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
+{
+	Elf_Addr got = module_emit_got_entry(mod, v);
+
+	if (!got)
+		return -EINVAL;
+
+	switch (type) {
+	case R_LARCH_GOT_PC_LO12:
+		type = R_LARCH_PCALA_LO12;
+		break;
+	case R_LARCH_GOT_PC_HI20:
+		type = R_LARCH_PCALA_HI20;
+		break;
+	default:
+		pr_err("%s: Unsupported relocation type %u\n", mod->name, type);
+		return -EINVAL;
+	}
+
+	return apply_r_larch_pcala(mod, location, got, rela_stack, rela_stack_top, type);
+}
+
 /*
  * reloc_handlers_rela() - Apply a particular relocation to a module
  * @mod: the module to apply the reloc to
@@ -296,7 +376,7 @@ typedef int (*reloc_rela_handler)(struct module *mod, u32 *location, Elf_Addr v,
 
 /* The handlers for known reloc types */
 static reloc_rela_handler reloc_rela_handlers[] = {
-	[R_LARCH_NONE ... R_LARCH_SUB64]		     = apply_r_larch_error,
+	[R_LARCH_NONE ... R_LARCH_RELAX]		     = apply_r_larch_error,
 
 	[R_LARCH_NONE]					     = apply_r_larch_none,
 	[R_LARCH_32]					     = apply_r_larch_32,
@@ -310,6 +390,9 @@ static reloc_rela_handler reloc_rela_handlers[] = {
 	[R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] 	     = apply_r_larch_sop,
 	[R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
 	[R_LARCH_ADD32 ... R_LARCH_SUB64]		     = apply_r_larch_add_sub,
+	[R_LARCH_B26]					     = apply_r_larch_b26,
+	[R_LARCH_PCALA_HI20 ... R_LARCH_PCALA64_HI12]	     = apply_r_larch_pcala,
+	[R_LARCH_GOT_PC_HI20 ... R_LARCH_GOT_PC_LO12]	     = apply_r_larch_got_pc,
 };
 
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
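
R_LARCH_B26 stores a 26-bit branch displacement, scaled by the 4-byte
instruction size, split across a 16-bit low field and a 10-bit high field;
that is why apply_r_larch_b26() checks signed_imm_check(offset, 28) before
splitting. The packing round-trips cleanly, which can be checked in isolation
(standalone userspace sketch, not kernel code):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		int64_t off = -0x400000;	/* byte offset, within +/-128M */
		uint32_t imm = (uint32_t)(off >> 2) & 0x3ffffff;
		uint32_t lo16 = imm & 0xffff;		/* immediate_l */
		uint32_t hi10 = (imm >> 16) & 0x3ff;	/* immediate_h */

		/* Decode: reassemble, sign-extend 26 bits, rescale. */
		uint32_t imm26 = (hi10 << 16) | lo16;
		int32_t sext = (int32_t)(imm26 << 6) >> 6;

		assert((int64_t)sext * 4 == off);
		return 0;
	}
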
diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c
new file mode 100644
index 000000000000..707bd32e5c4f
--- /dev/null
+++ b/arch/loongarch/kernel/perf_event.c
@@ -0,0 +1,887 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux performance counter support for LoongArch.
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 2010 MIPS Technologies, Inc.
+ * Copyright (C) 2011 Cavium Networks, Inc.
+ * Author: Deng-Cheng Zhu
+ */
+
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <linux/sched/task_stack.h>
+
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
+{
+	unsigned long err;
+	unsigned long __user *user_frame_tail;
+	struct stack_frame buftail;
+
+	user_frame_tail = (unsigned long __user *)(fp - sizeof(struct stack_frame));
+
+	/* Also check accessibility of one struct stack_frame beyond */
+	if (!access_ok(user_frame_tail, sizeof(buftail)))
+		return 0;
+
+	pagefault_disable();
+	err = __copy_from_user_inatomic(&buftail, user_frame_tail, sizeof(buftail));
+	pagefault_enable();
+
+	if (err || (unsigned long)user_frame_tail >= buftail.fp)
+		return 0;
+
+	perf_callchain_store(entry, buftail.ra);
+
+	return buftail.fp;
+}
+
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+			 struct pt_regs *regs)
+{
+	unsigned long fp;
+
+	if (perf_guest_state()) {
+		/* We don't support guest OS callchains yet */
+		return;
+	}
+
+	perf_callchain_store(entry, regs->csr_era);
+
+	fp = regs->regs[22];
+
+	while (entry->nr < entry->max_stack && fp && !((unsigned long)fp & 0xf))
+		fp = user_backtrace(entry, fp);
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	for (unwind_start(&state, current, regs);
+	      !unwind_done(&state); unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || perf_callchain_store(entry, addr))
+			return;
+	}
+}
+
+#define LOONGARCH_MAX_HWEVENTS 32
+
+struct cpu_hw_events {
+	/* Array of events on this cpu. */
+	struct perf_event	*events[LOONGARCH_MAX_HWEVENTS];
+
+	/*
+	 * Set the bit (indexed by the counter number) when the counter
+	 * is used for an event.
+	 */
+	unsigned long		used_mask[BITS_TO_LONGS(LOONGARCH_MAX_HWEVENTS)];
+
+	/*
+	 * Software copy of the control register for each performance counter.
+	 */
+	unsigned int		saved_ctrl[LOONGARCH_MAX_HWEVENTS];
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
+	.saved_ctrl = {0},
+};
+
+/* The description of LoongArch performance events. */
+struct loongarch_perf_event {
+	unsigned int event_id;
+};
+
+static struct loongarch_perf_event raw_event;
+static DEFINE_MUTEX(raw_event_mutex);
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define HW_OP_UNSUPPORTED		0xffffffff
+#define CACHE_OP_UNSUPPORTED		0xffffffff
+
+#define PERF_MAP_ALL_UNSUPPORTED					\
+	[0 ... PERF_COUNT_HW_MAX - 1] = {HW_OP_UNSUPPORTED}
+
+#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
+[0 ... C(MAX) - 1] = {							\
+	[0 ... C(OP_MAX) - 1] = {					\
+		[0 ... C(RESULT_MAX) - 1] = {CACHE_OP_UNSUPPORTED},	\
+	},								\
+}
+
+struct loongarch_pmu {
+	u64		max_period;
+	u64		valid_count;
+	u64		overflow;
+	const char	*name;
+	unsigned int	num_counters;
+	u64		(*read_counter)(unsigned int idx);
+	void		(*write_counter)(unsigned int idx, u64 val);
+	const struct loongarch_perf_event *(*map_raw_event)(u64 config);
+	const struct loongarch_perf_event (*general_event_map)[PERF_COUNT_HW_MAX];
+	const struct loongarch_perf_event (*cache_event_map)
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+};
+
+static struct loongarch_pmu loongarch_pmu;
+
+#define M_PERFCTL_EVENT(event)	((event) & CSR_PERFCTRL_EVENT)
+
+#define M_PERFCTL_COUNT_EVENT_WHENEVER	(CSR_PERFCTRL_PLV0 |	\
+					CSR_PERFCTRL_PLV1 |	\
+					CSR_PERFCTRL_PLV2 |	\
+					CSR_PERFCTRL_PLV3 |	\
+					CSR_PERFCTRL_IE)
+
+#define M_PERFCTL_CONFIG_MASK		0x1f0000
+
+static void pause_local_counters(void);
+static void resume_local_counters(void);
+
+static u64 loongarch_pmu_read_counter(unsigned int idx)
+{
+	u64 val = -1;
+
+	switch (idx) {
+	case 0:
+		val = read_csr_perfcntr0();
+		break;
+	case 1:
+		val = read_csr_perfcntr1();
+		break;
+	case 2:
+		val = read_csr_perfcntr2();
+		break;
+	case 3:
+		val = read_csr_perfcntr3();
+		break;
+	default:
+		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+		return 0;
+	}
+
+	return val;
+}
+
+static void loongarch_pmu_write_counter(unsigned int idx, u64 val)
+{
+	switch (idx) {
+	case 0:
+		write_csr_perfcntr0(val);
+		return;
+	case 1:
+		write_csr_perfcntr1(val);
+		return;
+	case 2:
+		write_csr_perfcntr2(val);
+		return;
+	case 3:
+		write_csr_perfcntr3(val);
+		return;
+	default:
+		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+		return;
+	}
+}
+
+static unsigned int loongarch_pmu_read_control(unsigned int idx)
+{
+	unsigned int val = -1;
+
+	switch (idx) {
+	case 0:
+		val = read_csr_perfctrl0();
+		break;
+	case 1:
+		val = read_csr_perfctrl1();
+		break;
+	case 2:
+		val = read_csr_perfctrl2();
+		break;
+	case 3:
+		val = read_csr_perfctrl3();
+		break;
+	default:
+		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+		return 0;
+	}
+
+	return val;
+}
+
+static void loongarch_pmu_write_control(unsigned int idx, unsigned int val)
+{
+	switch (idx) {
+	case 0:
+		write_csr_perfctrl0(val);
+		return;
+	case 1:
+		write_csr_perfctrl1(val);
+		return;
+	case 2:
+		write_csr_perfctrl2(val);
+		return;
+	case 3:
+		write_csr_perfctrl3(val);
+		return;
+	default:
+		WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx);
+		return;
+	}
+}
+
+static int loongarch_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc)
+{
+	int i;
+
+	for (i = 0; i < loongarch_pmu.num_counters; i++) {
+		if (!test_and_set_bit(i, cpuc->used_mask))
+			return i;
+	}
+
+	return -EAGAIN;
+}
+
+static void loongarch_pmu_enable_event(struct hw_perf_event *evt, int idx)
+{
+	unsigned int cpu;
+	struct perf_event *event = container_of(evt, struct perf_event, hw);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
+
+	/* Make sure the interrupt is enabled. */
+	cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) |
+		(evt->config_base & M_PERFCTL_CONFIG_MASK) | CSR_PERFCTRL_IE;
+
+	cpu = (event->cpu >= 0) ? event->cpu : smp_processor_id();
+
+	/*
+	 * We do not actually let the counter run. Leave it until start().
+	 */
+	pr_debug("Enabling perf counter for CPU%d\n", cpu);
+}
+
+static void loongarch_pmu_disable_event(int idx)
+{
+	unsigned long flags;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
+
+	local_irq_save(flags);
+	cpuc->saved_ctrl[idx] = loongarch_pmu_read_control(idx) &
+		~M_PERFCTL_COUNT_EVENT_WHENEVER;
+	loongarch_pmu_write_control(idx, cpuc->saved_ctrl[idx]);
+	local_irq_restore(flags);
+}
+
+static int loongarch_pmu_event_set_period(struct perf_event *event,
+				    struct hw_perf_event *hwc,
+				    int idx)
+{
+	int ret = 0;
+	u64 left = local64_read(&hwc->period_left);
+	u64 period = hwc->sample_period;
+
+	if (unlikely((left + period) & (1ULL << 63))) {
+		/* left underflowed by more than period. */
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	} else if (unlikely((left + period) <= period)) {
+		/* left underflowed by less than period. */
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (left > loongarch_pmu.max_period) {
+		left = loongarch_pmu.max_period;
+		local64_set(&hwc->period_left, left);
+	}
+
+	local64_set(&hwc->prev_count, loongarch_pmu.overflow - left);
+
+	loongarch_pmu.write_counter(idx, loongarch_pmu.overflow - left);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static void loongarch_pmu_event_update(struct perf_event *event,
+				 struct hw_perf_event *hwc,
+				 int idx)
+{
+	u64 delta;
+	u64 prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = loongarch_pmu.read_counter(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+				new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = new_raw_count - prev_raw_count;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+}
+
+static void loongarch_pmu_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+
+	/* Set the period for the event. */
+	loongarch_pmu_event_set_period(event, hwc, hwc->idx);
+
+	/* Enable the event. */
+	loongarch_pmu_enable_event(hwc, hwc->idx);
+}
+
+static void loongarch_pmu_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		/* We are working on a local event. */
+		loongarch_pmu_disable_event(hwc->idx);
+		barrier();
+		loongarch_pmu_event_update(event, hwc, hwc->idx);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static int loongarch_pmu_add(struct perf_event *event, int flags)
+{
+	int idx, err = 0;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	perf_pmu_disable(event->pmu);
+
+	/* Look for a free counter for this event. */
+	idx = loongarch_pmu_alloc_counter(cpuc, hwc);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
+
+	/*
+	 * If there is an event in the counter we are going to use then
+	 * make sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	loongarch_pmu_disable_event(idx);
+	cpuc->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		loongarch_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+static void loongarch_pmu_del(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	WARN_ON(idx < 0 || idx >= loongarch_pmu.num_counters);
+
+	loongarch_pmu_stop(event, PERF_EF_UPDATE);
+	cpuc->events[idx] = NULL;
+	clear_bit(idx, cpuc->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static void loongarch_pmu_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* Don't read disabled counters! */
+	if (hwc->idx < 0)
+		return;
+
+	loongarch_pmu_event_update(event, hwc, hwc->idx);
+}
+
+static void loongarch_pmu_enable(struct pmu *pmu)
+{
+	resume_local_counters();
+}
+
+static void loongarch_pmu_disable(struct pmu *pmu)
+{
+	pause_local_counters();
+}
+
+static DEFINE_MUTEX(pmu_reserve_mutex);
+static atomic_t active_events = ATOMIC_INIT(0);
+
+static int get_pmc_irq(void)
+{
+	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
+
+	if (d)
+		return irq_create_mapping(d, EXCCODE_PMC - EXCCODE_INT_START);
+
+	return -EINVAL;
+}
+
+static void reset_counters(void *arg);
+static int __hw_perf_event_init(struct perf_event *event);
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
+		on_each_cpu(reset_counters, NULL, 1);
+		free_irq(get_pmc_irq(), &loongarch_pmu);
+		mutex_unlock(&pmu_reserve_mutex);
+	}
+}
+
+static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
+			struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct perf_event *event = cpuc->events[idx];
+	struct hw_perf_event *hwc = &event->hw;
+
+	loongarch_pmu_event_update(event, hwc, idx);
+	data->period = event->hw.last_period;
+	if (!loongarch_pmu_event_set_period(event, hwc, idx))
+		return;
+
+	if (perf_event_overflow(event, data, regs))
+		loongarch_pmu_disable_event(idx);
+}
+
+static irqreturn_t pmu_handle_irq(int irq, void *dev)
+{
+	int n;
+	int handled = IRQ_NONE;
+	uint64_t counter;
+	struct pt_regs *regs;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * First we pause the local counters, so that when we are locked
+	 * here, the counters are all paused. When it gets locked due to
+	 * perf_disable(), the timer interrupt handler will be delayed.
+	 *
+	 * See also loongarch_pmu_start().
+	 */
+	pause_local_counters();
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0, 0);
+
+	for (n = 0; n < loongarch_pmu.num_counters; n++) {
+		if (test_bit(n, cpuc->used_mask)) {
+			counter = loongarch_pmu.read_counter(n);
+			if (counter & loongarch_pmu.overflow) {
+				handle_associated_event(cpuc, n, &data, regs);
+				handled = IRQ_HANDLED;
+			}
+		}
+	}
+
+	resume_local_counters();
+
+	/*
+	 * Do all the work for the pending perf events. We can do this
+	 * in here because the performance counter interrupt is a regular
+	 * interrupt, not an NMI.
+	 */
+	if (handled == IRQ_HANDLED)
+		irq_work_run();
+
+	return handled;
+}
+
+static int loongarch_pmu_event_init(struct perf_event *event)
+{
+	int r, irq;
+	unsigned long flags;
+
+	/* Taken branch sampling is not supported. */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_RAW:
+	case PERF_TYPE_HARDWARE:
+	case PERF_TYPE_HW_CACHE:
+		break;
+
+	default:
+		/* Initialize it to avoid a false match in validate_group() */
+		event->hw.event_base = 0xffffffff;
+		return -ENOENT;
+	}
+
+	if (event->cpu >= 0 && !cpu_online(event->cpu))
+		return -ENODEV;
+
+	irq = get_pmc_irq();
+	flags = IRQF_PERCPU | IRQF_NOBALANCING | IRQF_NO_THREAD | IRQF_NO_SUSPEND | IRQF_SHARED;
+	if (!atomic_inc_not_zero(&active_events)) {
+		mutex_lock(&pmu_reserve_mutex);
+		if (atomic_read(&active_events) == 0) {
+			r = request_irq(irq, pmu_handle_irq, flags, "Perf_PMU", &loongarch_pmu);
+			if (r < 0) {
+				mutex_unlock(&pmu_reserve_mutex);
+				pr_warn("PMU IRQ request failed\n");
+				return -ENODEV;
+			}
+		}
+		atomic_inc(&active_events);
+		mutex_unlock(&pmu_reserve_mutex);
+	}
+
+	return __hw_perf_event_init(event);
+}
+
+static struct pmu pmu = {
+	.pmu_enable	= loongarch_pmu_enable,
+	.pmu_disable	= loongarch_pmu_disable,
+	.event_init	= loongarch_pmu_event_init,
+	.add		= loongarch_pmu_add,
+	.del		= loongarch_pmu_del,
+	.start		= loongarch_pmu_start,
+	.stop		= loongarch_pmu_stop,
+	.read		= loongarch_pmu_read,
+};
+
+static unsigned int loongarch_pmu_perf_event_encode(const struct loongarch_perf_event *pev)
+{
+	return (pev->event_id & 0xff);
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_general_event(int idx)
+{
+	const struct loongarch_perf_event *pev;
+
+	pev = &(*loongarch_pmu.general_event_map)[idx];
+
+	if (pev->event_id == HW_OP_UNSUPPORTED)
+		return ERR_PTR(-ENOENT);
+
+	return pev;
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	const struct loongarch_perf_event *pev;
+
+	cache_type = (config >> 0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return ERR_PTR(-EINVAL);
+
+	cache_op = (config >> 8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return ERR_PTR(-EINVAL);
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	pev = &((*loongarch_pmu.cache_event_map)
+					[cache_type]
+					[cache_op]
+					[cache_result]);
+
+	if (pev->event_id == CACHE_OP_UNSUPPORTED)
+		return ERR_PTR(-ENOENT);
+
+	return pev;
+}
+
+static int validate_group(struct perf_event *event)
+{
+	struct cpu_hw_events fake_cpuc;
+	struct perf_event *sibling, *leader = event->group_leader;
+
+	memset(&fake_cpuc, 0, sizeof(fake_cpuc));
+
+	if (loongarch_pmu_alloc_counter(&fake_cpuc, &leader->hw) < 0)
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, leader) {
+		if (loongarch_pmu_alloc_counter(&fake_cpuc, &sibling->hw) < 0)
+			return -EINVAL;
+	}
+
+	if (loongarch_pmu_alloc_counter(&fake_cpuc, &event->hw) < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void reset_counters(void *arg)
+{
+	int n;
+	int counters = loongarch_pmu.num_counters;
+
+	for (n = 0; n < counters; n++) {
+		loongarch_pmu_write_control(n, 0);
+		loongarch_pmu.write_counter(n, 0);
+	}
+}
+
+static const struct loongarch_perf_event loongson_event_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00 },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x01 },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x08 },
+	[PERF_COUNT_HW_CACHE_MISSES] = { 0x09 },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02 },
+	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x03 },
+};
+
+static const struct loongarch_perf_event loongson_cache_map
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+PERF_CACHE_MAP_ALL_UNSUPPORTED,
+[C(L1D)] = {
+	/*
+	 * Like some other architectures (e.g. ARM), the performance
+	 * counters don't differentiate between read and write
+	 * accesses/misses, so this isn't strictly correct, but it's the
+	 * best we can do. Writes and reads get combined.
+	 */
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x8 },
+		[C(RESULT_MISS)]	= { 0x9 },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { 0x8 },
+		[C(RESULT_MISS)]	= { 0x9 },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)]	= { 0xaa },
+		[C(RESULT_MISS)]	= { 0xa9 },
+	},
+},
+[C(L1I)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x6 },
+		[C(RESULT_MISS)]	= { 0x7 },
+	},
+},
+[C(LL)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0xc },
+		[C(RESULT_MISS)]	= { 0xd },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { 0xc },
+		[C(RESULT_MISS)]	= { 0xd },
+	},
+},
+[C(ITLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_MISS)]    = { 0x3b },
+	},
+},
+[C(DTLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]	= { 0x4 },
+		[C(RESULT_MISS)]	= { 0x3c },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)]	= { 0x4 },
+		[C(RESULT_MISS)]	= { 0x3c },
+	},
+},
+[C(BPU)] = {
+	/* Using the same code for *HW_BRANCH* */
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)]  = { 0x02 },
+		[C(RESULT_MISS)]    = { 0x03 },
+	},
+},
+};
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	int err;
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event_attr *attr = &event->attr;
+	const struct loongarch_perf_event *pev;
+
+	/* Return the LoongArch event descriptor for a generic perf event. */
+	if (PERF_TYPE_HARDWARE == event->attr.type) {
+		if (event->attr.config >= PERF_COUNT_HW_MAX)
+			return -EINVAL;
+		pev = loongarch_pmu_map_general_event(event->attr.config);
+	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
+		pev = loongarch_pmu_map_cache_event(event->attr.config);
+	} else if (PERF_TYPE_RAW == event->attr.type) {
+		/* We are working on the global raw event. */
+		mutex_lock(&raw_event_mutex);
+		pev = loongarch_pmu.map_raw_event(event->attr.config);
+	} else {
+		/* The event type is not (yet) supported. */
+		return -EOPNOTSUPP;
+	}
+
+	if (IS_ERR(pev)) {
+		if (PERF_TYPE_RAW == event->attr.type)
+			mutex_unlock(&raw_event_mutex);
+		return PTR_ERR(pev);
+	}
+
+	/*
+	 * We allow maximum flexibility in how each individual counter shared
+	 * by a single CPU operates (the mode exclusion and the range).
+	 */
+	hwc->config_base = CSR_PERFCTRL_IE;
+
+	hwc->event_base = loongarch_pmu_perf_event_encode(pev);
+	if (PERF_TYPE_RAW == event->attr.type)
+		mutex_unlock(&raw_event_mutex);
+
+	if (!attr->exclude_user) {
+		hwc->config_base |= CSR_PERFCTRL_PLV3;
+		hwc->config_base |= CSR_PERFCTRL_PLV2;
+	}
+	if (!attr->exclude_kernel)
+		hwc->config_base |= CSR_PERFCTRL_PLV0;
+	if (!attr->exclude_hv)
+		hwc->config_base |= CSR_PERFCTRL_PLV1;
+
+	hwc->config_base &= M_PERFCTL_CONFIG_MASK;
+	/*
+	 * The event can belong to another CPU. We do not assign a local
+	 * counter for it for now.
+	 */
+	hwc->idx = -1;
+	hwc->config = 0;
+
+	if (!hwc->sample_period) {
+		hwc->sample_period  = loongarch_pmu.max_period;
+		hwc->last_period    = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	err = 0;
+	if (event->group_leader != event)
+		err = validate_group(event);
+
+	event->destroy = hw_perf_event_destroy;
+
+	if (err)
+		event->destroy(event);
+
+	return err;
+}
+
+static void pause_local_counters(void)
+{
+	unsigned long flags;
+	int ctr = loongarch_pmu.num_counters;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	local_irq_save(flags);
+	do {
+		ctr--;
+		cpuc->saved_ctrl[ctr] = loongarch_pmu_read_control(ctr);
+		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr] &
+					 ~M_PERFCTL_COUNT_EVENT_WHENEVER);
+	} while (ctr > 0);
+	local_irq_restore(flags);
+}
+
+static void resume_local_counters(void)
+{
+	int ctr = loongarch_pmu.num_counters;
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	do {
+		ctr--;
+		loongarch_pmu_write_control(ctr, cpuc->saved_ctrl[ctr]);
+	} while (ctr > 0);
+}
+
+static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config)
+{
+	raw_event.event_id = config & 0xff;
+
+	return &raw_event;
+}
+
+static int __init init_hw_perf_events(void)
+{
+	int counters;
+
+	if (!cpu_has_pmp)
+		return -ENODEV;
+
+	pr_info("Performance counters: ");
+	counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1;
+
+	loongarch_pmu.num_counters = counters;
+	loongarch_pmu.max_period = (1ULL << 63) - 1;
+	loongarch_pmu.valid_count = (1ULL << 63) - 1;
+	loongarch_pmu.overflow = 1ULL << 63;
+	loongarch_pmu.name = "loongarch/loongson64";
+	loongarch_pmu.read_counter = loongarch_pmu_read_counter;
+	loongarch_pmu.write_counter = loongarch_pmu_write_counter;
+	loongarch_pmu.map_raw_event = loongarch_pmu_map_raw_event;
+	loongarch_pmu.general_event_map = &loongson_event_map;
+	loongarch_pmu.cache_event_map = &loongson_cache_map;
+
+	on_each_cpu(reset_counters, NULL, 1);
+
+	pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n",
+			loongarch_pmu.name, counters, 64);
+
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+
+	return 0;
+}
+early_initcall(init_hw_perf_events);
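
Because the PMU is registered through the generic perf core, the standard
perf_event_open() interface works unchanged on top of this driver; the
generic event below maps through loongson_event_map (cycles = 0x00). A
minimal userspace counter (generic Linux API, nothing LoongArch-specific):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.type = PERF_TYPE_HARDWARE;
		attr.size = sizeof(attr);
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			return 1;

		ioctl(fd, PERF_EVENT_IOC_RESET, 0);
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... workload under measurement ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
		read(fd, &count, sizeof(count));
		printf("cycles: %lld\n", count);
		close(fd);
		return 0;
	}
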
diff --git a/arch/loongarch/kernel/perf_regs.c b/arch/loongarch/kernel/perf_regs.c
new file mode 100644
index 000000000000..263ac4ab5af6
--- /dev/null
+++ b/arch/loongarch/kernel/perf_regs.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 2013 Cavium, Inc.
+ */
+
+#include <linux/perf_event.h>
+
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_32BIT
+u64 perf_reg_abi(struct task_struct *tsk)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+#else /* Must be CONFIG_64BIT */
+u64 perf_reg_abi(struct task_struct *tsk)
+{
+	if (test_tsk_thread_flag(tsk, TIF_32BIT_REGS))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
+#endif /* CONFIG_32BIT */
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask)
+		return -EINVAL;
+	if (mask & ~((1ull << PERF_REG_LOONGARCH_MAX) - 1))
+		return -EINVAL;
+	return 0;
+}
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_LOONGARCH_MAX))
+		return 0;
+
+	if ((u32)idx == PERF_REG_LOONGARCH_PC)
+		return regs->csr_era;
+
+	return regs->regs[idx];
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..d13252553a7c
--- /dev/null
+++ b/arch/loongarch/kernel/relocate_kernel.S
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * relocate_kernel.S for kexec
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kexec.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+#include <asm/loongarch.h>
+#include <asm/stackframe.h>
+#include <asm/addrspace.h>
+
+SYM_CODE_START(relocate_new_kernel)
+	/*
+	 * a0: EFI boot flag for the new kernel
+	 * a1: Command line pointer for the new kernel
+	 * a2: System table pointer for the new kernel
+	 * a3: Start address to jump to after relocation
+	 * a4: Pointer to the current indirection page entry
+	 */
+	move		s0, a4
+
+	/*
+	 * In case of a kdump/crash kernel, the indirection page is not
+	 * populated as the kernel is directly copied to a reserved location
+	 */
+	beqz		s0, done
+
+process_entry:
+	PTR_L		s1, s0, 0
+	PTR_ADDI	s0, s0, SZREG
+
+	/* destination page */
+	andi		s2, s1, IND_DESTINATION
+	beqz		s2, 1f
+	li.w		t0, ~0x1
+	and		s3, s1, t0	/* store destination addr in s3 */
+	b		process_entry
+
+1:
+	/* indirection page, update s0 */
+	andi		s2, s1, IND_INDIRECTION
+	beqz		s2, 1f
+	li.w		t0, ~0x2
+	and		s0, s1, t0
+	b		process_entry
+
+1:
+	/* done page */
+	andi		s2, s1, IND_DONE
+	beqz		s2, 1f
+	b		done
+
+1:
+	/* source page */
+	andi		s2, s1, IND_SOURCE
+	beqz		s2, process_entry
+	li.w		t0, ~0x8
+	and		s1, s1, t0
+	li.w		s5, (1 << _PAGE_SHIFT) / SZREG
+
+copy_word:
+	/* copy page word by word */
+	REG_L		s4, s1, 0
+	REG_S		s4, s3, 0
+	PTR_ADDI	s3, s3, SZREG
+	PTR_ADDI	s1, s1, SZREG
+	LONG_ADDI	s5, s5, -1
+	beqz		s5, process_entry
+	b		copy_word
+	b		process_entry
+
+done:
+	ibar		0
+	dbar		0
+
+	/*
+	 * Jump to the new kernel, making sure the values of a0, a1,
+	 * a2 and a3 are not changed.
+	 */
+	jr		a3
+SYM_CODE_END(relocate_new_kernel)
+
+#ifdef CONFIG_SMP
+/*
+ * Other CPUs should wait until code is relocated and
+ * then start at the entry point from LOONGARCH_IOCSR_MBUF0.
+ */
+SYM_CODE_START(kexec_smp_wait)
+1:	li.w		t0, 0x100			/* wait for init loop */
+2:	addi.w		t0, t0, -1			/* limit mailbox access */
+	bnez		t0, 2b
+	li.w		t1, LOONGARCH_IOCSR_MBUF0
+	iocsrrd.w	s0, t1				/* check PC as an indicator */
+	beqz		s0, 1b
+	iocsrrd.d	s0, t1				/* get PC via mailbox */
+
+	li.d		t0, CACHE_BASE
+	or		s0, s0, t0			/* s0 = TO_CACHE(s0) */
+	jr		s0				/* jump to initial PC */
+SYM_CODE_END(kexec_smp_wait)
+#endif
+
+relocate_new_kernel_end:
+
+SYM_DATA_START(relocate_new_kernel_size)
+	PTR		relocate_new_kernel_end - relocate_new_kernel
+SYM_DATA_END(relocate_new_kernel_size)
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 6ca5a0dcbb1c..1eb63fa9bc81 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -19,6 +19,8 @@
 #include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <linux/ioport.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/pfn.h>
@@ -185,8 +187,70 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
+static void __init arch_reserve_vmcore(void)
+{
+#ifdef CONFIG_PROC_VMCORE
+	u64 i;
+	phys_addr_t start, end;
+
+	if (!is_kdump_kernel())
+		return;
+
+	if (!elfcorehdr_size) {
+		for_each_mem_range(i, &start, &end) {
+			if (elfcorehdr_addr >= start && elfcorehdr_addr < end) {
+				/*
+				 * Reserve from the elf core header to the end of
+				 * the memory segment; that should all be kdump
+				 * reserved memory.
+				 */
+				elfcorehdr_size = end - elfcorehdr_addr;
+				break;
+			}
+		}
+	}
+
+	if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
+		pr_warn("elfcorehdr is overlapped\n");
+		return;
+	}
+
+	memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
+
+	pr_info("Reserving %llu KiB of memory at 0x%llx for elfcorehdr\n",
+		elfcorehdr_size >> 10, elfcorehdr_addr);
+#endif
+}
+
+static void __init arch_parse_crashkernel(void)
+{
+#ifdef CONFIG_KEXEC
+	int ret;
+	unsigned long long start;
+	unsigned long long total_mem;
+	unsigned long long crash_base, crash_size;
+
+	total_mem = memblock_phys_mem_size();
+	ret = parse_crashkernel(boot_command_line, total_mem, &crash_size, &crash_base);
+	if (ret < 0 || crash_size <= 0)
+		return;
+
+	start = memblock_phys_alloc_range(crash_size, 1, crash_base, crash_base + crash_size);
+	if (start != crash_base) {
+		pr_warn("Invalid memory region reserved for crash kernel\n");
+		return;
+	}
+
+	crashk_res.start = crash_base;
+	crashk_res.end	 = crash_base + crash_size - 1;
+#endif
+}
+
 void __init platform_init(void)
 {
+	arch_reserve_vmcore();
+	arch_parse_crashkernel();
+
 #ifdef CONFIG_ACPI_TABLE_UPGRADE
 	acpi_table_upgrade();
 #endif
@@ -289,6 +353,15 @@ static void __init resource_init(void)
 		request_resource(res, &data_resource);
 		request_resource(res, &bss_resource);
 	}
+
+#ifdef CONFIG_KEXEC
+	if (crashk_res.start < crashk_res.end) {
+		insert_resource(&iomem_resource, &crashk_res);
+		pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
+			(unsigned long)((crashk_res.end - crashk_res.start + 1) >> 20),
+			(unsigned long)(crashk_res.start >> 20));
+	}
+#endif
 }
 
 static int __init reserve_memblock_reserved_regions(void)
@@ -336,7 +409,7 @@ static void __init prefill_possible_map(void)
 	for (; i < NR_CPUS; i++)
 		set_cpu_possible(i, false);
 
-	nr_cpu_ids = possible;
+	set_nr_cpu_ids(possible);
 }
 #endif
 
@@ -348,10 +421,11 @@ void __init setup_arch(char **cmdline_p)
 	init_environ();
 	efi_init();
 	memblock_init();
+	pagetable_init();
 	parse_early_param();
+	reserve_initrd_mem();
 
 	platform_init();
-	pagetable_init();
 	arch_mem_init(cmdline_p);
 
 	resource_init();
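
arch_parse_crashkernel() accepts the generic crashkernel= syntax, so setting
up a capture kernel works as on other architectures: boot with, for example,
crashkernel=512M (kernel-chosen base) or crashkernel=512M@256M (fixed base),
then load the capture kernel with kexec -p. The reservation appears in
/proc/iomem as "Crash kernel" thanks to the insert_resource() call above.
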
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index b5fab308dcf2..781a4d4bdddc 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -240,11 +240,6 @@ void loongson3_smp_finish(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static bool io_master(int cpu)
-{
-	return test_bit(cpu, &loongson_sysconf.cores_io_master);
-}
-
 int loongson3_cpu_disable(void)
 {
 	unsigned long flags;
diff --git a/arch/loongarch/kernel/sysrq.c b/arch/loongarch/kernel/sysrq.c
new file mode 100644
index 000000000000..366baef72d29
--- /dev/null
+++ b/arch/loongarch/kernel/sysrq.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LoongArch specific sysrq operations.
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/sysrq.h>
+#include <linux/workqueue.h>
+
+#include <asm/cpu-features.h>
+#include <asm/tlb.h>
+
+/*
+ * Dump TLB entries on all CPUs.
+ */
+
+static DEFINE_SPINLOCK(show_lock);
+
+static void sysrq_tlbdump_single(void *dummy)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&show_lock, flags);
+
+	pr_info("CPU%d:\n", smp_processor_id());
+	dump_tlb_regs();
+	pr_info("\n");
+	dump_tlb_all();
+	pr_info("\n");
+
+	spin_unlock_irqrestore(&show_lock, flags);
+}
+
+#ifdef CONFIG_SMP
+static void sysrq_tlbdump_othercpus(struct work_struct *dummy)
+{
+	smp_call_function(sysrq_tlbdump_single, NULL, 0);
+}
+
+static DECLARE_WORK(sysrq_tlbdump, sysrq_tlbdump_othercpus);
+#endif
+
+static void sysrq_handle_tlbdump(int key)
+{
+	sysrq_tlbdump_single(NULL);
+#ifdef CONFIG_SMP
+	schedule_work(&sysrq_tlbdump);
+#endif
+}
+
+static struct sysrq_key_op sysrq_tlbdump_op = {
+	.handler        = sysrq_handle_tlbdump,
+	.help_msg       = "show-tlbs(x)",
+	.action_msg     = "Show TLB entries",
+	.enable_mask	= SYSRQ_ENABLE_DUMP,
+};
+
+static int __init loongarch_sysrq_init(void)
+{
+	return register_sysrq_key('x', &sysrq_tlbdump_op);
+}
+arch_initcall(loongarch_sysrq_init);
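
Once registered, the dump is reachable through the normal sysrq paths: the
SysRq-x key chord on a console, or (with sysrq enabled, e.g. via
/proc/sys/kernel/sysrq) a write such as "echo x > /proc/sysrq-trigger". The
local CPU dumps synchronously; the other CPUs are reached via the workqueue
shown above, since smp_call_function() cannot be used from the sysrq handler's
interrupt context.
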
diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c
index ab1a75c0b5a6..caa7cd859078 100644
--- a/arch/loongarch/kernel/topology.c
+++ b/arch/loongarch/kernel/topology.c
@@ -5,6 +5,7 @@
 #include <linux/node.h>
 #include <linux/nodemask.h>
 #include <linux/percpu.h>
+#include <asm/bootinfo.h>
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
@@ -40,7 +41,7 @@ static int __init topology_init(void)
 	for_each_present_cpu(i) {
 		struct cpu *c = &per_cpu(cpu_devices, i);
 
-		c->hotpluggable = !!i;
+		c->hotpluggable = !io_master(i);
 		ret = register_cpu(c, i);
 		if (ret < 0)
 			pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret);
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 5010e95cef84..1a4dce84ebc6 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -10,6 +10,7 @@
 #include <linux/entry-common.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/extable.h>
 #include <linux/mm.h>
@@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
 	oops_exit();
 
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 
@@ -374,6 +378,29 @@ asmlinkage void noinstr do_ale(struct pt_regs *regs)
 	irqentry_exit(regs, state);
 }
 
+#ifdef CONFIG_GENERIC_BUG
+int is_valid_bugaddr(unsigned long addr)
+{
+	return 1;
+}
+#endif /* CONFIG_GENERIC_BUG */
+
+static void bug_handler(struct pt_regs *regs)
+{
+	switch (report_bug(regs->csr_era, regs)) {
+	case BUG_TRAP_TYPE_BUG:
+	case BUG_TRAP_TYPE_NONE:
+		die_if_kernel("Oops - BUG", regs);
+		force_sig(SIGTRAP);
+		break;
+
+	case BUG_TRAP_TYPE_WARN:
+		/* Skip the BUG instruction and continue */
+		regs->csr_era += LOONGARCH_INSN_SIZE;
+		break;
+	}
+}
+
 asmlinkage void noinstr do_bp(struct pt_regs *regs)
 {
 	bool user = user_mode(regs);
@@ -427,8 +454,7 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
 
 	switch (bcode) {
 	case BRK_BUG:
-		die_if_kernel("Kernel bug detected", regs);
-		force_sig(SIGTRAP);
+		bug_handler(regs);
 		break;
 	case BRK_DIVZERO:
 		die_if_kernel("Break instruction in kernel code", regs);
@@ -620,9 +646,6 @@ asmlinkage void noinstr do_vint(struct pt_regs *regs, unsigned long sp)
 	irqentry_exit(regs, state);
 }
 
-extern void tlb_init(int cpu);
-extern void cache_error_setup(void);
-
 unsigned long eentry;
 unsigned long tlbrentry;
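
bug_handler() gives BUG() and WARN() their usual semantics on LoongArch:
report_bug() classifies the trapping break instruction via the bug table, a
warning resumes at the next instruction (csr_era + LOONGARCH_INSN_SIZE), and a
genuine BUG in kernel mode dies. Ordinary annotated code therefore behaves as
expected (illustrative sketch; check_arg() is hypothetical):

	static int check_arg(int arg)
	{
		if (WARN_ON(arg < 0))	/* prints a backtrace, then continues */
			return -EINVAL;
		BUG_ON(arg > 1000);	/* fatal in kernel mode */
		return 0;
	}
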
 
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index e5890bec2bf6..b3309a5e695b 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -55,6 +55,10 @@ SECTIONS
 
 	EXCEPTION_TABLE(16)
 
+	.got : ALIGN(16) { *(.got) }
+	.plt : ALIGN(16) { *(.plt) }
+	.got.plt : ALIGN(16) { *(.got.plt) }
+
 	. = ALIGN(PECOFF_SEGMENT_ALIGN);
 	__init_begin = .;
 	__inittext_begin = .;
diff --git a/arch/loongarch/mm/cache.c b/arch/loongarch/mm/cache.c
index e8c68dcf6ab2..72685a48eaf0 100644
--- a/arch/loongarch/mm/cache.c
+++ b/arch/loongarch/mm/cache.c
@@ -6,8 +6,8 @@
  * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 2007 MIPS Technologies, Inc.
  */
+#include <linux/cacheinfo.h>
 #include <linux/export.h>
-#include <linux/fcntl.h>
 #include <linux/fs.h>
 #include <linux/highmem.h>
 #include <linux/kernel.h>
@@ -16,14 +16,21 @@
 #include <linux/sched.h>
 #include <linux/syscalls.h>
 
+#include <asm/bootinfo.h>
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
-#include <asm/dma.h>
 #include <asm/loongarch.h>
+#include <asm/numa.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 
+void cache_error_setup(void)
+{
+	extern char __weak except_vec_cex;
+	set_merr_handler(0x0, &except_vec_cex, 0x80);
+}
+
 /*
  * LoongArch maintains ICache/DCache coherency by hardware,
  * we just need "ibar" to avoid instruction hazard here.
@@ -34,109 +41,121 @@ void local_flush_icache_range(unsigned long start, unsigned long end)
 }
 EXPORT_SYMBOL(local_flush_icache_range);
 
-void cache_error_setup(void)
-{
-	extern char __weak except_vec_cex;
-	set_merr_handler(0x0, &except_vec_cex, 0x80);
-}
-
-static unsigned long icache_size __read_mostly;
-static unsigned long dcache_size __read_mostly;
-static unsigned long vcache_size __read_mostly;
-static unsigned long scache_size __read_mostly;
-
-static char *way_string[] = { NULL, "direct mapped", "2-way",
-	"3-way", "4-way", "5-way", "6-way", "7-way", "8-way",
-	"9-way", "10-way", "11-way", "12-way",
-	"13-way", "14-way", "15-way", "16-way",
-};
-
-static void probe_pcache(void)
+static void flush_cache_leaf(unsigned int leaf)
 {
-	struct cpuinfo_loongarch *c = &current_cpu_data;
-	unsigned int lsize, sets, ways;
-	unsigned int config;
-
-	config = read_cpucfg(LOONGARCH_CPUCFG17);
-	lsize = 1 << ((config & CPUCFG17_L1I_SIZE_M) >> CPUCFG17_L1I_SIZE);
-	sets  = 1 << ((config & CPUCFG17_L1I_SETS_M) >> CPUCFG17_L1I_SETS);
-	ways  = ((config & CPUCFG17_L1I_WAYS_M) >> CPUCFG17_L1I_WAYS) + 1;
-
-	c->icache.linesz = lsize;
-	c->icache.sets = sets;
-	c->icache.ways = ways;
-	icache_size = sets * ways * lsize;
-	c->icache.waysize = icache_size / c->icache.ways;
-
-	config = read_cpucfg(LOONGARCH_CPUCFG18);
-	lsize = 1 << ((config & CPUCFG18_L1D_SIZE_M) >> CPUCFG18_L1D_SIZE);
-	sets  = 1 << ((config & CPUCFG18_L1D_SETS_M) >> CPUCFG18_L1D_SETS);
-	ways  = ((config & CPUCFG18_L1D_WAYS_M) >> CPUCFG18_L1D_WAYS) + 1;
-
-	c->dcache.linesz = lsize;
-	c->dcache.sets = sets;
-	c->dcache.ways = ways;
-	dcache_size = sets * ways * lsize;
-	c->dcache.waysize = dcache_size / c->dcache.ways;
-
-	c->options |= LOONGARCH_CPU_PREFETCH;
-
-	pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n",
-		icache_size >> 10, way_string[c->icache.ways], "VIPT", c->icache.linesz);
-
-	pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n",
-		dcache_size >> 10, way_string[c->dcache.ways], "VIPT", "no aliases", c->dcache.linesz);
+	int i, j, nr_nodes;
+	uint64_t addr = CSR_DMW0_BASE;
+	struct cache_desc *cdesc = current_cpu_data.cache_leaves + leaf;
+
+	nr_nodes = cache_private(cdesc) ? 1 : loongson_sysconf.nr_nodes;
+
+	do {
+		for (i = 0; i < cdesc->sets; i++) {
+			for (j = 0; j < cdesc->ways; j++) {
+				flush_cache_line(leaf, addr);
+				addr++;
+			}
+
+			addr -= cdesc->ways;
+			addr += cdesc->linesz;
+		}
+		addr += (1ULL << NODE_ADDRSPACE_SHIFT);
+	} while (--nr_nodes > 0);
 }
 
-static void probe_vcache(void)
+asmlinkage __visible void __flush_cache_all(void)
 {
-	struct cpuinfo_loongarch *c = &current_cpu_data;
-	unsigned int lsize, sets, ways;
-	unsigned int config;
-
-	config = read_cpucfg(LOONGARCH_CPUCFG19);
-	lsize = 1 << ((config & CPUCFG19_L2_SIZE_M) >> CPUCFG19_L2_SIZE);
-	sets  = 1 << ((config & CPUCFG19_L2_SETS_M) >> CPUCFG19_L2_SETS);
-	ways  = ((config & CPUCFG19_L2_WAYS_M) >> CPUCFG19_L2_WAYS) + 1;
-
-	c->vcache.linesz = lsize;
-	c->vcache.sets = sets;
-	c->vcache.ways = ways;
-	vcache_size = lsize * sets * ways;
-	c->vcache.waysize = vcache_size / c->vcache.ways;
-
-	pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n",
-		vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz);
+	int leaf;
+	struct cache_desc *cdesc = current_cpu_data.cache_leaves;
+	unsigned int cache_present = current_cpu_data.cache_leaves_present;
+
+	leaf = cache_present - 1;
+	if (cache_inclusive(cdesc + leaf)) {
+		flush_cache_leaf(leaf);
+		return;
+	}
+
+	for (leaf = 0; leaf < cache_present; leaf++)
+		flush_cache_leaf(leaf);
 }
 
-static void probe_scache(void)
-{
-	struct cpuinfo_loongarch *c = &current_cpu_data;
-	unsigned int lsize, sets, ways;
-	unsigned int config;
-
-	config = read_cpucfg(LOONGARCH_CPUCFG20);
-	lsize = 1 << ((config & CPUCFG20_L3_SIZE_M) >> CPUCFG20_L3_SIZE);
-	sets  = 1 << ((config & CPUCFG20_L3_SETS_M) >> CPUCFG20_L3_SETS);
-	ways  = ((config & CPUCFG20_L3_WAYS_M) >> CPUCFG20_L3_WAYS) + 1;
-
-	c->scache.linesz = lsize;
-	c->scache.sets = sets;
-	c->scache.ways = ways;
-	/* 4 cores. scaches are shared */
-	scache_size = lsize * sets * ways;
-	c->scache.waysize = scache_size / c->scache.ways;
-
-	pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
-		scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
-}
+#define L1IUPRE		(1 << 0)
+#define L1IUUNIFY	(1 << 1)
+#define L1DPRE		(1 << 2)
+
+#define LXIUPRE		(1 << 0)
+#define LXIUUNIFY	(1 << 1)
+#define LXIUPRIV	(1 << 2)
+#define LXIUINCL	(1 << 3)
+#define LXDPRE		(1 << 4)
+#define LXDPRIV		(1 << 5)
+#define LXDINCL		(1 << 6)
+
+#define populate_cache_properties(cfg0, cdesc, level, leaf)				\
+do {											\
+	unsigned int cfg1;								\
+											\
+	cfg1 = read_cpucfg(LOONGARCH_CPUCFG17 + leaf);					\
+	if (level == 1)	{								\
+		cdesc->flags |= CACHE_PRIVATE;						\
+	} else {									\
+		if (cfg0 & LXIUPRIV)							\
+			cdesc->flags |= CACHE_PRIVATE;					\
+		if (cfg0 & LXIUINCL)							\
+			cdesc->flags |= CACHE_INCLUSIVE;				\
+	}										\
+	cdesc->level = level;								\
+	cdesc->flags |= CACHE_PRESENT;							\
+	cdesc->ways = ((cfg1 & CPUCFG_CACHE_WAYS_M) >> CPUCFG_CACHE_WAYS) + 1;		\
+	cdesc->sets = 1 << ((cfg1 & CPUCFG_CACHE_SETS_M) >> CPUCFG_CACHE_SETS);		\
+	cdesc->linesz = 1 << ((cfg1 & CPUCFG_CACHE_LSIZE_M) >> CPUCFG_CACHE_LSIZE);	\
+	cdesc++; leaf++;								\
+} while (0)
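
The macro consumes one CPUCFG17+leaf word per leaf, while the presence/unify/private/inclusive bits come from CPUCFG16, whose layout (one 3-bit L1 group, then 7-bit groups per level) is what the shifts by 3 and 7 in cpu_cache_init() below decode. An illustrative decoder, not part of the patch, using the flag macros defined above:

/* Illustrative only: decode CPUCFG16 the way cpu_cache_init() does. */
static void decode_cpucfg16_sketch(unsigned int config)
{
	int level;

	pr_info("L1: IU %s (%s), D %s\n",
		(config & L1IUPRE) ? "present" : "absent",
		(config & L1IUUNIFY) ? "unified" : "split",
		(config & L1DPRE) ? "present" : "absent");

	config >>= 3;
	for (level = 2; config && level <= CACHE_LEVEL_MAX; level++, config >>= 7)
		pr_info("L%d: IU %s, D %s\n", level,
			(config & LXIUPRE) ? "present" : "absent",
			(config & LXDPRE) ? "present" : "absent");
}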
 
 void cpu_cache_init(void)
 {
-	probe_pcache();
-	probe_vcache();
-	probe_scache();
-
+	unsigned int leaf = 0, level = 1;
+	unsigned int config = read_cpucfg(LOONGARCH_CPUCFG16);
+	struct cache_desc *cdesc = current_cpu_data.cache_leaves;
+
+	if (config & L1IUPRE) {
+		if (config & L1IUUNIFY)
+			cdesc->type = CACHE_TYPE_UNIFIED;
+		else
+			cdesc->type = CACHE_TYPE_INST;
+		populate_cache_properties(config, cdesc, level, leaf);
+	}
+
+	if (config & L1DPRE) {
+		cdesc->type = CACHE_TYPE_DATA;
+		populate_cache_properties(config, cdesc, level, leaf);
+	}
+
+	config = config >> 3;
+	for (level = 2; level <= CACHE_LEVEL_MAX; level++) {
+		if (!config)
+			break;
+
+		if (config & LXIUPRE) {
+			if (config & LXIUUNIFY)
+				cdesc->type = CACHE_TYPE_UNIFIED;
+			else
+				cdesc->type = CACHE_TYPE_INST;
+			populate_cache_properties(config, cdesc, level, leaf);
+		}
+
+		if (config & LXDPRE) {
+			cdesc->type = CACHE_TYPE_DATA;
+			populate_cache_properties(config, cdesc, level, leaf);
+		}
+
+		config = config >> 7;
+	}
+
+	BUG_ON(leaf > CACHE_LEAVES_MAX);
+
+	current_cpu_data.cache_leaves_present = leaf;
+	current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
 	shm_align_mask = PAGE_SIZE - 1;
 }
 
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 0532ed5ba43d..080061793c85 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -152,6 +152,70 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 #endif
 
+static pte_t *fixmap_pte(unsigned long addr)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset_k(addr);
+	p4d = p4d_offset(pgd, addr);
+
+	if (pgd_none(*pgd)) {
+		pud_t *new __maybe_unused;
+
+		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+		pgd_populate(&init_mm, pgd, new);
+#ifndef __PAGETABLE_PUD_FOLDED
+		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+#endif
+	}
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new __maybe_unused;
+
+		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+		pud_populate(&init_mm, pud, new);
+#ifndef __PAGETABLE_PMD_FOLDED
+		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+#endif
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd)) {
+		pte_t *new __maybe_unused;
+
+		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+void __init __set_fixmap(enum fixed_addresses idx,
+			       phys_addr_t phys, pgprot_t flags)
+{
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *ptep;
+
+	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
+
+	ptep = fixmap_pte(addr);
+	if (!pte_none(*ptep)) {
+		pte_ERROR(*ptep);
+		return;
+	}
+
+	if (pgprot_val(flags))
+		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
+	else {
+		pte_clear(&init_mm, addr, ptep);
+		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+	}
+}
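
A usage sketch, not part of the patch: map one device page at a fixed slot, then clear it again. FIX_EARLYCON_MEM_BASE is assumed to exist in this arch's enum fixed_addresses, and FIXMAP_PAGE_IO to come from asm-generic/fixmap.h.

/* Illustrative only: map a device page at a fixmap slot, then unmap it. */
static void __init fixmap_usage_sketch(phys_addr_t uart_phys)
{
	void __iomem *base;

	__set_fixmap(FIX_EARLYCON_MEM_BASE, uart_phys & PAGE_MASK,
		     FIXMAP_PAGE_IO);
	base = (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);

	/* ... early MMIO via base + (uart_phys & ~PAGE_MASK) ... */

	/* An empty pgprot clears the PTE and flushes the TLB range. */
	__set_fixmap(FIX_EARLYCON_MEM_BASE, 0, __pgprot(0));
}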
+
 /*
  * Align swapper_pg_dir to 64K, allowing its address to be loaded
  * with a single LUI instruction in the TLB handlers.  If we used
diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c
index 381a569635a9..fbe1a4856fc4 100644
--- a/arch/loongarch/mm/mmap.c
+++ b/arch/loongarch/mm/mmap.c
@@ -3,6 +3,8 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <linux/export.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 
@@ -116,3 +118,30 @@ int __virt_addr_valid(volatile void *kaddr)
 	return pfn_valid(PFN_DOWN(PHYSADDR(kaddr)));
 }
 EXPORT_SYMBOL_GPL(__virt_addr_valid);
+
+/*
+ * You really shouldn't be using read() or write() on /dev/mem.  This might go
+ * away in the future.
+ */
+int valid_phys_addr_range(phys_addr_t addr, size_t size)
+{
+	/*
+	 * Check whether addr is covered by a memory region without the
+	 * MEMBLOCK_NOMAP attribute, and whether that region covers the
+	 * entire range. In theory, this could lead to false negatives
+	 * if the range is covered by distinct but adjacent memory regions
+	 * that only differ in other attributes. However, few of such
+	 * attributes have been defined, and it is debatable whether it
+	 * follows that /dev/mem read() calls should be able to traverse
+	 * such boundaries.
+	 */
+	return memblock_is_region_memory(addr, size) && memblock_is_map_memory(addr);
+}
+
+/*
+ * Do not allow /dev/mem mappings beyond the supported physical range.
+ */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
+{
+	return !(((pfn << PAGE_SHIFT) + size) & ~(GENMASK_ULL(cpu_pabits, 0)));
+}
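
The mask test above is plain end-of-range arithmetic; a worked example, assuming 4 KiB pages (PAGE_SHIFT == 12) and cpu_pabits == 47 purely for the numbers:

/*
 * Worked example (illustrative; PAGE_SHIFT == 12, cpu_pabits == 47):
 * the valid physical window is [0, 2^48), since
 * GENMASK_ULL(47, 0) == 0x0000ffffffffffff.
 *
 *   pfn = 0xffffff000, size = 2 MiB:
 *     end = 0xffffff000000 + 0x200000 = 0xffffff200000 < 2^48 -> valid
 *
 *   pfn = 0x1000000000, size = PAGE_SIZE:
 *     end = 0x1000000001000 has bit 48 set -> rejected
 */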
diff --git a/arch/loongarch/mm/tlb.c b/arch/loongarch/mm/tlb.c
index 9818ce11546b..da3681f131c8 100644
--- a/arch/loongarch/mm/tlb.c
+++ b/arch/loongarch/mm/tlb.c
@@ -258,7 +258,7 @@ extern long exception_handlers[VECSIZE * 128 / sizeof(long)];
 void setup_tlb_handler(int cpu)
 {
 	setup_ptwalker();
-	output_pgtable_bits_defines();
+	local_flush_tlb_all();
 
 	/* The tlb handlers are generated only once */
 	if (cpu == 0) {
@@ -301,6 +301,7 @@ void tlb_init(int cpu)
 	write_csr_pagesize(PS_DEFAULT_SIZE);
 	write_csr_stlbpgsize(PS_DEFAULT_SIZE);
 	write_csr_tlbrefill_pagesize(PS_DEFAULT_SIZE);
+
 	setup_tlb_handler(cpu);
-	local_flush_tlb_all();
+	output_pgtable_bits_defines();
 }
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 39743337999e..d8ee8fbc8c67 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -10,15 +10,20 @@
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 
+#define PTRS_PER_PGD_BITS	(PAGE_SHIFT - 3)
+#define PTRS_PER_PUD_BITS	(PAGE_SHIFT - 3)
+#define PTRS_PER_PMD_BITS	(PAGE_SHIFT - 3)
+#define PTRS_PER_PTE_BITS	(PAGE_SHIFT - 3)
+
 	.macro tlb_do_page_fault, write
 	SYM_FUNC_START(tlb_do_page_fault_\write)
 	SAVE_ALL
-	csrrd	a2, LOONGARCH_CSR_BADV
-	move	a0, sp
-	REG_S	a2, sp, PT_BVADDR
-	li.w	a1, \write
-	la.abs	t0, do_page_fault
-	jirl	ra, t0, 0
+	csrrd		a2, LOONGARCH_CSR_BADV
+	move		a0, sp
+	REG_S		a2, sp, PT_BVADDR
+	li.w		a1, \write
+	la.abs		t0, do_page_fault
+	jirl		ra, t0, 0
 	RESTORE_ALL_AND_RET
 	SYM_FUNC_END(tlb_do_page_fault_\write)
 	.endm
@@ -29,133 +34,115 @@
 SYM_FUNC_START(handle_tlb_protect)
 	BACKUP_T0T1
 	SAVE_ALL
-	move	a0, sp
-	move	a1, zero
-	csrrd	a2, LOONGARCH_CSR_BADV
-	REG_S	a2, sp, PT_BVADDR
-	la.abs	t0, do_page_fault
-	jirl	ra, t0, 0
+	move		a0, sp
+	move		a1, zero
+	csrrd		a2, LOONGARCH_CSR_BADV
+	REG_S		a2, sp, PT_BVADDR
+	la.abs		t0, do_page_fault
+	jirl		ra, t0, 0
 	RESTORE_ALL_AND_RET
 SYM_FUNC_END(handle_tlb_protect)
 
 SYM_FUNC_START(handle_tlb_load)
-	csrwr	t0, EXCEPTION_KS0
-	csrwr	t1, EXCEPTION_KS1
-	csrwr	ra, EXCEPTION_KS2
+	csrwr		t0, EXCEPTION_KS0
+	csrwr		t1, EXCEPTION_KS1
+	csrwr		ra, EXCEPTION_KS2
 
 	/*
 	 * The vmalloc handling is not in the hotpath.
 	 */
-	csrrd	t0, LOONGARCH_CSR_BADV
-	bltz	t0, vmalloc_load
-	csrrd	t1, LOONGARCH_CSR_PGDL
+	csrrd		t0, LOONGARCH_CSR_BADV
+	bltz		t0, vmalloc_load
+	csrrd		t1, LOONGARCH_CSR_PGDL
 
 vmalloc_done_load:
 	/* Get PGD offset in bytes */
-	srli.d	t0, t0, PGDIR_SHIFT
-	andi	t0, t0, (PTRS_PER_PGD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	bstrpick.d	ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
+	alsl.d		t1, ra, t1, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-	csrrd	t0, LOONGARCH_CSR_BADV
-	ld.d	t1, t1, 0
-	srli.d	t0, t0, PUD_SHIFT
-	andi	t0, t0, (PTRS_PER_PUD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	ld.d		t1, t1, 0
+	bstrpick.d	ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
+	alsl.d		t1, ra, t1, 3
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-	csrrd	t0, LOONGARCH_CSR_BADV
-	ld.d	t1, t1, 0
-	srli.d	t0, t0, PMD_SHIFT
-	andi	t0, t0, (PTRS_PER_PMD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	ld.d		t1, t1, 0
+	bstrpick.d	ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
+	alsl.d		t1, ra, t1, 3
 #endif
-	ld.d	ra, t1, 0
+	ld.d		ra, t1, 0
 
 	/*
 	 * For huge tlb entries, pmde doesn't contain an address but
 	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
 	 * see if we need to jump to huge tlb processing.
 	 */
-	andi	t0, ra, _PAGE_HUGE
-	bnez	t0, tlb_huge_update_load
+	rotri.d		ra, ra, _PAGE_HUGE_SHIFT + 1
+	bltz		ra, tlb_huge_update_load
 
-	csrrd	t0, LOONGARCH_CSR_BADV
-	srli.d	t0, t0, PAGE_SHIFT
-	andi	t0, t0, (PTRS_PER_PTE - 1)
-	slli.d	t0, t0, _PTE_T_LOG2
-	add.d	t1, ra, t0
+	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+	bstrpick.d	t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
+	alsl.d		t1, t0, ra, _PTE_T_LOG2
 
 #ifdef CONFIG_SMP
 smp_pgtable_change_load:
-#endif
-#ifdef CONFIG_SMP
-	ll.d	t0, t1, 0
+	ll.d		t0, t1, 0
 #else
-	ld.d	t0, t1, 0
+	ld.d		t0, t1, 0
 #endif
-	tlbsrch
-
-	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
-	andi	ra, ra, 1
-	beqz	ra, nopage_tlb_load
+	andi		ra, t0, _PAGE_PRESENT
+	beqz		ra, nopage_tlb_load
 
-	ori	t0, t0, _PAGE_VALID
+	ori		t0, t0, _PAGE_VALID
 #ifdef CONFIG_SMP
-	sc.d	t0, t1, 0
-	beqz	t0, smp_pgtable_change_load
+	sc.d		t0, t1, 0
+	beqz		t0, smp_pgtable_change_load
 #else
-	st.d	t0, t1, 0
+	st.d		t0, t1, 0
 #endif
-	ori	t1, t1, 8
-	xori	t1, t1, 8
-	ld.d	t0, t1, 0
-	ld.d	t1, t1, 8
-	csrwr	t0, LOONGARCH_CSR_TLBELO0
-	csrwr	t1, LOONGARCH_CSR_TLBELO1
+	tlbsrch
+	bstrins.d	t1, zero, 3, 3
+	ld.d		t0, t1, 0
+	ld.d		t1, t1, 8
+	csrwr		t0, LOONGARCH_CSR_TLBELO0
+	csrwr		t1, LOONGARCH_CSR_TLBELO1
 	tlbwr
-leave_load:
-	csrrd	t0, EXCEPTION_KS0
-	csrrd	t1, EXCEPTION_KS1
-	csrrd	ra, EXCEPTION_KS2
+
+	csrrd		t0, EXCEPTION_KS0
+	csrrd		t1, EXCEPTION_KS1
+	csrrd		ra, EXCEPTION_KS2
 	ertn
+
 #ifdef CONFIG_64BIT
 vmalloc_load:
-	la.abs	t1, swapper_pg_dir
-	b	vmalloc_done_load
+	la.abs		t1, swapper_pg_dir
+	b		vmalloc_done_load
 #endif
 
-	/*
-	 * This is the entry point when build_tlbchange_handler_head
-	 * spots a huge page.
-	 */
+	/* This is the entry point when a huge page is detected. */
 tlb_huge_update_load:
 #ifdef CONFIG_SMP
-	ll.d	t0, t1, 0
-#else
-	ld.d	t0, t1, 0
+	ll.d		ra, t1, 0
 #endif
-	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
-	andi	ra, ra, 1
-	beqz	ra, nopage_tlb_load
-	tlbsrch
+	andi		t0, ra, _PAGE_PRESENT
+	beqz		t0, nopage_tlb_load
 
-	ori	t0, t0, _PAGE_VALID
 #ifdef CONFIG_SMP
-	sc.d	t0, t1, 0
-	beqz	t0, tlb_huge_update_load
-	ld.d	t0, t1, 0
+	ori		t0, ra, _PAGE_VALID
+	sc.d		t0, t1, 0
+	beqz		t0, tlb_huge_update_load
+	ori		t0, ra, _PAGE_VALID
 #else
-	st.d	t0, t1, 0
+	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+	ori		t0, ra, _PAGE_VALID
+	st.d		t0, t1, 0
 #endif
+	tlbsrch
 	addu16i.d	t1, zero, -(CSR_TLBIDX_EHINV >> 16)
 	addi.d		ra, t1, 0
 	csrxchg		ra, t1, LOONGARCH_CSR_TLBIDX
 	tlbwr
 
-	csrxchg	zero, t1, LOONGARCH_CSR_TLBIDX
+	csrxchg		zero, t1, LOONGARCH_CSR_TLBIDX
 
 	/*
 	 * A huge PTE describes an area the size of the
@@ -167,21 +154,20 @@ tlb_huge_update_load:
 	 * address space.
 	 */
 	/* Huge page: Move Global bit */
-	xori	t0, t0, _PAGE_HUGE
-	lu12i.w	t1, _PAGE_HGLOBAL >> 12
-	and	t1, t0, t1
-	srli.d	t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
-	or	t0, t0, t1
+	xori		t0, t0, _PAGE_HUGE
+	lu12i.w		t1, _PAGE_HGLOBAL >> 12
+	and		t1, t0, t1
+	srli.d		t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
+	or		t0, t0, t1
 
-	addi.d	ra, t0, 0
-	csrwr	t0, LOONGARCH_CSR_TLBELO0
-	addi.d	t0, ra, 0
+	move		ra, t0
+	csrwr		ra, LOONGARCH_CSR_TLBELO0
 
 	/* Convert to entrylo1 */
-	addi.d	t1, zero, 1
-	slli.d	t1, t1, (HPAGE_SHIFT - 1)
-	add.d	t0, t0, t1
-	csrwr	t0, LOONGARCH_CSR_TLBELO1
+	addi.d		t1, zero, 1
+	slli.d		t1, t1, (HPAGE_SHIFT - 1)
+	add.d		t0, t0, t1
+	csrwr		t0, LOONGARCH_CSR_TLBELO1
 
 	/* Set huge page tlb entry size */
 	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
@@ -194,136 +180,120 @@ tlb_huge_update_load:
 	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX
 
+	csrrd		t0, EXCEPTION_KS0
+	csrrd		t1, EXCEPTION_KS1
+	csrrd		ra, EXCEPTION_KS2
+	ertn
+
 nopage_tlb_load:
-	dbar	0
-	csrrd	ra, EXCEPTION_KS2
-	la.abs	t0, tlb_do_page_fault_0
-	jr	t0
+	dbar		0
+	csrrd		ra, EXCEPTION_KS2
+	la.abs		t0, tlb_do_page_fault_0
+	jr		t0
 SYM_FUNC_END(handle_tlb_load)
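
The rewritten walk above folds each four-instruction srli.d/andi/slli.d/add.d sequence into one bstrpick.d (bit-field extract) plus one alsl.d (shift-and-add), and keeps BADV live in t0 so the CSR is read only once. The huge-page test likewise becomes a rotri.d by _PAGE_HUGE_SHIFT + 1, which parks the huge bit in bit 63 so a single bltz can branch on it. A C rendering of one walk step, as a sketch only:

/* Illustrative C equivalent of one bstrpick.d/alsl.d page-table step. */
static inline unsigned long pgtable_step(unsigned long table,
					 unsigned long badv,
					 unsigned int shift,
					 unsigned int idx_bits)
{
	/* bstrpick.d ra, t0, shift + idx_bits - 1, shift */
	unsigned long idx = (badv >> shift) & ((1UL << idx_bits) - 1);

	/* alsl.d t1, ra, t1, 3  ->  t1 = (idx << 3) + t1 (8-byte entries) */
	return table + (idx << 3);
}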
 
 SYM_FUNC_START(handle_tlb_store)
-	csrwr	t0, EXCEPTION_KS0
-	csrwr	t1, EXCEPTION_KS1
-	csrwr	ra, EXCEPTION_KS2
+	csrwr		t0, EXCEPTION_KS0
+	csrwr		t1, EXCEPTION_KS1
+	csrwr		ra, EXCEPTION_KS2
 
 	/*
 	 * The vmalloc handling is not in the hotpath.
 	 */
-	csrrd	t0, LOONGARCH_CSR_BADV
-	bltz	t0, vmalloc_store
-	csrrd	t1, LOONGARCH_CSR_PGDL
+	csrrd		t0, LOONGARCH_CSR_BADV
+	bltz		t0, vmalloc_store
+	csrrd		t1, LOONGARCH_CSR_PGDL
 
 vmalloc_done_store:
 	/* Get PGD offset in bytes */
-	srli.d	t0, t0, PGDIR_SHIFT
-	andi	t0, t0, (PTRS_PER_PGD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
-
+	bstrpick.d	ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
+	alsl.d		t1, ra, t1, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-	csrrd	t0, LOONGARCH_CSR_BADV
-	ld.d	t1, t1, 0
-	srli.d	t0, t0, PUD_SHIFT
-	andi	t0, t0, (PTRS_PER_PUD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	ld.d		t1, t1, 0
+	bstrpick.d	ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
+	alsl.d		t1, ra, t1, 3
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-	csrrd	t0, LOONGARCH_CSR_BADV
-	ld.d	t1, t1, 0
-	srli.d	t0, t0, PMD_SHIFT
-	andi	t0, t0, (PTRS_PER_PMD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	ld.d		t1, t1, 0
+	bstrpick.d	ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
+	alsl.d		t1, ra, t1, 3
 #endif
-	ld.d	ra, t1, 0
+	ld.d		ra, t1, 0
 
 	/*
 	 * For huge tlb entries, pmde doesn't contain an address but
 	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
 	 * see if we need to jump to huge tlb processing.
 	 */
-	andi	t0, ra, _PAGE_HUGE
-	bnez	t0, tlb_huge_update_store
+	rotri.d		ra, ra, _PAGE_HUGE_SHIFT + 1
+	bltz		ra, tlb_huge_update_store
 
-	csrrd	t0, LOONGARCH_CSR_BADV
-	srli.d	t0, t0, PAGE_SHIFT
-	andi	t0, t0, (PTRS_PER_PTE - 1)
-	slli.d	t0, t0, _PTE_T_LOG2
-	add.d	t1, ra, t0
+	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+	bstrpick.d	t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
+	alsl.d		t1, t0, ra, _PTE_T_LOG2
 
 #ifdef CONFIG_SMP
 smp_pgtable_change_store:
-#endif
-#ifdef CONFIG_SMP
-	ll.d	t0, t1, 0
+	ll.d		t0, t1, 0
 #else
-	ld.d	t0, t1, 0
+	ld.d		t0, t1, 0
 #endif
-	tlbsrch
-
-	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
-	andi	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-	xori	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-	bnez	ra, nopage_tlb_store
+	andi		ra, t0, _PAGE_PRESENT | _PAGE_WRITE
+	xori		ra, ra, _PAGE_PRESENT | _PAGE_WRITE
+	bnez		ra, nopage_tlb_store
 
-	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	ori		t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #ifdef CONFIG_SMP
-	sc.d	t0, t1, 0
-	beqz	t0, smp_pgtable_change_store
+	sc.d		t0, t1, 0
+	beqz		t0, smp_pgtable_change_store
 #else
-	st.d	t0, t1, 0
+	st.d		t0, t1, 0
 #endif
-
-	ori	t1, t1, 8
-	xori	t1, t1, 8
-	ld.d	t0, t1, 0
-	ld.d	t1, t1, 8
-	csrwr	t0, LOONGARCH_CSR_TLBELO0
-	csrwr	t1, LOONGARCH_CSR_TLBELO1
+	tlbsrch
+	bstrins.d	t1, zero, 3, 3
+	ld.d		t0, t1, 0
+	ld.d		t1, t1, 8
+	csrwr		t0, LOONGARCH_CSR_TLBELO0
+	csrwr		t1, LOONGARCH_CSR_TLBELO1
 	tlbwr
-leave_store:
-	csrrd	t0, EXCEPTION_KS0
-	csrrd	t1, EXCEPTION_KS1
-	csrrd	ra, EXCEPTION_KS2
+
+	csrrd		t0, EXCEPTION_KS0
+	csrrd		t1, EXCEPTION_KS1
+	csrrd		ra, EXCEPTION_KS2
 	ertn
+
 #ifdef CONFIG_64BIT
 vmalloc_store:
-	la.abs	t1, swapper_pg_dir
-	b	vmalloc_done_store
+	la.abs		t1, swapper_pg_dir
+	b		vmalloc_done_store
 #endif
 
-	/*
-	 * This is the entry point when build_tlbchange_handler_head
-	 * spots a huge page.
-	 */
+	/* This is the entry point when a huge page is detected. */
 tlb_huge_update_store:
 #ifdef CONFIG_SMP
-	ll.d	t0, t1, 0
-#else
-	ld.d	t0, t1, 0
+	ll.d		ra, t1, 0
 #endif
-	srli.d	ra, t0, _PAGE_PRESENT_SHIFT
-	andi	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-	xori	ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
-	bnez	ra, nopage_tlb_store
-
-	tlbsrch
-	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	andi		t0, ra, _PAGE_PRESENT | _PAGE_WRITE
+	xori		t0, t0, _PAGE_PRESENT | _PAGE_WRITE
+	bnez		t0, nopage_tlb_store
 
 #ifdef CONFIG_SMP
-	sc.d	t0, t1, 0
-	beqz	t0, tlb_huge_update_store
-	ld.d	t0, t1, 0
+	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	sc.d		t0, t1, 0
+	beqz		t0, tlb_huge_update_store
+	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #else
-	st.d	t0, t1, 0
+	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	st.d		t0, t1, 0
 #endif
+	tlbsrch
 	addu16i.d	t1, zero, -(CSR_TLBIDX_EHINV >> 16)
 	addi.d		ra, t1, 0
 	csrxchg		ra, t1, LOONGARCH_CSR_TLBIDX
 	tlbwr
 
-	csrxchg	zero, t1, LOONGARCH_CSR_TLBIDX
+	csrxchg		zero, t1, LOONGARCH_CSR_TLBIDX
 	/*
 	 * A huge PTE describes an area the size of the
 	 * configured huge page size. This is twice the
@@ -334,21 +304,20 @@ tlb_huge_update_store:
 	 * address space.
 	 */
 	/* Huge page: Move Global bit */
-	xori	t0, t0, _PAGE_HUGE
-	lu12i.w	t1, _PAGE_HGLOBAL >> 12
-	and	t1, t0, t1
-	srli.d	t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
-	or	t0, t0, t1
+	xori		t0, t0, _PAGE_HUGE
+	lu12i.w		t1, _PAGE_HGLOBAL >> 12
+	and		t1, t0, t1
+	srli.d		t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
+	or		t0, t0, t1
 
-	addi.d	ra, t0, 0
-	csrwr	t0, LOONGARCH_CSR_TLBELO0
-	addi.d	t0, ra, 0
+	move		ra, t0
+	csrwr		ra, LOONGARCH_CSR_TLBELO0
 
 	/* Convert to entrylo1 */
-	addi.d	t1, zero, 1
-	slli.d	t1, t1, (HPAGE_SHIFT - 1)
-	add.d	t0, t0, t1
-	csrwr	t0, LOONGARCH_CSR_TLBELO1
+	addi.d		t1, zero, 1
+	slli.d		t1, t1, (HPAGE_SHIFT - 1)
+	add.d		t0, t0, t1
+	csrwr		t0, LOONGARCH_CSR_TLBELO1
 
 	/* Set huge page tlb entry size */
 	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
@@ -362,126 +331,110 @@ tlb_huge_update_store:
 	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX
 
+	csrrd		t0, EXCEPTION_KS0
+	csrrd		t1, EXCEPTION_KS1
+	csrrd		ra, EXCEPTION_KS2
+	ertn
+
 nopage_tlb_store:
-	dbar	0
-	csrrd	ra, EXCEPTION_KS2
-	la.abs	t0, tlb_do_page_fault_1
-	jr	t0
+	dbar		0
+	csrrd		ra, EXCEPTION_KS2
+	la.abs		t0, tlb_do_page_fault_1
+	jr		t0
 SYM_FUNC_END(handle_tlb_store)
 
 SYM_FUNC_START(handle_tlb_modify)
-	csrwr	t0, EXCEPTION_KS0
-	csrwr	t1, EXCEPTION_KS1
-	csrwr	ra, EXCEPTION_KS2
+	csrwr		t0, EXCEPTION_KS0
+	csrwr		t1, EXCEPTION_KS1
+	csrwr		ra, EXCEPTION_KS2
 
 	/*
 	 * The vmalloc handling is not in the hotpath.
 	 */
-	csrrd	t0, LOONGARCH_CSR_BADV
-	bltz	t0, vmalloc_modify
-	csrrd	t1, LOONGARCH_CSR_PGDL
+	csrrd		t0, LOONGARCH_CSR_BADV
+	bltz		t0, vmalloc_modify
+	csrrd		t1, LOONGARCH_CSR_PGDL
 
 vmalloc_done_modify:
 	/* Get PGD offset in bytes */
-	srli.d	t0, t0, PGDIR_SHIFT
-	andi	t0, t0, (PTRS_PER_PGD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	bstrpick.d	ra, t0, PTRS_PER_PGD_BITS + PGDIR_SHIFT - 1, PGDIR_SHIFT
+	alsl.d		t1, ra, t1, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-	csrrd	t0, LOONGARCH_CSR_BADV
-	ld.d	t1, t1, 0
-	srli.d	t0, t0, PUD_SHIFT
-	andi	t0, t0, (PTRS_PER_PUD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	ld.d		t1, t1, 0
+	bstrpick.d	ra, t0, PTRS_PER_PUD_BITS + PUD_SHIFT - 1, PUD_SHIFT
+	alsl.d		t1, ra, t1, 3
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-	csrrd	t0, LOONGARCH_CSR_BADV
-	ld.d	t1, t1, 0
-	srli.d	t0, t0, PMD_SHIFT
-	andi	t0, t0, (PTRS_PER_PMD - 1)
-	slli.d	t0, t0, 3
-	add.d	t1, t1, t0
+	ld.d		t1, t1, 0
+	bstrpick.d	ra, t0, PTRS_PER_PMD_BITS + PMD_SHIFT - 1, PMD_SHIFT
+	alsl.d		t1, ra, t1, 3
 #endif
-	ld.d	ra, t1, 0
+	ld.d		ra, t1, 0
 
 	/*
 	 * For huge tlb entries, pmde doesn't contain an address but
 	 * instead contains the tlb pte. Check the PAGE_HUGE bit and
 	 * see if we need to jump to huge tlb processing.
 	 */
-	andi	t0, ra, _PAGE_HUGE
-	bnez	t0, tlb_huge_update_modify
+	rotri.d		ra, ra, _PAGE_HUGE_SHIFT + 1
+	bltz		ra, tlb_huge_update_modify
 
-	csrrd	t0, LOONGARCH_CSR_BADV
-	srli.d	t0, t0, PAGE_SHIFT
-	andi	t0, t0, (PTRS_PER_PTE - 1)
-	slli.d	t0, t0, _PTE_T_LOG2
-	add.d	t1, ra, t0
+	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+	bstrpick.d	t0, t0, PTRS_PER_PTE_BITS + PAGE_SHIFT - 1, PAGE_SHIFT
+	alsl.d		t1, t0, ra, _PTE_T_LOG2
 
 #ifdef CONFIG_SMP
 smp_pgtable_change_modify:
-#endif
-#ifdef CONFIG_SMP
-	ll.d	t0, t1, 0
+	ll.d		t0, t1, 0
 #else
-	ld.d	t0, t1, 0
+	ld.d		t0, t1, 0
 #endif
-	tlbsrch
-
-	srli.d	ra, t0, _PAGE_WRITE_SHIFT
-	andi	ra, ra, 1
-	beqz	ra, nopage_tlb_modify
+	andi		ra, t0, _PAGE_WRITE
+	beqz		ra, nopage_tlb_modify
 
-	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	ori		t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #ifdef CONFIG_SMP
-	sc.d	t0, t1, 0
-	beqz	t0, smp_pgtable_change_modify
+	sc.d		t0, t1, 0
+	beqz		t0, smp_pgtable_change_modify
 #else
-	st.d	t0, t1, 0
+	st.d		t0, t1, 0
 #endif
-	ori	t1, t1, 8
-	xori	t1, t1, 8
-	ld.d	t0, t1, 0
-	ld.d	t1, t1, 8
-	csrwr	t0, LOONGARCH_CSR_TLBELO0
-	csrwr	t1, LOONGARCH_CSR_TLBELO1
+	tlbsrch
+	bstrins.d	t1, zero, 3, 3
+	ld.d		t0, t1, 0
+	ld.d		t1, t1, 8
+	csrwr		t0, LOONGARCH_CSR_TLBELO0
+	csrwr		t1, LOONGARCH_CSR_TLBELO1
 	tlbwr
-leave_modify:
-	csrrd	t0, EXCEPTION_KS0
-	csrrd	t1, EXCEPTION_KS1
-	csrrd	ra, EXCEPTION_KS2
+
+	csrrd		t0, EXCEPTION_KS0
+	csrrd		t1, EXCEPTION_KS1
+	csrrd		ra, EXCEPTION_KS2
 	ertn
+
 #ifdef CONFIG_64BIT
 vmalloc_modify:
-	la.abs	t1, swapper_pg_dir
-	b	vmalloc_done_modify
+	la.abs		t1, swapper_pg_dir
+	b		vmalloc_done_modify
 #endif
 
-	/*
-	 * This is the entry point when
-	 * build_tlbchange_handler_head spots a huge page.
-	 */
+	/* This is the entry point when a huge page is detected. */
 tlb_huge_update_modify:
 #ifdef CONFIG_SMP
-	ll.d	t0, t1, 0
-#else
-	ld.d	t0, t1, 0
+	ll.d		ra, t1, 0
 #endif
-
-	srli.d	ra, t0, _PAGE_WRITE_SHIFT
-	andi	ra, ra, 1
-	beqz	ra, nopage_tlb_modify
-
-	tlbsrch
-	ori	t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	andi		t0, ra, _PAGE_WRITE
+	beqz		t0, nopage_tlb_modify
 
 #ifdef CONFIG_SMP
-	sc.d	t0, t1, 0
-	beqz	t0, tlb_huge_update_modify
-	ld.d	t0, t1, 0
+	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	sc.d		t0, t1, 0
+	beqz		t0, tlb_huge_update_modify
+	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
 #else
-	st.d	t0, t1, 0
+	rotri.d		ra, ra, 64 - (_PAGE_HUGE_SHIFT + 1)
+	ori		t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
+	st.d		t0, t1, 0
 #endif
 	/*
 	 * A huge PTE describes an area the size of the
@@ -493,21 +446,20 @@ tlb_huge_update_modify:
 	 * address space.
 	 */
 	/* Huge page: Move Global bit */
-	xori	t0, t0, _PAGE_HUGE
-	lu12i.w	t1, _PAGE_HGLOBAL >> 12
-	and	t1, t0, t1
-	srli.d	t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
-	or	t0, t0, t1
+	xori		t0, t0, _PAGE_HUGE
+	lu12i.w		t1, _PAGE_HGLOBAL >> 12
+	and		t1, t0, t1
+	srli.d		t1, t1, (_PAGE_HGLOBAL_SHIFT - _PAGE_GLOBAL_SHIFT)
+	or		t0, t0, t1
 
-	addi.d	ra, t0, 0
-	csrwr	t0, LOONGARCH_CSR_TLBELO0
-	addi.d	t0, ra, 0
+	move		ra, t0
+	csrwr		ra, LOONGARCH_CSR_TLBELO0
 
 	/* Convert to entrylo1 */
-	addi.d	t1, zero, 1
-	slli.d	t1, t1, (HPAGE_SHIFT - 1)
-	add.d	t0, t0, t1
-	csrwr	t0, LOONGARCH_CSR_TLBELO1
+	addi.d		t1, zero, 1
+	slli.d		t1, t1, (HPAGE_SHIFT - 1)
+	add.d		t0, t0, t1
+	csrwr		t0, LOONGARCH_CSR_TLBELO1
 
 	/* Set huge page tlb entry size */
 	addu16i.d	t0, zero, (CSR_TLBIDX_PS >> 16)
@@ -521,26 +473,31 @@ tlb_huge_update_modify:
 	addu16i.d	t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
 	csrxchg		t1, t0, LOONGARCH_CSR_TLBIDX
 
+	csrrd		t0, EXCEPTION_KS0
+	csrrd		t1, EXCEPTION_KS1
+	csrrd		ra, EXCEPTION_KS2
+	ertn
+
 nopage_tlb_modify:
-	dbar	0
-	csrrd	ra, EXCEPTION_KS2
-	la.abs	t0, tlb_do_page_fault_1
-	jr	t0
+	dbar		0
+	csrrd		ra, EXCEPTION_KS2
+	la.abs		t0, tlb_do_page_fault_1
+	jr		t0
 SYM_FUNC_END(handle_tlb_modify)
 
 SYM_FUNC_START(handle_tlb_refill)
-	csrwr	t0, LOONGARCH_CSR_TLBRSAVE
-	csrrd	t0, LOONGARCH_CSR_PGD
-	lddir	t0, t0, 3
+	csrwr		t0, LOONGARCH_CSR_TLBRSAVE
+	csrrd		t0, LOONGARCH_CSR_PGD
+	lddir		t0, t0, 3
 #if CONFIG_PGTABLE_LEVELS > 3
-	lddir	t0, t0, 2
+	lddir		t0, t0, 2
 #endif
 #if CONFIG_PGTABLE_LEVELS > 2
-	lddir	t0, t0, 1
+	lddir		t0, t0, 1
 #endif
-	ldpte	t0, 0
-	ldpte	t0, 1
+	ldpte		t0, 0
+	ldpte		t0, 1
 	tlbfill
-	csrrd	t0, LOONGARCH_CSR_TLBRSAVE
+	csrrd		t0, LOONGARCH_CSR_TLBRSAVE
 	ertn
 SYM_FUNC_END(handle_tlb_refill)
diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile
new file mode 100644
index 000000000000..1ec12a0c324a
--- /dev/null
+++ b/arch/loongarch/net/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for arch/loongarch/net
+#
+# Copyright (C) 2022 Loongson Technology Corporation Limited
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit.o
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
new file mode 100644
index 000000000000..43f0a98efe38
--- /dev/null
+++ b/arch/loongarch/net/bpf_jit.c
@@ -0,0 +1,1179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * BPF JIT compiler for LoongArch
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include "bpf_jit.h"
+
+#define REG_TCC		LOONGARCH_GPR_A6
+#define TCC_SAVED	LOONGARCH_GPR_S5
+
+#define SAVE_RA		BIT(0)
+#define SAVE_TCC	BIT(1)
+
+static const int regmap[] = {
+	/* return value from in-kernel function, and exit value for eBPF program */
+	[BPF_REG_0] = LOONGARCH_GPR_A5,
+	/* arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = LOONGARCH_GPR_A0,
+	[BPF_REG_2] = LOONGARCH_GPR_A1,
+	[BPF_REG_3] = LOONGARCH_GPR_A2,
+	[BPF_REG_4] = LOONGARCH_GPR_A3,
+	[BPF_REG_5] = LOONGARCH_GPR_A4,
+	/* callee saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = LOONGARCH_GPR_S0,
+	[BPF_REG_7] = LOONGARCH_GPR_S1,
+	[BPF_REG_8] = LOONGARCH_GPR_S2,
+	[BPF_REG_9] = LOONGARCH_GPR_S3,
+	/* read-only frame pointer to access stack */
+	[BPF_REG_FP] = LOONGARCH_GPR_S4,
+	/* temporary register for blinding constants */
+	[BPF_REG_AX] = LOONGARCH_GPR_T0,
+};
+
+static void mark_call(struct jit_ctx *ctx)
+{
+	ctx->flags |= SAVE_RA;
+}
+
+static void mark_tail_call(struct jit_ctx *ctx)
+{
+	ctx->flags |= SAVE_TCC;
+}
+
+static bool seen_call(struct jit_ctx *ctx)
+{
+	return (ctx->flags & SAVE_RA);
+}
+
+static bool seen_tail_call(struct jit_ctx *ctx)
+{
+	return (ctx->flags & SAVE_TCC);
+}
+
+static u8 tail_call_reg(struct jit_ctx *ctx)
+{
+	if (seen_call(ctx))
+		return TCC_SAVED;
+
+	return REG_TCC;
+}
+
+/*
+ * eBPF prog stack layout:
+ *
+ *                                        high
+ * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
+ *                            |           $ra           |
+ *                            +-------------------------+
+ *                            |           $fp           |
+ *                            +-------------------------+
+ *                            |           $s0           |
+ *                            +-------------------------+
+ *                            |           $s1           |
+ *                            +-------------------------+
+ *                            |           $s2           |
+ *                            +-------------------------+
+ *                            |           $s3           |
+ *                            +-------------------------+
+ *                            |           $s4           |
+ *                            +-------------------------+
+ *                            |           $s5           |
+ *                            +-------------------------+ <--BPF_REG_FP
+ *                            |  prog->aux->stack_depth |
+ *                            |        (optional)       |
+ * current $sp -------------> +-------------------------+
+ *                                        low
+ */
+static void build_prologue(struct jit_ctx *ctx)
+{
+	int stack_adjust = 0, store_offset, bpf_stack_adjust;
+
+	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+
+	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
+	stack_adjust += sizeof(long) * 8;
+
+	stack_adjust = round_up(stack_adjust, 16);
+	stack_adjust += bpf_stack_adjust;
+
+	/*
+	 * First instruction initializes the tail call count (TCC).
+	 * On tail call we skip this instruction, and the TCC is
+	 * passed in REG_TCC from the caller.
+	 */
+	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);
+
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);
+
+	store_offset = stack_adjust - sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);
+
+	store_offset -= sizeof(long);
+	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);
+
+	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);
+
+	if (bpf_stack_adjust)
+		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);
+
+	/*
+	 * If the program contains both calls and tail calls, REG_TCC
+	 * needs to be saved across calls.
+	 */
+	if (seen_tail_call(ctx) && seen_call(ctx))
+		move_reg(ctx, TCC_SAVED, REG_TCC);
+
+	ctx->stack_size = stack_adjust;
+}
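
A worked sizing example (numbers are illustrative):

/*
 * Worked example (illustrative): for prog->aux->stack_depth == 24,
 *
 *   bpf_stack_adjust = round_up(24, 16)      = 32
 *   stack_adjust     = round_up(8 * 8, 16)   = 64    (ra, fp, s0..s5)
 *   stack_adjust    += bpf_stack_adjust      = 96
 *
 * so the prologue drops $sp by 96; $ra..$s5 land at offsets 88..32
 * from the new $sp, and BPF_REG_FP ($s4) points 32 bytes above it.
 */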
+
+static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
+{
+	int stack_adjust = ctx->stack_size;
+	int load_offset;
+
+	load_offset = stack_adjust - sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);
+
+	load_offset -= sizeof(long);
+	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);
+
+	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);
+
+	if (!is_tail_call) {
+		/* Set return value */
+		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
+		/* Return to the caller */
+		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
+	} else {
+		/*
+		 * Call the next bpf prog and skip over its first
+		 * instruction, which initializes the TCC.
+		 */
+		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
+	}
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+	__build_epilogue(ctx, false);
+}
+
+bool bpf_jit_supports_kfunc_call(void)
+{
+	return true;
+}
+
+/* initialized on the first pass of build_body() */
+static int out_offset = -1;
+static int emit_bpf_tail_call(struct jit_ctx *ctx)
+{
+	int off;
+	u8 tcc = tail_call_reg(ctx);
+	u8 a1 = LOONGARCH_GPR_A1;
+	u8 a2 = LOONGARCH_GPR_A2;
+	u8 t1 = LOONGARCH_GPR_T1;
+	u8 t2 = LOONGARCH_GPR_T2;
+	u8 t3 = LOONGARCH_GPR_T3;
+	const int idx0 = ctx->idx;
+
+#define cur_offset (ctx->idx - idx0)
+#define jmp_offset (out_offset - (cur_offset))
+
+	/*
+	 * a0: &ctx
+	 * a1: &array
+	 * a2: index
+	 *
+	 * if (index >= array->map.max_entries)
+	 *	 goto out;
+	 */
+	off = offsetof(struct bpf_array, map.max_entries);
+	emit_insn(ctx, ldwu, t1, a1, off);
+	/* bgeu $a2, $t1, jmp_offset */
+	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
+		goto toofar;
+
+	/*
+	 * if (--TCC < 0)
+	 *	 goto out;
+	 */
+	emit_insn(ctx, addid, REG_TCC, tcc, -1);
+	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+		goto toofar;
+
+	/*
+	 * prog = array->ptrs[index];
+	 * if (!prog)
+	 *	 goto out;
+	 */
+	emit_insn(ctx, alsld, t2, a2, a1, 2);
+	off = offsetof(struct bpf_array, ptrs);
+	emit_insn(ctx, ldd, t2, t2, off);
+	/* beq $t2, $zero, jmp_offset */
+	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+		goto toofar;
+
+	/* goto *(prog->bpf_func + 4); */
+	off = offsetof(struct bpf_prog, bpf_func);
+	emit_insn(ctx, ldd, t3, t2, off);
+	__build_epilogue(ctx, true);
+
+	/* out: */
+	if (out_offset == -1)
+		out_offset = cur_offset;
+	if (cur_offset != out_offset) {
+		pr_err_once("tail_call out_offset = %d, expected %d!\n",
+			    cur_offset, out_offset);
+		return -1;
+	}
+
+	return 0;
+
+toofar:
+	pr_info_once("tail_call: jump too far\n");
+	return -1;
+#undef cur_offset
+#undef jmp_offset
+}
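
For context, a sketch of the BPF-side construct this implements, written against libbpf conventions (bpf_helpers.h); the map and section names are invented for illustration. The jirl with offset 1 in __build_epilogue() is what lands the callee one 4-byte instruction past its TCC-initializing addi.d.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 4);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

SEC("xdp")
int dispatcher(struct xdp_md *ctx)
{
	/* JITed into the bounds/TCC/NULL checks emitted above. */
	bpf_tail_call(ctx, &jmp_table, 0);

	/* Reached only if the tail call fails (empty slot or TCC limit). */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";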
+
+static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 t1 = LOONGARCH_GPR_T1;
+	const u8 t2 = LOONGARCH_GPR_T2;
+	const u8 t3 = LOONGARCH_GPR_T3;
+	const u8 src = regmap[insn->src_reg];
+	const u8 dst = regmap[insn->dst_reg];
+	const s16 off = insn->off;
+	const s32 imm = insn->imm;
+	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;
+
+	move_imm(ctx, t1, off, false);
+	emit_insn(ctx, addd, t1, dst, t1);
+	move_reg(ctx, t3, src);
+
+	switch (imm) {
+	/* lock *(size *)(dst + off) <op>= src */
+	case BPF_ADD:
+		if (isdw)
+			emit_insn(ctx, amaddd, t2, t1, src);
+		else
+			emit_insn(ctx, amaddw, t2, t1, src);
+		break;
+	case BPF_AND:
+		if (isdw)
+			emit_insn(ctx, amandd, t2, t1, src);
+		else
+			emit_insn(ctx, amandw, t2, t1, src);
+		break;
+	case BPF_OR:
+		if (isdw)
+			emit_insn(ctx, amord, t2, t1, src);
+		else
+			emit_insn(ctx, amorw, t2, t1, src);
+		break;
+	case BPF_XOR:
+		if (isdw)
+			emit_insn(ctx, amxord, t2, t1, src);
+		else
+			emit_insn(ctx, amxorw, t2, t1, src);
+		break;
+	/* src = atomic_fetch_<op>(dst + off, src) */
+	case BPF_ADD | BPF_FETCH:
+		if (isdw) {
+			emit_insn(ctx, amaddd, src, t1, t3);
+		} else {
+			emit_insn(ctx, amaddw, src, t1, t3);
+			emit_zext_32(ctx, src, true);
+		}
+		break;
+	case BPF_AND | BPF_FETCH:
+		if (isdw) {
+			emit_insn(ctx, amandd, src, t1, t3);
+		} else {
+			emit_insn(ctx, amandw, src, t1, t3);
+			emit_zext_32(ctx, src, true);
+		}
+		break;
+	case BPF_OR | BPF_FETCH:
+		if (isdw) {
+			emit_insn(ctx, amord, src, t1, t3);
+		} else {
+			emit_insn(ctx, amorw, src, t1, t3);
+			emit_zext_32(ctx, src, true);
+		}
+		break;
+	case BPF_XOR | BPF_FETCH:
+		if (isdw) {
+			emit_insn(ctx, amxord, src, t1, t3);
+		} else {
+			emit_insn(ctx, amxorw, src, t1, t3);
+			emit_zext_32(ctx, src, true);
+		}
+		break;
+	/* src = atomic_xchg(dst + off, src); */
+	case BPF_XCHG:
+		if (isdw) {
+			emit_insn(ctx, amswapd, src, t1, t3);
+		} else {
+			emit_insn(ctx, amswapw, src, t1, t3);
+			emit_zext_32(ctx, src, true);
+		}
+		break;
+	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
+	case BPF_CMPXCHG: {
+		u8 r0 = regmap[BPF_REG_0];
+
+		move_reg(ctx, t2, r0);
+		if (isdw) {
+			emit_insn(ctx, lld, r0, t1, 0);
+			emit_insn(ctx, bne, t2, r0, 4);
+			move_reg(ctx, t3, src);
+			emit_insn(ctx, scd, t3, t1, 0);
+			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
+		} else {
+			emit_insn(ctx, llw, r0, t1, 0);
+			emit_zext_32(ctx, t2, true);
+			emit_zext_32(ctx, r0, true);
+			emit_insn(ctx, bne, t2, r0, 4);
+			move_reg(ctx, t3, src);
+			emit_insn(ctx, scw, t3, t1, 0);
+			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
+			emit_zext_32(ctx, r0, true);
+		}
+		break;
+	}
+	}
+}
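
These cases map directly onto the LoongArch AM* atomics, with only cmpxchg needing an ll/sc loop. Illustrative BPF encodings covered above, using the BPF_ATOMIC_OP() helper from include/linux/filter.h:

/*
 * Illustrative BPF encodings handled by emit_atomic():
 *
 *   lock *(u64 *)(dst + off) += src
 *     BPF_ATOMIC_OP(BPF_DW, BPF_ADD, dst, src, off)     -> amadd.d
 *
 *   src = atomic_fetch_add(dst + off, src)
 *     BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, ...)   -> amadd.d (fetch)
 *
 *   r0 = atomic_cmpxchg(dst + off, r0, src)
 *     BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, ...)           -> ll.d/sc.d loop
 */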
+
+static bool is_signed_bpf_cond(u8 cond)
+{
+	return cond == BPF_JSGT || cond == BPF_JSLT ||
+	       cond == BPF_JSGE || cond == BPF_JSLE;
+}
+
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
+{
+	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU ||
+			  BPF_CLASS(insn->code) == BPF_JMP32;
+	const u8 code = insn->code;
+	const u8 cond = BPF_OP(code);
+	const u8 t1 = LOONGARCH_GPR_T1;
+	const u8 t2 = LOONGARCH_GPR_T2;
+	const u8 src = regmap[insn->src_reg];
+	const u8 dst = regmap[insn->dst_reg];
+	const s16 off = insn->off;
+	const s32 imm = insn->imm;
+	int jmp_offset;
+	int i = insn - ctx->prog->insnsi;
+
+	switch (code) {
+	/* dst = src */
+	case BPF_ALU | BPF_MOV | BPF_X:
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		move_reg(ctx, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = imm */
+	case BPF_ALU | BPF_MOV | BPF_K:
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+		move_imm(ctx, dst, imm, is32);
+		break;
+
+	/* dst = dst + src */
+	case BPF_ALU | BPF_ADD | BPF_X:
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+		emit_insn(ctx, addd, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst + imm */
+	case BPF_ALU | BPF_ADD | BPF_K:
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+		if (is_signed_imm12(imm)) {
+			emit_insn(ctx, addid, dst, dst, imm);
+		} else {
+			move_imm(ctx, t1, imm, is32);
+			emit_insn(ctx, addd, dst, dst, t1);
+		}
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst - src */
+	case BPF_ALU | BPF_SUB | BPF_X:
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+		emit_insn(ctx, subd, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst - imm */
+	case BPF_ALU | BPF_SUB | BPF_K:
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+		if (is_signed_imm12(-imm)) {
+			emit_insn(ctx, addid, dst, dst, -imm);
+		} else {
+			move_imm(ctx, t1, imm, is32);
+			emit_insn(ctx, subd, dst, dst, t1);
+		}
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst * src */
+	case BPF_ALU | BPF_MUL | BPF_X:
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+		emit_insn(ctx, muld, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst * imm */
+	case BPF_ALU | BPF_MUL | BPF_K:
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+		move_imm(ctx, t1, imm, is32);
+		emit_insn(ctx, muld, dst, dst, t1);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst / src */
+	case BPF_ALU | BPF_DIV | BPF_X:
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+		emit_zext_32(ctx, dst, is32);
+		move_reg(ctx, t1, src);
+		emit_zext_32(ctx, t1, is32);
+		emit_insn(ctx, divdu, dst, dst, t1);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst / imm */
+	case BPF_ALU | BPF_DIV | BPF_K:
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+		move_imm(ctx, t1, imm, is32);
+		emit_zext_32(ctx, dst, is32);
+		emit_insn(ctx, divdu, dst, dst, t1);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst % src */
+	case BPF_ALU | BPF_MOD | BPF_X:
+	case BPF_ALU64 | BPF_MOD | BPF_X:
+		emit_zext_32(ctx, dst, is32);
+		move_reg(ctx, t1, src);
+		emit_zext_32(ctx, t1, is32);
+		emit_insn(ctx, moddu, dst, dst, t1);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst % imm */
+	case BPF_ALU | BPF_MOD | BPF_K:
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+		move_imm(ctx, t1, imm, is32);
+		emit_zext_32(ctx, dst, is32);
+		emit_insn(ctx, moddu, dst, dst, t1);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = -dst */
+	case BPF_ALU | BPF_NEG:
+	case BPF_ALU64 | BPF_NEG:
+		move_imm(ctx, t1, imm, is32);
+		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst & src */
+	case BPF_ALU | BPF_AND | BPF_X:
+	case BPF_ALU64 | BPF_AND | BPF_X:
+		emit_insn(ctx, and, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst & imm */
+	case BPF_ALU | BPF_AND | BPF_K:
+	case BPF_ALU64 | BPF_AND | BPF_K:
+		if (is_unsigned_imm12(imm)) {
+			emit_insn(ctx, andi, dst, dst, imm);
+		} else {
+			move_imm(ctx, t1, imm, is32);
+			emit_insn(ctx, and, dst, dst, t1);
+		}
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst | src */
+	case BPF_ALU | BPF_OR | BPF_X:
+	case BPF_ALU64 | BPF_OR | BPF_X:
+		emit_insn(ctx, or, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst | imm */
+	case BPF_ALU | BPF_OR | BPF_K:
+	case BPF_ALU64 | BPF_OR | BPF_K:
+		if (is_unsigned_imm12(imm)) {
+			emit_insn(ctx, ori, dst, dst, imm);
+		} else {
+			move_imm(ctx, t1, imm, is32);
+			emit_insn(ctx, or, dst, dst, t1);
+		}
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst ^ src */
+	case BPF_ALU | BPF_XOR | BPF_X:
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+		emit_insn(ctx, xor, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst ^ imm */
+	case BPF_ALU | BPF_XOR | BPF_K:
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+		if (is_unsigned_imm12(imm)) {
+			emit_insn(ctx, xori, dst, dst, imm);
+		} else {
+			move_imm(ctx, t1, imm, is32);
+			emit_insn(ctx, xor, dst, dst, t1);
+		}
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	/* dst = dst << src (logical) */
+	case BPF_ALU | BPF_LSH | BPF_X:
+		emit_insn(ctx, sllw, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		emit_insn(ctx, slld, dst, dst, src);
+		break;
+
+	/* dst = dst << imm (logical) */
+	case BPF_ALU | BPF_LSH | BPF_K:
+		emit_insn(ctx, slliw, dst, dst, imm);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+		emit_insn(ctx, sllid, dst, dst, imm);
+		break;
+
+	/* dst = dst >> src (logical) */
+	case BPF_ALU | BPF_RSH | BPF_X:
+		emit_insn(ctx, srlw, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		emit_insn(ctx, srld, dst, dst, src);
+		break;
+
+	/* dst = dst >> imm (logical) */
+	case BPF_ALU | BPF_RSH | BPF_K:
+		emit_insn(ctx, srliw, dst, dst, imm);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+		emit_insn(ctx, srlid, dst, dst, imm);
+		break;
+
+	/* dst = dst >> src (arithmetic) */
+	case BPF_ALU | BPF_ARSH | BPF_X:
+		emit_insn(ctx, sraw, dst, dst, src);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		emit_insn(ctx, srad, dst, dst, src);
+		break;
+
+	/* dst = dst >> imm (arithmetic) */
+	case BPF_ALU | BPF_ARSH | BPF_K:
+		emit_insn(ctx, sraiw, dst, dst, imm);
+		emit_zext_32(ctx, dst, is32);
+		break;
+
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+		emit_insn(ctx, sraid, dst, dst, imm);
+		break;
+
+	/* dst = BSWAP##imm(dst) */
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+		switch (imm) {
+		case 16:
+			/* zero-extend 16 bits into 64 bits */
+			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
+			break;
+		case 32:
+			/* zero-extend 32 bits into 64 bits */
+			emit_zext_32(ctx, dst, is32);
+			break;
+		case 64:
+			/* do nothing */
+			break;
+		}
+		break;
+
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+		switch (imm) {
+		case 16:
+			emit_insn(ctx, revb2h, dst, dst);
+			/* zero-extend 16 bits into 64 bits */
+			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
+			break;
+		case 32:
+			emit_insn(ctx, revb2w, dst, dst);
+			/* zero-extend 32 bits into 64 bits */
+			emit_zext_32(ctx, dst, is32);
+			break;
+		case 64:
+			emit_insn(ctx, revbd, dst, dst);
+			break;
+		}
+		break;
+
+	/* PC += off if dst cond src */
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
+	case BPF_JMP32 | BPF_JEQ | BPF_X:
+	case BPF_JMP32 | BPF_JNE | BPF_X:
+	case BPF_JMP32 | BPF_JGT | BPF_X:
+	case BPF_JMP32 | BPF_JGE | BPF_X:
+	case BPF_JMP32 | BPF_JLT | BPF_X:
+	case BPF_JMP32 | BPF_JLE | BPF_X:
+	case BPF_JMP32 | BPF_JSGT | BPF_X:
+	case BPF_JMP32 | BPF_JSGE | BPF_X:
+	case BPF_JMP32 | BPF_JSLT | BPF_X:
+	case BPF_JMP32 | BPF_JSLE | BPF_X:
+		jmp_offset = bpf2la_offset(i, off, ctx);
+		move_reg(ctx, t1, dst);
+		move_reg(ctx, t2, src);
+		if (is_signed_bpf_cond(BPF_OP(code))) {
+			emit_sext_32(ctx, t1, is32);
+			emit_sext_32(ctx, t2, is32);
+		} else {
+			emit_zext_32(ctx, t1, is32);
+			emit_zext_32(ctx, t2, is32);
+		}
+		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
+			goto toofar;
+		break;
+
+	/* PC += off if dst cond imm */
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	case BPF_JMP | BPF_JNE | BPF_K:
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JEQ | BPF_K:
+	case BPF_JMP32 | BPF_JNE | BPF_K:
+	case BPF_JMP32 | BPF_JGT | BPF_K:
+	case BPF_JMP32 | BPF_JGE | BPF_K:
+	case BPF_JMP32 | BPF_JLT | BPF_K:
+	case BPF_JMP32 | BPF_JLE | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K: {
+		u8 t7 = -1;
+		jmp_offset = bpf2la_offset(i, off, ctx);
+		if (imm) {
+			move_imm(ctx, t1, imm, false);
+			t7 = t1;
+		} else {
+			/* If imm is 0, simply use zero register. */
+			t7 = LOONGARCH_GPR_ZERO;
+		}
+		move_reg(ctx, t2, dst);
+		if (is_signed_bpf_cond(BPF_OP(code))) {
+			emit_sext_32(ctx, t7, is32);
+			emit_sext_32(ctx, t2, is32);
+		} else {
+			emit_zext_32(ctx, t7, is32);
+			emit_zext_32(ctx, t2, is32);
+		}
+		if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0)
+			goto toofar;
+		break;
+	}
+
+	/* PC += off if dst & src */
+	case BPF_JMP | BPF_JSET | BPF_X:
+	case BPF_JMP32 | BPF_JSET | BPF_X:
+		jmp_offset = bpf2la_offset(i, off, ctx);
+		emit_insn(ctx, and, t1, dst, src);
+		emit_zext_32(ctx, t1, is32);
+		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+			goto toofar;
+		break;
+
+	/* PC += off if dst & imm */
+	case BPF_JMP | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		jmp_offset = bpf2la_offset(i, off, ctx);
+		move_imm(ctx, t1, imm, is32);
+		emit_insn(ctx, and, t1, dst, t1);
+		emit_zext_32(ctx, t1, is32);
+		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
+			goto toofar;
+		break;
+
+	/* PC += off */
+	case BPF_JMP | BPF_JA:
+		jmp_offset = bpf2la_offset(i, off, ctx);
+		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
+			goto toofar;
+		break;
+
+	/* function call */
+	case BPF_JMP | BPF_CALL: {
+		int ret;
+		u64 func_addr;
+		bool func_addr_fixed;
+
+		mark_call(ctx);
+		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+					    &func_addr, &func_addr_fixed);
+		if (ret < 0)
+			return ret;
+
+		move_imm(ctx, t1, func_addr, is32);
+		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
+		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
+		break;
+	}
+
+	/* tail call */
+	case BPF_JMP | BPF_TAIL_CALL:
+		mark_tail_call(ctx);
+		if (emit_bpf_tail_call(ctx) < 0)
+			return -EINVAL;
+		break;
+
+	/* function return */
+	case BPF_JMP | BPF_EXIT:
+		emit_sext_32(ctx, regmap[BPF_REG_0], true);
+
+		if (i == ctx->prog->len - 1)
+			break;
+
+		jmp_offset = epilogue_offset(ctx);
+		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
+			goto toofar;
+		break;
+
+	/* dst = imm64 */
+	case BPF_LD | BPF_IMM | BPF_DW: {
+		u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;
+
+		move_imm(ctx, dst, imm64, is32);
+		return 1;
+	}
+
+	/* dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_DW:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, ldbu, dst, src, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, ldxbu, dst, src, t1);
+			}
+			break;
+		case BPF_H:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, ldhu, dst, src, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, ldxhu, dst, src, t1);
+			}
+			break;
+		case BPF_W:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, ldwu, dst, src, off);
+			} else if (is_signed_imm14(off)) {
+				emit_insn(ctx, ldptrw, dst, src, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, ldxwu, dst, src, t1);
+			}
+			break;
+		case BPF_DW:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, ldd, dst, src, off);
+			} else if (is_signed_imm14(off)) {
+				emit_insn(ctx, ldptrd, dst, src, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, ldxd, dst, src, t1);
+			}
+			break;
+		}
+		break;
+
+	/* *(size *)(dst + off) = imm */
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_DW:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			move_imm(ctx, t1, imm, is32);
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, stb, t1, dst, off);
+			} else {
+				move_imm(ctx, t2, off, is32);
+				emit_insn(ctx, stxb, t1, dst, t2);
+			}
+			break;
+		case BPF_H:
+			move_imm(ctx, t1, imm, is32);
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, sth, t1, dst, off);
+			} else {
+				move_imm(ctx, t2, off, is32);
+				emit_insn(ctx, stxh, t1, dst, t2);
+			}
+			break;
+		case BPF_W:
+			move_imm(ctx, t1, imm, is32);
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, stw, t1, dst, off);
+			} else if (is_signed_imm14(off)) {
+				emit_insn(ctx, stptrw, t1, dst, off);
+			} else {
+				move_imm(ctx, t2, off, is32);
+				emit_insn(ctx, stxw, t1, dst, t2);
+			}
+			break;
+		case BPF_DW:
+			move_imm(ctx, t1, imm, is32);
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, std, t1, dst, off);
+			} else if (is_signed_imm14(off)) {
+				emit_insn(ctx, stptrd, t1, dst, off);
+			} else {
+				move_imm(ctx, t2, off, is32);
+				emit_insn(ctx, stxd, t1, dst, t2);
+			}
+			break;
+		}
+		break;
+
+	/* *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_B:
+	case BPF_STX | BPF_MEM | BPF_H:
+	case BPF_STX | BPF_MEM | BPF_W:
+	case BPF_STX | BPF_MEM | BPF_DW:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, stb, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, stxb, src, dst, t1);
+			}
+			break;
+		case BPF_H:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, sth, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, stxh, src, dst, t1);
+			}
+			break;
+		case BPF_W:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, stw, src, dst, off);
+			} else if (is_signed_imm14(off)) {
+				emit_insn(ctx, stptrw, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, stxw, src, dst, t1);
+			}
+			break;
+		case BPF_DW:
+			if (is_signed_imm12(off)) {
+				emit_insn(ctx, std, src, dst, off);
+			} else if (is_signed_imm14(off)) {
+				emit_insn(ctx, stptrd, src, dst, off);
+			} else {
+				move_imm(ctx, t1, off, is32);
+				emit_insn(ctx, stxd, src, dst, t1);
+			}
+			break;
+		}
+		break;
+
+	case BPF_STX | BPF_ATOMIC | BPF_W:
+	case BPF_STX | BPF_ATOMIC | BPF_DW:
+		emit_atomic(insn, ctx);
+		break;
+
+	default:
+		pr_err("bpf_jit: unknown opcode %02x\n", code);
+		return -EINVAL;
+	}
+
+	return 0;
+
+toofar:
+	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
+	return -E2BIG;
+}
+
+static int build_body(struct jit_ctx *ctx, bool extra_pass)
+{
+	int i;
+	const struct bpf_prog *prog = ctx->prog;
+
+	for (i = 0; i < prog->len; i++) {
+		const struct bpf_insn *insn = &prog->insnsi[i];
+		int ret;
+
+		if (ctx->image == NULL)
+			ctx->offset[i] = ctx->idx;
+
+		ret = build_insn(insn, ctx, extra_pass);
+		if (ret > 0) {
+			i++;
+			if (ctx->image == NULL)
+				ctx->offset[i] = ctx->idx;
+			continue;
+		}
+		if (ret)
+			return ret;
+	}
+
+	if (ctx->image == NULL)
+		ctx->offset[i] = ctx->idx;
+
+	return 0;
+}
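
On the first pass ctx->image is NULL, so emit_insn() only advances ctx->idx; ctx->offset[i] records the native index of the first instruction for BPF insn i, and offset[prog->len] the total. A worked example with made-up expansion counts:

/*
 * Illustration: for a three-insn program where BPF insn 1 expands to
 * two native instructions and the others to one each, the sizing pass
 * records
 *
 *   ctx->offset[] = { 0, 1, 3, 4 }
 *
 * so jumps resolve through bpf2la_offset() against these indices, and
 * offset[3] == 4 is the program's total length in native instructions.
 */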
+
+/* Fill space with break instructions */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	u32 *ptr;
+
+	/* We are guaranteed to have aligned memory */
+	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+		*ptr++ = INSN_BREAK;
+}
+
+static int validate_code(struct jit_ctx *ctx)
+{
+	int i;
+	union loongarch_instruction insn;
+
+	for (i = 0; i < ctx->idx; i++) {
+		insn = ctx->image[i];
+		/* Check INSN_BREAK */
+		if (insn.word == INSN_BREAK)
+			return -1;
+	}
+
+	return 0;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	bool tmp_blinded = false, extra_pass = false;
+	u8 *image_ptr;
+	int image_size;
+	struct jit_ctx ctx;
+	struct jit_data *jit_data;
+	struct bpf_binary_header *header;
+	struct bpf_prog *tmp, *orig_prog = prog;
+
+	/*
+	 * If BPF JIT was not enabled then we must fall back to
+	 * the interpreter.
+	 */
+	if (!prog->jit_requested)
+		return orig_prog;
+
+	tmp = bpf_jit_blind_constants(prog);
+	/*
+	 * If blinding was requested and we failed during blinding,
+	 * we must fall back to the interpreter. Otherwise, we save
+	 * the new JITed code.
+	 */
+	if (IS_ERR(tmp))
+		return orig_prog;
+
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	jit_data = prog->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			prog = orig_prog;
+			goto out;
+		}
+		prog->aux->jit_data = jit_data;
+	}
+	if (jit_data->ctx.offset) {
+		ctx = jit_data->ctx;
+		image_ptr = jit_data->image;
+		header = jit_data->header;
+		extra_pass = true;
+		image_size = sizeof(u32) * ctx.idx;
+		goto skip_init_ctx;
+	}
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.prog = prog;
+
+	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
+	if (ctx.offset == NULL) {
+		prog = orig_prog;
+		goto out_offset;
+	}
+
+	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
+	build_prologue(&ctx);
+	if (build_body(&ctx, extra_pass)) {
+		prog = orig_prog;
+		goto out_offset;
+	}
+	ctx.epilogue_offset = ctx.idx;
+	build_epilogue(&ctx);
+
+	/*
+	 * Now we know the actual image size.
+	 * As each LoongArch instruction is 32 bits long, we translate
+	 * the number of JITed instructions into the size required to
+	 * store the JITed code.
+	 */
+	image_size = sizeof(u32) * ctx.idx;
+	/* Now we know the size of the structure to make */
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL) {
+		prog = orig_prog;
+		goto out_offset;
+	}
+
+	/* 2. Now, the actual pass to generate final JIT code */
+	ctx.image = (union loongarch_instruction *)image_ptr;
+
+skip_init_ctx:
+	ctx.idx = 0;
+
+	build_prologue(&ctx);
+	if (build_body(&ctx, extra_pass)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_offset;
+	}
+	build_epilogue(&ctx);
+
+	/* 3. Extra pass to validate JITed code */
+	if (validate_code(&ctx)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_offset;
+	}
+
+	/* And we're done */
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+
+	/* Update the icache */
+	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
+
+	if (!prog->is_func || extra_pass) {
+		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
+			pr_err_once("multi-func JIT bug %d != %d\n",
+				    ctx.idx, jit_data->ctx.idx);
+			bpf_jit_binary_free(header);
+			prog->bpf_func = NULL;
+			prog->jited = 0;
+			prog->jited_len = 0;
+			goto out_offset;
+		}
+		bpf_jit_binary_lock_ro(header);
+	} else {
+		jit_data->ctx = ctx;
+		jit_data->image = image_ptr;
+		jit_data->header = header;
+	}
+	prog->jited = 1;
+	prog->jited_len = image_size;
+	prog->bpf_func = (void *)ctx.image;
+
+	if (!prog->is_func || extra_pass) {
+		int i;
+
+		/* offset[prog->len] is the size of the program */
+		for (i = 0; i <= prog->len; i++)
+			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
+		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
+
+out_offset:
+		kvfree(ctx.offset);
+		kfree(jit_data);
+		prog->aux->jit_data = NULL;
+	}
+
+out:
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);
+
+	out_offset = -1;
+
+	return prog;
+}
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
new file mode 100644
index 000000000000..e665ddb0aeb8
--- /dev/null
+++ b/arch/loongarch/net/bpf_jit.h
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * BPF JIT compiler for LoongArch
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+
+struct jit_ctx {
+	const struct bpf_prog *prog;
+	unsigned int idx;
+	unsigned int flags;
+	unsigned int epilogue_offset;
+	u32 *offset;
+	union loongarch_instruction *image;
+	u32 stack_size;
+};
+
+struct jit_data {
+	struct bpf_binary_header *header;
+	u8 *image;
+	struct jit_ctx ctx;
+};
+
+#define emit_insn(ctx, func, ...)						\
+do {										\
+	if (ctx->image != NULL) {						\
+		union loongarch_instruction *insn = &ctx->image[ctx->idx];	\
+		emit_##func(insn, ##__VA_ARGS__);				\
+	}									\
+	ctx->idx++;								\
+} while (0)
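+
+/*
+ * Note: emit_insn() is what makes the two-pass scheme work. During the
+ * initial sizing pass ctx->image is NULL, so the macro only advances
+ * ctx->idx to count instructions; during the final pass it also writes
+ * the encoded instruction into the image, e.g. (illustrative):
+ *
+ *	emit_insn(ctx, addiw, rd, rj, 0);	// rd = (s32)rj
+ */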
+
+#define is_signed_imm12(val)	signed_imm_check(val, 12)
+#define is_signed_imm14(val)	signed_imm_check(val, 14)
+#define is_signed_imm16(val)	signed_imm_check(val, 16)
+#define is_signed_imm26(val)	signed_imm_check(val, 26)
+#define is_signed_imm32(val)	signed_imm_check(val, 32)
+#define is_signed_imm52(val)	signed_imm_check(val, 52)
+#define is_unsigned_imm12(val)	unsigned_imm_check(val, 12)
+
+static inline int bpf2la_offset(int bpf_insn, int off, const struct jit_ctx *ctx)
+{
+	/* BPF JMP offset is relative to the next instruction */
+	bpf_insn++;
+	/*
+	 * LoongArch branch instructions, by contrast, encode the offset
+	 * relative to the branch itself, so subtract 1 from the
+	 * instruction offset to measure from the branch instruction.
+	 */
+	return (ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1));
+}
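+
+/*
+ * Worked example with made-up numbers: after the increment above, suppose
+ * ctx->offset[bpf_insn] == 10, so the branch itself is (typically) the
+ * native instruction at index 9, and ctx->offset[bpf_insn + off] == 25.
+ * The result is 25 - (10 - 1) = 16: the target lies 16 instructions after
+ * the branch, which is exactly what the hardware encoding expects.
+ */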
+
+static inline int epilogue_offset(const struct jit_ctx *ctx)
+{
+	int from = ctx->idx;
+	int to = ctx->epilogue_offset;
+
+	return (to - from);
+}
+
+/* Zero-extend 32 bits into 64 bits */
+static inline void emit_zext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32)
+{
+	if (!is32)
+		return;
+
+	emit_insn(ctx, lu32id, reg, 0);
+}
+
+/* Sign-extend 32 bits into 64 bits */
+static inline void emit_sext_32(struct jit_ctx *ctx, enum loongarch_gpr reg, bool is32)
+{
+	if (!is32)
+		return;
+
+	emit_insn(ctx, addiw, reg, reg, 0);
+}
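+
+/*
+ * Both helpers lean on LoongArch instruction semantics: "lu32id reg, 0"
+ * clears bits 63:32 outright, while "addiw reg, reg, 0" is the canonical
+ * idiom for sign-extending bit 31 into the upper half.
+ */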
+
+static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm, bool is32)
+{
+	long imm_11_0, imm_31_12, imm_51_32, imm_63_52, imm_51_0, imm_51_31;
+
+	/* or rd, $zero, $zero */
+	if (imm == 0) {
+		emit_insn(ctx, or, rd, LOONGARCH_GPR_ZERO, LOONGARCH_GPR_ZERO);
+		return;
+	}
+
+	/* addiw rd, $zero, imm_11_0 */
+	if (is_signed_imm12(imm)) {
+		emit_insn(ctx, addiw, rd, LOONGARCH_GPR_ZERO, imm);
+		goto zext;
+	}
+
+	/* ori rd, $zero, imm_11_0 */
+	if (is_unsigned_imm12(imm)) {
+		emit_insn(ctx, ori, rd, LOONGARCH_GPR_ZERO, imm);
+		goto zext;
+	}
+
+	/* lu52id rd, $zero, imm_63_52 */
+	imm_63_52 = (imm >> 52) & 0xfff;
+	imm_51_0 = imm & 0xfffffffffffff;
+	if (imm_63_52 != 0 && imm_51_0 == 0) {
+		emit_insn(ctx, lu52id, rd, LOONGARCH_GPR_ZERO, imm_63_52);
+		return;
+	}
+
+	/* lu12iw rd, imm_31_12 */
+	imm_31_12 = (imm >> 12) & 0xfffff;
+	emit_insn(ctx, lu12iw, rd, imm_31_12);
+
+	/* ori rd, rd, imm_11_0 */
+	imm_11_0 = imm & 0xfff;
+	if (imm_11_0 != 0)
+		emit_insn(ctx, ori, rd, rd, imm_11_0);
+
+	if (!is_signed_imm32(imm)) {
+		if (imm_51_0 != 0) {
+			/*
+			 * If bit[51:31] is all 0 or all 1, then bit[51:32]
+			 * is already the sign extension produced by lu12iw,
+			 * and there is no need to emit lu32id to refill it.
+			 */
+			imm_51_31 = (imm >> 31) & 0x1fffff;
+			if (imm_51_31 != 0 && imm_51_31 != 0x1fffff) {
+				/* lu32id rd, imm_51_32 */
+				imm_51_32 = (imm >> 32) & 0xfffff;
+				emit_insn(ctx, lu32id, rd, imm_51_32);
+			}
+		}
+
+		/* lu52id rd, rd, imm_63_52 */
+		if (!is_signed_imm52(imm))
+			emit_insn(ctx, lu52id, rd, rd, imm_63_52);
+	}
+
+zext:
+	emit_zext_32(ctx, rd, is32);
+}
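+
+/*
+ * Worked example (illustrative): imm = 0x0123456789abcdef needs the full
+ * four-instruction sequence:
+ *
+ *	lu12iw rd, 0x89abc	// rd = 0xffffffff89abc000 (bit 31 sign-extends)
+ *	ori    rd, rd, 0xdef	// rd = 0xffffffff89abcdef
+ *	lu32id rd, 0x34567	// rd = 0x0003456789abcdef (fills bit[51:32])
+ *	lu52id rd, rd, 0x012	// rd = 0x0123456789abcdef
+ */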
+
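+/* Register-to-register move: "or rd, rj, $zero" is the canonical LoongArch idiom. */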
+static inline void move_reg(struct jit_ctx *ctx, enum loongarch_gpr rd,
+			    enum loongarch_gpr rj)
+{
+	emit_insn(ctx, or, rd, rj, LOONGARCH_GPR_ZERO);
+}
+
+static inline int invert_jmp_cond(u8 cond)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		return BPF_JNE;
+	case BPF_JNE:
+	case BPF_JSET:
+		return BPF_JEQ;
+	case BPF_JGT:
+		return BPF_JLE;
+	case BPF_JGE:
+		return BPF_JLT;
+	case BPF_JLT:
+		return BPF_JGE;
+	case BPF_JLE:
+		return BPF_JGT;
+	case BPF_JSGT:
+		return BPF_JSLE;
+	case BPF_JSGE:
+		return BPF_JSLT;
+	case BPF_JSLT:
+		return BPF_JSGE;
+	case BPF_JSLE:
+		return BPF_JSGT;
+	}
+	return -1;
+}
+
+static inline void cond_jmp_offset(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+				   enum loongarch_gpr rd, int jmp_offset)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		/* PC += jmp_offset if rj == rd */
+		emit_insn(ctx, beq, rj, rd, jmp_offset);
+		return;
+	case BPF_JNE:
+	case BPF_JSET:
+		/* PC += jmp_offset if rj != rd */
+		emit_insn(ctx, bne, rj, rd, jmp_offset);
+		return;
+	case BPF_JGT:
+		/* PC += jmp_offset if rj > rd (unsigned) */
+		emit_insn(ctx, bltu, rd, rj, jmp_offset);
+		return;
+	case BPF_JLT:
+		/* PC += jmp_offset if rj < rd (unsigned) */
+		emit_insn(ctx, bltu, rj, rd, jmp_offset);
+		return;
+	case BPF_JGE:
+		/* PC += jmp_offset if rj >= rd (unsigned) */
+		emit_insn(ctx, bgeu, rj, rd, jmp_offset);
+		return;
+	case BPF_JLE:
+		/* PC += jmp_offset if rj <= rd (unsigned) */
+		emit_insn(ctx, bgeu, rd, rj, jmp_offset);
+		return;
+	case BPF_JSGT:
+		/* PC += jmp_offset if rj > rd (signed) */
+		emit_insn(ctx, blt, rd, rj, jmp_offset);
+		return;
+	case BPF_JSLT:
+		/* PC += jmp_offset if rj < rd (signed) */
+		emit_insn(ctx, blt, rj, rd, jmp_offset);
+		return;
+	case BPF_JSGE:
+		/* PC += jmp_offset if rj >= rd (signed) */
+		emit_insn(ctx, bge, rj, rd, jmp_offset);
+		return;
+	case BPF_JSLE:
+		/* PC += jmp_offset if rj <= rd (signed) */
+		emit_insn(ctx, bge, rd, rj, jmp_offset);
+		return;
+	}
+}
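+
+/*
+ * BPF_JSET shares the BPF_JNE/BPF_JEQ branches above. That only works if
+ * the caller has already reduced "dst & src" into one of the registers and
+ * passed $zero as the other, so the jump-if-set test degenerates into a
+ * compare with zero -- an assumption about the emitter in bpf_jit.c, which
+ * is not part of this header.
+ */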
+
+static inline void cond_jmp_offs26(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+				   enum loongarch_gpr rd, int jmp_offset)
+{
+	cond = invert_jmp_cond(cond);
+	cond_jmp_offset(ctx, cond, rj, rd, 2);
+	emit_insn(ctx, b, jmp_offset);
+}
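+
+/*
+ * The helper above synthesizes a long conditional branch from a short one
+ * by inverting the condition to hop over an unconditional 26-bit jump.
+ * For example, "branch to far_away if rj == rd" becomes:
+ *
+ *	bne	rj, rd, 2	// skip the jump below when rj != rd
+ *	b	far_away	// 26-bit PC-relative jump
+ */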
+
+static inline void uncond_jmp_offs26(struct jit_ctx *ctx, int jmp_offset)
+{
+	emit_insn(ctx, b, jmp_offset);
+}
+
+static inline int emit_cond_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+				enum loongarch_gpr rd, int jmp_offset)
+{
+	/*
+	 * A large PC-relative jump offset may overflow the immediate field
+	 * of the native conditional branch instruction, forcing a fallback
+	 * to an absolute jump, and that jump sequence is particularly
+	 * nasty. For now, use cond_jmp_offs26() directly to keep things
+	 * simple. Far branching could be supported later, but branch
+	 * relaxation needs more than two passes to converge and the code
+	 * is complex enough that it is unclear whether the gain would be
+	 * worth the extra pain. Leave it as it is for readability now.
+	 */
+	if (is_signed_imm26(jmp_offset)) {
+		cond_jmp_offs26(ctx, cond, rj, rd, jmp_offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
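+
+/*
+ * A signed 26-bit offset counted in 32-bit instruction words reaches about
+ * +/-2^25 instructions (+/-128 MiB), so the -EINVAL path above should be
+ * unreachable for any realistic BPF program.
+ */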
+
+static inline int emit_uncond_jmp(struct jit_ctx *ctx, int jmp_offset)
+{
+	if (is_signed_imm26(jmp_offset)) {
+		uncond_jmp_offs26(ctx, jmp_offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static inline int emit_tailcall_jmp(struct jit_ctx *ctx, u8 cond, enum loongarch_gpr rj,
+				    enum loongarch_gpr rd, int jmp_offset)
+{
+	if (is_signed_imm16(jmp_offset)) {
+		cond_jmp_offset(ctx, cond, rj, rd, jmp_offset);
+		return 0;
+	}
+
+	return -EINVAL;
+}
diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c
index bf921487333c..8235ec92b41f 100644
--- a/arch/loongarch/pci/acpi.c
+++ b/arch/loongarch/pci/acpi.c
@@ -83,6 +83,69 @@ static int acpi_prepare_root_resources(struct acpi_pci_root_info *ci)
 }
 
 /*
+ * Create a PCI config space window
+ *  - reserve mem region
+ *  - alloc struct pci_config_window with space for all mappings
+ *  - ioremap the config space
+ */
+static struct pci_config_window *arch_pci_ecam_create(struct device *dev,
+		struct resource *cfgres, struct resource *busr, const struct pci_ecam_ops *ops)
+{
+	int bsz, bus_range, err;
+	struct resource *conflict;
+	struct pci_config_window *cfg;
+
+	if (busr->start > busr->end)
+		return ERR_PTR(-EINVAL);
+
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg)
+		return ERR_PTR(-ENOMEM);
+
+	cfg->parent = dev;
+	cfg->ops = ops;
+	cfg->busr.start = busr->start;
+	cfg->busr.end = busr->end;
+	cfg->busr.flags = IORESOURCE_BUS;
+	bus_range = resource_size(cfgres) >> ops->bus_shift;
+
+	bsz = 1 << ops->bus_shift;
+
+	cfg->res.start = cfgres->start;
+	cfg->res.end = cfgres->end;
+	cfg->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	cfg->res.name = "PCI ECAM";
+
+	conflict = request_resource_conflict(&iomem_resource, &cfg->res);
+	if (conflict) {
+		err = -EBUSY;
+		dev_err(dev, "can't claim ECAM area %pR: address conflict with %s %pR\n",
+			&cfg->res, conflict->name, conflict);
+		goto err_exit;
+	}
+
+	cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz);
+	if (!cfg->win)
+		goto err_exit_iomap;
+
+	if (ops->init) {
+		err = ops->init(cfg);
+		if (err)
+			goto err_exit;
+	}
+	dev_info(dev, "ECAM at %pR for %pR\n", &cfg->res, &cfg->busr);
+
+	return cfg;
+
+err_exit_iomap:
+	err = -ENOMEM;
+	dev_err(dev, "ECAM ioremap failed\n");
+err_exit:
+	pci_ecam_free(cfg);
+	return ERR_PTR(err);
+}
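+
+/*
+ * This looks like a local variant of the generic pci_ecam_create(), kept
+ * for the non-standard bus_shift layout selected further below; standard
+ * bus_shift == 20 root complexes still go through the generic helper.
+ */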
+
+/*
  * Lookup the bus range for the domain in MCFG, and set up config space
  * mapping.
  */
@@ -106,11 +169,16 @@ pci_acpi_setup_ecam_mapping(struct acpi_pci_root *root)
 
 	bus_shift = ecam_ops->bus_shift ? : 20;
 
-	cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift);
-	cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1;
-	cfgres.flags = IORESOURCE_MEM;
+	if (bus_shift == 20) {
+		cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
+	} else {
+		cfgres.start = root->mcfg_addr + (bus_res->start << bus_shift);
+		cfgres.end = cfgres.start + (resource_size(bus_res) << bus_shift) - 1;
+		cfgres.end |= BIT(28) + (((PCI_CFG_SPACE_EXP_SIZE - 1) & 0xf00) << 16);
+		cfgres.flags = IORESOURCE_MEM;
+		cfg = arch_pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
+	}
 
-	cfg = pci_ecam_create(dev, &cfgres, bus_res, ecam_ops);
 	if (IS_ERR(cfg)) {
 		dev_err(dev, "%04x:%pR error %ld mapping ECAM\n", seg, bus_res, PTR_ERR(cfg));
 		return NULL;
diff --git a/arch/loongarch/pci/pci.c b/arch/loongarch/pci/pci.c
index e9b7c34d9b6d..2726639150bc 100644
--- a/arch/loongarch/pci/pci.c
+++ b/arch/loongarch/pci/pci.c
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/vgaarb.h>
+#include <asm/cacheflush.h>
 #include <asm/loongson.h>
 
 #define PCI_DEVICE_ID_LOONGSON_HOST     0x7a00
@@ -45,12 +46,10 @@ static int __init pcibios_init(void)
 	unsigned int lsize;
 
 	/*
-	 * Set PCI cacheline size to that of the highest level in the
+	 * Set PCI cacheline size to that of the last level in the
 	 * cache hierarchy.
 	 */
-	lsize = cpu_dcache_line_size();
-	lsize = cpu_vcache_line_size() ? : lsize;
-	lsize = cpu_scache_line_size() ? : lsize;
+	lsize = cpu_last_level_cache_line_size();
 
 	BUG_ON(!lsize);
 
diff --git a/arch/m68k/68000/Makefile b/arch/m68k/68000/Makefile
index 674541fdf5b8..279560add577 100644
--- a/arch/m68k/68000/Makefile
+++ b/arch/m68k/68000/Makefile
@@ -17,4 +17,4 @@ obj-$(CONFIG_DRAGEN2)	+= dragen2.o
 obj-$(CONFIG_UCSIMM)	+= ucsimm.o
 obj-$(CONFIG_UCDIMM)	+= ucsimm.o
 
-extra-y 		:= head.o
+obj-y			+= head.o
diff --git a/arch/m68k/68000/ints.c b/arch/m68k/68000/ints.c
index cda49b12d7be..f9a5ec781408 100644
--- a/arch/m68k/68000/ints.c
+++ b/arch/m68k/68000/ints.c
@@ -18,12 +18,12 @@
 #include <asm/io.h>
 #include <asm/machdep.h>
 
-#if defined(CONFIG_M68328)
-#include <asm/MC68328.h>
-#elif defined(CONFIG_M68EZ328)
+#if defined(CONFIG_M68EZ328)
 #include <asm/MC68EZ328.h>
 #elif defined(CONFIG_M68VZ328)
 #include <asm/MC68VZ328.h>
+#else
+#include <asm/MC68328.h>
 #endif
 
 /* assembler routines */
diff --git a/arch/m68k/Kconfig.cpu b/arch/m68k/Kconfig.cpu
index e0e9e31339c1..9380f6e3bb66 100644
--- a/arch/m68k/Kconfig.cpu
+++ b/arch/m68k/Kconfig.cpu
@@ -46,6 +46,7 @@ config M68000
 	select GENERIC_CSUM
 	select CPU_NO_EFFICIENT_FFS
 	select HAVE_ARCH_HASH
+	select LEGACY_TIMER_TICK
 	help
 	  The Freescale (was Motorola) 68000 CPU is the first generation of
 	  the well known M68K family of processors. The CPU core as well as
@@ -97,7 +98,6 @@ config M68060
 config M68328
 	bool
 	depends on !MMU
-	select LEGACY_TIMER_TICK
 	select M68000
 	help
 	  Motorola 68328 processor support.
@@ -105,7 +105,6 @@ config M68328
 config M68EZ328
 	bool
 	depends on !MMU
-	select LEGACY_TIMER_TICK
 	select M68000
 	help
 	  Motorola 68EZ328 processor support.
@@ -113,7 +112,6 @@ config M68EZ328
 config M68VZ328
 	bool
 	depends on !MMU
-	select LEGACY_TIMER_TICK
 	select M68000
 	help
 	  Motorola 68VZ328 processor support.
@@ -399,7 +397,7 @@ config SINGLE_MEMORY_CHUNK
 	  order" to save memory that could be wasted for unused memory map.
 	  Say N if not sure.
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order" if ADVANCED
 	depends on !SINGLE_MEMORY_CHUNK
 	default "11"
diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index e358605b70ba..43e39040d3ac 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -86,15 +86,6 @@ ifdef CONFIG_KGDB
 KBUILD_CFLAGS := $(subst -fomit-frame-pointer,,$(KBUILD_CFLAGS)) -g
 endif
 
-#
-# Select the assembler head startup code. Order is important. The default
-# head code is first, processor specific selections can override it after.
-#
-head-y				:= arch/m68k/kernel/head.o
-head-$(CONFIG_SUN3)		:= arch/m68k/kernel/sun3-head.o
-head-$(CONFIG_M68000)		:= arch/m68k/68000/head.o
-head-$(CONFIG_COLDFIRE)		:= arch/m68k/coldfire/head.o
-
 libs-y				+= arch/m68k/lib/
 
 
diff --git a/arch/m68k/coldfire/Makefile b/arch/m68k/coldfire/Makefile
index 9419a6c1f036..c56bc0dc7f2e 100644
--- a/arch/m68k/coldfire/Makefile
+++ b/arch/m68k/coldfire/Makefile
@@ -45,4 +45,4 @@ obj-$(CONFIG_STMARK2)	+= stmark2.o
 obj-$(CONFIG_PCI)	+= pci.o
 
 obj-y			+= gpio.o
-extra-y := head.o
+obj-y			+= head.o
diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig
index 6d9ed2198170..041adcf6ecfc 100644
--- a/arch/m68k/configs/amcore_defconfig
+++ b/arch/m68k/configs/amcore_defconfig
@@ -27,9 +27,6 @@ CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 # CONFIG_UEVENT_HELPER is not set
@@ -85,7 +82,6 @@ CONFIG_ROMFS_FS=y
 CONFIG_ROMFS_BACKED_BY_BOTH=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_PRINTK_TIME=y
-# CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/m68k/configs/m5208evb_defconfig b/arch/m68k/configs/m5208evb_defconfig
index 0ee3079f6ca9..31035a0b9247 100644
--- a/arch/m68k/configs/m5208evb_defconfig
+++ b/arch/m68k/configs/m5208evb_defconfig
@@ -21,9 +21,6 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5249evb_defconfig b/arch/m68k/configs/m5249evb_defconfig
index f84f68c04065..5706d7a1daba 100644
--- a/arch/m68k/configs/m5249evb_defconfig
+++ b/arch/m68k/configs/m5249evb_defconfig
@@ -22,9 +22,6 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5272c3_defconfig b/arch/m68k/configs/m5272c3_defconfig
index eca65020aae3..f02fe144f4ad 100644
--- a/arch/m68k/configs/m5272c3_defconfig
+++ b/arch/m68k/configs/m5272c3_defconfig
@@ -22,9 +22,6 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5275evb_defconfig b/arch/m68k/configs/m5275evb_defconfig
index 9402c7a3e9c7..781f307ff330 100644
--- a/arch/m68k/configs/m5275evb_defconfig
+++ b/arch/m68k/configs/m5275evb_defconfig
@@ -22,9 +22,6 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5307c3_defconfig b/arch/m68k/configs/m5307c3_defconfig
index bb8b0eb4bdfc..6eac482356ca 100644
--- a/arch/m68k/configs/m5307c3_defconfig
+++ b/arch/m68k/configs/m5307c3_defconfig
@@ -22,9 +22,6 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/configs/m5407c3_defconfig b/arch/m68k/configs/m5407c3_defconfig
index ce9ccf13c7c0..496dcccb1c18 100644
--- a/arch/m68k/configs/m5407c3_defconfig
+++ b/arch/m68k/configs/m5407c3_defconfig
@@ -23,9 +23,6 @@ CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index d86b4009880b..7a2da780830b 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -145,11 +145,6 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 /* Forward declaration, a strange C thing */
 struct task_struct;
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *dead_task)
-{
-}
-
 unsigned long __get_wchan(struct task_struct *p);
 void show_registers(struct pt_regs *regs);
 
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index c0833da6a2ca..af015447dfb4 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -3,19 +3,20 @@
 # Makefile for the linux kernel.
 #
 
-extra-$(CONFIG_AMIGA)	:= head.o
-extra-$(CONFIG_ATARI)	:= head.o
-extra-$(CONFIG_MAC)	:= head.o
-extra-$(CONFIG_APOLLO)	:= head.o
-extra-$(CONFIG_VME)	:= head.o
-extra-$(CONFIG_HP300)	:= head.o
-extra-$(CONFIG_Q40)	:= head.o
-extra-$(CONFIG_SUN3X)	:= head.o
-extra-$(CONFIG_VIRT)	:= head.o
-extra-$(CONFIG_SUN3)	:= sun3-head.o
 extra-y			+= vmlinux.lds
 
-obj-y	:= entry.o irq.o module.o process.o ptrace.o
+obj-$(CONFIG_AMIGA)	:= head.o
+obj-$(CONFIG_ATARI)	:= head.o
+obj-$(CONFIG_MAC)	:= head.o
+obj-$(CONFIG_APOLLO)	:= head.o
+obj-$(CONFIG_VME)	:= head.o
+obj-$(CONFIG_HP300)	:= head.o
+obj-$(CONFIG_Q40)	:= head.o
+obj-$(CONFIG_SUN3X)	:= head.o
+obj-$(CONFIG_VIRT)	:= head.o
+obj-$(CONFIG_SUN3)	:= sun3-head.o
+
+obj-y	+= entry.o irq.o module.o process.o ptrace.o
 obj-y	+= setup.o signal.o sys_m68k.o syscalltable.o time.o traps.o
 
 obj-$(CONFIG_MMU_MOTOROLA) += ints.o vectors.o
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile
index 1826d9ce4459..3f8a86c4336a 100644
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -48,7 +48,6 @@ CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
 # r31 holds current when in kernel mode
 KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-y) $(CPUFLAGS-1) $(CPUFLAGS-2)
 
-head-y := arch/microblaze/kernel/head.o
 libs-y += arch/microblaze/lib/
 
 boot := arch/microblaze/boot
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 51337fffb947..8150daf04a76 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -83,7 +83,7 @@ CONFIG_CIFS=y
 CONFIG_CIFS_STATS2=y
 CONFIG_ENCRYPTED_KEYS=y
 CONFIG_DMA_CMA=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_KGDB=y
 CONFIG_KGDB_TESTS=y
 CONFIG_KGDB_KDB=y
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 7e9e92670df3..4e193c7550df 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -63,11 +63,6 @@ struct thread_struct {
 	.pgdir = swapper_pg_dir, \
 }
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *dead_task)
-{
-}
-
 unsigned long __get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile
index 15a20eb814ce..4393bee64eaf 100644
--- a/arch/microblaze/kernel/Makefile
+++ b/arch/microblaze/kernel/Makefile
@@ -12,9 +12,9 @@ CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_process.o = -pg
 endif
 
-extra-y := head.o vmlinux.lds
+extra-y := vmlinux.lds
 
-obj-y += dma.o exceptions.o \
+obj-y += head.o dma.o exceptions.o \
 	hw_exception_handler.o irq.o \
 	process.o prom.o ptrace.o \
 	reset.o setup.o signal.o sys_microblaze.o timer.o traps.o unwind.o
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 25dd4c5a8ef5..b26b77673c2c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2140,7 +2140,7 @@ config PAGE_SIZE_64KB
 
 endchoice
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	range 14 64 if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB
 	default "14" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 4d2a3e73fc45..b296e33f8e33 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -324,8 +324,6 @@ endif
 
 OBJCOPYFLAGS		+= --remove-section=.reginfo
 
-head-y := arch/mips/kernel/head.o
-
 libs-y			+= arch/mips/lib/
 libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/
 
diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index 91ce75edbfb4..22ffde722bb9 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -72,7 +72,7 @@ CONFIG_LEDS_TRIGGER_TIMER=y
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_CRC32_SARWATE=y
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index a2311495af79..0bc2e3cc573b 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -161,7 +161,7 @@ CONFIG_CRYPTO_SHA1_OCTEON=m
 CONFIG_CRYPTO_SHA256_OCTEON=m
 CONFIG_CRYPTO_SHA512_OCTEON=m
 CONFIG_CRYPTO_DES=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig
index e1b49f77414a..11f08b6a3013 100644
--- a/arch/mips/configs/ci20_defconfig
+++ b/arch/mips/configs/ci20_defconfig
@@ -199,7 +199,7 @@ CONFIG_NLS_UTF8=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=32
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
diff --git a/arch/mips/configs/cu1000-neo_defconfig b/arch/mips/configs/cu1000-neo_defconfig
index 5bd55eb32fe5..1cbc9302e1d1 100644
--- a/arch/mips/configs/cu1000-neo_defconfig
+++ b/arch/mips/configs/cu1000-neo_defconfig
@@ -113,7 +113,7 @@ CONFIG_PRINTK_TIME=y
 CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
 CONFIG_CONSOLE_LOGLEVEL_QUIET=15
 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
diff --git a/arch/mips/configs/cu1830-neo_defconfig b/arch/mips/configs/cu1830-neo_defconfig
index cc69688962e8..a0f73f3cd6ce 100644
--- a/arch/mips/configs/cu1830-neo_defconfig
+++ b/arch/mips/configs/cu1830-neo_defconfig
@@ -116,7 +116,7 @@ CONFIG_PRINTK_TIME=y
 CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
 CONFIG_CONSOLE_LOGLEVEL_QUIET=15
 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index a8b62df3c021..af070be1b583 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig
@@ -9,7 +9,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=16
 CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CFS_BANDWIDTH=y
diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig
index 714169e411cf..c2cd2b181ef3 100644
--- a/arch/mips/configs/generic_defconfig
+++ b/arch/mips/configs/generic_defconfig
@@ -3,7 +3,6 @@ CONFIG_NO_HZ_IDLE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
@@ -83,7 +82,7 @@ CONFIG_ROOT_NFS=y
 # CONFIG_XZ_DEC_ARMTHUMB is not set
 # CONFIG_XZ_DEC_SPARC is not set
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_DEBUG_FS=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig
index 9c34daf83563..91fe2822f897 100644
--- a/arch/mips/configs/omega2p_defconfig
+++ b/arch/mips/configs/omega2p_defconfig
@@ -113,7 +113,7 @@ CONFIG_CRYPTO_LZO=y
 CONFIG_CRC16=y
 CONFIG_XZ_DEC=y
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
index b4448d0876d5..7e5d9741bd5d 100644
--- a/arch/mips/configs/qi_lb60_defconfig
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -166,7 +166,7 @@ CONFIG_NLS_UTF8=y
 CONFIG_FONTS=y
 CONFIG_FONT_SUN8x16=y
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_READABLE_ASM=y
 CONFIG_KGDB=y
diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig
index 0722a3bf03c0..e47d4cc3353b 100644
--- a/arch/mips/configs/vocore2_defconfig
+++ b/arch/mips/configs/vocore2_defconfig
@@ -113,7 +113,7 @@ CONFIG_CRYPTO_LZO=y
 CONFIG_CRC16=y
 CONFIG_XZ_DEC=y
 CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig
new file mode 100644
index 000000000000..9003a5c1e879
--- /dev/null
+++ b/arch/mips/crypto/Kconfig
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (mips)"
+
+config CRYPTO_CRC32_MIPS
+	tristate "CRC32c and CRC32"
+	depends on MIPS_CRC_SUPPORT
+	select CRYPTO_HASH
+	help
+	  CRC32c and CRC32 CRC algorithms
+
+	  Architecture: mips
+
+config CRYPTO_POLY1305_MIPS
+	tristate "Hash functions: Poly1305"
+	depends on MIPS
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+	help
+	  Poly1305 authenticator algorithm (RFC7539)
+
+	  Architecture: mips
+
+config CRYPTO_MD5_OCTEON
+	tristate "Digests: MD5 (OCTEON)"
+	depends on CPU_CAVIUM_OCTEON
+	select CRYPTO_MD5
+	select CRYPTO_HASH
+	help
+	  MD5 message digest algorithm (RFC1321)
+
+	  Architecture: mips OCTEON using crypto instructions, when available
+
+config CRYPTO_SHA1_OCTEON
+	tristate "Hash functions: SHA-1 (OCTEON)"
+	depends on CPU_CAVIUM_OCTEON
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: mips OCTEON
+
+config CRYPTO_SHA256_OCTEON
+	tristate "Hash functions: SHA-224 and SHA-256 (OCTEON)"
+	depends on CPU_CAVIUM_OCTEON
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: mips OCTEON using crypto instructions, when available
+
+config CRYPTO_SHA512_OCTEON
+	tristate "Hash functions: SHA-384 and SHA-512 (OCTEON)"
+	depends on CPU_CAVIUM_OCTEON
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: mips OCTEON using crypto instructions, when available
+
+config CRYPTO_CHACHA_MIPS
+	tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (MIPS32r2)"
+	depends on CPU_MIPS32_R2
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+	  stream cipher algorithms
+
+	  Architecture: MIPS32r2
+
+endmenu
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 4bb24579d12e..3fde1ff72bd1 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -344,9 +344,6 @@ struct thread_struct {
 
 struct task_struct;
 
-/* Free all resources held by a thread. */
-#define release_thread(thread) do { } while(0)
-
 /*
  * Do necessary setup to start up a newly executed thread.
  */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 1be428663c10..c6e1fc77c996 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -103,6 +103,8 @@
 
 #define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 7c96282bff2e..5d1addac5e28 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -3,9 +3,9 @@
 # Makefile for the Linux/MIPS kernel.
 #
 
-extra-y		:= head.o vmlinux.lds
+extra-y		:= vmlinux.lds
 
-obj-y		+= branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \
+obj-y		+= head.o branch.o cmpxchg.o elf.o entry.o genex.o idle.o irq.o \
 		   process.o prom.o ptrace.o reset.o setup.o signal.o \
 		   syscall.o time.o topology.o traps.o unaligned.o watch.o \
 		   vdso.o cacheinfo.o
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 39c79f67c7a3..f1c88f8a1dc5 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -751,7 +751,7 @@ static void __init prefill_possible_map(void)
 	for (; i < NR_CPUS; i++)
 		set_cpu_possible(i, false);
 
-	nr_cpu_ids = possible;
+	set_nr_cpu_ids(possible);
 }
 #else
 static inline void prefill_possible_map(void) {}
diff --git a/arch/nios2/Kbuild b/arch/nios2/Kbuild
index 4e39f7abdeb6..fc2952edd2de 100644
--- a/arch/nios2/Kbuild
+++ b/arch/nios2/Kbuild
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+obj-y += kernel/ mm/ platform/ boot/dts/
+
 # for cleaning
 subdir- += boot
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 4167f1eb4cd8..a582f72104f3 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -44,7 +44,7 @@ menu "Kernel features"
 
 source "kernel/Kconfig.hz"
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	range 9 20
 	default "11"
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index d6a7499b814c..f1ff4ce0f1a2 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -37,10 +37,7 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"$(UTS_SYSNAME)\"
 KBUILD_CFLAGS += -fno-builtin
 KBUILD_CFLAGS += -G 0
 
-head-y		:= arch/nios2/kernel/head.o
 libs-y		+= arch/nios2/lib/ $(LIBGCC)
-core-y		+= arch/nios2/kernel/ arch/nios2/mm/
-core-y		+= arch/nios2/platform/
 
 INSTALL_PATH ?= /tftpboot
 nios2-boot := arch/$(ARCH)/boot
@@ -48,8 +45,6 @@ BOOT_TARGETS = vmImage zImage
 PHONY += $(BOOT_TARGETS) install
 KBUILD_IMAGE := $(nios2-boot)/vmImage
 
-core-y	+= $(nios2-boot)/dts/
-
 all: vmImage
 
 $(BOOT_TARGETS): vmlinux
diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig
index a7967b4cfb6e..91c3fce4dc7f 100644
--- a/arch/nios2/configs/10m50_defconfig
+++ b/arch/nios2/configs/10m50_defconfig
@@ -74,4 +74,4 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_ROOT_NFS=y
 CONFIG_SUNRPC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig
index 423a0c40a162..c42ad7e162a3 100644
--- a/arch/nios2/configs/3c120_defconfig
+++ b/arch/nios2/configs/3c120_defconfig
@@ -71,4 +71,4 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_ROOT_NFS=y
 CONFIG_SUNRPC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index b8125dfbcad2..8916d93d5c2d 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -64,11 +64,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long pc,
 
 struct task_struct;
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *dead_task)
-{
-}
-
 extern unsigned long __get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
diff --git a/arch/nios2/kernel/Makefile b/arch/nios2/kernel/Makefile
index 0b645e1e3158..78a913181fa1 100644
--- a/arch/nios2/kernel/Makefile
+++ b/arch/nios2/kernel/Makefile
@@ -3,9 +3,9 @@
 # Makefile for the nios2 linux kernel.
 #
 
-extra-y	+= head.o
 extra-y	+= vmlinux.lds
 
+obj-y	+= head.o
 obj-y	+= cpuinfo.o
 obj-y	+= entry.o
 obj-y	+= insnemu.o
diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile
index b446510173cd..68249521db5a 100644
--- a/arch/openrisc/Makefile
+++ b/arch/openrisc/Makefile
@@ -55,8 +55,6 @@ ifeq ($(CONFIG_OPENRISC_HAVE_INST_SEXT),y)
 	KBUILD_CFLAGS += $(call cc-option,-msext)
 endif
 
-head-y 		:= arch/openrisc/kernel/head.o
-
 libs-y		+= $(LIBGCC)
 
 PHONY += vmlinux.bin
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index aa1699c18add..ed9efb430afa 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -72,7 +72,6 @@ struct thread_struct {
 
 
 void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
-void release_thread(struct task_struct *);
 unsigned long __get_wchan(struct task_struct *p);
 
 #define cpu_relax()     barrier()
diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
index 2d172e79f58d..79129161f3e0 100644
--- a/arch/openrisc/kernel/Makefile
+++ b/arch/openrisc/kernel/Makefile
@@ -3,9 +3,9 @@
 # Makefile for the linux kernel.
 #
 
-extra-y	:= head.o vmlinux.lds
+extra-y	:= vmlinux.lds
 
-obj-y	:= setup.o or32_ksyms.o process.o dma.o \
+obj-y	:= head.o setup.o or32_ksyms.o process.o dma.o \
 	   traps.o time.o irq.o entry.o ptrace.o signal.o \
 	   sys_call_table.o unwinder.o
 
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 52dc983ddeba..f94b5ec06786 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -125,10 +125,6 @@ void show_regs(struct pt_regs *regs)
 	show_registers(regs);
 }
 
-void release_thread(struct task_struct *dead_task)
-{
-}
-
 /*
  * Copy the thread-specific (arch specific) info from the current
  * process to the new one p
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index e38d993d87f2..a2d8600521f9 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -113,8 +113,6 @@ cflags-$(CONFIG_PA7100LC)	+= -march=1.1 -mschedule=7100LC
 cflags-$(CONFIG_PA7300LC)	+= -march=1.1 -mschedule=7300
 cflags-$(CONFIG_PA8X00)		+= -march=2.0 -mschedule=8000
 
-head-y			:= arch/parisc/kernel/head.o 
-
 KBUILD_CFLAGS	+= $(cflags-y)
 LIBGCC		:= $(shell $(CC) -print-libgcc-file-name)
 export LIBGCC
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 4621ceb51314..a608970b249a 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -266,9 +266,6 @@ on downward growing arches, it looks like this:
 
 struct mm_struct;
 
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
 extern unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)	((tsk)->thread.regs.iaoq[0])
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index a7ea3204a5fa..22133a6a506e 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -70,6 +70,8 @@
 #define MADV_WIPEONFORK 71		/* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 72		/* Undo MADV_WIPEONFORK */
 
+#define MADV_COLLAPSE	73		/* Synchronous hugepage collapse */
+
 #define MADV_HWPOISON     100		/* poison a page for testing */
 #define MADV_SOFT_OFFLINE 101		/* soft offline page for testing */
 
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index d0bfac89a842..3d138c9cf9ce 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -3,9 +3,9 @@
 # Makefile for arch/parisc/kernel
 #
 
-extra-y			:= head.o vmlinux.lds
+extra-y		:= vmlinux.lds
 
-obj-y	     	:= cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \
+obj-y		:= head.o cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \
 		   pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
 		   ptrace.o hardware.o inventory.o drivers.o alternative.o \
 		   signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 3feb7694e0ca..1d3b8bc8a623 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -657,15 +657,20 @@ static inline unsigned long mm_total_size(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
 	unsigned long usize = 0;
+	VMA_ITERATOR(vmi, mm, 0);
 
-	for (vma = mm->mmap; vma && usize < parisc_cache_flush_threshold; vma = vma->vm_next)
+	for_each_vma(vmi, vma) {
+		if (usize >= parisc_cache_flush_threshold)
+			break;
 		usize += vma->vm_end - vma->vm_start;
+	}
 	return usize;
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	/*
 	 * Flushing the whole cache on each cpu takes forever on
@@ -685,7 +690,7 @@ void flush_cache_mm(struct mm_struct *mm)
 	}
 
 	/* Flush mm */
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
+	for_each_vma(vmi, vma)
 		flush_cache_pages(vma, vma->vm_start, vma->vm_end);
 }
 
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 7c37e09c92da..3db0e97e6c06 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -146,10 +146,6 @@ void flush_thread(void)
 	*/
 }
 
-void release_thread(struct task_struct *dead_task)
-{
-}
-
 /*
  * Idle thread support
  *
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4c466acdc70d..699df27b0e2f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,8 +135,9 @@ config PPC
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC_BOOK3S || PPC_8xx || 40x) && !HIBERNATION
-	select ARCH_HAS_STRICT_KERNEL_RWX	if FSL_BOOKE && !HIBERNATION && !RANDOMIZE_BASE
+	select ARCH_HAS_STRICT_KERNEL_RWX	if PPC_85xx && !HIBERNATION && !RANDOMIZE_BASE
 	select ARCH_HAS_STRICT_MODULE_RWX	if ARCH_HAS_STRICT_KERNEL_RWX
+	select ARCH_HAS_SYSCALL_WRAPPER		if !SPU_BASE && !COMPAT
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UACCESS_FLUSHCACHE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -194,7 +195,7 @@ config PPC
 	select HAVE_ARCH_KASAN			if PPC_RADIX_MMU
 	select HAVE_ARCH_KASAN			if PPC_BOOK3E_64
 	select HAVE_ARCH_KASAN_VMALLOC		if HAVE_ARCH_KASAN
-	select HAVE_ARCH_KFENCE			if PPC_BOOK3S_32 || PPC_8xx || 40x
+	select HAVE_ARCH_KFENCE			if ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
@@ -211,7 +212,7 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE_WITH_ARGS	if MPROFILE_KERNEL || PPC32
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS	if MPROFILE_KERNEL || PPC32
 	select HAVE_EBPF_JIT
-	select HAVE_EFFICIENT_UNALIGNED_ACCESS	if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_DESCRIPTORS	if PPC64_ELF_ABI_V1
@@ -290,7 +291,7 @@ config PPC_LONG_DOUBLE_128
 config PPC_BARRIER_NOSPEC
 	bool
 	default y
-	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
+	depends on PPC_BOOK3S_64 || PPC_E500
 
 config EARLY_PRINTK
 	bool
@@ -548,7 +549,7 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
 
 config KEXEC
 	bool "kexec system call"
-	depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP)) || PPC_BOOK3E
+	depends on PPC_BOOK3S || PPC_E500 || (44x && !SMP)
 	select KEXEC_CORE
 	help
 	  kexec is a system call that implements the ability to shutdown your
@@ -583,7 +584,7 @@ config ARCH_HAS_KEXEC_PURGATORY
 
 config RELOCATABLE
 	bool "Build a relocatable kernel"
-	depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE))
+	depends on PPC64 || (FLATMEM && (44x || PPC_85xx))
 	select NONSTATIC_KERNEL
 	help
 	  This builds a kernel image that is capable of running at the
@@ -606,7 +607,7 @@ config RELOCATABLE
 
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image"
-	depends on (FSL_BOOKE && FLATMEM && PPC32)
+	depends on PPC_85xx && FLATMEM
 	depends on RELOCATABLE
 	help
 	  Randomizes the virtual address at which the kernel image is
@@ -625,8 +626,8 @@ config RELOCATABLE_TEST
 
 config CRASH_DUMP
 	bool "Build a dump capture kernel"
-	depends on PPC64 || PPC_BOOK3S_32 || FSL_BOOKE || (44x && !SMP)
-	select RELOCATABLE if PPC64 || 44x || FSL_BOOKE
+	depends on PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
+	select RELOCATABLE if PPC64 || 44x || PPC_85xx
 	help
 	  Build a kernel suitable for use as a dump capture kernel.
 	  The same kernel binary can be used as production kernel and dump
@@ -815,7 +816,7 @@ config DATA_SHIFT_BOOL
 	depends on ADVANCED_OPTIONS
 	depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
 	depends on PPC_BOOK3S_32 || (PPC_8xx && !PIN_TLB_DATA && !STRICT_KERNEL_RWX) || \
-		   FSL_BOOKE
+		   PPC_85xx
 	help
 	  This option allows you to set the kernel data alignment. When
 	  RAM is mapped by blocks, the alignment needs to fit the size and
@@ -828,13 +829,13 @@ config DATA_SHIFT
 	default 24 if STRICT_KERNEL_RWX && PPC64
 	range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
 	range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
-	range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_FSL_BOOKE
+	range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx
 	default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
 	default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
 	default 23 if STRICT_KERNEL_RWX && PPC_8xx
 	default 23 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && PIN_TLB_DATA
 	default 19 if (DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
-	default 24 if STRICT_KERNEL_RWX && FSL_BOOKE
+	default 24 if STRICT_KERNEL_RWX && PPC_85xx
 	default PPC_PAGE_SHIFT
 	help
 	  On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
@@ -845,7 +846,7 @@ config DATA_SHIFT
 	  in that case. If PIN_TLB is selected, it must be aligned to 8M as
 	  8M pages will be pinned.
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	range 8 9 if PPC64 && PPC_64K_PAGES
 	default "9" if PPC64 && PPC_64K_PAGES
@@ -1150,7 +1151,7 @@ config LOWMEM_SIZE
 
 config LOWMEM_CAM_NUM_BOOL
 	bool "Set number of CAMs to use to map low memory"
-	depends on ADVANCED_OPTIONS && FSL_BOOKE
+	depends on ADVANCED_OPTIONS && PPC_85xx
 	help
 	  This option allows you to set the maximum number of CAM slots that
 	  will be used to map low memory.  There are a limited number of slots
@@ -1161,7 +1162,7 @@ config LOWMEM_CAM_NUM_BOOL
 	  Say N here unless you know what you are doing.
 
 config LOWMEM_CAM_NUM
-	depends on FSL_BOOKE
+	depends on PPC_85xx
 	int "Number of CAMs to use to map low memory" if LOWMEM_CAM_NUM_BOOL
 	default 3 if !STRICT_KERNEL_RWX
 	default 9 if DATA_SHIFT >= 24
@@ -1170,7 +1171,7 @@ config LOWMEM_CAM_NUM
 
 config DYNAMIC_MEMSTART
 	bool "Enable page aligned dynamic load address for kernel"
-	depends on ADVANCED_OPTIONS && FLATMEM && (FSL_BOOKE || 44x)
+	depends on ADVANCED_OPTIONS && FLATMEM && (PPC_85xx || 44x)
 	select NONSTATIC_KERNEL
 	help
 	  This option enables the kernel to be loaded at any page aligned
@@ -1219,7 +1220,7 @@ config KERNEL_START
 
 config PHYSICAL_START_BOOL
 	bool "Set physical address where the kernel is loaded"
-	depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE
+	depends on ADVANCED_OPTIONS && FLATMEM && PPC_85xx
 	help
 	  This gives the physical address where the kernel is loaded.
 
@@ -1232,7 +1233,7 @@ config PHYSICAL_START
 
 config PHYSICAL_ALIGN
 	hex
-	default "0x04000000" if FSL_BOOKE
+	default "0x04000000" if PPC_85xx
 	help
 	  This value puts the alignment restrictions on physical address
 	  where kernel is loaded and run from. Kernel is compiled for an
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index ae727d4218b9..6aaf8dc60610 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -283,6 +283,12 @@ config PPC_EARLY_DEBUG_MEMCONS
 	  This console provides input and output buffers stored within the
 	  kernel BSS and should be safe to select on any system. A debugger
 	  can then be used to read kernel output or send input to the console.
+
+config PPC_EARLY_DEBUG_16550
+	bool "Serial 16550"
+	depends on PPC_UDBG_16550
+	help
+	  Select this to enable early debugging via the Serial 16550 console.
 endchoice
 
 config PPC_MEMCONS_OUTPUT_SIZE
@@ -354,6 +360,15 @@ config PPC_EARLY_DEBUG_CPM_ADDR
 	  platform probing is done, all platforms selected must
 	  share the same address.
 
+config PPC_EARLY_DEBUG_16550_PHYSADDR
+	hex "Early debug Serial 16550 physical address"
+	depends on PPC_EARLY_DEBUG_16550
+
+config PPC_EARLY_DEBUG_16550_STRIDE
+	int "Early debug Serial 16550 stride"
+	depends on PPC_EARLY_DEBUG_16550
+	default 1
+
 config FAIL_IOMMU
 	bool "Fault-injection capability for IOMMU"
 	depends on FAULT_INJECTION
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 02742facf895..dc4cbf0a5ca9 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -149,11 +149,12 @@ CFLAGS-$(CONFIG_PPC32)	+= $(call cc-option,-mno-readonly-in-sdata)
 ifdef CONFIG_PPC_BOOK3S_64
 ifdef CONFIG_CPU_LITTLE_ENDIAN
 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
 else
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5))
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4)
+CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4
 endif
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power10,	\
+				  $(call cc-option,-mtune=power9,	\
+				  $(call cc-option,-mtune=power8)))
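+# cc-option falls back through the chain above: the newest -mtune= the
+# compiler accepts wins; if even -mtune=power8 is unsupported, none is added.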
 else ifdef CONFIG_PPC_BOOK3E_64
 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif
@@ -191,9 +192,14 @@ ifdef CONFIG_476FPE_ERR46
 		-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
 endif
 
-# No AltiVec or VSX instructions when building kernel
+# No prefix or pcrel
+KBUILD_CFLAGS += $(call cc-option,-mno-prefixed)
+KBUILD_CFLAGS += $(call cc-option,-mno-pcrel)
+
+# No AltiVec or VSX or MMA instructions when building kernel
 KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
 KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
+KBUILD_CFLAGS += $(call cc-option,-mno-mma)
 
 # No SPE instruction when building kernel
 # (We use all available options to help semi-broken compilers)
@@ -210,7 +216,7 @@ KBUILD_CFLAGS		+= $(call cc-option,-mno-string)
 cpu-as-$(CONFIG_40x)		+= -Wa,-m405
 cpu-as-$(CONFIG_44x)		+= -Wa,-m440
 cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
-cpu-as-$(CONFIG_E500)		+= -Wa,-me500
+cpu-as-$(CONFIG_PPC_E500)		+= -Wa,-me500
 
 # When using '-many -mpower4' gas will first try and find a matching power4
 # mnemonic and failing that it will allow any valid mnemonic that GAS knows
@@ -226,18 +232,6 @@ KBUILD_CFLAGS += $(cpu-as-y)
 KBUILD_AFLAGS += $(aflags-y)
 KBUILD_CFLAGS += $(cflags-y)
 
-head-$(CONFIG_PPC64)		:= arch/powerpc/kernel/head_64.o
-head-$(CONFIG_PPC_BOOK3S_32)	:= arch/powerpc/kernel/head_book3s_32.o
-head-$(CONFIG_PPC_8xx)		:= arch/powerpc/kernel/head_8xx.o
-head-$(CONFIG_40x)		:= arch/powerpc/kernel/head_40x.o
-head-$(CONFIG_44x)		:= arch/powerpc/kernel/head_44x.o
-head-$(CONFIG_FSL_BOOKE)	:= arch/powerpc/kernel/head_fsl_booke.o
-
-head-$(CONFIG_PPC64)		+= arch/powerpc/kernel/entry_64.o
-head-$(CONFIG_PPC_FPU)		+= arch/powerpc/kernel/fpu.o
-head-$(CONFIG_ALTIVEC)		+= arch/powerpc/kernel/vector.o
-head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)  += arch/powerpc/kernel/prom_init.o
-
 # Default to zImage, override when needed
 all: zImage
 
diff --git a/arch/powerpc/boot/44x.h b/arch/powerpc/boot/44x.h
index 02563443788a..9b15e59522d6 100644
--- a/arch/powerpc/boot/44x.h
+++ b/arch/powerpc/boot/44x.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * PowerPC 44x related functions
  *
  * Copyright 2007 David Gibson, IBM Corporation.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 #ifndef _PPC_BOOT_44X_H_
 #define _PPC_BOOT_44X_H_
diff --git a/arch/powerpc/boot/4xx.h b/arch/powerpc/boot/4xx.h
index 7dc5d45361bc..77f15d124c81 100644
--- a/arch/powerpc/boot/4xx.h
+++ b/arch/powerpc/boot/4xx.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * PowerPC 4xx related functions
  *
  * Copyright 2007 IBM Corporation.
  * Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 #ifndef _POWERPC_BOOT_4XX_H_
 #define _POWERPC_BOOT_4XX_H_
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index a9cd2ea4a861..d32d95aea5d6 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -34,6 +34,7 @@ endif
 
 BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 		 -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
+		 $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \
 		 -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
 		 $(LINUXINCLUDE)
 
diff --git a/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
new file mode 100644
index 000000000000..7e2a90cde72e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
@@ -0,0 +1,51 @@
+/*
+ * e500v1 Power ISA Device Tree Source (include)
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+	cpus {
+		power-isa-version = "2.03";
+		power-isa-b;		// Base
+		power-isa-e;		// Embedded
+		power-isa-atb;		// Alternate Time Base
+		power-isa-cs;		// Cache Specification
+		power-isa-e.le;		// Embedded.Little-Endian
+		power-isa-e.pm;		// Embedded.Performance Monitor
+		power-isa-ecl;		// Embedded Cache Locking
+		power-isa-mmc;		// Memory Coherence
+		power-isa-sp;		// Signal Processing Engine
+		power-isa-sp.fs;	// SPE.Embedded Float Scalar Single
+		power-isa-sp.fv;	// SPE.Embedded Float Vector
+		mmu-type = "power-embedded";
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts
index 18a885130538..e03ae130162b 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8540ads.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8540ads.dts
@@ -7,7 +7,7 @@
 
 /dts-v1/;
 
-/include/ "e500v2_power_isa.dtsi"
+/include/ "e500v1_power_isa.dtsi"
 
 / {
 	model = "MPC8540ADS";
diff --git a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts
index ac381e7b1c60..a2a6c5cf852e 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8541cds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8541cds.dts
@@ -7,7 +7,7 @@
 
 /dts-v1/;
 
-/include/ "e500v2_power_isa.dtsi"
+/include/ "e500v1_power_isa.dtsi"
 
 / {
 	model = "MPC8541CDS";
diff --git a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts
index 9f58db2a7e66..901b6ff06dfb 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8555cds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8555cds.dts
@@ -7,7 +7,7 @@
 
 /dts-v1/;
 
-/include/ "e500v2_power_isa.dtsi"
+/include/ "e500v1_power_isa.dtsi"
 
 / {
 	model = "MPC8555CDS";
diff --git a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts
index a24722ccaebf..c2f9aea78b29 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8560ads.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8560ads.dts
@@ -7,7 +7,7 @@
 
 /dts-v1/;
 
-/include/ "e500v2_power_isa.dtsi"
+/include/ "e500v1_power_isa.dtsi"
 
 / {
 	model = "MPC8560ADS";
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index fe6c17c8812a..37a7eb576d02 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -14,6 +14,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "KSI8560";
 	compatible = "emerson,KSI8560";
diff --git a/arch/powerpc/boot/dts/mgcoge.dts b/arch/powerpc/boot/dts/mgcoge.dts
index 7de068991bde..9cefed207234 100644
--- a/arch/powerpc/boot/dts/mgcoge.dts
+++ b/arch/powerpc/boot/dts/mgcoge.dts
@@ -225,13 +225,6 @@
 				interrupts = <2 8>;
 				interrupt-parent = <&PIC>;
 				cs-gpios = < &cpm2_pio_d 19 0>;
-				#address-cells = <1>;
-				#size-cells = <0>;
-				ds3106@1 {
-					compatible = "gen,spidev";
-					reg = <0>;
-					spi-max-frequency = <8000000>;
-				};
 			};
 
 		};
diff --git a/arch/powerpc/boot/dts/stx_gp3_8560.dts b/arch/powerpc/boot/dts/stx_gp3_8560.dts
index d1ab698eef36..e73f7e75b0b4 100644
--- a/arch/powerpc/boot/dts/stx_gp3_8560.dts
+++ b/arch/powerpc/boot/dts/stx_gp3_8560.dts
@@ -7,6 +7,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "stx,gp3";
 	compatible = "stx,gp3-8560", "stx,gp3";
diff --git a/arch/powerpc/boot/dts/stxssa8555.dts b/arch/powerpc/boot/dts/stxssa8555.dts
index 5dca2a91c41f..96add25c904b 100644
--- a/arch/powerpc/boot/dts/stxssa8555.dts
+++ b/arch/powerpc/boot/dts/stxssa8555.dts
@@ -9,6 +9,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "stx,gp3";
         compatible = "stx,gp3-8560", "stx,gp3";
diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts
index 9c1eb9779108..eb4d8fd3f7aa 100644
--- a/arch/powerpc/boot/dts/tqm8540.dts
+++ b/arch/powerpc/boot/dts/tqm8540.dts
@@ -7,6 +7,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "tqc,tqm8540";
 	compatible = "tqc,tqm8540";
diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts
index 44595cf675d0..fe5d3d873ec9 100644
--- a/arch/powerpc/boot/dts/tqm8541.dts
+++ b/arch/powerpc/boot/dts/tqm8541.dts
@@ -7,6 +7,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "tqc,tqm8541";
 	compatible = "tqc,tqm8541";
diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts
index 54f3e82907d6..4be05b7d225d 100644
--- a/arch/powerpc/boot/dts/tqm8555.dts
+++ b/arch/powerpc/boot/dts/tqm8555.dts
@@ -7,6 +7,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "tqc,tqm8555";
 	compatible = "tqc,tqm8555";
diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts
index 7415cb69f60d..8ea48502420b 100644
--- a/arch/powerpc/boot/dts/tqm8560.dts
+++ b/arch/powerpc/boot/dts/tqm8560.dts
@@ -8,6 +8,8 @@
 
 /dts-v1/;
 
+/include/ "fsl/e500v1_power_isa.dtsi"
+
 / {
 	model = "tqc,tqm8560";
 	compatible = "tqc,tqm8560";
diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts
index 12e08271e61f..045af668e928 100644
--- a/arch/powerpc/boot/dts/turris1x.dts
+++ b/arch/powerpc/boot/dts/turris1x.dts
@@ -147,7 +147,7 @@
 
 					port@0 {
 						reg = <0>;
-						label = "cpu1";
+						label = "cpu";
 						ethernet = <&enet1>;
 						phy-mode = "rgmii-id";
 
@@ -184,7 +184,7 @@
 
 					port@6 {
 						reg = <6>;
-						label = "cpu0";
+						label = "cpu";
 						ethernet = <&enet0>;
 						phy-mode = "rgmii-id";
 
@@ -263,21 +263,21 @@
 				};
 
 				partition@20000 {
-					/* 1.7 MB for Rescue Linux Kernel Image */
+					/* 1.7 MB for Linux Kernel Image */
 					reg = <0x00020000 0x001a0000>;
-					label = "rescue-kernel";
+					label = "kernel";
 				};
 
 				partition@1c0000 {
 					/* 1.5 MB for Rescue JFFS2 Root File System */
 					reg = <0x001c0000 0x00180000>;
-					label = "rescue-rootfs";
+					label = "rescue";
 				};
 
 				partition@340000 {
-					/* 11 MB for TAR.XZ Backup with content of NAND Root File System */
+					/* 11 MB for TAR.XZ Archive with Factory content of NAND Root File System */
 					reg = <0x00340000 0x00b00000>;
-					label = "backup-rootfs";
+					label = "factory";
 				};
 
 				partition@e40000 {
diff --git a/arch/powerpc/boot/dummy.c b/arch/powerpc/boot/dummy.c
deleted file mode 100644
index 31dbf45bf99c..000000000000
--- a/arch/powerpc/boot/dummy.c
+++ /dev/null
@@ -1,4 +0,0 @@
-int main(void)
-{
-	return 0;
-}
diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S
index ad0e15d930c4..1f2f330a459e 100644
--- a/arch/powerpc/boot/opal-calls.S
+++ b/arch/powerpc/boot/opal-calls.S
@@ -16,7 +16,7 @@ opal_kentry:
 	li	r5, 0
 	li	r6, 0
 	li	r7, 0
-	ld	r11,opal@got(r2)
+	LOAD_REG_ADDR(r11, opal)
 	ld	r8,0(r11)
 	ld	r9,8(r11)
 	bctr
@@ -35,7 +35,7 @@ opal_call:
 	mr	r13,r2
 
 	/* Set opal return address */
-	ld	r11,opal_return@got(r2)
+	LOAD_REG_ADDR(r11, opal_return)
 	mtlr	r11
 	mfmsr	r12
 
@@ -45,7 +45,7 @@ opal_call:
 	mtspr	SPRN_HSRR1,r12
 
 	/* load the opal call entry point and base */
-	ld	r11,opal@got(r2)
+	LOAD_REG_ADDR(r11, opal)
 	ld	r12,8(r11)
 	ld	r2,0(r11)
 	mtspr	SPRN_HSRR0,r12
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index 8334bc3cbe49..a40c2162a4e9 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -1,12 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Global definition of all the bootwrapper operations.
  *
  * Author: Mark A. Greer <mgreer@mvista.com>
  *
- * 2006 (c) MontaVista Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2006 (c) MontaVista Software, Inc.
  */
 #ifndef _PPC_BOOT_OPS_H_
 #define _PPC_BOOT_OPS_H_
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index 192b97523b05..a66cfd76fa4d 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -84,4 +84,14 @@
 #define MFTBU(dest)			mfspr dest, SPRN_TBRU
 #endif
 
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+#define LOAD_REG_ADDR(reg,name)			\
+	addis	reg,r2,name@toc@ha;		\
+	addi	reg,reg,name@toc@l
+#else
+#define LOAD_REG_ADDR(reg,name)			\
+	lis	reg,name@ha;			\
+	addi	reg,reg,name@l
+#endif
+
 #endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 54d2522be485..c6d32a8c3612 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Generic serial console support
  *
@@ -6,10 +7,7 @@
  * Code in serial_edit_cmdline() copied from <file:arch/ppc/boot/simple/misc.c>
  * and was written by Matt Porter <mporter@kernel.crashing.org>.
  *
- * 2001,2006 (c) MontaVista Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2001,2006 (c) MontaVista Software, Inc.
  */
 #include <stdarg.h>
 #include <stddef.h>
diff --git a/arch/powerpc/boot/simple_alloc.c b/arch/powerpc/boot/simple_alloc.c
index 65ec135d0157..267d6524caac 100644
--- a/arch/powerpc/boot/simple_alloc.c
+++ b/arch/powerpc/boot/simple_alloc.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Implement primitive realloc(3) functionality.
  *
  * Author: Mark A. Greer <mgreer@mvista.com>
  *
- * 2006 (c) MontaVista, Software, Inc.  This file is licensed under
- * the terms of the GNU General Public License version 2.  This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2006 (c) MontaVista, Software, Inc.
  */
 
 #include <stddef.h>
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 55978f32fa77..5bdd4dd20bbb 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -433,7 +433,7 @@ fi
 # Extract kernel version information, some platforms want to include
 # it in the image header
 version=`${CROSS}strings "$kernel" | grep '^Linux version [-0-9.]' | \
-    cut -d' ' -f3`
+    head -n1 | cut -d' ' -f3`
 if [ -n "$version" ]; then
     uboot_version="-n Linux-$version"
 fi
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index cbcae2a927e9..4e3373381ab6 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -77,6 +77,5 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRC_T10DIF=y
-# CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index f29c166998af..ea719898b581 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -30,7 +30,7 @@ CONFIG_PREEMPT=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_BINFMT_MISC=m
 CONFIG_MATH_EMULATION=y
-CONFIG_FORCE_MAX_ZONEORDER=17
+CONFIG_ARCH_FORCE_MAX_ORDER=17
 CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_MSI=y
@@ -74,7 +74,6 @@ CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_FSL_ELBC=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
diff --git a/arch/powerpc/configs/85xx/ppa8548_defconfig b/arch/powerpc/configs/85xx/ppa8548_defconfig
index 190978a5b7d5..4bd5f993d26a 100644
--- a/arch/powerpc/configs/85xx/ppa8548_defconfig
+++ b/arch/powerpc/configs/85xx/ppa8548_defconfig
@@ -7,9 +7,7 @@ CONFIG_RAPIDIO=y
 CONFIG_FSL_RIO=y
 CONFIG_RAPIDIO_DMA_ENGINE=y
 CONFIG_RAPIDIO_ENUM_BASIC=y
-CONFIG_RAPIDIO_TSI57X=y
 CONFIG_RAPIDIO_CPS_XX=y
-CONFIG_RAPIDIO_TSI568=y
 CONFIG_RAPIDIO_CPS_GEN2=y
 CONFIG_ADVANCED_OPTIONS=y
 CONFIG_LOWMEM_SIZE_BOOL=y
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 7fd9e596ea33..06391cc2af3a 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -195,7 +195,6 @@ CONFIG_NLS_ISO8859_9=m
 CONFIG_NLS_ISO8859_13=m
 CONFIG_NLS_ISO8859_14=m
 CONFIG_NLS_ISO8859_15=m
-# CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_MUTEXES=y
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
index f14c6dbd7346..ab8a8c4530d9 100644
--- a/arch/powerpc/configs/fsl-emb-nonhw.config
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -41,7 +41,7 @@ CONFIG_FIXED_PHY=y
 CONFIG_FONT_8x16=y
 CONFIG_FONT_8x8=y
 CONFIG_FONTS=y
-CONFIG_FORCE_MAX_ZONEORDER=13
+CONFIG_ARCH_FORCE_MAX_ORDER=13
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAME_WARN=1024
 CONFIG_FTL=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 9d6212a8b195..71d9d112c0b6 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -119,7 +119,6 @@ CONFIG_INPUT_EVDEV=y
 # CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 # CONFIG_HW_RANDOM is not set
-CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_AGP=m
 CONFIG_AGP_UNINORTH=m
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
index eff933ebbb9e..ea2dbd778aad 100644
--- a/arch/powerpc/configs/microwatt_defconfig
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -75,7 +75,12 @@ CONFIG_SPI_BITBANG=y
 CONFIG_SPI_SPIDEV=y
 # CONFIG_HWMON is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_LITEX=y
 # CONFIG_VIRTIO_MENU is not set
+CONFIG_COMMON_CLK=y
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_NVMEM is not set
 CONFIG_EXT4_FS=y
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index e75d3f3060c9..10fe061c5e6d 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -114,5 +114,4 @@ CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_MUST_CHECK is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 700115d85d6f..56b876e418e9 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -78,4 +78,4 @@ CONFIG_DEBUG_VM_PGTABLE=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_BDI_SWITCH=y
 CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_PTDUMP=y
+CONFIG_GENERIC_PTDUMP=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index e00a703581c3..96aa5355911f 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -92,7 +92,6 @@ CONFIG_LEGACY_PTY_COUNT=4
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
-CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PASEMI=y
 CONFIG_SENSORS_LM85=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 13885ec563d1..019163c2571e 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -284,7 +284,6 @@ CONFIG_BOOTX_TEXT=y
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 49f49c263935..c92652575064 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -17,7 +17,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
 CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
@@ -50,6 +49,7 @@ CONFIG_CPU_IDLE=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
 CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_PPC_UV=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_KEXEC=y
 CONFIG_KEXEC_FILE=y
@@ -65,6 +65,8 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_SCHED_SMT=y
 CONFIG_PM=y
 CONFIG_HOTPLUG_PCI=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_DEVICE_PRIVATE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -252,7 +254,6 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_GENERIC=y
 # CONFIG_VIRTIO_MENU is not set
 CONFIG_LIBNVDIMM=y
-# CONFIG_ND_BLK is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
@@ -325,13 +326,11 @@ CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index c8b0e80d613b..d6949a6c5b2b 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -1,7 +1,9 @@
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_VIRT_CPU_ACCOUNTING_GEN=y
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_IKCONFIG=y
@@ -213,7 +215,6 @@ CONFIG_HVC_RTAS=y
 CONFIG_HVCS=m
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
-CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_AMD8111=y
 CONFIG_I2C_PASEMI=y
@@ -342,13 +343,11 @@ CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 5cf49a515f8e..f97a2d31bbf7 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -118,7 +118,6 @@ CONFIG_INPUT_MISC=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_AMD8111=y
 CONFIG_FB=y
@@ -234,13 +233,11 @@ CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index a24f484bfbd2..d23deb94b36e 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -321,7 +321,6 @@ CONFIG_PNP=y
 CONFIG_ISAPNP=y
 CONFIG_MAC_FLOPPY=m
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=16384
@@ -590,7 +589,6 @@ CONFIG_GAMEPORT_EMU10K1=m
 CONFIG_GAMEPORT_FM801=m
 # CONFIG_LEGACY_PTYS is not set
 CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_ROCKETPORT=m
 CONFIG_SYNCLINK_GT=m
 CONFIG_NOZOMI=m
 CONFIG_N_HDLC=m
@@ -1107,13 +1105,9 @@ CONFIG_CRYPTO_XTS=m
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA1=y
 CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
@@ -1121,7 +1115,6 @@ CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 2d9ac233da68..0a1b42c4f26a 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -165,6 +165,5 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60
 # CONFIG_FTRACE is not set
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index b571d084c148..7497e17ea657 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -3,8 +3,10 @@ CONFIG_NR_CPUS=2048
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y
+# CONFIG_CONTEXT_TRACKING_USER_FORCE is not set
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_VIRT_CPU_ACCOUNTING_GEN=y
 CONFIG_TASKSTATS=y
 CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
@@ -16,7 +18,6 @@ CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
 CONFIG_NUMA_BALANCING=y
 CONFIG_CGROUPS=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
@@ -40,6 +41,7 @@ CONFIG_PPC_SPLPAR=y
 CONFIG_DTL=y
 CONFIG_PPC_SMLPAR=y
 CONFIG_IBMEBUS=y
+CONFIG_LIBNVDIMM=m
 CONFIG_PAPR_SCM=m
 CONFIG_PPC_SVM=y
 # CONFIG_PPC_PMAC is not set
@@ -187,7 +189,6 @@ CONFIG_HVC_RTAS=y
 CONFIG_HVCS=m
 CONFIG_VIRTIO_CONSOLE=m
 CONFIG_IBM_BSR=m
-CONFIG_RAW_DRIVER=y
 CONFIG_I2C_CHARDEV=y
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
@@ -302,13 +303,11 @@ CONFIG_CRYPTO_MD5_PPC=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_SHA1_PPC=m
 CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index f491875700e8..e0964210f259 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -133,7 +133,6 @@ CONFIG_ACENIC_OMIT_TIGON_I=y
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_AURORA is not set
 CONFIG_TIGON3=m
 CONFIG_BNX2X=m
 # CONFIG_NET_VENDOR_BROCADE is not set
@@ -274,7 +273,6 @@ CONFIG_NLS_UTF8=y
 CONFIG_ENCRYPTED_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_HARDENED_USERCOPY=y
-CONFIG_HARDENED_USERCOPY_PAGESPAN=y
 CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_LOCKDOWN_LSM=y
 CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index 47dcfaddc1ac..7a978d396991 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -76,4 +76,3 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_UTF8=y
 CONFIG_CRC_T10DIF=y
-# CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
new file mode 100644
index 000000000000..c1b964447401
--- /dev/null
+++ b/arch/powerpc/crypto/Kconfig
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (powerpc)"
+
+config CRYPTO_CRC32C_VPMSUM
+	tristate "CRC32c"
+	depends on PPC64 && ALTIVEC
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
+
+	  Architecture: powerpc64 using
+	  - AltiVec extensions
+
+	  Enable on POWER8 and newer processors for improved performance.
+
+config CRYPTO_CRCT10DIF_VPMSUM
+	tristate "CRC32T10DIF"
+	depends on PPC64 && ALTIVEC && CRC_T10DIF
+	select CRYPTO_HASH
+	help
+	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
+
+	  Architecture: powerpc64 using
+	  - AltiVec extensions
+
+	  Enable on POWER8 and newer processors for improved performance.
+
+config CRYPTO_VPMSUM_TESTER
+	tristate "CRC32c and CRC32T10DIF hardware acceleration tester"
+	depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM
+	help
+	  Stress test for CRC32c and CRCT10DIF algorithms implemented with
+	  powerpc64 AltiVec extensions (POWER8 vpmsum instructions).
+	  Unless you are testing these algorithms, you don't need this.
+
+config CRYPTO_MD5_PPC
+	tristate "Digests: MD5"
+	depends on PPC
+	select CRYPTO_HASH
+	help
+	  MD5 message digest algorithm (RFC1321)
+
+	  Architecture: powerpc
+
+config CRYPTO_SHA1_PPC
+	tristate "Hash functions: SHA-1"
+	depends on PPC
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: powerpc
+
+config CRYPTO_SHA1_PPC_SPE
+	tristate "Hash functions: SHA-1 (SPE)"
+	depends on PPC && SPE
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: powerpc using
+	  - SPE (Signal Processing Engine) extensions
+
+config CRYPTO_SHA256_PPC_SPE
+	tristate "Hash functions: SHA-224 and SHA-256 (SPE)"
+	depends on PPC && SPE
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: powerpc using
+	  - SPE (Signal Processing Engine) extensions
+
+config CRYPTO_AES_PPC_SPE
+	tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
+	depends on PPC && SPE
+	select CRYPTO_SKCIPHER
+	help
+	  Block ciphers: AES cipher algorithms (FIPS-197)
+	  Length-preserving ciphers: AES with ECB, CBC, CTR, and XTS modes
+
+	  Architecture: powerpc using:
+	  - SPE (Signal Processing Engine) extensions
+
+	  SPE is available for:
+	  - Processor Type: Freescale 8500
+	  - CPU selection: e500 (8540)
+
+	  This module should only be used for low power (router) devices
+	  without hardware AES acceleration (e.g. caam crypto). It reduces the
+	  size of the AES tables from 16KB to 8KB + 256 bytes and mitigates
+	  timing attacks. Nevertheless it might not be as secure as other
+	  architecture-specific assembler implementations that work on 1KB
+	  tables or 256 bytes S-boxes.
+
+endmenu
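
Note: nothing selects these implementations directly; they register with the
generic crypto API under the usual algorithm names (at higher priority than
the C fallbacks), so existing callers pick them up automatically. A minimal
kernel-context sketch, assuming only the standard shash interface; with
CRYPTO_CRC32C_VPMSUM loaded, the "crc32c" request below is expected to
resolve to the vpmsum implementation:

#include <linux/err.h>
#include <crypto/hash.h>

/* Compute a CRC32c digest (4 bytes) over a buffer via the shash API. */
static int crc32c_digest(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_shash *tfm = crypto_alloc_shash("crc32c", 0, 0);
	int ret;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		ret = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return ret;
}
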
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 81631e64dbeb..274bce76f5da 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -36,6 +36,7 @@ int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
 		    int64_t opcode, uint64_t msr);
 
 /* misc runtime */
+void enable_machine_check(void);
 extern u64 __bswapdi2(u64);
 extern s64 __lshrdi3(s64, int);
 extern s64 __ashldi3(s64, int);
@@ -55,19 +56,6 @@ struct kvm_vcpu;
 void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 
-/* Patch sites */
-extern s32 patch__call_flush_branch_caches1;
-extern s32 patch__call_flush_branch_caches2;
-extern s32 patch__call_flush_branch_caches3;
-extern s32 patch__flush_count_cache_return;
-extern s32 patch__flush_link_stack_return;
-extern s32 patch__call_kvm_flush_link_stack;
-extern s32 patch__call_kvm_flush_link_stack_p9;
-extern s32 patch__memset_nocache, patch__memcpy_nocache;
-
-extern long flush_branch_caches;
-extern long kvm_flush_link_stack;
-
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
 void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index ef2d8b15eaab..e80b2c0e9315 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -86,7 +86,7 @@ do {									\
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #define NOSPEC_BARRIER_SLOT   nop
-#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#elif defined(CONFIG_PPC_E500)
 #define NOSPEC_BARRIER_SLOT   nop; nop
 #endif
 
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 40041ac713d9..75823f39e042 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -112,31 +112,11 @@ static inline bool pte_user(pte_t pte)
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
 #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
 #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
 #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
-/*
- * Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
-
-/* Advertise special mapping type for AGP */
-#define PAGE_AGP		(PAGE_KERNEL_NC)
-#define HAVE_PAGE_AGP
-
 #define PTE_INDEX_SIZE	PTE_SHIFT
 #define PMD_INDEX_SIZE	0
 #define PUD_INDEX_SIZE	0
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index e1af0b394ceb..dd2cff53a111 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -113,9 +113,11 @@ static inline void __pud_free(pud_t *pud)
 
 	/*
 	 * Early pud pages allocated via memblock allocator
-	 * can't be directly freed to slab
+	 * can't be directly freed to slab. KFENCE pages have
+	 * both the reserved and slab flags set, so they need to be
+	 * freed via kmem_cache_free().
 	 */
-	if (PageReserved(page))
+	if (PageReserved(page) && !PageSlab(page))
 		free_reserved_page(page);
 	else
 		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index 4e697bc2f4cd..48f21820afe2 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -26,16 +26,6 @@ static inline int pud_huge(pud_t pud)
 	return 0;
 }
 
-static inline int pgd_huge(pgd_t pgd)
-{
-	/*
-	 * leaf pte for huge page
-	 */
-	if (radix_enabled())
-		return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE));
-	return 0;
-}
-#define pgd_huge pgd_huge
 /*
  * With radix , we have hugepage ptes in the pud and pmd entries. We don't
  * need to setup hugepage directory for them. Our pte and page directory format
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index 34d1018896b3..2fce3498b000 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -30,15 +30,6 @@ static inline int pud_huge(pud_t pud)
 	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
 
-static inline int pgd_huge(pgd_t pgd)
-{
-	/*
-	 * leaf pte for huge page
-	 */
-	return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE));
-}
-#define pgd_huge pgd_huge
-
 /*
  * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
  * need to setup hugepage directory for them. Our pte and page directory format
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 392ff48f77df..c436d8422654 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -117,8 +117,7 @@
 #define _PAGE_KERNEL_RW		(_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
 #define _PAGE_KERNEL_RO		 (_PAGE_PRIVILEGED | _PAGE_READ)
 #define _PAGE_KERNEL_ROX	 (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
-#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY |	\
-				 _PAGE_RW | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX	(_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
 /*
  * _PAGE_CHG_MASK masks of bits that are to be preserved across
  * pgprot changes
@@ -151,33 +150,17 @@
 #define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
 #define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_READ)
 #define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
+/* Radix only, Hash uses PAGE_READONLY_X + execute-only pkey instead */
+#define PAGE_EXECONLY	__pgprot(_PAGE_BASE | _PAGE_EXEC)
 
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_TOLERANT)
-#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NON_IDEMPOTENT)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NON_IDEMPOTENT)
 #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
 #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
 #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
-/*
- * Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
-	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
-#define PAGE_AGP		(PAGE_KERNEL_NC)
-
 #ifndef __ASSEMBLY__
 /*
  * page table defines
@@ -333,9 +316,6 @@ extern unsigned long pci_io_base;
 #define IOREMAP_END	(KERN_IO_END - FIXADDR_SIZE)
 #define FIXADDR_SIZE	SZ_32M
 
-/* Advertise special mapping type for AGP */
-#define HAVE_PAGE_AGP
-
 #ifndef __ASSEMBLY__
 
 /*
@@ -411,6 +391,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
  * event of it not getting flushed for a long time the delay
  * shouldn't really matter because there's no real memory
  * pressure for swapout to react to. ]
+ *
+ * Note: this optimisation also exists in pte_needs_flush() and
+ * huge_pmd_needs_flush().
  */
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young ptep_test_and_clear_young
@@ -1123,7 +1106,7 @@ static inline void vmemmap_remove_mapping(unsigned long start,
 }
 #endif
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 static inline void __kernel_map_pages(struct page *page, int numpages, int enable)
 {
 	if (radix_enabled())
@@ -1457,12 +1440,5 @@ static inline bool pud_is_leaf(pud_t pud)
 	return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
 
-#define p4d_is_leaf p4d_is_leaf
-#define p4d_leaf p4d_is_leaf
-static inline bool p4d_is_leaf(p4d_t p4d)
-{
-	return !!(p4d_raw(p4d) & cpu_to_be64(_PAGE_PTE));
-}
-
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
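
Note: the new PAGE_EXECONLY above is what radix uses to back userspace
mappings requested with PROT_EXEC but not PROT_READ; as its comment says,
hash keeps approximating this with PAGE_READONLY_X plus an execute-only
pkey. A hedged userspace sketch of requesting such a mapping; whether plain
loads from it actually fault depends on the MMU providing true execute-only
protection:

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	/* Ask for an execute-only anonymous mapping. */
	void *p = mmap(NULL, page, PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	printf("execute-only mapping at %p\n", p);
	/* Reading *p here would fault on an MMU with real exec-only PTEs. */
	munmap(p, page);
	return 0;
}
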
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 8b762f282190..fab8332fe1ad 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -112,13 +112,11 @@ static inline void hash__flush_tlb_kernel_range(unsigned long start,
 
 struct mmu_gather;
 extern void hash__tlb_flush(struct mmu_gather *tlb);
-void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
 
 #ifdef CONFIG_PPC_64S_HASH_MMU
 /* Private function for use by PCI IO mapping code */
 extern void __flush_hash_table_range(unsigned long start, unsigned long end);
-extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
-				unsigned long addr);
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
 #else
 static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { }
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 206f920fe5b9..67655cd60545 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -163,6 +163,62 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
 	 */
 }
 
+static inline bool __pte_flags_need_flush(unsigned long oldval,
+					  unsigned long newval)
+{
+	unsigned long delta = oldval ^ newval;
+
+	/*
+	 * The return value of this function doesn't matter for hash,
+	 * ptep_modify_prot_start() does a pte_update() which does or schedules
+	 * any necessary hash table update and flush.
+	 */
+	if (!radix_enabled())
+		return true;
+
+	/*
+	 * We do not expect kernel mappings or non-PTEs or not-present PTEs.
+	 */
+	VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED);
+	VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED);
+	VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
+	VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
+	VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));
+	VM_WARN_ON_ONCE(!(newval & _PAGE_PRESENT));
+
+	/*
+	 * Must flush on any change except READ, WRITE, EXEC, DIRTY, ACCESSED.
+	 *
+	 * In theory, some changed software bits could be tolerated; in
+	 * practice those should rarely if ever matter.
+	 */
+
+	if (delta & ~(_PAGE_RWX | _PAGE_DIRTY | _PAGE_ACCESSED))
+		return true;
+
+	/*
+	 * If any of the above was present in old but cleared in new, flush.
+	 * With the exception of _PAGE_ACCESSED, don't worry about flushing
+	 * if that was cleared (see the comment in ptep_clear_flush_young()).
+	 */
+	if ((delta & ~_PAGE_ACCESSED) & oldval)
+		return true;
+
+	return false;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+	return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+	return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
 extern bool tlbie_capable;
 extern bool tlbie_enabled;
 
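Note: the flush heuristic above is self-contained bit logic, so it can be
exercised outside the kernel. A standalone sketch with made-up stand-in flag
values (not the real powerpc PTE bit assignments): setting a bit inside the
tolerated RWX/DIRTY/ACCESSED mask needs no flush, while clearing one of
those bits, or touching anything outside the mask, does:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative stand-in flag values, not the real powerpc assignments. */
#define _PAGE_READ	0x01UL
#define _PAGE_WRITE	0x02UL
#define _PAGE_EXEC	0x04UL
#define _PAGE_RWX	(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
#define _PAGE_DIRTY	0x08UL
#define _PAGE_ACCESSED	0x10UL
#define _PAGE_PRESENT	0x20UL

static bool pte_flags_need_flush(unsigned long oldval, unsigned long newval)
{
	unsigned long delta = oldval ^ newval;

	/* Any change outside RWX/DIRTY/ACCESSED must flush. */
	if (delta & ~(_PAGE_RWX | _PAGE_DIRTY | _PAGE_ACCESSED))
		return true;
	/* Clearing a tolerated bit (other than ACCESSED) must flush. */
	if ((delta & ~_PAGE_ACCESSED) & oldval)
		return true;
	return false;
}

int main(void)
{
	unsigned long ro = _PAGE_PRESENT | _PAGE_READ;
	unsigned long rw = ro | _PAGE_WRITE;

	printf("grant write:   flush=%d\n", pte_flags_need_flush(ro, rw)); /* 0 */
	printf("revoke write:  flush=%d\n", pte_flags_need_flush(rw, ro)); /* 1 */
	printf("clear present: flush=%d\n",
	       pte_flags_need_flush(ro, ro & ~_PAGE_PRESENT));            /* 1 */
	return 0;
}

This is the asymmetry the surrounding comments describe: relaxing
permissions can rely on a later spurious fault, revoking them cannot.
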
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index e8269434ecbe..d18b748ea3ae 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -25,7 +25,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				     unsigned long size, pgprot_t vma_prot);
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 
-#if defined(CONFIG_PPC32) || defined(CONFIG_PPC_64S_HASH_MMU)
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+
 /*
  * This gets called at the end of handling a page fault, when
  * the kernel has put a new PTE into the page table for the process.
@@ -35,10 +36,14 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
  * corresponding HPTE into the hash table ahead of time, instead of
  * waiting for the inevitable extra hash-table miss exception.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
-#else
-static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) {}
-#endif
+static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+	if (IS_ENABLED(CONFIG_PPC32) && !mmu_has_feature(MMU_FTR_HPTE_TABLE))
+		return;
+	if (radix_enabled())
+		return;
+	__update_mmu_cache(vma, address, ptep);
+}
 
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/cpu_setup.h b/arch/powerpc/include/asm/cpu_setup.h
new file mode 100644
index 000000000000..30e2fe389502
--- /dev/null
+++ b/arch/powerpc/include/asm/cpu_setup.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_CPU_SETUP_H
+#define _ASM_POWERPC_CPU_SETUP_H
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_power7(void);
+void __restore_cpu_power8(void);
+void __restore_cpu_power9(void);
+void __restore_cpu_power10(void);
+
+void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440ep(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440epx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440gx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440grx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440spe(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440x5(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460ex(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460gt(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_603(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_604(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750cx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750fx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_7400(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_7410(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_745x(unsigned long offset, struct cpu_spec *spec);
+
+void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_pa6t(void);
+void __restore_cpu_ppc970(void);
+
+void __setup_cpu_e5500(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e6500(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_e5500(void);
+void __restore_cpu_e6500(void);
+#endif /* _ASM_POWERPC_CPU_SETUP_H */
diff --git a/arch/powerpc/include/asm/cpu_setup_power.h b/arch/powerpc/include/asm/cpu_setup_power.h
deleted file mode 100644
index 24be9131f803..000000000000
--- a/arch/powerpc/include/asm/cpu_setup_power.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2020 IBM Corporation
- */
-void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec);
-void __restore_cpu_power7(void);
-void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec);
-void __restore_cpu_power8(void);
-void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec);
-void __restore_cpu_power9(void);
-void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec);
-void __restore_cpu_power10(void);
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index ae8c3e13cfce..757dbded11dc 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -463,7 +463,7 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_COMPATIBLE	(CPU_FTR_PPCAS_ARCH_V2)
 
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 #define CPU_FTRS_POSSIBLE	(CPU_FTRS_E6500 | CPU_FTRS_E5500)
 #else
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
@@ -510,7 +510,7 @@ enum {
 #elif defined(CONFIG_44x)
 	    CPU_FTRS_44X | CPU_FTRS_440x6 |
 #endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 	    CPU_FTRS_E500 | CPU_FTRS_E500_2 |
 #endif
 #ifdef CONFIG_PPC_E500MC
@@ -521,7 +521,7 @@ enum {
 #endif /* __powerpc64__ */
 
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 #define CPU_FTRS_ALWAYS		(CPU_FTRS_E6500 & CPU_FTRS_E5500)
 #else
 
@@ -584,7 +584,7 @@ enum {
 #elif defined(CONFIG_44x)
 	    CPU_FTRS_44X & CPU_FTRS_440x6 &
 #endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 	    CPU_FTRS_E500 & CPU_FTRS_E500_2 &
 #endif
 #ifdef CONFIG_PPC_E500MC
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 6d2b27997492..431ae2343022 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -95,7 +95,7 @@ static notrace inline void account_stolen_time(void)
 		struct lppaca *lp = local_paca->lppaca_ptr;
 
 		if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
-			accumulate_stolen_time();
+			pseries_accumulate_stolen_time();
 	}
 #endif
 }
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
index 1625888f27ef..4bcb9f9ac764 100644
--- a/arch/powerpc/include/asm/dtl.h
+++ b/arch/powerpc/include/asm/dtl.h
@@ -37,14 +37,6 @@ struct dtl_entry {
 extern struct kmem_cache *dtl_cache;
 extern rwlock_t dtl_access_lock;
 
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
- * reading from the dispatch trace log.  If other code wants to consume
- * DTL entries, it can set this pointer to a function that will get
- * called once for each DTL entry that gets processed.
- */
-extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
-
 extern void register_dtl_buffer(int cpu);
 extern void alloc_dtl_buffers(unsigned long *time_limit);
 extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 32ce0fb7548f..ea71f7245a63 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -7,8 +7,8 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/book3s/64/hugetlb.h>
-#elif defined(CONFIG_PPC_FSL_BOOK3E)
-#include <asm/nohash/hugetlb-book3e.h>
+#elif defined(CONFIG_PPC_E500)
+#include <asm/nohash/hugetlb-e500.h>
 #elif defined(CONFIG_PPC_8xx)
 #include <asm/nohash/32/hugetlb-8xx.h>
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 983551859891..77fa88c2aed0 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -157,36 +157,18 @@ static inline notrace void irq_soft_mask_set(unsigned long mask)
 
 static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
 {
-	unsigned long flags;
-
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
-	WARN_ON(mask && !(mask & IRQS_DISABLED));
-#endif
+	unsigned long flags = irq_soft_mask_return();
 
-	asm volatile(
-		"lbz %0,%1(13); stb %2,%1(13)"
-		: "=&r" (flags)
-		: "i" (offsetof(struct paca_struct, irq_soft_mask)),
-		  "r" (mask)
-		: "memory");
+	irq_soft_mask_set(mask);
 
 	return flags;
 }
 
 static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
 {
-	unsigned long flags, tmp;
-
-	asm volatile(
-		"lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
-		: "=&r" (flags), "=r" (tmp)
-		: "i" (offsetof(struct paca_struct, irq_soft_mask)),
-		  "r" (mask)
-		: "memory");
+	unsigned long flags = irq_soft_mask_return();
 
-#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
-	WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
-#endif
+	irq_soft_mask_set(flags | mask);
 
 	return flags;
 }
@@ -489,6 +471,29 @@ static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned l
 }
 #endif /* CONFIG_PPC64 */
 
+static inline unsigned long mtmsr_isync_irqsafe(unsigned long msr)
+{
+#ifdef CONFIG_PPC64
+	if (arch_irqs_disabled()) {
+		/*
+		 * With soft-masking, MSR[EE] can change from 1 to 0
+		 * asynchronously when irqs are disabled, and we don't want to
+		 * set MSR[EE] back to 1 here if that has happened. A race-free
+		 * way to do this is ensure EE is already 0. Another way it
+		 * could be done is with a RESTART_TABLE handler, but that's
+		 * probably overkill here.
+		 */
+		msr &= ~MSR_EE;
+		mtmsr_isync(msr);
+		irq_soft_mask_set(IRQS_ALL_DISABLED);
+		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+	} else
+#endif
+		mtmsr_isync(msr);
+
+	return msr;
+}
+
 #define ARCH_IRQ_INIT_FLAGS	IRQ_NOREQUEST
 
 #endif  /* __ASSEMBLY__ */
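
Note: the rewritten irq_soft_mask_set_return() and irq_soft_mask_or_return()
above drop the hand-rolled lbz/stb sequences and become plain read-then-write
compositions of irq_soft_mask_return() and irq_soft_mask_set(). A standalone
model of that composition, with an ordinary variable standing in for
local_paca->irq_soft_mask; the real code is only safe because the field is
per-CPU and updated in contexts that exclude reentrancy, which this sketch
ignores:

#include <stdio.h>

static unsigned long soft_mask;	/* stand-in for local_paca->irq_soft_mask */

static unsigned long irq_soft_mask_return(void)
{
	return soft_mask;
}

static void irq_soft_mask_set(unsigned long mask)
{
	soft_mask = mask;
}

/* Install a new mask, returning the old one (save + set). */
static unsigned long irq_soft_mask_set_return(unsigned long mask)
{
	unsigned long flags = irq_soft_mask_return();

	irq_soft_mask_set(mask);
	return flags;
}

/* OR bits into the mask, returning the old one (save + disable more). */
static unsigned long irq_soft_mask_or_return(unsigned long mask)
{
	unsigned long flags = irq_soft_mask_return();

	irq_soft_mask_set(flags | mask);
	return flags;
}

int main(void)
{
	unsigned long flags;

	flags = irq_soft_mask_or_return(0x1);	/* save and mask */
	/* ... critical section ... */
	irq_soft_mask_set(flags);		/* restore */
	printf("restored mask: %#lx\n", soft_mask);
	return 0;
}

The save/restore shape at the bottom mirrors how the kernel pairs these
helpers, in the same way local_irq_save()/local_irq_restore() pair.
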
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 8069dbc4b8d1..4745bb9998bd 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -74,6 +74,19 @@
 #include <asm/kprobes.h>
 #include <asm/runlatch.h>
 
+#ifdef CONFIG_PPC64
+/*
+ * WARN/BUG is handled with a program interrupt so minimise checks here to
+ * avoid recursion and maximise the chance of getting the first oops handled.
+ */
+#define INT_SOFT_MASK_BUG_ON(regs, cond)				\
+do {									\
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&		\
+	    (user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM)))	\
+		BUG_ON(cond);						\
+} while (0)
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 extern char __end_soft_masked[];
 bool search_kernel_soft_mask_table(unsigned long addr);
@@ -170,8 +183,7 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 	 * context.
 	 */
 	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
-		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-			BUG_ON(!(regs->msr & MSR_EE));
+		INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE));
 		__hard_irq_enable();
 	} else {
 		__hard_RI_enable();
@@ -194,19 +206,15 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs)
 		 * CT_WARN_ON comes here via program_check_exception,
 		 * so avoid recursion.
 		 */
-		if (TRAP(regs) != INTERRUPT_PROGRAM) {
-			CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
-			if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-				BUG_ON(is_implicit_soft_masked(regs));
-		}
-
-		/* Move this under a debugging check */
-		if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) &&
-				arch_irq_disabled_regs(regs))
-			BUG_ON(search_kernel_restart_table(regs->nip));
+		if (TRAP(regs) != INTERRUPT_PROGRAM)
+			CT_WARN_ON(ct_state() != CONTEXT_KERNEL &&
+				   ct_state() != CONTEXT_IDLE);
+		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
+		INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) &&
+					   search_kernel_restart_table(regs->nip));
 	}
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		BUG_ON(!arch_irq_disabled_regs(regs) && !(regs->msr & MSR_EE));
+	INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
+				   !(regs->msr & MSR_EE));
 #endif
 
 	booke_restore_dbcr0();
@@ -281,7 +289,7 @@ static inline bool nmi_disables_ftrace(struct pt_regs *regs)
 		if (TRAP(regs) == INTERRUPT_PERFMON)
 		       return false;
 	}
-	if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
 		if (TRAP(regs) == INTERRUPT_PERFMON)
 			return false;
 	}
@@ -665,8 +673,7 @@ static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
 		local_irq_enable();
 }
 
-long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8,
-			   unsigned long r0, struct pt_regs *regs);
+long system_call_exception(struct pt_regs *regs, unsigned long r0);
 notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index f8d122d16af4..a1ddba01e7d1 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -3,7 +3,7 @@
 #define _ASM_POWERPC_KEXEC_H
 #ifdef __KERNEL__
 
-#if defined(CONFIG_FSL_BOOKE) || defined(CONFIG_44x)
+#if defined(CONFIG_PPC_85xx) || defined(CONFIG_44x)
 
 /*
  * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index a9846b68c6b9..6fd2b4d486c5 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -11,11 +11,25 @@
 #include <linux/mm.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+#define ARCH_FUNC_PREFIX "."
+#endif
+
 static inline bool arch_kfence_init_pool(void)
 {
 	return true;
 }
 
+#ifdef CONFIG_PPC64
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+	struct page *page = virt_to_page(addr);
+
+	__kernel_map_pages(page, 1, !protect);
+
+	return true;
+}
+#else
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
 {
 	pte_t *kpte = virt_to_kpte(addr);
@@ -29,5 +43,6 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
 
 	return true;
 }
+#endif
 
 #endif /* __ASM_POWERPC_KFENCE_H */
diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h
index a9e098a3b881..715c18b75334 100644
--- a/arch/powerpc/include/asm/kgdb.h
+++ b/arch/powerpc/include/asm/kgdb.h
@@ -52,7 +52,7 @@ static inline void arch_kgdb_breakpoint(void)
 /* On non-E500 family PPC32 we determine the size by picking the last
  * register we need, but on E500 we skip sections so we list what we
  * need to store, and add it up. */
-#ifndef CONFIG_E500
+#ifndef CONFIG_PPC_E500
 #define MAXREG			(PT_FPSCR+1)
 #else
 /* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr*/
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c2b003550dc9..caea15dcb91d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -443,7 +443,7 @@ struct kvmppc_passthru_irqmap {
 };
 #endif
 
-# ifdef CONFIG_PPC_FSL_BOOK3E
+# ifdef CONFIG_PPC_E500
 #define KVMPPC_BOOKE_IAC_NUM	2
 #define KVMPPC_BOOKE_DAC_NUM	2
 # else
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9f625af3b65b..bfacf12784dd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -104,7 +104,6 @@ extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
                            unsigned int gtlb_idx);
-extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
 extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
@@ -153,7 +152,6 @@ extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 extern int kvmppc_booke_init(void);
 extern void kvmppc_booke_exit(void);
 
-extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
 extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
 
@@ -162,8 +160,6 @@ extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
 extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
 extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
 extern void kvmppc_rmap_reset(struct kvm *kvm);
-extern long kvmppc_prepare_vrma(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem);
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 			struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index c390ec377bae..34d44cb17c87 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -104,14 +104,18 @@ struct lppaca {
 	volatile __be32 dispersion_count; /* dispatch changed physical cpu */
 	volatile __be64 cmo_faults;	/* CMO page fault count */
 	volatile __be64 cmo_fault_time;	/* CMO page fault time */
-	u8	reserved10[104];
+	u8	reserved10[64];		/* [S]PURR expropriated/donated */
+	volatile __be64 enqueue_dispatch_tb; /* Total TB enqueue->dispatch */
+	volatile __be64 ready_enqueue_tb; /* Total TB ready->enqueue */
+	volatile __be64 wait_ready_tb;	/* Total TB wait->ready */
+	u8	reserved11[16];
 
 	/* cacheline 4-5 */
 
 	__be32	page_ins;		/* CMO Hint - # page ins by OS */
-	u8	reserved11[148];
+	u8	reserved12[148];
 	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
-	u8	reserved12[96];
+	u8	reserved13[96];
 } ____cacheline_aligned;
 
 #define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
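
Note: the lppaca change above carves the three new timebase accumulators out
of the old reserved10[104] pad instead of growing the structure, so the
fields in cachelines 4-5 keep their offsets. A trivial standalone check of
that arithmetic (64 + 3 * 8 + 16 = 104):

/* Every byte of the old 104-byte pad must be accounted for: 64 bytes kept
 * in reserved10, three 8-byte __be64 accumulators, and a 16-byte
 * reserved11 tail.
 */
int main(void)
{
	_Static_assert(64 + 3 * 8 + 16 == 104,
		       "lppaca pad split changes the structure layout");
	return 0;
}
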
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 8cb83600c434..378b8d5836a7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -204,7 +204,6 @@ struct machdep_calls {
 extern void e500_idle(void);
 extern void power4_idle(void);
 extern void ppc6xx_idle(void);
-extern void book3e_idle(void);
 
 /*
  * ppc_md contains a copy of the machine description structure for the
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 860d0290ca4d..94b981152667 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -120,6 +120,9 @@
  */
 #define MMU_FTR_1T_SEGMENT		ASM_CONST(0x40000000)
 
+// NX paste RMA reject in DSI
+#define MMU_FTR_NX_DSI			ASM_CONST(0x80000000)
+
 /* MMU feature bit sets for various CPUs */
 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	(MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE)
 #define MMU_FTRS_POWER		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
@@ -141,7 +144,7 @@
 
 typedef pte_t *pgtable_t;
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 #include <asm/percpu.h>
 DECLARE_PER_CPU(int, next_tlbcam_idx);
 #endif
@@ -162,7 +165,7 @@ enum {
 #elif defined(CONFIG_44x)
 		MMU_FTR_TYPE_44x |
 #endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 		MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX |
 #endif
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -181,7 +184,7 @@ enum {
 #endif
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
-		MMU_FTR_GTSE |
+		MMU_FTR_GTSE | MMU_FTR_NX_DSI |
 #endif /* CONFIG_PPC_RADIX_MMU */
 #endif
 #ifdef CONFIG_PPC_KUAP
@@ -211,7 +214,7 @@ enum {
 #elif defined(CONFIG_44x)
 #define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_44x
 #endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 #define MMU_FTRS_ALWAYS		MMU_FTR_TYPE_FSL_E
 #endif
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 3f25bd3e14eb..c1ea270bb848 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -31,7 +31,6 @@ extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
 extern long mm_iommu_put(struct mm_struct *mm,
 		struct mm_iommu_table_group_mem_t *mem);
 extern void mm_iommu_init(struct mm_struct *mm);
-extern void mm_iommu_cleanup(struct mm_struct *mm);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
 		unsigned long ua, unsigned long size);
 extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
@@ -117,7 +116,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 }
 #endif
 
-extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
 
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 9091e4904a6b..0d40b33184eb 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -130,10 +130,10 @@ void unmap_kernel_page(unsigned long va);
 #include <asm/nohash/32/pte-40x.h>
 #elif defined(CONFIG_44x)
 #include <asm/nohash/32/pte-44x.h>
-#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
-#include <asm/nohash/pte-book3e.h>
-#elif defined(CONFIG_FSL_BOOKE)
-#include <asm/nohash/32/pte-fsl-booke.h>
+#elif defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT)
+#include <asm/nohash/pte-e500.h>
+#elif defined(CONFIG_PPC_85xx)
+#include <asm/nohash/32/pte-85xx.h>
 #elif defined(CONFIG_PPC_8xx)
 #include <asm/nohash/32/pte-8xx.h>
 #endif
diff --git a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
index 0fc1bd42bb3e..93fb8e11a3f1 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H
-#define _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_85xx_H
+#define _ASM_POWERPC_NOHASH_32_PTE_85xx_H
 #ifdef __KERNEL__
 
 /* PTE bit definitions for Freescale BookE SW loaded TLB MMU based
@@ -71,4 +71,4 @@
 #define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
 
 #endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */
+#endif /* _ASM_POWERPC_NOHASH_32_PTE_85xx_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 599921cc257e..879e9a6e5a87 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -70,7 +70,7 @@
 /*
  * Include the PTE bits definitions
  */
-#include <asm/nohash/pte-book3e.h>
+#include <asm/nohash/pte-e500.h>
 
 #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
 
diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-e500.h
index ecd8694cb229..8f04ad20e040 100644
--- a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
+++ b/arch/powerpc/include/asm/nohash/hugetlb-e500.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
-#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+#ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H
+#define _ASM_POWERPC_NOHASH_HUGETLB_E500_H
 
 static inline pte_t *hugepd_page(hugepd_t hpd)
 {
@@ -30,7 +30,7 @@ void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 
 static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
 {
-	/* We use the old format for PPC_FSL_BOOK3E */
+	/* We use the old format for PPC_E500 */
 	*hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
 }
 
@@ -42,4 +42,4 @@ static inline int check_and_get_huge_psize(int shift)
 	return shift_to_mmu_psize(shift);
 }
 
-#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */
+#endif /* _ASM_POWERPC_NOHASH_HUGETLB_E500_H */
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-e500.h
index e43a418d3ccd..e43a418d3ccd 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-e500.h
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
index edc793e5f08f..e264be219fdb 100644
--- a/arch/powerpc/include/asm/nohash/mmu.h
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -8,9 +8,9 @@
 #elif defined(CONFIG_44x)
 /* 44x-style software loaded TLB */
 #include <asm/nohash/32/mmu-44x.h>
-#elif defined(CONFIG_PPC_BOOK3E_MMU)
+#elif defined(CONFIG_PPC_E500)
 /* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
+#include <asm/nohash/mmu-e500.h>
 #elif defined (CONFIG_PPC_8xx)
 /* Motorola/Freescale 8xx software loaded TLB */
 #include <asm/nohash/32/mmu-8xx.h>
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 29c43665a753..4b62376318e1 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -15,7 +15,7 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 {
 
 }
-#endif /* !CONFIG_PPC_BOOK3E */
+#endif /* !CONFIG_PPC_BOOK3E_64 */
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index b499da6c1a99..d9067dfc531c 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -11,31 +11,11 @@
 /* Permission masks used for kernel mappings */
 #define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
 #define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
 #define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
 #define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
 #define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
-/*
- * Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
-
-/* Advertise special mapping type for AGP */
-#define PAGE_AGP		(PAGE_KERNEL_NC)
-#define HAVE_PAGE_AGP
-
 #ifndef __ASSEMBLY__
 
 /* Generic accessors to PTE bits */
@@ -277,12 +257,6 @@ static inline int pud_huge(pud_t pud)
 	return 0;
 }
 
-static inline int pgd_huge(pgd_t pgd)
-{
-	return 0;
-}
-#define pgd_huge		pgd_huge
-
 #define is_hugepd(hpd)		(hugepd_ok(hpd))
 #endif
 
@@ -292,7 +266,7 @@ static inline int pgd_huge(pgd_t pgd)
  * We use it to ensure coherency between the i-cache and d-cache
  * for the page which has just been mapped in.
  */
-#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_HUGETLB_PAGE)
+#if defined(CONFIG_PPC_E500) && defined(CONFIG_HUGETLB_PAGE)
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
 #else
 static inline
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-e500.h
index f798640422c2..0934e8965e4e 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-e500.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_PTE_BOOK3E_H
-#define _ASM_POWERPC_NOHASH_PTE_BOOK3E_H
+#ifndef _ASM_POWERPC_NOHASH_PTE_E500_H
+#define _ASM_POWERPC_NOHASH_PTE_E500_H
 #ifdef __KERNEL__
 
 /* PTE bit definitions for processors compliant to the Book3E
@@ -126,4 +126,4 @@ static inline pte_t pte_mkexec(pte_t pte)
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */
+#endif /*  _ASM_POWERPC_NOHASH_PTE_E500_H */
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
index 698935d4f72d..bdaf34ad41ea 100644
--- a/arch/powerpc/include/asm/nohash/tlbflush.h
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -18,7 +18,7 @@
 /*
  * TLB flushing for software loaded TLB chips
  *
- * TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range &
+ * TODO: (CONFIG_PPC_85xx) determine if flush_tlb_range &
  * flush_tlb_kernel_range are best implemented as tlbia vs
  * specific tlbie's
  */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index bfd3142cd0ba..726125a534de 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -324,16 +324,10 @@ extern int opal_flush_console(uint32_t vtermno);
 
 extern void hvc_opal_init_early(void);
 
-extern int opal_notifier_register(struct notifier_block *nb);
-extern int opal_notifier_unregister(struct notifier_block *nb);
-
 extern int opal_message_notifier_register(enum opal_msg_type msg_type,
 						struct notifier_block *nb);
 extern int opal_message_notifier_unregister(enum opal_msg_type msg_type,
 					    struct notifier_block *nb);
-extern void opal_notifier_enable(void);
-extern void opal_notifier_disable(void);
-extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
 
 extern int opal_async_get_token_interruptible(void);
 extern int opal_async_release_token(int token);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 4d7aaab82702..09f1790d0ae1 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -18,7 +18,7 @@
 #include <asm/lppaca.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 #include <asm/exception-64e.h>
 #else
 #include <asm/exception-64s.h>
@@ -127,7 +127,7 @@ struct paca_struct {
 #endif
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	u64 exgen[8] __aligned(0x40);
 	/* Keep pgd in the same cacheline as the start of extlb */
 	pgd_t *pgd __aligned(0x40); /* Current PGD */
@@ -151,7 +151,7 @@ struct paca_struct {
 	void *dbg_kstack;
 
 	struct tlb_core_data tcd;
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
 
 #ifdef CONFIG_PPC_64S_HASH_MMU
 	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
@@ -168,7 +168,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC64
 	u64 exit_save_r1;		/* Syscall/interrupt R1 save */
 #endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	u16 trap_save;			/* Used when bad stack is encountered */
 #endif
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -263,7 +263,6 @@ struct paca_struct {
 	u64 l1d_flush_size;
 #endif
 #ifdef CONFIG_PPC_PSERIES
-	struct rtas_args *rtas_args_reentrant;
 	u8 *mce_data_buf;		/* buffer to hold per cpu rtas errlog */
 #endif /* CONFIG_PPC_PSERIES */
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index e5f75c70eda8..edf1dd1b0ca9 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -31,7 +31,7 @@ extern unsigned int hpage_shift;
 #define HPAGE_SHIFT hpage_shift
 #elif defined(CONFIG_PPC_8xx)
 #define HPAGE_SHIFT		19	/* 512k pages */
-#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#elif defined(CONFIG_PPC_E500)
 #define HPAGE_SHIFT		22	/* 4M pages */
 #endif
 #define HPAGE_SIZE		((1UL) << HPAGE_SHIFT)
@@ -308,12 +308,6 @@ static inline bool pfn_valid(unsigned long pfn)
 #include <asm/pgtable-types.h>
 #endif
 
-
-#ifndef CONFIG_HUGETLB_PAGE
-#define is_hugepd(pdep)		(0)
-#define pgd_huge(pgd)		(0)
-#endif /* CONFIG_HUGETLB_PAGE */
-
 struct page;
 extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
 extern void copy_user_page(void *to, void *from, unsigned long vaddr,
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index eb7df559ae74..f5ba1a3c41f8 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -21,6 +21,18 @@ static inline bool is_shared_processor(void)
 	return static_branch_unlikely(&shared_processor);
 }
 
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 pseries_paravirt_steal_clock(int cpu);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+	return pseries_paravirt_steal_clock(cpu);
+}
+#endif
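/*
 * Editor's sketch, not part of the patch: the generic pattern by which
 * steal-time consumers use this hook.  example_account_steal() and the
 * *prev cursor are illustrative, not kernel API.
 */
static u64 example_account_steal(int cpu, u64 *prev)
{
	u64 steal = 0;

	/* Gated on the static key declared above */
	if (static_key_false(&paravirt_steal_enabled)) {
		steal = paravirt_steal_clock(cpu) - *prev;
		*prev += steal;
	}
	return steal;
}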
+
 /* If bit 0 is set, the cpu has been ceded, conferred, or preempted */
 static inline u32 yield_count_of(int cpu)
 {
diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h b/arch/powerpc/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..d25ca7ac57c7
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt_api_clock.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/paravirt.h>
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index b169bbf95fcb..82633200b500 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -101,6 +101,7 @@ static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new)
 	return pmd_raw(old) == prev;
 }
 
+#ifdef CONFIG_ARCH_HAS_HUGEPD
 typedef struct { __be64 pdbe; } hugepd_t;
 #define __hugepd(x) ((hugepd_t) { cpu_to_be64(x) })
 
@@ -108,5 +109,6 @@ static inline unsigned long hpd_val(hugepd_t x)
 {
 	return be64_to_cpu(x.pdbe);
 }
+#endif
 
 #endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index efed0db7b1db..082c85cc09b1 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -83,11 +83,13 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
 }
 #endif
 
+#ifdef CONFIG_ARCH_HAS_HUGEPD
 typedef struct { unsigned long pd; } hugepd_t;
 #define __hugepd(x) ((hugepd_t) { (x) })
 static inline unsigned long hpd_val(hugepd_t x)
 {
 	return x.pd;
 }
+#endif
 
 #endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 33f4bf8d22b0..283f40d05a4d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -20,6 +20,25 @@ struct mm_struct;
 #include <asm/nohash/pgtable.h>
 #endif /* !CONFIG_PPC_BOOK3S */
 
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) || \
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP		(PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
+
 #ifndef __ASSEMBLY__
 
 #ifndef MAX_PTRS_PER_PGD
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index c6d724104ed1..21e33e46f4b8 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -330,6 +330,7 @@
 #define __PPC_XSP(s)	((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21)
 #define __PPC_XTP(s)	__PPC_XSP(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
+#define __PPC_PL(p)	(((p) & 0x3) << 16)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 #define __PPC_WS(w)	(((w) & 0x1f) << 11)
 #define __PPC_SH(s)	__PPC_WS(s)
@@ -388,7 +389,8 @@
 #define PPC_RAW_RFDI			(0x4c00004e)
 #define PPC_RAW_RFMCI			(0x4c00004c)
 #define PPC_RAW_TLBILX(t, a, b)		(0x7c000024 | __PPC_T_TLB(t) | 	__PPC_RA0(a) | __PPC_RB(b))
-#define PPC_RAW_WAIT(w)			(0x7c00007c | __PPC_WC(w))
+#define PPC_RAW_WAIT_v203		(0x7c00007c)
+#define PPC_RAW_WAIT(w, p)		(0x7c00003c | __PPC_WC(w) | __PPC_PL(p))
 #define PPC_RAW_TLBIE(lp, a)		(0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp))
 #define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \
 	(0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
@@ -606,7 +608,8 @@
 #define PPC_TLBILX_ALL(a, b)	PPC_TLBILX(0, a, b)
 #define PPC_TLBILX_PID(a, b)	PPC_TLBILX(1, a, b)
 #define PPC_TLBILX_VA(a, b)	PPC_TLBILX(3, a, b)
-#define PPC_WAIT(w)		stringify_in_c(.long PPC_RAW_WAIT(w))
+#define PPC_WAIT_v203		stringify_in_c(.long PPC_RAW_WAIT_v203)
+#define PPC_WAIT(w, p)		stringify_in_c(.long PPC_RAW_WAIT(w, p))
 #define PPC_TLBIE(lp, a) 	stringify_in_c(.long PPC_RAW_TLBIE(lp, a))
 #define	PPC_TLBIE_5(rb, rs, ric, prs, r) \
 				stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r))
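/*
 * Editor's sketch, assuming the field helpers above: "wait 2,0" (WC=2,
 * PL=0, aka pause_short) ORs WC shifted to bit 21 and PL to bit 16 into
 * the new 0x7c00003c base opcode, so:
 *
 *	0x7c00003c | (2 << 21) | (0 << 16) == 0x7c40003c
 */
static_assert(PPC_RAW_WAIT(2, 0) == 0x7c40003c, "pause_short encoding");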
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 83c02f5a7f2a..753a2757bcd4 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -34,6 +34,20 @@
 .endm
 
 /*
+ * This expands to a sequence of register clears for regs start to end
+ * inclusive, of the form:
+ *
+ *   li rN, 0
+ */
+.macro ZEROIZE_REGS start, end
+	.Lreg=\start
+	.rept (\end - \start + 1)
+	li	.Lreg, 0
+	.Lreg=.Lreg+1
+	.endr
+.endm
+
+/*
  * Macros for storing registers into and loading registers from
  * exception frames.
  */
@@ -49,6 +63,14 @@
 #define REST_NVGPRS(base)		REST_GPRS(13, 31, base)
 #endif
 
+#define	ZEROIZE_GPRS(start, end)	ZEROIZE_REGS start, end
+#ifdef __powerpc64__
+#define	ZEROIZE_NVGPRS()		ZEROIZE_GPRS(14, 31)
+#else
+#define	ZEROIZE_NVGPRS()		ZEROIZE_GPRS(13, 31)
+#endif
+#define	ZEROIZE_GPR(n)			ZEROIZE_GPRS(n, n)
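/*
 * Editor's sketch of the expansion: on 64-bit, ZEROIZE_NVGPRS() unrolls
 * the .rept loop into one immediate load per non-volatile GPR:
 *
 *	li	14, 0
 *	li	15, 0
 *	...
 *	li	31, 0
 *
 * .Lreg is a local assembler symbol stepped once per iteration, so each
 * repeat targets the next register number.
 */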
+
 #define SAVE_GPR(n, base)		SAVE_GPRS(n, n, base)
 #define REST_GPR(n, base)		REST_GPRS(n, n, base)
 
@@ -305,6 +327,12 @@ n:
 
 #ifdef __powerpc64__
 
+#define __LOAD_PACA_TOC(reg)			\
+	ld	reg,PACATOC(r13)
+
+#define LOAD_PACA_TOC()				\
+	__LOAD_PACA_TOC(r2)
+
 #define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
 
 #define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
@@ -315,7 +343,19 @@ n:
 	rldimi	reg, tmp, 32, 0
 
 #define LOAD_REG_ADDR(reg,name)			\
-	ld	reg,name@got(r2)
+	addis	reg,r2,name@toc@ha;		\
+	addi	reg,reg,name@toc@l
+
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * This is used in register-constrained interrupt handlers. Not to be used
+ * by BOOK3S. ld complains with "got/toc optimization is not supported" if r2
+ * is not used for the TOC offset, so use @got(tocreg). If the interrupt
+ * handlers saved r2 instead, LOAD_REG_ADDR could be used.
+ */
+#define LOAD_REG_ADDR_ALTTOC(reg,tocreg,name)	\
+	ld	reg,name@got(tocreg)
+#endif
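/*
 * Editor's sketch of the generated code: LOAD_REG_ADDR(r3, foo) now
 * computes the address TOC-relative instead of loading it from the GOT:
 *
 *	addis	r3, r2, foo@toc@ha
 *	addi	r3, r3, foo@toc@l
 *
 * i.e. r3 = TOC base (r2) + offset of foo, with no memory access.  The
 * ALTTOC variant keeps a single "ld reg, foo@got(tocreg)" load because,
 * per the comment above, ld rejects got/toc optimisation when the base
 * register is not r2.
 */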
 
 #define LOAD_REG_ADDRBASE(reg,name)	LOAD_REG_ADDR(reg,name)
 #define ADDROFF(name)			0
@@ -342,7 +382,7 @@ n:
 #endif
 
 /* various errata or part fixups */
-#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500)
 #define MFTB(dest)			\
 90:	mfspr dest, SPRN_TBRL;		\
 BEGIN_FTR_SECTION_NESTED(96);		\
@@ -709,7 +749,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
  * kernel is built for.
  */
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 #define FIXUP_ENDIAN
 #else
 /*
@@ -749,7 +789,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
 	.long 0x2402004c; /* hrfid				*/ \
 191:
 
-#endif /* !CONFIG_PPC_BOOK3E */
+#endif /* !CONFIG_PPC_BOOK3E_64 */
 
 #endif /*  __ASSEMBLY__ */
 
@@ -768,7 +808,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
 	stringify_in_c(.llong (_target);)	\
 	stringify_in_c(.previous)
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 #define BTB_FLUSH(reg)			\
 	lis reg,BUCSR_INIT@h;		\
 	ori reg,reg,BUCSR_INIT@l;	\
@@ -776,6 +816,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
 	isync;
 #else
 #define BTB_FLUSH(reg)
-#endif /* CONFIG_PPC_FSL_BOOK3E */
+#endif /* CONFIG_PPC_E500 */
 
 #endif /* _ASM_POWERPC_PPC_ASM_H */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index fdfaae194ddd..631802999d59 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -75,7 +75,6 @@ extern int _chrp_type;
 
 struct task_struct;
 void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
-void release_thread(struct task_struct *);
 
 #define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
 #define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
@@ -355,11 +354,23 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
 
 #ifdef CONFIG_PPC64
 
-#define spin_begin()	HMT_low()
+#define spin_begin()							\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"or 1,1,1", /* HMT_LOW */				\
+		"nop", /* v3.1 uses pause_short in cpu_relax instead */	\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
 
-#define spin_cpu_relax()	barrier()
+#define spin_cpu_relax()						\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"nop", /* Before v3.1 use priority nops in spin_begin/end */ \
+		PPC_WAIT(2, 0),	/* aka pause_short */			\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
 
-#define spin_end()	HMT_medium()
+#define spin_end()							\
+	asm volatile(ASM_FTR_IFCLR(					\
+		"or 2,2,2", /* HMT_MEDIUM */				\
+		"nop",							\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
 
 #endif
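/*
 * Editor's sketch of the busy-wait idiom these macros serve; the flag
 * polling loop is illustrative.  Before ISA v3.1, begin/end lower and
 * restore SMT thread priority and the relax step is a plain nop; on
 * CPU_FTR_ARCH_31 parts, begin/end become nops and each relax issues
 * "wait 2,0" (pause_short) instead.
 */
static inline void example_poll_flag(volatile int *flag)
{
	spin_begin();
	while (!READ_ONCE(*flag))
		spin_cpu_relax();
	spin_end();
}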
 
@@ -426,6 +437,8 @@ extern int fix_alignment(struct pt_regs *);
 #endif
 
 int do_mathemu(struct pt_regs *regs);
+int do_spe_mathemu(struct pt_regs *regs);
+int speround_handler(struct pt_regs *regs);
 
 /* VMX copying */
 int enter_vmx_usercopy(void);
diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h
index 82db78fc169d..c8b0f2ffcd35 100644
--- a/arch/powerpc/include/asm/ps3av.h
+++ b/arch/powerpc/include/asm/ps3av.h
@@ -726,6 +726,4 @@ extern int ps3av_video_mode2res(u32, u32 *, u32 *);
 extern int ps3av_video_mute(int);
 extern int ps3av_audio_mute(int);
 extern int ps3av_audio_mute_analog(int);
-extern int ps3av_dev_open(void);
-extern int ps3av_dev_close(void);
 #endif	/* _ASM_POWERPC_PS3AV_H_ */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index a03403695cd4..2efec6d87049 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -99,6 +99,13 @@ struct pt_regs
 
 #define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs))
 
+// Always displays as "REGS" in memory dumps
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x52454753)
+#else
+#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x53474552)
+#endif
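/*
 * Editor's sketch of why the two constants differ: "REGS" is the byte
 * sequence 0x52 0x45 0x47 0x53.  A big-endian store of 0x52454753 and a
 * little-endian store of 0x53474552 both place those bytes in that order
 * in memory, so the marker reads as "REGS" in a hex/ASCII dump on either
 * endianness.
 */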
+
 #ifdef __powerpc64__
 
 /*
@@ -115,7 +122,6 @@ struct pt_regs
 
 #define STACK_FRAME_OVERHEAD	112	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
-#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x7265677368657265)
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + \
 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
 #define STACK_FRAME_MARKER	12
@@ -136,7 +142,6 @@ struct pt_regs
 #define KERNEL_REDZONE_SIZE	0
 #define STACK_FRAME_OVERHEAD	16	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
-#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x72656773)
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_FRAME_MARKER	2
 #define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 17b8dcd9a40d..af56980b6cdb 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -246,7 +246,7 @@
 #define PPC47x_MCSR_FPR	0x00800000 /* FPR parity error */
 #define PPC47x_MCSR_IPR	0x00400000 /* Imprecise Machine Check Exception */
 
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 /* All e500 */
 #define MCSR_MCP 	0x80000000UL /* Machine Check Input Pin */
 #define MCSR_ICPERR 	0x40000000UL /* I-Cache Parity Error */
@@ -282,7 +282,7 @@
 #endif
 
 /* Bit definitions for the HID1 */
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 /* e500v1/v2 */
 #define HID1_PLL_CFG_MASK 0xfc000000	/* PLL_CFG input pins */
 #define HID1_RFXE	0x00020000	/* Read fault exception enable */
@@ -545,7 +545,7 @@
 #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
 #define TCR_ARE		0x00400000	/* Auto Reload Enable */
 
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 #define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
 			      (((tcr) & 0x1E0000) >> 15))
 #else
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 00531af17ce0..56319aea646e 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -240,7 +240,6 @@ extern struct rtas_t rtas;
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
-int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...);
 void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
 			int nret, ...);
 extern void __noreturn rtas_restart(char *cmd);
diff --git a/arch/powerpc/include/asm/runlatch.h b/arch/powerpc/include/asm/runlatch.h
index cfb390edf7d0..ceb66d761fe1 100644
--- a/arch/powerpc/include/asm/runlatch.h
+++ b/arch/powerpc/include/asm/runlatch.h
@@ -19,10 +19,9 @@ extern void __ppc64_runlatch_off(void);
 	do {							\
 		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
 		    test_thread_local_flags(_TLF_RUNLATCH)) {	\
-			unsigned long msr = mfmsr();		\
 			__hard_irq_disable();			\
 			__ppc64_runlatch_off();			\
-			if (msr & MSR_EE)			\
+			if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \
 				__hard_irq_enable();		\
 		}      						\
 	} while (0)
@@ -31,10 +30,9 @@ extern void __ppc64_runlatch_off(void);
 	do {							\
 		if (cpu_has_feature(CPU_FTR_CTRL) &&		\
 		    !test_thread_local_flags(_TLF_RUNLATCH)) {	\
-			unsigned long msr = mfmsr();		\
 			__hard_irq_disable();			\
 			__ppc64_runlatch_on();			\
-			if (msr & MSR_EE)			\
+			if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \
 				__hard_irq_enable();		\
 		}      						\
 	} while (0)
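/*
 * Editor's note, inferred from the hunk rather than stated in it: the PACA
 * already records hard-disable state in irq_happened, so testing
 * PACA_IRQ_HARD_DIS answers "were interrupts enabled on entry?" without
 * the mfmsr() the old MSR_EE test required.
 */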
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 8be2c491c733..9c00c9c0ca8f 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -13,15 +13,26 @@ typedef struct func_desc func_desc_t;
 #include <asm-generic/sections.h>
 
 extern char __head_end[];
+extern char __srwx_boundary[];
+
+/* Patch sites */
+extern s32 patch__call_flush_branch_caches1;
+extern s32 patch__call_flush_branch_caches2;
+extern s32 patch__call_flush_branch_caches3;
+extern s32 patch__flush_count_cache_return;
+extern s32 patch__flush_link_stack_return;
+extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
+extern s32 patch__memset_nocache, patch__memcpy_nocache;
+
+extern long flush_branch_caches;
+extern long kvm_flush_link_stack;
 
 #ifdef __powerpc64__
 
 extern char __start_interrupts[];
 extern char __end_interrupts[];
 
-extern char __prom_init_toc_start[];
-extern char __prom_init_toc_end[];
-
 #ifdef CONFIG_PPC_POWERNV
 extern char start_real_trampolines[];
 extern char end_real_trampolines[];
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index d8c28902cf59..e29e83f8a89c 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -7,7 +7,6 @@
 #ifndef __ASSEMBLY__
 extern void ppc_printk_progress(char *s, unsigned short hex);
 
-extern unsigned int rtas_data;
 extern unsigned long long memory_limit;
 extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
 
@@ -70,7 +69,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *start, void *end);
 static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }
 #endif
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 void __init setup_spectre_v2(void);
 #else
 static inline void setup_spectre_v2(void) {}
@@ -89,6 +88,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
 			       unsigned long pp, unsigned long r6,
 			       unsigned long r7, unsigned long kbase);
 
+extern struct seq_buf ppc_hw_desc;
+
 #endif /* !__ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index 7130176d8cb8..b0b4c64870d7 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -44,7 +44,7 @@ static inline void ppc_after_tlbiel_barrier(void)
 
 #if defined(__powerpc64__)
 #    define LWSYNC	lwsync
-#elif defined(CONFIG_E500)
+#elif defined(CONFIG_PPC_E500)
 #    define LWSYNC					\
 	START_LWSYNC_SECTION(96);			\
 	sync;						\
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 25fc8ad9a27a..3dd36c5e334a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -14,9 +14,16 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+typedef long (*syscall_fn)(const struct pt_regs *);
+#else
+typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
+			   unsigned long, unsigned long, unsigned long);
+#endif
+
 /* ftrace syscalls requires exporting the sys_call_table */
-extern const unsigned long sys_call_table[];
-extern const unsigned long compat_sys_call_table[];
+extern const syscall_fn sys_call_table[];
+extern const syscall_fn compat_sys_call_table[];
 
 static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 {
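/*
 * Editor's sketch, hypothetical dispatcher: with the table now typed, a
 * handler can call through it directly.  example_dispatch() is not kernel
 * code; under CONFIG_ARCH_HAS_SYSCALL_WRAPPER the wrapper generated for
 * each entry unpacks regs->gpr[3..8] itself.
 */
static long example_dispatch(struct pt_regs *regs, unsigned long nr)
{
	syscall_fn fn = sys_call_table[nr];

	return fn(regs);
}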
diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..67486c67e8a2
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall_wrapper.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
+ *
+ * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
+ */
+
+#ifndef __ASM_POWERPC_SYSCALL_WRAPPER_H
+#define __ASM_POWERPC_SYSCALL_WRAPPER_H
+
+struct pt_regs;
+
+#define SC_POWERPC_REGS_TO_ARGS(x, ...)				\
+	__MAP(x,__SC_ARGS					\
+	      ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]	\
+	      ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
+
+#define __SYSCALL_DEFINEx(x, name, ...)						\
+	long sys##name(const struct pt_regs *regs);			\
+	ALLOW_ERROR_INJECTION(sys##name, ERRNO);			\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));		\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	long sys##name(const struct pt_regs *regs)			\
+	{									\
+		return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__));	\
+	}									\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))		\
+	{									\
+		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		__MAP(x,__SC_TEST,__VA_ARGS__);					\
+		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));		\
+		return ret;							\
+	}									\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
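/*
 * Editor's sketch of the expansion for an illustrative two-argument
 * syscall, SYSCALL_DEFINE2(foo, int, a, int, b):
 */
long sys_foo(const struct pt_regs *regs)
{
	/* arguments come straight from the GPRs the ABI passes them in */
	return __se_sys_foo(regs->gpr[3], regs->gpr[4]);
}
/*
 * ...where __se_sys_foo() casts each long back to the declared types and
 * calls the author's __do_sys_foo(int a, int b) body.
 */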
+
+#define SYSCALL_DEFINE0(sname)							\
+	SYSCALL_METADATA(_##sname, 0);						\
+	long sys_##sname(const struct pt_regs *__unused);		\
+	ALLOW_ERROR_INJECTION(sys_##sname, ERRNO);			\
+	long sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name)							\
+	long sys_##name(const struct pt_regs *regs);			\
+	long __weak sys_##name(const struct pt_regs *regs)		\
+	{									\
+		return sys_ni_syscall();					\
+	}
+
+#endif // __ASM_POWERPC_SYSCALL_WRAPPER_H
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index a2b13e55254f..a1142496cd58 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -8,49 +8,147 @@
 #include <linux/types.h>
 #include <linux/compat.h>
 
+#include <asm/syscall.h>
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#endif
+#include <asm/unistd.h>
+#include <asm/ucontext.h>
+
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+long sys_ni_syscall(void);
+#else
+long sys_ni_syscall(const struct pt_regs *regs);
+#endif
+
 struct rtas_args;
 
-asmlinkage long sys_mmap(unsigned long addr, size_t len,
-		unsigned long prot, unsigned long flags,
-		unsigned long fd, off_t offset);
-asmlinkage long sys_mmap2(unsigned long addr, size_t len,
-		unsigned long prot, unsigned long flags,
-		unsigned long fd, unsigned long pgoff);
-asmlinkage long ppc64_personality(unsigned long personality);
-asmlinkage long sys_rtas(struct rtas_args __user *uargs);
-int ppc_select(int n, fd_set __user *inp, fd_set __user *outp,
-	       fd_set __user *exp, struct __kernel_old_timeval __user *tvp);
-long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
-		      u32 len_high, u32 len_low);
+/*
+ * long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ * High and low parts are swapped depending on endian mode,
+ * so define a macro (similar to mips linux32) to handle that.
+ */
+#ifdef __LITTLE_ENDIAN__
+#define merge_64(low, high) (((u64)high << 32) | low)
+#else
+#define merge_64(high, low) (((u64)high << 32) | low)
+#endif
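/*
 * Editor's sketch, not part of the patch: a munged handler rebuilds the
 * 64-bit value from the register pair with merge_64().  The callee
 * vfs_truncate_wrapper() is a hypothetical stand-in.
 */
long example_ppc_truncate64(const char __user *path, u32 reg4,
			    unsigned long len1, unsigned long len2)
{
	/* len1/len2 arrived in an odd/even GPR pair; reg4 is padding */
	return vfs_truncate_wrapper(path, (loff_t)merge_64(len1, len2));
}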
+
+/*
+ * PowerPC architecture-specific syscalls
+ */
+
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+
+long sys_rtas(struct rtas_args __user *uargs);
 
+#ifdef CONFIG_PPC64
+long sys_ppc64_personality(unsigned long personality);
 #ifdef CONFIG_COMPAT
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
-			       unsigned long prot, unsigned long flags,
-			       unsigned long fd, unsigned long pgoff);
+long compat_sys_ppc64_personality(unsigned long personality);
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
 
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
-				  u32 reg6, u32 pos1, u32 pos2);
+long sys_swapcontext(struct ucontext __user *old_ctx,
+		     struct ucontext __user *new_ctx, long ctx_size);
+long sys_mmap(unsigned long addr, size_t len,
+	      unsigned long prot, unsigned long flags,
+	      unsigned long fd, off_t offset);
+long sys_mmap2(unsigned long addr, size_t len,
+	       unsigned long prot, unsigned long flags,
+	       unsigned long fd, unsigned long pgoff);
+long sys_switch_endian(void);
 
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
-				   u32 reg6, u32 pos1, u32 pos2);
+#ifdef CONFIG_PPC32
+long sys_sigreturn(void);
+long sys_debug_setcontext(struct ucontext __user *ctx, int ndbg,
+			  struct sig_dbg_op __user *dbg);
+#endif
 
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count);
+long sys_rt_sigreturn(void);
 
-int compat_sys_truncate64(const char __user *path, u32 reg4,
-			  unsigned long len1, unsigned long len2);
+long sys_subpage_prot(unsigned long addr,
+		      unsigned long len, u32 __user *map);
 
-long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, u32 len1, u32 len2);
+#ifdef CONFIG_COMPAT
+long compat_sys_swapcontext(struct ucontext32 __user *old_ctx,
+			    struct ucontext32 __user *new_ctx,
+			    int ctx_size);
+long compat_sys_old_getrlimit(unsigned int resource,
+			      struct compat_rlimit __user *rlim);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+#endif /* CONFIG_COMPAT */
 
-int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
-			   unsigned long len2);
+/*
+ * Architecture specific signatures required by long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ * The following signatures provide a machine long parameter for
+ * each register that will be supplied. The implementation is
+ * responsible for combining parameter pairs.
+ */
 
-long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
-		     size_t len, int advice);
+#ifdef CONFIG_PPC32
+long sys_ppc_pread64(unsigned int fd,
+		     char __user *ubuf, compat_size_t count,
+		     u32 reg6, u32 pos1, u32 pos2);
+long sys_ppc_pwrite64(unsigned int fd,
+		      const char __user *ubuf, compat_size_t count,
+		      u32 reg6, u32 pos1, u32 pos2);
+long sys_ppc_readahead(int fd, u32 r4,
+		       u32 offset1, u32 offset2, u32 count);
+long sys_ppc_truncate64(const char __user *path, u32 reg4,
+		        unsigned long len1, unsigned long len2);
+long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
+			 unsigned long len1, unsigned long len2);
+long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+			 size_t len, int advice);
+#endif
+#ifdef CONFIG_COMPAT
+long compat_sys_mmap2(unsigned long addr, size_t len,
+		      unsigned long prot, unsigned long flags,
+		      unsigned long fd, unsigned long pgoff);
+long compat_sys_ppc_pread64(unsigned int fd,
+			    char __user *ubuf, compat_size_t count,
+			    u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_pwrite64(unsigned int fd,
+			     const char __user *ubuf, compat_size_t count,
+			     u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_readahead(int fd, u32 r4,
+			      u32 offset1, u32 offset2, u32 count);
+long compat_sys_ppc_truncate64(const char __user *path, u32 reg4,
+			       unsigned long len1, unsigned long len2);
+long compat_sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
+				unsigned long len1, unsigned long len2);
+long compat_sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+				size_t len, int advice);
+long compat_sys_ppc_sync_file_range2(int fd, unsigned int flags,
+				     unsigned int offset1,
+				     unsigned int offset2,
+				     unsigned int nbytes1,
+				     unsigned int nbytes2);
+#endif /* CONFIG_COMPAT */
 
-long compat_sys_sync_file_range2(int fd, unsigned int flags,
-				 unsigned int offset1, unsigned int offset2,
-				 unsigned int nbytes1, unsigned int nbytes2);
+#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT)
+long sys_ppc_fadvise64_64(int fd, int advice,
+			  u32 offset_high, u32 offset_low,
+			  u32 len_high, u32 len_low);
 #endif
 
+#else
+
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
+#define __SYSCALL(nr, entry) \
+	long entry(const struct pt_regs *regs);
+
+#ifdef CONFIG_PPC64
+#include <asm/syscall_table_64.h>
+#else
+#include <asm/syscall_table_32.h>
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_SYSCALLS_H */
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/include/asm/syscalls_32.h
index 2346f8c7ff2e..749255568be9 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/include/asm/syscalls_32.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _PPC64_PPC32_H
-#define _PPC64_PPC32_H
+#ifndef _ASM_POWERPC_SYSCALLS_32_H
+#define _ASM_POWERPC_SYSCALLS_32_H
 
 #include <linux/compat.h>
 #include <asm/siginfo.h>
@@ -57,4 +57,4 @@ struct ucontext32 {
 	struct mcontext32	uc_mcontext;
 };
 
-#endif  /* _PPC64_PPC32_H */
+#endif  // _ASM_POWERPC_SYSCALLS_32_H
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 1e5643a9b1f2..9f50766c4623 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -116,8 +116,9 @@ unsigned long long tb_to_ns(unsigned long long tb_ticks);
 
 void timer_broadcast_interrupt(void);
 
-/* SPLPAR */
-void accumulate_stolen_time(void);
+/* SPLPAR and VIRT_CPU_ACCOUNTING_NATIVE */
+void pseries_accumulate_stolen_time(void);
+u64 pseries_calculate_stolen_time(u64 stop_tb);
 
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index b4aa0d88ce2c..b1f094728b35 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -15,13 +15,13 @@ extern void (*udbg_flush)(void);
 extern int (*udbg_getc)(void);
 extern int (*udbg_getc_poll)(void);
 
-extern void udbg_puts(const char *s);
-extern int udbg_write(const char *s, int n);
+void udbg_puts(const char *s);
+int udbg_write(const char *s, int n);
 
-extern void register_early_udbg_console(void);
-extern void udbg_printf(const char *fmt, ...)
+void register_early_udbg_console(void);
+void udbg_printf(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
-extern void udbg_progress(char *s, unsigned short hex);
+void udbg_progress(char *s, unsigned short hex);
 
 void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride);
 void __init udbg_uart_init_pio(unsigned long port, unsigned int stride);
@@ -31,28 +31,28 @@ unsigned int __init udbg_probe_uart_speed(unsigned int clock);
 
 struct device_node;
 void __init udbg_scc_init(int force_scc);
-extern int udbg_adb_init(int force_btext);
-extern void udbg_adb_init_early(void);
-
-extern void __init udbg_early_init(void);
-extern void __init udbg_init_debug_lpar(void);
-extern void __init udbg_init_debug_lpar_hvsi(void);
-extern void __init udbg_init_pmac_realmode(void);
-extern void __init udbg_init_maple_realmode(void);
-extern void __init udbg_init_pas_realmode(void);
-extern void __init udbg_init_rtas_panel(void);
-extern void __init udbg_init_rtas_console(void);
-extern void __init udbg_init_debug_beat(void);
-extern void __init udbg_init_btext(void);
-extern void __init udbg_init_44x_as1(void);
-extern void __init udbg_init_40x_realmode(void);
-extern void __init udbg_init_cpm(void);
-extern void __init udbg_init_usbgecko(void);
-extern void __init udbg_init_memcons(void);
-extern void __init udbg_init_ehv_bc(void);
-extern void __init udbg_init_ps3gelic(void);
-extern void __init udbg_init_debug_opal_raw(void);
-extern void __init udbg_init_debug_opal_hvsi(void);
+int udbg_adb_init(int force_btext);
+void udbg_adb_init_early(void);
+
+void __init udbg_early_init(void);
+void __init udbg_init_debug_lpar(void);
+void __init udbg_init_debug_lpar_hvsi(void);
+void __init udbg_init_pmac_realmode(void);
+void __init udbg_init_maple_realmode(void);
+void __init udbg_init_pas_realmode(void);
+void __init udbg_init_rtas_panel(void);
+void __init udbg_init_rtas_console(void);
+void __init udbg_init_btext(void);
+void __init udbg_init_44x_as1(void);
+void __init udbg_init_40x_realmode(void);
+void __init udbg_init_cpm(void);
+void __init udbg_init_usbgecko(void);
+void __init udbg_init_memcons(void);
+void __init udbg_init_ehv_bc(void);
+void __init udbg_init_ps3gelic(void);
+void __init udbg_init_debug_opal_raw(void);
+void __init udbg_init_debug_opal_hvsi(void);
+void __init udbg_init_debug_16550(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index b1129b4ef57d..659a996c75aa 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -45,6 +45,7 @@
 #define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_STAT
+#define __ARCH_WANT_COMPAT_FALLOCATE
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
 #define __ARCH_WANT_SYS_FORK
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index 8542e9bbeead..7650b6ce14c8 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -2,9 +2,6 @@
 #ifndef _ASM_POWERPC_VDSO_H
 #define _ASM_POWERPC_VDSO_H
 
-/* Default map addresses for 32bit vDSO */
-#define VDSO32_MBASE	0x100000
-
 #define VDSO_VERSION_STRING	LINUX_2.6.15
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h
index 8d79f994b4aa..80d13207c568 100644
--- a/arch/powerpc/include/asm/vdso/processor.h
+++ b/arch/powerpc/include/asm/vdso/processor.h
@@ -22,7 +22,13 @@
 #endif
 
 #ifdef CONFIG_PPC64
-#define cpu_relax()	do { HMT_low(); HMT_medium(); barrier(); } while (0)
+#define cpu_relax()							\
+	asm volatile(ASM_FTR_IFCLR(					\
+		/* Pre-POWER10 uses low ; medium priority nops */	\
+		"or 1,1,1 ; or 2,2,2",					\
+		/* POWER10 onward uses pause_short (wait 2,0) */	\
+		PPC_WAIT(2, 0),						\
+		%0) :: "i" (CPU_FTR_ARCH_31) : "memory")
 #else
 #define cpu_relax()	barrier()
 #endif
diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h
index 891c9d5eaabe..e9245f86a46c 100644
--- a/arch/powerpc/include/asm/vdso/timebase.h
+++ b/arch/powerpc/include/asm/vdso/timebase.h
@@ -12,7 +12,7 @@
  * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
  * version below in the else case of the ifdef.
  */
-#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_E500))
+#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500))
 #define mftb()		({unsigned long rval;				\
 			asm volatile(					\
 				"90:	mfspr %0, %2;\n"		\
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index e2e704eca5f6..89090485bec1 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -159,7 +159,6 @@ extern void xics_setup_cpu(void);
 extern void xics_update_irq_servers(void);
 extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
 extern void xics_mask_unknown_vec(unsigned int vec);
-extern irqreturn_t xics_ipi_dispatch(int cpu);
 extern void xics_smp_probe(void);
 extern void xics_register_ics(struct ics *ics);
 extern void xics_teardown_cpu(void);
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/85xx_entry_mapping.S
index dedc17fac8f8..dedc17fac8f8 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/85xx_entry_mapping.S
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 06d2d1f78f71..9b6146056e48 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -73,6 +73,7 @@ obj-y				:= cputable.o syscalls.o \
 obj-y				+= ptrace/
 obj-$(CONFIG_PPC64)		+= setup_64.o irq_64.o\
 				   paca.o nvram_64.o note.o
+obj-$(CONFIG_PPC32)		+= sys_ppc32.o
 obj-$(CONFIG_COMPAT)		+= sys_ppc32.o signal_32.o
 obj-$(CONFIG_VDSO32)		+= vdso32_wrapper.o
 obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
@@ -81,7 +82,7 @@ obj-$(CONFIG_PPC_DAWR)		+= dawr.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
-obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
+obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_64e.o
 obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
 obj-$(CONFIG_PPC64)		+= vdso64_wrapper.o
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
@@ -100,30 +101,28 @@ obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_FA_DUMP)		+= fadump.o
 obj-$(CONFIG_PRESERVE_FA_DUMP)	+= fadump.o
-ifdef CONFIG_PPC32
-obj-$(CONFIG_E500)		+= idle_e500.o
-endif
+obj-$(CONFIG_PPC_85xx)		+= idle_85xx.o
 obj-$(CONFIG_PPC_BOOK3S_32)	+= idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
 obj-$(CONFIG_TAU)		+= tau_6xx.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o suspend.o
-ifdef CONFIG_FSL_BOOKE
-obj-$(CONFIG_HIBERNATION)	+= swsusp_booke.o
+ifdef CONFIG_PPC_85xx
+obj-$(CONFIG_HIBERNATION)	+= swsusp_85xx.o
 else
 obj-$(CONFIG_HIBERNATION)	+= swsusp_$(BITS).o
 endif
 obj64-$(CONFIG_HIBERNATION)	+= swsusp_asm64.o
 obj-$(CONFIG_MODULES)		+= module.o module_$(BITS).o
 obj-$(CONFIG_44x)		+= cpu_setup_44x.o
-obj-$(CONFIG_PPC_FSL_BOOK3E)	+= cpu_setup_fsl_booke.o
+obj-$(CONFIG_PPC_E500)		+= cpu_setup_e500.o
 obj-$(CONFIG_PPC_DOORBELL)	+= dbell.o
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 
-extra-$(CONFIG_PPC64)		:= head_64.o
-extra-$(CONFIG_PPC_BOOK3S_32)	:= head_book3s_32.o
-extra-$(CONFIG_40x)		:= head_40x.o
-extra-$(CONFIG_44x)		:= head_44x.o
-extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
-extra-$(CONFIG_PPC_8xx)		:= head_8xx.o
+obj-$(CONFIG_PPC64)		+= head_64.o
+obj-$(CONFIG_PPC_BOOK3S_32)	+= head_book3s_32.o
+obj-$(CONFIG_40x)		+= head_40x.o
+obj-$(CONFIG_44x)		+= head_44x.o
+obj-$(CONFIG_PPC_8xx)		+= head_8xx.o
+obj-$(CONFIG_PPC_85xx)		+= head_85xx.o
 extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)	+= reloc_$(BITS).o
@@ -198,10 +197,10 @@ KCOV_INSTRUMENT_paca.o := n
 CFLAGS_setup_64.o		+= -fno-stack-protector
 CFLAGS_paca.o			+= -fno-stack-protector
 
-extra-$(CONFIG_PPC_FPU)		+= fpu.o
-extra-$(CONFIG_ALTIVEC)		+= vector.o
-extra-$(CONFIG_PPC64)		+= entry_64.o
-extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)	+= prom_init.o
+obj-$(CONFIG_PPC_FPU)		+= fpu.o
+obj-$(CONFIG_ALTIVEC)		+= vector.o
+obj-$(CONFIG_PPC64)		+= entry_64.o
+obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)	+= prom_init.o
 
 extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)	+= prom_init_check
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8c10f536e478..4ce2a4aa3985 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -59,7 +59,7 @@
 #endif
 #endif
 
-#if defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_E500)
 #include "../mm/mmu_decl.h"
 #endif
 
@@ -197,7 +197,7 @@ int main(void)
 	OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
 	OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	OFFSET(PACAPGD, paca_struct, pgd);
 	OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd);
 	OFFSET(PACA_EXGEN, paca_struct, exgen);
@@ -213,7 +213,7 @@ int main(void)
 	OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next);
 	OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max);
 	OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	OFFSET(PACA_EXGEN, paca_struct, exgen);
@@ -248,7 +248,7 @@ int main(void)
 #ifdef CONFIG_PPC64
 	OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
 #endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
 #endif
 	OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
@@ -651,7 +651,7 @@ int main(void)
 	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
 	DEFINE(PTE_T_LOG2, PTE_T_LOG2);
 #endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
 	OFFSET(TLBCAM_MAS0, tlbcam, MAS0);
 	OFFSET(TLBCAM_MAS1, tlbcam, MAS1);
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_e500.S
index 4bf33f1b4193..2ab25161b0ad 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_e500.S
@@ -12,7 +12,7 @@
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/ppc_asm.h>
-#include <asm/nohash/mmu-book3e.h>
+#include <asm/nohash/mmu-e500.h>
 #include <asm/asm-offsets.h>
 #include <asm/mpc85xx.h>
 
@@ -108,7 +108,7 @@ _GLOBAL(__setup_cpu_e6500)
 #endif /* CONFIG_PPC_E500MC */
 
 #ifdef CONFIG_PPC32
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 #ifndef CONFIG_PPC_E500MC
 _GLOBAL(__setup_cpu_e500v1)
 _GLOBAL(__setup_cpu_e500v2)
@@ -156,7 +156,7 @@ _GLOBAL(__setup_cpu_e5500)
 	mtlr	r5
 	blr
 #endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_E500 */
+#endif /* CONFIG_PPC_E500 */
 #endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC_BOOK3E_64
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index 3dc61e203f37..097c033668f0 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -11,7 +11,7 @@
 #include <asm/synch.h>
 #include <linux/bitops.h>
 #include <asm/cputable.h>
-#include <asm/cpu_setup_power.h>
+#include <asm/cpu_setup.h>
 
 /* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */
 static bool init_hvmode_206(struct cpu_spec *t)
diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h
new file mode 100644
index 000000000000..85ded3f77204
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifdef CONFIG_40x
+#include "cpu_specs_40x.h"
+#endif
+
+#ifdef CONFIG_PPC_47x
+#include "cpu_specs_47x.h"
+#elif defined(CONFIG_44x)
+#include "cpu_specs_44x.h"
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include "cpu_specs_8xx.h"
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#include "cpu_specs_e500mc.h"
+#elif defined(CONFIG_PPC_85xx)
+#include "cpu_specs_85xx.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#include "cpu_specs_book3s_32.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include "cpu_specs_book3s_64.h"
+#endif
diff --git a/arch/powerpc/kernel/cpu_specs_40x.h b/arch/powerpc/kernel/cpu_specs_40x.h
new file mode 100644
index 000000000000..a1362a75b8c8
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_40x.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* STB 04xxx */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41810000,
+		.cpu_name		= "STB04xxx",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* NP405L */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41610000,
+		.cpu_name		= "NP405L",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* NP4GS3 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x40B10000,
+		.cpu_name		= "NP4GS3",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{   /* NP405H */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41410000,
+		.cpu_name		= "NP405H",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405GPr */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x50910000,
+		.cpu_name		= "405GPr",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{   /* STBx25xx */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x51510000,
+		.cpu_name		= "STBx25xx",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405LP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41F10000,
+		.cpu_name		= "405LP",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x51210000,
+		.cpu_name		= "405EP",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. A/B with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910007,
+		.cpu_name		= "405EX Rev. A/B",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. C without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x1291000d,
+		.cpu_name		= "405EX Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. C with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x1291000f,
+		.cpu_name		= "405EX Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. D without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910003,
+		.cpu_name		= "405EX Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EX Rev. D with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910005,
+		.cpu_name		= "405EX Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. A/B without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910001,
+		.cpu_name		= "405EXr Rev. A/B",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. C without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910009,
+		.cpu_name		= "405EXr Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. C with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x1291000b,
+		.cpu_name		= "405EXr Rev. C",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. D without Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910000,
+		.cpu_name		= "405EXr Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* 405EXr Rev. D with Security */
+		.pvr_mask		= 0xffff000f,
+		.pvr_value		= 0x12910002,
+		.cpu_name		= "405EXr Rev. D",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{
+		/* 405EZ */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x41510000,
+		.cpu_name		= "405EZ",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* APM8018X */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x7ff11432,
+		.cpu_name		= "APM8018X",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic 40x PPC)",
+		.cpu_features		= CPU_FTRS_40X,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU |
+					  PPC_FEATURE_HAS_4xxMAC,
+		.mmu_features		= MMU_FTR_TYPE_40x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc405",
+	}
+};
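
How these tables are consumed: the kernel masks the processor version register (PVR) against each entry in order and stops at the first hit, which is why the catch-all entry with a zero mask sits last. Below is a minimal stand-alone sketch of that lookup, using a trimmed-down stand-in struct and a hypothetical identify() helper for illustration, not the kernel's real identify_cpu():

#include <stddef.h>

struct cpu_spec_min {			/* trimmed stand-in for struct cpu_spec */
	unsigned int	pvr_mask;
	unsigned int	pvr_value;
	const char	*cpu_name;
};

/*
 * First match wins: (pvr & pvr_mask) == pvr_value. The final table entry
 * uses a zero mask, so it matches any PVR and serves as the default.
 */
static const struct cpu_spec_min *identify(const struct cpu_spec_min *specs,
					   size_t n, unsigned int pvr)
{
	size_t i;

	for (i = 0; i < n; i++)
		if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
			return &specs[i];
	return NULL;	/* unreachable when a default entry is present */
}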
diff --git a/arch/powerpc/kernel/cpu_specs_44x.h b/arch/powerpc/kernel/cpu_specs_44x.h
new file mode 100644
index 000000000000..69c4cdc0cdee
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_44x.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+	{
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000850,
+		.cpu_name		= "440GR Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000858,
+		.cpu_name		= "440EP Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440ep,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x400008d3,
+		.cpu_name		= "440GR Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000ff7,
+		.pvr_value		= 0x400008d4,
+		.cpu_name		= "440EP Rev. C",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440ep,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x400008db,
+		.cpu_name		= "440EP Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440ep,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* 440GRX */
+		.pvr_mask		= 0xf0000ffb,
+		.pvr_value		= 0x200008d0,
+		.cpu_name		= "440GRX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440grx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000ffb,
+		.pvr_value		= 0x200008d8,
+		.cpu_name		= "440EPX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440epx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{	/* 440GP Rev. B */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000440,
+		.cpu_name		= "440GP Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440gp",
+	},
+	{	/* 440GP Rev. C */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000481,
+		.cpu_name		= "440GP Rev. C",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440gp",
+	},
+	{ /* 440GX Rev. A */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000850,
+		.cpu_name		= "440GX Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440GX Rev. B */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000851,
+		.cpu_name		= "440GX Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440GX Rev. C */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000892,
+		.cpu_name		= "440GX Rev. C",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440GX Rev. F */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x50000894,
+		.cpu_name		= "440GX Rev. F",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440gx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440SP Rev. A */
+		.pvr_mask		= 0xfff00fff,
+		.pvr_value		= 0x53200891,
+		.cpu_name		= "440SP Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	},
+	{ /* 440SPe Rev. A */
+		.pvr_mask		= 0xfff00fff,
+		.pvr_value		= 0x53400890,
+		.cpu_name		= "440SPe Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440spe,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 440SPe Rev. B */
+		.pvr_mask		= 0xfff00fff,
+		.pvr_value		= 0x53400891,
+		.cpu_name		= "440SPe Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440spe,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460EX */
+		.pvr_mask		= 0xffff0006,
+		.pvr_value		= 0x13020002,
+		.cpu_name		= "460EX",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460ex,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460EX Rev B */
+		.pvr_mask		= 0xffff0007,
+		.pvr_value		= 0x13020004,
+		.cpu_name		= "460EX Rev. B",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460ex,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460GT */
+		.pvr_mask		= 0xffff0006,
+		.pvr_value		= 0x13020000,
+		.cpu_name		= "460GT",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460gt,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460GT Rev B */
+		.pvr_mask		= 0xffff0007,
+		.pvr_value		= 0x13020005,
+		.cpu_name		= "460GT Rev. B",
+		.cpu_features		= CPU_FTRS_440x6,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460gt,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 460SX */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x13541800,
+		.cpu_name		= "460SX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_460sx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{ /* 464 in APM821xx */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x12c41c80,
+		.cpu_name		= "APM821XX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE |
+					  PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_apm821xx,
+		.machine_check		= machine_check_440A,
+		.platform		= "ppc440",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic 44x PPC)",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_44x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_4xx,
+		.platform		= "ppc440",
+	}
+};
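
On the "logical PVR" comments in the 440EP/440EPx entries above: the entries for FPU-capable parts differ from their FPU-less siblings only in bit 3, and the comments describe the 4xx identification path as ORing a software-defined bit (0x8) into the PVR before matching. A hypothetical illustration of that convention, taking the comments' pvr | 0x8 at face value:

/*
 * Illustrative only: build the logical PVR the comments above describe,
 * so an FPU-capable 440EP (0x40000858 / 0x400008db) matches a different
 * entry than the 440GR (0x40000850 / 0x400008d3) it shares silicon with.
 */
static unsigned int make_logical_pvr(unsigned int hw_pvr, int fpu_present)
{
	return fpu_present ? (hw_pvr | 0x8) : hw_pvr;
}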
diff --git a/arch/powerpc/kernel/cpu_specs_47x.h b/arch/powerpc/kernel/cpu_specs_47x.h
new file mode 100644
index 000000000000..3143cd504a51
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_47x.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+	{ /* 476 DD2 core */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x11a52080,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X | CPU_FTR_476_DD2,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{ /* 476fpe */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x7ff50000,
+		.cpu_name		= "476fpe",
+		.cpu_features		= CPU_FTRS_47X | CPU_FTR_476_DD2,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{ /* 476 iss */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00050000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{ /* 476 others */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x11a50000,
+		.cpu_name		= "476",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.mmu_features		= MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+					  MMU_FTR_LOCK_BCAST_INVAL,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic 47x PPC)",
+		.cpu_features		= CPU_FTRS_47X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.mmu_features		= MMU_FTR_TYPE_47x,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 128,
+		.machine_check		= machine_check_47x,
+		.platform		= "ppc470",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_85xx.h b/arch/powerpc/kernel/cpu_specs_85xx.h
new file mode 100644
index 000000000000..aaae202c1a89
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_85xx.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* e500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80200000,
+		.cpu_name		= "e500",
+		.cpu_features		= CPU_FTRS_E500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+					  PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e500v1,
+		.machine_check		= machine_check_e500,
+		.platform		= "ppc8540",
+	},
+	{	/* e500v2 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80210000,
+		.cpu_name		= "e500v2",
+		.cpu_features		= CPU_FTRS_E500_2,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+					  PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+					  PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e500v2,
+		.machine_check		= machine_check_e500,
+		.platform		= "ppc8548",
+		.cpu_down_flush		= cpu_down_flush_e500v2,
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic E500 PPC)",
+		.cpu_features		= CPU_FTRS_E500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+					  PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_e500,
+		.platform		= "powerpc",
+	}
+};
diff --git a/arch/powerpc/kernel/cpu_specs_8xx.h b/arch/powerpc/kernel/cpu_specs_8xx.h
new file mode 100644
index 000000000000..93ddbc202ba3
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_8xx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* 8xx */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= PVR_8xx,
+		.cpu_name		= "8xx",
+		/*
+		 * CPU_FTR_MAYBE_CAN_DOZE is possible
+		 * if the 8xx code is there.
+		 */
+		.cpu_features		= CPU_FTRS_8XX,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+		.mmu_features		= MMU_FTR_TYPE_8xx,
+		.icache_bsize		= 16,
+		.dcache_bsize		= 16,
+		.machine_check		= machine_check_8xx,
+		.platform		= "ppc823",
+	},
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_32.h b/arch/powerpc/kernel/cpu_specs_book3s_32.h
new file mode 100644
index 000000000000..3714634d194a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_32.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER	(PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+			 PPC_FEATURE_HAS_MMU)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC_BOOK3S_603
+	{	/* 603 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00030000,
+		.cpu_name		= "603",
+		.cpu_features		= CPU_FTRS_603,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* 603e */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00060000,
+		.cpu_name		= "603e",
+		.cpu_features		= CPU_FTRS_603,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* 603ev */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00070000,
+		.cpu_name		= "603ev",
+		.cpu_features		= CPU_FTRS_603,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* 82xx (8240, 8245, 8260 are all 603e cores) */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00810000,
+		.cpu_name		= "82xx",
+		.cpu_features		= CPU_FTRS_82XX,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= 0,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+	{	/* All G2_LE (603e core, plus some) have the same pvr */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00820000,
+		.cpu_name		= "G2_LE",
+		.cpu_features		= CPU_FTRS_G2_LE,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+#ifdef CONFIG_PPC_83xx
+	{	/* e300c1 (a 603e core, plus some) on 83xx */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00830000,
+		.cpu_name		= "e300c1",
+		.cpu_features		= CPU_FTRS_E300,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.platform		= "ppc603",
+	},
+	{	/* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00840000,
+		.cpu_name		= "e300c2",
+		.cpu_features		= CPU_FTRS_E300C2,
+		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.platform		= "ppc603",
+	},
+	{	/* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00850000,
+		.cpu_name		= "e300c3",
+		.cpu_features		= CPU_FTRS_E300,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.num_pmcs		= 4,
+		.platform		= "ppc603",
+	},
+	{	/* e300c4 (e300c1, plus one IU) */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00860000,
+		.cpu_name		= "e300c4",
+		.cpu_features		= CPU_FTRS_E300,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_603,
+		.machine_check		= machine_check_83xx,
+		.num_pmcs		= 4,
+		.platform		= "ppc603",
+	},
+#endif
+#endif /* CONFIG_PPC_BOOK3S_603 */
+#ifdef CONFIG_PPC_BOOK3S_604
+	{	/* 604 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00040000,
+		.cpu_name		= "604",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 2,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 604e */
+		.pvr_mask		= 0xfffff000,
+		.pvr_value		= 0x00090000,
+		.cpu_name		= "604e",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 604r */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00090000,
+		.cpu_name		= "604r",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 604ev */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x000a0000,
+		.cpu_name		= "604ev",
+		.cpu_features		= CPU_FTRS_604,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_604,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc604",
+	},
+	{	/* 740/750 (0x4202, doesn't support TAU?) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x00084202,
+		.cpu_name		= "740/750",
+		.cpu_features		= CPU_FTRS_740_NOTAU,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CX (80100 and 8010x?) */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x00080100,
+		.cpu_name		= "750CX",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CX (82201 and 82202) */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x00082200,
+		.cpu_name		= "750CX",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CXe (82214) */
+		.pvr_mask		= 0xfffffff0,
+		.pvr_value		= 0x00082210,
+		.cpu_name		= "750CXe",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CXe "Gekko" (83214) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x00083214,
+		.cpu_name		= "750CXe",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750cx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750CL (and "Broadway") */
+		.pvr_mask		= 0xfffff0e0,
+		.pvr_value		= 0x00087000,
+		.cpu_name		= "750CL",
+		.cpu_features		= CPU_FTRS_750CL,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 745/755 */
+		.pvr_mask		= 0xfffff000,
+		.pvr_value		= 0x00083000,
+		.cpu_name		= "745/755",
+		.cpu_features		= CPU_FTRS_750,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750FX rev 1.x */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x70000100,
+		.cpu_name		= "750FX",
+		.cpu_features		= CPU_FTRS_750FX1,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750FX rev 2.0 must disable HID0[DPM] */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x70000200,
+		.cpu_name		= "750FX",
+		.cpu_features		= CPU_FTRS_750FX2,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750FX (All revs except 2.0) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x70000000,
+		.cpu_name		= "750FX",
+		.cpu_features		= CPU_FTRS_750FX,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750fx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 750GX */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x70020000,
+		.cpu_name		= "750GX",
+		.cpu_features		= CPU_FTRS_750GX,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750fx,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 740/750 (L2CR bit needs fixup for 740) */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00080000,
+		.cpu_name		= "740/750",
+		.cpu_features		= CPU_FTRS_740,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_750,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc750",
+	},
+	{	/* 7400 rev 1.1? (no TAU) */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x000c1101,
+		.cpu_name		= "7400 (1.1)",
+		.cpu_features		= CPU_FTRS_7400_NOTAU,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_7400,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7400",
+	},
+	{	/* 7400 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x000c0000,
+		.cpu_name		= "7400",
+		.cpu_features		= CPU_FTRS_7400,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_7400,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7400",
+	},
+	{	/* 7410 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x800c0000,
+		.cpu_name		= "7410",
+		.cpu_features		= CPU_FTRS_7400,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_7410,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7400",
+	},
+	{	/* 7450 2.0 - no doze/nap */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80000200,
+		.cpu_name		= "7450",
+		.cpu_features		= CPU_FTRS_7450_20,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7450 2.1 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80000201,
+		.cpu_name		= "7450",
+		.cpu_features		= CPU_FTRS_7450_21,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7450 2.3 and newer */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80000000,
+		.cpu_name		= "7450",
+		.cpu_features		= CPU_FTRS_7450_23,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7455 rev 1.x */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x80010100,
+		.cpu_name		= "7455",
+		.cpu_features		= CPU_FTRS_7455_1,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7455 rev 2.0 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80010200,
+		.cpu_name		= "7455",
+		.cpu_features		= CPU_FTRS_7455_20,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7455 others */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80010000,
+		.cpu_name		= "7455",
+		.cpu_features		= CPU_FTRS_7455,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447/7457 Rev 1.0 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80020100,
+		.cpu_name		= "7447/7457",
+		.cpu_features		= CPU_FTRS_7447_10,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447/7457 Rev 1.1 */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x80020101,
+		.cpu_name		= "7447/7457",
+		.cpu_features		= CPU_FTRS_7447_10,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447/7457 Rev 1.2 and later */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80020000,
+		.cpu_name		= "7447/7457",
+		.cpu_features		= CPU_FTRS_7447,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7447A */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80030000,
+		.cpu_name		= "7447A",
+		.cpu_features		= CPU_FTRS_7447A,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* 7448 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80040000,
+		.cpu_name		= "7448",
+		.cpu_features		= CPU_FTRS_7448,
+		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+					  PPC_FEATURE_PPC_LE,
+		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_G4,
+		.cpu_setup		= __setup_cpu_745x,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc7450",
+	},
+	{	/* default match, we assume split I/D cache & TB (non-601)... */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "(generic PPC)",
+		.cpu_features		= CPU_FTRS_CLASSIC32,
+		.cpu_user_features	= COMMON_USER,
+		.mmu_features		= MMU_FTR_HPTE_TABLE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.machine_check		= machine_check_generic,
+		.platform		= "ppc603",
+	},
+#endif /* CONFIG_PPC_BOOK3S_604 */
+};
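
Ordering matters throughout this table: an exact-match entry (mask 0xffffffff) such as "750FX rev 2.0" must appear before the broader 750FX entry (mask 0xffff0000), because lookup stops at the first hit. A quick shadowing check one could run over such a table, shown here as a hypothetical lint helper that is not part of this patch, using the same trimmed-down stand-in struct as the earlier sketch:

#include <stdbool.h>
#include <stddef.h>

struct cpu_spec_min {
	unsigned int pvr_mask;
	unsigned int pvr_value;
};

/*
 * Entry j is unreachable if some earlier entry i matches every PVR that
 * j would match: i's mask must be a subset of j's mask, and both values
 * must agree on i's mask bits.
 */
static bool shadowed(const struct cpu_spec_min *s, size_t j)
{
	size_t i;

	for (i = 0; i < j; i++)
		if (!(s[i].pvr_mask & ~s[j].pvr_mask) &&
		    (s[j].pvr_value & s[i].pvr_mask) == s[i].pvr_value)
			return true;
	return false;
}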
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h
new file mode 100644
index 000000000000..c370c1b804a9
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+/* NOTE:
+ * Unlike ppc32, ppc64 will only call cpu_setup() for the boot CPU; it's
+ * the responsibility of the appropriate CPU save/restore functions to
+ * eventually copy these settings over. Those save/restore functions
+ * aren't part of the cputable yet; that has to be fixed for both ppc32
+ * and ppc64.
+ */
+#define COMMON_USER_PPC64	(PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+				 PPC_FEATURE_HAS_MMU | PPC_FEATURE_64)
+#define COMMON_USER_POWER4	(COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5	(COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER5_PLUS	(COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER6	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7	(PPC_FEATURE2_DSCR)
+#define COMMON_USER_POWER8	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_HTM_COMP | \
+				 PPC_FEATURE2_HTM_NOSC_COMP | \
+				 PPC_FEATURE2_DSCR | \
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+				 PPC_FEATURE2_VEC_CRYPTO)
+#define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+				 PPC_FEATURE_TRUE_LE | \
+				 PPC_FEATURE_HAS_ALTIVEC_COMP)
+#define COMMON_USER_POWER9	COMMON_USER_POWER8
+#define COMMON_USER2_POWER9	(COMMON_USER2_POWER8 | \
+				 PPC_FEATURE2_ARCH_3_00 | \
+				 PPC_FEATURE2_HAS_IEEE128 | \
+				 PPC_FEATURE2_DARN | \
+				 PPC_FEATURE2_SCV)
+#define COMMON_USER_POWER10	COMMON_USER_POWER9
+#define COMMON_USER2_POWER10	(PPC_FEATURE2_ARCH_3_1 | \
+				 PPC_FEATURE2_MMA | \
+				 PPC_FEATURE2_ARCH_3_00 | \
+				 PPC_FEATURE2_HAS_IEEE128 | \
+				 PPC_FEATURE2_DARN | \
+				 PPC_FEATURE2_SCV | \
+				 PPC_FEATURE2_ARCH_2_07 | \
+				 PPC_FEATURE2_DSCR | \
+				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+				 PPC_FEATURE2_VEC_CRYPTO)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+	{	/* PPC970 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00390000,
+		.cpu_name		= "PPC970",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970FX */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003c0000,
+		.cpu_name		= "PPC970FX",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x00440100,
+		.cpu_name		= "PPC970MP",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970MP */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00440000,
+		.cpu_name		= "PPC970MP",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970MP,
+		.cpu_restore		= __restore_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* PPC970GX */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00450000,
+		.cpu_name		= "PPC970GX",
+		.cpu_features		= CPU_FTRS_PPC970,
+		.cpu_user_features	= COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.mmu_features		= MMU_FTRS_PPC970,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 8,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_ppc970,
+		.platform		= "ppc970",
+	},
+	{	/* Power5 GR */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003a0000,
+		.cpu_name		= "POWER5 (gr)",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power5",
+	},
+	{	/* Power5++ */
+		.pvr_mask		= 0xffffff00,
+		.pvr_value		= 0x003b0300,
+		.cpu_name		= "POWER5+ (gs)",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.platform		= "power5+",
+	},
+	{	/* Power5 GS */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003b0000,
+		.cpu_name		= "POWER5+ (gs)",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power5+",
+	},
+	{	/* POWER6 in P5+ mode; 2.04-compliant processor */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000001,
+		.cpu_name		= "POWER5+",
+		.cpu_features		= CPU_FTRS_POWER5,
+		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
+		.mmu_features		= MMU_FTRS_POWER5,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.platform		= "power5+",
+	},
+	{	/* Power6 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003e0000,
+		.cpu_name		= "POWER6 (raw)",
+		.cpu_features		= CPU_FTRS_POWER6,
+		.cpu_user_features	= COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT,
+		.mmu_features		= MMU_FTRS_POWER6,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power6x",
+	},
+	{	/* 2.05-compliant processor, i.e. Power6 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000002,
+		.cpu_name		= "POWER6 (architected)",
+		.cpu_features		= CPU_FTRS_POWER6,
+		.cpu_user_features	= COMMON_USER_POWER6,
+		.mmu_features		= MMU_FTRS_POWER6,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.platform		= "power6",
+	},
+	{	/* 2.06-compliant processor, i.e. Power7 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000003,
+		.cpu_name		= "POWER7 (architected)",
+		.cpu_features		= CPU_FTRS_POWER7,
+		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
+		.mmu_features		= MMU_FTRS_POWER7,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
+		.platform		= "power7",
+	},
+	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000004,
+		.cpu_name		= "POWER8 (architected)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* 3.00-compliant processor, i.e. Power9 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000005,
+		.cpu_name		= "POWER9 (architected)",
+		.cpu_features		= CPU_FTRS_POWER9,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.platform		= "power9",
+	},
+	{	/* 3.1-compliant processor, i.e. Power10 "architected" mode */
+		.pvr_mask		= 0xffffffff,
+		.pvr_value		= 0x0f000006,
+		.cpu_name		= "POWER10 (architected)",
+		.cpu_features		= CPU_FTRS_POWER10,
+		.cpu_user_features	= COMMON_USER_POWER10,
+		.cpu_user_features2	= COMMON_USER2_POWER10,
+		.mmu_features		= MMU_FTRS_POWER10,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.cpu_setup		= __setup_cpu_power10,
+		.cpu_restore		= __restore_cpu_power10,
+		.platform		= "power10",
+	},
+	{	/* Power7 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x003f0000,
+		.cpu_name		= "POWER7 (raw)",
+		.cpu_features		= CPU_FTRS_POWER7,
+		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
+		.mmu_features		= MMU_FTRS_POWER7,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
+		.platform		= "power7",
+	},
+	{	/* Power7+ */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004a0000,
+		.cpu_name		= "POWER7+ (raw)",
+		.cpu_features		= CPU_FTRS_POWER7,
+		.cpu_user_features	= COMMON_USER_POWER7,
+		.cpu_user_features2	= COMMON_USER2_POWER7,
+		.mmu_features		= MMU_FTRS_POWER7,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power7,
+		.cpu_restore		= __restore_cpu_power7,
+		.machine_check_early	= __machine_check_early_realmode_p7,
+		.platform		= "power7+",
+	},
+	{	/* Power8E */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004b0000,
+		.cpu_name		= "POWER8E (raw)",
+		.cpu_features		= CPU_FTRS_POWER8E,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* Power8NVL */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004c0000,
+		.cpu_name		= "POWER8NVL (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* Power8 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004d0000,
+		.cpu_name		= "POWER8 (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.machine_check_early	= __machine_check_early_realmode_p8,
+		.platform		= "power8",
+	},
+	{	/* Power9 DD2.0 */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0200,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_0,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD 2.1 */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0201,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_1,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD2.2 */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0202,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_2,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD2.3 or later */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004e0000,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_3,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power10 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00800000,
+		.cpu_name		= "POWER10 (raw)",
+		.cpu_features		= CPU_FTRS_POWER10,
+		.cpu_user_features	= COMMON_USER_POWER10,
+		.cpu_user_features2	= COMMON_USER2_POWER10,
+		.mmu_features		= MMU_FTRS_POWER10,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.cpu_setup		= __setup_cpu_power10,
+		.cpu_restore		= __restore_cpu_power10,
+		.machine_check_early	= __machine_check_early_realmode_p10,
+		.platform		= "power10",
+	},
+	{	/* Cell Broadband Engine */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x00700000,
+		.cpu_name		= "Cell Broadband Engine",
+		.cpu_features		= CPU_FTRS_CELL,
+		.cpu_user_features	= COMMON_USER_PPC64 | PPC_FEATURE_CELL |
+					  PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT,
+		.mmu_features		= MMU_FTRS_CELL,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 4,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "ppc-cell-be",
+	},
+	{	/* PA Semi PA6T */
+		.pvr_mask		= 0x7fff0000,
+		.pvr_value		= 0x00900000,
+		.cpu_name		= "PA6T",
+		.cpu_features		= CPU_FTRS_PA6T,
+		.cpu_user_features	= COMMON_USER_PA6T,
+		.mmu_features		= MMU_FTRS_PA6T,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_PA6T,
+		.cpu_setup		= __setup_cpu_pa6t,
+		.cpu_restore		= __restore_cpu_pa6t,
+		.platform		= "pa6t",
+	},
+	{	/* default match */
+		.pvr_mask		= 0x00000000,
+		.pvr_value		= 0x00000000,
+		.cpu_name		= "POWER5 (compatible)",
+		.cpu_features		= CPU_FTRS_COMPATIBLE,
+		.cpu_user_features	= COMMON_USER_PPC64,
+		.mmu_features		= MMU_FTRS_POWER,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.platform		= "power5",
+	}
+};
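
The cpu_user_features/cpu_user_features2 words above are what userspace eventually sees as the AT_HWCAP and AT_HWCAP2 auxiliary-vector entries. A small user-space sketch (plain glibc, not kernel code) testing for the ISA 3.00 bit that COMMON_USER2_POWER9 advertises; the fallback #define mirrors the uapi value and is only there to keep the sketch self-contained:

#include <stdio.h>
#include <sys/auxv.h>

#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00	0x00800000	/* uapi <asm/cputable.h> */
#endif

int main(void)
{
	unsigned long hwcap2 = getauxval(AT_HWCAP2);

	/*
	 * Set precisely when the matched cpu_spec entry carried
	 * PPC_FEATURE2_ARCH_3_00 in cpu_user_features2, i.e. the
	 * POWER9/POWER10 entries above.
	 */
	printf("ISA 3.00: %s\n",
	       (hwcap2 & PPC_FEATURE2_ARCH_3_00) ? "yes" : "no");
	return 0;
}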
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
new file mode 100644
index 000000000000..ceb06b109f83
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *  Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  Modifications for ppc64:
+ *      Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+#ifdef CONFIG_PPC64
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_HAS_FPU | PPC_FEATURE_64)
+#else
+#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+				 PPC_FEATURE_BOOKE)
+#endif
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC32
+	{	/* e500mc */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80230000,
+		.cpu_name		= "e500mc",
+		.cpu_features		= CPU_FTRS_E500MC,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e500mc,
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce500mc",
+		.cpu_down_flush		= cpu_down_flush_e500mc,
+	},
+#endif /* CONFIG_PPC32 */
+	{	/* e5500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80240000,
+		.cpu_name		= "e5500",
+		.cpu_features		= CPU_FTRS_E5500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 4,
+		.cpu_setup		= __setup_cpu_e5500,
+#ifndef CONFIG_PPC32
+		.cpu_restore		= __restore_cpu_e5500,
+#endif
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce5500",
+		.cpu_down_flush		= cpu_down_flush_e5500,
+	},
+	{	/* e6500 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x80400000,
+		.cpu_name		= "e6500",
+		.cpu_features		= CPU_FTRS_E6500,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
+					  PPC_FEATURE_HAS_ALTIVEC_COMP,
+		.cpu_user_features2	= PPC_FEATURE2_ISEL,
+		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+		.icache_bsize		= 64,
+		.dcache_bsize		= 64,
+		.num_pmcs		= 6,
+		.cpu_setup		= __setup_cpu_e6500,
+#ifndef CONFIG_PPC32
+		.cpu_restore		= __restore_cpu_e6500,
+#endif
+		.machine_check		= machine_check_e500mc,
+		.platform		= "ppce6500",
+		.cpu_down_flush		= cpu_down_flush_e6500,
+	},
+};
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index d8e42ef750f1..8a32bffefa5b 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -18,6 +18,7 @@
 #include <asm/mce.h>
 #include <asm/mmu.h>
 #include <asm/setup.h>
+#include <asm/cpu_setup.h>
 
 static struct cpu_spec the_cpu_spec __read_mostly;
 
@@ -27,1922 +28,7 @@ EXPORT_SYMBOL(cur_cpu_spec);
 /* The platform string corresponding to the real PVR */
 const char *powerpc_base_platform;
 
-/* NOTE:
- * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
- * the responsibility of the appropriate CPU save/restore functions to
- * eventually copy these settings over. Those save/restore aren't yet
- * part of the cputable though. That has to be fixed for both ppc32
- * and ppc64
- */
-#ifdef CONFIG_PPC32
-extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
-extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
-extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-#include <asm/cpu_setup_power.h>
-extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_pa6t(void);
-extern void __restore_cpu_ppc970(void);
-extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
-#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_E500)
-extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e6500(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_e5500(void);
-extern void __restore_cpu_e6500(void);
-#endif /* CONFIG_E500 */
-
-/* This table only contains "desktop" CPUs, it needs to be filled with embedded
- * ones as well...
- */
-#define COMMON_USER		(PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
-				 PPC_FEATURE_HAS_MMU)
-#define COMMON_USER_PPC64	(COMMON_USER | PPC_FEATURE_64)
-#define COMMON_USER_POWER4	(COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
-#define COMMON_USER_POWER5	(COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
-				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
-#define COMMON_USER_POWER5_PLUS	(COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
-				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
-#define COMMON_USER_POWER6	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
-				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
-				 PPC_FEATURE_TRUE_LE | \
-				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER_POWER7	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
-				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
-				 PPC_FEATURE_TRUE_LE | \
-				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER2_POWER7	(PPC_FEATURE2_DSCR)
-#define COMMON_USER_POWER8	(COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
-				 PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
-				 PPC_FEATURE_TRUE_LE | \
-				 PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER2_POWER8	(PPC_FEATURE2_ARCH_2_07 | \
-				 PPC_FEATURE2_HTM_COMP | \
-				 PPC_FEATURE2_HTM_NOSC_COMP | \
-				 PPC_FEATURE2_DSCR | \
-				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
-				 PPC_FEATURE2_VEC_CRYPTO)
-#define COMMON_USER_PA6T	(COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
-				 PPC_FEATURE_TRUE_LE | \
-				 PPC_FEATURE_HAS_ALTIVEC_COMP)
-#define COMMON_USER_POWER9	COMMON_USER_POWER8
-#define COMMON_USER2_POWER9	(COMMON_USER2_POWER8 | \
-				 PPC_FEATURE2_ARCH_3_00 | \
-				 PPC_FEATURE2_HAS_IEEE128 | \
-				 PPC_FEATURE2_DARN | \
-				 PPC_FEATURE2_SCV)
-#define COMMON_USER_POWER10	COMMON_USER_POWER9
-#define COMMON_USER2_POWER10	(PPC_FEATURE2_ARCH_3_1 | \
-				 PPC_FEATURE2_MMA | \
-				 PPC_FEATURE2_ARCH_3_00 | \
-				 PPC_FEATURE2_HAS_IEEE128 | \
-				 PPC_FEATURE2_DARN | \
-				 PPC_FEATURE2_SCV | \
-				 PPC_FEATURE2_ARCH_2_07 | \
-				 PPC_FEATURE2_DSCR | \
-				 PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
-				 PPC_FEATURE2_VEC_CRYPTO)
-
-#ifdef CONFIG_PPC_BOOK3E_64
-#define COMMON_USER_BOOKE	(COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
-#else
-#define COMMON_USER_BOOKE	(PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
-				 PPC_FEATURE_BOOKE)
-#endif
-
-static struct cpu_spec __initdata cpu_specs[] = {
-#ifdef CONFIG_PPC_BOOK3S_64
-	{	/* PPC970 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00390000,
-		.cpu_name		= "PPC970",
-		.cpu_features		= CPU_FTRS_PPC970,
-		.cpu_user_features	= COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTRS_PPC970,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_ppc970,
-		.cpu_restore		= __restore_cpu_ppc970,
-		.platform		= "ppc970",
-	},
-	{	/* PPC970FX */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x003c0000,
-		.cpu_name		= "PPC970FX",
-		.cpu_features		= CPU_FTRS_PPC970,
-		.cpu_user_features	= COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTRS_PPC970,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_ppc970,
-		.cpu_restore		= __restore_cpu_ppc970,
-		.platform		= "ppc970",
-	},
-	{	/* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x00440100,
-		.cpu_name		= "PPC970MP",
-		.cpu_features		= CPU_FTRS_PPC970,
-		.cpu_user_features	= COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTRS_PPC970,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_ppc970,
-		.cpu_restore		= __restore_cpu_ppc970,
-		.platform		= "ppc970",
-	},
-	{	/* PPC970MP */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00440000,
-		.cpu_name		= "PPC970MP",
-		.cpu_features		= CPU_FTRS_PPC970,
-		.cpu_user_features	= COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTRS_PPC970,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_ppc970MP,
-		.cpu_restore		= __restore_cpu_ppc970,
-		.platform		= "ppc970",
-	},
-	{	/* PPC970GX */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00450000,
-		.cpu_name		= "PPC970GX",
-		.cpu_features		= CPU_FTRS_PPC970,
-		.cpu_user_features	= COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features		= MMU_FTRS_PPC970,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_ppc970,
-		.platform		= "ppc970",
-	},
-	{	/* Power5 GR */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x003a0000,
-		.cpu_name		= "POWER5 (gr)",
-		.cpu_features		= CPU_FTRS_POWER5,
-		.cpu_user_features	= COMMON_USER_POWER5,
-		.mmu_features		= MMU_FTRS_POWER5,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.platform		= "power5",
-	},
-	{	/* Power5++ */
-		.pvr_mask		= 0xffffff00,
-		.pvr_value		= 0x003b0300,
-		.cpu_name		= "POWER5+ (gs)",
-		.cpu_features		= CPU_FTRS_POWER5,
-		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTRS_POWER5,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.platform		= "power5+",
-	},
-	{	/* Power5 GS */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x003b0000,
-		.cpu_name		= "POWER5+ (gs)",
-		.cpu_features		= CPU_FTRS_POWER5,
-		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTRS_POWER5,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.platform		= "power5+",
-	},
-	{	/* POWER6 in P5+ mode; 2.04-compliant processor */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x0f000001,
-		.cpu_name		= "POWER5+",
-		.cpu_features		= CPU_FTRS_POWER5,
-		.cpu_user_features	= COMMON_USER_POWER5_PLUS,
-		.mmu_features		= MMU_FTRS_POWER5,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.platform		= "power5+",
-	},
-	{	/* Power6 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x003e0000,
-		.cpu_name		= "POWER6 (raw)",
-		.cpu_features		= CPU_FTRS_POWER6,
-		.cpu_user_features	= COMMON_USER_POWER6 |
-			PPC_FEATURE_POWER6_EXT,
-		.mmu_features		= MMU_FTRS_POWER6,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.platform		= "power6x",
-	},
-	{	/* 2.05-compliant processor, i.e. Power6 "architected" mode */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x0f000002,
-		.cpu_name		= "POWER6 (architected)",
-		.cpu_features		= CPU_FTRS_POWER6,
-		.cpu_user_features	= COMMON_USER_POWER6,
-		.mmu_features		= MMU_FTRS_POWER6,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.platform		= "power6",
-	},
-	{	/* 2.06-compliant processor, i.e. Power7 "architected" mode */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x0f000003,
-		.cpu_name		= "POWER7 (architected)",
-		.cpu_features		= CPU_FTRS_POWER7,
-		.cpu_user_features	= COMMON_USER_POWER7,
-		.cpu_user_features2	= COMMON_USER2_POWER7,
-		.mmu_features		= MMU_FTRS_POWER7,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.cpu_setup		= __setup_cpu_power7,
-		.cpu_restore		= __restore_cpu_power7,
-		.machine_check_early	= __machine_check_early_realmode_p7,
-		.platform		= "power7",
-	},
-	{	/* 2.07-compliant processor, i.e. Power8 "architected" mode */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x0f000004,
-		.cpu_name		= "POWER8 (architected)",
-		.cpu_features		= CPU_FTRS_POWER8,
-		.cpu_user_features	= COMMON_USER_POWER8,
-		.cpu_user_features2	= COMMON_USER2_POWER8,
-		.mmu_features		= MMU_FTRS_POWER8,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.cpu_setup		= __setup_cpu_power8,
-		.cpu_restore		= __restore_cpu_power8,
-		.machine_check_early	= __machine_check_early_realmode_p8,
-		.platform		= "power8",
-	},
-	{	/* 3.00-compliant processor, i.e. Power9 "architected" mode */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x0f000005,
-		.cpu_name		= "POWER9 (architected)",
-		.cpu_features		= CPU_FTRS_POWER9,
-		.cpu_user_features	= COMMON_USER_POWER9,
-		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.cpu_setup		= __setup_cpu_power9,
-		.cpu_restore		= __restore_cpu_power9,
-		.platform		= "power9",
-	},
-	{	/* 3.1-compliant processor, i.e. Power10 "architected" mode */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x0f000006,
-		.cpu_name		= "POWER10 (architected)",
-		.cpu_features		= CPU_FTRS_POWER10,
-		.cpu_user_features	= COMMON_USER_POWER10,
-		.cpu_user_features2	= COMMON_USER2_POWER10,
-		.mmu_features		= MMU_FTRS_POWER10,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.cpu_setup		= __setup_cpu_power10,
-		.cpu_restore		= __restore_cpu_power10,
-		.platform		= "power10",
-	},
-	{	/* Power7 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x003f0000,
-		.cpu_name		= "POWER7 (raw)",
-		.cpu_features		= CPU_FTRS_POWER7,
-		.cpu_user_features	= COMMON_USER_POWER7,
-		.cpu_user_features2	= COMMON_USER2_POWER7,
-		.mmu_features		= MMU_FTRS_POWER7,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power7,
-		.cpu_restore		= __restore_cpu_power7,
-		.machine_check_early	= __machine_check_early_realmode_p7,
-		.platform		= "power7",
-	},
-	{	/* Power7+ */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x004A0000,
-		.cpu_name		= "POWER7+ (raw)",
-		.cpu_features		= CPU_FTRS_POWER7,
-		.cpu_user_features	= COMMON_USER_POWER7,
-		.cpu_user_features2	= COMMON_USER2_POWER7,
-		.mmu_features		= MMU_FTRS_POWER7,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power7,
-		.cpu_restore		= __restore_cpu_power7,
-		.machine_check_early	= __machine_check_early_realmode_p7,
-		.platform		= "power7+",
-	},
-	{	/* Power8E */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x004b0000,
-		.cpu_name		= "POWER8E (raw)",
-		.cpu_features		= CPU_FTRS_POWER8E,
-		.cpu_user_features	= COMMON_USER_POWER8,
-		.cpu_user_features2	= COMMON_USER2_POWER8,
-		.mmu_features		= MMU_FTRS_POWER8,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power8,
-		.cpu_restore		= __restore_cpu_power8,
-		.machine_check_early	= __machine_check_early_realmode_p8,
-		.platform		= "power8",
-	},
-	{	/* Power8NVL */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x004c0000,
-		.cpu_name		= "POWER8NVL (raw)",
-		.cpu_features		= CPU_FTRS_POWER8,
-		.cpu_user_features	= COMMON_USER_POWER8,
-		.cpu_user_features2	= COMMON_USER2_POWER8,
-		.mmu_features		= MMU_FTRS_POWER8,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power8,
-		.cpu_restore		= __restore_cpu_power8,
-		.machine_check_early	= __machine_check_early_realmode_p8,
-		.platform		= "power8",
-	},
-	{	/* Power8 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x004d0000,
-		.cpu_name		= "POWER8 (raw)",
-		.cpu_features		= CPU_FTRS_POWER8,
-		.cpu_user_features	= COMMON_USER_POWER8,
-		.cpu_user_features2	= COMMON_USER2_POWER8,
-		.mmu_features		= MMU_FTRS_POWER8,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power8,
-		.cpu_restore		= __restore_cpu_power8,
-		.machine_check_early	= __machine_check_early_realmode_p8,
-		.platform		= "power8",
-	},
-	{	/* Power9 DD2.0 */
-		.pvr_mask		= 0xffffefff,
-		.pvr_value		= 0x004e0200,
-		.cpu_name		= "POWER9 (raw)",
-		.cpu_features		= CPU_FTRS_POWER9_DD2_0,
-		.cpu_user_features	= COMMON_USER_POWER9,
-		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power9,
-		.cpu_restore		= __restore_cpu_power9,
-		.machine_check_early	= __machine_check_early_realmode_p9,
-		.platform		= "power9",
-	},
-	{	/* Power9 DD 2.1 */
-		.pvr_mask		= 0xffffefff,
-		.pvr_value		= 0x004e0201,
-		.cpu_name		= "POWER9 (raw)",
-		.cpu_features		= CPU_FTRS_POWER9_DD2_1,
-		.cpu_user_features	= COMMON_USER_POWER9,
-		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power9,
-		.cpu_restore		= __restore_cpu_power9,
-		.machine_check_early	= __machine_check_early_realmode_p9,
-		.platform		= "power9",
-	},
-	{	/* Power9 DD2.2 */
-		.pvr_mask		= 0xffffefff,
-		.pvr_value		= 0x004e0202,
-		.cpu_name		= "POWER9 (raw)",
-		.cpu_features		= CPU_FTRS_POWER9_DD2_2,
-		.cpu_user_features	= COMMON_USER_POWER9,
-		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power9,
-		.cpu_restore		= __restore_cpu_power9,
-		.machine_check_early	= __machine_check_early_realmode_p9,
-		.platform		= "power9",
-	},
-	{	/* Power9 DD2.3 or later */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x004e0000,
-		.cpu_name		= "POWER9 (raw)",
-		.cpu_features		= CPU_FTRS_POWER9_DD2_3,
-		.cpu_user_features	= COMMON_USER_POWER9,
-		.cpu_user_features2	= COMMON_USER2_POWER9,
-		.mmu_features		= MMU_FTRS_POWER9,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power9,
-		.cpu_restore		= __restore_cpu_power9,
-		.machine_check_early	= __machine_check_early_realmode_p9,
-		.platform		= "power9",
-	},
-	{	/* Power10 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00800000,
-		.cpu_name		= "POWER10 (raw)",
-		.cpu_features		= CPU_FTRS_POWER10,
-		.cpu_user_features	= COMMON_USER_POWER10,
-		.cpu_user_features2	= COMMON_USER2_POWER10,
-		.mmu_features		= MMU_FTRS_POWER10,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_power10,
-		.cpu_restore		= __restore_cpu_power10,
-		.machine_check_early	= __machine_check_early_realmode_p10,
-		.platform		= "power10",
-	},
-	{	/* Cell Broadband Engine */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00700000,
-		.cpu_name		= "Cell Broadband Engine",
-		.cpu_features		= CPU_FTRS_CELL,
-		.cpu_user_features	= COMMON_USER_PPC64 |
-			PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
-			PPC_FEATURE_SMT,
-		.mmu_features		= MMU_FTRS_CELL,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.platform		= "ppc-cell-be",
-	},
-	{	/* PA Semi PA6T */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00900000,
-		.cpu_name		= "PA6T",
-		.cpu_features		= CPU_FTRS_PA6T,
-		.cpu_user_features	= COMMON_USER_PA6T,
-		.mmu_features		= MMU_FTRS_PA6T,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_PA6T,
-		.cpu_setup		= __setup_cpu_pa6t,
-		.cpu_restore		= __restore_cpu_pa6t,
-		.platform		= "pa6t",
-	},
-	{	/* default match */
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "POWER5 (compatible)",
-		.cpu_features		= CPU_FTRS_COMPATIBLE,
-		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTRS_POWER,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_IBM,
-		.platform		= "power5",
-	}
-#endif	/* CONFIG_PPC_BOOK3S_64 */
-
-#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
-#ifdef CONFIG_PPC_BOOK3S_604
-	{	/* 604 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00040000,
-		.cpu_name		= "604",
-		.cpu_features		= CPU_FTRS_604,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 2,
-		.cpu_setup		= __setup_cpu_604,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc604",
-	},
-	{	/* 604e */
-		.pvr_mask		= 0xfffff000,
-		.pvr_value		= 0x00090000,
-		.cpu_name		= "604e",
-		.cpu_features		= CPU_FTRS_604,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_604,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc604",
-	},
-	{	/* 604r */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00090000,
-		.cpu_name		= "604r",
-		.cpu_features		= CPU_FTRS_604,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_604,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc604",
-	},
-	{	/* 604ev */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x000a0000,
-		.cpu_name		= "604ev",
-		.cpu_features		= CPU_FTRS_604,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_604,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc604",
-	},
-	{	/* 740/750 (0x4202, don't support TAU ?) */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x00084202,
-		.cpu_name		= "740/750",
-		.cpu_features		= CPU_FTRS_740_NOTAU,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_750,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750CX (80100 and 8010x?) */
-		.pvr_mask		= 0xfffffff0,
-		.pvr_value		= 0x00080100,
-		.cpu_name		= "750CX",
-		.cpu_features		= CPU_FTRS_750,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_750cx,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750CX (82201 and 82202) */
-		.pvr_mask		= 0xfffffff0,
-		.pvr_value		= 0x00082200,
-		.cpu_name		= "750CX",
-		.cpu_features		= CPU_FTRS_750,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750cx,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750CXe (82214) */
-		.pvr_mask		= 0xfffffff0,
-		.pvr_value		= 0x00082210,
-		.cpu_name		= "750CXe",
-		.cpu_features		= CPU_FTRS_750,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750cx,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750CXe "Gekko" (83214) */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x00083214,
-		.cpu_name		= "750CXe",
-		.cpu_features		= CPU_FTRS_750,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750cx,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750CL (and "Broadway") */
-		.pvr_mask		= 0xfffff0e0,
-		.pvr_value		= 0x00087000,
-		.cpu_name		= "750CL",
-		.cpu_features		= CPU_FTRS_750CL,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 745/755 */
-		.pvr_mask		= 0xfffff000,
-		.pvr_value		= 0x00083000,
-		.cpu_name		= "745/755",
-		.cpu_features		= CPU_FTRS_750,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750FX rev 1.x */
-		.pvr_mask		= 0xffffff00,
-		.pvr_value		= 0x70000100,
-		.cpu_name		= "750FX",
-		.cpu_features		= CPU_FTRS_750FX1,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750FX rev 2.0 must disable HID0[DPM] */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x70000200,
-		.cpu_name		= "750FX",
-		.cpu_features		= CPU_FTRS_750FX2,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750FX (All revs except 2.0) */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x70000000,
-		.cpu_name		= "750FX",
-		.cpu_features		= CPU_FTRS_750FX,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750fx,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 750GX */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x70020000,
-		.cpu_name		= "750GX",
-		.cpu_features		= CPU_FTRS_750GX,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750fx,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 740/750 (L2CR bit need fixup for 740) */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00080000,
-		.cpu_name		= "740/750",
-		.cpu_features		= CPU_FTRS_740,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_IBM,
-		.cpu_setup		= __setup_cpu_750,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc750",
-	},
-	{	/* 7400 rev 1.1 ? (no TAU) */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x000c1101,
-		.cpu_name		= "7400 (1.1)",
-		.cpu_features		= CPU_FTRS_7400_NOTAU,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_7400,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7400",
-	},
-	{	/* 7400 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x000c0000,
-		.cpu_name		= "7400",
-		.cpu_features		= CPU_FTRS_7400,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_7400,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7400",
-	},
-	{	/* 7410 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x800c0000,
-		.cpu_name		= "7410",
-		.cpu_features		= CPU_FTRS_7400,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_7410,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7400",
-	},
-	{	/* 7450 2.0 - no doze/nap */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x80000200,
-		.cpu_name		= "7450",
-		.cpu_features		= CPU_FTRS_7450_20,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7450 2.1 */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x80000201,
-		.cpu_name		= "7450",
-		.cpu_features		= CPU_FTRS_7450_21,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7450 2.3 and newer */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80000000,
-		.cpu_name		= "7450",
-		.cpu_features		= CPU_FTRS_7450_23,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7455 rev 1.x */
-		.pvr_mask		= 0xffffff00,
-		.pvr_value		= 0x80010100,
-		.cpu_name		= "7455",
-		.cpu_features		= CPU_FTRS_7455_1,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7455 rev 2.0 */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x80010200,
-		.cpu_name		= "7455",
-		.cpu_features		= CPU_FTRS_7455_20,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7455 others */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80010000,
-		.cpu_name		= "7455",
-		.cpu_features		= CPU_FTRS_7455,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7447/7457 Rev 1.0 */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x80020100,
-		.cpu_name		= "7447/7457",
-		.cpu_features		= CPU_FTRS_7447_10,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7447/7457 Rev 1.1 */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x80020101,
-		.cpu_name		= "7447/7457",
-		.cpu_features		= CPU_FTRS_7447_10,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7447/7457 Rev 1.2 and later */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80020000,
-		.cpu_name		= "7447/7457",
-		.cpu_features		= CPU_FTRS_7447,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7447A */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80030000,
-		.cpu_name		= "7447A",
-		.cpu_features		= CPU_FTRS_7447A,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-	{	/* 7448 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80040000,
-		.cpu_name		= "7448",
-		.cpu_features		= CPU_FTRS_7448,
-		.cpu_user_features	= COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features		= MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 6,
-		.pmc_type		= PPC_PMC_G4,
-		.cpu_setup		= __setup_cpu_745x,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc7450",
-	},
-#endif /* CONFIG_PPC_BOOK3S_604 */
-#ifdef CONFIG_PPC_BOOK3S_603
-	{	/* 603 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00030000,
-		.cpu_name		= "603",
-		.cpu_features		= CPU_FTRS_603,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= 0,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc603",
-	},
-	{	/* 603e */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00060000,
-		.cpu_name		= "603e",
-		.cpu_features		= CPU_FTRS_603,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= 0,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc603",
-	},
-	{	/* 603ev */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00070000,
-		.cpu_name		= "603ev",
-		.cpu_features		= CPU_FTRS_603,
-		.cpu_user_features	= COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features		= 0,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc603",
-	},
-	{	/* 82xx (8240, 8245, 8260 are all 603e cores) */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00810000,
-		.cpu_name		= "82xx",
-		.cpu_features		= CPU_FTRS_82XX,
-		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= 0,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc603",
-	},
-	{	/* All G2_LE (603e core, plus some) have the same pvr */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00820000,
-		.cpu_name		= "G2_LE",
-		.cpu_features		= CPU_FTRS_G2_LE,
-		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc603",
-	},
-#ifdef CONFIG_PPC_83xx
-	{	/* e300c1 (a 603e core, plus some) on 83xx */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00830000,
-		.cpu_name		= "e300c1",
-		.cpu_features		= CPU_FTRS_E300,
-		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_83xx,
-		.platform		= "ppc603",
-	},
-	{	/* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00840000,
-		.cpu_name		= "e300c2",
-		.cpu_features		= CPU_FTRS_E300C2,
-		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS |
-			MMU_FTR_NEED_DTLB_SW_LRU,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_83xx,
-		.platform		= "ppc603",
-	},
-	{	/* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00850000,
-		.cpu_name		= "e300c3",
-		.cpu_features		= CPU_FTRS_E300,
-		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS |
-			MMU_FTR_NEED_DTLB_SW_LRU,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_83xx,
-		.num_pmcs		= 4,
-		.platform		= "ppc603",
-	},
-	{	/* e300c4 (e300c1, plus one IU) */
-		.pvr_mask		= 0x7fff0000,
-		.pvr_value		= 0x00860000,
-		.cpu_name		= "e300c4",
-		.cpu_features		= CPU_FTRS_E300,
-		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_USE_HIGH_BATS |
-			MMU_FTR_NEED_DTLB_SW_LRU,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_603,
-		.machine_check		= machine_check_83xx,
-		.num_pmcs		= 4,
-		.platform		= "ppc603",
-	},
-#endif
-#endif /* CONFIG_PPC_BOOK3S_603 */
-#ifdef CONFIG_PPC_BOOK3S_604
-	{	/* default match, we assume split I/D cache & TB (non-601)... */
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "(generic PPC)",
-		.cpu_features		= CPU_FTRS_CLASSIC32,
-		.cpu_user_features	= COMMON_USER,
-		.mmu_features		= MMU_FTR_HPTE_TABLE,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_generic,
-		.platform		= "ppc603",
-	},
-#endif /* CONFIG_PPC_BOOK3S_604 */
-#endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_PPC_8xx
-	{	/* 8xx */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= PVR_8xx,
-		.cpu_name		= "8xx",
-		/* CPU_FTR_MAYBE_CAN_DOZE is possible,
-		 * if the 8xx code is there.... */
-		.cpu_features		= CPU_FTRS_8XX,
-		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features		= MMU_FTR_TYPE_8xx,
-		.icache_bsize		= 16,
-		.dcache_bsize		= 16,
-		.machine_check		= machine_check_8xx,
-		.platform		= "ppc823",
-	},
-#endif /* CONFIG_PPC_8xx */
-#ifdef CONFIG_40x
-	{	/* STB 04xxx */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x41810000,
-		.cpu_name		= "STB04xxx",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* NP405L */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x41610000,
-		.cpu_name		= "NP405L",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* NP4GS3 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x40B10000,
-		.cpu_name		= "NP4GS3",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{   /* NP405H */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x41410000,
-		.cpu_name		= "NP405H",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405GPr */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x50910000,
-		.cpu_name		= "405GPr",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{   /* STBx25xx */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x51510000,
-		.cpu_name		= "STBx25xx",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405LP */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x41F10000,
-		.cpu_name		= "405LP",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EP */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x51210000,
-		.cpu_name		= "405EP",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EX Rev. A/B with Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910007,
-		.cpu_name		= "405EX Rev. A/B",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EX Rev. C without Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x1291000d,
-		.cpu_name		= "405EX Rev. C",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EX Rev. C with Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x1291000f,
-		.cpu_name		= "405EX Rev. C",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EX Rev. D without Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910003,
-		.cpu_name		= "405EX Rev. D",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EX Rev. D with Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910005,
-		.cpu_name		= "405EX Rev. D",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EXr Rev. A/B without Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910001,
-		.cpu_name		= "405EXr Rev. A/B",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EXr Rev. C without Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910009,
-		.cpu_name		= "405EXr Rev. C",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EXr Rev. C with Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x1291000b,
-		.cpu_name		= "405EXr Rev. C",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EXr Rev. D without Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910000,
-		.cpu_name		= "405EXr Rev. D",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* 405EXr Rev. D with Security */
-		.pvr_mask		= 0xffff000f,
-		.pvr_value		= 0x12910002,
-		.cpu_name		= "405EXr Rev. D",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{
-		/* 405EZ */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x41510000,
-		.cpu_name		= "405EZ",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* APM8018X */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x7ff11432,
-		.cpu_name		= "APM8018X",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	},
-	{	/* default match */
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "(generic 40x PPC)",
-		.cpu_features		= CPU_FTRS_40X,
-		.cpu_user_features	= PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features		= MMU_FTR_TYPE_40x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc405",
-	}
-
-#endif /* CONFIG_40x */
-#ifdef CONFIG_44x
-#ifndef CONFIG_PPC_47x
-	{
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x40000850,
-		.cpu_name		= "440GR Rev. A",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	},
-	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x40000858,
-		.cpu_name		= "440EP Rev. A",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440ep,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	},
-	{
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x400008d3,
-		.cpu_name		= "440GR Rev. B",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	},
-	{ /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
-		.pvr_mask		= 0xf0000ff7,
-		.pvr_value		= 0x400008d4,
-		.cpu_name		= "440EP Rev. C",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440ep,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	},
-	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x400008db,
-		.cpu_name		= "440EP Rev. B",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440ep,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	},
-	{ /* 440GRX */
-		.pvr_mask		= 0xf0000ffb,
-		.pvr_value		= 0x200008D0,
-		.cpu_name		= "440GRX",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440grx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
-		.pvr_mask		= 0xf0000ffb,
-		.pvr_value		= 0x200008D8,
-		.cpu_name		= "440EPX",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440epx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{	/* 440GP Rev. B */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x40000440,
-		.cpu_name		= "440GP Rev. B",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440gp",
-	},
-	{	/* 440GP Rev. C */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x40000481,
-		.cpu_name		= "440GP Rev. C",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440gp",
-	},
-	{ /* 440GX Rev. A */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x50000850,
-		.cpu_name		= "440GX Rev. A",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440gx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 440GX Rev. B */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x50000851,
-		.cpu_name		= "440GX Rev. B",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440gx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 440GX Rev. C */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x50000892,
-		.cpu_name		= "440GX Rev. C",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440gx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 440GX Rev. F */
-		.pvr_mask		= 0xf0000fff,
-		.pvr_value		= 0x50000894,
-		.cpu_name		= "440GX Rev. F",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440gx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 440SP Rev. A */
-		.pvr_mask		= 0xfff00fff,
-		.pvr_value		= 0x53200891,
-		.cpu_name		= "440SP Rev. A",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	},
-	{ /* 440SPe Rev. A */
-		.pvr_mask               = 0xfff00fff,
-		.pvr_value              = 0x53400890,
-		.cpu_name               = "440SPe Rev. A",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features      = COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize           = 32,
-		.dcache_bsize           = 32,
-		.cpu_setup		= __setup_cpu_440spe,
-		.machine_check		= machine_check_440A,
-		.platform               = "ppc440",
-	},
-	{ /* 440SPe Rev. B */
-		.pvr_mask		= 0xfff00fff,
-		.pvr_value		= 0x53400891,
-		.cpu_name		= "440SPe Rev. B",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440spe,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 460EX */
-		.pvr_mask		= 0xffff0006,
-		.pvr_value		= 0x13020002,
-		.cpu_name		= "460EX",
-		.cpu_features		= CPU_FTRS_440x6,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_460ex,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 460EX Rev B */
-		.pvr_mask		= 0xffff0007,
-		.pvr_value		= 0x13020004,
-		.cpu_name		= "460EX Rev. B",
-		.cpu_features		= CPU_FTRS_440x6,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_460ex,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 460GT */
-		.pvr_mask		= 0xffff0006,
-		.pvr_value		= 0x13020000,
-		.cpu_name		= "460GT",
-		.cpu_features		= CPU_FTRS_440x6,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_460gt,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 460GT Rev B */
-		.pvr_mask		= 0xffff0007,
-		.pvr_value		= 0x13020005,
-		.cpu_name		= "460GT Rev. B",
-		.cpu_features		= CPU_FTRS_440x6,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_460gt,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 460SX */
-		.pvr_mask		= 0xffffff00,
-		.pvr_value		= 0x13541800,
-		.cpu_name		= "460SX",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_460sx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{ /* 464 in APM821xx */
-		.pvr_mask		= 0xfffffff0,
-		.pvr_value		= 0x12C41C80,
-		.cpu_name		= "APM821XX",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_apm821xx,
-		.machine_check		= machine_check_440A,
-		.platform		= "ppc440",
-	},
-	{	/* default match */
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "(generic 44x PPC)",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_44x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_4xx,
-		.platform		= "ppc440",
-	}
-#else /* CONFIG_PPC_47x */
-	{ /* 476 DD2 core */
-		.pvr_mask		= 0xffffffff,
-		.pvr_value		= 0x11a52080,
-		.cpu_name		= "476",
-		.cpu_features		= CPU_FTRS_47X | CPU_FTR_476_DD2,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 128,
-		.machine_check		= machine_check_47x,
-		.platform		= "ppc470",
-	},
-	{ /* 476fpe */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x7ff50000,
-		.cpu_name		= "476fpe",
-		.cpu_features		= CPU_FTRS_47X | CPU_FTR_476_DD2,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 128,
-		.machine_check		= machine_check_47x,
-		.platform		= "ppc470",
-	},
-	{ /* 476 iss */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00050000,
-		.cpu_name		= "476",
-		.cpu_features		= CPU_FTRS_47X,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 128,
-		.machine_check		= machine_check_47x,
-		.platform		= "ppc470",
-	},
-	{ /* 476 others */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x11a50000,
-		.cpu_name		= "476",
-		.cpu_features		= CPU_FTRS_47X,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features		= MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 128,
-		.machine_check		= machine_check_47x,
-		.platform		= "ppc470",
-	},
-	{	/* default match */
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "(generic 47x PPC)",
-		.cpu_features		= CPU_FTRS_47X,
-		.cpu_user_features	= COMMON_USER_BOOKE,
-		.mmu_features		= MMU_FTR_TYPE_47x,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 128,
-		.machine_check		= machine_check_47x,
-		.platform		= "ppc470",
-	}
-#endif /* CONFIG_PPC_47x */
-#endif /* CONFIG_44x */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_E500
-#ifdef CONFIG_PPC32
-#ifndef CONFIG_PPC_E500MC
-	{	/* e500 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80200000,
-		.cpu_name		= "e500",
-		.cpu_features		= CPU_FTRS_E500,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_SPE_COMP |
-			PPC_FEATURE_HAS_EFP_SINGLE_COMP,
-		.cpu_user_features2	= PPC_FEATURE2_ISEL,
-		.mmu_features		= MMU_FTR_TYPE_FSL_E,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_e500v1,
-		.machine_check		= machine_check_e500,
-		.platform		= "ppc8540",
-	},
-	{	/* e500v2 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80210000,
-		.cpu_name		= "e500v2",
-		.cpu_features		= CPU_FTRS_E500_2,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_SPE_COMP |
-			PPC_FEATURE_HAS_EFP_SINGLE_COMP |
-			PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
-		.cpu_user_features2	= PPC_FEATURE2_ISEL,
-		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_e500v2,
-		.machine_check		= machine_check_e500,
-		.platform		= "ppc8548",
-		.cpu_down_flush		= cpu_down_flush_e500v2,
-	},
-#else
-	{	/* e500mc */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80230000,
-		.cpu_name		= "e500mc",
-		.cpu_features		= CPU_FTRS_E500MC,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.cpu_user_features2	= PPC_FEATURE2_ISEL,
-		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
-			MMU_FTR_USE_TLBILX,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_e500mc,
-		.machine_check		= machine_check_e500mc,
-		.platform		= "ppce500mc",
-		.cpu_down_flush		= cpu_down_flush_e500mc,
-	},
-#endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC_E500MC
-	{	/* e5500 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80240000,
-		.cpu_name		= "e5500",
-		.cpu_features		= CPU_FTRS_E5500,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.cpu_user_features2	= PPC_FEATURE2_ISEL,
-		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
-			MMU_FTR_USE_TLBILX,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 4,
-		.cpu_setup		= __setup_cpu_e5500,
-#ifndef CONFIG_PPC32
-		.cpu_restore		= __restore_cpu_e5500,
-#endif
-		.machine_check		= machine_check_e500mc,
-		.platform		= "ppce5500",
-		.cpu_down_flush		= cpu_down_flush_e5500,
-	},
-	{	/* e6500 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x80400000,
-		.cpu_name		= "e6500",
-		.cpu_features		= CPU_FTRS_E6500,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.cpu_user_features2	= PPC_FEATURE2_ISEL,
-		.mmu_features		= MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
-			MMU_FTR_USE_TLBILX,
-		.icache_bsize		= 64,
-		.dcache_bsize		= 64,
-		.num_pmcs		= 6,
-		.cpu_setup		= __setup_cpu_e6500,
-#ifndef CONFIG_PPC32
-		.cpu_restore		= __restore_cpu_e6500,
-#endif
-		.machine_check		= machine_check_e500mc,
-		.platform		= "ppce6500",
-		.cpu_down_flush		= cpu_down_flush_e6500,
-	},
-#endif /* CONFIG_PPC_E500MC */
-#ifdef CONFIG_PPC32
-	{	/* default match */
-		.pvr_mask		= 0x00000000,
-		.pvr_value		= 0x00000000,
-		.cpu_name		= "(generic E500 PPC)",
-		.cpu_features		= CPU_FTRS_E500,
-		.cpu_user_features	= COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_SPE_COMP |
-			PPC_FEATURE_HAS_EFP_SINGLE_COMP,
-		.mmu_features		= MMU_FTR_TYPE_FSL_E,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.machine_check		= machine_check_e500,
-		.platform		= "powerpc",
-	}
-#endif /* CONFIG_PPC32 */
-#endif /* CONFIG_E500 */
-};
+#include "cpu_specs.h"
 
 void __init set_cur_cpu_spec(struct cpu_spec *s)
 {
@@ -2018,6 +104,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 	struct cpu_spec *s = cpu_specs;
 	int i;
 
+	BUILD_BUG_ON(!ARRAY_SIZE(cpu_specs));
+
 	s = PTRRELOC(s);
 
 	for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index fc800a9fb2c4..c3fb9fdf5bd7 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1099,7 +1099,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
 
 	prop = of_get_flat_dt_prop(node, "display-name", NULL);
 	if (prop && strlen((char *)prop) != 0) {
-		strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+		strscpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
 		cur_cpu_spec->cpu_name = dt_cpu_name;
 	}
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1d599df6f169..3fc7c9886bb7 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -49,7 +49,7 @@
  */
 	.align	12
 
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_E500)
 	.globl	prepare_transfer_to_handler
 prepare_transfer_to_handler:
 	/* if from kernel, check interrupted DOZE/NAP mode */
@@ -68,10 +68,10 @@ prepare_transfer_to_handler:
 	lwz	r9,_MSR(r11)		/* if sleeping, clear MSR.EE */
 	rlwinm	r9,r9,0,~MSR_EE
 	lwz	r12,_LINK(r11)		/* and return to address in LR */
-	lwz	r2, GPR2(r11)
+	REST_GPR(2, r11)
 	b	fast_exception_return
 _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
-#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */
 
 #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
 	.globl	__kuep_lock
@@ -101,6 +101,7 @@ __kuep_unlock:
 
 	.globl	transfer_to_syscall
 transfer_to_syscall:
+	stw	r3, ORIG_GPR3(r1)
 	stw	r11, GPR1(r1)
 	stw	r11, 0(r1)
 	mflr	r12
@@ -121,9 +122,9 @@ transfer_to_syscall:
 	SAVE_NVGPRS(r1)
 	kuep_lock
 
-	/* Calling convention has r9 = orig r0, r10 = regs */
-	addi	r10,r1,STACK_FRAME_OVERHEAD
-	mr	r9,r0
+	/* Calling convention has r3 = regs, r4 = orig r0 */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r0
 	bl	system_call_exception
 
 ret_from_syscall:
@@ -143,7 +144,7 @@ ret_from_syscall:
 	lwz	r7,_NIP(r1)
 	lwz	r8,_MSR(r1)
 	cmpwi	r3,0
-	lwz	r3,GPR3(r1)
+	REST_GPR(3, r1)
 syscall_exit_finish:
 	mtspr	SPRN_SRR0,r7
 	mtspr	SPRN_SRR1,r8
@@ -151,8 +152,8 @@ syscall_exit_finish:
 	bne	3f
 	mtcr	r5
 
-1:	lwz	r2,GPR2(r1)
-	lwz	r1,GPR1(r1)
+1:	REST_GPR(2, r1)
+	REST_GPR(1, r1)
 	rfi
 #ifdef CONFIG_40x
 	b .	/* Prevent prefetch past rfi */
@@ -164,10 +165,8 @@ syscall_exit_finish:
 	REST_NVGPRS(r1)
 	mtctr	r4
 	mtxer	r5
-	lwz	r0,GPR0(r1)
-	lwz	r3,GPR3(r1)
-	REST_GPRS(4, 11, r1)
-	lwz	r12,GPR12(r1)
+	REST_GPR(0, r1)
+	REST_GPRS(3, 12, r1)
 	b	1b
 
 #ifdef CONFIG_44x
@@ -259,13 +258,12 @@ fast_exception_return:
 	beq	3f			/* if not, we've got problems */
 #endif
 
-2:	REST_GPRS(3, 6, r11)
-	lwz	r10,_CCR(r11)
-	REST_GPRS(1, 2, r11)
+2:	lwz	r10,_CCR(r11)
+	REST_GPRS(1, 6, r11)
 	mtcr	r10
 	lwz	r10,_LINK(r11)
 	mtlr	r10
-	/* Clear the exception_marker on the stack to avoid confusing stacktrace */
+	/* Clear the exception marker on the stack to avoid confusing stacktrace */
 	li	r10, 0
 	stw	r10, 8(r11)
 	REST_GPR(10, r11)
@@ -276,7 +274,7 @@ fast_exception_return:
 	mtspr	SPRN_SRR0,r12
 	REST_GPR(9, r11)
 	REST_GPR(12, r11)
-	lwz	r11,GPR11(r11)
+	REST_GPR(11, r11)
 	rfi
 #ifdef CONFIG_40x
 	b .	/* Prevent prefetch past rfi */
@@ -322,7 +320,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	li	r0,0
 
 	/*
-	 * Leaving a stale exception_marker on the stack can confuse
+	 * Leaving a stale exception marker on the stack can confuse
 	 * the reliable stack unwinder later on. Clear it.
 	 */
 	stw	r0,8(r1)
@@ -374,7 +372,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	mtspr	SPRN_XER,r5
 
 	/*
-	 * Leaving a stale exception_marker on the stack can confuse
+	 * Leaving a stale exception marker on the stack can confuse
 	 * the reliable stack unwinder later on. Clear it.
 	 */
 	stw	r0,8(r1)
@@ -453,9 +451,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
 	lwz	r3,_MSR(r1);						\
 	andi.	r3,r3,MSR_PR;						\
 	bne	interrupt_return;					\
-	lwz	r0,GPR0(r1);						\
-	lwz	r2,GPR2(r1);						\
-	REST_GPRS(3, 8, r1);						\
+	REST_GPR(0, r1);						\
+	REST_GPRS(2, 8, r1);						\
 	lwz	r10,_XER(r1);						\
 	lwz	r11,_CTR(r1);						\
 	mtspr	SPRN_XER,r10;						\
@@ -474,11 +471,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
 	lwz	r12,_MSR(r1);						\
 	mtspr	exc_lvl_srr0,r11;					\
 	mtspr	exc_lvl_srr1,r12;					\
-	lwz	r9,GPR9(r1);						\
-	lwz	r12,GPR12(r1);						\
-	lwz	r10,GPR10(r1);						\
-	lwz	r11,GPR11(r1);						\
-	lwz	r1,GPR1(r1);						\
+	REST_GPRS(9, 12, r1);						\
+	REST_GPR(1, r1);						\
 	exc_lvl_rfi;							\
 	b	.;		/* prevent prefetch past exc_lvl_rfi */
 
@@ -488,7 +482,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
 	mtspr	SPRN_##exc_lvl_srr0,r9;					\
 	mtspr	SPRN_##exc_lvl_srr1,r10;
 
-#if defined(CONFIG_PPC_BOOK3E_MMU)
+#if defined(CONFIG_PPC_E500)
 #ifdef CONFIG_PHYS_64BIT
 #define	RESTORE_MAS7							\
 	lwz	r11,MAS7(r1);						\
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 01ace4c56104..3e2e37e6ecab 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -292,16 +292,16 @@ _GLOBAL(enter_prom)
 
 	/* Prepare a 32-bit mode big endian MSR
 	 */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	rlwinm	r11,r11,0,1,31
 	mtsrr1	r11
 	rfi
-#else /* CONFIG_PPC_BOOK3E */
+#else /* CONFIG_PPC_BOOK3E_64 */
 	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
 	andc	r11,r11,r12
 	mtsrr1	r11
 	RFI_TO_KERNEL
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
 
 1:	/* Return from OF */
 	FIXUP_ENDIAN
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 67dc4e3179a0..930e36099015 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -216,17 +216,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 	mtlr	r10
 	mtcr	r11
 
-	ld	r10,GPR10(r1)
-	ld	r11,GPR11(r1)
-	ld	r12,GPR12(r1)
+	REST_GPRS(10, 12, r1)
 	mtspr	\scratch,r0
 
 	std	r10,\paca_ex+EX_R10(r13);
 	std	r11,\paca_ex+EX_R11(r13);
 	ld	r10,_NIP(r1)
 	ld	r11,_MSR(r1)
-	ld	r0,GPR0(r1)
-	ld	r1,GPR1(r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
 	mtspr	\srr0,r10
 	mtspr	\srr1,r11
 	ld	r10,\paca_ex+EX_R10(r13)
@@ -291,7 +289,6 @@ ret_from_mc_except:
 #define SPRN_MC_SRR0	SPRN_MCSRR0
 #define SPRN_MC_SRR1	SPRN_MCSRR1
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
 #define GEN_BTB_FLUSH			\
 	START_BTB_FLUSH_SECTION		\
 		beq 1f;			\
@@ -307,13 +304,6 @@ ret_from_mc_except:
 #define DBG_BTB_FLUSH CRIT_BTB_FLUSH
 #define MC_BTB_FLUSH CRIT_BTB_FLUSH
 #define GDBELL_BTB_FLUSH GEN_BTB_FLUSH
-#else
-#define GEN_BTB_FLUSH
-#define CRIT_BTB_FLUSH
-#define DBG_BTB_FLUSH
-#define MC_BTB_FLUSH
-#define GDBELL_BTB_FLUSH
-#endif
 
 #define NORMAL_EXCEPTION_PROLOG(n, intnum, addition)			    \
 	EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
@@ -372,25 +362,24 @@ ret_from_mc_except:
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf)				    \
 exc_##n##_common:							    \
-	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \
-	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \
-	SAVE_GPRS(3, 9, r1);		/* save r3 - r9 in stackframe */    \
+	SAVE_GPR(0, r1);		/* save r0 in stackframe */	    \
+	SAVE_GPRS(2, 9, r1);		/* save r2 - r9 in stackframe */    \
 	std	r10,_NIP(r1);		/* save SRR0 to stackframe */	    \
 	std	r11,_MSR(r1);		/* save SRR1 to stackframe */	    \
 	beq	2f;			/* if from kernel mode */	    \
 2:	ld	r3,excf+EX_R10(r13);	/* get back r10 */		    \
 	ld	r4,excf+EX_R11(r13);	/* get back r11 */		    \
 	mfspr	r5,scratch;		/* get back r13 */		    \
-	std	r12,GPR12(r1);		/* save r12 in stackframe */	    \
-	ld	r2,PACATOC(r13);	/* get kernel TOC into r2 */	    \
+	SAVE_GPR(12, r1);		/* save r12 in stackframe */	    \
+	LOAD_PACA_TOC();		/* get kernel TOC into r2 */	    \
 	mflr	r6;			/* save LR in stackframe */	    \
 	mfctr	r7;			/* save CTR in stackframe */	    \
 	mfspr	r8,SPRN_XER;		/* save XER in stackframe */	    \
 	ld	r9,excf+EX_R1(r13);	/* load orig r1 back from PACA */   \
 	lwz	r10,excf+EX_CR(r13);	/* load orig CR back from PACA	*/  \
 	lbz	r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */	    \
-	ld	r12,exception_marker@toc(r2);				    \
-	li	r0,0;							    \
+	LOAD_REG_IMMEDIATE(r12, STACK_FRAME_REGS_MARKER);		    \
+	ZEROIZE_GPR(0);							    \
 	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
 	std	r4,GPR11(r1);		/* save r11 to stackframe */	    \
 	std	r5,GPR13(r1);		/* save it to stackframe */	    \
@@ -470,12 +459,6 @@ exc_##n##_bad_stack:							    \
 	bl	hdlr;							\
 	b	interrupt_return
 
-/* This value is used to mark exception frames on the stack. */
-	.section	".toc","aw"
-exception_marker:
-	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
-
 /*
  * And here we have the exception vectors !
  */
@@ -704,9 +687,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	beq+	1f
 
 #ifdef CONFIG_RELOCATABLE
-	ld	r15,PACATOC(r13)
-	ld	r14,interrupt_base_book3e@got(r15)
-	ld	r15,__end_interrupts@got(r15)
+	__LOAD_PACA_TOC(r15)
+	LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+	LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
 #else
@@ -775,9 +758,9 @@ kernel_dbg_exc:
 	beq+	1f
 
 #ifdef CONFIG_RELOCATABLE
-	ld	r15,PACATOC(r13)
-	ld	r14,interrupt_base_book3e@got(r15)
-	ld	r15,__end_interrupts@got(r15)
+	__LOAD_PACA_TOC(r15)
+	LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+	LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
 #else
@@ -900,9 +883,9 @@ kernel_dbg_exc:
 
 .macro SEARCH_RESTART_TABLE
 #ifdef CONFIG_RELOCATABLE
-	ld	r11,PACATOC(r13)
-	ld	r14,__start___restart_table@got(r11)
-	ld	r15,__stop___restart_table@got(r11)
+	__LOAD_PACA_TOC(r11)
+	LOAD_REG_ADDR_ALTTOC(r14, r11, __start___restart_table)
+	LOAD_REG_ADDR_ALTTOC(r15, r11, __stop___restart_table)
 #else
 	LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
 	LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
@@ -1056,15 +1039,14 @@ bad_stack_book3e:
 	mfspr	r11,SPRN_ESR
 	std	r10,_DEAR(r1)
 	std	r11,_ESR(r1)
-	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \
-	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \
-	SAVE_GPRS(3, 9, r1);		/* save r3 - r9 in stackframe */    \
+	SAVE_GPR(0, r1);		/* save r0 in stackframe */	    \
+	SAVE_GPRS(2, 9, r1);		/* save r2 - r9 in stackframe */    \
 	ld	r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */		    \
 	ld	r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */		    \
 	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
 	std	r3,GPR10(r1);		/* save r10 to stackframe */	    \
 	std	r4,GPR11(r1);		/* save r11 to stackframe */	    \
-	std	r12,GPR12(r1);		/* save r12 in stackframe */	    \
+	SAVE_GPR(12, r1);		/* save r12 in stackframe */	    \
 	std	r5,GPR13(r1);		/* save it to stackframe */	    \
 	mflr	r10
 	mfctr	r11
@@ -1072,14 +1054,14 @@ bad_stack_book3e:
 	std	r10,_LINK(r1)
 	std	r11,_CTR(r1)
 	std	r12,_XER(r1)
-	SAVE_GPRS(14, 31, r1)
+	SAVE_NVGPRS(r1)
 	lhz	r12,PACA_TRAP_SAVE(r13)
 	std	r12,_TRAP(r1)
 	addi	r11,r1,INT_FRAME_SIZE
 	std	r11,0(r1)
-	li	r12,0
+	ZEROIZE_GPR(12)
 	std	r12,0(r11)
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 1:	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	kernel_bad_stack
 	b	1b
@@ -1320,8 +1302,8 @@ a2_tlbinit_after_linear_map:
 
 	/* Now we branch the new virtual address mapped by this entry */
 #ifdef CONFIG_RELOCATABLE
-	ld	r5,PACATOC(r13)
-	ld	r3,1f@got(r5)
+	__LOAD_PACA_TOC(r5)
+	LOAD_REG_ADDR_ALTTOC(r3, r5, 1f)
 #else
 	LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
 #endif
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3d0dc133a9ae..5381a43e50fe 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -281,7 +281,7 @@ BEGIN_FTR_SECTION
 	mfspr	r9,SPRN_PPR
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	HMT_MEDIUM
-	std	r10,IAREA+EX_R10(r13)		/* save r10 - r12 */
+	std	r10,IAREA+EX_R10(r13)		/* save r10 */
 	.if ICFAR
 BEGIN_FTR_SECTION
 	mfspr	r10,SPRN_CFAR
@@ -321,7 +321,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	mfctr	r10
 	std	r10,IAREA+EX_CTR(r13)
 	mfcr	r9
-	std	r11,IAREA+EX_R11(r13)
+	std	r11,IAREA+EX_R11(r13)		/* save r11 - r12 */
 	std	r12,IAREA+EX_R12(r13)
 
 	/*
@@ -580,7 +580,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	std	r2,GPR2(r1)		/* save r2 in stackframe	*/
 	SAVE_GPRS(3, 8, r1)		/* save r3 - r8 in stackframe   */
 	mflr	r9			/* Get LR, later save to stack	*/
-	ld	r2,PACATOC(r13)		/* get kernel TOC into r2	*/
+	LOAD_PACA_TOC()			/* get kernel TOC into r2	*/
 	std	r9,_LINK(r1)
 	lbz	r10,PACAIRQSOFTMASK(r13)
 	mfspr	r11,SPRN_XER		/* save XER in stackframe	*/
@@ -589,7 +589,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	li	r9,IVEC
 	std	r9,_TRAP(r1)		/* set trap number		*/
 	li	r10,0
-	ld	r11,exception_marker@toc(r2)
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
 	std	r10,RESULT(r1)		/* clear regs->result		*/
 	std	r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame	*/
 .endm
@@ -610,7 +610,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 .macro SEARCH_RESTART_TABLE
 #ifdef CONFIG_RELOCATABLE
 	mr	r12,r2
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	LOAD_REG_ADDR(r9, __start___restart_table)
 	LOAD_REG_ADDR(r10, __stop___restart_table)
 	mr	r2,r12
@@ -640,7 +640,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 .macro SEARCH_SOFT_MASK_TABLE
 #ifdef CONFIG_RELOCATABLE
 	mr	r12,r2
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	LOAD_REG_ADDR(r9, __start___soft_mask_table)
 	LOAD_REG_ADDR(r10, __stop___soft_mask_table)
 	mr	r2,r12
@@ -703,6 +703,71 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 .endm
 
 /*
+ * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot.
+ *
+ * There's a short window during boot where although the kernel is running
+ * little endian, any exceptions will cause the CPU to switch back to big
+ * endian. For example a WARN() boils down to a trap instruction, which will
+ * cause a program check, and we end up here but with the CPU in big endian
+ * mode. The first instruction of the program check handler (in GEN_INT_ENTRY
+ * below) is an mtsprg, which when executed in the wrong endian is an lhzu with
+ * a ~3GB displacement from r3. The content of r3 is random, so that is a load
+ * from some random location, and depending on the system can easily lead to a
+ * checkstop, or an infinitely recursive page fault.
+ *
+ * So to handle that case we have a trampoline here that can detect we are in
+ * the wrong endian and flip us back to the correct endian. We can't flip
+ * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1
+ * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for
+ * the paca. SPRG3 is user readable, but this trampoline is only active very
+ * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before
+ * userspace starts.
+ */
+.macro EARLY_BOOT_FIXUP
+BEGIN_FTR_SECTION
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+	tdi   0,0,0x48    // Trap never, or in reverse endian: b . + 8
+	b     2f          // Skip trampoline if endian is correct
+	.long 0xa643707d  // mtsprg  0, r11      Backup r11
+	.long 0xa6027a7d  // mfsrr0  r11
+	.long 0xa643727d  // mtsprg  2, r11      Backup SRR0 in SPRG2
+	.long 0xa6027b7d  // mfsrr1  r11
+	.long 0xa643737d  // mtsprg  3, r11      Backup SRR1 in SPRG3
+	.long 0xa600607d  // mfmsr   r11
+	.long 0x01006b69  // xori    r11, r11, 1 Invert MSR[LE]
+	.long 0xa6037b7d  // mtsrr1  r11
+	/*
+	 * This is 'li  r11,1f' where 1f is the absolute address of that
+	 * label, byteswapped into the SI field of the instruction.
+	 */
+	.long 0x00006039 | \
+		((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \
+		((ABS_ADDR(1f, real_vectors) & 0xff00) << 8)
+	.long 0xa6037a7d  // mtsrr0  r11
+	.long 0x2400004c  // rfid
+1:
+	mfsprg r11, 3
+	mtsrr1 r11        // Restore SRR1
+	mfsprg r11, 2
+	mtsrr0 r11        // Restore SRR0
+	mfsprg r11, 0     // Restore r11
+2:
+#endif
+	/*
+	 * A program check could hit at any time, and pseries cannot block
+	 * MSR[ME] in early boot. So check if there is anything useful in r13
+	 * yet, and spin forever if not.
+	 */
+	mtsprg	0, r11
+	mfcr	r11
+	cmpdi	r13, 0
+	beq	.
+	mtcr	r11
+	mfsprg	r11, 0
+END_FTR_SECTION(0, 1)     // nop out after boot
+.endm
+
+/*
  * There are a few constraints to be concerned with.
  * - Real mode exceptions code/data must be located at their physical location.
  * - Virtual mode exceptions must be mapped at their 0xc000... location.
@@ -1079,6 +1144,7 @@ INT_DEFINE_BEGIN(machine_check)
 INT_DEFINE_END(machine_check)
 
 EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+	EARLY_BOOT_FIXUP
 	GEN_INT_ENTRY machine_check_early, virt=0
 EXC_REAL_END(machine_check, 0x200, 0x100)
 EXC_VIRT_NONE(0x4200, 0x100)
@@ -1143,6 +1209,9 @@ BEGIN_FTR_SECTION
 	bl	enable_machine_check
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
+BEGIN_FTR_SECTION
+	bl	machine_check_early_boot
+END_FTR_SECTION(0, 1)     // nop out after boot
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
@@ -1619,51 +1688,7 @@ INT_DEFINE_BEGIN(program_check)
 INT_DEFINE_END(program_check)
 
 EXC_REAL_BEGIN(program_check, 0x700, 0x100)
-
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-	/*
-	 * There's a short window during boot where although the kernel is
-	 * running little endian, any exceptions will cause the CPU to switch
-	 * back to big endian. For example a WARN() boils down to a trap
-	 * instruction, which will cause a program check, and we end up here but
-	 * with the CPU in big endian mode. The first instruction of the program
-	 * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when
-	 * executed in the wrong endian is an lhzu with a ~3GB displacement from
-	 * r3. The content of r3 is random, so that is a load from some random
-	 * location, and depending on the system can easily lead to a checkstop,
-	 * or an infinitely recursive page fault.
-	 *
-	 * So to handle that case we have a trampoline here that can detect we
-	 * are in the wrong endian and flip us back to the correct endian. We
-	 * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires
-	 * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as
-	 * SPRG1 is already used for the paca. SPRG3 is user readable, but this
-	 * trampoline is only active very early in boot, and SPRG3 will be
-	 * reinitialised in vdso_getcpu_init() before userspace starts.
-	 */
-BEGIN_FTR_SECTION
-	tdi   0,0,0x48    // Trap never, or in reverse endian: b . + 8
-	b     1f          // Skip trampoline if endian is correct
-	.long 0xa643707d  // mtsprg  0, r11      Backup r11
-	.long 0xa6027a7d  // mfsrr0  r11
-	.long 0xa643727d  // mtsprg  2, r11      Backup SRR0 in SPRG2
-	.long 0xa6027b7d  // mfsrr1  r11
-	.long 0xa643737d  // mtsprg  3, r11      Backup SRR1 in SPRG3
-	.long 0xa600607d  // mfmsr   r11
-	.long 0x01006b69  // xori    r11, r11, 1 Invert MSR[LE]
-	.long 0xa6037b7d  // mtsrr1  r11
-	.long 0x34076039  // li      r11, 0x734
-	.long 0xa6037a7d  // mtsrr0  r11
-	.long 0x2400004c  // rfid
-	mfsprg r11, 3
-	mtsrr1 r11        // Restore SRR1
-	mfsprg r11, 2
-	mtsrr0 r11        // Restore SRR0
-	mfsprg r11, 0     // Restore r11
-1:
-END_FTR_SECTION(0, 1)     // nop out after boot
-#endif /* CONFIG_CPU_LITTLE_ENDIAN */
-
+	EARLY_BOOT_FIXUP
 	GEN_INT_ENTRY program_check, virt=0
 EXC_REAL_END(program_check, 0x700, 0x100)
 EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
@@ -2794,6 +2819,20 @@ masked_Hinterrupt:
 masked_interrupt:
 	.endif
 	stw	r9,PACA_EXGEN+EX_CCR(r13)
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+	/*
+	 * Ensure there was no previous MUST_HARD_MASK interrupt or
+	 * HARD_DIS setting. If this does fire, the interrupt is still
+	 * masked and MSR[EE] will be cleared on return, so no need to
+	 * panic, but somebody probably enabled MSR[EE] under
+	 * PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common
+	 * cause.
+	 */
+	lbz	r9,PACAIRQHAPPENED(r13)
+	andi.	r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS)
+0:	tdnei	r9,0
+	EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
 	lbz	r9,PACAIRQHAPPENED(r13)
 	or	r9,r9,r10
 	stb	r9,PACAIRQHAPPENED(r13)
@@ -3034,22 +3073,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
 MASKED_INTERRUPT
 MASKED_INTERRUPT hsrr=1
 
-	/*
-	 * Relocation-on interrupts: A subset of the interrupts can be delivered
-	 * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
-	 * it.  Addresses are the same as the original interrupt addresses, but
-	 * offset by 0xc000000000004000.
-	 * It's impossible to receive interrupts below 0x300 via this mechanism.
-	 * KVM: None of these traps are from the guest ; anything that escalated
-	 * to HV=1 from HV=0 is delivered via real mode handlers.
-	 */
-
-	/*
-	 * This uses the standard macro, since the original 0x300 vector
-	 * only has extra guff for STAB-based processors -- which never
-	 * come here.
-	 */
-
 USE_FIXED_SECTION(virt_trampolines)
 	/*
 	 * All code below __end_soft_masked is treated as soft-masked. If
@@ -3075,7 +3098,7 @@ CLOSE_FIXED_SECTION(virt_trampolines);
 USE_TEXT_SECTION()
 
 /* MSR[RI] should be clear because this uses SRR[01] */
-enable_machine_check:
+_GLOBAL(enable_machine_check)
 	mflr	r0
 	bcl	20,31,$+4
 0:	mflr	r3
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index cf2c08902c05..dedcc6fe2263 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -143,7 +143,7 @@ DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
 	.globl	__secondary_hold
 __secondary_hold:
 	FIXUP_ENDIAN
-#ifndef CONFIG_PPC_BOOK3E
+#ifndef CONFIG_PPC_BOOK3E_64
 	mfmsr	r24
 	ori	r24,r24,MSR_RI
 	mtmsrd	r24			/* RI on */
@@ -160,7 +160,7 @@ __secondary_hold:
 	sync
 
 	li	r26,0
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	tovirt(r26,r26)
 #endif
 	/* All secondary cpus wait here until told to start. */
@@ -169,7 +169,7 @@ __secondary_hold:
 	beq	100b
 
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	tovirt(r12,r12)
 #endif
 	mtctr	r12
@@ -178,7 +178,7 @@ __secondary_hold:
 	 * it may be the case that other platforms have r4 right to
 	 * begin with, this gives us some safety in case it is not
 	 */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	mr	r4,r25
 #else
 	li	r4,0
@@ -192,13 +192,6 @@ __secondary_hold:
 #endif
 CLOSE_FIXED_SECTION(first_256B)
 
-/* This value is used to mark exception frames on the stack. */
-	.section ".toc","aw"
-/* This value is used to mark exception frames on the stack. */
-exception_marker:
-	.tc	ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-	.previous
-
 /*
  * On server, we include the exception vectors code here as it
  * relies on absolute addressing which is only possible within
@@ -214,7 +207,7 @@ USE_TEXT_SECTION()
 
 #include "interrupt_64.S"
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 /*
  * The booting_thread_hwid holds the thread id we want to boot in cpu
  * hotplug case. It is set by cpu hotplug code, and is invalid by default.
@@ -322,7 +315,7 @@ _GLOBAL(fsl_secondary_thread_init)
 	bl	book3e_secondary_thread_init
 	b	generic_secondary_common_init
 
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
 
 /*
  * On pSeries and most other platforms, secondary processors spin
@@ -345,7 +338,7 @@ _GLOBAL(generic_secondary_smp_init)
 	bl	relative_toc
 	tovirt(r2,r2)
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	/* Book3E initialization */
 	mr	r3,r24
 	mr	r4,r25
@@ -400,8 +393,12 @@ generic_secondary_common_init:
 #else
 	LOAD_REG_ADDR(r8, paca_ptrs)	/* Load paca_ptrs pointe	 */
 	ld	r8,0(r8)		/* Get base vaddr of array	 */
+#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
+	LOAD_REG_IMMEDIATE(r7, NR_CPUS)
+#else
 	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
 	lwz	r7,0(r7)		/* also the max paca allocated 	 */
+#endif
 	li	r5,0			/* logical cpu id                */
 1:
 	sldi	r9,r5,3			/* get paca_ptrs[] index from cpu id */
@@ -417,7 +414,7 @@ generic_secondary_common_init:
 	b	kexec_wait		/* next kernel might do better	 */
 
 2:	SET_PACA(r13)
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	addi	r12,r13,PACA_EXTLB	/* and TLB exc frame in another  */
 	mtspr	SPRN_SPRG_TLB_EXFRAME,r12
 #endif
@@ -494,6 +491,9 @@ __start_initialization_multiplatform:
 	/* Make sure we are running in 64 bits mode */
 	bl	enable_64b_mode
 
+	/* Zero r13 (paca) so early program check / mce don't use it */
+	li	r13,0
+
 	/* Get TOC pointer (current runtime address) */
 	bl	relative_toc
 
@@ -519,7 +519,7 @@ __start_initialization_multiplatform:
 	mr	r29,r9
 #endif
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	bl	start_initialization_book3e
 	b	__after_prom_start
 #else
@@ -540,7 +540,7 @@ __start_initialization_multiplatform:
 	/* Switch off MMU if not already off */
 	bl	__mmu_off
 	b	__after_prom_start
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
 
 __REF
 __boot_from_prom:
@@ -587,11 +587,11 @@ __after_prom_start:
 	/* process relocations for the final address of the kernel */
 	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */
 	sldi	r25,r25,32
-#if defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3E_64)
 	tovirt(r26,r26)		/* on booke, we already run at PAGE_OFFSET */
 #endif
 	lwz	r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
-#if defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3E_64)
 	tophys(r26,r26)
 #endif
 	cmplwi	cr0,r7,1	/* flagged to stay where we are ? */
@@ -599,7 +599,7 @@ __after_prom_start:
 	add	r25,r25,r26
 1:	mr	r3,r25
 	bl	relocate
-#if defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3E_64)
 	/* IVPR needs to be set after relocation. */
 	bl	init_core_book3e
 #endif
@@ -613,11 +613,11 @@ __after_prom_start:
  * Note: This process overwrites the OF exception vectors.
  */
 	li	r3,0			/* target addr */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	tovirt(r3,r3)		/* on booke, we already run at PAGE_OFFSET */
 #endif
 	mr.	r4,r26			/* In some cases the loader may  */
-#if defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3E_64)
 	tovirt(r4,r4)
 #endif
 	beq	9f			/* have already put us at zero */
@@ -630,14 +630,14 @@ __after_prom_start:
  * variable __run_at_load, if it is set the kernel is treated as relocatable
  * kernel, otherwise it will be moved to PHYSICAL_START
  */
-#if defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3E_64)
 	tovirt(r26,r26)		/* on booke, we already run at PAGE_OFFSET */
 #endif
 	lwz	r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
 	cmplwi	cr0,r7,1
 	bne	3f
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	LOAD_REG_ADDR(r5, __end_interrupts)
 	LOAD_REG_ADDR(r11, _stext)
 	sub	r5,r5,r11
@@ -848,7 +848,7 @@ __secondary_start:
  * before going into C code.
  */
 start_secondary_prolog:
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	li	r3,0
 	std	r3,0(r1)		/* Zero the stack frame pointer	*/
 	bl	start_secondary
@@ -871,10 +871,10 @@ _GLOBAL(start_secondary_resume)
  */
 enable_64b_mode:
 	mfmsr	r11			/* grab the current MSR */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
 	mtmsr	r11
-#else /* CONFIG_PPC_BOOK3E */
+#else /* CONFIG_PPC_BOOK3E_64 */
 	LOAD_REG_IMMEDIATE(r12, MSR_64BIT)
 	or	r11,r11,r12
 	mtmsrd	r11
@@ -940,7 +940,7 @@ start_here_multiplatform:
 	std	r29,8(r11);
 #endif
 
-#ifndef CONFIG_PPC_BOOK3E
+#ifndef CONFIG_PPC_BOOK3E_64
 	mfmsr	r6
 	ori	r6,r6,MSR_RI
 	mtmsrd	r6			/* RI on */
@@ -988,7 +988,7 @@ start_here_common:
 	std	r1,PACAKSAVE(r13)
 
 	/* Load the TOC (virtual address) */
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 
 	/* Mark interrupts soft and hard disabled (they might be enabled
 	 * in the PACA when doing hotplug)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_85xx.S
index f0db4f52bc00..52c0ab416326 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -129,7 +129,7 @@ _GLOBAL(_start);
 
 	/*
 	 * For the second relocation, we already set the right tlb entries
-	 * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+	 * for the kernel space, so skip the code in 85xx_entry_mapping.S
 	*/
 	cmpwi	r19,1
 	beq	set_ivor
@@ -159,7 +159,7 @@ _GLOBAL(__early_start)
 	lwz     r20,0(r20)
 
 #define ENTRY_MAPPING_BOOT_SETUP
-#include "fsl_booke_entry_mapping.S"
+#include "85xx_entry_mapping.S"
 #undef ENTRY_MAPPING_BOOT_SETUP
 
 set_ivor:
@@ -912,7 +912,7 @@ get_phys_addr:
  * Global functions
  */
 
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 #ifndef CONFIG_PPC_E500MC
 /* Adjust or setup IVORs for e500v1/v2 */
 _GLOBAL(__setup_e500_ivors)
@@ -955,7 +955,7 @@ _GLOBAL(__setup_ehv_ivors)
 	sync
 	blr
 #endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_E500 */
+#endif /* CONFIG_PPC_E500 */
 
 #ifdef CONFIG_SPE
 /*
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index bb6d5d0fc4ac..1cb9d0f7cbf2 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -34,7 +34,7 @@
  */
 #define THREAD_NORMSAVE(offset)	(THREAD_NORMSAVES + (offset * 4))
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 #define BOOKE_CLEAR_BTB(reg)									\
 START_BTB_FLUSH_SECTION								\
 	BTB_FLUSH(reg)									\
@@ -103,7 +103,7 @@ END_BTB_FLUSH_SECTION
 .endm
 
 .macro prepare_transfer_to_handler
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 	andi.	r12,r9,MSR_PR
 	bne	777f
 	bl	prepare_transfer_to_handler
@@ -242,7 +242,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 
 
 .macro SAVE_MMU_REGS
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
 	mfspr	r0,SPRN_MAS0
 	stw	r0,MAS0(r1)
 	mfspr	r0,SPRN_MAS1
@@ -257,7 +257,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
 	mfspr	r0,SPRN_MAS7
 	stw	r0,MAS7(r1)
 #endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
 #ifdef CONFIG_44x
 	mfspr	r0,SPRN_MMUCR
 	stw	r0,MMUCR(r1)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 2669f80b3a49..8db1a15d7acb 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
+#include <linux/spinlock.h>
 #include <linux/debugfs.h>
 #include <linux/init.h>
 
@@ -129,7 +130,14 @@ struct breakpoint {
 	bool ptrace_bp;
 };
 
+/*
+ * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
+ * rely on it safely synchronizing internals here; however, we can rely on it
+ * not requesting more breakpoints than available.
+ */
+static DEFINE_SPINLOCK(cpu_bps_lock);
 static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
+static DEFINE_SPINLOCK(task_bps_lock);
 static LIST_HEAD(task_bps);
 
 static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
@@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp)
 	if (IS_ERR(tmp))
 		return PTR_ERR(tmp);
 
+	spin_lock(&task_bps_lock);
 	list_add(&tmp->list, &task_bps);
+	spin_unlock(&task_bps_lock);
 	return 0;
 }
 
@@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp)
 {
 	struct list_head *pos, *q;
 
+	spin_lock(&task_bps_lock);
 	list_for_each_safe(pos, q, &task_bps) {
 		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
 
@@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp)
 			break;
 		}
 	}
+	spin_unlock(&task_bps_lock);
 }
 
 /*
@@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp)
 static bool all_task_bps_check(struct perf_event *bp)
 {
 	struct breakpoint *tmp;
+	bool ret = false;
 
+	spin_lock(&task_bps_lock);
 	list_for_each_entry(tmp, &task_bps, list) {
-		if (!can_co_exist(tmp, bp))
-			return true;
+		if (!can_co_exist(tmp, bp)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	spin_unlock(&task_bps_lock);
+	return ret;
 }
 
 /*
@@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp)
 static bool same_task_bps_check(struct perf_event *bp)
 {
 	struct breakpoint *tmp;
+	bool ret = false;
 
+	spin_lock(&task_bps_lock);
 	list_for_each_entry(tmp, &task_bps, list) {
 		if (tmp->bp->hw.target == bp->hw.target &&
-		    !can_co_exist(tmp, bp))
-			return true;
+		    !can_co_exist(tmp, bp)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	spin_unlock(&task_bps_lock);
+	return ret;
 }
 
 static int cpu_bps_add(struct perf_event *bp)
@@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp)
 	if (IS_ERR(tmp))
 		return PTR_ERR(tmp);
 
+	spin_lock(&cpu_bps_lock);
 	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
 	for (i = 0; i < nr_wp_slots(); i++) {
 		if (!cpu_bp[i]) {
@@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp)
 			break;
 		}
 	}
+	spin_unlock(&cpu_bps_lock);
 	return 0;
 }
 
@@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp)
 	struct breakpoint **cpu_bp;
 	int i = 0;
 
+	spin_lock(&cpu_bps_lock);
 	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
 	for (i = 0; i < nr_wp_slots(); i++) {
 		if (!cpu_bp[i])
@@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp)
 			break;
 		}
 	}
+	spin_unlock(&cpu_bps_lock);
 }
 
 static bool cpu_bps_check(int cpu, struct perf_event *bp)
 {
 	struct breakpoint **cpu_bp;
+	bool ret = false;
 	int i;
 
+	spin_lock(&cpu_bps_lock);
 	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
 	for (i = 0; i < nr_wp_slots(); i++) {
-		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp))
-			return true;
+		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
+			ret = true;
+			break;
+		}
 	}
-	return false;
+	spin_unlock(&cpu_bps_lock);
+	return ret;
 }
 
 static bool all_cpu_bps_check(struct perf_event *bp)
@@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp)
 	return false;
 }
 
-/*
- * We don't use any locks to serialize accesses to cpu_bps or task_bps
- * because are already inside nr_bp_mutex.
- */
 int arch_reserve_bp_slot(struct perf_event *bp)
 {
 	int ret;
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_64e.S
index cc008de58b05..0fc680e03dee 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_64e.S
@@ -2,7 +2,7 @@
 /*
  * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
  *
- * Generic idle routine for Book3E processors
+ * Generic idle routine for 64-bit e500 processors
  */
 
 #include <linux/threads.h>
@@ -16,8 +16,6 @@
 #include <asm/hw_irq.h>
 
 /* 64-bit version only for now */
-#ifdef CONFIG_PPC64
-
 .macro BOOK3E_IDLE name loop
 _GLOBAL(\name)
 	/* Save LR for later */
@@ -77,7 +75,7 @@ _GLOBAL(\name)
 
 .macro BOOK3E_IDLE_LOOP
 1:
-	PPC_WAIT(0)
+	PPC_WAIT_v203
 	b	1b
 .endm
 
@@ -98,6 +96,4 @@ epapr_ev_idle_start:
 
 BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP
 
-BOOK3E_IDLE book3e_idle BOOK3E_IDLE_LOOP
-
-#endif /* CONFIG_PPC64 */
+BOOK3E_IDLE e500_idle BOOK3E_IDLE_LOOP
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_85xx.S
index 9e1bc4502c50..9e1bc4502c50 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_85xx.S
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 0e75cb03244a..f9db0a172401 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -431,16 +431,6 @@ again:
 
 		if (unlikely(stack_store))
 			__hard_EE_RI_disable();
-		/*
-		 * Returning to a kernel context with local irqs disabled.
-		 * Here, if EE was enabled in the interrupted context, enable
-		 * it on return as well. A problem exists here where a soft
-		 * masked interrupt may have cleared MSR[EE] and set HARD_DIS
-		 * here, and it will still exist on return to the caller. This
-		 * will be resolved by the masked interrupt firing again.
-		 */
-		if (regs->msr & MSR_EE)
-			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 #endif /* CONFIG_PPC64 */
 	}
 
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index ce25b28cf418..978a173eb339 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -13,16 +13,6 @@
 #include <asm/ppc_asm.h>
 #include <asm/ptrace.h>
 
-	.section	".toc","aw"
-SYS_CALL_TABLE:
-	.tc sys_call_table[TC],sys_call_table
-
-#ifdef CONFIG_COMPAT
-COMPAT_SYS_CALL_TABLE:
-	.tc compat_sys_call_table[TC],compat_sys_call_table
-#endif
-	.previous
-
 	.align 7
 
 .macro DEBUG_SRR_VALID srr
@@ -67,16 +57,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
 	std	r0,GPR0(r1)
 	std	r10,GPR1(r1)
 	std	r2,GPR2(r1)
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	mfcr	r12
 	li	r11,0
-	/* Can we avoid saving r3-r8 in common case? */
-	std	r3,GPR3(r1)
-	std	r4,GPR4(r1)
-	std	r5,GPR5(r1)
-	std	r6,GPR6(r1)
-	std	r7,GPR7(r1)
-	std	r8,GPR8(r1)
+	/* Save syscall parameters in r3-r8 */
+	SAVE_GPRS(3, 8, r1)
 	/* Zero r9-r12, this should only be required when restoring all GPRs */
 	std	r11,GPR9(r1)
 	std	r11,GPR10(r1)
@@ -91,9 +76,12 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
 	li	r11,\trapnr
 	std	r11,_TRAP(r1)
 	std	r12,_CCR(r1)
-	addi	r10,r1,STACK_FRAME_OVERHEAD
-	ld	r11,exception_marker@toc(r2)
-	std	r11,-16(r10)		/* "regshere" marker */
+	std	r3,ORIG_GPR3(r1)
+	/* Calling convention has r3 = regs, r4 = orig r0 */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r0
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r11,-16(r3)		/* "regshere" marker */
 
 BEGIN_FTR_SECTION
 	HMT_MEDIUM
@@ -108,8 +96,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	 * but this is the best we can do.
 	 */
 
-	/* Calling convention has r9 = orig r0, r10 = regs */
-	mr	r9,r0
 	bl	system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
@@ -148,17 +134,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Could zero these as per ABI, but we may consider a stricter ABI
 	 * which preserves these if libc implementations can benefit, so
 	 * restore them for now until further measurement is done. */
-	ld	r0,GPR0(r1)
-	ld	r4,GPR4(r1)
-	ld	r5,GPR5(r1)
-	ld	r6,GPR6(r1)
-	ld	r7,GPR7(r1)
-	ld	r8,GPR8(r1)
+	REST_GPR(0, r1)
+	REST_GPRS(4, 8, r1)
 	/* Zero volatile regs that may contain sensitive kernel data */
-	li	r9,0
-	li	r10,0
-	li	r11,0
-	li	r12,0
+	ZEROIZE_GPRS(9, 12)
 	mtspr	SPRN_XER,r0
 
 	/*
@@ -181,7 +160,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r5,_XER(r1)
 
 	REST_NVGPRS(r1)
-	ld	r0,GPR0(r1)
+	REST_GPR(0, r1)
 	mtcr	r2
 	mtctr	r3
 	mtlr	r4
@@ -195,7 +174,7 @@ syscall_vectored_\name\()_restart:
 _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	ld	r3,RESULT(r1)
 	addi	r4,r1,STACK_FRAME_OVERHEAD
 	li	r11,IRQS_ALL_DISABLED
@@ -240,21 +219,16 @@ _ASM_NOKPROBE_SYMBOL(system_call_common)
 	std	r0,GPR0(r1)
 	std	r10,GPR1(r1)
 	std	r2,GPR2(r1)
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 START_BTB_FLUSH_SECTION
 	BTB_FLUSH(r10)
 END_BTB_FLUSH_SECTION
 #endif
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	mfcr	r12
 	li	r11,0
-	/* Can we avoid saving r3-r8 in common case? */
-	std	r3,GPR3(r1)
-	std	r4,GPR4(r1)
-	std	r5,GPR5(r1)
-	std	r6,GPR6(r1)
-	std	r7,GPR7(r1)
-	std	r8,GPR8(r1)
+	/* Save syscall parameters in r3-r8 */
+	SAVE_GPRS(3, 8, r1)
 	/* Zero r9-r12, this should only be required when restoring all GPRs */
 	std	r11,GPR9(r1)
 	std	r11,GPR10(r1)
@@ -275,9 +249,12 @@ END_BTB_FLUSH_SECTION
 	std	r10,_LINK(r1)
 	std	r11,_TRAP(r1)
 	std	r12,_CCR(r1)
-	addi	r10,r1,STACK_FRAME_OVERHEAD
-	ld	r11,exception_marker@toc(r2)
-	std	r11,-16(r10)		/* "regshere" marker */
+	std	r3,ORIG_GPR3(r1)
+	/* Calling convention has r3 = regs, r4 = orig r0 */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mr	r4,r0
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r11,-16(r3)		/* "regshere" marker */
 
 #ifdef CONFIG_PPC_BOOK3S
 	li	r11,1
@@ -298,8 +275,6 @@ END_BTB_FLUSH_SECTION
 	wrteei	1
 #endif
 
-	/* Calling convention has r9 = orig r0, r10 = regs */
-	mr	r9,r0
 	bl	system_call_exception
 
 .Lsyscall_exit:
@@ -343,16 +318,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	cmpdi	r3,0
 	bne	.Lsyscall_restore_regs
 	/* Zero volatile regs that may contain sensitive kernel data */
-	li	r0,0
-	li	r4,0
-	li	r5,0
-	li	r6,0
-	li	r7,0
-	li	r8,0
-	li	r9,0
-	li	r10,0
-	li	r11,0
-	li	r12,0
+	ZEROIZE_GPR(0)
+	ZEROIZE_GPRS(4, 12)
 	mtctr	r0
 	mtspr	SPRN_XER,r0
 .Lsyscall_restore_regs_cont:
@@ -378,7 +345,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	REST_NVGPRS(r1)
 	mtctr	r3
 	mtspr	SPRN_XER,r4
-	ld	r0,GPR0(r1)
+	REST_GPR(0, r1)
 	REST_GPRS(4, 12, r1)
 	b	.Lsyscall_restore_regs_cont
 .Lsyscall_rst_end:
@@ -388,7 +355,7 @@ syscall_restart:
 _ASM_NOKPROBE_SYMBOL(syscall_restart)
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	ld	r3,RESULT(r1)
 	addi	r4,r1,STACK_FRAME_OVERHEAD
 	li	r11,IRQS_ALL_DISABLED
@@ -535,7 +502,7 @@ interrupt_return_\srr\()_user_restart:
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	li	r11,IRQS_ALL_DISABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
@@ -559,15 +526,54 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
 	ld	r11,SOFTE(r1)
 	cmpwi	r11,IRQS_ENABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
-	bne	1f
+	beq	.Linterrupt_return_\srr\()_soft_enabled
+
+	/*
+	 * Returning to soft-disabled context.
+	 * Check if a MUST_HARD_MASK interrupt has become pending, in which
+	 * case we need to disable MSR[EE] in the return context.
+	 */
+	ld	r12,_MSR(r1)
+	andi.	r10,r12,MSR_EE
+	beq	.Lfast_kernel_interrupt_return_\srr\() // EE already disabled
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r10,r11,PACA_IRQ_MUST_HARD_MASK
+	beq	.Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending
+
+	/* Must clear MSR_EE from _MSR */
+#ifdef CONFIG_PPC_BOOK3S
+	li	r10,0
+	/* Clear valid before changing _MSR */
+	.ifc \srr,srr
+	stb	r10,PACASRR_VALID(r13)
+	.else
+	stb	r10,PACAHSRR_VALID(r13)
+	.endif
+#endif
+	xori	r12,r12,MSR_EE
+	std	r12,_MSR(r1)
+	b	.Lfast_kernel_interrupt_return_\srr\()
+
+.Linterrupt_return_\srr\()_soft_enabled:
+	/*
+	 * In the soft-enabled case, need to double-check that we have no
+	 * pending interrupts that might have come in before we reached the
+	 * restart section of code, and restart the exit so those can be
+	 * handled.
+	 *
+	 * If there are none, it is possible that the interrupt still
+	 * has PACA_IRQ_HARD_DIS set, which needs to be cleared for the
+	 * interrupted context. This clear will not clobber a new pending
+	 * interrupt coming in, because we're in the restart section; any
+	 * such interrupt would return to the restart location.
+	 */
 #ifdef CONFIG_PPC_BOOK3S
 	lbz	r11,PACAIRQHAPPENED(r13)
 	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
 	bne-	interrupt_return_\srr\()_kernel_restart
 #endif
 	li	r11,0
-	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
-1:
+	stb	r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
 
 .Lfast_kernel_interrupt_return_\srr\():
 	cmpdi	cr1,r3,0
@@ -619,7 +625,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	mtspr	SPRN_XER,r5
 
 	/*
-	 * Leaving a stale exception_marker on the stack can confuse
+	 * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
 	 * the reliable stack unwinder later on. Clear it.
 	 */
 	std	r0,STACK_FRAME_OVERHEAD-16(r1)
@@ -668,7 +674,7 @@ interrupt_return_\srr\()_kernel_restart:
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	li	r11,IRQS_ALL_DISABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index 01645e03e9f0..eb2b380e52a0 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -68,6 +68,35 @@
 
 int distribute_irqs = 1;
 
+static inline void next_interrupt(struct pt_regs *regs)
+{
+	/*
+	 * Softirq processing can enable/disable irqs, which will leave
+	 * MSR[EE] enabled and the soft mask set to IRQS_DISABLED. Fix
+	 * this up.
+	 */
+	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
+		hard_irq_disable();
+	else
+		irq_soft_mask_set(IRQS_ALL_DISABLED);
+
+	/*
+	 * We are responding to the next interrupt, so interrupt-off
+	 * latencies should be reset here.
+	 */
+	trace_hardirqs_on();
+	trace_hardirqs_off();
+}
+
+static inline bool irq_happened_test_and_clear(u8 irq)
+{
+	if (local_paca->irq_happened & irq) {
+		local_paca->irq_happened &= ~irq;
+		return true;
+	}
+	return false;
+}
+
 void replay_soft_interrupts(void)
 {
 	struct pt_regs regs;
@@ -79,18 +108,25 @@ void replay_soft_interrupts(void)
 	 * recurse into this function. Don't keep any state across
 	 * interrupt handler calls which may change underneath us.
 	 *
+	 * Softirqs cannot be disabled over replay to stop this recursion
+	 * because interrupts taken in idle code may require RCU softirq
+	 * to run in the irq RCU tracking context. This is a hard problem
+	 * to fix without changes to the softirq or idle layer.
+	 *
 	 * We use local_paca rather than get_paca() to avoid all the
 	 * debug_smp_processor_id() business in this low level function.
 	 */
 
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+		WARN_ON_ONCE(mfmsr() & MSR_EE);
+		WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+	}
+
 	ppc_save_regs(&regs);
 	regs.softe = IRQS_ENABLED;
 	regs.msr |= MSR_EE;
 
 again:
-	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
-		WARN_ON_ONCE(mfmsr() & MSR_EE);
-
 	/*
 	 * Force the delivery of pending soft-disabled interrupts on PS3.
 	 * Any HV call will have this side effect.
@@ -105,56 +141,47 @@ again:
 	 * This is a higher priority interrupt than the others, so
 	 * replay it first.
 	 */
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) {
-		local_paca->irq_happened &= ~PACA_IRQ_HMI;
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+	    irq_happened_test_and_clear(PACA_IRQ_HMI)) {
 		regs.trap = INTERRUPT_HMI;
 		handle_hmi_exception(&regs);
-		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
-			hard_irq_disable();
+		next_interrupt(&regs);
 	}
 
-	if (local_paca->irq_happened & PACA_IRQ_DEC) {
-		local_paca->irq_happened &= ~PACA_IRQ_DEC;
+	if (irq_happened_test_and_clear(PACA_IRQ_DEC)) {
 		regs.trap = INTERRUPT_DECREMENTER;
 		timer_interrupt(&regs);
-		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
-			hard_irq_disable();
+		next_interrupt(&regs);
 	}
 
-	if (local_paca->irq_happened & PACA_IRQ_EE) {
-		local_paca->irq_happened &= ~PACA_IRQ_EE;
+	if (irq_happened_test_and_clear(PACA_IRQ_EE)) {
 		regs.trap = INTERRUPT_EXTERNAL;
 		do_IRQ(&regs);
-		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
-			hard_irq_disable();
+		next_interrupt(&regs);
 	}
 
-	if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) {
-		local_paca->irq_happened &= ~PACA_IRQ_DBELL;
+	if (IS_ENABLED(CONFIG_PPC_DOORBELL) &&
+	    irq_happened_test_and_clear(PACA_IRQ_DBELL)) {
 		regs.trap = INTERRUPT_DOORBELL;
 		doorbell_exception(&regs);
-		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
-			hard_irq_disable();
+		next_interrupt(&regs);
 	}
 
 	/* Book3E does not support soft-masking PMI interrupts */
-	if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) {
-		local_paca->irq_happened &= ~PACA_IRQ_PMI;
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+	    irq_happened_test_and_clear(PACA_IRQ_PMI)) {
 		regs.trap = INTERRUPT_PERFMON;
 		performance_monitor_exception(&regs);
-		if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS))
-			hard_irq_disable();
+		next_interrupt(&regs);
 	}
 
-	if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) {
-		/*
-		 * We are responding to the next interrupt, so interrupt-off
-		 * latencies should be reset here.
-		 */
-		trace_hardirqs_on();
-		trace_hardirqs_off();
+	/*
+	 * Softirq processing can enable and disable interrupts, which can
+	 * result in new irqs becoming pending. Must keep looping until we
+	 * have cleared out all pending interrupts.
+	 */
+	if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS)
 		goto again;
-	}
 }
 
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
@@ -270,10 +297,12 @@ happened:
 	trace_hardirqs_off();
 
 	replay_soft_interrupts_irqrestore();
-	local_paca->irq_happened = 0;
 
 	trace_hardirqs_on();
 	irq_soft_mask_set(IRQS_ENABLED);
+	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+		WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
+	local_paca->irq_happened = 0;
 	__hard_irq_enable();
 	preempt_enable();
 }
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index a20deebf233f..1a1e9995dae3 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -47,7 +47,7 @@ static struct hard_trap_info
 	{ 0x0c00, 0x14 /* SIGCHLD */ },		/* system call */
 #ifdef CONFIG_BOOKE_OR_40x
 	{ 0x2002, 0x05 /* SIGTRAP */ },		/* debug */
-#if defined(CONFIG_FSL_BOOKE)
+#if defined(CONFIG_PPC_85xx)
 	{ 0x2010, 0x08 /* SIGFPE */  },		/* spe unavailable */
 	{ 0x2020, 0x08 /* SIGFPE */  },		/* spe unavailable */
 	{ 0x2030, 0x08 /* SIGFPE */  },		/* spe fp data */
@@ -57,7 +57,7 @@ static struct hard_trap_info
 	{ 0x2900, 0x08 /* SIGFPE */  },		/* apu unavailable */
 	{ 0x3100, 0x0e /* SIGALRM */ },		/* fixed interval timer */
 	{ 0x3200, 0x02 /* SIGINT */  }, 	/* watchdog */
-#else /* ! CONFIG_FSL_BOOKE */
+#else /* ! CONFIG_PPC_85xx */
 	{ 0x1000, 0x0e /* SIGALRM */ },		/* prog interval timer */
 	{ 0x1010, 0x0e /* SIGALRM */ },		/* fixed interval timer */
 	{ 0x1020, 0x02 /* SIGINT */  }, 	/* watchdog */
@@ -208,7 +208,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	for (reg = 14; reg < 32; reg++)
 		PACK64(ptr, regs->gpr[reg]);
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 #ifdef CONFIG_SPE
 	for (reg = 0; reg < 32; reg++)
 		PACK64(ptr, p->thread.evr[reg]);
@@ -234,7 +234,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 #define GDB_SIZEOF_REG sizeof(unsigned long)
 #define GDB_SIZEOF_REG_U32 sizeof(u32)
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 #define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long)
 #else
 #define GDB_SIZEOF_FLOAT_REG sizeof(u64)
@@ -329,7 +329,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 
 	if (regno >= 32 && regno < 64) {
 		/* FP registers 32 -> 63 */
-#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
 		if (current)
 			memcpy(mem, &current->thread.evr[regno-32],
 					dbg_reg_def[regno].size);
@@ -355,7 +355,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
 
 	if (regno >= 32 && regno < 64) {
 		/* FP registers 32 -> 63 */
-#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
 		memcpy(&current->thread.evr[regno-32], mem,
 				dbg_reg_def[regno].size);
 #else
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 912d4f8a13be..bd7b1a035459 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -161,7 +161,13 @@ int arch_prepare_kprobe(struct kprobe *p)
 	preempt_disable();
 	prev = get_kprobe(p->addr - 1);
 	preempt_enable_no_resched();
-	if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
+
+	/*
+	 * When prev is an ftrace-based kprobe, we don't have an insn, and it
+	 * doesn't probe a prefixed instruction.
+	 */
+	if (prev && !kprobe_ftrace(prev) &&
+	    ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
 		printk("Cannot register a kprobe on the second word of prefixed instruction\n");
 		ret = -EINVAL;
 	}
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 6568823cf306..5b3c093611ba 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -455,7 +455,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
 		kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
 		break;
 
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
 	case KVM_INST_MFSPR(SPRN_MAS0):
 		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
 			kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
@@ -484,7 +484,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
 		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
 			kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
 		break;
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
 
 	case KVM_INST_MFSPR(SPRN_SPRG4):
 #ifdef CONFIG_BOOKE
@@ -557,7 +557,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
 	case KVM_INST_MTSPR(SPRN_DSISR):
 		kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
 		break;
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
 	case KVM_INST_MTSPR(SPRN_MAS0):
 		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
 			kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
@@ -586,7 +586,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features)
 		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
 			kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
 		break;
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
 
 	case KVM_INST_MTSPR(SPRN_SPRG4):
 		if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 5c58460b269a..f048c424c525 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -471,6 +471,8 @@ void __init find_legacy_serial_ports(void)
 	}
 #endif
 
+	of_node_put(stdout);
+
 	DBG("legacy_serial_console = %d\n", legacy_serial_console);
 	if (legacy_serial_console >= 0)
 		setup_legacy_serial_console(legacy_serial_console);
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index fd6d8d3a548e..36184cada00b 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -286,7 +286,7 @@ kexec_flag:
 
 
 #ifdef CONFIG_KEXEC_CORE
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 /*
  * BOOK3E has no real MMU mode, so we have to setup the initial TLB
  * for a core to identity map v:0 to p:0.  This current implementation
@@ -354,7 +354,7 @@ _GLOBAL(kexec_smp_wait)
  * don't overwrite r3 here, it is live for kexec_wait above.
  */
 real_mode:	/* assume normal blr return */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	/* Create an identity mapping. */
 	b	kexec_create_tlb
 #else
@@ -413,7 +413,7 @@ _GLOBAL(kexec_sequence)
 	lhz	r25,PACAHWCPUID(r13)	/* get our phys cpu from paca */
 
 	/* disable interrupts, we are overwriting kernel data next */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	wrteei	0
 #else
 	mfmsr	r3
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 5c7f0b4b784b..cd4e7bc32609 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -73,7 +73,7 @@ optprobe_template_entry:
 	 * further below.
 	 */
 #ifdef CONFIG_PPC64
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 #endif
 
 	.global optprobe_template_op_address
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ba593fd60124..be8db402e963 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -16,7 +16,6 @@
 #include <asm/kexec.h>
 #include <asm/svm.h>
 #include <asm/ultravisor.h>
-#include <asm/rtas.h>
 
 #include "setup.h"
 
@@ -170,30 +169,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
 }
 #endif /* CONFIG_PPC_64S_HASH_MMU */
 
-#ifdef CONFIG_PPC_PSERIES
-/**
- * new_rtas_args() - Allocates rtas args
- * @cpu:	CPU number
- * @limit:	Memory limit for this allocation
- *
- * Allocates a struct rtas_args and return it's pointer,
- * if not in Hypervisor mode
- *
- * Return:	Pointer to allocated rtas_args
- *		NULL if CPU in Hypervisor Mode
- */
-static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit)
-{
-	limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX);
-
-	if (early_cpu_has_feature(CPU_FTR_HVMODE))
-		return NULL;
-
-	return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES,
-			       limit, cpu);
-}
-#endif /* CONFIG_PPC_PSERIES */
-
 /* The Paca is an array with one entry per processor.  Each contains an
  * lppaca, which contains the information shared between the
  * hypervisor and Linux.
@@ -211,7 +186,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 #ifdef CONFIG_PPC_PSERIES
 	new_paca->lppaca_ptr = NULL;
 #endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	new_paca->kernel_pgd = swapper_pg_dir;
 #endif
 	new_paca->lock_token = 0x8000;
@@ -228,14 +203,10 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 	new_paca->slb_shadow_ptr = NULL;
 #endif
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	/* For now -- if we have threads this will be adjusted later */
 	new_paca->tcd_ptr = &new_paca->tcd;
 #endif
-
-#ifdef CONFIG_PPC_PSERIES
-	new_paca->rtas_args_reentrant = NULL;
-#endif
 }
 
 /* Put the paca pointer into r13 and SPRG_PACA */
@@ -244,7 +215,7 @@ void setup_paca(struct paca_struct *new_paca)
 	/* Setup r13 */
 	local_paca = new_paca;
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	/* On Book3E, initialize the TLB miss exception frames */
 	mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
 #else
@@ -308,9 +279,6 @@ void __init allocate_paca(int cpu)
 #ifdef CONFIG_PPC_64S_HASH_MMU
 	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
 #endif
-#ifdef CONFIG_PPC_PSERIES
-	paca->rtas_args_reentrant = new_rtas_args(cpu, limit);
-#endif
 	paca_struct_size += sizeof(struct paca_struct);
 }
 
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 31de91c8359c..d67cf79bf5d0 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -135,7 +135,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 	list_add_tail(&phb->list_node, &hose_list);
 	spin_unlock(&hose_spinlock);
 
-	phb->dn = dev;
+	phb->dn = of_node_get(dev);
 	phb->is_dynamic = slab_is_available();
 #ifdef CONFIG_PPC64
 	if (dev) {
@@ -158,7 +158,7 @@ void pcibios_free_controller(struct pci_controller *phb)
 	/* Clear bit of phb_bitmap to allow reuse of this PHB number. */
 	if (phb->global_number < MAX_PHBS)
 		clear_bit(phb->global_number, phb_bitmap);
-
+	of_node_put(phb->dn);
 	list_del(&phb->list_node);
 	spin_unlock(&hose_spinlock);
 
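The two pcibios hunks above balance the device-tree reference counts: of_node_get() when the controller caches the node pointer, of_node_put() when the controller is freed. A minimal sketch of that pairing, with a hypothetical structure name:

	#include <linux/of.h>
	#include <linux/slab.h>

	struct my_dev {
		struct device_node *dn;
	};

	static struct my_dev *my_dev_alloc(struct device_node *np)
	{
		struct my_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);

		if (!d)
			return NULL;
		/* Hold a reference for as long as we cache the pointer. */
		d->dn = of_node_get(np);
		return d;
	}

	static void my_dev_free(struct my_dev *d)
	{
		/* Drop the reference taken in my_dev_alloc(); NULL is a no-op. */
		of_node_put(d->dn);
		kfree(d);
	}
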
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 7a35fc25a304..38561d6a2079 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -330,6 +330,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
 	INIT_LIST_HEAD(&pdn->list);
 	parent = of_get_parent(dn);
 	pdn->parent = parent ? PCI_DN(parent) : NULL;
+	of_node_put(parent);
 	if (pdn->parent)
 		list_add_tail(&pdn->list, &pdn->parent->child_list);
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 0fbda89cd1bb..40834ef84f0c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -127,7 +127,7 @@ unsigned long notrace msr_check_and_set(unsigned long bits)
 		newmsr |= MSR_VSX;
 
 	if (oldmsr != newmsr)
-		mtmsr_isync(newmsr);
+		newmsr = mtmsr_isync_irqsafe(newmsr);
 
 	return newmsr;
 }
@@ -145,7 +145,7 @@ void notrace __msr_check_and_clear(unsigned long bits)
 		newmsr &= ~MSR_VSX;
 
 	if (oldmsr != newmsr)
-		mtmsr_isync(newmsr);
+		mtmsr_isync_irqsafe(newmsr);
 }
 EXPORT_SYMBOL(__msr_check_and_clear);
 
@@ -1655,11 +1655,6 @@ EXPORT_SYMBOL_GPL(set_thread_tidr);
 
 #endif /* CONFIG_PPC64 */
 
-void
-release_thread(struct task_struct *t)
-{
-}
-
 /*
  * this gets called so that we can store coprocessor state into memory and
  * copy the current task into the new thread.
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index a730b951b64b..1eed87d954ba 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -30,6 +30,7 @@
 #include <linux/libfdt.h>
 #include <linux/cpu.h>
 #include <linux/pgtable.h>
+#include <linux/seq_buf.h>
 
 #include <asm/rtas.h>
 #include <asm/page.h>
@@ -137,7 +138,7 @@ static void __init move_device_tree(void)
 }
 
 /*
- * ibm,pa-features is a per-cpu property that contains a string of
+ * ibm,pa/pi-features is a per-cpu property that contains a string of
  * attribute descriptors, each of which has a 2 byte header plus up
  * to 254 bytes worth of processor attribute bits.  First header
  * byte specifies the number of bytes following the header.
@@ -149,15 +150,17 @@ static void __init move_device_tree(void)
  * is supported/not supported.  Note that the bit numbers are
  * big-endian to match the definition in PAPR.
  */
-static struct ibm_pa_feature {
+struct ibm_feature {
 	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
 	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
 	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
 	unsigned int	cpu_user_ftrs2;	/* PPC_FEATURE2_xxx bit */
-	unsigned char	pabyte;		/* byte number in ibm,pa-features */
+	unsigned char	pabyte;		/* byte number in ibm,pa/pi-features */
 	unsigned char	pabit;		/* bit number (big-endian) */
 	unsigned char	invert;		/* if 1, pa bit set => clear feature */
-} ibm_pa_features[] __initdata = {
+};
+
+static struct ibm_feature ibm_pa_features[] __initdata = {
 	{ .pabyte = 0,  .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
 	{ .pabyte = 0,  .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
 	{ .pabyte = 0,  .pabit = 3, .cpu_features  = CPU_FTR_CTRL },
@@ -179,9 +182,19 @@ static struct ibm_pa_feature {
 	{ .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
 };
 
+/*
+ * The ibm,pi-features property describes processor-specific options
+ * not covered by ibm,pa-features. Currently we use byte 0, bit 3,
+ * which indicates that a DSI interrupt occurs when a paste operation
+ * targets a suspended NX window.
+ */
+static struct ibm_feature ibm_pi_features[] __initdata = {
+	{ .pabyte = 0, .pabit = 3, .mmu_features  = MMU_FTR_NX_DSI },
+};
+
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
 				 unsigned long tablelen,
-				 struct ibm_pa_feature *fp,
+				 struct ibm_feature *fp,
 				 unsigned long ft_size)
 {
 	unsigned long i, len, bit;
@@ -218,17 +231,18 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs,
 	}
 }
 
-static void __init check_cpu_pa_features(unsigned long node)
+static void __init check_cpu_features(unsigned long node, char *name,
+				      struct ibm_feature *fp,
+				      unsigned long size)
 {
 	const unsigned char *pa_ftrs;
 	int tablelen;
 
-	pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
+	pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen);
 	if (pa_ftrs == NULL)
 		return;
 
-	scan_features(node, pa_ftrs, tablelen,
-		      ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
+	scan_features(node, pa_ftrs, tablelen, fp, size);
 }
 
 #ifdef CONFIG_PPC_64S_HASH_MMU
@@ -376,11 +390,16 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	 */
 	if (!dt_cpu_ftrs_in_use()) {
 		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
-		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
+		if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) {
 			identify_cpu(0, be32_to_cpup(prop));
+			seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop));
+		}
 
 		check_cpu_feature_properties(node);
-		check_cpu_pa_features(node);
+		check_cpu_features(node, "ibm,pa-features", ibm_pa_features,
+				   ARRAY_SIZE(ibm_pa_features));
+		check_cpu_features(node, "ibm,pi-features", ibm_pi_features,
+				   ARRAY_SIZE(ibm_pi_features));
 	}
 
 	identical_pvr_fixup(node);
@@ -696,6 +715,23 @@ static void __init tm_init(void)
 static void tm_init(void) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
+static int __init
+early_init_dt_scan_model(unsigned long node, const char *uname,
+			 int depth, void *data)
+{
+	const char *prop;
+
+	if (depth != 0)
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "model", NULL);
+	if (prop)
+		seq_buf_printf(&ppc_hw_desc, "%s ", prop);
+
+	/* break now */
+	return 1;
+}
+
 #ifdef CONFIG_PPC64
 static void __init save_fscr_to_task(void)
 {
@@ -724,6 +760,8 @@ void __init early_init_devtree(void *params)
 	if (!early_init_dt_verify(params))
 		panic("BUG: Failed verifying flat device tree, bad version?");
 
+	of_scan_flat_dt(early_init_dt_scan_model, NULL);
+
 #ifdef CONFIG_PPC_RTAS
 	/* Some machines might need RTAS info for debugging, grab it now. */
 	of_scan_flat_dt(early_init_dt_scan_rtas, NULL);
@@ -803,6 +841,9 @@ void __init early_init_devtree(void *params)
 
 	dt_cpu_ftrs_scan();
 
+	// We can now add the CPU name & PVR to the hardware description
+	seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR));
+
 	/* Retrieve CPU related informations from the flat tree
 	 * (altivec support, boot CPU ID, ...)
 	 */
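scan_features() interprets pabyte/pabit with big-endian bit numbering (bit 0 is the most significant bit of the byte), matching PAPR as the comment above notes. A small, runnable sketch of that bit test, with hypothetical names:

	#include <stdbool.h>
	#include <stdio.h>

	/* Test bit 'pabit' (0 = MSB, big-endian numbering) of payload[pabyte]. */
	static bool pa_feature_set(const unsigned char *payload,
				   unsigned int pabyte, unsigned int pabit)
	{
		return (payload[pabyte] >> (7 - pabit)) & 1;
	}

	int main(void)
	{
		const unsigned char ftrs[] = { 0x90, 0x00 }; /* bits 0 and 3 set */

		/* Prints "1 1 0". */
		printf("%d %d %d\n", pa_feature_set(ftrs, 0, 0),
		       pa_feature_set(ftrs, 0, 3), pa_feature_set(ftrs, 0, 1));
		return 0;
	}
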
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a6669c40c1db..d464ba412084 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -96,12 +96,6 @@ static int of_workarounds __prombss;
 #define OF_WA_CLAIM	1	/* do phys/virt claim separately, then map */
 #define OF_WA_LONGTRAIL	2	/* work around longtrail bugs */
 
-#define PROM_BUG() do {						\
-        prom_printf("kernel BUG at %s line 0x%x!\n",		\
-		    __FILE__, __LINE__);			\
-	__builtin_trap();					\
-} while (0)
-
 #ifdef DEBUG_PROM
 #define prom_debug(x...)	prom_printf(x)
 #else
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index dfa5f729f774..311890d71c4c 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -26,8 +26,7 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
 logo_linux_clut224 btext_prepare_BAT
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
-relocate"
+btext_setup_display TOC. relocate"
 
 NM="$1"
 OBJ="$2"
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index 232e4549defe..efd52f2e7033 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -27,8 +27,8 @@ _GLOBAL(relocate)
 	add	r9,r9,r12	/* r9 has runtime addr of .rela.dyn section */
 	ld	r10,(p_st - 0b)(r12)
 	add	r10,r10,r12	/* r10 has runtime addr of _stext */
-	ld	r13,(p_sym - 0b)(r12)
-	add	r13,r13,r12	/* r13 has runtime addr of .dynsym */
+	ld	r4,(p_sym - 0b)(r12)
+	add	r4,r4,r12	/* r4 has runtime addr of .dynsym */
 
 	/*
 	 * Scan the dynamic section for the RELA, RELASZ and RELAENT entries.
@@ -84,16 +84,16 @@ _GLOBAL(relocate)
 	ld	r0,16(r9)	/* reloc->r_addend */
 	b	.Lstore
 .Luaddr64:
-	srdi	r14,r0,32	/* ELF64_R_SYM(reloc->r_info) */
+	srdi	r5,r0,32	/* ELF64_R_SYM(reloc->r_info) */
 	clrldi	r0,r0,32
 	cmpdi	r0,R_PPC64_UADDR64
 	bne	.Lnext
 	ld	r6,0(r9)
 	ld	r0,16(r9)
-	mulli	r14,r14,24	/* 24 == sizeof(elf64_sym) */
-	add	r14,r14,r13	/* elf64_sym[ELF64_R_SYM] */
-	ld	r14,8(r14)
-	add	r0,r0,r14
+	mulli	r5,r5,24	/* 24 == sizeof(elf64_sym) */
+	add	r5,r5,r4	/* elf64_sym[ELF64_R_SYM] */
+	ld	r5,8(r5)
+	add	r0,r0,r5
 .Lstore:
 	add	r0,r0,r3
 	stdx	r0,r7,r6
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 693133972294..e847f9b1c5b9 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -23,6 +23,7 @@
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/reboot.h>
+#include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -43,7 +44,6 @@
 #include <asm/time.h>
 #include <asm/mmu.h>
 #include <asm/topology.h>
-#include <asm/paca.h>
 
 /* This is here deliberately so it's only used in this file */
 void enter_rtas(unsigned long);
@@ -464,6 +464,9 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
 	va_end(list);
 }
 
+static int ibm_open_errinjct_token;
+static int ibm_errinjct_token;
+
 int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 {
 	va_list list;
@@ -476,6 +479,16 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 	if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
 		return -1;
 
+	if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) {
+		/*
+		 * It would be nicer to not discard the error value
+		 * from security_locked_down(), but callers expect an
+		 * RTAS status, not an errno.
+		 */
+		if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION))
+			return -1;
+	}
+
 	if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) {
 		WARN_ON_ONCE(1);
 		return -1;
@@ -932,59 +945,6 @@ void rtas_activate_firmware(void)
 		pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }
 
-#ifdef CONFIG_PPC_PSERIES
-/**
- * rtas_call_reentrant() - Used for reentrant rtas calls
- * @token:	Token for desired reentrant RTAS call
- * @nargs:	Number of Input Parameters
- * @nret:	Number of Output Parameters
- * @outputs:	Array of outputs
- * @...:	Inputs for desired RTAS call
- *
- * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off",
- * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
- * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but
- * PACA one instead.
- *
- * Return:	-1 on error,
- *		First output value of RTAS call if (nret > 0),
- *		0 otherwise,
- */
-int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
-{
-	va_list list;
-	struct rtas_args *args;
-	unsigned long flags;
-	int i, ret = 0;
-
-	if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
-		return -1;
-
-	local_irq_save(flags);
-	preempt_disable();
-
-	/* We use the per-cpu (PACA) rtas args buffer */
-	args = local_paca->rtas_args_reentrant;
-
-	va_start(list, outputs);
-	va_rtas_call_unlocked(args, token, nargs, nret, list);
-	va_end(list);
-
-	if (nret > 1 && outputs)
-		for (i = 0; i < nret - 1; ++i)
-			outputs[i] = be32_to_cpu(args->rets[i + 1]);
-
-	if (nret > 0)
-		ret = be32_to_cpu(args->rets[0]);
-
-	local_irq_restore(flags);
-	preempt_enable();
-
-	return ret;
-}
-
-#endif /* CONFIG_PPC_PSERIES */
-
 /**
  * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
  *                          extended event log.
@@ -1227,6 +1187,14 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 	if (block_rtas_call(token, nargs, &args))
 		return -EINVAL;
 
+	if (token == ibm_open_errinjct_token || token == ibm_errinjct_token) {
+		int err;
+
+		err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION);
+		if (err)
+			return err;
+	}
+
 	/* Need to handle ibm,suspend_me call specially */
 	if (token == rtas_token("ibm,suspend-me")) {
 
@@ -1325,7 +1293,8 @@ void __init rtas_initialize(void)
 #ifdef CONFIG_RTAS_ERROR_LOGGING
 	rtas_last_error_token = rtas_token("rtas-last-error");
 #endif
-
+	ibm_open_errinjct_token = rtas_token("ibm,open-errinjct");
+	ibm_errinjct_token = rtas_token("ibm,errinjct");
 	rtas_syscall_filter_init();
 }
 
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index d96fd14bd7c9..206475e3e0b4 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -16,6 +16,7 @@
 #include <asm/asm-prototypes.h>
 #include <asm/code-patching.h>
 #include <asm/security_features.h>
+#include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/inst.h>
 
@@ -34,7 +35,7 @@ static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_N
 bool barrier_nospec_enabled;
 static bool no_nospec;
 static bool btb_flush_enabled;
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
 static bool no_spectrev2;
 #endif
 
@@ -121,7 +122,7 @@ static __init int security_feature_debugfs_init(void)
 device_initcall(security_feature_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
 static int __init handle_nospectre_v2(char *p)
 {
 	no_spectrev2 = true;
@@ -129,9 +130,9 @@ static int __init handle_nospectre_v2(char *p)
 	return 0;
 }
 early_param("nospectre_v2", handle_nospectre_v2);
-#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_E500 || CONFIG_PPC_BOOK3S_64 */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 void __init setup_spectre_v2(void)
 {
 	if (no_spectrev2 || cpu_mitigations_off())
@@ -139,7 +140,7 @@ void __init setup_spectre_v2(void)
 	else
 		btb_flush_enabled = true;
 }
-#endif /* CONFIG_PPC_FSL_BOOK3E */
+#endif /* CONFIG_PPC_E500 */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index dd98f43bd685..6d041993a45d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/initrd.h>
 #include <linux/platform_device.h>
+#include <linux/printk.h>
 #include <linux/seq_file.h>
 #include <linux/ioport.h>
 #include <linux/console.h>
@@ -25,6 +26,7 @@
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/unistd.h>
+#include <linux/seq_buf.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
 #include <linux/percpu.h>
@@ -588,6 +590,15 @@ static __init int add_pcspkr(void)
 device_initcall(add_pcspkr);
 #endif	/* CONFIG_PCSPKR_PLATFORM */
 
+static char ppc_hw_desc_buf[128] __initdata;
+
+struct seq_buf ppc_hw_desc __initdata = {
+	.buffer = ppc_hw_desc_buf,
+	.size = sizeof(ppc_hw_desc_buf),
+	.len = 0,
+	.readpos = 0,
+};
+
 static __init void probe_machine(void)
 {
 	extern struct machdep_calls __machine_desc_start;
@@ -628,7 +639,13 @@ static __init void probe_machine(void)
 		for (;;);
 	}
 
-	printk(KERN_INFO "Using %s machine description\n", ppc_md.name);
+	// Append the machine name to other info we've gathered
+	seq_buf_puts(&ppc_hw_desc, ppc_md.name);
+
+	// Set the generic hardware description shown in oopses
+	dump_stack_set_arch_desc(ppc_hw_desc.buffer);
+
+	pr_info("Hardware name: %s\n", ppc_hw_desc.buffer);
 }
 
 /* Match a class of boards, not a specific device configuration. */
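ppc_hw_desc is a statically initialized seq_buf: the prom.c hunks earlier append the model, CPU name and PVR, and probe_machine() above finishes with the machine description. A minimal sketch of the seq_buf accumulation pattern, with hypothetical values:

	#include <linux/seq_buf.h>

	static char desc_buf[128];

	static void build_desc(void)
	{
		struct seq_buf sb;

		seq_buf_init(&sb, desc_buf, sizeof(desc_buf));
		seq_buf_printf(&sb, "%s 0x%04lx ", "POWER9", 0x4e1202UL);
		seq_buf_puts(&sb, "pSeries");
		/* desc_buf now holds "POWER9 0x4e1202 pSeries" */
	}
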
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 93f22da12abe..7912bb50a7cb 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -23,7 +23,7 @@ void check_smt_enabled(void);
 static inline void check_smt_enabled(void) { }
 #endif
 
-#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
 void setup_tlb_core_data(void);
 #else
 static inline void setup_tlb_core_data(void) { }
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 813261789303..b761cc1a403c 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -207,7 +207,7 @@ void __init setup_power_save(void)
 		ppc_md.power_save = ppc6xx_idle;
 #endif
 
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 	if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
 	    cpu_has_feature(CPU_FTR_CAN_NAP))
 		ppc_md.power_save = e500_idle;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2b2d0b0fbb30..a0dee7354fe6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -34,6 +34,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 
+#include <asm/asm-prototypes.h>
 #include <asm/kvm_guest.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
@@ -86,7 +87,7 @@ struct ppc64_caches ppc64_caches = {
 };
 EXPORT_SYMBOL_GPL(ppc64_caches);
 
-#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
 void __init setup_tlb_core_data(void)
 {
 	int cpu;
@@ -176,14 +177,26 @@ early_param("smt-enabled", early_smt_enabled);
 #endif /* CONFIG_SMP */
 
 /** Fix up paca fields required for the boot cpu */
-static void __init fixup_boot_paca(void)
+static void __init fixup_boot_paca(struct paca_struct *boot_paca)
 {
 	/* The boot cpu is started */
-	get_paca()->cpu_start = 1;
+	boot_paca->cpu_start = 1;
+#ifdef CONFIG_PPC_BOOK3S_64
+	/*
+	 * Give the early boot machine check stack somewhere to live: use
+	 * half of the init stack. This is a bit hacky, but early init should
+	 * not use much stack, so we are unlikely to overflow it or overwrite
+	 * anything.
+	 */
+	boot_paca->mc_emergency_sp = (void *)&init_thread_union +
+		(THREAD_SIZE/2);
+#endif
 	/* Allow percpu accesses to work until we setup percpu data */
-	get_paca()->data_offset = 0;
-	/* Mark interrupts disabled in PACA */
-	irq_soft_mask_set(IRQS_DISABLED);
+	boot_paca->data_offset = 0;
+	/* Mark interrupts soft and hard disabled in PACA */
+	boot_paca->irq_soft_mask = IRQS_DISABLED;
+	boot_paca->irq_happened = PACA_IRQ_HARD_DIS;
+	WARN_ON(mfmsr() & MSR_EE);
 }
 
 static void __init configure_exceptions(void)
@@ -350,11 +363,15 @@ void __init early_setup(unsigned long dt_ptr)
 	 * what CPU we are on.
 	 */
 	initialise_paca(&boot_paca, 0);
-	setup_paca(&boot_paca);
-	fixup_boot_paca();
+	fixup_boot_paca(&boot_paca);
+	WARN_ON(local_paca != 0);
+	setup_paca(&boot_paca); /* install the paca into registers */
 
 	/* -------- printk is now safe to use ------- */
 
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV))
+		enable_machine_check();
+
 	/* Try new device tree based feature discovery ... */
 	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
 		/* Otherwise use the old style CPU table */
@@ -377,8 +394,8 @@ void __init early_setup(unsigned long dt_ptr)
 		/* Poison paca_ptrs[0] again if it's not the boot cpu */
 		memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
 	}
-	setup_paca(paca_ptrs[boot_cpuid]);
-	fixup_boot_paca();
+	fixup_boot_paca(paca_ptrs[boot_cpuid]);
+	setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */
 
 	/*
 	 * Configure exception handlers. This include setting up trampolines
@@ -673,7 +690,7 @@ void __init initialize_cache_info(void)
  */
 __init u64 ppc64_bolted_size(void)
 {
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	/* Freescale BookE bolts the entire linear mapping */
 	/* XXX: BookE ppc64_rma_limit setup seems to disagree? */
 	if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E))
@@ -723,7 +740,7 @@ void __init irqstack_early_init(void)
 	}
 }
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 void __init exc_lvl_early_init(void)
 {
 	unsigned int i;
@@ -825,7 +842,7 @@ void __init setup_per_cpu_areas(void)
 	/*
 	 * BookE and BookS radix are historical values and should be revisited.
 	 */
-	if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
 		atom_size = SZ_1M;
 	} else if (radix_enabled()) {
 		atom_size = PAGE_SIZE;
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 618aeccdf691..a429c57ed433 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -196,9 +196,6 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 #else /* CONFIG_PPC64 */
 
-extern long sys_rt_sigreturn(void);
-extern long sys_sigreturn(void);
-
 static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 				     struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 157a7403e3eb..c114c7f25645 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -43,7 +43,7 @@
 #include <asm/tm.h>
 #include <asm/asm-prototypes.h>
 #ifdef CONFIG_PPC64
-#include "ppc32.h"
+#include <asm/syscalls_32.h>
 #include <asm/unistd.h>
 #else
 #include <asm/ucontext.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 169703fead57..0da6e59161cd 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -708,7 +708,7 @@ static struct task_struct *current_set[NR_CPUS];
 static void smp_store_cpu_info(int id)
 {
 	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	per_cpu(next_tlbcam_idx, id)
 		= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
 #endif
@@ -1257,7 +1257,12 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
-	int rc, c;
+	const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC;
+	const bool booting = system_state < SYSTEM_RUNNING;
+	const unsigned long hp_spin_ms = 1;
+	unsigned long deadline;
+	int rc;
+	const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
 
 	/*
 	 * Don't allow secondary threads to come online if inhibited
@@ -1302,22 +1307,23 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 	}
 
 	/*
-	 * wait to see if the cpu made a callin (is actually up).
-	 * use this value that I found through experimentation.
-	 * -- Cort
+	 * At boot time, simply spin on the callin word until the
+	 * deadline passes.
+	 *
+	 * At run time, spin for an optimistic amount of time to avoid
+	 * sleeping in the common case.
 	 */
-	if (system_state < SYSTEM_RUNNING)
-		for (c = 50000; c && !cpu_callin_map[cpu]; c--)
-			udelay(100);
-#ifdef CONFIG_HOTPLUG_CPU
-	else
-		/*
-		 * CPUs can take much longer to come up in the
-		 * hotplug case.  Wait five seconds.
-		 */
-		for (c = 5000; c && !cpu_callin_map[cpu]; c--)
-			msleep(1);
-#endif
+	deadline = jiffies + msecs_to_jiffies(spin_wait_ms);
+	spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline));
+
+	if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
+		const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC;
+		const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC;
+
+		deadline = jiffies + msecs_to_jiffies(sleep_wait_ms);
+		while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
+			fsleep(sleep_interval_us);
+	}
 
 	if (!cpu_callin_map[cpu]) {
 		printk(KERN_ERR "Processor %u is stuck.\n", cpu);
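The rewritten wait above first spins on cpu_callin_map with a short deadline, then (at run time only) falls back to a sleeping poll with a much longer one. A minimal sketch of the deadline pattern, with hypothetical names:

	#include <linux/jiffies.h>
	#include <linux/delay.h>

	static bool wait_for_flag(volatile int *flag, unsigned long timeout_ms)
	{
		unsigned long deadline = jiffies + msecs_to_jiffies(timeout_ms);

		while (!*flag && time_is_after_jiffies(deadline))
			fsleep(10 * USEC_PER_MSEC);	/* sleep ~10ms per poll */

		return *flag != 0;
	}
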
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_85xx.S
index 88cfdbd530f1..88cfdbd530f1 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_85xx.S
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 9f1903c7f540..f645652c2654 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -76,16 +76,10 @@
 swsusp_save_area:
 	.space SL_SIZE
 
-	.section ".toc","aw"
-swsusp_save_area_ptr:
-	.tc	swsusp_save_area[TC],swsusp_save_area
-restore_pblist_ptr:
-	.tc	restore_pblist[TC],restore_pblist
-
 	.section .text
 	.align  5
 _GLOBAL(swsusp_arch_suspend)
-	ld	r11,swsusp_save_area_ptr@toc(r2)
+	LOAD_REG_ADDR(r11, swsusp_save_area)
 	SAVE_SPECIAL(LR)
 	SAVE_REGISTER(r1)
 	SAVE_SPECIAL(CR)
@@ -131,7 +125,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
 	bl swsusp_save
 
 	/* restore LR */
-	ld	r11,swsusp_save_area_ptr@toc(r2)
+	LOAD_REG_ADDR(r11, swsusp_save_area)
 	RESTORE_SPECIAL(LR)
 	addi	r1,r1,128
 
@@ -145,7 +139,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	sync
 
-	ld	r12,restore_pblist_ptr@toc(r2)
+	LOAD_REG_ADDR(r12, restore_pblist)
 	ld	r12,0(r12)
 
 	cmpdi	r12,0
@@ -187,7 +181,7 @@ nothing_to_copy:
 	tlbia
 #endif
 
-	ld	r11,swsusp_save_area_ptr@toc(r2)
+	LOAD_REG_ADDR(r11, swsusp_save_area)
 
 	RESTORE_SPECIAL(CR)
 
@@ -265,7 +259,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
 	bl	do_after_copyback
 	addi	r1,r1,128
 
-	ld	r11,swsusp_save_area_ptr@toc(r2)
+	LOAD_REG_ADDR(r11, swsusp_save_area)
 	RESTORE_SPECIAL(LR)
 
 	li	r3, 0
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 16ff0399a257..1ab4a4d95aba 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -1,13 +1,23 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls.
+ * sys_ppc32.c: 32-bit system calls with complex calling conventions.
  *
  * Copyright (C) 2001 IBM
  * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
  *
- * These routines maintain argument size conversion between 32bit and 64bit
- * environment.
+ * 32-bit system calls with 64-bit arguments pass those in register pairs.
+ * This must be handled specially on 64-bit kernels. The compat_arg_u64_dual
+ * helper in generic compat syscalls is not always usable because the
+ * register pairing is constrained by the preceding arguments.
+ *
+ * An analogous problem exists on 32-bit kernels with ARCH_HAS_SYSCALL_WRAPPER:
+ * there, the defined system call functions take pt_regs as their argument,
+ * and the macro that maps registers to arguments
+ * (SC_POWERPC_REGS_TO_ARGS) likewise does not deal with these 64-bit
+ * arguments.
+ *
+ * This file contains these system calls.
  */
 
 #include <linux/kernel.h>
@@ -25,7 +35,6 @@
 #include <linux/poll.h>
 #include <linux/personality.h>
 #include <linux/stat.h>
-#include <linux/mman.h>
 #include <linux/in.h>
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
@@ -48,72 +57,65 @@
 #include <asm/syscalls.h>
 #include <asm/switch_to.h>
 
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
-			  unsigned long prot, unsigned long flags,
-			  unsigned long fd, unsigned long pgoff)
-{
-	/* This should remain 12 even if PAGE_SIZE changes */
-	return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
-}
-
-/* 
- * long long munging:
- * The 32 bit ABI passes long longs in an odd even register pair.
- * High and low parts are swapped depending on endian mode,
- * so define a macro (similar to mips linux32) to handle that.
- */
-#ifdef __LITTLE_ENDIAN__
-#define merge_64(low, high) ((u64)high << 32) | low
+#ifdef CONFIG_PPC32
+#define PPC32_SYSCALL_DEFINE4	SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5	SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6	SYSCALL_DEFINE6
 #else
-#define merge_64(high, low) ((u64)high << 32) | low
+#define PPC32_SYSCALL_DEFINE4	COMPAT_SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5	COMPAT_SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6	COMPAT_SYSCALL_DEFINE6
 #endif
 
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
-			     u32 reg6, u32 pos1, u32 pos2)
+PPC32_SYSCALL_DEFINE6(ppc_pread64,
+		       unsigned int, fd,
+		       char __user *, ubuf, compat_size_t, count,
+		       u32, reg6, u32, pos1, u32, pos2)
 {
 	return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
 }
 
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
-			      u32 reg6, u32 pos1, u32 pos2)
+PPC32_SYSCALL_DEFINE6(ppc_pwrite64,
+		       unsigned int, fd,
+		       const char __user *, ubuf, compat_size_t, count,
+		       u32, reg6, u32, pos1, u32, pos2)
 {
 	return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
 }
 
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count)
+PPC32_SYSCALL_DEFINE5(ppc_readahead,
+		       int, fd, u32, r4,
+		       u32, offset1, u32, offset2, u32, count)
 {
 	return ksys_readahead(fd, merge_64(offset1, offset2), count);
 }
 
-asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
-				unsigned long len1, unsigned long len2)
+PPC32_SYSCALL_DEFINE4(ppc_truncate64,
+		       const char __user *, path, u32, reg4,
+		       unsigned long, len1, unsigned long, len2)
 {
 	return ksys_truncate(path, merge_64(len1, len2));
 }
 
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2,
-				     u32 len1, u32 len2)
-{
-	return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2,
-			     merge_64(len1, len2));
-}
-
-asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
-				 unsigned long len2)
+PPC32_SYSCALL_DEFINE4(ppc_ftruncate64,
+		       unsigned int, fd, u32, reg4,
+		       unsigned long, len1, unsigned long, len2)
 {
 	return ksys_ftruncate(fd, merge_64(len1, len2));
 }
 
-long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
-		     size_t len, int advice)
+PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
+		       int, fd, u32, unused, u32, offset1, u32, offset2,
+		       size_t, len, int, advice)
 {
 	return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
 				 advice);
 }
 
-asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
-				   unsigned offset1, unsigned offset2,
-				   unsigned nbytes1, unsigned nbytes2)
+COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
+		       int, fd, unsigned int, flags,
+		       unsigned int, offset1, unsigned int, offset2,
+		       unsigned int, nbytes1, unsigned int, nbytes2)
 {
 	loff_t offset = merge_64(offset1, offset2);
 	loff_t nbytes = merge_64(nbytes1, nbytes2);
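All of the wrappers above funnel their register pairs through merge_64(). A runnable sketch of the little-endian variant (the big-endian macro simply swaps the two parameters):

	#include <stdint.h>
	#include <stdio.h>

	/* Little-endian 32-bit ABI: low word in the first (even) register. */
	static uint64_t merge_64_le(uint32_t low, uint32_t high)
	{
		return ((uint64_t)high << 32) | low;
	}

	int main(void)
	{
		/* Prints 0x0000000100000002: high = 1, low = 2. */
		printf("0x%016llx\n",
		       (unsigned long long)merge_64_le(0x2u, 0x1u));
		return 0;
	}
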
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 81ace9e8b72b..18b9d325395f 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -12,12 +12,8 @@
 #include <asm/unistd.h>
 
 
-typedef long (*syscall_fn)(long, long, long, long, long, long);
-
 /* Has to run notrace because it is entered not completely "reconciled" */
-notrace long system_call_exception(long r3, long r4, long r5,
-				   long r6, long r7, long r8,
-				   unsigned long r0, struct pt_regs *regs)
+notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
 {
 	long ret;
 	syscall_fn f;
@@ -25,7 +21,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	kuap_lock();
 
 	add_random_kstack_offset();
-	regs->orig_gpr3 = r3;
 
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
@@ -139,12 +134,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 		r0 = do_syscall_trace_enter(regs);
 		if (unlikely(r0 >= NR_syscalls))
 			return regs->gpr[3];
-		r3 = regs->gpr[3];
-		r4 = regs->gpr[4];
-		r5 = regs->gpr[5];
-		r6 = regs->gpr[6];
-		r7 = regs->gpr[7];
-		r8 = regs->gpr[8];
 
 	} else if (unlikely(r0 >= NR_syscalls)) {
 		if (unlikely(trap_is_unsupported_scv(regs))) {
@@ -158,21 +147,31 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	/* May be faster to do array_index_nospec? */
 	barrier_nospec();
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+	// No COMPAT if we have SYSCALL_WRAPPER, see Kconfig
+	f = (void *)sys_call_table[r0];
+	ret = f(regs);
+#else
 	if (unlikely(is_compat_task())) {
+		unsigned long r3, r4, r5, r6, r7, r8;
+
 		f = (void *)compat_sys_call_table[r0];
 
-		r3 &= 0x00000000ffffffffULL;
-		r4 &= 0x00000000ffffffffULL;
-		r5 &= 0x00000000ffffffffULL;
-		r6 &= 0x00000000ffffffffULL;
-		r7 &= 0x00000000ffffffffULL;
-		r8 &= 0x00000000ffffffffULL;
+		r3 = regs->gpr[3] & 0x00000000ffffffffULL;
+		r4 = regs->gpr[4] & 0x00000000ffffffffULL;
+		r5 = regs->gpr[5] & 0x00000000ffffffffULL;
+		r6 = regs->gpr[6] & 0x00000000ffffffffULL;
+		r7 = regs->gpr[7] & 0x00000000ffffffffULL;
+		r8 = regs->gpr[8] & 0x00000000ffffffffULL;
 
+		ret = f(r3, r4, r5, r6, r7, r8);
 	} else {
 		f = (void *)sys_call_table[r0];
-	}
 
-	ret = f(r3, r4, r5, r6, r7, r8);
+		ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
+			regs->gpr[6], regs->gpr[7], regs->gpr[8]);
+	}
+#endif
 
 	/*
 	 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
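With CONFIG_ARCH_HAS_SYSCALL_WRAPPER every table entry takes the pt_regs pointer, so the dispatcher above no longer shuffles six argument registers in C. A simplified sketch of the two dispatch shapes (types reduced for illustration, not the real definitions):

	struct pt_regs { long gpr[32]; };	/* stand-in for the real structure */

	typedef long (*syscall_fn)(const struct pt_regs *);
	typedef long (*raw_fn)(long, long, long, long, long, long);

	/* Wrapper ABI: the handler unpacks its own arguments from pt_regs. */
	static long dispatch_wrapped(syscall_fn f, struct pt_regs *regs)
	{
		return f(regs);
	}

	/* Legacy ABI: the dispatcher loads r3..r8 and passes them directly. */
	static long dispatch_raw(raw_fn f, struct pt_regs *regs)
	{
		return f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
			 regs->gpr[6], regs->gpr[7], regs->gpr[8]);
	}
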
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index fc999140bc27..68ebb23a5af4 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -36,9 +36,9 @@
 #include <asm/time.h>
 #include <asm/unistd.h>
 
-static inline long do_mmap2(unsigned long addr, size_t len,
-			unsigned long prot, unsigned long flags,
-			unsigned long fd, unsigned long off, int shift)
+static long do_mmap2(unsigned long addr, size_t len,
+		     unsigned long prot, unsigned long flags,
+		     unsigned long fd, unsigned long off, int shift)
 {
 	if (!arch_validate_prot(prot, addr))
 		return -EINVAL;
@@ -56,6 +56,16 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
 	return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
 }
 
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE6(mmap2,
+		       unsigned long, addr, size_t, len,
+		       unsigned long, prot, unsigned long, flags,
+		       unsigned long, fd, unsigned long, off_4k)
+{
+	return do_mmap2(addr, len, prot, flags, fd, off_4k, PAGE_SHIFT-12);
+}
+#endif
+
 SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
 		unsigned long, prot, unsigned long, flags,
 		unsigned long, fd, off_t, offset)
@@ -63,43 +73,39 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
 	return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
 }
 
-#ifdef CONFIG_PPC32
-/*
- * Due to some executables calling the wrong select we sometimes
- * get wrong args.  This determines how the args are being passed
- * (a single ptr to them all args passed) then calls
- * sys_select() with the appropriate args. -- Cort
- */
-int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp)
-{
-	if ((unsigned long)n >= 4096)
-		return sys_old_select((void __user *)n);
-
-	return sys_select(n, inp, outp, exp, tvp);
-}
-#endif
-
 #ifdef CONFIG_PPC64
-long ppc64_personality(unsigned long personality)
+static long do_ppc64_personality(unsigned long personality)
 {
 	long ret;
 
 	if (personality(current->personality) == PER_LINUX32
 	    && personality(personality) == PER_LINUX)
 		personality = (personality & ~PER_MASK) | PER_LINUX32;
-	ret = sys_personality(personality);
+	ret = ksys_personality(personality);
 	if (personality(ret) == PER_LINUX32)
 		ret = (ret & ~PER_MASK) | PER_LINUX;
 	return ret;
 }
-#endif
 
-long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
-		      u32 len_high, u32 len_low)
+SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+	return do_ppc64_personality(personality);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+	return do_ppc64_personality(personality);
+}
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
+
+SYSCALL_DEFINE6(ppc_fadvise64_64,
+		int, fd, int, advice, u32, offset_high, u32, offset_low,
+		u32, len_high, u32, len_low)
 {
-	return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low,
-				 (u64)len_high << 32 | len_low, advice);
+	return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low),
+				 merge_64(len_high, len_low), advice);
 }
 
 SYSCALL_DEFINE0(switch_endian)
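compat mmap2 always receives its offset in 4K units, independent of the kernel's PAGE_SIZE, which is why do_mmap2() is passed the fixed PAGE_SHIFT-12 shift to convert it to a page offset. A runnable sketch of that conversion:

	#include <stdint.h>
	#include <stdio.h>

	/* Convert an offset in 4K units to a page number for a given page shift. */
	static uint64_t off_4k_to_pgoff(uint64_t off_4k, unsigned int page_shift)
	{
		return off_4k >> (page_shift - 12);
	}

	int main(void)
	{
		/* 64K pages (shift 16): sixteen 4K units per page, prints 2. */
		printf("%llu\n", (unsigned long long)off_4k_to_pgoff(32, 16));
		return 0;
	}
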
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 2600b4237292..e9e0df4f9a61 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -110,7 +110,7 @@
 79	common	settimeofday			sys_settimeofday		compat_sys_settimeofday
 80	common	getgroups			sys_getgroups
 81	common	setgroups			sys_setgroups
-82	32	select				ppc_select			sys_ni_syscall
+82	32	select				sys_old_select			compat_sys_old_select
 82	64	select				sys_ni_syscall
 82	spu	select				sys_ni_syscall
 83	common	symlink				sys_symlink
@@ -178,9 +178,9 @@
 133	common	fchdir				sys_fchdir
 134	common	bdflush				sys_ni_syscall
 135	common	sysfs				sys_sysfs
-136	32	personality			sys_personality			ppc64_personality
-136	64	personality			ppc64_personality
-136	spu	personality			ppc64_personality
+136	32	personality			sys_personality			compat_sys_ppc64_personality
+136	64	personality			sys_ppc64_personality
+136	spu	personality			sys_ppc64_personality
 137	common	afs_syscall			sys_ni_syscall
 138	common	setfsuid			sys_setfsuid
 139	common	setfsgid			sys_setfsgid
@@ -228,8 +228,10 @@
 176	64	rt_sigtimedwait			sys_rt_sigtimedwait
 177	nospu 	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 178	nospu 	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
-179	common	pread64				sys_pread64			compat_sys_pread64
-180	common	pwrite64			sys_pwrite64			compat_sys_pwrite64
+179	32	pread64				sys_ppc_pread64			compat_sys_ppc_pread64
+179	64	pread64				sys_pread64
+180	32	pwrite64			sys_ppc_pwrite64		compat_sys_ppc_pwrite64
+180	64	pwrite64			sys_pwrite64
 181	common	chown				sys_chown
 182	common	getcwd				sys_getcwd
 183	common	capget				sys_capget
@@ -242,10 +244,11 @@
 188	common 	putpmsg				sys_ni_syscall
 189	nospu	vfork				sys_vfork
 190	common	ugetrlimit			sys_getrlimit			compat_sys_getrlimit
-191	common	readahead			sys_readahead			compat_sys_readahead
+191	32	readahead			sys_ppc_readahead		compat_sys_ppc_readahead
+191	64	readahead			sys_readahead
 192	32	mmap2				sys_mmap2			compat_sys_mmap2
-193	32	truncate64			sys_truncate64			compat_sys_truncate64
-194	32	ftruncate64			sys_ftruncate64			compat_sys_ftruncate64
+193	32	truncate64			sys_ppc_truncate64		compat_sys_ppc_truncate64
+194	32	ftruncate64			sys_ppc_ftruncate64		compat_sys_ppc_ftruncate64
 195	32	stat64				sys_stat64
 196	32	lstat64				sys_lstat64
 197	32	fstat64				sys_fstat64
@@ -288,7 +291,8 @@
 230	common	io_submit			sys_io_submit			compat_sys_io_submit
 231	common	io_cancel			sys_io_cancel
 232	nospu	set_tid_address			sys_set_tid_address
-233	common	fadvise64			sys_fadvise64			ppc32_fadvise64
+233	32	fadvise64			sys_ppc32_fadvise64		compat_sys_ppc32_fadvise64
+233	64	fadvise64			sys_fadvise64
 234	nospu	exit_group			sys_exit_group
 235	nospu	lookup_dcookie			sys_lookup_dcookie		compat_sys_lookup_dcookie
 236	common	epoll_create			sys_epoll_create
@@ -323,7 +327,7 @@
 251	spu	utimes				sys_utimes
 252	common	statfs64			sys_statfs64			compat_sys_statfs64
 253	common	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
-254	32	fadvise64_64			ppc_fadvise64_64
+254	32	fadvise64_64			sys_ppc_fadvise64_64
 254	spu	fadvise64_64			sys_ni_syscall
 255	common	rtas				sys_rtas
 256	32	sys_debug_setcontext		sys_debug_setcontext		sys_ni_syscall
@@ -390,7 +394,7 @@
 305	common	signalfd			sys_signalfd			compat_sys_signalfd
 306	common	timerfd_create			sys_timerfd_create
 307	common	eventfd				sys_eventfd
-308	common	sync_file_range2		sys_sync_file_range2		compat_sys_sync_file_range2
+308	common	sync_file_range2		sys_sync_file_range2		compat_sys_ppc_sync_file_range2
 309	nospu	fallocate			sys_fallocate			compat_sys_fallocate
 310	nospu	subpage_prot			sys_subpage_prot
 311	32	timerfd_settime			sys_timerfd_settime32
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 3a10cda9c05e..ef9a61718940 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -228,7 +228,7 @@ static void __init sysfs_create_dscr_default(void)
 }
 #endif /* CONFIG_PPC64 */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 #define MAX_BIT				63
 
 static u64 pw20_wt;
@@ -907,7 +907,7 @@ static int register_cpu_online(unsigned int cpu)
 		device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
 		device_create_file(s, &dev_attr_pw20_state);
 		device_create_file(s, &dev_attr_pw20_wait_time);
@@ -1003,7 +1003,7 @@ static int unregister_cpu_online(unsigned int cpu)
 		device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
 		device_remove_file(s, &dev_attr_pw20_state);
 		device_remove_file(s, &dev_attr_pw20_wait_time);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.c
index 6c1db3b6de2d..4305f2a2162f 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.c
@@ -10,32 +10,37 @@
  * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) 
  */
 
-#include <asm/ppc_asm.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/unistd.h>
+#include <asm/syscalls.h>
 
-.section .rodata,"a"
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
 
-#ifdef CONFIG_PPC64
-	.p2align	3
-#define __SYSCALL(nr, entry)	.8byte entry
+#undef __SYSCALL
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#define __SYSCALL(nr, entry) [nr] = entry,
 #else
-	.p2align	2
-#define __SYSCALL(nr, entry)	.long entry
+/*
+ * Coercing syscall handlers with arbitrary parameters to the common type
+ * requires a cast to void * to avoid -Wcast-function-type warnings.
+ */
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
 #endif
 
-#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
-.globl sys_call_table
-sys_call_table:
+const syscall_fn sys_call_table[] = {
 #ifdef CONFIG_PPC64
 #include <asm/syscall_table_64.h>
 #else
 #include <asm/syscall_table_32.h>
 #endif
+};
 
 #ifdef CONFIG_COMPAT
 #undef __SYSCALL_WITH_COMPAT
 #define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, compat)
-.globl compat_sys_call_table
-compat_sys_call_table:
-#define compat_sys_sigsuspend	sys_sigsuspend
+const syscall_fn compat_sys_call_table[] = {
 #include <asm/syscall_table_32.h>
-#endif
+};
+#endif /* CONFIG_COMPAT */
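The tables are built by defining __SYSCALL() as a designated initializer and re-including the generated header once per table. A standalone sketch of the same macro trick, with a hypothetical two-entry table:

	#include <stdio.h>

	static long sys_a(void) { return 1; }
	static long sys_b(void) { return 2; }

	typedef long (*fn)(void);

	#define __SYSCALL(nr, entry) [nr] = entry,
	static const fn table[] = {
		__SYSCALL(0, sys_a)
		__SYSCALL(1, sys_b)
	};

	int main(void)
	{
		printf("%ld %ld\n", table[0](), table[1]());	/* prints "1 2" */
		return 0;
	}
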
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
deleted file mode 100644
index c7ac3ed657c4..000000000000
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0-or-later
-#
-# Just process the CPP output from systbl_chk.c and complain
-# if anything is out of order.
-#
-# Copyright © 2008 IBM Corporation
-#
-
-awk	'BEGIN { num = -1; }	# Ignore the beginning of the file
-	/^#/ { next; }
-	/^[ \t]*$/ { next; }
-	/^START_TABLE/ { num = 0; next; }
-	/^END_TABLE/ {
-		if (num != $2) {
-			printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n",
-				$2, num - 1;
-			exit(1);
-		}
-		num = -1;	# Ignore the rest of the file
-	}
-	{
-		if (num == -1) next;
-		if (($1 != -1) && ($1 != num)) {
-			printf "Error: Syscall %s out of order (expected %s)\n",
-				$1, num;
-			exit(1);
-		};
-		num++;
-	}' "$1"
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 587adcc12860..a2ab397065c6 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -178,92 +178,6 @@ static inline unsigned long read_spurr(unsigned long tb)
 	return tb;
 }
 
-#ifdef CONFIG_PPC_SPLPAR
-
-#include <asm/dtl.h>
-
-void (*dtl_consumer)(struct dtl_entry *, u64);
-
-/*
- * Scan the dispatch trace log and count up the stolen time.
- * Should be called with interrupts disabled.
- */
-static u64 scan_dispatch_log(u64 stop_tb)
-{
-	u64 i = local_paca->dtl_ridx;
-	struct dtl_entry *dtl = local_paca->dtl_curr;
-	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
-	struct lppaca *vpa = local_paca->lppaca_ptr;
-	u64 tb_delta;
-	u64 stolen = 0;
-	u64 dtb;
-
-	if (!dtl)
-		return 0;
-
-	if (i == be64_to_cpu(vpa->dtl_idx))
-		return 0;
-	while (i < be64_to_cpu(vpa->dtl_idx)) {
-		dtb = be64_to_cpu(dtl->timebase);
-		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
-			be32_to_cpu(dtl->ready_to_enqueue_time);
-		barrier();
-		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
-			/* buffer has overflowed */
-			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
-			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
-			continue;
-		}
-		if (dtb > stop_tb)
-			break;
-		if (dtl_consumer)
-			dtl_consumer(dtl, i);
-		stolen += tb_delta;
-		++i;
-		++dtl;
-		if (dtl == dtl_end)
-			dtl = local_paca->dispatch_log;
-	}
-	local_paca->dtl_ridx = i;
-	local_paca->dtl_curr = dtl;
-	return stolen;
-}
-
-/*
- * Accumulate stolen time by scanning the dispatch trace log.
- * Called on entry from user mode.
- */
-void notrace accumulate_stolen_time(void)
-{
-	u64 sst, ust;
-	struct cpu_accounting_data *acct = &local_paca->accounting;
-
-	sst = scan_dispatch_log(acct->starttime_user);
-	ust = scan_dispatch_log(acct->starttime);
-	acct->stime -= sst;
-	acct->utime -= ust;
-	acct->steal_time += ust + sst;
-}
-
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
-	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
-		return 0;
-
-	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
-		return scan_dispatch_log(stop_tb);
-
-	return 0;
-}
-
-#else /* CONFIG_PPC_SPLPAR */
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PPC_SPLPAR */
-
 /*
  * Account time for a transition between system, hard irq
  * or soft irq state.
@@ -322,7 +236,11 @@ static unsigned long vtime_delta(struct cpu_accounting_data *acct,
 
 	*stime_scaled = vtime_delta_scaled(acct, now, stime);
 
-	*steal_time = calculate_stolen_time(now);
+	if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
+			firmware_has_feature(FW_FEATURE_SPLPAR))
+		*steal_time = pseries_calculate_stolen_time(now);
+	else
+		*steal_time = 0;
 
 	return stime;
 }
@@ -614,22 +532,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
 		return;
 	}
 
-	/*
-	 * Ensure a positive value is written to the decrementer, or
-	 * else some CPUs will continue to take decrementer exceptions.
-	 * When the PPC_WATCHDOG (decrementer based) is configured,
-	 * keep this at most 31 bits, which is about 4 seconds on most
-	 * systems, which gives the watchdog a chance of catching timer
-	 * interrupt hard lockups.
-	 */
-	if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
-		set_dec(0x7fffffff);
-	else
-		set_dec(decrementer_max);
-
 	/* Conditionally hard-enable interrupts. */
-	if (should_hard_irq_enable())
+	if (should_hard_irq_enable()) {
+		/*
+		 * Ensure a positive value is written to the decrementer, or
+		 * else some CPUs will continue to take decrementer exceptions.
+		 * When the PPC_WATCHDOG (decrementer based) is configured,
+		 * keep this at most 31 bits, which is about 4 seconds on most
+		 * systems, which gives the watchdog a chance of catching timer
+		 * interrupt hard lockups.
+		 */
+		if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+			set_dec(0x7fffffff);
+		else
+			set_dec(decrementer_max);
+
 		do_hard_irq_enable();
+	}
 
 #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S
index 0bddf1fa6636..294d1e05958a 100644
--- a/arch/powerpc/kernel/trace/ftrace_low.S
+++ b/arch/powerpc/kernel/trace/ftrace_low.S
@@ -48,7 +48,7 @@ _GLOBAL(return_to_handler)
 	 * We might be called from a module.
 	 * Switch to our TOC to run inside the core kernel.
 	 */
-	ld	r2, PACATOC(r13)
+	LOAD_PACA_TOC()
 #else
 	stwu	r1, -16(r1)
 	stw	r3, 8(r1)
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
index 4fa23e260cab..d031093bc436 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
@@ -83,10 +83,8 @@
 #ifdef CONFIG_PPC64
 	/* Save callee's TOC in the ABI compliant location */
 	std	r2, STK_GOT(r1)
-	ld	r2,PACATOC(r13)	/* get kernel TOC in r2 */
-
-	addis	r3,r2,function_trace_op@toc@ha
-	addi	r3,r3,function_trace_op@toc@l
+	LOAD_PACA_TOC()		/* get kernel TOC in r2 */
+	LOAD_REG_ADDR(r3, function_trace_op)
 	ld	r5,0(r3)
 #else
 	lis	r3,function_trace_op@ha
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index dadfcef5d6db..9bdd79aa51cf 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -68,6 +68,7 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 #include <asm/disassemble.h>
+#include <asm/udbg.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
 int (*__debugger)(struct pt_regs *regs) __read_mostly;
@@ -600,7 +601,7 @@ static inline int check_io_access(struct pt_regs *regs)
 
 #define inst_length(reason)	(((reason) & REASON_PREFIXED) ? 8 : 4)
 
-#if defined(CONFIG_E500)
+#if defined(CONFIG_PPC_E500)
 int machine_check_e500mc(struct pt_regs *regs)
 {
 	unsigned long mcsr = mfspr(SPRN_MCSR);
@@ -850,6 +851,19 @@ bail:
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot)
+{
+	udbg_printf("Machine check (early boot)\n");
+	udbg_printf("SRR0=0x%016lx   SRR1=0x%016lx\n", regs->nip, regs->msr);
+	udbg_printf(" DAR=0x%016lx  DSISR=0x%08lx\n", regs->dar, regs->dsisr);
+	udbg_printf("  LR=0x%016lx     R1=0x%08lx\n", regs->link, regs->gpr[1]);
+	udbg_printf("------\n");
+	die("Machine check (early boot)", regs, SIGBUS);
+	for (;;)
+		;
+	return 0;
+}
+
 DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
 {
 	__machine_check_exception(regs);
@@ -2085,7 +2099,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
 }
 #endif /* CONFIG_ALTIVEC */
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 DEFINE_INTERRUPT_HANDLER(CacheLockingException)
 {
 	unsigned long error_code = regs->dsisr;
@@ -2098,12 +2112,11 @@ DEFINE_INTERRUPT_HANDLER(CacheLockingException)
 		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
 	return;
 }
-#endif /* CONFIG_FSL_BOOKE */
+#endif /* CONFIG_PPC_85xx */
 
 #ifdef CONFIG_SPE
 DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
 {
-	extern int do_spe_mathemu(struct pt_regs *regs);
 	unsigned long spefscr;
 	int fpexc_mode;
 	int code = FPE_FLTUNK;
@@ -2153,7 +2166,6 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
 
 DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
 {
-	extern int speround_handler(struct pt_regs *regs);
 	int err;
 
 	interrupt_cond_local_irq_enable(regs);
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index b1544b2f6321..92b3fc258d11 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -67,6 +67,8 @@ void __init udbg_early_init(void)
 	udbg_init_debug_opal_raw();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI)
 	udbg_init_debug_opal_hvsi();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_16550)
+	udbg_init_debug_16550();
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index d3942de254c6..74ddf836f7a2 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -8,6 +8,7 @@
 #include <asm/udbg.h>
 #include <asm/io.h>
 #include <asm/reg_a2.h>
+#include <asm/early_ioremap.h>
 
 extern u8 real_readb(volatile u8 __iomem  *addr);
 extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -297,41 +298,34 @@ void __init udbg_init_40x_realmode(void)
 
 #endif /* CONFIG_PPC_EARLY_DEBUG_40x */
 
-#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT
+#ifdef CONFIG_PPC_EARLY_DEBUG_16550
 
-#define UDBG_UART_MW_ADDR	((void __iomem *)0xc0002000)
+static void __iomem *udbg_uart_early_addr;
 
-static u8 udbg_uart_in_isa300_rm(unsigned int reg)
+void __init udbg_init_debug_16550(void)
 {
-	uint64_t msr = mfmsr();
-	uint8_t  c;
-
-	mtmsr(msr & ~(MSR_EE|MSR_DR));
-	isync();
-	eieio();
-	c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2));
-	mtmsr(msr);
-	isync();
-	return c;
+	udbg_uart_early_addr = early_ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+	udbg_uart_init_mmio(udbg_uart_early_addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);
 }
 
-static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val)
+static int __init udbg_init_debug_16550_ioremap(void)
 {
-	uint64_t msr = mfmsr();
-
-	mtmsr(msr & ~(MSR_EE|MSR_DR));
-	isync();
-	eieio();
-	__raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2));
-	mtmsr(msr);
-	isync();
-}
+	void __iomem *addr;
 
-void __init udbg_init_debug_microwatt(void)
-{
-	udbg_uart_in = udbg_uart_in_isa300_rm;
-	udbg_uart_out = udbg_uart_out_isa300_rm;
-	udbg_use_uart();
+	if (!udbg_uart_early_addr)
+		return 0;
+
+	addr = ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+	if (WARN_ON(!addr))
+		return -ENOMEM;
+
+	udbg_uart_init_mmio(addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);
+	early_iounmap(udbg_uart_early_addr, 0x1000);
+	udbg_uart_early_addr = NULL;
+
+	return 0;
 }
 
-#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */
+early_initcall(udbg_init_debug_16550_ioremap);
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_16550 */
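
The two-stage setup above is a common early-boot pattern: early_ioremap() gives a temporary fixmap-backed mapping before the normal ioremap machinery is available, and an early_initcall later swaps in a permanent mapping and releases the temporary one. A minimal sketch of the same hand-off, with MY_PHYS, MY_SIZE and my_early_base as hypothetical names invented for illustration:

#include <linux/init.h>
#include <linux/io.h>
#include <asm/early_ioremap.h>

#define MY_PHYS	0xfe001000UL	/* hypothetical MMIO base */
#define MY_SIZE	0x1000

static void __iomem *my_early_base;

void __init my_early_setup(void)
{
	/* Temporary mapping: only valid until early fixmaps are torn down. */
	my_early_base = early_ioremap(MY_PHYS, MY_SIZE);
}

static int __init my_switch_to_ioremap(void)
{
	void __iomem *addr;

	if (!my_early_base)	/* early init never ran */
		return 0;

	/* Permanent mapping via the regular vmalloc-backed ioremap. */
	addr = ioremap(MY_PHYS, MY_SIZE);
	if (WARN_ON(!addr))
		return -ENOMEM;

	/* Re-point consumers at the permanent mapping, then drop the early one. */
	early_iounmap(my_early_base, MY_SIZE);
	my_early_base = NULL;
	return 0;
}
early_initcall(my_switch_to_ioremap);
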
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 0da287544054..4abc01949702 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -39,6 +39,8 @@
 extern char vdso32_start, vdso32_end;
 extern char vdso64_start, vdso64_end;
 
+long sys_ni_syscall(void);
+
 /*
  * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
  * Once the early boot kernel code no longer needs to muck around
@@ -113,18 +115,18 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page)
 int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
+	VMA_ITERATOR(vmi, mm, 0);
 	struct vm_area_struct *vma;
 
 	mmap_read_lock(mm);
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;
 
 		if (vma_is_special_mapping(vma, &vvar_spec))
 			zap_page_range(vma, vma->vm_start, size);
 	}
-
 	mmap_read_unlock(mm);
+
 	return 0;
 }
 
@@ -200,28 +202,19 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
 	if (is_32bit_task()) {
 		vdso_spec = &vdso32_spec;
 		vdso_size = &vdso32_end - &vdso32_start;
-		vdso_base = VDSO32_MBASE;
 	} else {
 		vdso_spec = &vdso64_spec;
 		vdso_size = &vdso64_end - &vdso64_start;
-		/*
-		 * On 64bit we don't have a preferred map address. This
-		 * allows get_unmapped_area to find an area near other mmaps
-		 * and most likely share a SLB entry.
-		 */
-		vdso_base = 0;
 	}
 
 	mappings_size = vdso_size + vvar_size;
 	mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;
 
 	/*
-	 * pick a base address for the vDSO in process space. We try to put it
-	 * at vdso_base which is the "natural" base for it, but we might fail
-	 * and end up putting it elsewhere.
+	 * Pick a base address for the vDSO in process space.
 	 * Add enough to the size so that the result can be aligned.
 	 */
-	vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0);
+	vdso_base = get_unmapped_area(NULL, 0, mappings_size, 0, 0);
 	if (IS_ERR_VALUE(vdso_base))
 		return vdso_base;
 
@@ -313,10 +306,10 @@ static void __init vdso_setup_syscall_map(void)
 	unsigned int i;
 
 	for (i = 0; i < NR_syscalls; i++) {
-		if (sys_call_table[i] != (unsigned long)&sys_ni_syscall)
+		if (sys_call_table[i] != (void *)&sys_ni_syscall)
 			vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
 		if (IS_ENABLED(CONFIG_COMPAT) &&
-		    compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall)
+		    compat_sys_call_table[i] != (void *)&sys_ni_syscall)
 			vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
 	}
 }
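
The loop above packs one bit per syscall into 32-bit words, MSB-first, which is why the index splits into i >> 5 (word) and i & 0x1f (bit). A standalone sketch of the same layout, with NR_SYSCALLS shrunk for illustration:

#include <stdio.h>
#include <stdint.h>

#define NR_SYSCALLS 64

static uint32_t syscall_map[(NR_SYSCALLS + 31) / 32];

/* Mark syscall 'nr' as implemented: word index is nr / 32, and the
 * bit is taken MSB-first within the word, matching the vdso layout. */
static void mark_implemented(unsigned int nr)
{
	syscall_map[nr >> 5] |= 0x80000000u >> (nr & 0x1f);
}

static int is_implemented(unsigned int nr)
{
	return !!(syscall_map[nr >> 5] & (0x80000000u >> (nr & 0x1f)));
}

int main(void)
{
	mark_implemented(0);
	mark_implemented(33);
	printf("%d %d %d\n", is_implemented(0), is_implemented(33),
	       is_implemented(40));	/* prints: 1 1 0 */
	return 0;
}
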
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 096b0bf1335f..a2e7b0ce5b19 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -92,13 +92,13 @@ include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
 
 # actual build commands
 quiet_cmd_vdso32ld_and_check = VDSO32L $@
-      cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check)
+      cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $<
 quiet_cmd_vdso32cc = VDSO32C $@
       cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $<
 
 quiet_cmd_vdso64ld_and_check = VDSO64L $@
-      cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check)
+      cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check)
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $<
diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S
index e0d19d74455f..bc0be274a9ac 100644
--- a/arch/powerpc/kernel/vdso/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso32.lds.S
@@ -78,7 +78,6 @@ SECTIONS
 	__end = .;
 	PROVIDE(end = .);
 
-	STABS_DEBUG
 	DWARF_DEBUG
 	ELF_DETAILS
 
diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index 1a4a7bc4c815..744ae5363e6c 100644
--- a/arch/powerpc/kernel/vdso/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -76,7 +76,6 @@ SECTIONS
 	_end = .;
 	PROVIDE(end = .);
 
-	STABS_DEBUG
 	DWARF_DEBUG
 	ELF_DETAILS
 
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 5cc24d8cce94..5cf64740edb8 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -155,8 +155,8 @@ _GLOBAL(load_up_vsx)
  * usage of floating-point registers.  These routines must be called
  * with preempt disabled.
  */
-#ifdef CONFIG_PPC32
 	.data
+#ifdef CONFIG_PPC32
 fpzero:
 	.long	0
 fpone:
@@ -169,18 +169,17 @@ fphalf:
 	lfs	fr,name@l(r11)
 #else
 
-	.section ".toc","aw"
 fpzero:
-	.tc	FD_0_0[TC],0
+	.quad	0
 fpone:
-	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
+	.quad	0x3ff0000000000000	/* 1.0 */
 fphalf:
-	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */
+	.quad	0x3fe0000000000000	/* 0.5 */
 
-#define LDCONST(fr, name)	\
-	lfd	fr,name@toc(r2)
+#define LDCONST(fr, name)		\
+	addis	r11,r2,name@toc@ha;	\
+	lfd	fr,name@toc@l(r11)
 #endif
-
 	.text
 /*
  * Internal routine to enable floating point and set FPSCR to 0.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index fe22d940412f..7786e3ac7611 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -32,6 +32,10 @@
 
 #define STRICT_ALIGN_SIZE	(1 << CONFIG_DATA_SHIFT)
 
+#if STRICT_ALIGN_SIZE < PAGE_SIZE
+#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT"
+#endif
+
 ENTRY(_stext)
 
 PHDRS {
@@ -67,7 +71,7 @@ SECTIONS
 	.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
 #ifdef CONFIG_PPC64
 		KEEP(*(.head.text.first_256B));
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 #else
 		KEEP(*(.head.text.real_vectors));
 		*(.head.text.real_trampolines);
@@ -122,14 +126,6 @@ SECTIONS
 		*(.sfpr);
 		MEM_KEEP(init.text)
 		MEM_KEEP(exit.text)
-
-#ifdef CONFIG_PPC32
-		*(.got1)
-		__got2_start = .;
-		*(.got2)
-		__got2_end = .;
-#endif /* CONFIG_PPC32 */
-
 	} :text
 
 	. = ALIGN(PAGE_SIZE);
@@ -139,15 +135,57 @@ SECTIONS
 	/* Read-only data */
 	RO_DATA(PAGE_SIZE)
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC32
+	.sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
+		*(.sdata2)
+	}
+#endif
+
+	.data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
+		*(.data.rel.ro*)
+	}
+
+	.branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {
+		*(.branch_lt)
+	}
+
+#ifdef CONFIG_PPC32
+	.got1 : AT(ADDR(.got1) - LOAD_OFFSET) {
+		*(.got1)
+	}
+	.got2 : AT(ADDR(.got2) - LOAD_OFFSET) {
+		__got2_start = .;
+		*(.got2)
+		__got2_end = .;
+	}
+	.got : AT(ADDR(.got) - LOAD_OFFSET) {
+		*(.got)
+		*(.got.plt)
+	}
+	.plt : AT(ADDR(.plt) - LOAD_OFFSET) {
+		/* XXX: is .plt (and .got.plt) required? */
+		*(.plt)
+	}
+
+#else /* CONFIG_PPC32 */
+	.toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
+		*(.toc1)
+	}
+
+	.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+		*(.got .toc)
+	}
+
 	SOFT_MASK_TABLE(8)
 	RESTART_TABLE(8)
 
+#ifdef CONFIG_PPC64_ELF_ABI_V1
 	.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
 		__start_opd = .;
 		KEEP(*(.opd))
 		__end_opd = .;
 	}
+#endif
 
 	. = ALIGN(8);
 	__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
@@ -190,7 +228,7 @@ SECTIONS
 		*(__rfi_flush_fixup)
 		__stop___rfi_flush_fixup = .;
 	}
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC_BARRIER_NOSPEC
 	. = ALIGN(8);
@@ -201,7 +239,7 @@ SECTIONS
 	}
 #endif /* CONFIG_PPC_BARRIER_NOSPEC */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	. = ALIGN(8);
 	__spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) {
 		__start__btb_flush_fixup = .;
@@ -210,12 +248,17 @@ SECTIONS
 	}
 #endif
 
+	/*
+	 * Various code relies on __init_begin being at the strict RWX boundary.
+	 */
+	. = ALIGN(STRICT_ALIGN_SIZE);
+	__srwx_boundary = .;
+	__end_rodata = .;
+	__init_begin = .;
+
 /*
  * Init sections discarded at runtime
  */
-	. = ALIGN(STRICT_ALIGN_SIZE);
-	__init_begin = .;
-	. = ALIGN(PAGE_SIZE);
 	.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
 		_sinittext = .;
 		INIT_TEXT
@@ -317,34 +360,13 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	_sdata = .;
 
-#ifdef CONFIG_PPC32
 	.data : AT(ADDR(.data) - LOAD_OFFSET) {
 		DATA_DATA
 		*(.data.rel*)
+#ifdef CONFIG_PPC32
 		*(SDATA_MAIN)
-		*(.sdata2)
-		*(.got.plt) *(.got)
-		*(.plt)
-		*(.branch_lt)
-	}
-#else
-	.data : AT(ADDR(.data) - LOAD_OFFSET) {
-		DATA_DATA
-		*(.data.rel*)
-		*(.toc1)
-		*(.branch_lt)
-	}
-
-	.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
-		*(.got)
-#ifndef CONFIG_RELOCATABLE
-		__prom_init_toc_start = .;
-		arch/powerpc/kernel/prom_init.o*(.toc)
-		__prom_init_toc_end = .;
 #endif
-		*(.toc)
 	}
-#endif
 
 	/* The initial task and kernel stack */
 	INIT_TASK_DATA_SECTION(THREAD_ALIGN)
@@ -382,7 +404,6 @@ SECTIONS
 	_end = . ;
 	PROVIDE32 (end = .);
 
-	STABS_DEBUG
 	DWARF_DEBUG
 	ELF_DETAILS
 
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index cf84bfe9e27e..de64c7962991 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -136,7 +136,7 @@ void __init reserve_crashkernel(void)
 #ifdef CONFIG_PPC64
 		/*
 		 * On the LPAR platform place the crash kernel to mid of
-		 * RMA size (512MB or more) to ensure the crash kernel
+		 * RMA size (max. of 512MB) to ensure the crash kernel
 		 * gets enough space to place itself and some stack to be
 		 * in the first segment. At the same time normal kernel
 		 * also gets enough space to allocate memory for essential
@@ -144,9 +144,9 @@ void __init reserve_crashkernel(void)
 		 * kernel starts at 128MB offset on other platforms.
 		 */
 		if (firmware_has_feature(FW_FEATURE_LPAR))
-			crashk_res.start = ppc64_rma_size / 2;
+			crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_512M);
 		else
-			crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+			crashk_res.start = min_t(u64, ppc64_rma_size / 2, SZ_128M);
 #else
 		crashk_res.start = KDUMP_KERNELBASE;
 #endif
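
The min_t() changes above clamp the crash kernel base instead of letting it scale with the RMA: half the RMA, capped at 512MB on LPAR and 128MB elsewhere. A small standalone sketch of the resulting placement (crash_start is a name invented here):

#include <stdio.h>
#include <stdint.h>

#define SZ_128M (128ULL << 20)
#define SZ_512M (512ULL << 20)

/* Start the crash kernel at half the RMA, but never beyond 512MB
 * (LPAR) or 128MB (other platforms). */
static uint64_t crash_start(uint64_t rma_size, int is_lpar)
{
	uint64_t half = rma_size / 2;
	uint64_t cap = is_lpar ? SZ_512M : SZ_128M;

	return half < cap ? half : cap;
}

int main(void)
{
	/* A 2GB RMA on LPAR is capped at 512MB instead of starting at 1GB. */
	printf("%llu MB\n",
	       (unsigned long long)crash_start(2ULL << 30, 1) >> 20);
	return 0;
}
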
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
index b50aed48d09d..c95f96850c9e 100644
--- a/arch/powerpc/kexec/core_32.c
+++ b/arch/powerpc/kexec/core_32.c
@@ -55,7 +55,7 @@ void default_machine_kexec(struct kimage *image)
 				reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
 	printk(KERN_INFO "Bye!\n");
 
-	if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+	if (!IS_ENABLED(CONFIG_PPC_85xx) && !IS_ENABLED(CONFIG_44x))
 		relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
 
 	/* now call it */
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index c2bea9db1c1e..a79e28c91e2b 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image)
 	 * the RMA. On BookE there is no real MMU off mode, so we have to
 	 * keep it enabled as well (but then we have bolted TLB entries).
 	 */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	copy_with_mmu_off = false;
 #else
 	copy_with_mmu_off = radix_enabled() ||
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
index cf6e52bdf8d8..d9f0dd9b34ff 100644
--- a/arch/powerpc/kexec/relocate_32.S
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -25,14 +25,14 @@ relocate_new_kernel:
 	/* r4 = reboot_code_buffer */
 	/* r5 = start_address      */
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 
 	mr	r29, r3
 	mr	r30, r4
 	mr	r31, r5
 
 #define ENTRY_MAPPING_KEXEC_SETUP
-#include <kernel/fsl_booke_entry_mapping.S>
+#include <kernel/85xx_entry_mapping.S>
 #undef ENTRY_MAPPING_KEXEC_SETUP
 
 	mr      r3, r29
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index dcb398d5e009..61cdd782d3c5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -189,7 +189,7 @@ config KVM_EXIT_TIMING
 
 config KVM_E500V2
 	bool "KVM support for PowerPC E500v2 processors"
-	depends on E500 && !PPC_E500MC
+	depends on PPC_E500 && !PPC_E500MC
 	select KVM
 	select KVM_MMIO
 	select MMU_NOTIFIER
@@ -220,7 +220,7 @@ config KVM_E500MC
 
 config KVM_MPIC
 	bool "KVM in-kernel MPIC emulation"
-	depends on KVM && E500
+	depends on KVM && PPC_E500
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index e43704547a1e..6c2b1d17cb63 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -315,7 +315,7 @@ kvmppc_p9_exit_interrupt:
 	reg = reg + 1
 	.endr
 
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 
 	mflr	r4
 	std	r4,VCPU_LR(r3)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 57d0835e56fd..6ba68dd6190b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -249,6 +249,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 
 /*
  * We use the vcpu_load/put functions to measure stolen time.
+ *
  * Stolen time is counted as time when either the vcpu is able to
  * run as part of a virtual core, but the task running the vcore
  * is preempted or sleeping, or when the vcpu needs something done
@@ -278,6 +279,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * lock.  The stolen times are measured in units of timebase ticks.
  * (Note that the != TB_NIL checks below are purely defensive;
  * they should never fail.)
+ *
+ * The POWER9 path is simpler: one vcpu per virtual core, so the
+ * former case does not exist. If a vcpu is preempted when it is
+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
+ * state in the P9 path.
  */
 
 static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
@@ -311,8 +318,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 	unsigned long flags;
 	u64 now;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (vcpu->arch.busy_preempt != TB_NIL) {
+			WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
+			vc->stolen_tb += mftb() - vcpu->arch.busy_preempt;
+			vcpu->arch.busy_preempt = TB_NIL;
+		}
 		return;
+	}
 
 	now = mftb();
 
@@ -340,8 +353,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 	unsigned long flags;
 	u64 now;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/*
+		 * In the P9 path, RUNNABLE is not preemptible
+		 * (nor does it take host interrupts).
+		 */
+		WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
+		/*
+		 * Account stolen time when preempted while the vcpu task is
+		 * running in the kernel (but not in qemu, which is INACTIVE).
+		 */
+		if (task_is_running(current) &&
+				vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+			vcpu->arch.busy_preempt = mftb();
 		return;
+	}
 
 	now = mftb();
 
@@ -707,16 +733,15 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 }
 
 static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+					struct lppaca *vpa,
 					unsigned int pcpu, u64 now,
 					unsigned long stolen)
 {
 	struct dtl_entry *dt;
-	struct lppaca *vpa;
 
 	dt = vcpu->arch.dtl_ptr;
-	vpa = vcpu->arch.vpa.pinned_addr;
 
-	if (!dt || !vpa)
+	if (!dt)
 		return;
 
 	dt->dispatch_reason = 7;
@@ -737,17 +762,23 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	/* order writing *dt vs. writing vpa->dtl_idx */
 	smp_wmb();
 	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
-	vcpu->arch.dtl.dirty = true;
+
+	/* vcpu->arch.dtl.dirty is set by the caller */
 }
 
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
-				    struct kvmppc_vcore *vc)
+static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
+				       struct kvmppc_vcore *vc)
 {
+	struct lppaca *vpa;
 	unsigned long stolen;
 	unsigned long core_stolen;
 	u64 now;
 	unsigned long flags;
 
+	vpa = vcpu->arch.vpa.pinned_addr;
+	if (!vpa)
+		return;
+
 	now = mftb();
 
 	core_stolen = vcore_stolen_time(vc, now);
@@ -758,7 +789,34 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 	vcpu->arch.busy_stolen = 0;
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 
-	__kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+	vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen);
+
+	__kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen);
+
+	vcpu->arch.vpa.dirty = true;
+}
+
+static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu,
+				       struct kvmppc_vcore *vc,
+				       u64 now)
+{
+	struct lppaca *vpa;
+	unsigned long stolen;
+	unsigned long stolen_delta;
+
+	vpa = vcpu->arch.vpa.pinned_addr;
+	if (!vpa)
+		return;
+
+	stolen = vc->stolen_tb;
+	stolen_delta = stolen - vcpu->arch.stolen_logged;
+	vcpu->arch.stolen_logged = stolen;
+
+	vpa->enqueue_dispatch_tb = cpu_to_be64(stolen);
+
+	__kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now, stolen_delta);
+
+	vcpu->arch.vpa.dirty = true;
 }
 
 /* See if there is a doorbell interrupt pending for a vcpu */
@@ -2517,10 +2575,24 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
 		break;
 	case KVM_REG_PPC_TB_OFFSET:
+	{
 		/* round up to multiple of 2^24 */
-		vcpu->arch.vcore->tb_offset =
-			ALIGN(set_reg_val(id, *val), 1UL << 24);
+		u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24);
+
+		/*
+		 * Now that we know the timebase offset, update the
+		 * decrementer expiry with a guest timebase value. If
+		 * the userspace does not set DEC_EXPIRY, this ensures
+		 * a migrated vcpu at least starts with an expired
+		 * decrementer, which is better than a large one that
+		 * causes a hang.
+		 */
+		if (!vcpu->arch.dec_expires && tb_offset)
+			vcpu->arch.dec_expires = get_tb() + tb_offset;
+
+		vcpu->arch.vcore->tb_offset = tb_offset;
 		break;
+	}
 	case KVM_REG_PPC_LPCR:
 		kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
 		break;
@@ -3800,7 +3872,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 			 * kvmppc_core_prepare_to_enter.
 			 */
 			kvmppc_start_thread(vcpu, pvc);
-			kvmppc_create_dtl_entry(vcpu, pvc);
+			kvmppc_update_vpa_dispatch(vcpu, pvc);
 			trace_kvm_guest_enter(vcpu);
 			if (!vcpu->arch.ptid)
 				thr0_done = true;
@@ -3840,23 +3912,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	for (sub = 0; sub < core_info.n_subcores; ++sub)
 		spin_unlock(&core_info.vc[sub]->lock);
 
-	guest_enter_irqoff();
+	guest_timing_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
+	guest_state_enter_irqoff();
 	this_cpu_disable_ftrace();
 
-	/*
-	 * Interrupts will be enabled once we get into the guest,
-	 * so tell lockdep that we're about to enable interrupts.
-	 */
-	trace_hardirqs_on();
-
 	trap = __kvmppc_vcore_entry();
 
-	trace_hardirqs_off();
-
 	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
 
 	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
 
@@ -3891,11 +3957,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
 	kvmppc_set_host_core(pcpu);
 
-	context_tracking_guest_exit();
 	if (!vtime_accounting_enabled_this_cpu()) {
 		local_irq_enable();
 		/*
-		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
 		 * ticks that occurred while running the guest are accounted to
 		 * the guest. If vtime accounting is enabled, accounting uses
 		 * TB rather than ticks, so it can be done without enabling
@@ -3904,7 +3969,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 		 */
 		local_irq_disable();
 	}
-	vtime_account_guest_exit();
+	guest_timing_exit_irqoff();
 
 	local_irq_enable();
 
@@ -4404,7 +4469,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
 		if ((vc->vcore_state == VCORE_PIGGYBACK ||
 		     vc->vcore_state == VCORE_RUNNING) &&
 			   !VCORE_IS_EXITING(vc)) {
-			kvmppc_create_dtl_entry(vcpu, vc);
+			kvmppc_update_vpa_dispatch(vcpu, vc);
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -4520,7 +4585,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc = vcpu->arch.vcore;
 	vcpu->arch.ceded = 0;
 	vcpu->arch.run_task = current;
-	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
 	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
 
 	/* See if the MMU is ready to go */
@@ -4547,6 +4611,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	/* flags save not required, but irq_pmu has no disable/enable API */
 	powerpc_local_irq_pmu_save(flags);
 
+	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+
 	if (signal_pending(current))
 		goto sigpend;
 	if (need_resched() || !kvm->arch.mmu_ready)
@@ -4591,47 +4657,44 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	tb = mftb();
 
-	__kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
+	kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset);
 
 	trace_kvm_guest_enter(vcpu);
 
-	guest_enter_irqoff();
+	guest_timing_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 
+	guest_state_enter_irqoff();
 	this_cpu_disable_ftrace();
 
-	/* Tell lockdep that we're about to enable interrupts */
-	trace_hardirqs_on();
-
 	trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
 	vcpu->arch.trap = trap;
 
-	trace_hardirqs_off();
-
 	this_cpu_enable_ftrace();
+	guest_state_exit_irqoff();
 
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 	set_irq_happened(trap);
 
-	context_tracking_guest_exit();
+	vcpu->cpu = -1;
+	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
 	if (!vtime_accounting_enabled_this_cpu()) {
-		local_irq_enable();
+		powerpc_local_irq_pmu_restore(flags);
 		/*
-		 * Service IRQs here before vtime_account_guest_exit() so any
+		 * Service IRQs here before guest_timing_exit_irqoff() so any
 		 * ticks that occurred while running the guest are accounted to
 		 * the guest. If vtime accounting is enabled, accounting uses
 		 * TB rather than ticks, so it can be done without enabling
 		 * interrupts here, which has the problem that it accounts
 		 * interrupt processing overhead to the host.
 		 */
-		local_irq_disable();
+		powerpc_local_irq_pmu_save(flags);
 	}
-	vtime_account_guest_exit();
-
-	vcpu->cpu = -1;
-	vcpu->arch.thread_cpu = -1;
+	guest_timing_exit_irqoff();
 
 	powerpc_local_irq_pmu_restore(flags);
 
@@ -4694,6 +4757,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
  out:
 	vcpu->cpu = -1;
 	vcpu->arch.thread_cpu = -1;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 	powerpc_local_irq_pmu_restore(flags);
 	preempt_enable();
 	goto done;
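
The busy_preempt handling added above implements the P9 stolen-time scheme described in the comment: stamp the timebase when a busy-in-host vcpu task is preempted, and fold the elapsed ticks into the vcore's stolen_tb when it is scheduled back in. A minimal standalone sketch of that bookkeeping, with read_tb() standing in for mftb():

#include <stdio.h>
#include <stdint.h>

#define TB_NIL (~(uint64_t)0)

struct vcore { uint64_t stolen_tb; };
struct vcpu  { uint64_t busy_preempt; struct vcore *vc; };

/* Stand-in for mftb(): a monotonically increasing timebase. */
static uint64_t read_tb(void)
{
	static uint64_t tb;
	return tb += 100;
}

/* Preempted while busy in host: remember when we lost the CPU. */
static void vcpu_put(struct vcpu *v)
{
	v->busy_preempt = read_tb();
}

/* Scheduled back in: fold the elapsed ticks into the vcore total. */
static void vcpu_load(struct vcpu *v)
{
	if (v->busy_preempt != TB_NIL) {
		v->vc->stolen_tb += read_tb() - v->busy_preempt;
		v->busy_preempt = TB_NIL;
	}
}

int main(void)
{
	struct vcore vc = { 0 };
	struct vcpu v = { TB_NIL, &vc };

	vcpu_put(&v);	/* tb = 100 */
	vcpu_load(&v);	/* tb = 200, stolen += 100 */
	printf("stolen: %llu ticks\n", (unsigned long long)vc.stolen_tb);
	return 0;
}
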
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7ded202bf995..37f50861dd98 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1024,7 +1024,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
 	/* Restore R1/R2 so we can handle faults */
 	ld	r1, HSTATE_HOST_R1(r13)
-	ld	r2, PACATOC(r13)
+	LOAD_PACA_TOC()
 
 	mfspr	r10, SPRN_SRR0
 	mfspr	r11, SPRN_SRR1
@@ -2727,8 +2727,8 @@ kvmppc_bad_host_intr:
 	std	r4, _CTR(r1)
 	std	r5, _XER(r1)
 	std	r6, SOFTE(r1)
-	ld	r2, PACATOC(r13)
-	LOAD_REG_IMMEDIATE(3, 0x7265677368657265)
+	LOAD_PACA_TOC()
+	LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER)
 	std	r3, STACK_FRAME_OVERHEAD-16(r1)
 
 	/*
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 6fa82efe833b..205545d820a1 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -223,7 +223,7 @@ _GLOBAL(kvmppc_resume_host)
 	lwz	r3, VCPU_HOST_PID(r4)
 	mtspr	SPRN_PID, r3
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 	/* we cheat and know that Linux doesn't use PID1 which is always 0 */
 	lis	r3, 0
 	mtspr	SPRN_PID1, r3
@@ -406,7 +406,7 @@ lightweight_exit:
 	lwz	r3, VCPU_SHADOW_PID(r4)
 	mtspr	SPRN_PID, r3
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 	lwz	r3, VCPU_SHADOW_PID1(r4)
 	mtspr	SPRN_PID1, r3
 #endif
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index c3ef751465fb..6d0d329cbb35 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -17,7 +17,7 @@
 #define KVM_E500_H
 
 #include <linux/kvm_host.h>
-#include <asm/nohash/mmu-book3e.h>
+#include <asm/nohash/mmu-e500.h>
 #include <asm/tlb.h>
 #include <asm/cputhreads.h>
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index ec9c1e3c2ff4..b850b0efa201 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -785,7 +785,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
-	vcpu->arch.dec_expires = get_tb();
 
 #ifdef CONFIG_KVM_EXIT_TIMING
 	mutex_init(&vcpu->arch.exit_timing_lock);
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 3bf17c854be4..2158f61e317f 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -110,7 +110,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
 	mtmsrd	r2, 1
 
 	/* Reload TOC pointer. */
-	ld	r2, PACATOC(r13)
+	LOAD_PACA_TOC()
 
 	/* Save all but r0-r2, r9 & r13 */
 	reg = 3
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 6edf0697a526..ad0cf3108dd0 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -94,17 +94,20 @@ void __init poking_init(void)
 	static_branch_enable(&poking_init_done);
 }
 
+static unsigned long get_patch_pfn(void *addr)
+{
+	if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+		return vmalloc_to_pfn(addr);
+	else
+		return __pa_symbol(addr) >> PAGE_SHIFT;
+}
+
 /*
  * This can be called for kernel text or a module.
  */
 static int map_patch_area(void *addr, unsigned long text_poke_addr)
 {
-	unsigned long pfn;
-
-	if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
-		pfn = vmalloc_to_pfn(addr);
-	else
-		pfn = __pa_symbol(addr) >> PAGE_SHIFT;
+	unsigned long pfn = get_patch_pfn(addr);
 
 	return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
 }
@@ -149,17 +152,22 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	int err;
 	u32 *patch_addr;
 	unsigned long text_poke_addr;
+	pte_t *pte;
+	unsigned long pfn = get_patch_pfn(addr);
 
-	text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
+	text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
-	err = map_patch_area(addr, text_poke_addr);
-	if (err)
-		return err;
+	pte = virt_to_kpte(text_poke_addr);
+	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+	/* See ptesync comment in radix__set_pte_at() */
+	if (radix_enabled())
+		asm volatile("ptesync": : :"memory");
 
 	err = __patch_instruction(addr, instr, patch_addr);
 
-	unmap_patch_area(text_poke_addr);
+	pte_clear(&init_mm, text_poke_addr, pte);
+	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
 
 	return err;
 }
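
The rewritten fast path above no longer builds and tears down a mapping per patch; the poking VA is fixed per CPU, so only the PTE's pfn changes, followed by a single-page TLB flush. A condensed sketch of that sequence, assuming (as the kernel code does) a page-aligned poke area whose PTE was pre-allocated at boot:

static int poke_one_page(u32 *addr, ppc_inst_t instr, unsigned long poke_va)
{
	pte_t *pte = virt_to_kpte(poke_va);	/* PTE of the fixed per-CPU VA */
	u32 *patch_addr = (u32 *)(poke_va + offset_in_page(addr));
	int err;

	/* Point the pre-allocated PTE at the page holding 'addr'. */
	__set_pte_at(&init_mm, poke_va, pte,
		     pfn_pte(get_patch_pfn(addr), PAGE_KERNEL), 0);
	if (radix_enabled())
		asm volatile("ptesync" ::: "memory");	/* order the PTE update */

	err = __patch_instruction(addr, instr, patch_addr);

	/* Drop the writable alias and flush just that one page. */
	pte_clear(&init_mm, poke_va, pte);
	flush_tlb_kernel_range(poke_va, poke_va + PAGE_SIZE);
	return err;
}
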
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index d1091b5ee5da..6812cb19d04a 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -9,11 +9,6 @@
 #include <asm/export.h>
 #include <asm/feature-fixups.h>
 
-        .section        ".toc","aw"
-PPC64_CACHES:
-        .tc             ppc64_caches[TC],ppc64_caches
-        .section        ".text"
-
 _GLOBAL_TOC(copy_page)
 BEGIN_FTR_SECTION
 	lis	r5,PAGE_SIZE@h
@@ -24,7 +19,7 @@ FTR_SECTION_ELSE
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
-	ld      r10,PPC64_CACHES@toc(r2)
+	LOAD_REG_ADDR(r10, ppc64_caches)
 	lwz	r11,DCACHEL1LOGBLOCKSIZE(r10)	/* log2 of cache block size */
 	lwz     r12,DCACHEL1BLOCKSIZE(r10)	/* get cache block size */
 	li	r9,0
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 993d3f31832a..31f40f544de5 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -550,7 +550,7 @@ void do_barrier_nospec_fixups(bool enable)
 }
 #endif /* CONFIG_PPC_BARRIER_NOSPEC */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
 {
 	unsigned int instr[2], *dest;
@@ -602,7 +602,7 @@ void __init do_btb_flush_fixups(void)
 	for (; start < end; start += 2)
 		patch_btb_flush_section(start);
 }
-#endif /* CONFIG_PPC_FSL_BOOK3E */
+#endif /* CONFIG_PPC_E500 */
 
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 169872bc0892..df41ce06f86b 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -11,11 +11,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/export.h>
 
-	.section	".toc","aw"
-PPC64_CACHES:
-	.tc		ppc64_caches[TC],ppc64_caches
-	.section	".text"
-
 /**
  * __arch_clear_user: - Zero a block of memory in user space, with less checking.
  * @to:   Destination address, in user space.
@@ -133,7 +128,7 @@ err1;	stb	r0,0(r3)
 	blr
 
 .Llong_clear:
-	ld	r5,PPC64_CACHES@toc(r2)
+	LOAD_REG_ADDR(r5, ppc64_caches)
 
 	bf	cr7*4+0,11f
 err2;	std	r0,0(r3)
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index a8794032f15f..603e59c3db10 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -17,4 +17,9 @@ obj-$(CONFIG_SPE)		+= math_efp.o
 CFLAGS_fabs.o = -fno-builtin-fabs
 CFLAGS_math.o = -fno-builtin-fabs
 
-ccflags-y = -w
+ccflags-remove-y = -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+
+ifdef KBUILD_EXTRA_WARN
+CFLAGS_math.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+CFLAGS_math_efp.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+endif
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index 36761bd00f38..936a9a149037 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -24,9 +24,9 @@ FLOATFUNC(mtfsf);
 FLOATFUNC(mtfsfi);
 
 #ifdef CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED
-#undef FLOATFUNC(x)
+#undef FLOATFUNC
 #define FLOATFUNC(x)	static inline int x(void *op1, void *op2, void *op3, \
-						 void *op4) { }
+						 void *op4) { return 0; }
 #endif
 
 FLOATFUNC(fadd);
@@ -396,28 +396,28 @@ do_mathemu(struct pt_regs *regs)
 
 	case XCR:
 		op0 = (void *)&regs->ccr;
-		op1 = (void *)((insn >> 23) & 0x7);
+		op1 = (void *)(long)((insn >> 23) & 0x7);
 		op2 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
 		op3 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
 		break;
 
 	case XCRL:
 		op0 = (void *)&regs->ccr;
-		op1 = (void *)((insn >> 23) & 0x7);
-		op2 = (void *)((insn >> 18) & 0x7);
+		op1 = (void *)(long)((insn >> 23) & 0x7);
+		op2 = (void *)(long)((insn >> 18) & 0x7);
 		break;
 
 	case XCRB:
-		op0 = (void *)((insn >> 21) & 0x1f);
+		op0 = (void *)(long)((insn >> 21) & 0x1f);
 		break;
 
 	case XCRI:
-		op0 = (void *)((insn >> 23) & 0x7);
-		op1 = (void *)((insn >> 12) & 0xf);
+		op0 = (void *)(long)((insn >> 23) & 0x7);
+		op1 = (void *)(long)((insn >> 12) & 0xf);
 		break;
 
 	case XFLB:
-		op0 = (void *)((insn >> 17) & 0xff);
+		op0 = (void *)(long)((insn >> 17) & 0xff);
 		op1 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
 		break;
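
The (void *)(long) casts above exist because casting a 32-bit int straight to a 64-bit pointer trips -Wint-to-pointer-cast; widening through long first makes the conversion well-defined and quiet. A tiny standalone illustration:

#include <stdio.h>

int main(void)
{
	unsigned int field = 0x5;		/* e.g. (insn >> 23) & 0x7 */
	void *bad  = (void *)field;		/* warns on 64-bit builds */
	void *good = (void *)(long)field;	/* quiet: int -> long -> ptr */

	printf("%p %p\n", bad, good);
	return 0;
}
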
 
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
index 39b84e7452e1..34f62aafe706 100644
--- a/arch/powerpc/math-emu/math_efp.c
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -17,6 +17,7 @@
 
 #include <linux/types.h>
 #include <linux/prctl.h>
+#include <linux/module.h>
 
 #include <linux/uaccess.h>
 #include <asm/reg.h>
@@ -218,6 +219,7 @@ int do_spe_mathemu(struct pt_regs *regs)
 		case AB:
 		case XCR:
 			FP_UNPACK_SP(SA, va.wp + 1);
+			fallthrough;
 		case XB:
 			FP_UNPACK_SP(SB, vb.wp + 1);
 			break;
@@ -226,8 +228,8 @@ int do_spe_mathemu(struct pt_regs *regs)
 			break;
 		}
 
-		pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c);
-		pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c);
+		pr_debug("SA: %d %08x %d (%d)\n", SA_s, SA_f, SA_e, SA_c);
+		pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c);
 
 		switch (func) {
 		case EFSABS:
@@ -278,7 +280,7 @@ int do_spe_mathemu(struct pt_regs *regs)
 			} else {
 				SB_e += (func == EFSCTSF ? 31 : 32);
 				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
-						(func == EFSCTSF));
+						(func == EFSCTSF) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -287,7 +289,7 @@ int do_spe_mathemu(struct pt_regs *regs)
 			FP_CLEAR_EXCEPTIONS;
 			FP_UNPACK_DP(DB, vb.dp);
 
-			pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
+			pr_debug("DB: %d %08x %08x %d (%d)\n",
 					DB_s, DB_f1, DB_f0, DB_e, DB_c);
 
 			FP_CONV(S, D, 1, 2, SR, DB);
@@ -301,7 +303,7 @@ int do_spe_mathemu(struct pt_regs *regs)
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -312,7 +314,7 @@ int do_spe_mathemu(struct pt_regs *regs)
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_S(vc.wp[1], SB, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -322,7 +324,7 @@ int do_spe_mathemu(struct pt_regs *regs)
 		break;
 
 pack_s:
-		pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c);
+		pr_debug("SR: %d %08x %d (%d)\n", SR_s, SR_f, SR_e, SR_c);
 
 		FP_PACK_SP(vc.wp + 1, SR);
 		goto update_regs;
@@ -346,6 +348,7 @@ cmp_s:
 		case AB:
 		case XCR:
 			FP_UNPACK_DP(DA, va.dp);
+			fallthrough;
 		case XB:
 			FP_UNPACK_DP(DB, vb.dp);
 			break;
@@ -354,9 +357,9 @@ cmp_s:
 			break;
 		}
 
-		pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n",
+		pr_debug("DA: %d %08x %08x %d (%d)\n",
 				DA_s, DA_f1, DA_f0, DA_e, DA_c);
-		pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
+		pr_debug("DB: %d %08x %08x %d (%d)\n",
 				DB_s, DB_f1, DB_f0, DB_e, DB_c);
 
 		switch (func) {
@@ -408,7 +411,7 @@ cmp_s:
 			} else {
 				DB_e += (func == EFDCTSF ? 31 : 32);
 				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
-						(func == EFDCTSF));
+						(func == EFDCTSF) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -417,7 +420,7 @@ cmp_s:
 			FP_CLEAR_EXCEPTIONS;
 			FP_UNPACK_SP(SB, vb.wp + 1);
 
-			pr_debug("SB: %ld %08lx %ld (%ld)\n",
+			pr_debug("SB: %d %08x %d (%d)\n",
 					SB_s, SB_f, SB_e, SB_c);
 
 			FP_CONV(D, S, 2, 1, DR, SB);
@@ -431,7 +434,7 @@ cmp_s:
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_D(vc.dp[0], DB, 64,
-						((func & 0x1) == 0));
+						((func & 0x1) == 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -442,7 +445,7 @@ cmp_s:
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -453,7 +456,7 @@ cmp_s:
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_D(vc.wp[1], DB, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -463,7 +466,7 @@ cmp_s:
 		break;
 
 pack_d:
-		pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n",
+		pr_debug("DR: %d %08x %08x %d (%d)\n",
 				DR_s, DR_f1, DR_f0, DR_e, DR_c);
 
 		FP_PACK_DP(vc.dp, DR);
@@ -492,6 +495,7 @@ cmp_d:
 		case XCR:
 			FP_UNPACK_SP(SA0, va.wp);
 			FP_UNPACK_SP(SA1, va.wp + 1);
+			fallthrough;
 		case XB:
 			FP_UNPACK_SP(SB0, vb.wp);
 			FP_UNPACK_SP(SB1, vb.wp + 1);
@@ -502,13 +506,13 @@ cmp_d:
 			break;
 		}
 
-		pr_debug("SA0: %ld %08lx %ld (%ld)\n",
+		pr_debug("SA0: %d %08x %d (%d)\n",
 				SA0_s, SA0_f, SA0_e, SA0_c);
-		pr_debug("SA1: %ld %08lx %ld (%ld)\n",
+		pr_debug("SA1: %d %08x %d (%d)\n",
 				SA1_s, SA1_f, SA1_e, SA1_c);
-		pr_debug("SB0: %ld %08lx %ld (%ld)\n",
+		pr_debug("SB0: %d %08x %d (%d)\n",
 				SB0_s, SB0_f, SB0_e, SB0_c);
-		pr_debug("SB1: %ld %08lx %ld (%ld)\n",
+		pr_debug("SB1: %d %08x %d (%d)\n",
 				SB1_s, SB1_f, SB1_e, SB1_c);
 
 		switch (func) {
@@ -567,7 +571,7 @@ cmp_d:
 			} else {
 				SB0_e += (func == EVFSCTSF ? 31 : 32);
 				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
-						(func == EVFSCTSF));
+						(func == EVFSCTSF) ? 1 : 0);
 			}
 			if (SB1_c == FP_CLS_NAN) {
 				vc.wp[1] = 0;
@@ -575,7 +579,7 @@ cmp_d:
 			} else {
 				SB1_e += (func == EVFSCTSF ? 31 : 32);
 				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
-						(func == EVFSCTSF));
+						(func == EVFSCTSF) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -586,14 +590,14 @@ cmp_d:
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			if (SB1_c == FP_CLS_NAN) {
 				vc.wp[1] = 0;
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -604,14 +608,14 @@ cmp_d:
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_S(vc.wp[0], SB0, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			if (SB1_c == FP_CLS_NAN) {
 				vc.wp[1] = 0;
 				FP_SET_EXCEPTION(FP_EX_INVALID);
 			} else {
 				FP_TO_INT_S(vc.wp[1], SB1, 32,
-						((func & 0x3) != 0));
+						((func & 0x3) != 0) ? 1 : 0);
 			}
 			goto update_regs;
 
@@ -621,9 +625,9 @@ cmp_d:
 		break;
 
 pack_vs:
-		pr_debug("SR0: %ld %08lx %ld (%ld)\n",
+		pr_debug("SR0: %d %08x %d (%d)\n",
 				SR0_s, SR0_f, SR0_e, SR0_c);
-		pr_debug("SR1: %ld %08lx %ld (%ld)\n",
+		pr_debug("SR1: %d %08x %d (%d)\n",
 				SR1_s, SR1_f, SR1_e, SR1_c);
 
 		FP_PACK_SP(vc.wp, SR0);
@@ -886,7 +890,7 @@ int speround_handler(struct pt_regs *regs)
 	return 0;
 }
 
-int __init spe_mathemu_init(void)
+static int __init spe_mathemu_init(void)
 {
 	u32 pvr, maj, min;
 
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index a96b73006dfb..850783cfa9c7 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -158,7 +158,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
 unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
 	unsigned long done;
-	unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+	unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET;
 	unsigned long size;
 
 	size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET);
@@ -240,7 +240,7 @@ void mmu_mark_rodata_ro(void)
 	for (i = 0; i < nb; i++) {
 		struct ppc_bat *bat = BATS[i];
 
-		if (bat_addrs[i].start < (unsigned long)__init_begin)
+		if (bat_addrs[i].start < (unsigned long)__end_rodata)
 			bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
 	}
 
@@ -314,11 +314,9 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea)
  *
  * This must always be called with the pte lock held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		      pte_t *ptep)
 {
-	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
-		return;
 	/*
 	 * We don't need to worry about _PAGE_PRESENT here because we are
 	 * called with either mm->page_table_lock held or ptl lock held
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
index 19f0ef950d77..9ad6b56bfec9 100644
--- a/arch/powerpc/mm/book3s32/tlb.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -81,14 +81,15 @@ EXPORT_SYMBOL(hash__flush_range);
 void hash__flush_tlb_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *mp;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	/*
-	 * It is safe to go down the mm's list of vmas when called
-	 * from dup_mmap, holding mmap_lock.  It would also be safe from
-	 * unmap_region or exit_mmap, but not from vmtruncate on SMP -
-	 * but it seems dup_mmap is the only SMP case which gets here.
+	 * It is safe to iterate the vmas when called from dup_mmap,
+	 * holding mmap_lock.  It would also be safe from unmap_region
+	 * or exit_mmap, but not from vmtruncate on SMP; in practice
+	 * dup_mmap seems to be the only SMP case which gets here.
 	 */
-	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+	for_each_vma(vmi, mp)
 		hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
 }
 EXPORT_SYMBOL(hash__flush_tlb_mm);
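
This is one of several conversions in this series from the old mm->mmap/vm_next linked list to the maple-tree VMA iterator. The general shape of the new walk, as a minimal sketch:

#include <linux/mm.h>
#include <linux/mm_types.h>

static void walk_all_vmas(struct mm_struct *mm)
{
	VMA_ITERATOR(vmi, mm, 0);	/* start address 0 */
	struct vm_area_struct *vma;

	mmap_read_lock(mm);
	/* for_each_vma_range() is the bounded variant used elsewhere below. */
	for_each_vma(vmi, vma)
		pr_debug("vma %lx-%lx\n", vma->vm_start, vma->vm_end);
	mmap_read_unlock(mm);
}
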
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index ae008b9df0e6..747492edb75a 100644
--- a/arch/powerpc/mm/book3s64/hash_pgtable.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -256,7 +256,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
 	 * the __collapse_huge_page_copy can result in copying
 	 * the old content.
 	 */
-	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+	flush_hash_table_pmd_range(vma->vm_mm, &pmd, address);
 	return pmd;
 }
 
@@ -541,7 +541,7 @@ void hash__mark_rodata_ro(void)
 	unsigned long start, end, pp;
 
 	start = (unsigned long)_stext;
-	end = (unsigned long)__init_begin;
+	end = (unsigned long)__end_rodata;
 
 	pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
 
diff --git a/arch/powerpc/mm/book3s64/hash_tlb.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index eb0bccaf221e..a64ea0a7ef96 100644
--- a/arch/powerpc/mm/book3s64/hash_tlb.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -221,7 +221,7 @@ void __flush_hash_table_range(unsigned long start, unsigned long end)
 	local_irq_restore(flags);
 }
 
-void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
 {
 	pte_t *pte;
 	pte_t *start_pte;
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 363a9447d63d..df008edf7be0 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -123,11 +123,8 @@ EXPORT_SYMBOL_GPL(mmu_slb_size);
 #ifdef CONFIG_PPC_64K_PAGES
 int mmu_ci_restrictions;
 #endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
 static u8 *linear_map_hash_slots;
 static unsigned long linear_map_hash_count;
-static DEFINE_SPINLOCK(linear_map_hash_lock);
-#endif /* CONFIG_DEBUG_PAGEALLOC */
 struct mmu_hash_ops mmu_hash_ops;
 EXPORT_SYMBOL(mmu_hash_ops);
 
@@ -427,11 +424,9 @@ repeat:
 			break;
 
 		cond_resched();
-#ifdef CONFIG_DEBUG_PAGEALLOC
-		if (debug_pagealloc_enabled() &&
+		if (debug_pagealloc_enabled_or_kfence() &&
 			(paddr >> PAGE_SHIFT) < linear_map_hash_count)
 			linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
 	}
 	return ret < 0 ? ret : 0;
 }
@@ -778,7 +773,7 @@ static void __init htab_init_page_sizes(void)
 	bool aligned = true;
 	init_hpte_page_sizes();
 
-	if (!debug_pagealloc_enabled()) {
+	if (!debug_pagealloc_enabled_or_kfence()) {
 		/*
 		 * Pick a size for the linear mapping. Currently, we only
 		 * support 16M, 1M and 4K which is the default
@@ -1066,8 +1061,7 @@ static void __init htab_initialize(void)
 
 	prot = pgprot_val(PAGE_KERNEL);
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	if (debug_pagealloc_enabled()) {
+	if (debug_pagealloc_enabled_or_kfence()) {
 		linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
 		linear_map_hash_slots = memblock_alloc_try_nid(
 				linear_map_hash_count, 1, MEMBLOCK_LOW_LIMIT,
@@ -1076,7 +1070,6 @@ static void __init htab_initialize(void)
 			panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
 			      __func__, linear_map_hash_count, &ppc64_rma_size);
 	}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
 
 	/* create bolted the linear mapping in the hash table */
 	for_each_mem_range(i, &base, &end) {
@@ -1781,7 +1774,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
  *
  * This must always be called with the pte lock held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		      pte_t *ptep)
 {
 	/*
@@ -1791,9 +1784,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	unsigned long trap;
 	bool is_exec;
 
-	if (radix_enabled())
-		return;
-
 	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
 	if (!pte_young(*ptep) || address >= TASK_SIZE)
 		return;
@@ -1990,7 +1980,9 @@ repeat:
 	return slot;
 }
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+static DEFINE_SPINLOCK(linear_map_hash_lock);
+
 static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
 	unsigned long hash;
@@ -2005,6 +1997,9 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 	if (!vsid)
 		return;
 
+	if (linear_map_hash_slots[lmi] & 0x80)
+		return;
+
 	ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
 				    HPTE_V_BOLTED,
 				    mmu_linear_psize, mmu_kernel_ssize);
@@ -2024,7 +2019,10 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
 
 	hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
 	spin_lock(&linear_map_hash_lock);
-	BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
+	if (!(linear_map_hash_slots[lmi] & 0x80)) {
+		spin_unlock(&linear_map_hash_lock);
+		return;
+	}
 	hidx = linear_map_hash_slots[lmi] & 0x7f;
 	linear_map_hash_slots[lmi] = 0;
 	spin_unlock(&linear_map_hash_lock);
@@ -2055,7 +2053,7 @@ void hash__kernel_map_pages(struct page *page, int numpages, int enable)
 	}
 	local_irq_restore(flags);
 }
-#endif /* CONFIG_DEBUG_PAGEALLOC */
+#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */
 
 void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 				phys_addr_t first_memblock_size)
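
The checks added above rely on the encoding of linear_map_hash_slots[]: the top bit records whether the page is currently bolted into the hash table, and the low seven bits keep the hash slot index (hidx) needed to invalidate it later. Relaxing the BUG_ON matters for KFENCE, where a page may legitimately be unmapped without ever having been mapped. A standalone sketch of that byte encoding:

#include <stdio.h>
#include <stdint.h>

static uint8_t slots[16];	/* one byte per linear-map page */

static void map_page(unsigned long lmi, uint8_t hidx)
{
	if (slots[lmi] & 0x80)		/* already mapped: nothing to do */
		return;
	slots[lmi] = 0x80 | (hidx & 0x7f);
}

static void unmap_page(unsigned long lmi)
{
	if (!(slots[lmi] & 0x80))	/* never mapped (e.g. KFENCE): skip */
		return;
	printf("invalidate hidx %u\n", slots[lmi] & 0x7f);
	slots[lmi] = 0;
}

int main(void)
{
	map_page(3, 5);
	unmap_page(3);	/* prints: invalidate hidx 5 */
	unmap_page(3);	/* second call is now a no-op, not a BUG_ON */
	return 0;
}
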
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 7b9966402b25..f6151a589298 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -553,8 +553,15 @@ EXPORT_SYMBOL_GPL(memremap_compat_align);
 
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-	unsigned long prot = pgprot_val(protection_map[vm_flags &
-					(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
+	unsigned long prot;
+
+	/* Radix supports execute-only, but protection_map maps X -> RX */
+	if (radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)) {
+		prot = pgprot_val(PAGE_EXECONLY);
+	} else {
+		prot = pgprot_val(protection_map[vm_flags &
+						 (VM_ACCESS_FLAGS | VM_SHARED)]);
+	}
 
 	if (vm_flags & VM_SAO)
 		prot |= _PAGE_SAO;
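
The effect of the PAGE_EXECONLY branch above is user-visible: on a Radix MMU a PROT_EXEC-only mapping no longer silently gains read permission via protection_map. A small userspace sketch of the new behaviour (the SIGSEGV noted in the comment assumes a Radix host):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *p = mmap(NULL, 4096, PROT_EXEC,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* Reading *p here would now SIGSEGV on Radix (a read fault on a
	 * VM_EXEC-only vma), matching the fault.c change further down. */
	printf("mapped X-only at %p\n", p);
	munmap(p, 4096);
	return 0;
}
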
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index e712f80fe189..cac727b01799 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -30,9 +30,12 @@
 #include <asm/trace.h>
 #include <asm/uaccess.h>
 #include <asm/ultravisor.h>
+#include <asm/set_memory.h>
 
 #include <trace/events/thp.h>
 
+#include <mm/mmu_decl.h>
+
 unsigned int mmu_base_pid;
 unsigned long radix_mem_block_size __ro_after_init;
 
@@ -228,7 +231,7 @@ void radix__mark_rodata_ro(void)
 	unsigned long start, end;
 
 	start = (unsigned long)_stext;
-	end = (unsigned long)__init_begin;
+	end = (unsigned long)__end_rodata;
 
 	radix__change_memory_range(start, end, _PAGE_WRITE);
 }
@@ -259,21 +262,24 @@ print_mapping(unsigned long start, unsigned long end, unsigned long size, bool e
 static unsigned long next_boundary(unsigned long addr, unsigned long end)
 {
 #ifdef CONFIG_STRICT_KERNEL_RWX
-	if (addr < __pa_symbol(__init_begin))
-		return __pa_symbol(__init_begin);
+	if (addr < __pa_symbol(__srwx_boundary))
+		return __pa_symbol(__srwx_boundary);
 #endif
 	return end;
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
 					     unsigned long end,
-					     unsigned long max_mapping_size,
 					     int nid, pgprot_t _prot)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
 	bool prev_exec, exec = false;
 	pgprot_t prot;
 	int psize;
+	unsigned long max_mapping_size = radix_mem_block_size;
+
+	if (debug_pagealloc_enabled_or_kfence())
+		max_mapping_size = PAGE_SIZE;
 
 	start = ALIGN(start, PAGE_SIZE);
 	end   = ALIGN_DOWN(end, PAGE_SIZE);
@@ -352,7 +358,6 @@ static void __init radix_init_pgtable(void)
 		}
 
 		WARN_ON(create_physical_mapping(start, end,
-						radix_mem_block_size,
 						-1, PAGE_KERNEL));
 	}
 
@@ -850,7 +855,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
 	}
 
 	return create_physical_mapping(__pa(start), __pa(end),
-				       radix_mem_block_size, nid, prot);
+				       nid, prot);
 }
 
 int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -896,10 +901,17 @@ void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long
 #endif
 #endif
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 void radix__kernel_map_pages(struct page *page, int numpages, int enable)
 {
-	pr_warn_once("DEBUG_PAGEALLOC not supported in radix mode\n");
+	unsigned long addr;
+
+	addr = (unsigned long)page_address(page);
+
+	if (enable)
+		set_memory_p(addr, numpages);
+	else
+		set_memory_np(addr, numpages);
 }
 #endif
 
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 60c6ea16a972..d73b3b4176e8 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -149,24 +149,15 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 				    unsigned long len)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, addr);
 
 	/*
 	 * We don't try too hard, we just mark all the vma in that range
 	 * VM_NOHUGEPAGE and split them.
 	 */
-	vma = find_vma(mm, addr);
-	/*
-	 * If the range is in unmapped range, just return
-	 */
-	if (vma && ((addr + len) <= vma->vm_start))
-		return;
-
-	while (vma) {
-		if (vma->vm_start >= (addr + len))
-			break;
+	for_each_vma_range(vmi, vma, addr + len) {
 		vma->vm_flags |= VM_NOHUGEPAGE;
 		walk_page_vma(vma, &subpage_walk_ops, NULL);
-		vma = vma->vm_next;
 	}
 }
 #else
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 014005428687..2bef19cc1b98 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -270,7 +270,11 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma
 		return false;
 	}
 
-	if (unlikely(!vma_is_accessible(vma)))
+	/*
+	 * Check for a read fault.  This could be caused by a read on an
+	 * inaccessible page (i.e. PROT_NONE), or a Radix MMU execute-only page.
+	 */
+	if (unlikely(!(vma->vm_flags & VM_READ)))
 		return true;
 	/*
 	 * We should ideally do the vma pkey access check here. But in the
@@ -367,7 +371,22 @@ static void sanity_check_fault(bool is_write, bool is_user,
 #elif defined(CONFIG_PPC_8xx)
 #define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
 #elif defined(CONFIG_PPC64)
-#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
+static int page_fault_is_bad(unsigned long err)
+{
+	unsigned long flag = DSISR_BAD_FAULT_64S;
+
+	/*
+	 * PAPR+ v2.11 § 14.15.3.4.1 (unreleased)
+	 * If byte 0, bit 3 of pi-attribute-specifier-type in
+	 * ibm,pi-features property is defined, ignore the DSI error
+	 * which is caused by the paste instruction on the
+	 * suspended NX window.
+	 */
+	if (mmu_has_feature(MMU_FTR_NX_DSI))
+		flag &= ~DSISR_BAD_COPYPASTE;
+
+	return err & flag;
+}
 #else
 #define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
 #endif
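
page_fault_is_bad() becoming a function lets the set of fatal DSISR bits vary at runtime: with MMU_FTR_NX_DSI the copy/paste error bit is masked out rather than treated as fatal. A standalone sketch of that filtering, with illustrative bit values rather than the real DSISR definitions:

#include <stdio.h>
#include <stdint.h>

#define BAD_FAULT	0x00400000UL	/* illustrative values only */
#define BAD_COPYPASTE	0x00000008UL

static int fault_is_bad(uint64_t err, int has_nx_dsi)
{
	uint64_t flag = BAD_FAULT | BAD_COPYPASTE;

	/* Paste DSIs on a suspended NX window are expected: not fatal. */
	if (has_nx_dsi)
		flag &= ~BAD_COPYPASTE;

	return (err & flag) != 0;
}

int main(void)
{
	printf("%d %d\n",
	       fault_is_bad(BAD_COPYPASTE, 0),	/* 1: fatal */
	       fault_is_bad(BAD_COPYPASTE, 1));	/* 0: ignored */
	return 0;
}
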
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index bc84a594ca62..5852a86d990d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -392,7 +392,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		 * single hugepage, but all of them point to
 		 * the same kmem cache that holds the hugepte.
 		 */
-		more = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
+		more = addr + (1UL << hugepd_shift(*(hugepd_t *)pmd));
 		if (more > next)
 			next = more;
 
@@ -434,7 +434,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
 			 * single hugepage, but all of them point to
 			 * the same kmem cache that holds the hugepte.
 			 */
-			more = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
+			more = addr + (1UL << hugepd_shift(*(hugepd_t *)pud));
 			if (more > next)
 				next = more;
 
@@ -496,7 +496,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 			 * for a single hugepage, but all of them point to the
 			 * same kmem cache that holds the hugepte.
 			 */
-			more = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
+			more = addr + (1UL << hugepd_shift(*(hugepd_t *)pgd));
 			if (more > next)
 				next = more;
 
@@ -623,7 +623,7 @@ static int __init hugetlbpage_init(void)
 		if (pdshift > shift) {
 			if (!IS_ENABLED(CONFIG_PPC_8xx))
 				pgtable_cache_add(pdshift - shift);
-		} else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) ||
+		} else if (IS_ENABLED(CONFIG_PPC_E500) ||
 			   IS_ENABLED(CONFIG_PPC_8xx)) {
 			pgtable_cache_add(PTE_T_ORDER);
 		}
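
The 1 -> 1UL changes above fix a classic width bug: 1 << shift is evaluated in int, so for shifts of 31 or more the result is undefined before it ever reaches the 64-bit address arithmetic. A standalone illustration:

#include <stdio.h>

int main(void)
{
	unsigned int shift = 34;	/* plausible hugepd_shift() value */
	unsigned long a = 1UL << shift;	/* 0x400000000: correct */

	printf("1UL << %u = %#lx\n", shift, a);
	/* "1 << 34" would be undefined behaviour in 32-bit int. */
	return 0;
}
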
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 62d9af6606cd..d4cc3749e621 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -82,28 +82,15 @@ void __init MMU_init(void)
 	if (ppc_md.progress)
 		ppc_md.progress("MMU:enter", 0x111);
 
-	/*
-	 * Reserve gigantic pages for hugetlb.  This MUST occur before
-	 * lowmem_end_addr is initialized below.
-	 */
-	if (memblock.memory.cnt > 1) {
-#ifndef CONFIG_WII
-		memblock_enforce_memory_limit(memblock.memory.regions[0].size);
-		pr_warn("Only using first contiguous memory region\n");
-#else
-		wii_memory_fixups();
-#endif
-	}
-
 	total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
 	lowmem_end_addr = memstart_addr + total_lowmem;
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 	/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
 	 * entries, so we need to adjust lowmem to match the amount we can map
 	 * in the fixed entries */
 	adjust_total_lowmem();
-#endif /* CONFIG_FSL_BOOKE */
+#endif /* CONFIG_PPC_85xx */
 
 	if (total_lowmem > __max_low_memory) {
 		total_lowmem = __max_low_memory;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 01772e79fd93..84d171953ba4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -302,13 +302,13 @@ void __init mem_init(void)
 		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
 			phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
 			struct page *page = pfn_to_page(pfn);
-			if (!memblock_is_reserved(paddr))
+			if (memblock_is_memory(paddr) && !memblock_is_reserved(paddr))
 				free_highmem_page(page);
 		}
 	}
 #endif /* CONFIG_HIGHMEM */
 
-#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP)
 	/*
 	 * If smp is enabled, next_tlbcam_idx is initialized in the cpu up
 	 * functions.... do it here for the non-smp case.
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 229c72e49198..bd9784f77f2e 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid)
 #else /* CONFIG_40x || CONFIG_PPC_8xx */
 extern void _tlbil_all(void);
 extern void _tlbil_pid(unsigned int pid);
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 extern void _tlbil_pid_noind(unsigned int pid);
 #else
 #define _tlbil_pid_noind(pid)	_tlbil_pid(pid)
@@ -55,7 +55,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
 	asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
 	trace_tlbie(0, 0, address, pid, 0, 0, 0);
 }
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
 extern void _tlbil_va(unsigned long address, unsigned int pid,
 		      unsigned int tsize, unsigned int ind);
 #else
@@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
 }
 #endif /* CONFIG_PPC_8xx */
 
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
+#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_47x)
 extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
 			   unsigned int tsize, unsigned int ind);
 #else
@@ -92,28 +92,16 @@ extern void mapin_ram(void);
 extern void setbat(int index, unsigned long virt, phys_addr_t phys,
 		   unsigned int size, pgprot_t prot);
 
-extern unsigned int rtas_data, rtas_size;
-
-struct hash_pte;
 extern u8 early_hash[];
 
 #endif /* CONFIG_PPC32 */
 
 extern unsigned long __max_low_memory;
-extern phys_addr_t __initial_memory_limit_addr;
 extern phys_addr_t total_memory;
 extern phys_addr_t total_lowmem;
 extern phys_addr_t memstart_addr;
 extern phys_addr_t lowmem_end_addr;
 
-#ifdef CONFIG_WII
-extern unsigned long wii_hole_start;
-extern unsigned long wii_hole_size;
-
-extern unsigned long wii_mmu_mapin_mem2(unsigned long top);
-extern void wii_memory_fixups(void);
-#endif
-
 /* ...and now those things that may be slightly different between processor
  * architectures.  -- Dan
  */
@@ -123,11 +111,9 @@ void MMU_init_hw_patch(void);
 unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
 #endif
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
 				     bool dryrun, bool init);
-extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
-				 phys_addr_t phys);
 #ifdef CONFIG_PPC32
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
@@ -160,9 +146,9 @@ struct tlbcam {
 extern struct tlbcam TLBCAM[NUM_TLBCAMS];
 #endif
 
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_8xx)
 /* 6xx have BATS */
-/* FSL_BOOKE have TLBCAM */
+/* PPC_85xx have TLBCAM */
 /* 8xx have LTLB */
 phys_addr_t v_block_mapped(unsigned long va);
 unsigned long p_block_mapped(phys_addr_t pa);
@@ -171,7 +157,7 @@ static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; }
 static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
 #endif
 
-#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500)
 void mmu_mark_initmem_nx(void);
 void mmu_mark_rodata_ro(void);
 #else
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
index b467a25ee155..f3894e79d5f7 100644
--- a/arch/powerpc/mm/nohash/Makefile
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -7,13 +7,13 @@ obj-$(CONFIG_PPC_BOOK3E_64)  	+= tlb_low_64e.o book3e_pgtable.o
 obj-$(CONFIG_40x)		+= 40x.o
 obj-$(CONFIG_44x)		+= 44x.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx.o
-obj-$(CONFIG_PPC_FSL_BOOK3E)	+= fsl_book3e.o
+obj-$(CONFIG_PPC_E500)		+= e500.o
 obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr_booke.o
 ifdef CONFIG_HUGETLB_PAGE
-obj-$(CONFIG_PPC_FSL_BOOK3E)	+= book3e_hugetlbpage.o
+obj-$(CONFIG_PPC_E500)		+= e500_hugetlbpage.o
 endif
 
 # Disable kcov instrumentation on sensitive code
 # This is necessary for booting with kcov enabled on book3e machines
 KCOV_INSTRUMENT_tlb.o := n
-KCOV_INSTRUMENT_fsl_book3e.o := n
+KCOV_INSTRUMENT_e500.o := n
diff --git a/arch/powerpc/mm/nohash/fsl_book3e.c b/arch/powerpc/mm/nohash/e500.c
index b8ae6c08c06f..40a4e69ae1a9 100644
--- a/arch/powerpc/mm/nohash/fsl_book3e.c
+++ b/arch/powerpc/mm/nohash/e500.c
@@ -59,7 +59,7 @@ static struct {
 	phys_addr_t phys;
 } tlbcam_addrs[NUM_TLBCAMS];
 
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
 /*
  * Return PA for this VA if it is mapped by a CAM, or 0
  */
@@ -135,8 +135,8 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 	tlbcam_addrs[index].phys = phys;
 }
 
-unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
-			  phys_addr_t phys)
+static unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+				 phys_addr_t phys)
 {
 	unsigned int camsize = __ilog2(ram);
 	unsigned int align = __ffs(virt | phys);
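
With its extern prototype gone from mmu_decl.h, calc_cam_sz() becomes static. Conceptually it picks the largest TLB CAM entry that both fits the remaining RAM and honours the common alignment of the virtual and physical addresses; a simplified sketch of that calculation (the real function also clamps the result to the MMU's maximum supported TSIZE, omitted here):

	/* Simplified: largest power-of-two block that covers `ram` and is
	 * aligned at both `virt` and `phys`.
	 */
	static unsigned long cam_sz_sketch(unsigned long ram, unsigned long virt,
					   phys_addr_t phys)
	{
		unsigned int camsize = __ilog2(ram);	 /* log2 of RAM left to map */
		unsigned int align = __ffs(virt | phys); /* log2 of common alignment */

		return 1UL << min(camsize, align);
	}
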
diff --git a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
index c7d4b317a823..c7d4b317a823 100644
--- a/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
+++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 5e7ccb48b79c..2c15c86c7015 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -49,8 +49,7 @@
  * other sizes not listed here.   The .ind field is only used on MMUs that have
  * indirect page table entries.
  */
-#if defined(CONFIG_PPC_BOOK3E_MMU) || defined(CONFIG_PPC_8xx)
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
@@ -81,7 +80,20 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 		.enc	= BOOK3E_PAGESZ_1GB,
 	},
 };
-#elif defined(CONFIG_PPC_8xx)
+
+static inline int mmu_get_tsize(int psize)
+{
+	return mmu_psize_defs[psize].enc;
+}
+#else
+static inline int mmu_get_tsize(int psize)
+{
+	/* This isn't used on !Book3E for now */
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC_8xx
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
@@ -96,53 +108,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
 		.shift	= 23,
 	},
 };
-#else
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
-	[MMU_PAGE_4K] = {
-		.shift	= 12,
-		.ind	= 20,
-		.enc	= BOOK3E_PAGESZ_4K,
-	},
-	[MMU_PAGE_16K] = {
-		.shift	= 14,
-		.enc	= BOOK3E_PAGESZ_16K,
-	},
-	[MMU_PAGE_64K] = {
-		.shift	= 16,
-		.ind	= 28,
-		.enc	= BOOK3E_PAGESZ_64K,
-	},
-	[MMU_PAGE_1M] = {
-		.shift	= 20,
-		.enc	= BOOK3E_PAGESZ_1M,
-	},
-	[MMU_PAGE_16M] = {
-		.shift	= 24,
-		.ind	= 36,
-		.enc	= BOOK3E_PAGESZ_16M,
-	},
-	[MMU_PAGE_256M] = {
-		.shift	= 28,
-		.enc	= BOOK3E_PAGESZ_256M,
-	},
-	[MMU_PAGE_1G] = {
-		.shift	= 30,
-		.enc	= BOOK3E_PAGESZ_1GB,
-	},
-};
-#endif /* CONFIG_FSL_BOOKE */
-
-static inline int mmu_get_tsize(int psize)
-{
-	return mmu_psize_defs[psize].enc;
-}
-#else
-static inline int mmu_get_tsize(int psize)
-{
-	/* This isn't used on !Book3E for now */
-	return 0;
-}
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif
 
 /* The variables below are currently only used on 64-bit Book3E
  * though this will probably be made common with other nohash
@@ -166,7 +132,7 @@ int extlb_level_exc;
 
 #endif /* CONFIG_PPC64 */
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 /* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
 DEFINE_PER_CPU(int, next_tlbcam_idx);
 EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
@@ -441,7 +407,7 @@ static void __init setup_page_sizes(void)
 	unsigned int eptcfg;
 	int i, psize;
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	unsigned int mmucfg = mfspr(SPRN_MMUCFG);
 	int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
 
@@ -584,7 +550,7 @@ static void __init setup_mmu_htw(void)
 		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
 		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
 		break;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	case PPC_HTW_E6500:
 		extlb_level_exc = EX_TLB_SIZE;
 		patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
@@ -627,7 +593,7 @@ static void early_init_this_mmu(void)
 	}
 	mtspr(SPRN_MAS4, mas4);
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
 		unsigned int num_cams;
 		bool map = true;
@@ -680,7 +646,7 @@ static void __init early_init_mmu_global(void)
 	/* Look for HW tablewalk support */
 	setup_mmu_htw();
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
 		if (book3e_htw_mode == PPC_HTW_NONE) {
 			extlb_level_exc = EX_TLB_SIZE;
@@ -701,7 +667,7 @@ static void __init early_init_mmu_global(void)
 
 static void __init early_mmu_set_memory_limit(void)
 {
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
 		/*
 		 * Limit memory so we don't have linear faults.
@@ -750,7 +716,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 	 * We crop it to the size of the first MEMBLOCK to
 	 * avoid going over total available memory just in case...
 	 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
 	if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
 		unsigned long linear_sz;
 		unsigned int num_cams;
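
The restructured #ifdef ladder earlier in this file leaves exactly one mmu_psize_defs[] definition per build and hoists mmu_get_tsize() out of the table selection. For reference, the .enc values in the e500 table follow the Book3E TSIZE encoding, where a page of 2^shift bytes encodes as shift - 10:

	/* Book3E TSIZE encoding behind the .enc fields:
	 *   4K  (shift 12) -> 2     64K (shift 16) -> 6
	 *   16M (shift 24) -> 14    1G  (shift 30) -> 20
	 * mmu_get_tsize() is a straight lookup of that encoding:
	 */
	static inline int mmu_get_tsize(int psize)
	{
		return mmu_psize_defs[psize].enc;	/* == shift - 10 here */
	}
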
diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index d62b613a0d5d..e1199608ff4d 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -221,7 +221,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
 	blr
 #endif /* CONFIG_PPC_47x */
 
-#elif defined(CONFIG_FSL_BOOKE)
+#elif defined(CONFIG_PPC_85xx)
 /*
  * FSL BookE implementations.
  *
@@ -294,7 +294,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
 	isync
 1:	wrtee	r10
 	blr
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
 /*
  * New Book3E (>= 2.06) implementation
  *
@@ -364,7 +364,7 @@ _GLOBAL(_tlbivax_bcast)
 #error Unsupported processor type !
 #endif
 
-#if defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_E500)
 /*
  * extern void loadcam_entry(unsigned int index)
  *
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 68ffbfdba894..76cf456d7976 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -61,7 +61,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 	ld	r14,PACAPGD(r13)
 	std	r15,EX_TLB_R15(r12)
 	std	r10,EX_TLB_CR(r12)
-#ifdef CONFIG_PPC_FSL_BOOK3E
 START_BTB_FLUSH_SECTION
 	mfspr r11, SPRN_SRR1
 	andi. r10,r11,MSR_PR
@@ -70,14 +69,11 @@ START_BTB_FLUSH_SECTION
 1:
 END_BTB_FLUSH_SECTION
 	std	r7,EX_TLB_R7(r12)
-#endif
 .endm
 
 .macro tlb_epilog_bolted
 	ld	r14,EX_TLB_CR(r12)
-#ifdef CONFIG_PPC_FSL_BOOK3E
 	ld	r7,EX_TLB_R7(r12)
-#endif
 	ld	r10,EX_TLB_R10(r12)
 	ld	r11,EX_TLB_R11(r12)
 	ld	r13,EX_TLB_R13(r12)
@@ -248,7 +244,6 @@ itlb_miss_fault_bolted:
 	beq	tlb_miss_user_bolted
 	b	itlb_miss_kernel_bolted
 
-#ifdef CONFIG_PPC_FSL_BOOK3E
 /*
  * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
  *
@@ -515,7 +510,6 @@ dtlb_miss_fault_e6500:
 itlb_miss_fault_e6500:
 	tlb_epilog_bolted
 	b	exc_instruction_storage_book3e
-#endif /* CONFIG_PPC_FSL_BOOK3E */
 
 /**********************************************************************
  *                                                                    *
@@ -1124,8 +1118,8 @@ tlb_load_linear:
 	 * we only use 1G pages for now. That might have to be changed in a
 	 * final implementation, especially when dealing with hypervisors
 	 */
-	ld	r11,PACATOC(r13)
-	ld	r11,linear_map_top@got(r11)
+	__LOAD_PACA_TOC(r11)
+	LOAD_REG_ADDR_ALTTOC(r11, r11, linear_map_top)
 	ld	r10,0(r11)
 	tovirt(10,10)
 	cmpld	cr0,r16,r10
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0801b2ce9b7d..b44ce71917d7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1160,6 +1160,9 @@ void __init mem_topology_setup(void)
 {
 	int cpu;
 
+	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
+
 	/*
 	 * Linux/mm assumes node 0 to be online at boot. However this is not
 	 * true on PowerPC, where node 0 is similar to any other node, it
@@ -1204,9 +1207,6 @@ void __init initmem_init(void)
 {
 	int nid;
 
-	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
-	max_pfn = max_low_pfn;
-
 	memblock_dump_all();
 
 	for_each_online_node(nid) {
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 3ac73f9fb5d5..5c02fd08d61e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -158,10 +158,11 @@ void mark_rodata_ro(void)
 	}
 
 	/*
-	 * mark .text and .rodata as read only. Use __init_begin rather than
-	 * __end_rodata to cover NOTES and EXCEPTION_TABLE.
+	 * Mark text and rodata as read-only. __end_rodata is set by
+	 * powerpc's linker script and includes tables and data
+	 * requiring relocation, which are not put in RO_DATA.
 	 */
-	numpages = PFN_UP((unsigned long)__init_begin) -
+	numpages = PFN_UP((unsigned long)__end_rodata) -
 		   PFN_DOWN((unsigned long)_stext);
 
 	set_memory_ro((unsigned long)_stext, numpages);
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
index b533caaf0910..dc896d2874f3 100644
--- a/arch/powerpc/mm/ptdump/Makefile
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -4,7 +4,7 @@ obj-y	+= ptdump.o
 
 obj-$(CONFIG_4xx)		+= shared.o
 obj-$(CONFIG_PPC_8xx)		+= 8xx.o
-obj-$(CONFIG_PPC_BOOK3E_MMU)	+= shared.o
+obj-$(CONFIG_PPC_E500)		+= shared.o
 obj-$(CONFIG_PPC_BOOK3S_32)	+= shared.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= book3s64.o
 
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
index 1aa3259716b8..47ba05d5ae76 100644
--- a/arch/powerpc/perf/bhrb.S
+++ b/arch/powerpc/perf/bhrb.S
@@ -21,7 +21,7 @@
 _GLOBAL(read_bhrb)
 	cmpldi	r3,31
 	bgt	1f
-	ld	r4,bhrb_table@got(r2)
+	LOAD_REG_ADDR(r4, bhrb_table)
 	sldi	r3,r3,3
 	add	r3,r4,r3
 	mtctr	r3
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
index b83c47b7947f..ea8cfe3806dc 100644
--- a/arch/powerpc/perf/callchain_32.c
+++ b/arch/powerpc/perf/callchain_32.c
@@ -19,7 +19,7 @@
 #include "callchain.h"
 
 #ifdef CONFIG_PPC64
-#include "../kernel/ppc32.h"
+#include <asm/syscalls_32.h>
 #else  /* CONFIG_PPC64 */
 
 #define __SIGNAL_FRAMESIZE32	__SIGNAL_FRAMESIZE
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 13919eb96931..942aa830e110 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2131,6 +2131,23 @@ static int power_pmu_event_init(struct perf_event *event)
 	if (has_branch_stack(event)) {
 		u64 bhrb_filter = -1;
 
+		/*
+		 * Currently no PMU supports having multiple branch filters
+		 * at the same time. Branch filters are set via MMCRA IFM[32:33]
+		 * bits for Power8 and above. Return EOPNOTSUPP when multiple
+		 * branch filters are requested in the event attr.
+		 *
+		 * When opening an event via perf_event_open(), the kernel
+		 * automatically adjusts branch_sample_type in perf_copy_attr()
+		 * based on the event modifier settings to include
+		 * PERF_SAMPLE_BRANCH_PLM_ALL. Hence drop the check for
+		 * PERF_SAMPLE_BRANCH_PLM_ALL.
+		 */
+		if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) {
+			local_irq_restore(irq_flags);
+			return -EOPNOTSUPP;
+		}
+
 		if (ppmu->bhrb_filter_map)
 			bhrb_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
@@ -2297,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			cpuhw = this_cpu_ptr(&cpu_hw_events);
 			power_pmu_bhrb_read(event, cpuhw);
 			data.br_stack = &cpuhw->bhrb_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 		}
 
 		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
-						ppmu->get_mem_data_src)
+						ppmu->get_mem_data_src) {
 			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
+			data.sample_flags |= PERF_SAMPLE_DATA_SRC;
+		}
 
 		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
-						ppmu->get_mem_weight)
+						ppmu->get_mem_weight) {
 			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
-
+			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
 		if (perf_event_overflow(event, &data, regs))
 			power_pmu_stop(event, 0);
 	} else if (period) {
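
A concrete example of the new filter check, using branch_sample_type bits from the generic perf UAPI:

	/* Two filters at once: two bits stay set after masking off the
	 * privilege-level bits, so hweight64() > 1 and the event is rejected
	 * with -EOPNOTSUPP -- MMCRA IFM[32:33] can encode only one filter.
	 */
	u64 type = PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_ANY_RETURN |
		   PERF_SAMPLE_BRANCH_USER;	/* PLM bit, ignored by the check */

	if (hweight64(type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1)
		return -EOPNOTSUPP;
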
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index d7976ab40d38..d517aba94d1b 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -240,8 +240,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
 	ct = of_get_child_count(pmu_events);
 
 	/* Get the event prefix */
-	if (of_property_read_string(node, "events-prefix", &prefix))
+	if (of_property_read_string(node, "events-prefix", &prefix)) {
+		of_node_put(pmu_events);
 		return 0;
+	}
 
 	/* Get a global unit and scale data if available */
 	if (of_property_read_string(node, "scale", &g_scale))
@@ -255,8 +257,10 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
 
 	/* Allocate memory for the events */
 	pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
-	if (!pmu->events)
+	if (!pmu->events) {
+		of_node_put(pmu_events);
 		return -ENOMEM;
+	}
 
 	ct = 0;
 	/* Parse the events and update the struct */
@@ -266,6 +270,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
 			ct++;
 	}
 
+	of_node_put(pmu_events);
+
 	/* Allocate memory for attribute group */
 	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
 	if (!attr_group) {
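
These of_node_put() additions all enforce the same device-tree refcount rule, sketched below. pmu_events is obtained earlier in update_events_in_group(), outside the hunk (of_get_child_by_name() is assumed here); do_parse() is a hypothetical stand-in for the parsing work:

	/* Nodes returned by the of_get_ and of_find_ helpers carry a
	 * reference that must be dropped on every exit path, early errors
	 * included.
	 */
	struct device_node *pmu_events = of_get_child_by_name(node, "events");
	int err;

	if (!pmu_events)
		return 0;

	err = do_parse(pmu_events);	/* hypothetical */
	of_node_put(pmu_events);	/* balanced on success and failure alike */
	return err;
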
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
index 20cc8f80b086..7c91ac5a5241 100644
--- a/arch/powerpc/platforms/44x/ppc476.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -140,6 +140,8 @@ static void __init ppc47x_init_irq(void)
 		ppc_md.get_irq = mpic_get_irq;
 	} else
 		panic("Unrecognized top level interrupt controller");
+
+	of_node_put(np);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index 0652c7e69225..42abeba4f698 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -950,7 +950,7 @@ static void __init mpc5121_clk_register_of_provider(struct device_node *np)
  */
 static void __init mpc5121_clk_provide_migration_support(void)
 {
-
+	struct device_node *np;
 	/*
 	 * pre-enable those clock items which are not yet appropriately
 	 * acquired by their peripheral driver
@@ -970,7 +970,9 @@ static void __init mpc5121_clk_provide_migration_support(void)
 	 * unused and so it gets disabled
 	 */
 	clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */
-	if (of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci"))
+	np = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci");
+	of_node_put(np);
+	if (np)
 		clk_prepare_enable(clks[MPC512x_CLK_PCI]);
 }
 
@@ -1208,6 +1210,8 @@ int __init mpc5121_clk_init(void)
 	/* register as an OF clock provider */
 	mpc5121_clk_register_of_provider(clk_np);
 
+	of_node_put(clk_np);
+
 	/*
 	 * unbreak not yet adjusted peripheral drivers during migration
 	 * towards fully operational common clock support, and allow
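
The put-immediately pattern above is safe because the node is only tested for presence and never dereferenced; distilled into a helper (the helper name is hypothetical):

	/* Presence test only: drop the reference right away and use the
	 * pointer purely as a non-NULL flag. The node must never be
	 * dereferenced after the put.
	 */
	static bool dt_node_present(const char *type, const char *compat)
	{
		struct device_node *np = of_find_compatible_node(NULL, type, compat);

		of_node_put(np);
		return np != NULL;
	}
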
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index ee367ff3ec8a..33a35fff11b5 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -174,6 +174,8 @@ static void __init media5200_init_irq(void)
 		goto out;
 	pr_debug("%s: allocated irqhost\n", __func__);
 
+	of_node_put(fpga_np);
+
 	irq_set_handler_data(cascade_virq, &media5200_irq);
 	irq_set_chained_handler(cascade_virq, media5200_irq_cascade);
 
@@ -181,6 +183,7 @@ static void __init media5200_init_irq(void)
 
  out:
 	pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n");
+	of_node_put(fpga_np);
 }
 
 /*
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index bb8caa5071f8..e12cb44e717f 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -162,6 +162,8 @@ static struct spi_board_info mpc832x_spi_boardinfo = {
 
 static int __init mpc832x_spi_init(void)
 {
+	struct device_node *np;
+
 	par_io_config_pin(3,  0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */
 	par_io_config_pin(3,  1, 3, 0, 1, 0); /* SPI1 MISO, I/O */
 	par_io_config_pin(3,  2, 3, 0, 1, 0); /* SPI1 CLK,  I/O */
@@ -175,7 +177,9 @@ static int __init mpc832x_spi_init(void)
 	 * Don't bother with legacy stuff when device tree contains
 	 * mmc-spi-slot node.
 	 */
-	if (of_find_compatible_node(NULL, NULL, "mmc-spi-slot"))
+	np = of_find_compatible_node(NULL, NULL, "mmc-spi-slot");
+	of_node_put(np);
+	if (np)
 		return 0;
 	return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control);
 }
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index be16eba0f704..b92cb2b4d54d 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 menuconfig FSL_SOC_BOOKE
 	bool "Freescale Book-E Machine Type"
-	depends on PPC_85xx || PPC_BOOK3E
+	depends on PPC_E500
 	select FSL_SOC
 	select PPC_UDBG_16550
 	select MPIC
@@ -241,8 +241,6 @@ endif # PPC32
 config PPC_QEMU_E500
 	bool "QEMU generic e500 platform"
 	select DEFAULT_UIMAGE
-	select E500
-	select PPC_E500MC if PPC64
 	help
 	  This option enables support for running as a QEMU guest using
 	  QEMU's generic e500 machine.  This is not required if you're
@@ -258,7 +256,6 @@ config PPC_QEMU_E500
 config CORENET_GENERIC
 	bool "Freescale CoreNet Generic"
 	select DEFAULT_UIMAGE
-	select E500
 	select PPC_E500MC
 	select PHYS_64BIT
 	select SWIOTLB
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 28d6b36f1ccd..2c539de2d629 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -200,9 +200,5 @@ define_machine(corenet_generic) {
 #endif
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= udbg_progress,
-#ifdef CONFIG_PPC64
-	.power_save		= book3e_idle,
-#else
 	.power_save		= e500_idle,
-#endif
 };
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index 8e827376d97b..e3e8f18825a1 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -89,8 +89,10 @@ static void __init ge_imp3a_pci_assign_primary(void)
 		    of_device_is_compatible(np, "fsl,mpc8548-pcie") ||
 		    of_device_is_compatible(np, "fsl,p2020-pcie")) {
 			of_address_to_resource(np, 0, &rsrc);
-			if ((rsrc.start & 0xfffff) == 0x9000)
-				fsl_pci_primary = np;
+			if ((rsrc.start & 0xfffff) == 0x9000) {
+				of_node_put(fsl_pci_primary);
+				fsl_pci_primary = of_node_get(np);
+			}
 		}
 	}
 #endif
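
for_each_node_by_type() drops the cursor's reference each time it advances, so a node cached beyond the loop needs a reference of its own; the hunk takes one with of_node_get() and releases whatever fsl_pci_primary held before. In miniature (is_primary() is a hypothetical predicate):

	struct device_node *np, *cached = NULL;

	for_each_node_by_type(np, "pci") {
		if (is_primary(np)) {
			of_node_put(cached);	/* of_node_put(NULL) is a no-op */
			cached = of_node_get(np);
		}
	}
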
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index bdf9d42f8521..a22f02b0fc77 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -133,6 +133,8 @@ static void __init ksi8560_setup_arch(void)
 	else
 		printk(KERN_ERR "Can't find CPLD in device tree\n");
 
+	of_node_put(cpld);
+
 	if (ppc_md.progress)
 		ppc_md.progress("ksi8560_setup_arch()", 0);
 
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
index 48f3acfece0b..0b8f2101c5fb 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
@@ -159,6 +159,7 @@ static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev)
 			else
 				dev->irq = 10;
 			pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+			break;
 		default:
 			break;
 		}
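
The added break closes a switch fallthrough. The default case here is empty, so the bug was latent, but the shape is the classic one:

	switch (PCI_SLOT(dev->devfn)) {
	case 0x11:	/* hypothetical slot number */
		dev->irq = 10;
		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
		break;	/* without this, control falls into default */
	default:
		break;
	}
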
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 64109ad6736c..1639e222cc33 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -68,9 +68,5 @@ define_machine(qemu_e500) {
 	.get_irq		= mpic_get_coreint_irq,
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= udbg_progress,
-#ifdef CONFIG_PPC64
-	.power_save		= book3e_idle,
-#else
 	.power_save		= e500_idle,
-#endif
 };
diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
index 98ae64075193..e14d1b74d4e4 100644
--- a/arch/powerpc/platforms/85xx/sgy_cts1000.c
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -71,6 +71,7 @@ static int gpio_halt_probe(struct platform_device *pdev)
 {
 	enum of_gpio_flags flags;
 	struct device_node *node = pdev->dev.of_node;
+	struct device_node *child_node;
 	int gpio, err, irq;
 	int trigger;
 
@@ -78,26 +79,29 @@ static int gpio_halt_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	/* If there's no matching child, this isn't really an error */
-	halt_node = of_find_matching_node(node, child_match);
-	if (!halt_node)
+	child_node = of_find_matching_node(node, child_match);
+	if (!child_node)
 		return 0;
 
 	/* Technically we could just read the first one, but punish
 	 * DT writers for invalid form. */
-	if (of_gpio_count(halt_node) != 1)
-		return -EINVAL;
+	if (of_gpio_count(child_node) != 1) {
+		err = -EINVAL;
+		goto err_put;
+	}
 
 	/* Get the gpio number relative to the dynamic base. */
-	gpio = of_get_gpio_flags(halt_node, 0, &flags);
-	if (!gpio_is_valid(gpio))
-		return -EINVAL;
+	gpio = of_get_gpio_flags(child_node, 0, &flags);
+	if (!gpio_is_valid(gpio)) {
+		err = -EINVAL;
+		goto err_put;
+	}
 
 	err = gpio_request(gpio, "gpio-halt");
 	if (err) {
 		printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n",
 		       gpio);
-		halt_node = NULL;
-		return err;
+		goto err_put;
 	}
 
 	trigger = (flags == OF_GPIO_ACTIVE_LOW);
@@ -105,15 +109,14 @@ static int gpio_halt_probe(struct platform_device *pdev)
 	gpio_direction_output(gpio, !trigger);
 
 	/* Now get the IRQ which tells us when the power button is hit */
-	irq = irq_of_parse_and_map(halt_node, 0);
+	irq = irq_of_parse_and_map(child_node, 0);
 	err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING |
-			  IRQF_TRIGGER_FALLING, "gpio-halt", halt_node);
+			  IRQF_TRIGGER_FALLING, "gpio-halt", child_node);
 	if (err) {
 		printk(KERN_ERR "gpio-halt: error requesting IRQ %d for "
 		       "GPIO %d.\n", irq, gpio);
 		gpio_free(gpio);
-		halt_node = NULL;
-		return err;
+		goto err_put;
 	}
 
 	/* Register our halt function */
@@ -123,7 +126,12 @@ static int gpio_halt_probe(struct platform_device *pdev)
 	printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d"
 	       " irq).\n", gpio, trigger, irq);
 
+	halt_node = child_node;
 	return 0;
+
+err_put:
+	of_node_put(child_node);
+	return err;
 }
 
 static int gpio_halt_remove(struct platform_device *pdev)
@@ -139,6 +147,7 @@ static int gpio_halt_remove(struct platform_device *pdev)
 
 		gpio_free(gpio);
 
+		of_node_put(halt_node);
 		halt_node = NULL;
 	}
 
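
The probe rework combines two ideas: operate on a local child_node and publish it to the global halt_node only once every step has succeeded, and funnel all failures through a single label that drops the reference. A condensed sketch (claim_resources() is a hypothetical stand-in for the GPIO and IRQ setup):

	static int probe_sketch(struct platform_device *pdev)
	{
		struct device_node *child;
		int err;

		child = of_find_matching_node(pdev->dev.of_node, child_match);
		if (!child)
			return 0;		/* no matching child: not an error */

		err = claim_resources(child);	/* hypothetical helper */
		if (err)
			goto err_put;

		halt_node = child;		/* publish only on full success */
		return 0;

	err_put:
		of_node_put(child);
		return err;
	}
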
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index 3725d51248df..ffcfd17a5fa3 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -105,6 +105,9 @@ static void __init init_ioports(void)
 	if (dnode == NULL)
 		return;
 	prop = of_find_property(dnode, "ethernet1", &len);
+
+	of_node_put(dnode);
+
 	if (prop == NULL)
 		return;
 
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 5185d942b455..0c4eed9aea80 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -33,7 +33,7 @@ config PPC_BOOK3S_32
 
 config PPC_85xx
 	bool "Freescale 85xx"
-	select E500
+	select PPC_E500
 
 config PPC_8xx
 	bool "Freescale 8xx"
@@ -107,7 +107,8 @@ config PPC_BOOK3S_64
 
 config PPC_BOOK3E_64
 	bool "Embedded processors"
-	select PPC_FSL_BOOK3E
+	select PPC_E500
+	select PPC_E500MC
 	select PPC_FPU # Make it a choice ?
 	select PPC_SMP_MUXED_IPI
 	select PPC_DOORBELL
@@ -125,7 +126,7 @@ choice
 	  If unsure, select Generic.
 
 config GENERIC_CPU
-	bool "Generic (POWER4 and above)"
+	bool "Generic (POWER5 and PowerPC 970 and above)"
 	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 	select PPC_64S_HASH_MMU
 
@@ -144,8 +145,8 @@ config CELL_CPU
 	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 	select PPC_64S_HASH_MMU
 
-config POWER5_CPU
-	bool "POWER5"
+config PPC_970_CPU
+	bool "PowerPC 970 (including PowerPC G5)"
 	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 	select PPC_64S_HASH_MMU
 
@@ -171,13 +172,18 @@ config POWER9_CPU
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
 
+config POWER10_CPU
+	bool "POWER10"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_FAST_MULTIPLIER
+
 config E5500_CPU
 	bool "Freescale e5500"
-	depends on PPC64 && E500
+	depends on PPC64 && PPC_E500
 
 config E6500_CPU
 	bool "Freescale e6500"
-	depends on PPC64 && E500
+	depends on PPC64 && PPC_E500
 
 config 405_CPU
 	bool "40x family"
@@ -234,11 +240,12 @@ config TARGET_CPU
 	string
 	depends on TARGET_CPU_BOOL
 	default "cell" if CELL_CPU
-	default "power5" if POWER5_CPU
+	default "970" if PPC_970_CPU
 	default "power6" if POWER6_CPU
 	default "power7" if POWER7_CPU
 	default "power8" if POWER8_CPU
 	default "power9" if POWER9_CPU
+	default "power10" if POWER10_CPU
 	default "405" if 405_CPU
 	default "440" if 440_CPU
 	default "464" if 464_CPU
@@ -255,20 +262,19 @@ config PPC_BOOK3S
 	def_bool y
 	depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
 
-config PPC_BOOK3E
-	def_bool y
-	depends on PPC_BOOK3E_64
-
-config E500
+config PPC_E500
 	select FSL_EMB_PERFMON
-	select PPC_FSL_BOOK3E
 	bool
+	select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
+	select PPC_SMP_MUXED_IPI
+	select PPC_DOORBELL
+	select PPC_KUEP
 
 config PPC_E500MC
 	bool "e500mc Support"
 	select PPC_FPU
 	select COMMON_CLK
-	depends on E500
+	depends on PPC_E500
 	help
 	  This must be enabled for running on e500mc (and derivatives
 	  such as e5500/e6500), and must be disabled for running on
@@ -291,7 +297,7 @@ config PPC_FPU
 
 config FSL_EMB_PERFMON
 	bool "Freescale Embedded Perfmon"
-	depends on E500 || PPC_83xx
+	depends on PPC_E500 || PPC_83xx
 	help
 	  This is the Performance Monitor support found on the e500 core
 	  and some e300 cores (c3 and c4).  Select this only if your
@@ -304,7 +310,7 @@ config FSL_EMB_PERF_EVENT
 
 config FSL_EMB_PERF_EVENT_E500
 	bool
-	depends on FSL_EMB_PERF_EVENT && E500
+	depends on FSL_EMB_PERF_EVENT && PPC_E500
 	default y
 
 config 4xx
@@ -314,7 +320,7 @@ config 4xx
 
 config BOOKE
 	bool
-	depends on E500 || 44x || PPC_BOOK3E
+	depends on PPC_E500 || 44x
 	default y
 
 config BOOKE_OR_40x
@@ -322,29 +328,14 @@ config BOOKE_OR_40x
 	depends on BOOKE || 40x
 	default y
 
-config FSL_BOOKE
-	bool
-	depends on E500 && PPC32
-	default y
-
-# this is for common code between PPC32 & PPC64 FSL BOOKE
-config PPC_FSL_BOOK3E
-	bool
-	select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
-	imply FSL_EMB_PERFMON
-	select PPC_SMP_MUXED_IPI
-	select PPC_DOORBELL
-	select PPC_KUEP
-	default y if FSL_BOOKE
-
 config PTE_64BIT
 	bool
-	depends on 44x || E500 || PPC_86xx
+	depends on 44x || PPC_E500 || PPC_86xx
 	default y if PHYS_64BIT
 
 config PHYS_64BIT
-	bool 'Large physical address support' if E500 || PPC_86xx
-	depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+	bool 'Large physical address support' if PPC_E500 || PPC_86xx
+	depends on (44x || PPC_E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
 	select PHYS_ADDR_T_64BIT
 	help
 	  This option enables kernel support for larger than 32-bit physical
@@ -391,7 +382,7 @@ config VSX
 
 config SPE_POSSIBLE
 	def_bool y
-	depends on E500 && !PPC_E500MC
+	depends on PPC_E500 && !PPC_E500MC
 
 config SPE
 	bool "SPE Support"
@@ -481,10 +472,6 @@ config PPC_MMU_NOHASH
 	def_bool y
 	depends on !PPC_BOOK3S
 
-config PPC_BOOK3E_MMU
-	def_bool y
-	depends on FSL_BOOKE || PPC_BOOK3E
-
 config PPC_HAVE_PMU_SUPPORT
 	bool
 
@@ -506,7 +493,7 @@ config FORCE_SMP
 	select SMP
 
 config SMP
-	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x
+	depends on PPC_BOOK3S || PPC_E500 || PPC_47x
 	select GENERIC_IRQ_MIGRATION
 	bool "Symmetric multi-processing support" if !FORCE_SMP
 	help
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index c0799fb26b6d..40f5ae5e1238 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -431,7 +431,7 @@ static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
 	 * The window may be inactive due to lost credit (Ex: core
 	 * removal with DLPAR). If the window is active again when
 	 * the credit is available, map the new paste address at the
-	 * the window virtual address.
+	 * window virtual address.
 	 */
 	if (txwin->status == VAS_WIN_ACTIVE) {
 		paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
index 316e533afc00..fb4023f9ea6b 100644
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -182,9 +182,16 @@ static struct device_node *__init cbe_get_be_node(int cpu_id)
 		if (WARN_ON_ONCE(!cpu_handle))
 			return np;
 
-		for (i=0; i<len; i++)
-			if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
+		for (i = 0; i < len; i++) {
+			struct device_node *ch_np = of_find_node_by_phandle(cpu_handle[i]);
+			struct device_node *ci_np = of_get_cpu_node(cpu_id, NULL);
+
+			of_node_put(ch_np);
+			of_node_put(ci_np);
+
+			if (ch_np == ci_np)
 				return np;
+		}
 	}
 
 	return NULL;
@@ -193,21 +200,30 @@ static struct device_node *__init cbe_get_be_node(int cpu_id)
 static void __init cbe_fill_regs_map(struct cbe_regs_map *map)
 {
 	if(map->be_node) {
-		struct device_node *be, *np;
+		struct device_node *be, *np, *parent_np;
 
 		be = map->be_node;
 
-		for_each_node_by_type(np, "pervasive")
-			if (of_get_parent(np) == be)
+		for_each_node_by_type(np, "pervasive") {
+			parent_np = of_get_parent(np);
+			if (parent_np == be)
 				map->pmd_regs = of_iomap(np, 0);
+			of_node_put(parent_np);
+		}
 
-		for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
-			if (of_get_parent(np) == be)
+		for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") {
+			parent_np = of_get_parent(np);
+			if (parent_np == be)
 				map->iic_regs = of_iomap(np, 2);
+			of_node_put(parent_np);
+		}
 
-		for_each_node_by_type(np, "mic-tm")
-			if (of_get_parent(np) == be)
+		for_each_node_by_type(np, "mic-tm") {
+			parent_np = of_get_parent(np);
+			if (parent_np == be)
 				map->mic_tm_regs = of_iomap(np, 0);
+			of_node_put(parent_np);
+		}
 	} else {
 		struct device_node *cpu;
 		/* That hack must die die die ! */
@@ -261,7 +277,8 @@ void __init cbe_regs_init(void)
 			of_node_put(cpu);
 			return;
 		}
-		map->cpu_node = cpu;
+		of_node_put(map->cpu_node);
+		map->cpu_node = of_node_get(cpu);
 
 		for_each_possible_cpu(i) {
 			struct cbe_thread_map *thread = &cbe_thread_map[i];
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 0ca3efeef293..8c7133039566 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -720,8 +720,10 @@ static int __init cell_iommu_init_disabled(void)
 	cell_disable_iommus();
 
 	/* If we have no Axon, we set up the spider DMA magic offset */
-	if (of_find_node_by_name(NULL, "axon") == NULL)
+	np = of_find_node_by_name(NULL, "axon");
+	if (!np)
 		cell_dma_nommu_offset = SPIDER_DMA_OFFSET;
+	of_node_put(np);
 
 	/* Now we need to check to see where the memory is mapped
 	 * in PCI space. We assume that all busses use the same dma
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 52de014983c9..47eaf75349f2 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -167,6 +167,8 @@ static int __init cell_publish_devices(void)
 		of_platform_device_create(np, NULL, NULL);
 	}
 
+	of_node_put(root);
+
 	/* There is no device for the MIC memory controller, thus we create
 	 * a platform device for it to attach the EDAC driver to.
 	 */
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index fe0d8797a00a..e780c14c5733 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -34,15 +34,15 @@
  *	mbind, mq_open, ipc, ...
  */
 
-static void *spu_syscall_table[] = {
+static const syscall_fn spu_syscall_table[] = {
 #define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
-#define __SYSCALL(nr, entry) [nr] = entry,
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
 #include <asm/syscall_table_spu.h>
 };
 
 long spu_sys_callback(struct spu_syscall_block *s)
 {
-	long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6);
+	syscall_fn syscall;
 
 	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
 		pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret);
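
The table now carries the shared syscall_fn type instead of void *, so the call site no longer declares an ad-hoc function-pointer type. A standalone sketch of the same shape (this six-argument signature is an assumption based on the series this hunk belongs to):

	typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
				   unsigned long, unsigned long, unsigned long);

	static long dispatch(const syscall_fn *table, unsigned long nr_entries,
			     unsigned long nr, unsigned long a1, unsigned long a2)
	{
		if (nr >= nr_entries || !table[nr])
			return -ENOSYS;
		return table[nr](a1, a2, 0, 0, 0, 0);
	}
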
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index ae09c5a91b40..f1ac4c742069 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -488,6 +488,8 @@ static void __init init_affinity_node(int cbe)
 				avoid_ph = vic_dn->phandle;
 			}
 
+			of_node_put(vic_dn);
+
 			list_add_tail(&spu->aff_list, &last_spu->aff_list);
 			last_spu = spu;
 			break;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index af048b6dd30a..84958487f696 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -333,7 +333,6 @@ void spufs_stop_callback(struct spu *spu, int irq);
 void spufs_mfc_callback(struct spu *spu);
 void spufs_dma_callback(struct spu *spu, int type);
 
-extern struct spu_coredump_calls spufs_coredump_calls;
 struct spufs_coredump_reader {
 	char *name;
 	ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm);
@@ -341,7 +340,6 @@ struct spufs_coredump_reader {
 	size_t size;
 };
 extern const struct spufs_coredump_reader spufs_coredump_read[];
-extern int spufs_coredump_num_notes;
 
 extern int spu_init_csa(struct spu_state *csa);
 extern void spu_fini_csa(struct spu_state *csa);
diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h
index a5a7c338caf9..6ff4631d9db4 100644
--- a/arch/powerpc/platforms/chrp/chrp.h
+++ b/arch/powerpc/platforms/chrp/chrp.h
@@ -9,4 +9,3 @@ extern int chrp_set_rtc_time(struct rtc_time *);
 extern long chrp_time_init(void);
 
 extern void chrp_find_bridges(void);
-extern void chrp_event_scan(unsigned long);
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 78f2378d9223..bebc5a972694 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -123,6 +123,8 @@ static void __init holly_init_pci(void)
 	if (np)
 		tsi108_setup_pci(np, HOLLY_PCI_CFG_PHYS, 1);
 
+	of_node_put(np);
+
 	ppc_md.pci_exclude_device = holly_exclude_device;
 	if (ppc_md.progress)
 		ppc_md.progress("tsi108: resources set", 0x100);
@@ -184,6 +186,9 @@ static void __init holly_init_IRQ(void)
 	tsi108_pci_int_init(cascade_node);
 	irq_set_handler_data(cascade_pci_irq, mpic);
 	irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade);
+
+	of_node_put(tsi_pci);
+	of_node_put(cascade_node);
 #endif
 	/* Configure MPIC outputs to CPU0 */
 	tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
@@ -210,6 +215,7 @@ static void __noreturn holly_restart(char *cmd)
 	if (bridge) {
 		prop = of_get_property(bridge, "reg", &size);
 		addr = of_translate_address(bridge, prop);
+		of_node_put(bridge);
 	}
 	addr += (TSI108_PB_OFFSET + 0x414);
 
diff --git a/arch/powerpc/platforms/embedded6xx/ls_uart.c b/arch/powerpc/platforms/embedded6xx/ls_uart.c
index 0133e175a0fc..4ecbc55b37c0 100644
--- a/arch/powerpc/platforms/embedded6xx/ls_uart.c
+++ b/arch/powerpc/platforms/embedded6xx/ls_uart.c
@@ -124,6 +124,8 @@ static int __init ls_uarts_init(void)
 	avr_clock = *(u32*)of_get_property(avr, "clock-frequency", &len);
 	phys_addr = ((u32*)of_get_property(avr, "reg", &len))[0];
 
+	of_node_put(avr);
+
 	if (!avr_clock || !phys_addr)
 		return -EINVAL;
 
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 8b2b42210356..ddf0c652af80 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -135,6 +135,9 @@ static void __init mpc7448_hpc2_init_IRQ(void)
 	tsi108_pci_int_init(cascade_node);
 	irq_set_handler_data(cascade_pci_irq, mpic);
 	irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade);
+
+	of_node_put(tsi_pci);
+	of_node_put(cascade_node);
 #endif
 	/* Configure MPIC outputs to CPU0 */
 	tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 9e03ff8f631c..f4e654a9d4ff 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -15,8 +15,6 @@
 #include <linux/seq_file.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
-#include <linux/memblock.h>
-#include <mm/mmu_decl.h>
 
 #include <asm/io.h>
 #include <asm/machdep.h>
@@ -49,19 +47,6 @@
 static void __iomem *hw_ctrl;
 static void __iomem *hw_gpio;
 
-static int __init page_aligned(unsigned long x)
-{
-	return !(x & (PAGE_SIZE-1));
-}
-
-void __init wii_memory_fixups(void)
-{
-	struct memblock_region *p = memblock.memory.regions;
-
-	BUG_ON(memblock.memory.cnt != 2);
-	BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
-}
-
 static void __noreturn wii_spin(void)
 {
 	local_irq_disable();
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index 823e219ef8ee..91606411d2e0 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -153,6 +153,7 @@ time64_t __init maple_get_boot_time(void)
 		       maple_rtc_addr);
 	}
  bail:
+	of_node_put(rtcs);
 	if (maple_rtc_addr == 0) {
 		maple_rtc_addr = RTC_PORT(0); /* legacy address */
 		printk(KERN_INFO "Maple: No device node for RTC, assuming "
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
index f859ada29074..9e9a7e46288a 100644
--- a/arch/powerpc/platforms/pasemi/misc.c
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -36,8 +36,7 @@ static int __init find_i2c_driver(struct device_node *node,
 	for (i = 0; i < ARRAY_SIZE(i2c_devices); i++) {
 		if (!of_device_is_compatible(node, i2c_devices[i].of_device))
 			continue;
-		if (strlcpy(info->type, i2c_devices[i].i2c_type,
-			    I2C_NAME_SIZE) >= I2C_NAME_SIZE)
+		if (strscpy(info->type, i2c_devices[i].i2c_type, I2C_NAME_SIZE) < 0)
 			return -ENOMEM;
 		return 0;
 	}
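
The conversion changes the truncation check because the two helpers report differently: strlcpy() returns the length of the source string, while strscpy() returns the number of characters copied, or -E2BIG on truncation:

	char dst[8];

	/* old idiom: overflow detected by comparing against the buffer size */
	if (strlcpy(dst, src, sizeof(dst)) >= sizeof(dst))
		return -ENOMEM;		/* truncated */

	/* new idiom: a negative return signals truncation directly */
	if (strscpy(dst, src, sizeof(dst)) < 0)
		return -ENOMEM;		/* truncated */
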
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
index 55f0160910bf..f27d31414737 100644
--- a/arch/powerpc/platforms/pasemi/pci.c
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -270,18 +270,12 @@ static int __init pas_add_bridge(struct device_node *dev)
 
 void __init pas_pci_init(void)
 {
-	struct device_node *np, *root;
+	struct device_node *np;
 	int res;
 
-	root = of_find_node_by_path("/");
-	if (!root) {
-		pr_crit("pas_pci_init: can't find root of device tree\n");
-		return;
-	}
-
 	pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS);
 
-	np = of_find_compatible_node(root, NULL, "pasemi,rootbus");
+	np = of_find_compatible_node(of_root, NULL, "pasemi,rootbus");
 	if (np) {
 		res = pas_add_bridge(np);
 		of_node_put(np);
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 5cc958adba13..0382d20b5619 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2632,31 +2632,31 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
 		if (!macio_chips[i].of_node)
 			break;
 		if (macio_chips[i].of_node == node)
-			return;
+			goto out_put;
 	}
 
 	if (i >= MAX_MACIO_CHIPS) {
 		printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
 		printk(KERN_ERR "pmac_feature: %pOF skipped\n", node);
-		return;
+		goto out_put;
 	}
 	addrp = of_get_pci_address(node, 0, &size, NULL);
 	if (addrp == NULL) {
 		printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n",
 		       node);
-		return;
+		goto out_put;
 	}
 	addr = of_translate_address(node, addrp);
 	if (addr == 0) {
 		printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n",
 		       node);
-		return;
+		goto out_put;
 	}
 	base = ioremap(addr, (unsigned long)size);
 	if (!base) {
 		printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n",
 		       node);
-		return;
+		goto out_put;
 	}
 	if (type == macio_keylargo || type == macio_keylargo2) {
 		const u32 *did = of_get_property(node, "device-id", NULL);
@@ -2677,6 +2677,11 @@ static void __init probe_one_macio(const char *name, const char *compat, int typ
 		macio_chips[i].rev = *revp;
 	printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
 		macio_names[type], macio_chips[i].rev, macio_chips[i].base);
+
+	return;
+
+out_put:
+	of_node_put(node);
 }
 
 static int __init
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index c1c430c66dc9..40f3aa432fba 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -627,6 +627,7 @@ static void __init kw_i2c_probe(void)
 			if (parent == NULL)
 				continue;
 			chans = parent->name[0] == 'u' ? 2 : 1;
+			of_node_put(parent);
 			for (i = 0; i < chans; i++)
 				kw_i2c_add(host, np, np, i);
 		} else {
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 9c2947a3edd5..085e0ad20eba 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -136,6 +136,8 @@ static void __init macio_gpio_init_one(struct macio_chip *macio)
 	for_each_child_of_node(gparent, gp)
 		pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
 
+	of_node_put(gparent);
+
 	/* Note: We do not at this point implement the "at sleep" or "at wake"
 	 * functions. I have yet to find any for GPIOs anyway
 	 */
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index d9df45741ece..5b26a9012d2e 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -711,6 +711,7 @@ static void __init smp_core99_setup(int ncpus)
 			printk(KERN_INFO "Processor timebase sync using"
 			       " platform function\n");
 		}
+		of_node_put(cpus);
 	}
 
 #else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 734df5a32f99..1b7c39e841ee 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -81,10 +81,14 @@ void __init udbg_scc_init(int force_scc)
 	if (path != NULL)
 		stdout = of_find_node_by_path(path);
 	for_each_child_of_node(escc, ch) {
-		if (ch == stdout)
+		if (ch == stdout) {
+			of_node_put(ch_def);
 			ch_def = of_node_get(ch);
-		if (of_node_name_eq(ch, "ch-a"))
+		}
+		if (of_node_name_eq(ch, "ch-a")) {
+			of_node_put(ch_a);
 			ch_a = of_node_get(ch);
+		}
 	}
 	if (ch_def == NULL && !force_scc)
 		goto bail;
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 6f94b808dd39..841cb7f31f4f 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -1411,7 +1411,7 @@ static int __init pnv_parse_cpuidle_dt(void)
 		goto out;
 	}
 	for (i = 0; i < nr_idle_states; i++)
-		strlcpy(pnv_idle_states[i].name, temp_string[i],
+		strscpy(pnv_idle_states[i].name, temp_string[i],
 			PNV_IDLE_NAME_LEN);
 	nr_pnv_idle_states = nr_idle_states;
 	rc = 0;
@@ -1419,6 +1419,7 @@ out:
 	kfree(temp_u32);
 	kfree(temp_u64);
 	kfree(temp_string);
+	of_node_put(np);
 	return rc;
 }
 
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index 27c936075031..629067781cec 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -478,10 +478,8 @@ EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
 {
 	struct spa_data *data = (struct spa_data *) platform_data;
-	int rc;
 
-	rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
-	return rc;
+	return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
 }
 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
 
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
index adcb1a1a2bfe..bb7657115f1d 100644
--- a/arch/powerpc/platforms/powernv/opal-core.c
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -348,6 +348,8 @@ static int __init create_opalcore(void)
 	if (!dn || ret)
 		pr_warn("WARNING: Failed to read OPAL base & entry values\n");
 
+	of_node_put(dn);
+
 	/* Use count to keep track of the program headers */
 	count = 0;
 
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
index 64506b46e77b..7bfe4cbeb35a 100644
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -153,7 +153,7 @@ void __init opal_powercap_init(void)
 	pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps),
 			GFP_KERNEL);
 	if (!pcaps)
-		return;
+		goto out_put_powercap;
 
 	powercap_kobj = kobject_create_and_add("powercap", opal_kobj);
 	if (!powercap_kobj) {
@@ -226,6 +226,7 @@ void __init opal_powercap_init(void)
 		}
 		i++;
 	}
+	of_node_put(powercap);
 
 	return;
 
@@ -236,6 +237,9 @@ out_pcaps_pattrs:
 		kfree(pcaps[i].pg.name);
 	}
 	kobject_put(powercap_kobj);
+	of_node_put(node);
 out_pcaps:
 	kfree(pcaps);
+out_put_powercap:
+	of_node_put(powercap);
 }
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
index 69d7e75950d1..6441e17b6996 100644
--- a/arch/powerpc/platforms/powernv/opal-psr.c
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -135,7 +135,7 @@ void __init opal_psr_init(void)
 	psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
 			    GFP_KERNEL);
 	if (!psr_attrs)
-		return;
+		goto out_put_psr;
 
 	psr_kobj = kobject_create_and_add("psr", opal_kobj);
 	if (!psr_kobj) {
@@ -162,10 +162,14 @@ void __init opal_psr_init(void)
 		}
 		i++;
 	}
+	of_node_put(psr);
 
 	return;
 out_kobj:
+	of_node_put(node);
 	kobject_put(psr_kobj);
 out:
 	kfree(psr_attrs);
+out_put_psr:
+	of_node_put(psr);
 }
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 8fba7d25ae56..9944376b115c 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -170,7 +170,7 @@ void __init opal_sensor_groups_init(void)
 
 	sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL);
 	if (!sgs)
-		return;
+		goto out_sg_put;
 
 	sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj);
 	if (!sg_kobj) {
@@ -222,6 +222,7 @@ void __init opal_sensor_groups_init(void)
 		}
 		i++;
 	}
+	of_node_put(sg);
 
 	return;
 
@@ -231,6 +232,9 @@ out_sgs_sgattrs:
 		kfree(sgs[i].sg.attrs);
 	}
 	kobject_put(sg_kobj);
+	of_node_put(node);
 out_sgs:
 	kfree(sgs);
+out_sg_put:
+	of_node_put(sg);
 }
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index e5acc33b3b20..0ed95f753416 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -57,7 +57,7 @@ opal_return:
 	.long 0xa64b7b7d /* mthsrr1 r11				*/
 	.long 0x2402004c /* hrfid				*/
 #endif
-	ld	r2,PACATOC(r13)
+	LOAD_PACA_TOC()
 	ld	r0,PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 55a8fbfdb5b2..cdf3838f08d3 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -892,6 +892,7 @@ static void opal_export_attrs(void)
 	kobj = kobject_create_and_add("exports", opal_kobj);
 	if (!kobj) {
 		pr_warn("kobject_create_and_add() of exports failed\n");
+		of_node_put(np);
 		return;
 	}
 
@@ -952,6 +953,8 @@ static void __init opal_imc_init_dev(void)
 	np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
 	if (np)
 		of_platform_device_create(np, NULL, NULL);
+
+	of_node_put(np);
 }
 
 static int kopald(void *unused)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9de9b2fb163d..5c144c05cbfd 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -67,7 +67,7 @@ void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
 	vaf.va = &args;
 
 	if (pe->flags & PNV_IODA_PE_DEV)
-		strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
+		strscpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
 	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
 		sprintf(pfix, "%04x:%02x     ",
 			pci_domain_nr(pe->pbus), pe->pbus->number);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index dac545aa0308..61ab2d38ff4b 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -17,6 +17,7 @@
 #include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
+#include <linux/seq_buf.h>
 #include <linux/seq_file.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -207,8 +208,29 @@ static void __init pnv_setup_arch(void)
 	pnv_rng_init();
 }
 
+static void __init pnv_add_hw_description(void)
+{
+	struct device_node *dn;
+	const char *s;
+
+	dn = of_find_node_by_path("/ibm,opal/firmware");
+	if (!dn)
+		return;
+
+	if (of_property_read_string(dn, "version", &s) == 0 ||
+	    of_property_read_string(dn, "git-id", &s) == 0)
+		seq_buf_printf(&ppc_hw_desc, "opal:%s ", s);
+
+	if (of_property_read_string(dn, "mi-version", &s) == 0)
+		seq_buf_printf(&ppc_hw_desc, "mi:%s ", s);
+
+	of_node_put(dn);
+}
+
 static void __init pnv_init(void)
 {
+	pnv_add_hw_description();
+
 	/*
 	 * Initialize the LPC bus now so that legacy serial
 	 * ports can be found on it
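
pnv_add_hw_description() uses the seq_buf API pulled in above; ppc_hw_desc itself is declared elsewhere in the series. A minimal self-contained sketch of the same usage (the buffer size is an assumption):

	#include <linux/seq_buf.h>

	static char hw_desc_buf[128];
	static struct seq_buf hw_desc;

	static void hw_desc_example(const char *opal_version)
	{
		seq_buf_init(&hw_desc, hw_desc_buf, sizeof(hw_desc_buf));
		seq_buf_printf(&hw_desc, "opal:%s ", opal_version);
	}
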
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index fb6499977f99..a3b4d99567cb 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -23,13 +23,21 @@ config PPC_PSERIES
 	select SWIOTLB
 	default y
 
+config PARAVIRT
+	bool
+
 config PARAVIRT_SPINLOCKS
 	bool
 
+config PARAVIRT_TIME_ACCOUNTING
+	select PARAVIRT
+	bool
+
 config PPC_SPLPAR
 	bool "Support for shared-processor logical partitions"
 	depends on PPC_PSERIES
 	select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+	select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
 	default y
 	help
 	  Enabling this option will make the kernel run more efficiently
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 14e143b946a3..92310202bdd7 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -7,7 +7,7 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o event_sources.o ras.o \
 			   firmware.o power.o dlpar.o mobility.o rng.o \
 			   pci.o pci_dlpar.o eeh_pseries.o msi.o \
-			   papr_platform_attributes.o
+			   papr_platform_attributes.o dtl.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_KEXEC_CORE)	+= kexec.o
 obj-$(CONFIG_PSERIES_ENERGY)	+= pseries_energy.o
@@ -19,7 +19,6 @@ obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
 obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
 obj-$(CONFIG_CMM)		+= cmm.o
-obj-$(CONFIG_DTL)		+= dtl.o
 obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
 obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 obj-$(CONFIG_IBMVIO)		+= vio.o
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 352af5b14a0f..3f1cdccebc9c 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -18,6 +18,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/machdep.h>
 
+#ifdef CONFIG_DTL
 struct dtl {
 	struct dtl_entry	*buf;
 	int			cpu;
@@ -37,6 +38,15 @@ static u8 dtl_event_mask = DTL_LOG_ALL;
 static int dtl_buf_entries = N_DISPATCH_LOG;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log.  If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
 struct dtl_ring {
 	u64	write_index;
 	struct dtl_entry *write_ptr;
@@ -355,3 +365,81 @@ static int dtl_init(void)
 	return 0;
 }
 machine_arch_initcall(pseries, dtl_init);
+#endif /* CONFIG_DTL */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
+ */
+static notrace u64 scan_dispatch_log(u64 stop_tb)
+{
+	u64 i = local_paca->dtl_ridx;
+	struct dtl_entry *dtl = local_paca->dtl_curr;
+	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+	struct lppaca *vpa = local_paca->lppaca_ptr;
+	u64 tb_delta;
+	u64 stolen = 0;
+	u64 dtb;
+
+	if (!dtl)
+		return 0;
+
+	if (i == be64_to_cpu(vpa->dtl_idx))
+		return 0;
+	while (i < be64_to_cpu(vpa->dtl_idx)) {
+		dtb = be64_to_cpu(dtl->timebase);
+		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+			be32_to_cpu(dtl->ready_to_enqueue_time);
+		barrier();
+		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+			/* buffer has overflowed */
+			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+			continue;
+		}
+		if (dtb > stop_tb)
+			break;
+#ifdef CONFIG_DTL
+		if (dtl_consumer)
+			dtl_consumer(dtl, i);
+#endif
+		stolen += tb_delta;
+		++i;
+		++dtl;
+		if (dtl == dtl_end)
+			dtl = local_paca->dispatch_log;
+	}
+	local_paca->dtl_ridx = i;
+	local_paca->dtl_curr = dtl;
+	return stolen;
+}
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void notrace pseries_accumulate_stolen_time(void)
+{
+	u64 sst, ust;
+	struct cpu_accounting_data *acct = &local_paca->accounting;
+
+	sst = scan_dispatch_log(acct->starttime_user);
+	ust = scan_dispatch_log(acct->starttime);
+	acct->stime -= sst;
+	acct->utime -= ust;
+	acct->steal_time += ust + sst;
+}
+
+u64 pseries_calculate_stolen_time(u64 stop_tb)
+{
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
+		return scan_dispatch_log(stop_tb);
+
+	return 0;
+}
+
+#endif
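
The dtl_consumer hook is file-local, so consumers register from within dtl.c itself; scan_dispatch_log() then calls the hook once per entry, with interrupts disabled. A minimal sketch of such a consumer (hypothetical function names, illustrative only):

	/* Hypothetical dtl.c-internal consumer: count processed DTL entries. */
	static u64 dtl_entries_seen;

	static void example_dtl_consume(struct dtl_entry *entry, u64 index)
	{
		/* Runs from scan_dispatch_log() with interrupts disabled;
		 * keep the work minimal. */
		dtl_entries_seen++;
	}

	static void example_dtl_enable(void)
	{
		dtl_consumer = example_dtl_consume;	/* plain pointer store */
	}
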
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 0f8cd8b06432..e0a7ac5db15d 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -619,17 +619,21 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn)
 {
 	unsigned int use_count = 0;
-	struct device_node *dn;
+	struct device_node *dn, *tn;
 
 	WARN_ON(!of_node_is_type(cachedn, "cache"));
 
 	for_each_of_cpu_node(dn) {
-		if (of_find_next_cache_node(dn) == cachedn)
+		tn = of_find_next_cache_node(dn);
+		of_node_put(tn);
+		if (tn == cachedn)
 			use_count++;
 	}
 
 	for_each_node_by_type(dn, "cache") {
-		if (of_find_next_cache_node(dn) == cachedn)
+		tn = of_find_next_cache_node(dn);
+		of_node_put(tn);
+		if (tn == cachedn)
 			use_count++;
 	}
 
@@ -649,10 +653,13 @@ static int pseries_cpuhp_detach_nodes(struct device_node *cpudn)
 
 	dn = cpudn;
 	while ((dn = of_find_next_cache_node(dn))) {
-		if (pseries_cpuhp_cache_use_count(dn) > 1)
+		if (pseries_cpuhp_cache_use_count(dn) > 1) {
+			of_node_put(dn);
 			break;
+		}
 
 		ret = of_changeset_detach_node(&cs, dn);
+		of_node_put(dn);
 		if (ret)
 			goto out;
 	}
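
Both hunks apply the same rule: of_find_next_cache_node(), like every of_find_*() helper, returns its result with an elevated reference count, so the caller must drop it with of_node_put() on every path, even when only pointer equality is needed. A minimal sketch of the idiom (illustrative; of_node_put() tolerates NULL, so the put is safe when the lookup fails):

	#include <linux/of.h>

	static bool cache_node_matches(struct device_node *dn,
				       const struct device_node *cachedn)
	{
		struct device_node *tn = of_find_next_cache_node(dn);
		bool match = (tn == cachedn);

		of_node_put(tn);	/* drop the reference taken by of_find_* */
		return match;
	}
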
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index ab9fc6506861..762eb15d3bd4 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -16,7 +16,7 @@
 #ifdef CONFIG_TRACEPOINTS
 
 #ifndef CONFIG_JUMP_LABEL
-	.section	".toc","aw"
+	.data
 
 	.globl hcall_tracepoint_refcount
 hcall_tracepoint_refcount:
@@ -88,7 +88,7 @@ hcall_tracepoint_refcount:
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ;		\
 	std	r12,32(r1);					\
 	cmpdi	r12,0;						\
 	bne-	LABEL;						\
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 96e18d3b2fcf..d48c9c7ce10f 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -176,7 +176,7 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
 			= (unsigned int)last_p_partition_ID;
 
 		/* copy the Null-term char too */
-		strlcpy(&next_partner_info->location_code[0],
+		strscpy(&next_partner_info->location_code[0],
 			(char *)&pi_buff[2],
 			sizeof(next_partner_info->location_code));
 
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index 7ee3ed7d6cc2..a870cada7acd 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -152,7 +152,11 @@ static const struct dma_map_ops ibmebus_dma_ops = {
 static int ibmebus_match_path(struct device *dev, const void *data)
 {
 	struct device_node *dn = to_platform_device(dev)->dev.of_node;
-	return (of_find_node_by_path(data) == dn);
+	struct device_node *tn = of_find_node_by_path(data);
+
+	of_node_put(tn);
+
+	return (tn == dn);
 }
 
 static int ibmebus_match_node(struct device *dev, const void *data)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e6c117fb6491..97ef6499e501 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -660,6 +660,17 @@ static int __init vcpudispatch_stats_procfs_init(void)
 }
 
 machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+u64 pseries_paravirt_steal_clock(int cpu)
+{
+	struct lppaca *lppaca = &lppaca_of(cpu);
+
+	return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+		be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+}
+#endif
+
 #endif /* CONFIG_PPC_SPLPAR */
 
 void vpa_init(int cpu)
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 3d36a8955eaf..634fac5db3f9 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -216,7 +216,7 @@ static int update_dt_node(struct device_node *dn, s32 scope)
 		nprops = be32_to_cpu(upwa->nprops);
 
 		/* On the first call to ibm,update-properties for a node the
-		 * the first property value descriptor contains an empty
+		 * first property value descriptor contains an empty
 		 * property name, the property value length encoded as u32,
 		 * and the property value is the node path being updated.
 		 */
@@ -740,11 +740,19 @@ static int pseries_migrate_partition(u64 handle)
 #ifdef CONFIG_PPC_WATCHDOG
 	factor = nmi_wd_lpm_factor;
 #endif
+	/*
+	 * When the migration is initiated, the hypervisor changes VAS
+	 * mappings in preparation before the OS gets the notification
+	 * and closes all VAS windows. NX generates continuous faults
+	 * during this time and user space cannot differentiate these
+	 * faults from the migration event. So reduce this window by
+	 * closing VAS windows at the beginning of this function.
+	 */
+	vas_migration_handler(VAS_SUSPEND);
+
 	ret = wait_for_vasi_session_suspending(handle);
 	if (ret)
-		return ret;
-
-	vas_migration_handler(VAS_SUSPEND);
+		goto out;
 
 	if (factor)
 		watchdog_nmi_set_timeout_pct(factor);
@@ -765,6 +773,7 @@ static int pseries_migrate_partition(u64 handle)
 	if (factor)
 		watchdog_nmi_set_timeout_pct(0);
 
+out:
 	vas_migration_handler(VAS_RESUME);
 
 	return ret;
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index cad7a0c93117..599bd2c78514 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/notifier.h>
 #include <linux/proc_fs.h>
+#include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/of.h>
 
@@ -361,6 +362,10 @@ static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t coun
 	char *kbuf;
 	char *tmp;
 
+	rv = security_locked_down(LOCKDOWN_DEVICE_TREE);
+	if (rv)
+		return rv;
+
 	kbuf = memdup_user_nul(buf, count);
 	if (IS_ERR(kbuf))
 		return PTR_ERR(kbuf);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 489f4c4df468..8ef3270515a9 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -41,6 +41,7 @@
 #include <linux/of_pci.h>
 #include <linux/memblock.h>
 #include <linux/swiotlb.h>
+#include <linux/seq_buf.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -80,6 +81,20 @@
 DEFINE_STATIC_KEY_FALSE(shared_processor);
 EXPORT_SYMBOL(shared_processor);
 
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+	steal_acc = false;
+	return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+#endif
+
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
 unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -834,6 +849,11 @@ static void __init pSeries_setup_arch(void)
 		if (lppaca_shared_proc(get_lppaca())) {
 			static_branch_enable(&shared_processor);
 			pv_spinlocks_init();
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+			static_key_slow_inc(&paravirt_steal_enabled);
+			if (steal_acc)
+				static_key_slow_inc(&paravirt_steal_rq_enabled);
+#endif
 		}
 
 		ppc_md.power_save = pseries_lpar_idle;
@@ -992,6 +1012,33 @@ static void __init pSeries_cmo_feature_init(void)
 	pr_debug(" <- fw_cmo_feature_init()\n");
 }
 
+static void __init pseries_add_hw_description(void)
+{
+	struct device_node *dn;
+	const char *s;
+
+	dn = of_find_node_by_path("/openprom");
+	if (dn) {
+		if (of_property_read_string(dn, "model", &s) == 0)
+			seq_buf_printf(&ppc_hw_desc, "of:%s ", s);
+
+		of_node_put(dn);
+	}
+
+	dn = of_find_node_by_path("/hypervisor");
+	if (dn) {
+		if (of_property_read_string(dn, "compatible", &s) == 0)
+			seq_buf_printf(&ppc_hw_desc, "hv:%s ", s);
+
+		of_node_put(dn);
+		return;
+	}
+
+	if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
+	    of_property_read_bool(of_root, "ibm,fw-net-version"))
+		seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
+}
+
 /*
  * Early initialization.  Relocation is on but do not reference unbolted pages
  */
@@ -999,6 +1046,8 @@ static void __init pseries_init(void)
 {
 	pr_debug(" -> pseries_init()\n");
 
+	pseries_add_hw_description();
+
 #ifdef CONFIG_HVC_CONSOLE
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		hvc_vio_init_early();
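
The pieces connect as follows: on a shared-processor LPAR, pSeries_setup_arch() flips paravirt_steal_enabled (and paravirt_steal_rq_enabled unless "no-steal-acc" was given), and generic accounting then samples pseries_paravirt_steal_clock() through the paravirt_steal_clock hook. A simplified model of the consumer side, assuming the general shape of the scheduler's cputime code (illustrative, not the actual kernel/sched/cputime.c source):

	/* Simplified model: sample the steal clock and account the delta. */
	static u64 sample_steal_delta(int cpu, u64 *prev_steal)
	{
		u64 steal = 0;

		if (static_key_false(&paravirt_steal_enabled)) {
			u64 now = paravirt_steal_clock(cpu);

			steal = now - *prev_steal;	/* new wait time */
			*prev_steal = now;
		}
		return steal;
	}
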
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 7e6e6dd2e33e..0e0524cbe20c 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -333,7 +333,7 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
 		 * So no unpacking needs to be done.
 		 */
 		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
-				  VPHN_FLAG_VCPU, smp_processor_id());
+				  VPHN_FLAG_VCPU, hard_smp_processor_id());
 		if (rc != H_SUCCESS) {
 			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
 			goto out;
@@ -501,14 +501,10 @@ static const struct vas_user_win_ops vops_pseries = {
 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
 			     const char *name)
 {
-	int rc;
-
 	if (!copypaste_feat)
 		return -ENOTSUPP;
 
-	rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
-
-	return rc;
+	return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
 }
 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
 
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index ef9a5999fa93..73c2d70706c0 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -209,8 +209,10 @@ static int fsl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 			dev_err(&pdev->dev,
 				"node %pOF has an invalid fsl,msi phandle %u\n",
 				hose->dn, np->phandle);
+			of_node_put(np);
 			return -EINVAL;
 		}
+		of_node_put(np);
 	}
 
 	msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index af6c8ca824d3..974d3db6faab 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -181,6 +181,7 @@ static int setup_one_atmu(struct ccsr_pci __iomem *pci,
 static bool is_kdump(void)
 {
 	struct device_node *node;
+	bool ret;
 
 	node = of_find_node_by_type(NULL, "memory");
 	if (!node) {
@@ -188,7 +189,10 @@ static bool is_kdump(void)
 		return false;
 	}
 
-	return of_property_read_bool(node, "linux,usable-memory");
+	ret = of_property_read_bool(node, "linux,usable-memory");
+	of_node_put(node);
+
+	return ret;
 }
 
 /* atmu setup for fsl pci/pcie controller */
@@ -939,7 +943,7 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose)
 	return 0;
 }
 
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 static int mcheck_handle_load(struct pt_regs *regs, u32 inst)
 {
 	unsigned int rd, ra, rb, d;
@@ -1142,7 +1146,6 @@ void __init fsl_pci_assign_primary(void)
 	for_each_matching_node(np, pci_ids) {
 		if (of_device_is_available(np)) {
 			fsl_pci_primary = np;
-			of_node_put(np);
 			return;
 		}
 	}
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 4647c6074f3b..c8f044d62fe2 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -98,7 +98,7 @@ resource_size_t rio_law_start;
 struct fsl_rio_dbell *dbell;
 struct fsl_rio_pw *pw;
 
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
 int fsl_rio_mcheck_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *entry;
diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index 698fefaaa6dd..a439e33eae06 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -121,6 +121,7 @@ static unsigned int mpic_msgr_number_of_blocks(void)
 
 			count += 1;
 		}
+		of_node_put(aliases);
 	}
 
 	return count;
@@ -144,12 +145,18 @@ static int mpic_msgr_block_number(struct device_node *node)
 
 	for (index = 0; index < number_of_blocks; ++index) {
 		struct property *prop;
+		struct device_node *tn;
 
 		snprintf(buf, sizeof(buf), "mpic-msgr-block%d", index);
 		prop = of_find_property(aliases, buf, NULL);
-		if (node == of_find_node_by_path(prop->value))
+		tn = of_find_node_by_path(prop->value);
+		if (node == tn) {
+			of_node_put(tn);
 			break;
+		}
+		of_node_put(tn);
 	}
+	of_node_put(aliases);
 
 	return index == number_of_blocks ? -1 : index;
 }
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 9e7007f9aca5..f8320f8e5bc7 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -36,8 +36,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
 
 	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);
 
-	call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
-					  server, DEFAULT_PRIORITY);
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
+				DEFAULT_PRIORITY);
 	if (call_status != 0) {
 		printk(KERN_ERR
 			"%s: ibm_set_xive irq %u server %x returned %d\n",
@@ -46,7 +46,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
 	}
 
 	/* Now unmask the interrupt (often a no-op) */
-	call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq);
+	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
 	if (call_status != 0) {
 		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
 			__func__, hw_irq, call_status);
@@ -68,7 +68,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
 	if (hw_irq == XICS_IPI)
 		return;
 
-	call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq);
+	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
 	if (call_status != 0) {
 		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
 			__func__, hw_irq, call_status);
@@ -76,8 +76,8 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
 	}
 
 	/* Have to set XIVE to 0xff to be able to remove a slot */
-	call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
-					  xics_default_server, 0xff);
+	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
+				xics_default_server, 0xff);
 	if (call_status != 0) {
 		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
 			__func__, hw_irq, call_status);
@@ -108,7 +108,7 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
 		return -1;
 
-	status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq);
+	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
@@ -126,8 +126,8 @@ static int ics_rtas_set_affinity(struct irq_data *d,
 	pr_debug("%s: irq %d [hw 0x%x] server: 0x%x\n", __func__, d->irq,
 		 hw_irq, irq_server);
 
-	status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL,
-				     hw_irq, irq_server, xics_status[1]);
+	status = rtas_call(ibm_set_xive, 3, 1, NULL,
+			   hw_irq, irq_server, xics_status[1]);
 
 	if (status) {
 		printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
@@ -158,7 +158,7 @@ static int ics_rtas_check(struct ics *ics, unsigned int hw_irq)
 		return -EINVAL;
 
 	/* Check if RTAS knows about this interrupt */
-	rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq);
+	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
 	if (rc)
 		return -ENXIO;
 
@@ -174,7 +174,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
 {
 	int rc, status[2];
 
-	rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec);
+	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
 	if (rc)
 		return -1;
 	return status[0];
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 61b9f98dfd4a..a289cb97c1d7 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -783,7 +783,7 @@ static int xive_irq_set_type(struct irq_data *d, unsigned int flow_type)
 	 * the corresponding descriptor bits mind you but those will in turn
 	 * affect the resend function when re-enabling an edge interrupt.
 	 *
-	 * Set set the default to edge as explained in map().
+	 * Set the default to edge as explained in map().
 	 */
 	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
 		flow_type = IRQ_TYPE_EDGE_RISING;
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index d25d8c692909..3925825954bc 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -579,12 +579,12 @@ bool __init xive_native_init(void)
 	/* Resource 1 is HV window */
 	if (of_address_to_resource(np, 1, &r)) {
 		pr_err("Failed to get thread mgmnt area resource\n");
-		return false;
+		goto err_put;
 	}
 	tima = ioremap(r.start, resource_size(&r));
 	if (!tima) {
 		pr_err("Failed to map thread mgmnt area\n");
-		return false;
+		goto err_put;
 	}
 
 	/* Read number of priorities */
@@ -612,7 +612,7 @@ bool __init xive_native_init(void)
 	/* Resource 2 is OS window */
 	if (of_address_to_resource(np, 2, &r)) {
 		pr_err("Failed to get thread mgmnt area resource\n");
-		return false;
+		goto err_put;
 	}
 
 	xive_tima_os = r.start;
@@ -624,7 +624,7 @@ bool __init xive_native_init(void)
 	rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
 	if (rc) {
 		pr_err("Switch to exploitation mode failed with error %lld\n", rc);
-		return false;
+		goto err_put;
 	}
 
 	/* Setup some dummy HV pool VPs */
@@ -634,10 +634,15 @@ bool __init xive_native_init(void)
 	if (!xive_core_init(np, &xive_native_ops, tima, TM_QW3_HV_PHYS,
 			    max_prio)) {
 		opal_xive_reset(OPAL_XIVE_MODE_EMU);
-		return false;
+		goto err_put;
 	}
+	of_node_put(np);
 	pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
 	return true;
+
+err_put:
+	of_node_put(np);
+	return false;
 }
 
 static bool xive_native_provision_pages(void)
diff --git a/arch/powerpc/xmon/ppc.h b/arch/powerpc/xmon/ppc.h
index d00f33dcf192..1d98b8dd134e 100644
--- a/arch/powerpc/xmon/ppc.h
+++ b/arch/powerpc/xmon/ppc.h
@@ -435,8 +435,6 @@ struct powerpc_macro
 extern const struct powerpc_macro powerpc_macros[];
 extern const int powerpc_num_macros;
 
-extern ppc_cpu_t ppc_parse_cpu (ppc_cpu_t, ppc_cpu_t *, const char *);
-
 static inline long
 ppc_optional_operand_value (const struct powerpc_operand *operand)
 {
diff --git a/arch/powerpc/xmon/spr_access.S b/arch/powerpc/xmon/spr_access.S
index 720a52afdd58..c308ddf268fb 100644
--- a/arch/powerpc/xmon/spr_access.S
+++ b/arch/powerpc/xmon/spr_access.S
@@ -4,12 +4,12 @@
 
 /* unsigned long xmon_mfspr(sprn, default_value) */
 _GLOBAL(xmon_mfspr)
-	PPC_LL	r5, .Lmfspr_table@got(r2)
+	LOAD_REG_ADDR(r5, .Lmfspr_table)
 	b	xmon_mxspr
 
 /* void xmon_mtspr(sprn, new_value) */
 _GLOBAL(xmon_mtspr)
-	PPC_LL	r5, .Lmtspr_table@got(r2)
+	LOAD_REG_ADDR(r5, .Lmtspr_table)
 	b	xmon_mxspr
 
 /*
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 26ef3388c24c..f51c882bf902 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -195,7 +195,7 @@ static int do_spu_cmd(void);
 #ifdef CONFIG_44x
 static void dump_tlb_44x(void);
 #endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 static void dump_tlb_book3e(void);
 #endif
 
@@ -288,11 +288,11 @@ Commands:\n\
   t	print backtrace\n\
   x	exit monitor and recover\n\
   X	exit monitor and don't recover\n"
-#if defined(CONFIG_PPC64) && !defined(CONFIG_PPC_BOOK3E)
+#if defined(CONFIG_PPC_BOOK3S_64)
 "  u	dump segment table or SLB\n"
 #elif defined(CONFIG_PPC_BOOK3S_32)
 "  u	dump segment registers\n"
-#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_44x) || defined(CONFIG_PPC_BOOK3E_64)
 "  u	dump TLB\n"
 #endif
 "  U	show uptime information\n"
@@ -1166,7 +1166,7 @@ cmds(struct pt_regs *excp)
 		case 'u':
 			dump_tlb_44x();
 			break;
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
 		case 'u':
 			dump_tlb_book3e();
 			break;
@@ -2686,7 +2686,7 @@ static void dump_one_paca(int cpu)
 	DUMP(p, rfi_flush_fallback_area, "%-*px");
 #endif
 	DUMP(p, dscr_default, "%#-*llx");
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	DUMP(p, pgd, "%-*px");
 	DUMP(p, kernel_pgd, "%-*px");
 	DUMP(p, tcd_ptr, "%-*px");
@@ -2701,7 +2701,7 @@ static void dump_one_paca(int cpu)
 	DUMP(p, canary, "%#-*lx");
 #endif
 	DUMP(p, saved_r1, "%#-*llx");
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 	DUMP(p, trap_save, "%#-*x");
 #endif
 	DUMP(p, irq_soft_mask, "%#-*x");
@@ -3823,7 +3823,7 @@ static void dump_tlb_44x(void)
 }
 #endif /* CONFIG_44x */
 
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
 static void dump_tlb_book3e(void)
 {
 	u32 mmucfg, pidmask, lpidmask;
@@ -3965,7 +3965,7 @@ static void dump_tlb_book3e(void)
 		}
 	}
 }
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
 
 static void xmon_init(int enable)
 {
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c56bc70158ac..6b48a3ae9843 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -105,6 +105,7 @@ config RISCV
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_FUNCTION_ARG_ACCESS_API
 	select HAVE_STACKPROTECTOR
@@ -229,6 +230,9 @@ config RISCV_DMA_NONCOHERENT
 	select ARCH_HAS_SETUP_DMA_OPS
 	select DMA_DIRECT_REMAP
 
+config AS_HAS_INSN
+	def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
+
 source "arch/riscv/Kconfig.socs"
 source "arch/riscv/Kconfig.erratas"
 
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 76364cf67a72..1c8ec656e916 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -112,8 +112,6 @@ else
 KBUILD_IMAGE	:= $(boot)/Image.gz
 endif
 
-head-y := arch/riscv/kernel/head.o
-
 libs-y += arch/riscv/lib/
 libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
diff --git a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
index c9af67f7a0d2..f7a230110512 100644
--- a/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
+++ b/arch/riscv/boot/dts/starfive/jh7100-beaglev-starlight.dts
@@ -8,7 +8,7 @@
 #include "jh7100.dtsi"
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/leds/common.h>
-#include <dt-bindings/pinctrl/pinctrl-starfive.h>
+#include <dt-bindings/pinctrl/pinctrl-starfive-jh7100.h>
 
 / {
 	model = "BeagleV Starlight Beta";
diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h
index dfee2829fc7c..efeb5edf8a3a 100644
--- a/arch/riscv/include/asm/gpr-num.h
+++ b/arch/riscv/include/asm/gpr-num.h
@@ -3,6 +3,11 @@
 #define __ASM_GPR_NUM_H
 
 #ifdef __ASSEMBLY__
+
+	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+	.equ	.L__gpr_num_x\num, \num
+	.endr
+
 	.equ	.L__gpr_num_zero,	0
 	.equ	.L__gpr_num_ra,		1
 	.equ	.L__gpr_num_sp,		2
@@ -39,6 +44,9 @@
 #else /* __ASSEMBLY__ */
 
 #define __DEFINE_ASM_GPR_NUMS					\
+"	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \
+"	.equ	.L__gpr_num_x\\num, \\num\n"			\
+"	.endr\n"						\
 "	.equ	.L__gpr_num_zero,	0\n"			\
 "	.equ	.L__gpr_num_ra,		1\n"			\
 "	.equ	.L__gpr_num_sp,		2\n"			\
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 6f59ec64175e..b22525290073 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -58,6 +58,7 @@ enum riscv_isa_ext_id {
 	RISCV_ISA_EXT_ZICBOM,
 	RISCV_ISA_EXT_ZIHINTPAUSE,
 	RISCV_ISA_EXT_SSTC,
+	RISCV_ISA_EXT_SVINVAL,
 	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
 };
 
@@ -69,6 +70,7 @@ enum riscv_isa_ext_id {
 enum riscv_isa_ext_key {
 	RISCV_ISA_EXT_KEY_FPU,		/* For 'F' and 'D' */
 	RISCV_ISA_EXT_KEY_ZIHINTPAUSE,
+	RISCV_ISA_EXT_KEY_SVINVAL,
 	RISCV_ISA_EXT_KEY_MAX,
 };
 
@@ -90,6 +92,8 @@ static __always_inline int riscv_isa_ext2key(int num)
 		return RISCV_ISA_EXT_KEY_FPU;
 	case RISCV_ISA_EXT_ZIHINTPAUSE:
 		return RISCV_ISA_EXT_KEY_ZIHINTPAUSE;
+	case RISCV_ISA_EXT_SVINVAL:
+		return RISCV_ISA_EXT_KEY_SVINVAL;
 	default:
 		return -EINVAL;
 	}
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
new file mode 100644
index 000000000000..16044affa57c
--- /dev/null
+++ b/arch/riscv/include/asm/insn-def.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_INSN_DEF_H
+#define __ASM_INSN_DEF_H
+
+#include <asm/asm.h>
+
+#define INSN_R_FUNC7_SHIFT		25
+#define INSN_R_RS2_SHIFT		20
+#define INSN_R_RS1_SHIFT		15
+#define INSN_R_FUNC3_SHIFT		12
+#define INSN_R_RD_SHIFT			 7
+#define INSN_R_OPCODE_SHIFT		 0
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_AS_HAS_INSN
+
+	.macro insn_r, opcode, func3, func7, rd, rs1, rs2
+	.insn	r \opcode, \func3, \func7, \rd, \rs1, \rs2
+	.endm
+
+#else
+
+#include <asm/gpr-num.h>
+
+	.macro insn_r, opcode, func3, func7, rd, rs1, rs2
+	.4byte	((\opcode << INSN_R_OPCODE_SHIFT) |		\
+		 (\func3 << INSN_R_FUNC3_SHIFT) |		\
+		 (\func7 << INSN_R_FUNC7_SHIFT) |		\
+		 (.L__gpr_num_\rd << INSN_R_RD_SHIFT) |		\
+		 (.L__gpr_num_\rs1 << INSN_R_RS1_SHIFT) |	\
+		 (.L__gpr_num_\rs2 << INSN_R_RS2_SHIFT))
+	.endm
+
+#endif
+
+#define __INSN_R(...)	insn_r __VA_ARGS__
+
+#else /* ! __ASSEMBLY__ */
+
+#ifdef CONFIG_AS_HAS_INSN
+
+#define __INSN_R(opcode, func3, func7, rd, rs1, rs2)	\
+	".insn	r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n"
+
+#else
+
+#include <linux/stringify.h>
+#include <asm/gpr-num.h>
+
+#define DEFINE_INSN_R							\
+	__DEFINE_ASM_GPR_NUMS						\
+"	.macro insn_r, opcode, func3, func7, rd, rs1, rs2\n"		\
+"	.4byte	((\\opcode << " __stringify(INSN_R_OPCODE_SHIFT) ") |"	\
+"		 (\\func3 << " __stringify(INSN_R_FUNC3_SHIFT) ") |"	\
+"		 (\\func7 << " __stringify(INSN_R_FUNC7_SHIFT) ") |"	\
+"		 (.L__gpr_num_\\rd << " __stringify(INSN_R_RD_SHIFT) ") |"    \
+"		 (.L__gpr_num_\\rs1 << " __stringify(INSN_R_RS1_SHIFT) ") |"  \
+"		 (.L__gpr_num_\\rs2 << " __stringify(INSN_R_RS2_SHIFT) "))\n" \
+"	.endm\n"
+
+#define UNDEFINE_INSN_R							\
+"	.purgem insn_r\n"
+
+#define __INSN_R(opcode, func3, func7, rd, rs1, rs2)			\
+	DEFINE_INSN_R							\
+	"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
+	UNDEFINE_INSN_R
+
+#endif
+
+#endif /* ! __ASSEMBLY__ */
+
+#define INSN_R(opcode, func3, func7, rd, rs1, rs2)		\
+	__INSN_R(RV_##opcode, RV_##func3, RV_##func7,		\
+		 RV_##rd, RV_##rs1, RV_##rs2)
+
+#define RV_OPCODE(v)		__ASM_STR(v)
+#define RV_FUNC3(v)		__ASM_STR(v)
+#define RV_FUNC7(v)		__ASM_STR(v)
+#define RV_RD(v)		__ASM_STR(v)
+#define RV_RS1(v)		__ASM_STR(v)
+#define RV_RS2(v)		__ASM_STR(v)
+#define __RV_REG(v)		__ASM_STR(x ## v)
+#define RV___RD(v)		__RV_REG(v)
+#define RV___RS1(v)		__RV_REG(v)
+#define RV___RS2(v)		__RV_REG(v)
+
+#define RV_OPCODE_SYSTEM	RV_OPCODE(115)
+
+#define HFENCE_VVMA(vaddr, asid)				\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17),		\
+	       __RD(0), RS1(vaddr), RS2(asid))
+
+#define HFENCE_GVMA(gaddr, vmid)				\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(49),		\
+	       __RD(0), RS1(gaddr), RS2(vmid))
+
+#define HLVX_HU(dest, addr)					\
+	INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(50),		\
+	       RD(dest), RS1(addr), __RS2(3))
+
+#define HLV_W(dest, addr)					\
+	INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(52),		\
+	       RD(dest), RS1(addr), __RS2(0))
+
+#ifdef CONFIG_64BIT
+#define HLV_D(dest, addr)					\
+	INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(54),		\
+	       RD(dest), RS1(addr), __RS2(0))
+#else
+#define HLV_D(dest, addr)					\
+	__ASM_STR(.error "hlv.d requires 64-bit support")
+#endif
+
+#define SINVAL_VMA(vaddr, asid)					\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(11),		\
+	       __RD(0), RS1(vaddr), RS2(asid))
+
+#define SFENCE_W_INVAL()					\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12),		\
+	       __RD(0), __RS1(0), __RS2(0))
+
+#define SFENCE_INVAL_IR()					\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12),		\
+	       __RD(0), __RS1(0), __RS2(1))
+
+#define HINVAL_VVMA(vaddr, asid)				\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(19),		\
+	       __RD(0), RS1(vaddr), RS2(asid))
+
+#define HINVAL_GVMA(gaddr, vmid)				\
+	INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51),		\
+	       __RD(0), RS1(gaddr), RS2(vmid))
+
+#endif /* __ASM_INSN_DEF_H */
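
The encoding can be checked by hand against the .word constants that the later hunks delete. For HFENCE.GVMA zero, zero — SYSTEM opcode 115 (0b1110011), func3 0, func7 49, and rd/rs1/rs2 all x0 — the field arithmetic reproduces the old hand-assembled word (a standalone check, not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t insn = (49u << 25) |	/* func7 = 49: HFENCE.GVMA */
				(0u  << 20) |	/* rs2  = x0 */
				(0u  << 15) |	/* rs1  = x0 */
				(0u  << 12) |	/* func3 = 0 */
				(0u  << 7)  |	/* rd   = x0 */
				115u;		/* opcode = SYSTEM (0x73) */

		printf("0x%08x\n", insn);	/* prints 0x62000073 */
		return 0;
	}
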
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 60c517e4d576..dbbf43d52623 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -67,6 +67,7 @@ struct kvm_vcpu_stat {
 	u64 mmio_exit_kernel;
 	u64 csr_exit_user;
 	u64 csr_exit_kernel;
+	u64 signal_exits;
 	u64 exits;
 };
 
diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h
index 26a446a34057..d4e3e600beef 100644
--- a/arch/riscv/include/asm/kvm_vcpu_sbi.h
+++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h
@@ -11,8 +11,8 @@
 
 #define KVM_SBI_IMPID 3
 
-#define KVM_SBI_VERSION_MAJOR 0
-#define KVM_SBI_VERSION_MINOR 3
+#define KVM_SBI_VERSION_MAJOR 1
+#define KVM_SBI_VERSION_MINOR 0
 
 struct kvm_vcpu_sbi_extension {
 	unsigned long extid_start;
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 19eedd4af4cd..94a0590c6971 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -65,11 +65,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 extern void start_thread(struct pt_regs *regs,
 			unsigned long pc, unsigned long sp);
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *dead_task)
-{
-}
-
 extern unsigned long __get_wchan(struct task_struct *p);
 
 
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 7351417afd62..8985ff234c01 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -48,6 +48,7 @@ struct kvm_sregs {
 /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
 struct kvm_riscv_config {
 	unsigned long isa;
+	unsigned long zicbom_block_size;
 };
 
 /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID {
 	KVM_RISCV_ISA_EXT_M,
 	KVM_RISCV_ISA_EXT_SVPBMT,
 	KVM_RISCV_ISA_EXT_SSTC,
+	KVM_RISCV_ISA_EXT_SVINVAL,
+	KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+	KVM_RISCV_ISA_EXT_ZICBOM,
 	KVM_RISCV_ISA_EXT_MAX,
 };
 
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 33bb60a354cd..db6e4b1294ba 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -28,9 +28,9 @@ KASAN_SANITIZE_cpufeature.o := n
 endif
 endif
 
-extra-y += head.o
 extra-y += vmlinux.lds
 
+obj-y	+= head.o
 obj-y	+= soc.o
 obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o
 obj-y	+= cpu.o
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 0937595c32a5..fa427bdcf773 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -140,6 +140,7 @@ device_initcall(riscv_cpuinfo_init);
 static struct riscv_isa_ext_data isa_ext_arr[] = {
 	__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
 	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
+	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
 	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
 	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
 	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 92811ceb8470..694267d1fe81 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -204,6 +204,7 @@ void __init riscv_fill_hwcap(void)
 				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
 				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
 				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
+				SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
 			}
 #undef SET_ISA_EXT_MAP
 		}
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 4abc9aebdfae..123d05255fcf 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -119,10 +119,11 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	mmap_read_lock(mm);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;
 
 		if (vma_is_special_mapping(vma, vdso_info.dm))
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index f5a342fa1b1d..f36a737d5f96 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -24,6 +24,7 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select KVM_MMIO
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+	select KVM_XFER_TO_GUEST_WORK
 	select HAVE_KVM_VCPU_ASYNC_IOCTL
 	select HAVE_KVM_EVENTFD
 	select SRCU
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 1549205fe5fe..df2d8716851f 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -122,7 +122,7 @@ void kvm_arch_exit(void)
 {
 }
 
-static int riscv_kvm_init(void)
+static int __init riscv_kvm_init(void)
 {
 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 }
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
index 1a76d0b1907d..309d79b3e5cd 100644
--- a/arch/riscv/kvm/tlb.c
+++ b/arch/riscv/kvm/tlb.c
@@ -12,22 +12,11 @@
 #include <linux/kvm_host.h>
 #include <asm/cacheflush.h>
 #include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
 
-/*
- * Instruction encoding of hfence.gvma is:
- * HFENCE.GVMA rs1, rs2
- * HFENCE.GVMA zero, rs2
- * HFENCE.GVMA rs1
- * HFENCE.GVMA
- *
- * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
- * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
- * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
- * rs1==zero and rs2==zero ==> HFENCE.GVMA
- *
- * Instruction encoding of HFENCE.GVMA is:
- * 0110001 rs2(5) rs1(5) 000 00000 1110011
- */
+#define has_svinval()	\
+	static_branch_unlikely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_SVINVAL])
 
 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
 					  gpa_t gpa, gpa_t gpsz,
@@ -40,32 +29,22 @@ void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
 		return;
 	}
 
-	for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) {
-		/*
-		 * rs1 = a0 (GPA >> 2)
-		 * rs2 = a1 (VMID)
-		 * HFENCE.GVMA a0, a1
-		 * 0110001 01011 01010 000 00000 1110011
-		 */
-		asm volatile ("srli a0, %0, 2\n"
-			      "add a1, %1, zero\n"
-			      ".word 0x62b50073\n"
-			      :: "r" (pos), "r" (vmid)
-			      : "a0", "a1", "memory");
+	if (has_svinval()) {
+		asm volatile (SFENCE_W_INVAL() ::: "memory");
+		for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+			asm volatile (HINVAL_GVMA(%0, %1)
+			: : "r" (pos >> 2), "r" (vmid) : "memory");
+		asm volatile (SFENCE_INVAL_IR() ::: "memory");
+	} else {
+		for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+			asm volatile (HFENCE_GVMA(%0, %1)
+			: : "r" (pos >> 2), "r" (vmid) : "memory");
 	}
 }
 
 void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid)
 {
-	/*
-	 * rs1 = zero
-	 * rs2 = a0 (VMID)
-	 * HFENCE.GVMA zero, a0
-	 * 0110001 01010 00000 000 00000 1110011
-	 */
-	asm volatile ("add a0, %0, zero\n"
-		      ".word 0x62a00073\n"
-		      :: "r" (vmid) : "a0", "memory");
+	asm volatile(HFENCE_GVMA(zero, %0) : : "r" (vmid) : "memory");
 }
 
 void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
@@ -78,46 +57,24 @@ void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
 		return;
 	}
 
-	for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) {
-		/*
-		 * rs1 = a0 (GPA >> 2)
-		 * rs2 = zero
-		 * HFENCE.GVMA a0
-		 * 0110001 00000 01010 000 00000 1110011
-		 */
-		asm volatile ("srli a0, %0, 2\n"
-			      ".word 0x62050073\n"
-			      :: "r" (pos) : "a0", "memory");
+	if (has_svinval()) {
+		asm volatile (SFENCE_W_INVAL() ::: "memory");
+		for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+			asm volatile(HINVAL_GVMA(%0, zero)
+			: : "r" (pos >> 2) : "memory");
+		asm volatile (SFENCE_INVAL_IR() ::: "memory");
+	} else {
+		for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+			asm volatile(HFENCE_GVMA(%0, zero)
+			: : "r" (pos >> 2) : "memory");
 	}
 }
 
 void kvm_riscv_local_hfence_gvma_all(void)
 {
-	/*
-	 * rs1 = zero
-	 * rs2 = zero
-	 * HFENCE.GVMA
-	 * 0110001 00000 00000 000 00000 1110011
-	 */
-	asm volatile (".word 0x62000073" ::: "memory");
+	asm volatile(HFENCE_GVMA(zero, zero) : : : "memory");
 }
 
-/*
- * Instruction encoding of hfence.gvma is:
- * HFENCE.VVMA rs1, rs2
- * HFENCE.VVMA zero, rs2
- * HFENCE.VVMA rs1
- * HFENCE.VVMA
- *
- * rs1!=zero and rs2!=zero ==> HFENCE.VVMA rs1, rs2
- * rs1==zero and rs2!=zero ==> HFENCE.VVMA zero, rs2
- * rs1!=zero and rs2==zero ==> HFENCE.VVMA rs1
- * rs1==zero and rs2==zero ==> HFENCE.VVMA
- *
- * Instruction encoding of HFENCE.VVMA is:
- * 0010001 rs2(5) rs1(5) 000 00000 1110011
- */
-
 void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
 					  unsigned long asid,
 					  unsigned long gva,
@@ -133,18 +90,16 @@ void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
 
 	hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
 
-	for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) {
-		/*
-		 * rs1 = a0 (GVA)
-		 * rs2 = a1 (ASID)
-		 * HFENCE.VVMA a0, a1
-		 * 0010001 01011 01010 000 00000 1110011
-		 */
-		asm volatile ("add a0, %0, zero\n"
-			      "add a1, %1, zero\n"
-			      ".word 0x22b50073\n"
-			      :: "r" (pos), "r" (asid)
-			      : "a0", "a1", "memory");
+	if (has_svinval()) {
+		asm volatile (SFENCE_W_INVAL() ::: "memory");
+		for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+			asm volatile(HINVAL_VVMA(%0, %1)
+			: : "r" (pos), "r" (asid) : "memory");
+		asm volatile (SFENCE_INVAL_IR() ::: "memory");
+	} else {
+		for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+			asm volatile(HFENCE_VVMA(%0, %1)
+			: : "r" (pos), "r" (asid) : "memory");
 	}
 
 	csr_write(CSR_HGATP, hgatp);
@@ -157,15 +112,7 @@ void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
 
 	hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
 
-	/*
-	 * rs1 = zero
-	 * rs2 = a0 (ASID)
-	 * HFENCE.VVMA zero, a0
-	 * 0010001 01010 00000 000 00000 1110011
-	 */
-	asm volatile ("add a0, %0, zero\n"
-		      ".word 0x22a00073\n"
-		      :: "r" (asid) : "a0", "memory");
+	asm volatile(HFENCE_VVMA(zero, %0) : : "r" (asid) : "memory");
 
 	csr_write(CSR_HGATP, hgatp);
 }
@@ -183,16 +130,16 @@ void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
 
 	hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
 
-	for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) {
-		/*
-		 * rs1 = a0 (GVA)
-		 * rs2 = zero
-		 * HFENCE.VVMA a0
-		 * 0010001 00000 01010 000 00000 1110011
-		 */
-		asm volatile ("add a0, %0, zero\n"
-			      ".word 0x22050073\n"
-			      :: "r" (pos) : "a0", "memory");
+	if (has_svinval()) {
+		asm volatile (SFENCE_W_INVAL() ::: "memory");
+		for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+			asm volatile(HINVAL_VVMA(%0, zero)
+			: : "r" (pos) : "memory");
+		asm volatile (SFENCE_INVAL_IR() ::: "memory");
+	} else {
+		for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+			asm volatile(HFENCE_VVMA(%0, zero)
+			: : "r" (pos) : "memory");
 	}
 
 	csr_write(CSR_HGATP, hgatp);
@@ -204,13 +151,7 @@ void kvm_riscv_local_hfence_vvma_all(unsigned long vmid)
 
 	hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
 
-	/*
-	 * rs1 = zero
-	 * rs2 = zero
-	 * HFENCE.VVMA
-	 * 0010001 00000 00000 000 00000 1110011
-	 */
-	asm volatile (".word 0x22000073" ::: "memory");
+	asm volatile(HFENCE_VVMA(zero, zero) : : : "memory");
 
 	csr_write(CSR_HGATP, hgatp);
 }
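
The Svinval variants split one heavyweight fence into a bracketed batch, which is the point of the three-step sequence repeated in each hunk above (conceptual outline):

	/*
	 * SFENCE.W.INVAL   -- order earlier page-table stores before the
	 *                     invalidations that follow
	 * HINVAL.GVMA/VVMA -- invalidate one translation per loop iteration,
	 *                     with no ordering of its own
	 * SFENCE.INVAL.IR  -- order the invalidations before subsequent
	 *                     implicit memory accesses
	 */
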
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index d0f08d5b4282..a032c4f0d600 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/entry-kvm.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kdebug.h>
@@ -18,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
+#include <asm/cacheflush.h>
 #include <asm/hwcap.h>
 
 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
@@ -28,6 +30,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
 	STATS_DESC_COUNTER(VCPU, csr_exit_user),
 	STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
 	STATS_DESC_COUNTER(VCPU, exits)
 };
 
@@ -42,17 +45,23 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 
 #define KVM_RISCV_BASE_ISA_MASK		GENMASK(25, 0)
 
+#define KVM_ISA_EXT_ARR(ext)		[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
 /* Mapping between KVM ISA Extension ID & Host ISA extension ID */
 static const unsigned long kvm_isa_ext_arr[] = {
-	RISCV_ISA_EXT_a,
-	RISCV_ISA_EXT_c,
-	RISCV_ISA_EXT_d,
-	RISCV_ISA_EXT_f,
-	RISCV_ISA_EXT_h,
-	RISCV_ISA_EXT_i,
-	RISCV_ISA_EXT_m,
-	RISCV_ISA_EXT_SVPBMT,
-	RISCV_ISA_EXT_SSTC,
+	[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+	[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+	[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+	[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+	[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+	[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+	[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+
+	KVM_ISA_EXT_ARR(SSTC),
+	KVM_ISA_EXT_ARR(SVINVAL),
+	KVM_ISA_EXT_ARR(SVPBMT),
+	KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+	KVM_ISA_EXT_ARR(ZICBOM),
 };
 
 static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
@@ -87,6 +96,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
 	case KVM_RISCV_ISA_EXT_I:
 	case KVM_RISCV_ISA_EXT_M:
 	case KVM_RISCV_ISA_EXT_SSTC:
+	case KVM_RISCV_ISA_EXT_SVINVAL:
+	case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
 		return false;
 	default:
 		break;
@@ -254,6 +265,11 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
 	case KVM_REG_RISCV_CONFIG_REG(isa):
 		reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
 		break;
+	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+		if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+			return -EINVAL;
+		reg_val = riscv_cbom_block_size;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -311,6 +327,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
 			return -EOPNOTSUPP;
 		}
 		break;
+	case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+		return -EOPNOTSUPP;
 	default:
 		return -EINVAL;
 	}
@@ -784,11 +802,15 @@ static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
 {
 	u64 henvcfg = 0;
 
-	if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT))
+	if (riscv_isa_extension_available(isa, SVPBMT))
 		henvcfg |= ENVCFG_PBMTE;
 
-	if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SSTC))
+	if (riscv_isa_extension_available(isa, SSTC))
 		henvcfg |= ENVCFG_STCE;
+
+	if (riscv_isa_extension_available(isa, ZICBOM))
+		henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
+
 	csr_write(CSR_HENVCFG, henvcfg);
 #ifdef CONFIG_32BIT
 	csr_write(CSR_HENVCFGH, henvcfg >> 32);
@@ -958,7 +980,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	while (ret > 0) {
 		/* Check conditions before entering the guest */
-		cond_resched();
+		ret = xfer_to_guest_mode_handle_work(vcpu);
+		if (!ret)
+			ret = 1;
 
 		kvm_riscv_gstage_vmid_update(vcpu);
 
@@ -967,15 +991,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		local_irq_disable();
 
 		/*
-		 * Exit if we have a signal pending so that we can deliver
-		 * the signal to user space.
-		 */
-		if (signal_pending(current)) {
-			ret = -EINTR;
-			run->exit_reason = KVM_EXIT_INTR;
-		}
-
-		/*
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
@@ -997,7 +1012,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 		if (ret <= 0 ||
 		    kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
-		    kvm_request_pending(vcpu)) {
+		    kvm_request_pending(vcpu) ||
+		    xfer_to_guest_mode_work_pending()) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			local_irq_enable();
 			kvm_vcpu_srcu_read_lock(vcpu);
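
The deleted open-coded signal check is subsumed by the generic guest-entry helper, which also honors reschedule and notify-resume work and feeds the new signal_exits counter via kvm_handle_signal_exit(). A simplified model of its behavior, assuming the semantics of kernel/entry/kvm.c (illustrative):

	/* Simplified model of xfer_to_guest_mode_handle_work(). */
	static int xfer_work_model(struct kvm_vcpu *vcpu)
	{
		for (;;) {
			unsigned long ti_work = read_thread_flags();

			if (!(ti_work & XFER_TO_GUEST_MODE_WORK))
				return 0;	/* clear to enter the guest */

			if (ti_work & _TIF_SIGPENDING) {
				kvm_handle_signal_exit(vcpu);	/* KVM_EXIT_INTR */
				return -EINTR;
			}
			if (ti_work & _TIF_NEED_RESCHED)
				schedule();
			/* notify-resume work is handled here, then re-check */
		}
	}
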
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index d5c36386878a..c9f741ab26f5 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -8,6 +8,7 @@
 
 #include <linux/kvm_host.h>
 #include <asm/csr.h>
+#include <asm/insn-def.h>
 
 static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
 			     struct kvm_cpu_trap *trap)
@@ -62,11 +63,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
 {
 	register unsigned long taddr asm("a0") = (unsigned long)trap;
 	register unsigned long ttmp asm("a1");
-	register unsigned long val asm("t0");
-	register unsigned long tmp asm("t1");
-	register unsigned long addr asm("t2") = guest_addr;
-	unsigned long flags;
-	unsigned long old_stvec, old_hstatus;
+	unsigned long flags, val, tmp, old_stvec, old_hstatus;
 
 	local_irq_save(flags);
 
@@ -82,29 +79,19 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
 			".option push\n"
 			".option norvc\n"
 			"add %[ttmp], %[taddr], 0\n"
-			/*
-			 * HLVX.HU %[val], (%[addr])
-			 * HLVX.HU t0, (t2)
-			 * 0110010 00011 00111 100 00101 1110011
-			 */
-			".word 0x6433c2f3\n"
+			HLVX_HU(%[val], %[addr])
 			"andi %[tmp], %[val], 3\n"
 			"addi %[tmp], %[tmp], -3\n"
 			"bne %[tmp], zero, 2f\n"
 			"addi %[addr], %[addr], 2\n"
-			/*
-			 * HLVX.HU %[tmp], (%[addr])
-			 * HLVX.HU t1, (t2)
-			 * 0110010 00011 00111 100 00110 1110011
-			 */
-			".word 0x6433c373\n"
+			HLVX_HU(%[tmp], %[addr])
 			"sll %[tmp], %[tmp], 16\n"
 			"add %[val], %[val], %[tmp]\n"
 			"2:\n"
 			".option pop"
 		: [val] "=&r" (val), [tmp] "=&r" (tmp),
 		  [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp),
-		  [addr] "+&r" (addr) : : "memory");
+		  [addr] "+&r" (guest_addr) : : "memory");
 
 		if (trap->scause == EXC_LOAD_PAGE_FAULT)
 			trap->scause = EXC_INST_PAGE_FAULT;
@@ -121,24 +108,14 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
 			".option norvc\n"
 			"add %[ttmp], %[taddr], 0\n"
 #ifdef CONFIG_64BIT
-			/*
-			 * HLV.D %[val], (%[addr])
-			 * HLV.D t0, (t2)
-			 * 0110110 00000 00111 100 00101 1110011
-			 */
-			".word 0x6c03c2f3\n"
+			HLV_D(%[val], %[addr])
 #else
-			/*
-			 * HLV.W %[val], (%[addr])
-			 * HLV.W t0, (t2)
-			 * 0110100 00000 00111 100 00101 1110011
-			 */
-			".word 0x6803c2f3\n"
+			HLV_W(%[val], %[addr])
 #endif
 			".option pop"
 		: [val] "=&r" (val),
 		  [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp)
-		: [addr] "r" (addr) : "memory");
+		: [addr] "r" (guest_addr) : "memory");
 	}
 
 	csr_write(CSR_STVEC, old_stvec);
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index e3f9bdf47c5f..b0add983530a 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -13,6 +13,8 @@
 #include <asm/cacheflush.h>
 
 unsigned int riscv_cbom_block_size;
+EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
+
 static bool noncoherent_supported;
 
 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 4cb5d17e7ead..de6d8b2ea4d8 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -119,8 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR
 
 OBJCOPYFLAGS	:= -O binary
 
-head-y		:= arch/s390/kernel/head64.o
-
 libs-y		+= arch/s390/lib/
 drivers-y	+= drivers/s390/
 
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index bc48fe82d949..6e7f01ca53e6 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -10,11 +10,14 @@
 #include <asm/sclp.h>
 #include <asm/diag.h>
 #include <asm/uv.h>
+#include <asm/abs_lowcore.h>
 #include "decompressor.h"
 #include "boot.h"
 #include "uv.h"
 
 unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned long __bootdata_preserved(__abs_lowcore);
+unsigned long __bootdata_preserved(__memcpy_real_area);
 unsigned long __bootdata(__amode31_base);
 unsigned long __bootdata_preserved(VMALLOC_START);
 unsigned long __bootdata_preserved(VMALLOC_END);
@@ -180,7 +183,10 @@ static void setup_kernel_memory_layout(void)
 	/* force vmalloc and modules below kasan shadow */
 	vmax = min(vmax, KASAN_SHADOW_START);
 #endif
-	MODULES_END = vmax;
+	__memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
+	__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
+				   sizeof(struct lowcore));
+	MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
 	MODULES_VADDR = MODULES_END - MODULES_LEN;
 	VMALLOC_END = MODULES_VADDR;
 
diff --git a/arch/s390/boot/version.c b/arch/s390/boot/version.c
index d32e58bdda6a..fd32f038777f 100644
--- a/arch/s390/boot/version.c
+++ b/arch/s390/boot/version.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <generated/utsversion.h>
 #include <generated/utsrelease.h>
 #include <generated/compile.h>
 #include "boot.h"
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
new file mode 100644
index 000000000000..06ee706b0d78
--- /dev/null
+++ b/arch/s390/crypto/Kconfig
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (s390)"
+
+config CRYPTO_CRC32_S390
+	tristate "CRC32c and CRC32"
+	depends on S390
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32c and CRC32 CRC algorithms
+
+	  Architecture: s390
+
+	  It is available with IBM z13 or later.
+
+config CRYPTO_SHA512_S390
+	tristate "Hash functions: SHA-384 and SHA-512"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z10.
+
+config CRYPTO_SHA1_S390
+	tristate "Hash functions: SHA-1"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z990.
+
+config CRYPTO_SHA256_S390
+	tristate "Hash functions: SHA-224 and SHA-256"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: s390
+
+	  It is available as of z9.
+
+config CRYPTO_SHA3_256_S390
+	tristate "Hash functions: SHA3-224 and SHA3-256"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA3-224 and SHA3-256 secure hash algorithms (FIPS 202)
+
+	  Architecture: s390
+
+	  It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+	tristate "Hash functions: SHA3-384 and SHA3-512"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  SHA3-384 and SHA3-512 secure hash algorithms (FIPS 202)
+
+	  Architecture: s390
+
+	  It is available as of z14.
+
+config CRYPTO_GHASH_S390
+	tristate "Hash functions: GHASH"
+	depends on S390
+	select CRYPTO_HASH
+	help
+	  GCM GHASH hash function (NIST SP800-38D)
+
+	  Architecture: s390
+
+	  It is available as of z196.
+
+config CRYPTO_AES_S390
+	tristate "Ciphers: AES, modes: ECB, CBC, CTR, XTS, GCM"
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	help
+	  Block cipher: AES cipher algorithms (FIPS 197)
+	  AEAD cipher: AES with GCM
+	  Length-preserving ciphers: AES with ECB, CBC, XTS, and CTR modes
+
+	  Architecture: s390
+
+	  As of z9 the ECB and CBC modes are hardware accelerated
+	  for 128 bit keys.
+
+	  As of z10 the ECB and CBC modes are hardware accelerated
+	  for all AES key sizes.
+
+	  As of z196 the CTR mode is hardware accelerated for all AES
+	  key sizes and XTS mode is hardware accelerated for 256 and
+	  512 bit keys.
+
+config CRYPTO_DES_S390
+	tristate "Ciphers: DES and Triple DES EDE, modes: ECB, CBC, CTR"
+	depends on S390
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_DES
+	help
+	  Block ciphers: DES (FIPS 46-2) cipher algorithm
+	  Block ciphers: Triple DES EDE (FIPS 46-3) cipher algorithm
+	  Length-preserving ciphers: DES with ECB, CBC, and CTR modes
+	  Length-preserving ciphers: Triple DES EDE with ECB, CBC, and CTR modes
+
+	  Architecture: s390
+
+	  As of z990 the ECB and CBC modes are hardware accelerated.
+	  As of z196 the CTR mode is hardware accelerated.
+
+config CRYPTO_CHACHA_S390
+	tristate "Ciphers: ChaCha20"
+	depends on S390
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving cipher: ChaCha20 stream cipher (RFC 7539)
+
+	  Architecture: s390
+
+	  It is available as of z13.
+
+endmenu
diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h
new file mode 100644
index 000000000000..4c61b14ee928
--- /dev/null
+++ b/arch/s390/include/asm/abs_lowcore.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ABS_LOWCORE_H
+#define _ASM_S390_ABS_LOWCORE_H
+
+#include <asm/lowcore.h>
+
+#define ABS_LOWCORE_MAP_SIZE	(NR_CPUS * sizeof(struct lowcore))
+
+extern unsigned long __abs_lowcore;
+extern bool abs_lowcore_mapped;
+
+struct lowcore *get_abs_lowcore(unsigned long *flags);
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags);
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc);
+void abs_lowcore_unmap(int cpu);
+
+#endif /* _ASM_S390_ABS_LOWCORE_H */
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index d4e90f2ba77e..bd1596810cc1 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -214,7 +214,6 @@ extern struct ccw_device *ccw_device_create_console(struct ccw_driver *);
 extern void ccw_device_destroy_console(struct ccw_device *);
 extern int ccw_device_enable_console(struct ccw_device *);
 extern void ccw_device_wait_idle(struct ccw_device *);
-extern int ccw_device_force_console(struct ccw_device *);
 
 extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
 extern void ccw_device_dma_free(struct ccw_device *cdev,
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 267a8f88e143..adf7d8cdac7e 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -95,7 +95,8 @@ union ctlreg0 {
 					   Interruption-Filtering Override */
 		unsigned long	   : 3;
 		unsigned long ccc  : 1; /* Cryptography counter control */
-		unsigned long	   : 18;
+		unsigned long pec  : 1; /* PAI extension control */
+		unsigned long	   : 17;
 		unsigned long	   : 3;
 		unsigned long lap  : 1; /* Low-address-protection control */
 		unsigned long	   : 4;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 26fe5e535728..8aa1f6530a3e 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -203,7 +203,9 @@ struct lowcore {
 	__u8	pad_0x1400[0x1500-0x1400];	/* 0x1400 */
 	/* Cryptography-counter designation */
 	__u64	ccd;				/* 0x1500 */
-	__u8	pad_0x1508[0x1800-0x1508];	/* 0x1508 */
+	/* AI-extension counter designation */
+	__u64	aicd;				/* 0x1508 */
+	__u8	pad_0x1510[0x1800-0x1510];	/* 0x1510 */
 
 	/* Transaction abort diagnostic block */
 	struct pgm_tdb pgm_tdb;			/* 0x1800 */
diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h
new file mode 100644
index 000000000000..c7fa838cf6b9
--- /dev/null
+++ b/arch/s390/include/asm/maccess.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_S390_MACCESS_H
+#define __ASM_S390_MACCESS_H
+
+#include <linux/types.h>
+
+struct iov_iter;
+
+extern unsigned long __memcpy_real_area;
+void memcpy_real_init(void);
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count);
+int memcpy_real(void *dest, unsigned long src, size_t count);
+#ifdef CONFIG_CRASH_DUMP
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count);
+#endif
+
+#endif /* __ASM_S390_MACCESS_H */
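
For illustration, a hedged sketch of the crash-dump helper declared above.
It mirrors the call in smp_save_dump_ipl_cpu() later in this patch; the
512 byte size matches the fixed floating point register save area, and the
__LC_FPREGS_SAVE_AREA offset comes from the generated asm-offsets:

	#include <asm/maccess.h>

	/* Sketch: copy the FP register save area of the previous
	 * (crashed) kernel into a local buffer. Returns 0 on success
	 * or -EFAULT if the old memory could not be read completely.
	 */
	static int read_old_fpregs(void *buf)
	{
		return copy_oldmem_kernel(buf, __LC_FPREGS_SAVE_AREA, 512);
	}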
diff --git a/arch/s390/include/asm/os_info.h b/arch/s390/include/asm/os_info.h
index 85248d8fee0c..0d1c74a7a650 100644
--- a/arch/s390/include/asm/os_info.h
+++ b/arch/s390/include/asm/os_info.h
@@ -41,20 +41,6 @@ u32 os_info_csum(struct os_info *os_info);
 
 #ifdef CONFIG_CRASH_DUMP
 void *os_info_old_entry(int nr, unsigned long *size);
-size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count);
-
-static inline int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
-{
-	struct iov_iter iter;
-	struct kvec kvec;
-
-	kvec.iov_base = dst;
-	kvec.iov_len = count;
-	iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
-	if (copy_oldmem_iter(&iter, src, count) < count)
-		return -EFAULT;
-	return 0;
-}
 #else
 static inline void *os_info_old_entry(int nr, unsigned long *size)
 {
diff --git a/arch/s390/include/asm/pai.h b/arch/s390/include/asm/pai.h
index 5b7e33ac6f0b..1a8a6b15d121 100644
--- a/arch/s390/include/asm/pai.h
+++ b/arch/s390/include/asm/pai.h
@@ -17,7 +17,9 @@ struct qpaci_info_block {
 	struct {
 		u64 : 8;
 		u64 num_cc : 8;	/* # of supported crypto counters */
-		u64 : 48;
+		u64 : 9;
+		u64 num_nnpa : 7;	/* # of supported NNPA counters */
+		u64 : 32;
 	};
 };
 
@@ -42,6 +44,8 @@ static inline int qpaci(struct qpaci_info_block *info)
 #define PAI_CRYPTO_BASE			0x1000	/* First event number */
 #define PAI_CRYPTO_MAXCTR		256	/* Max # of event counters */
 #define PAI_CRYPTO_KERNEL_OFFSET	2048
+#define PAI_NNPA_BASE			0x1800	/* First event number */
+#define PAI_NNPA_MAXCTR			128	/* Max # of event counters */
 
 DECLARE_STATIC_KEY_FALSE(pai_key);
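
A short sketch of how the extended query block can be used to probe NNPA
counter support; it mirrors the check in paiext_init() at the end of the
new perf_pai_ext.c below (facility bit 197 indicates the PAI extension 1
facility):

	#include <linux/kernel.h>
	#include <asm/facility.h>
	#include <asm/pai.h>

	/* Sketch: return the number of usable NNPA counters, clamped
	 * to the maximum the PMU supports, or 0 if the facility is
	 * not installed.
	 */
	static unsigned int nnpa_counter_count(void)
	{
		struct qpaci_info_block ib;

		if (!test_facility(197))
			return 0;
		qpaci(&ib);
		return min_t(unsigned int, ib.num_nnpa, PAI_NNPA_MAXCTR);
	}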
 
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 7b4cdadbc023..108e732d7b14 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -117,7 +117,6 @@ struct zpci_bus {
 struct zpci_dev {
 	struct zpci_bus *zbus;
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
-	struct list_head bus_next;
 	struct kref kref;
 	struct hotplug_slot hotplug_slot;
 
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f019df19884d..f1cb9391190d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1777,6 +1777,10 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+extern int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot);
+extern void vmem_unmap_4k_page(unsigned long addr);
+extern pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc);
 extern int s390_enable_sie(void);
 extern int s390_enable_skey(void);
 extern void s390_reset_cmma(struct mm_struct *mm);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index bd66f8e34949..87be3e855bf7 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -186,9 +186,6 @@ struct pt_regs;
 void show_registers(struct pt_regs *regs);
 void show_cacheinfo(struct seq_file *m);
 
-/* Free all resources held by a thread. */
-static inline void release_thread(struct task_struct *tsk) { }
-
 /* Free guarded storage control block */
 void guarded_storage_release(struct task_struct *tsk);
 void gs_load_bc_cb(struct pt_regs *regs);
@@ -306,23 +303,6 @@ static __always_inline void __noreturn disabled_wait(void)
 
 #define ARCH_LOW_ADDRESS_LIMIT	0x7fffffffUL
 
-extern int memcpy_real(void *, unsigned long, size_t);
-extern void memcpy_absolute(void *, void *, size_t);
-
-#define put_abs_lowcore(member, x) do {					\
-	unsigned long __abs_address = offsetof(struct lowcore, member);	\
-	__typeof__(((struct lowcore *)0)->member) __tmp = (x);		\
-									\
-	memcpy_absolute(__va(__abs_address), &__tmp, sizeof(__tmp));	\
-} while (0)
-
-#define get_abs_lowcore(x, member) do {					\
-	unsigned long __abs_address = offsetof(struct lowcore, member);	\
-	__typeof__(((struct lowcore *)0)->member) *__ptr = &(x);	\
-									\
-	memcpy_absolute(__ptr, __va(__abs_address), sizeof(*__ptr));	\
-} while (0)
-
 extern int s390_isolate_bp(void);
 extern int s390_isolate_bp_guest(void);
 
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 7f5d4763357b..73ed2781073b 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -30,7 +30,8 @@ extern void smp_emergency_stop(void);
 
 extern int smp_find_processor_id(u16 address);
 extern int smp_store_status(int cpu);
-extern void smp_save_dump_cpus(void);
+extern void smp_save_dump_ipl_cpu(void);
+extern void smp_save_dump_secondary_cpus(void);
 extern void smp_yield_cpu(int cpu);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
@@ -58,6 +59,7 @@ static inline void smp_cpus_done(unsigned int max_cpus)
 {
 }
 
+extern int smp_reinit_ipl_cpu(void);
 extern int smp_rescan_cpus(void);
 extern void __noreturn cpu_die(void);
 extern void __cpu_die(unsigned int cpu);
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3cbfa9fddd9a..5e6a23299790 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,16 +33,16 @@ CFLAGS_stacktrace.o	+= -fno-optimize-sibling-calls
 CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
 CFLAGS_unwind_bc.o	+= -fno-optimize-sibling-calls
 
-obj-y	:= traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
+obj-y	:= head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
 obj-y	+= sysinfo.o lgr.o os_info.o machine_kexec.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
 obj-y	+= entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
 obj-y	+= nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
-obj-y	+= smp.o text_amode31.o stacktrace.o
+obj-y	+= smp.o text_amode31.o stacktrace.o abs_lowcore.o
 
-extra-y				+= head64.o vmlinux.lds
+extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_SYSFS)		+= nospec-sysfs.o
 CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
@@ -72,7 +72,7 @@ obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf_common.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf.o perf_cpum_sf.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_events.o perf_regs.o
-obj-$(CONFIG_PERF_EVENTS)	+= perf_pai_crypto.o
+obj-$(CONFIG_PERF_EVENTS)	+= perf_pai_crypto.o perf_pai_ext.o
 
 obj-$(CONFIG_TRACEPOINTS)	+= trace.o
 obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c
new file mode 100644
index 000000000000..fb92e8ed0525
--- /dev/null
+++ b/arch/s390/kernel/abs_lowcore.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pgtable.h>
+#include <asm/abs_lowcore.h>
+
+#define ABS_LOWCORE_UNMAPPED	1
+#define ABS_LOWCORE_LAP_ON	2
+#define ABS_LOWCORE_IRQS_ON	4
+
+unsigned long __bootdata_preserved(__abs_lowcore);
+bool __ro_after_init abs_lowcore_mapped;
+
+int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
+{
+	unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	unsigned long phys = __pa(lc);
+	int rc, i;
+
+	for (i = 0; i < LC_PAGES; i++) {
+		rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc);
+		if (rc) {
+			/*
+			 * Do not unmap allocated page tables in case the
+			 * allocation was not requested. In such a case the
+			 * request is expected to come from an atomic context,
+			 * while the unmap attempt might sleep.
+			 */
+			if (alloc) {
+				for (--i; i >= 0; i--) {
+					addr -= PAGE_SIZE;
+					vmem_unmap_4k_page(addr);
+				}
+			}
+			return rc;
+		}
+		addr += PAGE_SIZE;
+		phys += PAGE_SIZE;
+	}
+	return 0;
+}
+
+void abs_lowcore_unmap(int cpu)
+{
+	unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore));
+	int i;
+
+	for (i = 0; i < LC_PAGES; i++) {
+		vmem_unmap_4k_page(addr);
+		addr += PAGE_SIZE;
+	}
+}
+
+struct lowcore *get_abs_lowcore(unsigned long *flags)
+{
+	unsigned long irq_flags;
+	union ctlreg0 cr0;
+	int cpu;
+
+	*flags = 0;
+	cpu = get_cpu();
+	if (abs_lowcore_mapped) {
+		return ((struct lowcore *)__abs_lowcore) + cpu;
+	} else {
+		if (cpu != 0)
+			panic("Invalid unmapped absolute lowcore access\n");
+		local_irq_save(irq_flags);
+		if (!irqs_disabled_flags(irq_flags))
+			*flags |= ABS_LOWCORE_IRQS_ON;
+		__ctl_store(cr0.val, 0, 0);
+		if (cr0.lap) {
+			*flags |= ABS_LOWCORE_LAP_ON;
+			__ctl_clear_bit(0, 28);
+		}
+		*flags |= ABS_LOWCORE_UNMAPPED;
+		return lowcore_ptr[0];
+	}
+}
+
+void put_abs_lowcore(struct lowcore *lc, unsigned long flags)
+{
+	if (abs_lowcore_mapped) {
+		if (flags)
+			panic("Invalid mapped absolute lowcore release\n");
+	} else {
+		if (smp_processor_id() != 0)
+			panic("Invalid mapped absolute lowcore access\n");
+		if (!(flags & ABS_LOWCORE_UNMAPPED))
+			panic("Invalid unmapped absolute lowcore release\n");
+		if (flags & ABS_LOWCORE_LAP_ON)
+			__ctl_set_bit(0, 28);
+		if (flags & ABS_LOWCORE_IRQS_ON)
+			local_irq_enable();
+	}
+	put_cpu();
+}
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index bad8f47fc5d6..dd74fe664ed1 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -21,6 +21,7 @@
 #include <asm/elf.h>
 #include <asm/ipl.h>
 #include <asm/sclp.h>
+#include <asm/maccess.h>
 
 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -53,8 +54,6 @@ struct save_area {
 };
 
 static LIST_HEAD(dump_save_areas);
-static DEFINE_MUTEX(memcpy_real_mutex);
-static char memcpy_real_buf[PAGE_SIZE];
 
 /*
  * Allocate a save area
@@ -116,27 +115,7 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs)
 	memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128));
 }
 
-static size_t copy_to_iter_real(struct iov_iter *iter, unsigned long src, size_t count)
-{
-	size_t len, copied, res = 0;
-
-	mutex_lock(&memcpy_real_mutex);
-	while (count) {
-		len = min(PAGE_SIZE, count);
-		if (memcpy_real(memcpy_real_buf, src, len))
-			break;
-		copied = copy_to_iter(memcpy_real_buf, len, iter);
-		count -= copied;
-		src += copied;
-		res += copied;
-		if (copied < len)
-			break;
-	}
-	mutex_unlock(&memcpy_real_mutex);
-	return res;
-}
-
-size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
+static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
 {
 	size_t len, copied, res = 0;
 
@@ -156,7 +135,7 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
 			} else {
 				len = count;
 			}
-			copied = copy_to_iter_real(iter, src, len);
+			copied = memcpy_real_iter(iter, src, len);
 		}
 		count -= copied;
 		src += copied;
@@ -167,6 +146,19 @@ size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count)
 	return res;
 }
 
+int copy_oldmem_kernel(void *dst, unsigned long src, size_t count)
+{
+	struct iov_iter iter;
+	struct kvec kvec;
+
+	kvec.iov_base = dst;
+	kvec.iov_len = count;
+	iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+	if (copy_oldmem_iter(&iter, src, count) < count)
+		return -EFAULT;
+	return 0;
+}
+
 /*
  * Copy one page from "oldmem"
  */
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 4331c7e6e1c0..d7a82066a638 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -250,7 +250,7 @@ static debug_info_t *debug_info_alloc(const char *name, int pages_per_area,
 	rc->level	   = level;
 	rc->buf_size	   = buf_size;
 	rc->entry_size	   = sizeof(debug_entry_t) + buf_size;
-	strlcpy(rc->name, name, sizeof(rc->name));
+	strscpy(rc->name, name, sizeof(rc->name));
 	memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *));
 	memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *));
 	refcount_set(&(rc->ref_count), 0);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 432c8c987256..6030fdd6997b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -267,7 +267,7 @@ char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
 static void __init setup_boot_command_line(void)
 {
 	/* copy arch command line */
-	strlcpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
+	strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE);
 }
 
 static void __init check_image_bootable(void)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 1cc85b8ff42e..325cbf69ebbd 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -29,6 +29,7 @@
 #include <asm/sclp.h>
 #include <asm/checksum.h>
 #include <asm/debug.h>
+#include <asm/abs_lowcore.h>
 #include <asm/os_info.h>
 #include <asm/sections.h>
 #include <asm/boot_data.h>
@@ -1642,12 +1643,16 @@ static struct shutdown_action __refdata dump_action = {
 static void dump_reipl_run(struct shutdown_trigger *trigger)
 {
 	unsigned long ipib = (unsigned long) reipl_block_actual;
+	struct lowcore *abs_lc;
+	unsigned long flags;
 	unsigned int csum;
 
 	csum = (__force unsigned int)
 	       csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0);
-	put_abs_lowcore(ipib, ipib);
-	put_abs_lowcore(ipib_checksum, csum);
+	abs_lc = get_abs_lowcore(&flags);
+	abs_lc->ipib = ipib;
+	abs_lc->ipib_checksum = csum;
+	put_abs_lowcore(abs_lc, flags);
 	dump_run(trigger);
 }
 
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index ab761c008f98..4579b42286d5 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -21,6 +21,7 @@
 #include <asm/elf.h>
 #include <asm/asm-offsets.h>
 #include <asm/cacheflush.h>
+#include <asm/abs_lowcore.h>
 #include <asm/os_info.h>
 #include <asm/set_memory.h>
 #include <asm/stacktrace.h>
@@ -222,13 +223,18 @@ void machine_kexec_cleanup(struct kimage *image)
 
 void arch_crash_save_vmcoreinfo(void)
 {
+	struct lowcore *abs_lc;
+	unsigned long flags;
+
 	VMCOREINFO_SYMBOL(lowcore_ptr);
 	VMCOREINFO_SYMBOL(high_memory);
 	VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
 	vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
 	vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
 	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
-	put_abs_lowcore(vmcore_info, paddr_vmcoreinfo_note());
+	abs_lc = get_abs_lowcore(&flags);
+	abs_lc->vmcore_info = paddr_vmcoreinfo_note();
+	put_abs_lowcore(abs_lc, flags);
 }
 
 void machine_shutdown(void)
diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c
index 1acc2e05d70f..ec0bd9457e90 100644
--- a/arch/s390/kernel/os_info.c
+++ b/arch/s390/kernel/os_info.c
@@ -13,8 +13,9 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <asm/checksum.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/os_info.h>
+#include <asm/maccess.h>
 #include <asm/asm-offsets.h>
 
 /*
@@ -57,13 +58,16 @@ void os_info_entry_add(int nr, void *ptr, u64 size)
  */
 void __init os_info_init(void)
 {
-	void *ptr = &os_info;
+	struct lowcore *abs_lc;
+	unsigned long flags;
 
 	os_info.version_major = OS_INFO_VERSION_MAJOR;
 	os_info.version_minor = OS_INFO_VERSION_MINOR;
 	os_info.magic = OS_INFO_MAGIC;
 	os_info.csum = os_info_csum(&os_info);
-	put_abs_lowcore(os_info, __pa(ptr));
+	abs_lc = get_abs_lowcore(&flags);
+	abs_lc->os_info = __pa(&os_info);
+	put_abs_lowcore(abs_lc, flags);
 }
 
 #ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index f7dd3c849e68..f043a7ff220b 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
 		raw.frag.data = cpuhw->stop;
 		raw.size = raw.frag.size;
 		data.raw = &raw;
+		data.sample_flags |= PERF_SAMPLE_RAW;
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c
index b38b4ae01589..6826e2a69a21 100644
--- a/arch/s390/kernel/perf_pai_crypto.c
+++ b/arch/s390/kernel/perf_pai_crypto.c
@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void)
 		raw.frag.data = cpump->save;
 		raw.size = raw.frag.size;
 		data.raw = &raw;
+		data.sample_flags |= PERF_SAMPLE_RAW;
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c
new file mode 100644
index 000000000000..d5c7c1e30c17
--- /dev/null
+++ b/arch/s390/kernel/perf_pai_ext.c
@@ -0,0 +1,671 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Performance event support - Processor Activity Instrumentation Extension
+ * Facility
+ *
+ *  Copyright IBM Corp. 2022
+ *  Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+#define KMSG_COMPONENT	"pai_ext"
+#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/io.h>
+
+#include <asm/cpu_mcf.h>
+#include <asm/ctl_reg.h>
+#include <asm/pai.h>
+#include <asm/debug.h>
+
+#define	PAIE1_CB_SZ		0x200	/* Size of PAIE1 control block */
+#define	PAIE1_CTRBLOCK_SZ	0x400	/* Size of PAIE1 counter blocks */
+
+static debug_info_t *paiext_dbg;
+static unsigned int paiext_cnt;	/* Extracted with QPACI instruction */
+
+enum paiext_mode {
+	PAI_MODE_NONE,
+	PAI_MODE_SAMPLING,
+	PAI_MODE_COUNTER,
+};
+
+struct pai_userdata {
+	u16 num;
+	u64 value;
+} __packed;
+
+/* Create the PAI extension 1 control block area.
+ * The PAI extension 1 control block is pointed to by lowcore
+ * address 0x1508 for each CPU. This control block is 512 bytes in size
+ * and requires a 512 byte boundary alignment.
+ */
+struct paiext_cb {		/* PAI extension 1 control block */
+	u64 header;		/* Not used */
+	u64 reserved1;
+	u64 acc;		/* Addr to analytics counter control block */
+	u8 reserved2[488];
+} __packed;
+
+struct paiext_map {
+	unsigned long *area;		/* Area for CPU to store counters */
+	struct pai_userdata *save;	/* Area to store non-zero counters */
+	enum paiext_mode mode;		/* Type of event */
+	unsigned int active_events;	/* # of PAI Extension users */
+	unsigned int refcnt;
+	struct perf_event *event;	/* Perf event for sampling */
+	struct paiext_cb *paiext_cb;	/* PAI extension control block area */
+};
+
+struct paiext_mapptr {
+	struct paiext_map *mapptr;
+};
+
+static struct paiext_root {		/* Anchor to per CPU data */
+	int refcnt;			/* Overall active events */
+	struct paiext_mapptr __percpu *mapptr;
+} paiext_root;
+
+/* Free per CPU data when the last event is removed. */
+static void paiext_root_free(void)
+{
+	if (!--paiext_root.refcnt) {
+		free_percpu(paiext_root.mapptr);
+		paiext_root.mapptr = NULL;
+	}
+}
+
+/* On initialization of the first event, also allocate per CPU data
+ * dynamically. Start with an array of pointers; the array size is the
+ * maximum number of
+ * CPUs possible, which might be larger than the number of CPUs currently
+ * online.
+ */
+static int paiext_root_alloc(void)
+{
+	if (++paiext_root.refcnt == 1) {
+		/* The memory is already zeroed. */
+		paiext_root.mapptr = alloc_percpu(struct paiext_mapptr);
+		if (!paiext_root.mapptr) {
+			/* Returning without refcnt adjustment is ok. The
+			 * error code is handled by paiext_alloc(), which
+			 * decrements refcnt when an event cannot be
+			 * created.
+			 */
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/* Protects against concurrent increments of the sampler and counter
+ * reference counts and prohibits concurrent execution of counting and
+ * sampling events.
+ * Also ensures that the analytics counter block is deallocated only when
+ * both sampling and counting on that CPU have ceased.
+ * For details see paiext_alloc().
+ */
+static DEFINE_MUTEX(paiext_reserve_mutex);
+
+/* Free all memory allocated for event counting/sampling setup */
+static void paiext_free(struct paiext_mapptr *mp)
+{
+	kfree(mp->mapptr->area);
+	kfree(mp->mapptr->paiext_cb);
+	kvfree(mp->mapptr->save);
+	kfree(mp->mapptr);
+	mp->mapptr = NULL;
+}
+
+/* Release the PMU if event is the last perf event */
+static void paiext_event_destroy(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+	struct paiext_map *cpump = mp->mapptr;
+
+	mutex_lock(&paiext_reserve_mutex);
+	cpump->event = NULL;
+	if (!--cpump->refcnt)		/* Last reference gone */
+		paiext_free(mp);
+	paiext_root_free();
+	mutex_unlock(&paiext_reserve_mutex);
+	debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__,
+			    event->cpu, mp->mapptr);
+
+}
+
+/* Used to avoid races in checking concurrent access of counting and
+ * sampling for pai_extension events.
+ *
+ * Only one instance of event pai_ext/NNPA_ALL/ for sampling is
+ * allowed and when this event is running, no counting event is allowed.
+ * Several counting events are allowed in parallel, but no sampling event
+ * is allowed while one (or more) counting events are running.
+ *
+ * This function is called in process context and it is safe to block.
+ * When the event initialization function fails, no other callback will
+ * be invoked.
+ *
+ * Allocate the memory for the event.
+ */
+static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event)
+{
+	struct paiext_mapptr *mp;
+	struct paiext_map *cpump;
+	int rc;
+
+	mutex_lock(&paiext_reserve_mutex);
+
+	rc = paiext_root_alloc();
+	if (rc)
+		goto unlock;
+
+	mp = per_cpu_ptr(paiext_root.mapptr, event->cpu);
+	cpump = mp->mapptr;
+	if (!cpump) {			/* Paiext_map allocated? */
+		rc = -ENOMEM;
+		cpump = kzalloc(sizeof(*cpump), GFP_KERNEL);
+		if (!cpump)
+			goto unlock;
+
+		/* Allocate memory for counter area and counter extraction.
+		 * These are
+		 * - a 512 byte block, which requires 512 byte boundary alignment.
+		 * - a 1 KB block, which requires 1 KB boundary alignment.
+		 * Only the first counting event has to allocate the area.
+		 *
+		 * Note: This works with commit 59bb47985c1d by default.
+		 * Backporting this to kernels without this commit might
+		 * need adjustment.
+		 */
+		mp->mapptr = cpump;
+		cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL);
+		cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL);
+		cpump->save = kvmalloc_array(paiext_cnt + 1,
+					     sizeof(struct pai_userdata),
+					     GFP_KERNEL);
+		if (!cpump->save || !cpump->area || !cpump->paiext_cb) {
+			paiext_free(mp);
+			goto unlock;
+		}
+		cpump->mode = a->sample_period ? PAI_MODE_SAMPLING
+					       : PAI_MODE_COUNTER;
+	} else {
+		/* Multiple invocations, check what is active.
+		 * Supported are multiple counter events or only one sampling
+		 * event concurrently at any one time.
+		 */
+		if (cpump->mode == PAI_MODE_SAMPLING ||
+		    (cpump->mode == PAI_MODE_COUNTER && a->sample_period)) {
+			rc = -EBUSY;
+			goto unlock;
+		}
+	}
+
+	rc = 0;
+	cpump->event = event;
+	++cpump->refcnt;
+
+unlock:
+	if (rc) {
+		/* Error in allocation of event, decrement anchor. Since
+		 * the event in not created, its destroy() function is never
+		 * invoked. Adjust the reference counter for the anchor.
+		 */
+		paiext_root_free();
+	}
+	mutex_unlock(&paiext_reserve_mutex);
+	/* If rc is non-zero, no increment of counter/sampler was done. */
+	return rc;
+}
+
+/* The PAI extension 1 control block supports up to 128 entries. Validate
+ * the event number and, if valid, store the offset to the analytics
+ * counter control block address within the PAIE1 control block.
+ */
+static int paiext_event_valid(struct perf_event *event)
+{
+	u64 cfg = event->attr.config;
+
+	if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) {
+		/* Offset NNPA in paiext_cb */
+		event->hw.config_base = offsetof(struct paiext_cb, acc);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/* Might be called on a different CPU than the one the event is intended for. */
+static int paiext_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *a = &event->attr;
+	int rc;
+
+	/* PMU pai_ext registered as PERF_TYPE_RAW, check event type */
+	if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
+		return -ENOENT;
+	/* PAI extension event must be valid and in supported range */
+	rc = paiext_event_valid(event);
+	if (rc)
+		return rc;
+	/* Allow only CPU wide operation, no process context for now. */
+	if (event->hw.target || event->cpu == -1)
+		return -ENOENT;
+	/* Allow only event NNPA_ALL for sampling. */
+	if (a->sample_period && a->config != PAI_NNPA_BASE)
+		return -EINVAL;
+	/* Prohibit exclude_user event selection */
+	if (a->exclude_user)
+		return -EINVAL;
+
+	rc = paiext_alloc(a, event);
+	if (rc)
+		return rc;
+	event->hw.last_tag = 0;
+	event->destroy = paiext_event_destroy;
+
+	if (a->sample_period) {
+		a->sample_period = 1;
+		a->freq = 0;
+		/* Register for paiext_sched_task() to be called */
+		event->attach_state |= PERF_ATTACH_SCHED_CB;
+		/* Add raw data which are the memory mapped counters */
+		a->sample_type |= PERF_SAMPLE_RAW;
+		/* Turn off inheritance */
+		a->inherit = 0;
+	}
+
+	return 0;
+}
+
+static u64 paiext_getctr(struct paiext_map *cpump, int nr)
+{
+	return cpump->area[nr];
+}
+
+/* Read the counter values. Return the value from the counter's location in
+ * the buffer. For event NNPA_ALL, sum up all counters.
+ */
+static u64 paiext_getdata(struct perf_event *event)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	u64 sum = 0;
+	int i;
+
+	if (event->attr.config != PAI_NNPA_BASE)
+		return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE);
+
+	for (i = 1; i <= paiext_cnt; i++)
+		sum += paiext_getctr(cpump, i);
+
+	return sum;
+}
+
+static u64 paiext_getall(struct perf_event *event)
+{
+	return paiext_getdata(event);
+}
+
+static void paiext_read(struct perf_event *event)
+{
+	u64 prev, new, delta;
+
+	prev = local64_read(&event->hw.prev_count);
+	new = paiext_getall(event);
+	local64_set(&event->hw.prev_count, new);
+	delta = new - prev;
+	local64_add(delta, &event->count);
+}
+
+static void paiext_start(struct perf_event *event, int flags)
+{
+	u64 sum;
+
+	if (event->hw.last_tag)
+		return;
+	event->hw.last_tag = 1;
+	sum = paiext_getall(event);		/* Get current value */
+	local64_set(&event->hw.prev_count, sum);
+	local64_set(&event->count, 0);
+}
+
+static int paiext_add(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	if (++cpump->active_events == 1) {
+		S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb);
+		pcb->acc = virt_to_phys(cpump->area) | 0x1;
+		/* Enable CPU instruction lookup for PAIE1 control block */
+		__ctl_set_bit(0, 49);
+		debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+				    __func__, S390_lowcore.aicd, pcb->acc);
+	}
+	if (flags & PERF_EF_START && !event->attr.sample_period) {
+		/* Only counting needs initial counter value */
+		paiext_start(event, PERF_EF_RELOAD);
+	}
+	event->hw.state = 0;
+	if (event->attr.sample_period) {
+		cpump->event = event;
+		perf_sched_cb_inc(event->pmu);
+	}
+	return 0;
+}
+
+static void paiext_stop(struct perf_event *event, int flags)
+{
+	paiext_read(event);
+	event->hw.state = PERF_HES_STOPPED;
+}
+
+static void paiext_del(struct perf_event *event, int flags)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct paiext_cb *pcb = cpump->paiext_cb;
+
+	if (event->attr.sample_period)
+		perf_sched_cb_dec(event->pmu);
+	if (!event->attr.sample_period) {
+		/* Only counting needs to read counter */
+		paiext_stop(event, PERF_EF_UPDATE);
+	}
+	if (--cpump->active_events == 0) {
+		/* Disable CPU instruction lookup for PAIE1 control block */
+		__ctl_clear_bit(0, 49);
+		pcb->acc = 0;
+		S390_lowcore.aicd = 0;
+		debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n",
+				    __func__, S390_lowcore.aicd, pcb->acc);
+	}
+}
+
+/* Create raw data and save it in the buffer. Returns the number of bytes
+ * copied. Saves only nonzero counter entries of the form
+ * 2 bytes: counter number
+ * 8 bytes: counter value
+ */
+static size_t paiext_copy(struct paiext_map *cpump)
+{
+	struct pai_userdata *userdata = cpump->save;
+	int i, outidx = 0;
+
+	for (i = 1; i <= paiext_cnt; i++) {
+		u64 val = paiext_getctr(cpump, i);
+
+		if (val) {
+			userdata[outidx].num = i;
+			userdata[outidx].value = val;
+			outidx++;
+		}
+	}
+	return outidx * sizeof(*userdata);
+}
+
+/* Write a sample when one or more counter values are nonzero.
+ *
+ * Note: The functions paiext_sched_task() and paiext_push_sample() are not
+ * invoked after paiext_del() has been called, because of
+ * perf_sched_cb_dec().
+ * The functions paiext_sched_task() and paiext_push_sample() are only
+ * called when sampling is active, after perf_sched_cb_inc() has installed
+ * paiext_sched_task() as the callback to run at context switch time
+ * (see paiext_add()).
+ *
+ * This causes perf_event_context_sched_out() and
+ * perf_event_context_sched_in() to check whether the PMU has installed a
+ * sched_task() callback. That callback is not active after paiext_del()
+ * returns and has deleted the event on that CPU.
+ */
+static int paiext_push_sample(void)
+{
+	struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr);
+	struct paiext_map *cpump = mp->mapptr;
+	struct perf_event *event = cpump->event;
+	struct perf_sample_data data;
+	struct perf_raw_record raw;
+	struct pt_regs regs;
+	size_t rawsize;
+	int overflow;
+
+	rawsize = paiext_copy(cpump);
+	if (!rawsize)			/* No incremented counters */
+		return 0;
+
+	/* Setup perf sample */
+	memset(&regs, 0, sizeof(regs));
+	memset(&raw, 0, sizeof(raw));
+	memset(&data, 0, sizeof(data));
+	perf_sample_data_init(&data, 0, event->hw.last_period);
+	if (event->attr.sample_type & PERF_SAMPLE_TID) {
+		data.tid_entry.pid = task_tgid_nr(current);
+		data.tid_entry.tid = task_pid_nr(current);
+	}
+	if (event->attr.sample_type & PERF_SAMPLE_TIME)
+		data.time = event->clock();
+	if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
+		data.id = event->id;
+	if (event->attr.sample_type & PERF_SAMPLE_CPU)
+		data.cpu_entry.cpu = smp_processor_id();
+	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
+		raw.frag.size = rawsize;
+		raw.frag.data = cpump->save;
+		raw.size = raw.frag.size;
+		data.raw = &raw;
+	}
+
+	overflow = perf_event_overflow(event, &data, &regs);
+	perf_event_update_userpage(event);
+	/* Clear counter area after read */
+	memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ);
+	return overflow;
+}
+
+/* Called on schedule-in and schedule-out. No access to event structure,
+ * but for sampling only event NNPA_ALL is allowed.
+ */
+static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	/* We started with a clean page on event installation. So read out
+	 * the results on schedule-out and, if the page was dirty, clear them.
+	 */
+	if (!sched_in)
+		paiext_push_sample();
+}
+
+/* Attribute definitions for the PAI extension 1 interface. As with other
+ * CPU Measurement Facilities, there is one attribute per mapped counter.
+ * The number of mapped counters may vary per machine generation. Use
+ * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction
+ * to determine the number of mapped counters. The instruction returns
+ * a positive number, which is the highest number of supported counters.
+ * All counters less than this number are also supported; there are no
+ * holes. A returned number of zero means no support for mapped counters.
+ *
+ * The identification of the counter is a unique number. The chosen range
+ * is 0x1800 + offset in mapped kernel page.
+ * All CPU Measurement Facility counters identifiers must be unique and
+ * the numbers from 0 to 496 are already used for the CPU Measurement
+ * Counter facility. Numbers 0x1000 to 0x103e are used for PAI cryptography
+ * counters.
+ * Numbers 0xb0000, 0xbc000 and 0xbd000 are already
+ * used for the CPU Measurement Sampling facility.
+ */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *paiext_format_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group paiext_events_group = {
+	.name = "events",
+	.attrs = NULL,			/* Filled in attr_event_init() */
+};
+
+static struct attribute_group paiext_format_group = {
+	.name = "format",
+	.attrs = paiext_format_attr,
+};
+
+static const struct attribute_group *paiext_attr_groups[] = {
+	&paiext_events_group,
+	&paiext_format_group,
+	NULL,
+};
+
+/* Performance monitoring unit for mapped counters */
+static struct pmu paiext = {
+	.task_ctx_nr  = perf_invalid_context,
+	.event_init   = paiext_event_init,
+	.add	      = paiext_add,
+	.del	      = paiext_del,
+	.start	      = paiext_start,
+	.stop	      = paiext_stop,
+	.read	      = paiext_read,
+	.sched_task   = paiext_sched_task,
+	.attr_groups  = paiext_attr_groups,
+};
+
+/* List of symbolic PAI extension 1 NNPA counter names. */
+static const char * const paiext_ctrnames[] = {
+	[0] = "NNPA_ALL",
+	[1] = "NNPA_ADD",
+	[2] = "NNPA_SUB",
+	[3] = "NNPA_MUL",
+	[4] = "NNPA_DIV",
+	[5] = "NNPA_MIN",
+	[6] = "NNPA_MAX",
+	[7] = "NNPA_LOG",
+	[8] = "NNPA_EXP",
+	[9] = "NNPA_IBM_RESERVED_9",
+	[10] = "NNPA_RELU",
+	[11] = "NNPA_TANH",
+	[12] = "NNPA_SIGMOID",
+	[13] = "NNPA_SOFTMAX",
+	[14] = "NNPA_BATCHNORM",
+	[15] = "NNPA_MAXPOOL2D",
+	[16] = "NNPA_AVGPOOL2D",
+	[17] = "NNPA_LSTMACT",
+	[18] = "NNPA_GRUACT",
+	[19] = "NNPA_CONVOLUTION",
+	[20] = "NNPA_MATMUL_OP",
+	[21] = "NNPA_MATMUL_OP_BCAST23",
+	[22] = "NNPA_SMALLBATCH",
+	[23] = "NNPA_LARGEDIM",
+	[24] = "NNPA_SMALLTENSOR",
+	[25] = "NNPA_1MFRAME",
+	[26] = "NNPA_2GFRAME",
+	[27] = "NNPA_ACCESSEXCEPT",
+};
+
+static void __init attr_event_free(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+	struct device_attribute *dap;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		dap = container_of(attrs[i], struct device_attribute, attr);
+		pa = container_of(dap, struct perf_pmu_events_attr, attr);
+		kfree(pa);
+	}
+	kfree(attrs);
+}
+
+static int __init attr_event_init_one(struct attribute **attrs, int num)
+{
+	struct perf_pmu_events_attr *pa;
+
+	pa = kzalloc(sizeof(*pa), GFP_KERNEL);
+	if (!pa)
+		return -ENOMEM;
+
+	sysfs_attr_init(&pa->attr.attr);
+	pa->id = PAI_NNPA_BASE + num;
+	pa->attr.attr.name = paiext_ctrnames[num];
+	pa->attr.attr.mode = 0444;
+	pa->attr.show = cpumf_events_sysfs_show;
+	pa->attr.store = NULL;
+	attrs[num] = &pa->attr.attr;
+	return 0;
+}
+
+/* Create PMU sysfs event attributes on the fly. */
+static int __init attr_event_init(void)
+{
+	struct attribute **attrs;
+	int ret, i;
+
+	attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs),
+			      GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+	for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) {
+		ret = attr_event_init_one(attrs, i);
+		if (ret) {
+			attr_event_free(attrs, i);
+			return ret;
+		}
+	}
+	attrs[i] = NULL;
+	paiext_events_group.attrs = attrs;
+	return 0;
+}
+
+static int __init paiext_init(void)
+{
+	struct qpaci_info_block ib;
+	int rc = -ENOMEM;
+
+	if (!test_facility(197))
+		return 0;
+
+	qpaci(&ib);
+	paiext_cnt = ib.num_nnpa;
+	if (paiext_cnt >= PAI_NNPA_MAXCTR)
+		paiext_cnt = PAI_NNPA_MAXCTR;
+	if (!paiext_cnt)
+		return 0;
+
+	rc = attr_event_init();
+	if (rc) {
+		pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n");
+		return rc;
+	}
+
+	/* Setup s390dbf facility */
+	paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128);
+	if (!paiext_dbg) {
+		pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n");
+		rc = -ENOMEM;
+		goto out_init;
+	}
+	debug_register_view(paiext_dbg, &debug_sprintf_view);
+
+	rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1);
+	if (rc) {
+		pr_err("Registration of " KMSG_COMPONENT " PMU failed with "
+		       "rc=%i\n", rc);
+		goto out_pmu;
+	}
+
+	return 0;
+
+out_pmu:
+	debug_unregister_view(paiext_dbg, &debug_sprintf_view);
+	debug_unregister(paiext_dbg);
+out_init:
+	attr_event_free(paiext_events_group.attrs,
+			ARRAY_SIZE(paiext_ctrnames) + 1);
+	return rc;
+}
+
+device_initcall(paiext_init);
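
To show what consumers of this PMU see: with sampling enabled, each sample
carries a PERF_SAMPLE_RAW payload made of the packed records written by
paiext_copy() above (2 bytes counter number, 8 bytes counter value). A
hedged userspace sketch that decodes such a payload; the byte order and the
surrounding perf event parsing are assumptions of this sketch:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	struct pai_userdata {
		uint16_t num;
		uint64_t value;
	} __attribute__((packed));

	/* Walk the raw payload record by record and print each
	 * counter. memcpy() avoids unaligned access to the packed
	 * records.
	 */
	static void decode_nnpa_raw(const unsigned char *raw, size_t size)
	{
		struct pai_userdata rec;
		size_t off;

		for (off = 0; off + sizeof(rec) <= size; off += sizeof(rec)) {
			memcpy(&rec, raw + off, sizeof(rec));
			printf("counter %u = %llu\n", rec.num,
			       (unsigned long long)rec.value);
		}
	}

Since the PMU registers under the name "pai_ext", counting presumably looks
like "perf stat -e pai_ext/NNPA_ALL/ -a" on a machine with the facility,
though the exact event syntax depends on the installed perf tool.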
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index bbd4bde4f65d..ab19ddb09d65 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -58,7 +58,7 @@
 #include <asm/smp.h>
 #include <asm/mmu_context.h>
 #include <asm/cpcmd.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/nmi.h>
 #include <asm/irq.h>
 #include <asm/page.h>
@@ -74,6 +74,7 @@
 #include <asm/alternative.h>
 #include <asm/nospec-branch.h>
 #include <asm/mem_detect.h>
+#include <asm/maccess.h>
 #include <asm/uv.h>
 #include <asm/asm-offsets.h>
 #include "entry.h"
@@ -395,6 +396,7 @@ void __init arch_call_rest_init(void)
 {
 	unsigned long stack;
 
+	smp_reinit_ipl_cpu();
 	stack = stack_alloc();
 	if (!stack)
 		panic("Couldn't allocate kernel stack");
@@ -411,8 +413,9 @@ void __init arch_call_rest_init(void)
 static void __init setup_lowcore_dat_off(void)
 {
 	unsigned long int_psw_mask = PSW_KERNEL_BITS;
+	struct lowcore *abs_lc, *lc;
 	unsigned long mcck_stack;
-	struct lowcore *lc;
+	unsigned long flags;
 
 	if (IS_ENABLED(CONFIG_KASAN))
 		int_psw_mask |= PSW_MASK_DAT;
@@ -474,12 +477,14 @@ static void __init setup_lowcore_dat_off(void)
 	lc->restart_data = 0;
 	lc->restart_source = -1U;
 
-	put_abs_lowcore(restart_stack, lc->restart_stack);
-	put_abs_lowcore(restart_fn, lc->restart_fn);
-	put_abs_lowcore(restart_data, lc->restart_data);
-	put_abs_lowcore(restart_source, lc->restart_source);
-	put_abs_lowcore(restart_psw, lc->restart_psw);
-	put_abs_lowcore(mcesad, lc->mcesad);
+	abs_lc = get_abs_lowcore(&flags);
+	abs_lc->restart_stack = lc->restart_stack;
+	abs_lc->restart_fn = lc->restart_fn;
+	abs_lc->restart_data = lc->restart_data;
+	abs_lc->restart_source = lc->restart_source;
+	abs_lc->restart_psw = lc->restart_psw;
+	abs_lc->mcesad = lc->mcesad;
+	put_abs_lowcore(abs_lc, flags);
 
 	mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
 	if (!mcck_stack)
@@ -500,8 +505,8 @@ static void __init setup_lowcore_dat_off(void)
 
 static void __init setup_lowcore_dat_on(void)
 {
-	struct lowcore *lc = lowcore_ptr[0];
-	int cr;
+	struct lowcore *abs_lc;
+	unsigned long flags;
 
 	__ctl_clear_bit(0, 28);
 	S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
@@ -510,10 +515,15 @@ static void __init setup_lowcore_dat_on(void)
 	S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
 	__ctl_set_bit(0, 28);
 	__ctl_store(S390_lowcore.cregs_save_area, 0, 15);
-	put_abs_lowcore(restart_flags, RESTART_FLAG_CTLREGS);
-	put_abs_lowcore(program_new_psw, lc->program_new_psw);
-	for (cr = 0; cr < ARRAY_SIZE(lc->cregs_save_area); cr++)
-		put_abs_lowcore(cregs_save_area[cr], lc->cregs_save_area[cr]);
+	if (abs_lowcore_map(0, lowcore_ptr[0], true))
+		panic("Couldn't setup absolute lowcore");
+	abs_lowcore_mapped = true;
+	abs_lc = get_abs_lowcore(&flags);
+	abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+	abs_lc->program_new_psw = S390_lowcore.program_new_psw;
+	memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
+	       sizeof(abs_lc->cregs_save_area));
+	put_abs_lowcore(abs_lc, flags);
 }
 
 static struct resource code_resource = {
@@ -1019,10 +1029,10 @@ void __init setup_arch(char **cmdline_p)
 	reserve_crashkernel();
 #ifdef CONFIG_CRASH_DUMP
 	/*
-	 * Be aware that smp_save_dump_cpus() triggers a system reset.
+	 * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
 	 * Therefore CPU and device initialization should be done afterwards.
 	 */
-	smp_save_dump_cpus();
+	smp_save_dump_secondary_cpus();
 #endif
 
 	setup_resources();
@@ -1041,12 +1051,15 @@ void __init setup_arch(char **cmdline_p)
 	 * Create kernel page tables and switch to virtual addressing.
 	 */
         paging_init();
-
+	memcpy_real_init();
 	/*
 	 * After paging_init created the kernel page table, the new PSWs
 	 * in lowcore can now run with DAT enabled.
 	 */
 	setup_lowcore_dat_on();
+#ifdef CONFIG_CRASH_DUMP
+	smp_save_dump_ipl_cpu();
+#endif
 
         /* Setup default console */
 	conmode_default();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 30c91d565933..0031325ce4bc 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -45,7 +45,7 @@
 #include <asm/irq.h>
 #include <asm/tlbflush.h>
 #include <asm/vtimer.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/sclp.h>
 #include <asm/debug.h>
 #include <asm/os_info.h>
@@ -55,6 +55,7 @@
 #include <asm/stacktrace.h>
 #include <asm/topology.h>
 #include <asm/vdso.h>
+#include <asm/maccess.h>
 #include "entry.h"
 
 enum {
@@ -212,10 +213,14 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 	lc->preempt_count = PREEMPT_DISABLED;
 	if (nmi_alloc_mcesa(&lc->mcesad))
 		goto out;
+	if (abs_lowcore_map(cpu, lc, true))
+		goto out_mcesa;
 	lowcore_ptr[cpu] = lc;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc));
 	return 0;
 
+out_mcesa:
+	nmi_free_mcesa(&lc->mcesad);
 out:
 	stack_free(mcck_stack);
 	stack_free(async_stack);
@@ -237,6 +242,7 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 	mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET;
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[cpu] = NULL;
+	abs_lowcore_unmap(cpu);
 	nmi_free_mcesa(&lc->mcesad);
 	stack_free(async_stack);
 	stack_free(mcck_stack);
@@ -315,9 +321,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
 			  pcpu_delegate_fn *func,
 			  void *data, unsigned long stack)
 {
-	struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
-	unsigned int source_cpu = stap();
+	struct lowcore *lc, *abs_lc;
+	unsigned int source_cpu;
+	unsigned long flags;
 
+	lc = lowcore_ptr[pcpu - pcpu_devices];
+	source_cpu = stap();
 	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
 	if (pcpu->address == source_cpu) {
 		call_on_stack(2, stack, void, __pcpu_delegate,
@@ -332,10 +341,12 @@ static void pcpu_delegate(struct pcpu *pcpu,
 		lc->restart_data = (unsigned long)data;
 		lc->restart_source = source_cpu;
 	} else {
-		put_abs_lowcore(restart_stack, stack);
-		put_abs_lowcore(restart_fn, (unsigned long)func);
-		put_abs_lowcore(restart_data, (unsigned long)data);
-		put_abs_lowcore(restart_source, source_cpu);
+		abs_lc = get_abs_lowcore(&flags);
+		abs_lc->restart_stack = stack;
+		abs_lc->restart_fn = (unsigned long)func;
+		abs_lc->restart_data = (unsigned long)data;
+		abs_lc->restart_source = source_cpu;
+		put_abs_lowcore(abs_lc, flags);
 	}
 	__bpon();
 	asm volatile(
@@ -581,6 +592,8 @@ static DEFINE_SPINLOCK(ctl_lock);
 void smp_ctl_set_clear_bit(int cr, int bit, bool set)
 {
 	struct ec_creg_mask_parms parms = { .cr = cr, };
+	struct lowcore *abs_lc;
+	unsigned long flags;
 	u64 ctlreg;
 
 	if (set) {
@@ -591,9 +604,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
 		parms.andval = ~(1UL << bit);
 	}
 	spin_lock(&ctl_lock);
-	get_abs_lowcore(ctlreg, cregs_save_area[cr]);
+	abs_lc = get_abs_lowcore(&flags);
+	ctlreg = abs_lc->cregs_save_area[cr];
 	ctlreg = (ctlreg & parms.andval) | parms.orval;
-	put_abs_lowcore(cregs_save_area[cr], ctlreg);
+	abs_lc->cregs_save_area[cr] = ctlreg;
+	put_abs_lowcore(abs_lc, flags);
 	spin_unlock(&ctl_lock);
 	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
 }
@@ -650,35 +665,36 @@ int smp_store_status(int cpu)
  *    This case does not exist for s390 anymore, setup_arch explicitly
  *    deactivates the elfcorehdr= kernel parameter
  */
-static __init void smp_save_cpu_vxrs(struct save_area *sa, u16 addr,
-				     bool is_boot_cpu, __vector128 *vxrs)
+static bool dump_available(void)
 {
-	if (is_boot_cpu)
-		vxrs = boot_cpu_vector_save_area;
-	else
-		__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(vxrs));
-	save_area_add_vxrs(sa, vxrs);
+	return oldmem_data.start || is_ipl_type_dump();
 }
 
-static __init void smp_save_cpu_regs(struct save_area *sa, u16 addr,
-				     bool is_boot_cpu, void *regs)
+void __init smp_save_dump_ipl_cpu(void)
 {
-	if (is_boot_cpu)
-		copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
-	else
-		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(regs));
+	struct save_area *sa;
+	void *regs;
+
+	if (!dump_available())
+		return;
+	sa = save_area_alloc(true);
+	regs = memblock_alloc(512, 8);
+	if (!sa || !regs)
+		panic("could not allocate memory for boot CPU save area\n");
+	copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512);
 	save_area_add_regs(sa, regs);
+	memblock_free(regs, 512);
+	if (MACHINE_HAS_VX)
+		save_area_add_vxrs(sa, boot_cpu_vector_save_area);
 }
 
-void __init smp_save_dump_cpus(void)
+void __init smp_save_dump_secondary_cpus(void)
 {
 	int addr, boot_cpu_addr, max_cpu_addr;
 	struct save_area *sa;
-	bool is_boot_cpu;
 	void *page;
 
-	if (!(oldmem_data.start || is_ipl_type_dump()))
-		/* No previous system present, normal boot. */
+	if (!dump_available())
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
 	page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
@@ -691,26 +707,20 @@ void __init smp_save_dump_cpus(void)
 	boot_cpu_addr = stap();
 	max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev;
 	for (addr = 0; addr <= max_cpu_addr; addr++) {
+		if (addr == boot_cpu_addr)
+			continue;
 		if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) ==
 		    SIGP_CC_NOT_OPERATIONAL)
 			continue;
-		is_boot_cpu = (addr == boot_cpu_addr);
-		/* Allocate save area */
-		sa = save_area_alloc(is_boot_cpu);
+		sa = save_area_alloc(false);
 		if (!sa)
 			panic("could not allocate memory for save area\n");
-		if (MACHINE_HAS_VX)
-			/* Get the vector registers */
-			smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
-		/*
-		 * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
-		 * of the boot CPU are stored in the HSA. To retrieve
-		 * these registers an SCLP request is required which is
-		 * done by drivers/s390/char/zcore.c:init_cpu_info()
-		 */
-		if (!is_boot_cpu || oldmem_data.start)
-			/* Get the CPU registers */
-			smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
+		__pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page));
+		save_area_add_regs(sa, page);
+		if (MACHINE_HAS_VX) {
+			__pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page));
+			save_area_add_vxrs(sa, page);
+		}
 	}
 	memblock_free(page, PAGE_SIZE);
 	diag_amode31_ops.diag308_reset();
@@ -1256,7 +1266,7 @@ static __always_inline void set_new_lowcore(struct lowcore *lc)
 		: "memory", "cc");
 }
 
-static int __init smp_reinit_ipl_cpu(void)
+int __init smp_reinit_ipl_cpu(void)
 {
 	unsigned long async_stack, nodat_stack, mcck_stack;
 	struct lowcore *lc, *lc_ipl;
@@ -1281,6 +1291,8 @@ static int __init smp_reinit_ipl_cpu(void)
 	__ctl_clear_bit(0, 28); /* disable lowcore protection */
 	S390_lowcore.mcesad = mcesad;
 	__ctl_load(cr0, 0, 0);
+	if (abs_lowcore_map(0, lc, false))
+		panic("Couldn't remap absolute lowcore");
 	lowcore_ptr[0] = lc;
 	local_mcck_enable();
 	local_irq_restore(flags);
@@ -1291,4 +1303,3 @@ static int __init smp_reinit_ipl_cpu(void)
 
 	return 0;
 }
-early_initcall(smp_reinit_ipl_cpu);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 5075cde77b29..535099f2736d 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -69,10 +69,11 @@ static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
 int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
+	VMA_ITERATOR(vmi, mm, 0);
 	struct vm_area_struct *vma;
 
 	mmap_read_lock(mm);
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;
 
 		if (!vma_is_special_mapping(vma, &vvar_mapping))
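
This hunk (and the gmap.c hunks further down) applies the same mechanical
conversion: the linked-list walk over mm->mmap is replaced by the maple
tree based VMA iterator. A minimal sketch of the pattern, assuming only
the mm locking shown in the converted call sites:

	#include <linux/mm.h>

	/* Sketch: visit every VMA of a process in address order using
	 * the VMA iterator. VMA_ITERATOR(name, mm, addr) declares an
	 * iterator starting at addr; the mmap lock must be held.
	 */
	static void walk_all_vmas(struct mm_struct *mm)
	{
		VMA_ITERATOR(vmi, mm, 0);
		struct vm_area_struct *vma;

		mmap_read_lock(mm);
		for_each_vma(vmi, vma) {
			/* inspect vma->vm_start .. vma->vm_end here */
		}
		mmap_read_unlock(mm);
	}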
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index f7f5adea8940..be14c58cb989 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -13,13 +13,10 @@
 
 void __delay(unsigned long loops)
 {
-        /*
-         * To end the bloody studid and useless discussion about the
-         * BogoMips number I took the liberty to define the __delay
-         * function in a way that that resulting BogoMips number will
-         * yield the megahertz number of the cpu. The important function
-         * is udelay and that is done using the tod clock. -- martin.
-         */
+	/*
+	 * Loop 'loops' times. Callers must not assume a specific
+	 * amount of time passes before this function returns.
+	 */
 	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 EXPORT_SYMBOL(__delay);
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index d7b3b193d108..58033dfcb6d4 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -81,8 +81,9 @@ unsigned long _copy_from_user_key(void *to, const void __user *from,
 
 	might_fault();
 	if (!should_fail_usercopy()) {
-		instrument_copy_from_user(to, from, n);
+		instrument_copy_from_user_before(to, from, n);
 		res = raw_copy_from_user_key(to, from, n, key);
+		instrument_copy_from_user_after(to, from, n, res);
 	}
 	if (unlikely(res))
 		memset(to + (n - res), 0, res);
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 9f9af5298dd6..9953819d7959 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -8,8 +8,10 @@
 #include <linux/kasan.h>
 #include <asm/ptdump.h>
 #include <asm/kasan.h>
+#include <asm/abs_lowcore.h>
 #include <asm/nospec-branch.h>
 #include <asm/sections.h>
+#include <asm/maccess.h>
 
 static unsigned long max_addr;
 
@@ -21,6 +23,8 @@ struct addr_marker {
 enum address_markers_idx {
 	IDENTITY_BEFORE_NR = 0,
 	IDENTITY_BEFORE_END_NR,
+	AMODE31_START_NR,
+	AMODE31_END_NR,
 	KERNEL_START_NR,
 	KERNEL_END_NR,
 #ifdef CONFIG_KFENCE
@@ -39,11 +43,17 @@ enum address_markers_idx {
 	VMALLOC_END_NR,
 	MODULES_NR,
 	MODULES_END_NR,
+	ABS_LOWCORE_NR,
+	ABS_LOWCORE_END_NR,
+	MEMCPY_REAL_NR,
+	MEMCPY_REAL_END_NR,
 };
 
 static struct addr_marker address_markers[] = {
 	[IDENTITY_BEFORE_NR]	= {0, "Identity Mapping Start"},
 	[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
+	[AMODE31_START_NR]	= {0, "Amode31 Area Start"},
+	[AMODE31_END_NR]	= {0, "Amode31 Area End"},
 	[KERNEL_START_NR]	= {(unsigned long)_stext, "Kernel Image Start"},
 	[KERNEL_END_NR]		= {(unsigned long)_end, "Kernel Image End"},
 #ifdef CONFIG_KFENCE
@@ -62,6 +72,10 @@ static struct addr_marker address_markers[] = {
 	[VMALLOC_END_NR]	= {0, "vmalloc Area End"},
 	[MODULES_NR]		= {0, "Modules Area Start"},
 	[MODULES_END_NR]	= {0, "Modules Area End"},
+	[ABS_LOWCORE_NR]	= {0, "Lowcore Area Start"},
+	[ABS_LOWCORE_END_NR]	= {0, "Lowcore Area End"},
+	[MEMCPY_REAL_NR]	= {0, "Real Memory Copy Area Start"},
+	[MEMCPY_REAL_END_NR]	= {0, "Real Memory Copy Area End"},
 	{ -1, NULL }
 };
 
@@ -276,8 +290,14 @@ static int pt_dump_init(void)
 	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
 	max_addr = 1UL << (max_addr * 11 + 31);
 	address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
+	address_markers[AMODE31_START_NR].start_address = __samode31;
+	address_markers[AMODE31_END_NR].start_address = __eamode31;
 	address_markers[MODULES_NR].start_address = MODULES_VADDR;
 	address_markers[MODULES_END_NR].start_address = MODULES_END;
+	address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
+	address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
+	address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
+	address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
 	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
 	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
 	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 9ab6ca6f7f59..9649d9382e0a 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -268,8 +268,7 @@ static noinline void do_sigbus(struct pt_regs *regs)
 			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
 }
 
-static noinline void do_fault_error(struct pt_regs *regs, int access,
-					vm_fault_t fault)
+static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
 {
 	int si_code;
 
@@ -516,7 +515,7 @@ void do_protection_exception(struct pt_regs *regs)
 		fault = do_exception(regs, access);
 	}
 	if (unlikely(fault))
-		do_fault_error(regs, access, fault);
+		do_fault_error(regs, fault);
 }
 NOKPROBE_SYMBOL(do_protection_exception);
 
@@ -528,7 +527,7 @@ void do_dat_exception(struct pt_regs *regs)
 	access = VM_ACCESS_FLAGS;
 	fault = do_exception(regs, access);
 	if (unlikely(fault))
-		do_fault_error(regs, access, fault);
+		do_fault_error(regs, fault);
 }
 NOKPROBE_SYMBOL(do_dat_exception);
 
@@ -803,7 +802,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 		addr = __gmap_translate(gmap, addr);
 		mmap_read_unlock(mm);
 		if (IS_ERR_VALUE(addr)) {
-			do_fault_error(regs, VM_ACCESS_FLAGS, VM_FAULT_BADMAP);
+			do_fault_error(regs, VM_FAULT_BADMAP);
 			break;
 		}
 		fallthrough;
@@ -813,7 +812,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 		vma = find_vma(mm, addr);
 		if (!vma) {
 			mmap_read_unlock(mm);
-			do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+			do_fault_error(regs, VM_FAULT_BADMAP);
 			break;
 		}
 		page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
@@ -836,7 +835,7 @@ void do_secure_storage_access(struct pt_regs *regs)
 			BUG();
 		break;
 	default:
-		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+		do_fault_error(regs, VM_FAULT_BADMAP);
 		WARN_ON_ONCE(1);
 	}
 }
@@ -848,7 +847,7 @@ void do_non_secure_storage_access(struct pt_regs *regs)
 	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
 
 	if (get_fault_type(regs) != GMAP_FAULT) {
-		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+		do_fault_error(regs, VM_FAULT_BADMAP);
 		WARN_ON_ONCE(1);
 		return;
 	}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 62758cb5872f..02d15c8dc92e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2515,8 +2515,9 @@ static const struct mm_walk_ops thp_split_walk_ops = {
 static inline void thp_split_mm(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
-	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		vma->vm_flags &= ~VM_HUGEPAGE;
 		vma->vm_flags |= VM_NOHUGEPAGE;
 		walk_page_vma(vma, &thp_split_walk_ops, NULL);
@@ -2584,8 +2585,9 @@ int gmap_mark_unmergeable(void)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	int ret;
+	VMA_ITERATOR(vmi, mm, 0);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
 				  MADV_UNMERGEABLE, &vma->vm_flags);
 		if (ret)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 10e51ef9c79a..c299a18273ff 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -237,16 +237,6 @@ int pud_huge(pud_t pud)
 	return pud_large(pud);
 }
 
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int flags)
-{
-	if (flags & FOLL_GET)
-		return NULL;
-
-	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 4a154a084966..97d66a3e60fb 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -37,7 +37,7 @@
 #include <asm/kfence.h>
 #include <asm/ptdump.h>
 #include <asm/dma.h>
-#include <asm/lowcore.h>
+#include <asm/abs_lowcore.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index d6d84e02f35a..1571cdcb0c50 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -12,10 +12,17 @@
 #include <linux/errno.h>
 #include <linux/gfp.h>
 #include <linux/cpu.h>
+#include <linux/uio.h>
 #include <asm/asm-extable.h>
 #include <asm/ctl_reg.h>
 #include <asm/io.h>
+#include <asm/abs_lowcore.h>
 #include <asm/stacktrace.h>
+#include <asm/maccess.h>
+
+unsigned long __bootdata_preserved(__memcpy_real_area);
+static __ro_after_init pte_t *memcpy_real_ptep;
+static DEFINE_MUTEX(memcpy_real_mutex);
 
 static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
 {
@@ -76,118 +83,72 @@ notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
 	return dst;
 }
 
-static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
+void __init memcpy_real_init(void)
 {
-	union register_pair _dst, _src;
-	int rc = -EFAULT;
-
-	_dst.even = (unsigned long) dest;
-	_dst.odd  = (unsigned long) count;
-	_src.even = (unsigned long) src;
-	_src.odd  = (unsigned long) count;
-	asm volatile (
-		"0:	mvcle	%[dst],%[src],0\n"
-		"1:	jo	0b\n"
-		"	lhi	%[rc],0\n"
-		"2:\n"
-		EX_TABLE(1b,2b)
-		: [rc] "+&d" (rc), [dst] "+&d" (_dst.pair), [src] "+&d" (_src.pair)
-		: : "cc", "memory");
-	return rc;
-}
-
-static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
-							unsigned long src,
-							unsigned long count)
-{
-	int irqs_disabled, rc;
-	unsigned long flags;
-
-	if (!count)
-		return 0;
-	flags = arch_local_irq_save();
-	irqs_disabled = arch_irqs_disabled_flags(flags);
-	if (!irqs_disabled)
-		trace_hardirqs_off();
-	__arch_local_irq_stnsm(0xf8); // disable DAT
-	rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
-	if (flags & PSW_MASK_DAT)
-		__arch_local_irq_stosm(0x04); // enable DAT
-	if (!irqs_disabled)
-		trace_hardirqs_on();
-	__arch_local_irq_ssm(flags);
-	return rc;
+	memcpy_real_ptep = vmem_get_alloc_pte(__memcpy_real_area, true);
+	if (!memcpy_real_ptep)
+		panic("Couldn't setup memcpy real area");
 }
 
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, unsigned long src, size_t count)
+size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
 {
-	unsigned long _dest  = (unsigned long)dest;
-	unsigned long _src   = (unsigned long)src;
-	unsigned long _count = (unsigned long)count;
-	int rc;
-
-	if (S390_lowcore.nodat_stack != 0) {
-		preempt_disable();
-		rc = call_on_stack(3, S390_lowcore.nodat_stack,
-				   unsigned long, _memcpy_real,
-				   unsigned long, _dest,
-				   unsigned long, _src,
-				   unsigned long, _count);
-		preempt_enable();
-		return rc;
+	size_t len, copied, res = 0;
+	unsigned long phys, offset;
+	void *chunk;
+	pte_t pte;
+
+	while (count) {
+		phys = src & PAGE_MASK;
+		offset = src & ~PAGE_MASK;
+		chunk = (void *)(__memcpy_real_area + offset);
+		len = min(count, PAGE_SIZE - offset);
+		pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
+
+		mutex_lock(&memcpy_real_mutex);
+		if (pte_val(pte) != pte_val(*memcpy_real_ptep)) {
+			__ptep_ipte(__memcpy_real_area, memcpy_real_ptep, 0, 0, IPTE_GLOBAL);
+			set_pte(memcpy_real_ptep, pte);
+		}
+		copied = copy_to_iter(chunk, len, iter);
+		mutex_unlock(&memcpy_real_mutex);
+
+		count -= copied;
+		src += copied;
+		res += copied;
+		if (copied < len)
+			break;
 	}
-	/*
-	 * This is a really early memcpy_real call, the stacks are
-	 * not set up yet. Just call _memcpy_real on the early boot
-	 * stack
-	 */
-	return _memcpy_real(_dest, _src, _count);
+	return res;
 }
 
-/*
- * Copy memory in absolute mode (kernel to kernel)
- */
-void memcpy_absolute(void *dest, void *src, size_t count)
+int memcpy_real(void *dest, unsigned long src, size_t count)
 {
-	unsigned long cr0, flags, prefix;
-
-	flags = arch_local_irq_save();
-	__ctl_store(cr0, 0, 0);
-	__ctl_clear_bit(0, 28); /* disable lowcore protection */
-	prefix = store_prefix();
-	if (prefix) {
-		local_mcck_disable();
-		set_prefix(0);
-		memcpy(dest, src, count);
-		set_prefix(prefix);
-		local_mcck_enable();
-	} else {
-		memcpy(dest, src, count);
-	}
-	__ctl_load(cr0, 0, 0);
-	arch_local_irq_restore(flags);
+	struct iov_iter iter;
+	struct kvec kvec;
+
+	kvec.iov_base = dest;
+	kvec.iov_len = count;
+	iov_iter_kvec(&iter, WRITE, &kvec, 1, count);
+	if (memcpy_real_iter(&iter, src, count) < count)
+		return -EFAULT;
+	return 0;
 }
 
 /*
- * Check if physical address is within prefix or zero page
+ * Find CPU that owns swapped prefix page
  */
-static int is_swapped(phys_addr_t addr)
+static int get_swapped_owner(phys_addr_t addr)
 {
 	phys_addr_t lc;
 	int cpu;
 
-	if (addr < sizeof(struct lowcore))
-		return 1;
 	for_each_online_cpu(cpu) {
 		lc = virt_to_phys(lowcore_ptr[cpu]);
 		if (addr > lc + sizeof(struct lowcore) - 1 || addr < lc)
 			continue;
-		return 1;
+		return cpu;
 	}
-	return 0;
+	return -1;
 }
 
 /*
@@ -200,17 +161,35 @@ void *xlate_dev_mem_ptr(phys_addr_t addr)
 {
 	void *ptr = phys_to_virt(addr);
 	void *bounce = ptr;
+	struct lowcore *abs_lc;
+	unsigned long flags;
 	unsigned long size;
+	int this_cpu, cpu;
 
 	cpus_read_lock();
-	preempt_disable();
-	if (is_swapped(addr)) {
-		size = PAGE_SIZE - (addr & ~PAGE_MASK);
-		bounce = (void *) __get_free_page(GFP_ATOMIC);
-		if (bounce)
-			memcpy_absolute(bounce, ptr, size);
+	this_cpu = get_cpu();
+	if (addr >= sizeof(struct lowcore)) {
+		cpu = get_swapped_owner(addr);
+		if (cpu < 0)
+			goto out;
+	}
+	bounce = (void *)__get_free_page(GFP_ATOMIC);
+	if (!bounce)
+		goto out;
+	size = PAGE_SIZE - (addr & ~PAGE_MASK);
+	if (addr < sizeof(struct lowcore)) {
+		abs_lc = get_abs_lowcore(&flags);
+		ptr = (void *)abs_lc + addr;
+		memcpy(bounce, ptr, size);
+		put_abs_lowcore(abs_lc, flags);
+	} else if (cpu == this_cpu) {
+		ptr = (void *)(addr - virt_to_phys(lowcore_ptr[cpu]));
+		memcpy(bounce, ptr, size);
+	} else {
+		memcpy(bounce, ptr, size);
 	}
-	preempt_enable();
+out:
+	put_cpu();
 	cpus_read_unlock();
 	return bounce;
 }
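
As a standalone illustration of the per-page chunking arithmetic in memcpy_real_iter() above, the sketch below splits an arbitrary (src, count) range into page-sized windows. SKETCH_PAGE_SIZE, SKETCH_PAGE_MASK, and min_ul() are local stand-ins so the sketch compiles outside the kernel; the base/offset/length math mirrors the patch.

/*
 * Chunking sketch: split (src, count) into windows that never cross a
 * page boundary, the way memcpy_real_iter() maps one page at a time.
 */
#include <stdio.h>

#define SKETCH_PAGE_SIZE 4096UL
#define SKETCH_PAGE_MASK (~(SKETCH_PAGE_SIZE - 1))

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long src = 0x1ff0, count = 0x30;	/* straddles a page */

	while (count) {
		unsigned long phys = src & SKETCH_PAGE_MASK;
		unsigned long offset = src & ~SKETCH_PAGE_MASK;
		unsigned long len = min_ul(count, SKETCH_PAGE_SIZE - offset);

		/* the kernel maps 'phys' at __memcpy_real_area here */
		printf("map page %#lx, copy %#lx bytes at offset %#lx\n",
		       phys, len, offset);
		src += len;
		count -= len;
	}
	return 0;
}
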
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index c2583f921ca8..ee1a97078527 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -240,7 +240,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
 		} else if (pmd_none(*pmd)) {
 			if (IS_ALIGNED(addr, PMD_SIZE) &&
 			    IS_ALIGNED(next, PMD_SIZE) &&
-			    MACHINE_HAS_EDAT1 && addr && direct &&
+			    MACHINE_HAS_EDAT1 && direct &&
 			    !debug_pagealloc_enabled()) {
 				set_pmd(pmd, __pmd(__pa(addr) | prot));
 				pages++;
@@ -336,7 +336,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
 		} else if (pud_none(*pud)) {
 			if (IS_ALIGNED(addr, PUD_SIZE) &&
 			    IS_ALIGNED(next, PUD_SIZE) &&
-			    MACHINE_HAS_EDAT2 && addr && direct &&
+			    MACHINE_HAS_EDAT2 && direct &&
 			    !debug_pagealloc_enabled()) {
 				set_pud(pud, __pud(__pa(addr) | prot));
 				pages++;
@@ -561,6 +561,103 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
 }
 
 /*
+ * Allocate a new or return an existing page-table entry, but do not map
+ * it to any physical address. If missing, allocate the segment- and
+ * region-table entries along the way. Meeting a large segment- or
+ * region-table entry while traversing is an error, since the function
+ * is expected to be called on virtual regions reserved for 4KB
+ * mappings only.
+ */
+pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
+{
+	pte_t *ptep = NULL;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd)) {
+		if (!alloc)
+			goto out;
+		p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+		if (!p4d)
+			goto out;
+		pgd_populate(&init_mm, pgd, p4d);
+	}
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d)) {
+		if (!alloc)
+			goto out;
+		pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+		if (!pud)
+			goto out;
+		p4d_populate(&init_mm, p4d, pud);
+	}
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud)) {
+		if (!alloc)
+			goto out;
+		pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+		if (!pmd)
+			goto out;
+		pud_populate(&init_mm, pud, pmd);
+	} else if (WARN_ON_ONCE(pud_large(*pud))) {
+		goto out;
+	}
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd)) {
+		if (!alloc)
+			goto out;
+		pte = vmem_pte_alloc();
+		if (!pte)
+			goto out;
+		pmd_populate(&init_mm, pmd, pte);
+	} else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+		goto out;
+	}
+	ptep = pte_offset_kernel(pmd, addr);
+out:
+	return ptep;
+}
+
+int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc)
+{
+	pte_t *ptep, pte;
+
+	if (!IS_ALIGNED(addr, PAGE_SIZE))
+		return -EINVAL;
+	ptep = vmem_get_alloc_pte(addr, alloc);
+	if (!ptep)
+		return -ENOMEM;
+	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	pte = mk_pte_phys(phys, prot);
+	set_pte(ptep, pte);
+	return 0;
+}
+
+int vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot)
+{
+	int rc;
+
+	mutex_lock(&vmem_mutex);
+	rc = __vmem_map_4k_page(addr, phys, prot, true);
+	mutex_unlock(&vmem_mutex);
+	return rc;
+}
+
+void vmem_unmap_4k_page(unsigned long addr)
+{
+	pte_t *ptep;
+
+	mutex_lock(&vmem_mutex);
+	ptep = virt_to_kpte(addr);
+	__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
+	pte_clear(&init_mm, addr, ptep);
+	mutex_unlock(&vmem_mutex);
+}
+
+/*
  * map whole physical memory to virtual memory (identity mapping)
  * we reserve enough space in the vmalloc area for vmemmap to hotplug
  * additional memory segments.
@@ -584,6 +681,9 @@ void __init vmem_map_init(void)
 	__set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
 
+	/* lowcore requires 4k mapping for real addresses / prefixing */
+	set_memory_4k(0, LC_PAGES);
+
 	/* lowcore must be executable for LPSWE */
 	if (!static_key_enabled(&cpu_has_bear))
 		set_memory_x(0, 1);
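
A minimal kernel-context sketch (not a standalone program) of how the new 4K helpers above are intended to pair; map_one_page_example(), fixmap_va, and phys are hypothetical names introduced for illustration only.

static int map_one_page_example(unsigned long fixmap_va, unsigned long phys)
{
	int rc;

	/* establish a single 4K kernel mapping, allocating tables on demand */
	rc = vmem_map_4k_page(fixmap_va, phys, PAGE_KERNEL_RO);
	if (rc)
		return rc;

	/* ... read through the window at fixmap_va ... */

	/* invalidate the TLB entry and clear the PTE again */
	vmem_unmap_4k_page(fixmap_va);
	return 0;
}
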
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f46833a25526..227cf0a62800 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -666,7 +666,7 @@ static int __init dma_alloc_cpu_table_caches(void)
 
 int __init zpci_dma_init(void)
 {
-	s390_iommu_aperture = (u64)high_memory;
+	s390_iommu_aperture = (u64)virt_to_phys(high_memory);
 	if (!s390_iommu_aperture_factor)
 		s390_iommu_aperture = ULONG_MAX;
 	else
diff --git a/arch/sh/Makefile b/arch/sh/Makefile
index b39412bf91fb..5c8776482530 100644
--- a/arch/sh/Makefile
+++ b/arch/sh/Makefile
@@ -114,8 +114,6 @@ endif
 
 export ld-bfd
 
-head-y	:= arch/sh/kernel/head_32.o
-
 # Mach groups
 machdir-$(CONFIG_SOLUTION_ENGINE)		+= mach-se
 machdir-$(CONFIG_SH_HP6XX)			+= mach-hp6xx
diff --git a/arch/sh/configs/apsh4a3a_defconfig b/arch/sh/configs/apsh4a3a_defconfig
index 530498f18990..99931a13a74d 100644
--- a/arch/sh/configs/apsh4a3a_defconfig
+++ b/arch/sh/configs/apsh4a3a_defconfig
@@ -85,7 +85,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 # CONFIG_FTRACE is not set
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 # CONFIG_CRYPTO_HW is not set
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index 6abd9bd70106..d9fb124bf015 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -116,7 +116,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_VM=y
 CONFIG_DWARF_UNWINDER=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig
index e699e2e04128..b52e14ccb450 100644
--- a/arch/sh/configs/ecovec24_defconfig
+++ b/arch/sh/configs/ecovec24_defconfig
@@ -8,7 +8,7 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_CPU_SUBTYPE_SH7724=y
-CONFIG_FORCE_MAX_ZONEORDER=12
+CONFIG_ARCH_FORCE_MAX_ORDER=12
 CONFIG_MEMORY_SIZE=0x10000000
 CONFIG_FLATMEM_MANUAL=y
 CONFIG_SH_ECOVEC=y
diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig
index d77f54e906fd..f427a95bcd21 100644
--- a/arch/sh/configs/edosk7760_defconfig
+++ b/arch/sh/configs/edosk7760_defconfig
@@ -107,7 +107,7 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_CRYPTO=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig
index 0989ed929540..ef1d98e35c91 100644
--- a/arch/sh/configs/magicpanelr2_defconfig
+++ b/arch/sh/configs/magicpanelr2_defconfig
@@ -84,7 +84,7 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_DEBUG_KOBJECT=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_FRAME_POINTER=y
 CONFIG_CRC_CCITT=m
 CONFIG_CRC16=m
diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig
index 246408ec7462..f42e4867ddc1 100644
--- a/arch/sh/configs/polaris_defconfig
+++ b/arch/sh/configs/polaris_defconfig
@@ -79,5 +79,5 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_RT_MUTEXES=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_SG=y
diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig
index f823cc6b18f9..e527cd60a191 100644
--- a/arch/sh/configs/r7780mp_defconfig
+++ b/arch/sh/configs/r7780mp_defconfig
@@ -101,7 +101,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_DEBUG_PREEMPT is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_HMAC=y
diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig
index f96bc20d4b1a..a3f952a83d97 100644
--- a/arch/sh/configs/r7785rp_defconfig
+++ b/arch/sh/configs/r7785rp_defconfig
@@ -96,7 +96,7 @@ CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_PREEMPT is not set
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_SH_STANDARD_BIOS=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_4KSTACKS=y
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index 5a54e2b883f0..d00fafc021e1 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -112,7 +112,7 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 7d6d32359848..41cb588ca99c 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -131,7 +131,7 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_SH_STANDARD_BIOS=y
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_DES=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index a8662b6927ec..97b7356639ed 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -16,7 +16,6 @@ CONFIG_CPUSETS=y
 # CONFIG_PROC_PID_CPUSET is not set
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_BLK_CGROUP=y
diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig
index ee6d28ae08de..36356223d51c 100644
--- a/arch/sh/configs/se7712_defconfig
+++ b/arch/sh/configs/se7712_defconfig
@@ -93,7 +93,7 @@ CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_FRAME_POINTER=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig
index bad921bc10f8..46c5a263a239 100644
--- a/arch/sh/configs/se7721_defconfig
+++ b/arch/sh/configs/se7721_defconfig
@@ -121,7 +121,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_932=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_FRAME_POINTER=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRC_CCITT=y
diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig
index 79f02f1c0dc8..259c69e3fa22 100644
--- a/arch/sh/configs/sh2007_defconfig
+++ b/arch/sh/configs/sh2007_defconfig
@@ -159,7 +159,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DETECT_SOFTLOCKUP is not set
 # CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_FRAME_POINTER=y
 CONFIG_SH_STANDARD_BIOS=y
 CONFIG_CRYPTO_NULL=y
diff --git a/arch/sh/configs/sh7757lcr_defconfig b/arch/sh/configs/sh7757lcr_defconfig
index a2700ab165af..2579dc4bc0c8 100644
--- a/arch/sh/configs/sh7757lcr_defconfig
+++ b/arch/sh/configs/sh7757lcr_defconfig
@@ -80,6 +80,6 @@ CONFIG_NLS_ISO8859_1=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 # CONFIG_FTRACE is not set
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 7eb3c10f28ad..781ff13227fc 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -141,7 +141,7 @@ CONFIG_DEBUG_KMEMLEAK=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_LATENCYTOP=y
 # CONFIG_FTRACE is not set
 CONFIG_CRYPTO_HMAC=y
diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig
index cb2f56468fe0..8fc687c98fd1 100644
--- a/arch/sh/configs/urquell_defconfig
+++ b/arch/sh/configs/urquell_defconfig
@@ -14,7 +14,6 @@ CONFIG_CPUSETS=y
 # CONFIG_PROC_PID_CPUSET is not set
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_MEMCG=y
-CONFIG_CGROUP_MEMCG_SWAP=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_BLK_DEV_INITRD=y
@@ -139,7 +138,7 @@ CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_FRAME_POINTER=y
 # CONFIG_FTRACE is not set
 # CONFIG_DUMP_CODE is not set
diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h
index 199d17b765f2..361a0f57bdeb 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -48,10 +48,7 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM		2
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
 
 /* arch/sh/kernel/hw_breakpoint.c */
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
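
One plausible reason for switching to the macro form, sketched standalone below: a macro that expands to a constant can be used as a compile-time array bound, which a call to a static inline function cannot.

#define HBP_NUM 2
#define hw_breakpoint_slots(type) (HBP_NUM)

/* usable as a constant expression, e.g. to size a static array */
static int pinned_slots[hw_breakpoint_slots(0)];

int main(void)
{
	return sizeof(pinned_slots) / sizeof(pinned_slots[0]);	/* 2 */
}
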
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index 45240ec6b85a..27aebf1e75a2 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -127,9 +127,6 @@ struct task_struct;
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned long new_sp);
 
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
 /*
  * FPU lazy state save handling.
  */
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index aa0fbc9202b1..69cd9ac4b2ab 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -3,7 +3,7 @@
 # Makefile for the Linux/SuperH kernel.
 #
 
-extra-y	:= head_32.o vmlinux.lds
+extra-y	:= vmlinux.lds
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
@@ -12,7 +12,7 @@ endif
 
 CFLAGS_REMOVE_return_address.o = -pg
 
-obj-y	:= debugtraps.o dumpstack.o 		\
+obj-y	:= head_32.o debugtraps.o dumpstack.o				\
 	   idle.o io.o irq.o irq_32.o kdebugfs.o			\
 	   machvec.o nmi_debug.o process.o				\
 	   process_32.o ptrace.o ptrace_32.o				\
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index a808843375e7..92b6649d4929 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -84,11 +84,6 @@ void flush_thread(void)
 #endif
 }
 
-void release_thread(struct task_struct *dead_task)
-{
-	/* do nothing */
-}
-
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index ba569cfb4368..411fdc0901f7 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -18,7 +18,7 @@ config PAGE_OFFSET
 	default "0x80000000" if MMU
 	default "0x00000000"
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	range 9 64 if PAGE_SIZE_16KB
 	default "9" if PAGE_SIZE_16KB
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 1c852bb530ec..4d3d1af90d52 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -269,7 +269,7 @@ config ARCH_SPARSEMEM_ENABLE
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y if SPARC64
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	default "13"
 	help
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index fe58a410b4ce..a4ea5b05f288 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -56,8 +56,6 @@ endif
 
 endif
 
-head-y                 := arch/sparc/kernel/head_$(BITS).o
-
 libs-y                 += arch/sparc/prom/
 libs-y                 += arch/sparc/lib/
 
diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig
new file mode 100644
index 000000000000..cfe5102b1c68
--- /dev/null
+++ b/arch/sparc/crypto/Kconfig
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (sparc64)"
+
+config CRYPTO_DES_SPARC64
+	tristate "Ciphers: DES and Triple DES EDE, modes: ECB/CBC"
+	depends on SPARC64
+	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_DES
+	select CRYPTO_SKCIPHER
+	help
+	  Block cipher: DES (FIPS 46-2) cipher algorithm
+	  Block cipher: Triple DES EDE (FIPS 46-3) cipher algorithm
+	  Length-preserving ciphers: DES with ECB and CBC modes
+	  Length-preserving ciphers: Triple DES EDE with ECB and CBC modes
+
+	  Architecture: sparc64
+
+config CRYPTO_CRC32C_SPARC64
+	tristate "CRC32c"
+	depends on SPARC64
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
+
+	  Architecture: sparc64
+
+config CRYPTO_MD5_SPARC64
+	tristate "Digests: MD5"
+	depends on SPARC64
+	select CRYPTO_MD5
+	select CRYPTO_HASH
+	help
+	  MD5 message digest algorithm (RFC1321)
+
+	  Architecture: sparc64 using crypto instructions, when available
+
+config CRYPTO_SHA1_SPARC64
+	tristate "Hash functions: SHA-1"
+	depends on SPARC64
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: sparc64
+
+config CRYPTO_SHA256_SPARC64
+	tristate "Hash functions: SHA-224 and SHA-256"
+	depends on SPARC64
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: sparc64 using crypto instructions, when available
+
+config CRYPTO_SHA512_SPARC64
+	tristate "Hash functions: SHA-384 and SHA-512"
+	depends on SPARC64
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: sparc64 using crypto instructions, when available
+
+config CRYPTO_AES_SPARC64
+	tristate "Ciphers: AES, modes: ECB, CBC, CTR"
+	depends on SPARC64
+	select CRYPTO_SKCIPHER
+	help
+	  Block ciphers: AES cipher algorithms (FIPS-197)
+	  Length-preserving ciphers: AES with ECB, CBC, and CTR modes
+
+	  Architecture: sparc64 using crypto instructions
+
+config CRYPTO_CAMELLIA_SPARC64
+	tristate "Ciphers: Camellia, modes: ECB, CBC"
+	depends on SPARC64
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	help
+	  Block ciphers: Camellia cipher algorithms
+	  Length-preserving ciphers: Camellia with ECB and CBC modes
+
+	  Architecture: sparc64
+
+endmenu
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index b26c35336b51..ba8b70ffec08 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -80,9 +80,6 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 			     : "memory");
 }
 
-/* Free all resources held by a thread. */
-#define release_thread(tsk)		do { } while(0)
-
 unsigned long __get_wchan(struct task_struct *);
 
 #define task_pt_regs(tsk) ((tsk)->thread.kregs)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 89850dff6b03..2667f35d5ea5 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -176,9 +176,6 @@ do { \
 	regs->tstate &= ~TSTATE_PEF;	\
 } while (0)
 
-/* Free all resources held by a thread. */
-#define release_thread(tsk)		do { } while (0)
-
 unsigned long __get_wchan(struct task_struct *task);
 
 #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index 620c0c5b749b..0984bb6f0f17 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -7,8 +7,6 @@
 asflags-y := -ansi
 ccflags-y := -Werror
 
-extra-y     := head_$(BITS).o
-
 # Undefine sparc when processing vmlinux.lds - it is used
 # And teach CPP we are doing $(BITS) builds (for this case)
 CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS)
@@ -22,6 +20,7 @@ CFLAGS_REMOVE_perf_event.o := -pg
 CFLAGS_REMOVE_pcr.o := -pg
 endif
 
+obj-y                   := head_$(BITS).o
 obj-$(CONFIG_SPARC64)   += urtt_fill.o
 obj-$(CONFIG_SPARC32)   += entry.o wof.o wuf.o
 obj-$(CONFIG_SPARC32)   += etrap_32.o
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index fb51bd206dbe..c0162286d68b 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -69,5 +69,5 @@ CONFIG_JOLIET=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_NLS=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
index 477b87317424..bec6e5d95687 100644
--- a/arch/um/configs/x86_64_defconfig
+++ b/arch/um/configs/x86_64_defconfig
@@ -67,6 +67,6 @@ CONFIG_JOLIET=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_NLS=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index d0fc1862da95..bb5f06480da9 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -55,10 +55,6 @@ struct thread_struct {
 	.request		= { 0 } \
 }
 
-static inline void release_thread(struct task_struct *task)
-{
-}
-
 /*
  * User space process size: 3GB (default).
  */
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index bc38f79ca3a3..ad449173a1a1 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -584,21 +584,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
-	while (vma != NULL) {
+	for_each_vma(vmi, vma)
 		fix_range(mm, vma->vm_start, vma->vm_end, 0);
-		vma = vma->vm_next;
-	}
 }
 
 void force_flush_all(void)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = mm->mmap;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
-	while (vma != NULL) {
+	for_each_vma(vmi, vma)
 		fix_range(mm, vma->vm_start, vma->vm_end, 1);
-		vma = vma->vm_next;
-	}
 }
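
The conversions above follow one pattern: replace the ->vm_next walk with the maple-tree based VMA iterator. A kernel-context sketch, with hypothetical walk_vmas_from() and start, shows the general form, which can also begin at a non-zero address:

static void walk_vmas_from(struct mm_struct *mm, unsigned long start)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, start);	/* maple-tree iterator, replaces ->vm_next */

	mmap_read_lock(mm);
	for_each_vma(vmi, vma)
		pr_debug("vma %#lx-%#lx\n", vma->vm_start, vma->vm_end);
	mmap_read_unlock(mm);
}
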
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 088af7c84e5d..6d1879ef933a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -85,6 +85,7 @@ config X86
 	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_PTE_DEVMAP		if X86_64
 	select ARCH_HAS_PTE_SPECIAL
+	select ARCH_HAS_NONLEAF_PMD_YOUNG	if PGTABLE_LEVELS > 2
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
 	select ARCH_HAS_COPY_MC			if X86_64
 	select ARCH_HAS_SET_MEMORY
@@ -130,7 +131,9 @@ config X86
 	select CLKEVT_I8253
 	select CLOCKSOURCE_VALIDATE_LAST_CYCLE
 	select CLOCKSOURCE_WATCHDOG
-	select DCACHE_WORD_ACCESS
+	# Word-size accesses may read uninitialized data past the trailing \0
+	# in strings and cause false KMSAN reports.
+	select DCACHE_WORD_ACCESS		if !KMSAN
 	select DYNAMIC_SIGFRAME
 	select EDAC_ATOMIC_SCRUB
 	select EDAC_SUPPORT
@@ -168,6 +171,7 @@ config X86
 	select HAVE_ARCH_KASAN			if X86_64
 	select HAVE_ARCH_KASAN_VMALLOC		if X86_64
 	select HAVE_ARCH_KFENCE
+	select HAVE_ARCH_KMSAN			if X86_64
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS		if MMU
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if MMU && COMPAT
@@ -328,6 +332,10 @@ config GENERIC_ISA_DMA
 	def_bool y
 	depends on ISA_DMA_API
 
+config GENERIC_CSUM
+	bool
+	default y if KMSAN || KASAN
+
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
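
The DCACHE_WORD_ACCESS comment above refers to loads like the one in this standalone sketch: the 8-byte read stays inside the buffer, so it is a legal access, but it touches bytes past the terminating NUL that were never written, which KMSAN reports as uses of uninitialized memory.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[8];
	uint64_t word;

	strcpy(buf, "abc");		  /* buf[4..7] remain uninitialized */
	memcpy(&word, buf, sizeof(word)); /* word-size load past the NUL */
	printf("low byte: %c\n", (char)(word & 0xff));
	return 0;
}
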
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d7e640674c6..415a5d138de4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -244,11 +244,6 @@ archheaders:
 ###
 # Kernel objects
 
-head-y := arch/x86/kernel/head_$(BITS).o
-head-y += arch/x86/kernel/head$(BITS).o
-head-y += arch/x86/kernel/ebda.o
-head-y += arch/x86/kernel/platform-quirks.o
-
 libs-y  += arch/x86/lib/
 
 # drivers-y are linked after core-y
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ffec8bb01ba8..9860ca5979f8 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -12,6 +12,7 @@
 # Sanitizer runtimes are unavailable and cannot be linked for early boot code.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
+KMSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
 # Kernel does not boot with kcov instrumentation here.
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 35ce1a64068b..3a261abb6d15 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -20,6 +20,7 @@
 # Sanitizer runtimes are unavailable and cannot be linked for early boot code.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
+KMSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 4a3f223973f4..e476bcbd9b42 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -29,6 +29,7 @@
 #include <linux/uts.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
+#include <generated/utsversion.h>
 #include <generated/utsrelease.h>
 
 #define _SETUP
diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c
index a1aaaf6c06a6..945383f0f606 100644
--- a/arch/x86/boot/version.c
+++ b/arch/x86/boot/version.c
@@ -11,6 +11,7 @@
  */
 
 #include "boot.h"
+#include <generated/utsversion.h>
 #include <generated/utsrelease.h>
 #include <generated/compile.h>
 
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 7207219509f6..3cf34912abfe 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -27,7 +27,6 @@ CONFIG_CGROUP_MISC=y
 CONFIG_CGROUP_DEBUG=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_SMP=y
 CONFIG_HYPERVISOR_GUEST=y
@@ -44,6 +43,7 @@ CONFIG_EFI_STUB=y
 CONFIG_HZ_1000=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+# CONFIG_RETHUNK is not set
 CONFIG_HIBERNATION=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TRACE_RTC=y
@@ -62,6 +62,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y
 CONFIG_BLK_CGROUP_IOCOST=y
 CONFIG_BLK_CGROUP_IOPRIO=y
 CONFIG_BINFMT_MISC=y
+# CONFIG_COMPAT_BRK is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -269,9 +270,10 @@ CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_KERNEL=y
 CONFIG_FRAME_WARN=1024
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_WX=y
 CONFIG_DEBUG_STACK_USAGE=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 5ce67b73e218..27759236fd60 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -26,7 +26,6 @@ CONFIG_CGROUP_MISC=y
 CONFIG_CGROUP_DEBUG=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
 CONFIG_SMP=y
 CONFIG_HYPERVISOR_GUEST=y
@@ -62,6 +61,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y
 CONFIG_BLK_CGROUP_IOCOST=y
 CONFIG_BLK_CGROUP_IOPRIO=y
 CONFIG_BINFMT_MISC=y
+# CONFIG_COMPAT_BRK is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -267,8 +267,9 @@ CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_PRINTK_TIME=y
-CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_WX=y
 CONFIG_DEBUG_STACK_USAGE=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
new file mode 100644
index 000000000000..71c4c473d34b
--- /dev/null
+++ b/arch/x86/crypto/Kconfig
@@ -0,0 +1,484 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (x86)"
+
+config CRYPTO_CURVE25519_X86
+	tristate "Public key crypto: Curve25519 (ADX)"
+	depends on X86 && 64BIT
+	select CRYPTO_LIB_CURVE25519_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+	help
+	  Curve25519 algorithm
+
+	  Architecture: x86_64 using:
+	  - ADX (large integer arithmetic)
+
+config CRYPTO_AES_NI_INTEL
+	tristate "Ciphers: AES, modes: ECB, CBC, CTS, CTR, XTR, XTS, GCM (AES-NI)"
+	depends on X86
+	select CRYPTO_AEAD
+	select CRYPTO_LIB_AES
+	select CRYPTO_ALGAPI
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SIMD
+	help
+	  Block cipher: AES cipher algorithms
+	  AEAD cipher: AES with GCM
+	  Length-preserving ciphers: AES with ECB, CBC, CTS, CTR, XTR, XTS
+
+	  Architecture: x86 (32-bit and 64-bit) using:
+	  - AES-NI (AES new instructions)
+
+config CRYPTO_BLOWFISH_X86_64
+	tristate "Ciphers: Blowfish, modes: ECB, CBC"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_BLOWFISH_COMMON
+	imply CRYPTO_CTR
+	help
+	  Block cipher: Blowfish cipher algorithm
+	  Length-preserving ciphers: Blowfish with ECB and CBC modes
+
+	  Architecture: x86_64
+
+config CRYPTO_CAMELLIA_X86_64
+	tristate "Ciphers: Camellia with modes: ECB, CBC"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	imply CRYPTO_CTR
+	help
+	  Block cipher: Camellia cipher algorithms
+	  Length-preserving ciphers: Camellia with ECB and CBC modes
+
+	  Architecture: x86_64
+
+config CRYPTO_CAMELLIA_AESNI_AVX_X86_64
+	tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_CAMELLIA_X86_64
+	select CRYPTO_SIMD
+	imply CRYPTO_XTS
+	help
+	  Length-preserving ciphers: Camellia with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AES-NI (AES New Instructions)
+	  - AVX (Advanced Vector Extensions)
+
+config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
+	tristate "Ciphers: Camellia with modes: ECB, CBC (AES-NI/AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
+	help
+	  Length-preserving ciphers: Camellia with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AES-NI (AES New Instructions)
+	  - AVX2 (Advanced Vector Extensions 2)
+
+config CRYPTO_CAST5_AVX_X86_64
+	tristate "Ciphers: CAST5 with modes: ECB, CBC (AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_CAST5
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_SIMD
+	imply CRYPTO_CTR
+	help
+	  Length-preserving ciphers: CAST5 (CAST-128) cipher algorithm
+	  (RFC2144) with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AVX (Advanced Vector Extensions)
+
+	  Processes 16 blocks in parallel.
+
+config CRYPTO_CAST6_AVX_X86_64
+	tristate "Ciphers: CAST6 with modes: ECB, CBC (AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_CAST6
+	select CRYPTO_CAST_COMMON
+	select CRYPTO_SIMD
+	imply CRYPTO_XTS
+	imply CRYPTO_CTR
+	help
+	  Length-preserving ciphers: CAST6 (CAST-256) cipher algorithm
+	  (RFC2612) with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AVX (Advanced Vector Extensions)
+
+	  Processes eight blocks in parallel.
+
+config CRYPTO_DES3_EDE_X86_64
+	tristate "Ciphers: Triple DES EDE with modes: ECB, CBC"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_DES
+	imply CRYPTO_CTR
+	help
+	  Block cipher: Triple DES EDE (FIPS 46-3) cipher algorithm
+	  Length-preserving ciphers: Triple DES EDE with ECB and CBC modes
+
+	  Architecture: x86_64
+
+	  Processes one or three blocks in parallel.
+
+config CRYPTO_SERPENT_SSE2_X86_64
+	tristate "Ciphers: Serpent with modes: ECB, CBC (SSE2)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SERPENT
+	select CRYPTO_SIMD
+	imply CRYPTO_CTR
+	help
+	  Length-preserving ciphers: Serpent cipher algorithm
+	  with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - SSE2 (Streaming SIMD Extensions 2)
+
+	  Processes eight blocks in parallel.
+
+config CRYPTO_SERPENT_SSE2_586
+	tristate "Ciphers: Serpent with modes: ECB, CBC (32-bit with SSE2)"
+	depends on X86 && !64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SERPENT
+	select CRYPTO_SIMD
+	imply CRYPTO_CTR
+	help
+	  Length-preserving ciphers: Serpent cipher algorithm
+	  with ECB and CBC modes
+
+	  Architecture: x86 (32-bit) using:
+	  - SSE2 (Streaming SIMD Extensions 2)
+
+	  Processes four blocks in parallel.
+
+config CRYPTO_SERPENT_AVX_X86_64
+	tristate "Ciphers: Serpent with modes: ECB, CBC (AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SERPENT
+	select CRYPTO_SIMD
+	imply CRYPTO_XTS
+	imply CRYPTO_CTR
+	help
+	  Length-preserving ciphers: Serpent cipher algorithm
+	  with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AVX (Advanced Vector Extensions)
+
+	  Processes eight blocks in parallel.
+
+config CRYPTO_SERPENT_AVX2_X86_64
+	tristate "Ciphers: Serpent with modes: ECB, CBC (AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_SERPENT_AVX_X86_64
+	help
+	  Length-preserving ciphers: Serpent cipher algorithm
+	  with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AVX2 (Advanced Vector Extensions 2)
+
+	  Processes 16 blocks in parallel.
+
+config CRYPTO_SM4_AESNI_AVX_X86_64
+	tristate "Ciphers: SM4 with modes: ECB, CBC, CFB, CTR (AES-NI/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SIMD
+	select CRYPTO_ALGAPI
+	select CRYPTO_SM4
+	help
+	  Length-preserving ciphers: SM4 cipher algorithms
+	  (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, and CTR modes
+
+	  Architecture: x86_64 using:
+	  - AES-NI (AES New Instructions)
+	  - AVX (Advanced Vector Extensions)
+
+	  Uses two affine transforms so that the AES S-Box can emulate the
+	  SM4 S-Box, letting the AES-NI instructions accelerate SM4.
+
+	  If unsure, say N.
+
+config CRYPTO_SM4_AESNI_AVX2_X86_64
+	tristate "Ciphers: SM4 with modes: ECB, CBC, CFB, CTR (AES-NI/AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SIMD
+	select CRYPTO_ALGAPI
+	select CRYPTO_SM4
+	select CRYPTO_SM4_AESNI_AVX_X86_64
+	help
+	  Length-preserving ciphers: SM4 cipher algorithms
+	  (OSCCA GB/T 32907-2016) with ECB, CBC, CFB, and CTR modes
+
+	  Architecture: x86_64 using:
+	  - AES-NI (AES New Instructions)
+	  - AVX2 (Advanced Vector Extensions 2)
+
+	  Uses two affine transforms so that the AES S-Box can emulate the
+	  SM4 S-Box, letting the AES-NI instructions accelerate SM4.
+
+	  If unsure, say N.
+
+config CRYPTO_TWOFISH_586
+	tristate "Ciphers: Twofish (32-bit)"
+	depends on (X86 || UML_X86) && !64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_TWOFISH_COMMON
+	imply CRYPTO_CTR
+	help
+	  Block cipher: Twofish cipher algorithm
+
+	  Architecture: x86 (32-bit)
+
+config CRYPTO_TWOFISH_X86_64
+	tristate "Ciphers: Twofish"
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_TWOFISH_COMMON
+	imply CRYPTO_CTR
+	help
+	  Block cipher: Twofish cipher algorithm
+
+	  Architecture: x86_64
+
+config CRYPTO_TWOFISH_X86_64_3WAY
+	tristate "Ciphers: Twofish with modes: ECB, CBC (3-way parallel)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_TWOFISH_COMMON
+	select CRYPTO_TWOFISH_X86_64
+	help
+	  Length-preserving cipher: Twofish cipher algorithm
+	  with ECB and CBC modes
+
+	  Architecture: x86_64
+
+	  Processes three blocks in parallel, better utilizing resources of
+	  out-of-order CPUs.
+
+config CRYPTO_TWOFISH_AVX_X86_64
+	tristate "Ciphers: Twofish with modes: ECB, CBC (AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SIMD
+	select CRYPTO_TWOFISH_COMMON
+	select CRYPTO_TWOFISH_X86_64
+	select CRYPTO_TWOFISH_X86_64_3WAY
+	imply CRYPTO_XTS
+	help
+	  Length-preserving cipher: Twofish cipher algorithm
+	  with ECB and CBC modes
+
+	  Architecture: x86_64 using:
+	  - AVX (Advanced Vector Extensions)
+
+	  Processes eight blocks in parallel.
+
+config CRYPTO_ARIA_AESNI_AVX_X86_64
+	tristate "Ciphers: ARIA with modes: ECB, CTR (AES-NI/AVX/GFNI)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_SIMD
+	select CRYPTO_ALGAPI
+	select CRYPTO_ARIA
+	help
+	  Length-preserving ciphers: ARIA cipher algorithms
+	  (RFC 5794) with ECB and CTR modes
+
+	  Architecture: x86_64 using:
+	  - AES-NI (AES New Instructions)
+	  - AVX (Advanced Vector Extensions)
+	  - GFNI (Galois Field New Instructions)
+
+	  Processes 16 blocks in parallel.
+
+config CRYPTO_CHACHA20_X86_64
+	tristate "Ciphers: ChaCha20, XChaCha20, XChaCha12 (SSSE3/AVX2/AVX-512VL)"
+	depends on X86 && 64BIT
+	select CRYPTO_SKCIPHER
+	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
+	help
+	  Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12
+	  stream cipher algorithms
+
+	  Architecture: x86_64 using:
+	  - SSSE3 (Supplemental SSE3)
+	  - AVX2 (Advanced Vector Extensions 2)
+	  - AVX-512VL (Advanced Vector Extensions-512VL)
+
+config CRYPTO_AEGIS128_AESNI_SSE2
+	tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)"
+	depends on X86 && 64BIT
+	select CRYPTO_AEAD
+	select CRYPTO_SIMD
+	help
+	  AEGIS-128 AEAD algorithm
+
+	  Architecture: x86_64 using:
+	  - AES-NI (AES New Instructions)
+	  - SSE2 (Streaming SIMD Extensions 2)
+
+config CRYPTO_NHPOLY1305_SSE2
+	tristate "Hash functions: NHPoly1305 (SSE2)"
+	depends on X86 && 64BIT
+	select CRYPTO_NHPOLY1305
+	help
+	  NHPoly1305 hash function for Adiantum
+
+	  Architecture: x86_64 using:
+	  - SSE2 (Streaming SIMD Extensions 2)
+
+config CRYPTO_NHPOLY1305_AVX2
+	tristate "Hash functions: NHPoly1305 (AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_NHPOLY1305
+	help
+	  NHPoly1305 hash function for Adiantum
+
+	  Architecture: x86_64 using:
+	  - AVX2 (Advanced Vector Extensions 2)
+
+config CRYPTO_BLAKE2S_X86
+	bool "Hash functions: BLAKE2s (SSSE3/AVX-512)"
+	depends on X86 && 64BIT
+	select CRYPTO_LIB_BLAKE2S_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+	help
+	  BLAKE2s cryptographic hash function (RFC 7693)
+
+	  Architecture: x86_64 using:
+	  - SSSE3 (Supplemental SSE3)
+	  - AVX-512 (Advanced Vector Extensions-512)
+
+config CRYPTO_POLYVAL_CLMUL_NI
+	tristate "Hash functions: POLYVAL (CLMUL-NI)"
+	depends on X86 && 64BIT
+	select CRYPTO_POLYVAL
+	help
+	  POLYVAL hash function for HCTR2
+
+	  Architecture: x86_64 using:
+	  - CLMUL-NI (carry-less multiplication new instructions)
+
+config CRYPTO_POLY1305_X86_64
+	tristate "Hash functions: Poly1305 (SSE2/AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_LIB_POLY1305_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_POLY1305
+	help
+	  Poly1305 authenticator algorithm (RFC7539)
+
+	  Architecture: x86_64 using:
+	  - SSE2 (Streaming SIMD Extensions 2)
+	  - AVX2 (Advanced Vector Extensions 2)
+
+config CRYPTO_SHA1_SSSE3
+	tristate "Hash functions: SHA-1 (SSSE3/AVX/AVX2/SHA-NI)"
+	depends on X86 && 64BIT
+	select CRYPTO_SHA1
+	select CRYPTO_HASH
+	help
+	  SHA-1 secure hash algorithm (FIPS 180)
+
+	  Architecture: x86_64 using:
+	  - SSSE3 (Supplemental SSE3)
+	  - AVX (Advanced Vector Extensions)
+	  - AVX2 (Advanced Vector Extensions 2)
+	  - SHA-NI (SHA Extensions New Instructions)
+
+config CRYPTO_SHA256_SSSE3
+	tristate "Hash functions: SHA-224 and SHA-256 (SSSE3/AVX/AVX2/SHA-NI)"
+	depends on X86 && 64BIT
+	select CRYPTO_SHA256
+	select CRYPTO_HASH
+	help
+	  SHA-224 and SHA-256 secure hash algorithms (FIPS 180)
+
+	  Architecture: x86_64 using:
+	  - SSSE3 (Supplemental SSE3)
+	  - AVX (Advanced Vector Extensions)
+	  - AVX2 (Advanced Vector Extensions 2)
+	  - SHA-NI (SHA Extensions New Instructions)
+
+config CRYPTO_SHA512_SSSE3
+	tristate "Hash functions: SHA-384 and SHA-512 (SSSE3/AVX/AVX2)"
+	depends on X86 && 64BIT
+	select CRYPTO_SHA512
+	select CRYPTO_HASH
+	help
+	  SHA-384 and SHA-512 secure hash algorithms (FIPS 180)
+
+	  Architecture: x86_64 using:
+	  - SSSE3 (Supplemental SSE3)
+	  - AVX (Advanced Vector Extensions)
+	  - AVX2 (Advanced Vector Extensions 2)
+
+config CRYPTO_SM3_AVX_X86_64
+	tristate "Hash functions: SM3 (AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_HASH
+	select CRYPTO_SM3
+	help
+	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3
+
+	  Architecture: x86_64 using:
+	  - AVX (Advanced Vector Extensions)
+
+	  If unsure, say N.
+
+config CRYPTO_GHASH_CLMUL_NI_INTEL
+	tristate "Hash functions: GHASH (CLMUL-NI)"
+	depends on X86 && 64BIT
+	select CRYPTO_CRYPTD
+	help
+	  GCM GHASH hash function (NIST SP800-38D)
+
+	  Architecture: x86_64 using:
+	  - CLMUL-NI (carry-less multiplication new instructions)
+
+config CRYPTO_CRC32C_INTEL
+	tristate "CRC32c (SSE4.2/PCLMULQDQ)"
+	depends on X86
+	select CRYPTO_HASH
+	help
+	  CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
+
+	  Architecture: x86 (32-bit and 64-bit) using:
+	  - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction
+	  - PCLMULQDQ (carry-less multiplication)
+
+config CRYPTO_CRC32_PCLMUL
+	tristate "CRC32 (PCLMULQDQ)"
+	depends on X86
+	select CRYPTO_HASH
+	select CRC32
+	help
+	  CRC32 CRC algorithm (IEEE 802.3)
+
+	  Architecture: x86 (32-bit and 64-bit) using:
+	  - PCLMULQDQ (carry-less multiplication)
+
+config CRYPTO_CRCT10DIF_PCLMUL
+	tristate "CRCT10DIF (PCLMULQDQ)"
+	depends on X86 && 64BIT && CRC_T10DIF
+	select CRYPTO_HASH
+	help
+	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
+
+	  Architecture: x86_64 using:
+	  - PCLMULQDQ (carry-less multiplication)
+
+endmenu
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 04d07ab744b2..3b1d701a4f6c 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -100,6 +100,9 @@ sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o
 obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64) += sm4-aesni-avx2-x86_64.o
 sm4-aesni-avx2-x86_64-y := sm4-aesni-avx2-asm_64.o sm4_aesni_avx2_glue.o
 
+obj-$(CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64) += aria-aesni-avx-x86_64.o
+aria-aesni-avx-x86_64-y := aria-aesni-avx-asm_64.o aria_aesni_avx_glue.o
+
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $< > $@
 $(obj)/%.S: $(src)/%.pl FORCE
diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S
new file mode 100644
index 000000000000..c75fd7d015ed
--- /dev/null
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S
@@ -0,0 +1,1303 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ARIA Cipher 16-way parallel algorithm (AVX)
+ *
+ * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/frame.h>
+
+/* struct aria_ctx: */
+#define enc_key 0
+#define dec_key 272
+#define rounds 544
+
+/* register macros */
+#define CTX %rdi
+
+
+#define BV8(a0, a1, a2, a3, a4, a5, a6, a7)		\
+	( (((a0) & 1) << 0) |				\
+	  (((a1) & 1) << 1) |				\
+	  (((a2) & 1) << 2) |				\
+	  (((a3) & 1) << 3) |				\
+	  (((a4) & 1) << 4) |				\
+	  (((a5) & 1) << 5) |				\
+	  (((a6) & 1) << 6) |				\
+	  (((a7) & 1) << 7) )
+
+#define BM8X8(l0, l1, l2, l3, l4, l5, l6, l7)		\
+	( ((l7) << (0 * 8)) |				\
+	  ((l6) << (1 * 8)) |				\
+	  ((l5) << (2 * 8)) |				\
+	  ((l4) << (3 * 8)) |				\
+	  ((l3) << (4 * 8)) |				\
+	  ((l2) << (5 * 8)) |				\
+	  ((l1) << (6 * 8)) |				\
+	  ((l0) << (7 * 8)) )
+
+#define inc_le128(x, minus_one, tmp)			\
+	vpcmpeqq minus_one, x, tmp;			\
+	vpsubq minus_one, x, x;				\
+	vpslldq $8, tmp, tmp;				\
+	vpsubq tmp, x, x;
+
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0)	\
+	vpand x, mask4bit, tmp0;			\
+	vpandn x, mask4bit, x;				\
+	vpsrld $4, x, x;				\
+							\
+	vpshufb tmp0, lo_t, tmp0;			\
+	vpshufb x, hi_t, x;				\
+	vpxor tmp0, x, x;
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2)		\
+	vpunpckhdq x1, x0, t2;				\
+	vpunpckldq x1, x0, x0;				\
+							\
+	vpunpckldq x3, x2, t1;				\
+	vpunpckhdq x3, x2, x2;				\
+							\
+	vpunpckhqdq t1, x0, x1;				\
+	vpunpcklqdq t1, x0, x0;				\
+							\
+	vpunpckhqdq x2, t2, x3;				\
+	vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b(a0, b0, c0, d0,		\
+			 a1, b1, c1, d1,		\
+			 a2, b2, c2, d2,		\
+			 a3, b3, c3, d3,		\
+			 st0, st1)			\
+	vmovdqu d2, st0;				\
+	vmovdqu d3, st1;				\
+	transpose_4x4(a0, a1, a2, a3, d2, d3);		\
+	transpose_4x4(b0, b1, b2, b3, d2, d3);		\
+	vmovdqu st0, d2;				\
+	vmovdqu st1, d3;				\
+							\
+	vmovdqu a0, st0;				\
+	vmovdqu a1, st1;				\
+	transpose_4x4(c0, c1, c2, c3, a0, a1);		\
+	transpose_4x4(d0, d1, d2, d3, a0, a1);		\
+							\
+	vmovdqu .Lshufb_16x16b, a0;			\
+	vmovdqu st1, a1;				\
+	vpshufb a0, a2, a2;				\
+	vpshufb a0, a3, a3;				\
+	vpshufb a0, b0, b0;				\
+	vpshufb a0, b1, b1;				\
+	vpshufb a0, b2, b2;				\
+	vpshufb a0, b3, b3;				\
+	vpshufb a0, a1, a1;				\
+	vpshufb a0, c0, c0;				\
+	vpshufb a0, c1, c1;				\
+	vpshufb a0, c2, c2;				\
+	vpshufb a0, c3, c3;				\
+	vpshufb a0, d0, d0;				\
+	vpshufb a0, d1, d1;				\
+	vpshufb a0, d2, d2;				\
+	vpshufb a0, d3, d3;				\
+	vmovdqu d3, st1;				\
+	vmovdqu st0, d3;				\
+	vpshufb a0, d3, a0;				\
+	vmovdqu d2, st0;				\
+							\
+	transpose_4x4(a0, b0, c0, d0, d2, d3);		\
+	transpose_4x4(a1, b1, c1, d1, d2, d3);		\
+	vmovdqu st0, d2;				\
+	vmovdqu st1, d3;				\
+							\
+	vmovdqu b0, st0;				\
+	vmovdqu b1, st1;				\
+	transpose_4x4(a2, b2, c2, d2, b0, b1);		\
+	transpose_4x4(a3, b3, c3, d3, b0, b1);		\
+	vmovdqu st0, b0;				\
+	vmovdqu st1, b1;				\
+	/* does not adjust output bytes inside vectors */
+
+#define debyteslice_16x16b(a0, b0, c0, d0,		\
+			   a1, b1, c1, d1,		\
+			   a2, b2, c2, d2,		\
+			   a3, b3, c3, d3,		\
+			   st0, st1)			\
+	vmovdqu d2, st0;				\
+	vmovdqu d3, st1;				\
+	transpose_4x4(a0, a1, a2, a3, d2, d3);		\
+	transpose_4x4(b0, b1, b2, b3, d2, d3);		\
+	vmovdqu st0, d2;				\
+	vmovdqu st1, d3;				\
+							\
+	vmovdqu a0, st0;				\
+	vmovdqu a1, st1;				\
+	transpose_4x4(c0, c1, c2, c3, a0, a1);		\
+	transpose_4x4(d0, d1, d2, d3, a0, a1);		\
+							\
+	vmovdqu .Lshufb_16x16b, a0;			\
+	vmovdqu st1, a1;				\
+	vpshufb a0, a2, a2;				\
+	vpshufb a0, a3, a3;				\
+	vpshufb a0, b0, b0;				\
+	vpshufb a0, b1, b1;				\
+	vpshufb a0, b2, b2;				\
+	vpshufb a0, b3, b3;				\
+	vpshufb a0, a1, a1;				\
+	vpshufb a0, c0, c0;				\
+	vpshufb a0, c1, c1;				\
+	vpshufb a0, c2, c2;				\
+	vpshufb a0, c3, c3;				\
+	vpshufb a0, d0, d0;				\
+	vpshufb a0, d1, d1;				\
+	vpshufb a0, d2, d2;				\
+	vpshufb a0, d3, d3;				\
+	vmovdqu d3, st1;				\
+	vmovdqu st0, d3;				\
+	vpshufb a0, d3, a0;				\
+	vmovdqu d2, st0;				\
+							\
+	transpose_4x4(c0, d0, a0, b0, d2, d3);		\
+	transpose_4x4(c1, d1, a1, b1, d2, d3);		\
+	vmovdqu st0, d2;				\
+	vmovdqu st1, d3;				\
+							\
+	vmovdqu b0, st0;				\
+	vmovdqu b1, st1;				\
+	transpose_4x4(c2, d2, a2, b2, b0, b1);		\
+	transpose_4x4(c3, d3, a3, b3, b0, b1);		\
+	vmovdqu st0, b0;				\
+	vmovdqu st1, b1;				\
+	/* does not adjust output bytes inside vectors */
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack16_pre(x0, x1, x2, x3,			\
+		     x4, x5, x6, x7,			\
+		     y0, y1, y2, y3,			\
+		     y4, y5, y6, y7,			\
+		     rio)				\
+	vmovdqu (0 * 16)(rio), x0;			\
+	vmovdqu (1 * 16)(rio), x1;			\
+	vmovdqu (2 * 16)(rio), x2;			\
+	vmovdqu (3 * 16)(rio), x3;			\
+	vmovdqu (4 * 16)(rio), x4;			\
+	vmovdqu (5 * 16)(rio), x5;			\
+	vmovdqu (6 * 16)(rio), x6;			\
+	vmovdqu (7 * 16)(rio), x7;			\
+	vmovdqu (8 * 16)(rio), y0;			\
+	vmovdqu (9 * 16)(rio), y1;			\
+	vmovdqu (10 * 16)(rio), y2;			\
+	vmovdqu (11 * 16)(rio), y3;			\
+	vmovdqu (12 * 16)(rio), y4;			\
+	vmovdqu (13 * 16)(rio), y5;			\
+	vmovdqu (14 * 16)(rio), y6;			\
+	vmovdqu (15 * 16)(rio), y7;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack16_post(x0, x1, x2, x3,			\
+		      x4, x5, x6, x7,			\
+		      y0, y1, y2, y3,			\
+		      y4, y5, y6, y7,			\
+		      mem_ab, mem_cd)			\
+	byteslice_16x16b(x0, x1, x2, x3,		\
+			 x4, x5, x6, x7,		\
+			 y0, y1, y2, y3,		\
+			 y4, y5, y6, y7,		\
+			 (mem_ab), (mem_cd));		\
+							\
+	vmovdqu x0, 0 * 16(mem_ab);			\
+	vmovdqu x1, 1 * 16(mem_ab);			\
+	vmovdqu x2, 2 * 16(mem_ab);			\
+	vmovdqu x3, 3 * 16(mem_ab);			\
+	vmovdqu x4, 4 * 16(mem_ab);			\
+	vmovdqu x5, 5 * 16(mem_ab);			\
+	vmovdqu x6, 6 * 16(mem_ab);			\
+	vmovdqu x7, 7 * 16(mem_ab);			\
+	vmovdqu y0, 0 * 16(mem_cd);			\
+	vmovdqu y1, 1 * 16(mem_cd);			\
+	vmovdqu y2, 2 * 16(mem_cd);			\
+	vmovdqu y3, 3 * 16(mem_cd);			\
+	vmovdqu y4, 4 * 16(mem_cd);			\
+	vmovdqu y5, 5 * 16(mem_cd);			\
+	vmovdqu y6, 6 * 16(mem_cd);			\
+	vmovdqu y7, 7 * 16(mem_cd);
+
+#define write_output(x0, x1, x2, x3,			\
+		     x4, x5, x6, x7,			\
+		     y0, y1, y2, y3,			\
+		     y4, y5, y6, y7,			\
+		     mem)				\
+	vmovdqu x0, 0 * 16(mem);			\
+	vmovdqu x1, 1 * 16(mem);			\
+	vmovdqu x2, 2 * 16(mem);			\
+	vmovdqu x3, 3 * 16(mem);			\
+	vmovdqu x4, 4 * 16(mem);			\
+	vmovdqu x5, 5 * 16(mem);			\
+	vmovdqu x6, 6 * 16(mem);			\
+	vmovdqu x7, 7 * 16(mem);			\
+	vmovdqu y0, 8 * 16(mem);			\
+	vmovdqu y1, 9 * 16(mem);			\
+	vmovdqu y2, 10 * 16(mem);			\
+	vmovdqu y3, 11 * 16(mem);			\
+	vmovdqu y4, 12 * 16(mem);			\
+	vmovdqu y5, 13 * 16(mem);			\
+	vmovdqu y6, 14 * 16(mem);			\
+	vmovdqu y7, 15 * 16(mem);			\
+
+#define aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, idx)		\
+	vmovdqu x0, ((idx + 0) * 16)(mem_tmp);		\
+	vmovdqu x1, ((idx + 1) * 16)(mem_tmp);		\
+	vmovdqu x2, ((idx + 2) * 16)(mem_tmp);		\
+	vmovdqu x3, ((idx + 3) * 16)(mem_tmp);		\
+	vmovdqu x4, ((idx + 4) * 16)(mem_tmp);		\
+	vmovdqu x5, ((idx + 5) * 16)(mem_tmp);		\
+	vmovdqu x6, ((idx + 6) * 16)(mem_tmp);		\
+	vmovdqu x7, ((idx + 7) * 16)(mem_tmp);
+
+#define aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, idx)		\
+	vmovdqu ((idx + 0) * 16)(mem_tmp), x0;		\
+	vmovdqu ((idx + 1) * 16)(mem_tmp), x1;		\
+	vmovdqu ((idx + 2) * 16)(mem_tmp), x2;		\
+	vmovdqu ((idx + 3) * 16)(mem_tmp), x3;		\
+	vmovdqu ((idx + 4) * 16)(mem_tmp), x4;		\
+	vmovdqu ((idx + 5) * 16)(mem_tmp), x5;		\
+	vmovdqu ((idx + 6) * 16)(mem_tmp), x6;		\
+	vmovdqu ((idx + 7) * 16)(mem_tmp), x7;
+
+#define aria_ark_8way(x0, x1, x2, x3,			\
+		      x4, x5, x6, x7,			\
+		      t0, rk, idx, round)		\
+	/* AddRoundKey */                               \
+	vpbroadcastb ((round * 16) + idx + 3)(rk), t0;	\
+	vpxor t0, x0, x0;				\
+	vpbroadcastb ((round * 16) + idx + 2)(rk), t0;	\
+	vpxor t0, x1, x1;				\
+	vpbroadcastb ((round * 16) + idx + 1)(rk), t0;	\
+	vpxor t0, x2, x2;				\
+	vpbroadcastb ((round * 16) + idx + 0)(rk), t0;	\
+	vpxor t0, x3, x3;				\
+	vpbroadcastb ((round * 16) + idx + 7)(rk), t0;	\
+	vpxor t0, x4, x4;				\
+	vpbroadcastb ((round * 16) + idx + 6)(rk), t0;	\
+	vpxor t0, x5, x5;				\
+	vpbroadcastb ((round * 16) + idx + 5)(rk), t0;	\
+	vpxor t0, x6, x6;				\
+	vpbroadcastb ((round * 16) + idx + 4)(rk), t0;	\
+	vpxor t0, x7, x7;
+
+#define aria_sbox_8way_gfni(x0, x1, x2, x3,		\
+			    x4, x5, x6, x7,		\
+			    t0, t1, t2, t3,		\
+			    t4, t5, t6, t7)		\
+	vpbroadcastq .Ltf_s2_bitmatrix, t0;		\
+	vpbroadcastq .Ltf_inv_bitmatrix, t1;		\
+	vpbroadcastq .Ltf_id_bitmatrix, t2;		\
+	vpbroadcastq .Ltf_aff_bitmatrix, t3;		\
+	vpbroadcastq .Ltf_x2_bitmatrix, t4;		\
+	vgf2p8affineinvqb $(tf_s2_const), t0, x1, x1;	\
+	vgf2p8affineinvqb $(tf_s2_const), t0, x5, x5;	\
+	vgf2p8affineqb $(tf_inv_const), t1, x2, x2;	\
+	vgf2p8affineqb $(tf_inv_const), t1, x6, x6;	\
+	vgf2p8affineinvqb $0, t2, x2, x2;		\
+	vgf2p8affineinvqb $0, t2, x6, x6;		\
+	vgf2p8affineinvqb $(tf_aff_const), t3, x0, x0;	\
+	vgf2p8affineinvqb $(tf_aff_const), t3, x4, x4;	\
+	vgf2p8affineqb $(tf_x2_const), t4, x3, x3;	\
+	vgf2p8affineqb $(tf_x2_const), t4, x7, x7;	\
+	vgf2p8affineinvqb $0, t2, x3, x3;		\
+	vgf2p8affineinvqb $0, t2, x7, x7
+
+#define aria_sbox_8way(x0, x1, x2, x3,            	\
+		       x4, x5, x6, x7,			\
+		       t0, t1, t2, t3,			\
+		       t4, t5, t6, t7)			\
+	vpxor t7, t7, t7;				\
+	vmovdqa .Linv_shift_row, t0;			\
+	vmovdqa .Lshift_row, t1;			\
+	vpbroadcastd .L0f0f0f0f, t6;			\
+	vmovdqa .Ltf_lo__inv_aff__and__s2, t2;		\
+	vmovdqa .Ltf_hi__inv_aff__and__s2, t3;		\
+	vmovdqa .Ltf_lo__x2__and__fwd_aff, t4;		\
+	vmovdqa .Ltf_hi__x2__and__fwd_aff, t5;		\
+							\
+	vaesenclast t7, x0, x0;				\
+	vaesenclast t7, x4, x4;				\
+	vaesenclast t7, x1, x1;				\
+	vaesenclast t7, x5, x5;				\
+	vaesdeclast t7, x2, x2;				\
+	vaesdeclast t7, x6, x6;				\
+							\
+	/* AES inverse shift rows */			\
+	vpshufb t0, x0, x0;				\
+	vpshufb t0, x4, x4;				\
+	vpshufb t0, x1, x1;				\
+	vpshufb t0, x5, x5;				\
+	vpshufb t1, x3, x3;				\
+	vpshufb t1, x7, x7;				\
+	vpshufb t1, x2, x2;				\
+	vpshufb t1, x6, x6;				\
+							\
+	/* affine transformation for S2 */		\
+	filter_8bit(x1, t2, t3, t6, t0);		\
+	/* affine transformation for S2 */		\
+	filter_8bit(x5, t2, t3, t6, t0);		\
+							\
+	/* affine transformation for X2 */		\
+	filter_8bit(x3, t4, t5, t6, t0);		\
+	/* affine transformation for X2 */		\
+	filter_8bit(x7, t4, t5, t6, t0);		\
+	vaesdeclast t7, x3, x3;				\
+	vaesdeclast t7, x7, x7;
+
+#define aria_diff_m(x0, x1, x2, x3,			\
+		    t0, t1, t2, t3)			\
+	/* T = rotr32(X, 8); */				\
+	/* X ^= T */					\
+	vpxor x0, x3, t0;				\
+	vpxor x1, x0, t1;				\
+	vpxor x2, x1, t2;				\
+	vpxor x3, x2, t3;				\
+	/* X = T ^ rotr(X, 16); */			\
+	vpxor t2, x0, x0;				\
+	vpxor x1, t3, t3;				\
+	vpxor t0, x2, x2;				\
+	vpxor t1, x3, x1;				\
+	vmovdqu t3, x3;
+
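+/*
+ * In the byte-sliced representation a 32-bit byte rotation is just a
+ * renaming of registers, so aria_diff_m() computes X ^ rotr32(X, 8) ^
+ * rotr32(X, 16) with XORs alone (up to a fixed permutation of the
+ * output registers).
+ */
+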
+#define aria_diff_word(x0, x1, x2, x3,			\
+		       x4, x5, x6, x7,			\
+		       y0, y1, y2, y3,			\
+		       y4, y5, y6, y7)			\
+	/* t1 ^= t2; */					\
+	vpxor y0, x4, x4;				\
+	vpxor y1, x5, x5;				\
+	vpxor y2, x6, x6;				\
+	vpxor y3, x7, x7;				\
+							\
+	/* t2 ^= t3; */					\
+	vpxor y4, y0, y0;				\
+	vpxor y5, y1, y1;				\
+	vpxor y6, y2, y2;				\
+	vpxor y7, y3, y3;				\
+							\
+	/* t0 ^= t1; */					\
+	vpxor x4, x0, x0;				\
+	vpxor x5, x1, x1;				\
+	vpxor x6, x2, x2;				\
+	vpxor x7, x3, x3;				\
+							\
+	/* t3 ^= t1; */					\
+	vpxor x4, y4, y4;				\
+	vpxor x5, y5, y5;				\
+	vpxor x6, y6, y6;				\
+	vpxor x7, y7, y7;				\
+							\
+	/* t2 ^= t0; */					\
+	vpxor x0, y0, y0;				\
+	vpxor x1, y1, y1;				\
+	vpxor x2, y2, y2;				\
+	vpxor x3, y3, y3;				\
+							\
+	/* t1 ^= t2; */					\
+	vpxor y0, x4, x4;				\
+	vpxor y1, x5, x5;				\
+	vpxor y2, x6, x6;				\
+	vpxor y3, x7, x7;
+
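+/*
+ * aria_fo()/aria_fe() are the odd and even ARIA round functions (they
+ * differ in S-box ordering and in the final byte permutation), while
+ * aria_ff() is the last round, which applies two round keys and skips
+ * the diffusion layer.
+ */
+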
+#define aria_fe(x0, x1, x2, x3,				\
+		x4, x5, x6, x7,				\
+		y0, y1, y2, y3,				\
+		y4, y5, y6, y7,				\
+		mem_tmp, rk, round)			\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, round);		\
+							\
+	aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5,	\
+		       y0, y1, y2, y3, y4, y5, y6, y7);	\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 8);		\
+							\
+	aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, 0);		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, round);		\
+							\
+	aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5,	\
+		       y0, y1, y2, y3, y4, y5, y6, y7);	\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 0);		\
+	aria_load_state_8way(y0, y1, y2, y3,		\
+			     y4, y5, y6, y7,		\
+			     mem_tmp, 8);		\
+	aria_diff_word(x0, x1, x2, x3,			\
+		       x4, x5, x6, x7,			\
+		       y0, y1, y2, y3,			\
+		       y4, y5, y6, y7);			\
+	/* aria_diff_byte() 				\
+	 * T3 = ABCD -> BADC 				\
+	 * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 	\
+	 * T0 = ABCD -> CDAB 				\
+	 * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 	\
+	 * T1 = ABCD -> DCBA 				\
+	 * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4	\
+	 */						\
+	aria_diff_word(x2, x3, x0, x1,			\
+		       x7, x6, x5, x4,			\
+		       y0, y1, y2, y3,			\
+		       y5, y4, y7, y6);			\
+	aria_store_state_8way(x3, x2, x1, x0,		\
+			      x6, x7, x4, x5,		\
+			      mem_tmp, 0);
+
+#define aria_fo(x0, x1, x2, x3,				\
+		x4, x5, x6, x7,				\
+		y0, y1, y2, y3,				\
+		y4, y5, y6, y7,				\
+		mem_tmp, rk, round)			\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, round);		\
+							\
+	aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		       y0, y1, y2, y3, y4, y5, y6, y7);	\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 8);		\
+							\
+	aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, 0);		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, round);		\
+							\
+	aria_sbox_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		       y0, y1, y2, y3, y4, y5, y6, y7);	\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 0);		\
+	aria_load_state_8way(y0, y1, y2, y3,		\
+			     y4, y5, y6, y7,		\
+			     mem_tmp, 8);		\
+	aria_diff_word(x0, x1, x2, x3,			\
+		       x4, x5, x6, x7,			\
+		       y0, y1, y2, y3,			\
+		       y4, y5, y6, y7);			\
+	/* aria_diff_byte() 				\
+	 * T1 = ABCD -> BADC 				\
+	 * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6	\
+	 * T2 = ABCD -> CDAB 				\
+	 * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 	\
+	 * T3 = ABCD -> DCBA 				\
+	 * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 	\
+	 */						\
+	aria_diff_word(x0, x1, x2, x3,			\
+		       x5, x4, x7, x6,			\
+		       y2, y3, y0, y1,			\
+		       y7, y6, y5, y4);			\
+	aria_store_state_8way(x3, x2, x1, x0,		\
+			      x6, x7, x4, x5,		\
+			      mem_tmp, 0);
+
+#define aria_ff(x0, x1, x2, x3,				\
+		x4, x5, x6, x7,				\
+		y0, y1, y2, y3,				\
+		y4, y5, y6, y7,				\
+		mem_tmp, rk, round, last_round)		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, round);		\
+							\
+	aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5,	\
+		       y0, y1, y2, y3, y4, y5, y6, y7);	\
+							\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, last_round);		\
+							\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 8);		\
+							\
+	aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, 0);		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, round);		\
+							\
+	aria_sbox_8way(x2, x3, x0, x1, x6, x7, x4, x5,	\
+		       y0, y1, y2, y3, y4, y5, y6, y7);	\
+							\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, last_round);		\
+							\
+	aria_load_state_8way(y0, y1, y2, y3,		\
+			     y4, y5, y6, y7,		\
+			     mem_tmp, 8);
+
+#define aria_fe_gfni(x0, x1, x2, x3,			\
+		     x4, x5, x6, x7,			\
+		     y0, y1, y2, y3,			\
+		     y4, y5, y6, y7,			\
+		     mem_tmp, rk, round)		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, round);		\
+							\
+	aria_sbox_8way_gfni(x2, x3, x0, x1, 		\
+			    x6, x7, x4, x5,		\
+			    y0, y1, y2, y3, 		\
+			    y4, y5, y6, y7);		\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 8);		\
+							\
+	aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, 0);		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, round);		\
+							\
+	aria_sbox_8way_gfni(x2, x3, x0, x1, 		\
+			    x6, x7, x4, x5,		\
+			    y0, y1, y2, y3, 		\
+			    y4, y5, y6, y7);		\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 0);		\
+	aria_load_state_8way(y0, y1, y2, y3,		\
+			     y4, y5, y6, y7,		\
+			     mem_tmp, 8);		\
+	aria_diff_word(x0, x1, x2, x3,			\
+		       x4, x5, x6, x7,			\
+		       y0, y1, y2, y3,			\
+		       y4, y5, y6, y7);			\
+	/* aria_diff_byte() 				\
+	 * T3 = ABCD -> BADC 				\
+	 * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 	\
+	 * T0 = ABCD -> CDAB 				\
+	 * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 	\
+	 * T1 = ABCD -> DCBA 				\
+	 * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4	\
+	 */						\
+	aria_diff_word(x2, x3, x0, x1,			\
+		       x7, x6, x5, x4,			\
+		       y0, y1, y2, y3,			\
+		       y5, y4, y7, y6);			\
+	aria_store_state_8way(x3, x2, x1, x0,		\
+			      x6, x7, x4, x5,		\
+			      mem_tmp, 0);
+
+#define aria_fo_gfni(x0, x1, x2, x3,			\
+		     x4, x5, x6, x7,			\
+		     y0, y1, y2, y3,			\
+		     y4, y5, y6, y7,			\
+		     mem_tmp, rk, round)		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, round);		\
+							\
+	aria_sbox_8way_gfni(x0, x1, x2, x3, 		\
+			    x4, x5, x6, x7,		\
+			    y0, y1, y2, y3, 		\
+			    y4, y5, y6, y7);		\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 8);		\
+							\
+	aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, 0);		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, round);		\
+							\
+	aria_sbox_8way_gfni(x0, x1, x2, x3, 		\
+			    x4, x5, x6, x7,		\
+			    y0, y1, y2, y3, 		\
+			    y4, y5, y6, y7);		\
+							\
+	aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3);	\
+	aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3);	\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 0);		\
+	aria_load_state_8way(y0, y1, y2, y3,		\
+			     y4, y5, y6, y7,		\
+			     mem_tmp, 8);		\
+	aria_diff_word(x0, x1, x2, x3,			\
+		       x4, x5, x6, x7,			\
+		       y0, y1, y2, y3,			\
+		       y4, y5, y6, y7);			\
+	/* aria_diff_byte() 				\
+	 * T1 = ABCD -> BADC 				\
+	 * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6	\
+	 * T2 = ABCD -> CDAB 				\
+	 * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 	\
+	 * T3 = ABCD -> DCBA 				\
+	 * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 	\
+	 */						\
+	aria_diff_word(x0, x1, x2, x3,			\
+		       x5, x4, x7, x6,			\
+		       y2, y3, y0, y1,			\
+		       y7, y6, y5, y4);			\
+	aria_store_state_8way(x3, x2, x1, x0,		\
+			      x6, x7, x4, x5,		\
+			      mem_tmp, 0);
+
+#define aria_ff_gfni(x0, x1, x2, x3,			\
+		x4, x5, x6, x7,				\
+		y0, y1, y2, y3,				\
+		y4, y5, y6, y7,				\
+		mem_tmp, rk, round, last_round)		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, round);		\
+							\
+	aria_sbox_8way_gfni(x2, x3, x0, x1, 		\
+			    x6, x7, x4, x5,		\
+			    y0, y1, y2, y3, 		\
+			    y4, y5, y6, y7);		\
+							\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 8, last_round);		\
+							\
+	aria_store_state_8way(x0, x1, x2, x3,		\
+			      x4, x5, x6, x7,		\
+			      mem_tmp, 8);		\
+							\
+	aria_load_state_8way(x0, x1, x2, x3,		\
+			     x4, x5, x6, x7,		\
+			     mem_tmp, 0);		\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, round);		\
+							\
+	aria_sbox_8way_gfni(x2, x3, x0, x1, 		\
+			    x6, x7, x4, x5,		\
+			    y0, y1, y2, y3, 		\
+			    y4, y5, y6, y7);		\
+							\
+	aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7,	\
+		      y0, rk, 0, last_round);		\
+							\
+	aria_load_state_8way(y0, y1, y2, y3,		\
+			     y4, y5, y6, y7,		\
+			     mem_tmp, 8);
+
+/* NB: section is mergeable, all elements must be 16-byte aligned */
+.section	.rodata.cst16, "aM", @progbits, 16
+.align 16
+
+#define SHUFB_BYTES(idx) \
+	0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+
+.Lshufb_16x16b:
+	.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3);
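+/* 0,4,8,12, 1,5,9,13, ...: a 4x4 byte transpose pattern for byteslicing */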
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+	.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+	.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+.Lshift_row:
+	.byte 0x00, 0x05, 0x0a, 0x0f, 0x04, 0x09, 0x0e, 0x03
+	.byte 0x08, 0x0d, 0x02, 0x07, 0x0c, 0x01, 0x06, 0x0b
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+	.byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+	.byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+
+/* AES inverse affine and S2 combined:
+ *      1 1 0 0 0 0 0 1     x0     0
+ *      0 1 0 0 1 0 0 0     x1     0
+ *      1 1 0 0 1 1 1 1     x2     0
+ *      0 1 1 0 1 0 0 1     x3     1
+ *      0 1 0 0 1 1 0 0  *  x4  +  0
+ *      0 1 0 1 1 0 0 0     x5     0
+ *      0 0 0 0 0 1 0 1     x6     0
+ *      1 1 1 0 0 1 1 1     x7     1
+ */
+.Ltf_lo__inv_aff__and__s2:
+	.octa 0x92172DA81A9FA520B2370D883ABF8500
+.Ltf_hi__inv_aff__and__s2:
+	.octa 0x2B15FFC1AF917B45E6D8320C625CB688
+
+/* X2 and AES forward affine combined:
+ *      1 0 1 1 0 0 0 1     x0     0
+ *      0 1 1 1 1 0 1 1     x1     0
+ *      0 0 0 1 1 0 1 0     x2     1
+ *      0 1 0 0 0 1 0 0     x3     0
+ *      0 0 1 1 1 0 1 1  *  x4  +  0
+ *      0 1 0 0 1 0 0 0     x5     0
+ *      1 1 0 1 0 0 1 1     x6     0
+ *      0 1 0 0 1 0 1 0     x7     0
+ */
+.Ltf_lo__x2__and__fwd_aff:
+	.octa 0xEFAE0544FCBD1657B8F95213ABEA4100
+.Ltf_hi__x2__and__fwd_aff:
+	.octa 0x3F893781E95FE1576CDA64D2BA0CB204
+
+.section	.rodata.cst8, "aM", @progbits, 8
+.align 8
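+/*
+ * BV8()/BM8X8() pack an 8x8 bit matrix over GF(2) into a quadword for
+ * vgf2p8affineqb/vgf2p8affineinvqb, which compute y = M*x + c with the
+ * matching tf_*_const supplied as the immediate constant c (the *invqb
+ * forms invert the byte in GF(2^8) before the affine step).
+ */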
+/* AES affine: */
+#define tf_aff_const BV8(1, 1, 0, 0, 0, 1, 1, 0)
+.Ltf_aff_bitmatrix:
+	.quad BM8X8(BV8(1, 0, 0, 0, 1, 1, 1, 1),
+		    BV8(1, 1, 0, 0, 0, 1, 1, 1),
+		    BV8(1, 1, 1, 0, 0, 0, 1, 1),
+		    BV8(1, 1, 1, 1, 0, 0, 0, 1),
+		    BV8(1, 1, 1, 1, 1, 0, 0, 0),
+		    BV8(0, 1, 1, 1, 1, 1, 0, 0),
+		    BV8(0, 0, 1, 1, 1, 1, 1, 0),
+		    BV8(0, 0, 0, 1, 1, 1, 1, 1))
+
+/* AES inverse affine: */
+#define tf_inv_const BV8(1, 0, 1, 0, 0, 0, 0, 0)
+.Ltf_inv_bitmatrix:
+	.quad BM8X8(BV8(0, 0, 1, 0, 0, 1, 0, 1),
+		    BV8(1, 0, 0, 1, 0, 0, 1, 0),
+		    BV8(0, 1, 0, 0, 1, 0, 0, 1),
+		    BV8(1, 0, 1, 0, 0, 1, 0, 0),
+		    BV8(0, 1, 0, 1, 0, 0, 1, 0),
+		    BV8(0, 0, 1, 0, 1, 0, 0, 1),
+		    BV8(1, 0, 0, 1, 0, 1, 0, 0),
+		    BV8(0, 1, 0, 0, 1, 0, 1, 0))
+
+/* S2: */
+#define tf_s2_const BV8(0, 1, 0, 0, 0, 1, 1, 1)
+.Ltf_s2_bitmatrix:
+	.quad BM8X8(BV8(0, 1, 0, 1, 0, 1, 1, 1),
+		    BV8(0, 0, 1, 1, 1, 1, 1, 1),
+		    BV8(1, 1, 1, 0, 1, 1, 0, 1),
+		    BV8(1, 1, 0, 0, 0, 0, 1, 1),
+		    BV8(0, 1, 0, 0, 0, 0, 1, 1),
+		    BV8(1, 1, 0, 0, 1, 1, 1, 0),
+		    BV8(0, 1, 1, 0, 0, 0, 1, 1),
+		    BV8(1, 1, 1, 1, 0, 1, 1, 0))
+
+/* X2: */
+#define tf_x2_const BV8(0, 0, 1, 1, 0, 1, 0, 0)
+.Ltf_x2_bitmatrix:
+	.quad BM8X8(BV8(0, 0, 0, 1, 1, 0, 0, 0),
+		    BV8(0, 0, 1, 0, 0, 1, 1, 0),
+		    BV8(0, 0, 0, 0, 1, 0, 1, 0),
+		    BV8(1, 1, 1, 0, 0, 0, 1, 1),
+		    BV8(1, 1, 1, 0, 1, 1, 0, 0),
+		    BV8(0, 1, 1, 0, 1, 0, 1, 1),
+		    BV8(1, 0, 1, 1, 1, 1, 0, 1),
+		    BV8(1, 0, 0, 1, 0, 0, 1, 1))
+
+/* Identity matrix: */
+.Ltf_id_bitmatrix:
+	.quad BM8X8(BV8(1, 0, 0, 0, 0, 0, 0, 0),
+		    BV8(0, 1, 0, 0, 0, 0, 0, 0),
+		    BV8(0, 0, 1, 0, 0, 0, 0, 0),
+		    BV8(0, 0, 0, 1, 0, 0, 0, 0),
+		    BV8(0, 0, 0, 0, 1, 0, 0, 0),
+		    BV8(0, 0, 0, 0, 0, 1, 0, 0),
+		    BV8(0, 0, 0, 0, 0, 0, 1, 0),
+		    BV8(0, 0, 0, 0, 0, 0, 0, 1))
+
+/* 4-bit mask used by filter_8bit() to split bytes into lo/hi nibbles */
+.section	.rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
+.align 4
+.L0f0f0f0f:
+	.long 0x0f0f0f0f
+
+.text
+
+SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
+	/* input:
+	*      %r9: rk
+	*      %rsi: dst
+	*      %rdx: src
+	*      %xmm0..%xmm15: 16 byte-sliced blocks
+	*/
+
+	FRAME_BEGIN
+
+	movq %rsi, %rax;
+	leaq 8 * 16(%rax), %r8;
+
+	inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		      %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		      %xmm15, %rax, %r8);
+	aria_fo(%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 0);
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 1);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 2);
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 3);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 4);
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 5);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 6);
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 7);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 8);
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 9);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 10);
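+	/* ARIA uses 12, 14 or 16 rounds for 128, 192 and 256-bit keys */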
+	cmpl $12, rounds(CTX);
+	jne .Laria_192;
+	aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 11, 12);
+	jmp .Laria_end;
+.Laria_192:
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 11);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 12);
+	cmpl $14, rounds(CTX);
+	jne .Laria_256;
+	aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 13, 14);
+	jmp .Laria_end;
+.Laria_256:
+	aria_fe(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 13);
+	aria_fo(%xmm9, %xmm8, %xmm11, %xmm10, %xmm12, %xmm13, %xmm14, %xmm15,
+		%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		%rax, %r9, 14);
+	aria_ff(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 15, 16);
+.Laria_end:
+	debyteslice_16x16b(%xmm8, %xmm12, %xmm1, %xmm4,
+			   %xmm9, %xmm13, %xmm0, %xmm5,
+			   %xmm10, %xmm14, %xmm3, %xmm6,
+			   %xmm11, %xmm15, %xmm2, %xmm7,
+			   (%rax), (%r8));
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(__aria_aesni_avx_crypt_16way)
+
+SYM_FUNC_START(aria_aesni_avx_encrypt_16way)
+	/* input:
+	*      %rdi: ctx, CTX
+	*      %rsi: dst
+	*      %rdx: src
+	*/
+
+	FRAME_BEGIN
+
+	leaq enc_key(CTX), %r9;
+
+	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rdx);
+
+	call __aria_aesni_avx_crypt_16way;
+
+	write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax);
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(aria_aesni_avx_encrypt_16way)
+
+SYM_FUNC_START(aria_aesni_avx_decrypt_16way)
+	/* input:
+	*      %rdi: ctx, CTX
+	*      %rsi: dst
+	*      %rdx: src
+	*/
+
+	FRAME_BEGIN
+
+	leaq dec_key(CTX), %r9;
+
+	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rdx);
+
+	call __aria_aesni_avx_crypt_16way;
+
+	write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax);
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(aria_aesni_avx_decrypt_16way)
+
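+/*
+ * Generate the counter blocks IV, IV+1, ..., IV+15 (big endian) into
+ * %xmm0..%xmm15, staging the first eight through the keystream buffer,
+ * and write IV+16 back to (%r8) as the next IV.
+ */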
+SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way)
+	/* input:
+	*      %rdi: ctx
+	*      %rsi: dst
+	*      %rdx: src
+	*      %rcx: keystream
+	*      %r8: iv (big endian, 128bit)
+	*/
+
+	FRAME_BEGIN
+	/* load IV and byteswap */
+	vmovdqu (%r8), %xmm8;
+
+	vmovdqa .Lbswap128_mask (%rip), %xmm1;
+	vpshufb %xmm1, %xmm8, %xmm3; /* be => le */
+
+	vpcmpeqd %xmm0, %xmm0, %xmm0;
+	vpsrldq $8, %xmm0, %xmm0; /* low: -1, high: 0 */
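+	/*
+	 * inc_le128() uses this low:-1/high:0 pair to add 1 with carry
+	 * propagation across the full 128-bit little-endian counter.
+	 */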
+
+	/* construct IVs */
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm9;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm10;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm11;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm12;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm13;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm14;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm15;
+	vmovdqu %xmm8, (0 * 16)(%rcx);
+	vmovdqu %xmm9, (1 * 16)(%rcx);
+	vmovdqu %xmm10, (2 * 16)(%rcx);
+	vmovdqu %xmm11, (3 * 16)(%rcx);
+	vmovdqu %xmm12, (4 * 16)(%rcx);
+	vmovdqu %xmm13, (5 * 16)(%rcx);
+	vmovdqu %xmm14, (6 * 16)(%rcx);
+	vmovdqu %xmm15, (7 * 16)(%rcx);
+
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm8;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm9;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm10;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm11;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm12;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm13;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm14;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm15;
+	inc_le128(%xmm3, %xmm0, %xmm5); /* +1 */
+	vpshufb %xmm1, %xmm3, %xmm4;
+	vmovdqu %xmm4, (%r8);
+
+	vmovdqu (0 * 16)(%rcx), %xmm0;
+	vmovdqu (1 * 16)(%rcx), %xmm1;
+	vmovdqu (2 * 16)(%rcx), %xmm2;
+	vmovdqu (3 * 16)(%rcx), %xmm3;
+	vmovdqu (4 * 16)(%rcx), %xmm4;
+	vmovdqu (5 * 16)(%rcx), %xmm5;
+	vmovdqu (6 * 16)(%rcx), %xmm6;
+	vmovdqu (7 * 16)(%rcx), %xmm7;
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way)
+
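+/*
+ * CTR: the counter blocks are encrypted in place (the keystream buffer
+ * serves as both src and dst for the block cipher) and then XORed with
+ * the plaintext saved in %r11; the result goes to the dst saved in %r10.
+ */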
+SYM_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
+	/* input:
+	*      %rdi: ctx
+	*      %rsi: dst
+	*      %rdx: src
+	*      %rcx: keystream
+	*      %r8: iv (big endian, 128bit)
+	*/
+	FRAME_BEGIN
+
+	call __aria_aesni_avx_ctr_gen_keystream_16way;
+
+	leaq (%rsi), %r10;
+	leaq (%rdx), %r11;
+	leaq (%rcx), %rsi;
+	leaq (%rcx), %rdx;
+	leaq enc_key(CTX), %r9;
+
+	call __aria_aesni_avx_crypt_16way;
+
+	vpxor (0 * 16)(%r11), %xmm1, %xmm1;
+	vpxor (1 * 16)(%r11), %xmm0, %xmm0;
+	vpxor (2 * 16)(%r11), %xmm3, %xmm3;
+	vpxor (3 * 16)(%r11), %xmm2, %xmm2;
+	vpxor (4 * 16)(%r11), %xmm4, %xmm4;
+	vpxor (5 * 16)(%r11), %xmm5, %xmm5;
+	vpxor (6 * 16)(%r11), %xmm6, %xmm6;
+	vpxor (7 * 16)(%r11), %xmm7, %xmm7;
+	vpxor (8 * 16)(%r11), %xmm8, %xmm8;
+	vpxor (9 * 16)(%r11), %xmm9, %xmm9;
+	vpxor (10 * 16)(%r11), %xmm10, %xmm10;
+	vpxor (11 * 16)(%r11), %xmm11, %xmm11;
+	vpxor (12 * 16)(%r11), %xmm12, %xmm12;
+	vpxor (13 * 16)(%r11), %xmm13, %xmm13;
+	vpxor (14 * 16)(%r11), %xmm14, %xmm14;
+	vpxor (15 * 16)(%r11), %xmm15, %xmm15;
+	write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %r10);
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(aria_aesni_avx_ctr_crypt_16way)
+
+SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
+	/* input:
+	*      %r9: rk
+	*      %rsi: dst
+	*      %rdx: src
+	*      %xmm0..%xmm15: 16 byte-sliced blocks
+	*/
+
+	FRAME_BEGIN
+
+	movq %rsi, %rax;
+	leaq 8 * 16(%rax), %r8;
+
+	inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3,
+		      %xmm4, %xmm5, %xmm6, %xmm7,
+		      %xmm8, %xmm9, %xmm10, %xmm11,
+		      %xmm12, %xmm13, %xmm14,
+		      %xmm15, %rax, %r8);
+	aria_fo_gfni(%xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 0);
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 1);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 2);
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 3);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 4);
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 5);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 6);
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 7);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 8);
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 9);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 10);
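+	/* ARIA uses 12, 14 or 16 rounds for 128, 192 and 256-bit keys */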
+	cmpl $12, rounds(CTX);
+	jne .Laria_gfni_192;
+	aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		%xmm15, %rax, %r9, 11, 12);
+	jmp .Laria_gfni_end;
+.Laria_gfni_192:
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 11);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 12);
+	cmpl $14, rounds(CTX);
+	jne .Laria_gfni_256;
+	aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 13, 14);
+	jmp .Laria_gfni_end;
+.Laria_gfni_256:
+	aria_fe_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 13);
+	aria_fo_gfni(%xmm9, %xmm8, %xmm11, %xmm10,
+		     %xmm12, %xmm13, %xmm14, %xmm15,
+		     %xmm0, %xmm1, %xmm2, %xmm3,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %rax, %r9, 14);
+	aria_ff_gfni(%xmm1, %xmm0, %xmm3, %xmm2,
+		     %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11,
+		     %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax, %r9, 15, 16);
+.Laria_gfni_end:
+	debyteslice_16x16b(%xmm8, %xmm12, %xmm1, %xmm4,
+			   %xmm9, %xmm13, %xmm0, %xmm5,
+			   %xmm10, %xmm14, %xmm3, %xmm6,
+			   %xmm11, %xmm15, %xmm2, %xmm7,
+			   (%rax), (%r8));
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way)
+
+SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
+	/* input:
+	*      %rdi: ctx, CTX
+	*      %rsi: dst
+	*      %rdx: src
+	*/
+
+	FRAME_BEGIN
+
+	leaq enc_key(CTX), %r9;
+
+	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rdx);
+
+	call __aria_aesni_avx_gfni_crypt_16way;
+
+	write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax);
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way)
+
+SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
+	/* input:
+	*      %rdi: ctx, CTX
+	*      %rsi: dst
+	*      %rdx: src
+	*/
+
+	FRAME_BEGIN
+
+	leaq dec_key(CTX), %r9;
+
+	inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rdx);
+
+	call __aria_aesni_avx_gfni_crypt_16way;
+
+	write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %rax);
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way)
+
+SYM_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
+	/* input:
+	*      %rdi: ctx
+	*      %rsi: dst
+	*      %rdx: src
+	*      %rcx: keystream
+	*      %r8: iv (big endian, 128bit)
+	*/
+	FRAME_BEGIN
+
+	call __aria_aesni_avx_ctr_gen_keystream_16way;
+
+	leaq (%rsi), %r10;
+	leaq (%rdx), %r11;
+	leaq (%rcx), %rsi;
+	leaq (%rcx), %rdx;
+	leaq enc_key(CTX), %r9;
+
+	call __aria_aesni_avx_gfni_crypt_16way;
+
+	vpxor (0 * 16)(%r11), %xmm1, %xmm1;
+	vpxor (1 * 16)(%r11), %xmm0, %xmm0;
+	vpxor (2 * 16)(%r11), %xmm3, %xmm3;
+	vpxor (3 * 16)(%r11), %xmm2, %xmm2;
+	vpxor (4 * 16)(%r11), %xmm4, %xmm4;
+	vpxor (5 * 16)(%r11), %xmm5, %xmm5;
+	vpxor (6 * 16)(%r11), %xmm6, %xmm6;
+	vpxor (7 * 16)(%r11), %xmm7, %xmm7;
+	vpxor (8 * 16)(%r11), %xmm8, %xmm8;
+	vpxor (9 * 16)(%r11), %xmm9, %xmm9;
+	vpxor (10 * 16)(%r11), %xmm10, %xmm10;
+	vpxor (11 * 16)(%r11), %xmm11, %xmm11;
+	vpxor (12 * 16)(%r11), %xmm12, %xmm12;
+	vpxor (13 * 16)(%r11), %xmm13, %xmm13;
+	vpxor (14 * 16)(%r11), %xmm14, %xmm14;
+	vpxor (15 * 16)(%r11), %xmm15, %xmm15;
+	write_output(%xmm1, %xmm0, %xmm3, %xmm2, %xmm4, %xmm5, %xmm6, %xmm7,
+		     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+		     %xmm15, %r10);
+
+	FRAME_END
+	RET;
+SYM_FUNC_END(aria_aesni_avx_gfni_ctr_crypt_16way)
diff --git a/arch/x86/crypto/aria-avx.h b/arch/x86/crypto/aria-avx.h
new file mode 100644
index 000000000000..01e9a01dc157
--- /dev/null
+++ b/arch/x86/crypto/aria-avx.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ASM_X86_ARIA_AVX_H
+#define ASM_X86_ARIA_AVX_H
+
+#include <linux/types.h>
+
+#define ARIA_AESNI_PARALLEL_BLOCKS 16
+#define ARIA_AESNI_PARALLEL_BLOCK_SIZE  (ARIA_BLOCK_SIZE * 16)
+
+struct aria_avx_ops {
+	void (*aria_encrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
+	void (*aria_decrypt_16way)(const void *ctx, u8 *dst, const u8 *src);
+	void (*aria_ctr_crypt_16way)(const void *ctx, u8 *dst, const u8 *src,
+				     u8 *keystream, u8 *iv);
+};
+#endif
diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c
new file mode 100644
index 000000000000..c561ea4fefa5
--- /dev/null
+++ b/arch/x86/crypto/aria_aesni_avx_glue.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Glue Code for the AVX/AES-NI/GFNI assembler implementation of the ARIA Cipher
+ *
+ * Copyright (c) 2022 Taehee Yoo <ap420073@gmail.com>
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/aria.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#include "ecb_cbc_helpers.h"
+#include "aria-avx.h"
+
+asmlinkage void aria_aesni_avx_encrypt_16way(const void *ctx, u8 *dst,
+					     const u8 *src);
+asmlinkage void aria_aesni_avx_decrypt_16way(const void *ctx, u8 *dst,
+					     const u8 *src);
+asmlinkage void aria_aesni_avx_ctr_crypt_16way(const void *ctx, u8 *dst,
+					       const u8 *src,
+					       u8 *keystream, u8 *iv);
+asmlinkage void aria_aesni_avx_gfni_encrypt_16way(const void *ctx, u8 *dst,
+						  const u8 *src);
+asmlinkage void aria_aesni_avx_gfni_decrypt_16way(const void *ctx, u8 *dst,
+						  const u8 *src);
+asmlinkage void aria_aesni_avx_gfni_ctr_crypt_16way(const void *ctx, u8 *dst,
+						    const u8 *src,
+						    u8 *keystream, u8 *iv);
+
+static struct aria_avx_ops aria_ops;
+
+static int ecb_do_encrypt(struct skcipher_request *req, const u32 *rkey)
+{
+	ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
+	ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_encrypt_16way);
+	ECB_BLOCK(1, aria_encrypt);
+	ECB_WALK_END();
+}
+
+static int ecb_do_decrypt(struct skcipher_request *req, const u32 *rkey)
+{
+	ECB_WALK_START(req, ARIA_BLOCK_SIZE, ARIA_AESNI_PARALLEL_BLOCKS);
+	ECB_BLOCK(ARIA_AESNI_PARALLEL_BLOCKS, aria_ops.aria_decrypt_16way);
+	ECB_BLOCK(1, aria_decrypt);
+	ECB_WALK_END();
+}
+
+static int aria_avx_ecb_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return ecb_do_encrypt(req, ctx->enc_key[0]);
+}
+
+static int aria_avx_ecb_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	return ecb_do_decrypt(req, ctx->dec_key[0]);
+}
+
+static int aria_avx_set_key(struct crypto_skcipher *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	return aria_set_key(&tfm->base, key, keylen);
+}
+
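+/*
+ * CTR is handled in three steps: full 16-block chunks go through the
+ * 16-way SIMD helper under kernel_fpu_begin()/end(), remaining whole
+ * blocks fall back to the generic aria_encrypt(), and a partial tail
+ * is XORed with one final keystream block.
+ */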
+static int aria_avx_ctr_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aria_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while ((nbytes = walk.nbytes) > 0) {
+		const u8 *src = walk.src.virt.addr;
+		u8 *dst = walk.dst.virt.addr;
+
+		while (nbytes >= ARIA_AESNI_PARALLEL_BLOCK_SIZE) {
+			u8 keystream[ARIA_AESNI_PARALLEL_BLOCK_SIZE];
+
+			kernel_fpu_begin();
+			aria_ops.aria_ctr_crypt_16way(ctx, dst, src, keystream,
+						      walk.iv);
+			kernel_fpu_end();
+			dst += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+			src += ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+			nbytes -= ARIA_AESNI_PARALLEL_BLOCK_SIZE;
+		}
+
+		while (nbytes >= ARIA_BLOCK_SIZE) {
+			u8 keystream[ARIA_BLOCK_SIZE];
+
+			memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE);
+			crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
+
+			aria_encrypt(ctx, keystream, keystream);
+
+			crypto_xor_cpy(dst, src, keystream, ARIA_BLOCK_SIZE);
+			dst += ARIA_BLOCK_SIZE;
+			src += ARIA_BLOCK_SIZE;
+			nbytes -= ARIA_BLOCK_SIZE;
+		}
+
+		if (walk.nbytes == walk.total && nbytes > 0) {
+			u8 keystream[ARIA_BLOCK_SIZE];
+
+			memcpy(keystream, walk.iv, ARIA_BLOCK_SIZE);
+			crypto_inc(walk.iv, ARIA_BLOCK_SIZE);
+
+			aria_encrypt(ctx, keystream, keystream);
+
+			crypto_xor_cpy(dst, src, keystream, nbytes);
+			dst += nbytes;
+			src += nbytes;
+			nbytes = 0;
+		}
+		err = skcipher_walk_done(&walk, nbytes);
+	}
+
+	return err;
+}
+
+static struct skcipher_alg aria_algs[] = {
+	{
+		.base.cra_name		= "__ecb(aria)",
+		.base.cra_driver_name	= "__ecb-aria-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= ARIA_BLOCK_SIZE,
+		.base.cra_ctxsize	= sizeof(struct aria_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= ARIA_MIN_KEY_SIZE,
+		.max_keysize		= ARIA_MAX_KEY_SIZE,
+		.setkey			= aria_avx_set_key,
+		.encrypt		= aria_avx_ecb_encrypt,
+		.decrypt		= aria_avx_ecb_decrypt,
+	}, {
+		.base.cra_name		= "__ctr(aria)",
+		.base.cra_driver_name	= "__ctr-aria-avx",
+		.base.cra_priority	= 400,
+		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.base.cra_blocksize	= 1,
+		.base.cra_ctxsize	= sizeof(struct aria_ctx),
+		.base.cra_module	= THIS_MODULE,
+		.min_keysize		= ARIA_MIN_KEY_SIZE,
+		.max_keysize		= ARIA_MAX_KEY_SIZE,
+		.ivsize			= ARIA_BLOCK_SIZE,
+		.chunksize		= ARIA_BLOCK_SIZE,
+		.walksize		= 16 * ARIA_BLOCK_SIZE,
+		.setkey			= aria_avx_set_key,
+		.encrypt		= aria_avx_ctr_encrypt,
+		.decrypt		= aria_avx_ctr_encrypt,
+	}
+};
+
+static struct simd_skcipher_alg *aria_simd_algs[ARRAY_SIZE(aria_algs)];
+
+static int __init aria_avx_init(void)
+{
+	const char *feature_name;
+
+	if (!boot_cpu_has(X86_FEATURE_AVX) ||
+	    !boot_cpu_has(X86_FEATURE_AES) ||
+	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+		pr_info("AVX or AES-NI instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
+				&feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
+		return -ENODEV;
+	}
+
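+	/* Prefer the GFNI S-box implementation when the CPU supports it */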
+	if (boot_cpu_has(X86_FEATURE_GFNI)) {
+		aria_ops.aria_encrypt_16way = aria_aesni_avx_gfni_encrypt_16way;
+		aria_ops.aria_decrypt_16way = aria_aesni_avx_gfni_decrypt_16way;
+		aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_gfni_ctr_crypt_16way;
+	} else {
+		aria_ops.aria_encrypt_16way = aria_aesni_avx_encrypt_16way;
+		aria_ops.aria_decrypt_16way = aria_aesni_avx_decrypt_16way;
+		aria_ops.aria_ctr_crypt_16way = aria_aesni_avx_ctr_crypt_16way;
+	}
+
+	return simd_register_skciphers_compat(aria_algs,
+					      ARRAY_SIZE(aria_algs),
+					      aria_simd_algs);
+}
+
+static void __exit aria_avx_exit(void)
+{
+	simd_unregister_skciphers(aria_algs, ARRAY_SIZE(aria_algs),
+				  aria_simd_algs);
+}
+
+module_init(aria_avx_init);
+module_exit(aria_avx_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Taehee Yoo <ap420073@gmail.com>");
+MODULE_DESCRIPTION("ARIA Cipher Algorithm, AVX/AES-NI/GFNI optimized");
+MODULE_ALIAS_CRYPTO("aria");
+MODULE_ALIAS_CRYPTO("aria-aesni-avx");
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 30e70f4fe2f7..6d3b85e53d0e 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -36,6 +36,7 @@
 #include <linux/types.h>
 #include <crypto/sha2.h>
 #include <crypto/sha512_base.h>
+#include <asm/cpu_device_id.h>
 #include <asm/simd.h>
 
 asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
@@ -284,6 +285,18 @@ static int register_sha512_avx2(void)
 			ARRAY_SIZE(sha512_avx2_algs));
 	return 0;
 }
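+
+/*
+ * MODULE_DEVICE_TABLE(x86cpu) lets udev auto-load this module only on
+ * CPUs advertising at least one of these features.
+ */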
+static const struct x86_cpu_id module_cpu_ids[] = {
+	X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL),
+	X86_MATCH_FEATURE(X86_FEATURE_SSSE3, NULL),
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids);
 
 static void unregister_sha512_avx2(void)
 {
@@ -294,6 +302,8 @@ static void unregister_sha512_avx2(void)
 
 static int __init sha512_ssse3_mod_init(void)
 {
+	if (!x86_match_cpu(module_cpu_ids))
+		return -ENODEV;
 
 	if (register_sha512_ssse3())
 		goto fail;
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 381d3333b996..3e88b9df8c8f 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -11,6 +11,9 @@ include $(srctree)/lib/vdso/Makefile
 
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
+KMSAN_SANITIZE_vclock_gettime.o := n
+KMSAN_SANITIZE_vgetcpu.o	:= n
+
 UBSAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 1000d457c332..6292b960037b 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -127,17 +127,17 @@ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
 {
 	struct mm_struct *mm = task->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	mmap_read_lock(mm);
-
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		unsigned long size = vma->vm_end - vma->vm_start;
 
 		if (vma_is_special_mapping(vma, &vvar_mapping))
 			zap_page_range(vma, vma->vm_start, size);
 	}
-
 	mmap_read_unlock(mm);
+
 	return 0;
 }
 #else
@@ -354,6 +354,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, mm, 0);
 
 	mmap_write_lock(mm);
 	/*
@@ -363,7 +364,7 @@ int map_vdso_once(const struct vdso_image *image, unsigned long addr)
 	 * We could search vma near context.vdso, but it's a slowpath,
 	 * so let's explicitly check all VMAs to be completely sure.
 	 */
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+	for_each_vma(vmi, vma) {
 		if (vma_is_special_mapping(vma, &vdso_mapping) ||
 				vma_is_special_mapping(vma, &vvar_mapping)) {
 			mmap_write_unlock(mm);
diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile
index 9933c0e8e97a..86a76efa8bb6 100644
--- a/arch/x86/events/Makefile
+++ b/arch/x86/events/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y					+= core.o probe.o
+obj-y					+= core.o probe.o utils.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= rapl.o
 obj-y					+= amd/
 obj-$(CONFIG_X86_LOCAL_APIC)            += msr.o
diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index b9f5d4610256..527d947eb76b 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CPU_SUP_AMD)		+= core.o
+obj-$(CONFIG_CPU_SUP_AMD)		+= core.o lbr.o
 obj-$(CONFIG_PERF_EVENTS_AMD_BRS)	+= brs.o
 obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
 obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o
diff --git a/arch/x86/events/amd/brs.c b/arch/x86/events/amd/brs.c
index bee8765a1e9b..f1bff153d945 100644
--- a/arch/x86/events/amd/brs.c
+++ b/arch/x86/events/amd/brs.c
@@ -81,7 +81,7 @@ static bool __init amd_brs_detect(void)
  * a br_sel_map. Software filtering is not supported because it would not correlate well
  * with a sampling period.
  */
-int amd_brs_setup_filter(struct perf_event *event)
+static int amd_brs_setup_filter(struct perf_event *event)
 {
 	u64 type = event->attr.branch_sample_type;
 
@@ -96,6 +96,73 @@ int amd_brs_setup_filter(struct perf_event *event)
 	return 0;
 }
 
+static inline int amd_is_brs_event(struct perf_event *e)
+{
+	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
+}
+
+int amd_brs_hw_config(struct perf_event *event)
+{
+	int ret = 0;
+
+	/*
+	 * Due to interrupt holding, BRS is not recommended in
+	 * counting mode.
+	 */
+	if (!is_sampling_event(event))
+		return -EINVAL;
+
+	/*
+	 * Due to the way BRS operates by holding the interrupt until
+	 * lbr_nr entries have been captured, it does not make sense
+	 * to allow sampling on BRS with an event that does not match
+	 * what BRS is capturing, i.e., retired taken branches.
+	 * Otherwise the correlation with the event's period is even
+	 * more loose:
+	 *
+	 * With retired taken branch:
+	 *   Effective P = P + 16 + X
+	 * With any other event:
+	 *   Effective P = P + Y + X
+	 *
+	 * Where X is the number of taken branches due to interrupt
+	 * skid. Skid is large.
+	 *
+	 * Where Y is the number of occurrences of the event while BRS is
+	 * capturing the lbr_nr entries.
+	 *
+	 * By using retired taken branches, we limit the impact on the
+	 * Y variable. We know it cannot be more than the depth of
+	 * BRS.
+	 */
+	if (!amd_is_brs_event(event))
+		return -EINVAL;
+
+	/*
+	 * BRS implementation does not work with frequency mode
+	 * reprogramming of the period.
+	 */
+	if (event->attr.freq)
+		return -EINVAL;
+	/*
+	 * The kernel subtracts BRS depth from period, so it must
+	 * be big enough.
+	 */
+	if (event->attr.sample_period <= x86_pmu.lbr_nr)
+		return -EINVAL;
+
+	/*
+	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
+	 */
+	ret = amd_brs_setup_filter(event);
+
+	/* only set in case of success */
+	if (!ret)
+		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
+
+	return ret;
+}
+
 /* tos = top of stack, i.e., last valid entry written */
 static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
 {
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 9ac3718410ce..8b70237c33f7 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -330,16 +330,14 @@ static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc)
 	}
 }
 
-#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
-static inline int amd_is_brs_event(struct perf_event *e)
-{
-	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
-}
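+/*
+ * Branch-stack handling is dispatched via static calls so that this
+ * code stays common to the BRS and LBR implementations.
+ */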
+DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config);
 
 static int amd_core_hw_config(struct perf_event *event)
 {
-	int ret = 0;
-
 	if (event->attr.exclude_host && event->attr.exclude_guest)
 		/*
 		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -356,66 +350,10 @@ static int amd_core_hw_config(struct perf_event *event)
 	if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw))
 		event->hw.flags |= PERF_X86_EVENT_PAIR;
 
-	/*
-	 * if branch stack is requested
-	 */
-	if (has_branch_stack(event)) {
-		/*
-		 * Due to interrupt holding, BRS is not recommended in
-		 * counting mode.
-		 */
-		if (!is_sampling_event(event))
-			return -EINVAL;
+	if (has_branch_stack(event))
+		return static_call(amd_pmu_branch_hw_config)(event);
 
-		/*
-		 * Due to the way BRS operates by holding the interrupt until
-		 * lbr_nr entries have been captured, it does not make sense
-		 * to allow sampling on BRS with an event that does not match
-		 * what BRS is capturing, i.e., retired taken branches.
-		 * Otherwise the correlation with the event's period is even
-		 * more loose:
-		 *
-		 * With retired taken branch:
-		 *   Effective P = P + 16 + X
-		 * With any other event:
-		 *   Effective P = P + Y + X
-		 *
-		 * Where X is the number of taken branches due to interrupt
-		 * skid. Skid is large.
-		 *
-		 * Where Y is the occurences of the event while BRS is
-		 * capturing the lbr_nr entries.
-		 *
-		 * By using retired taken branches, we limit the impact on the
-		 * Y variable. We know it cannot be more than the depth of
-		 * BRS.
-		 */
-		if (!amd_is_brs_event(event))
-			return -EINVAL;
-
-		/*
-		 * BRS implementation does not work with frequency mode
-		 * reprogramming of the period.
-		 */
-		if (event->attr.freq)
-			return -EINVAL;
-		/*
-		 * The kernel subtracts BRS depth from period, so it must
-		 * be big enough.
-		 */
-		if (event->attr.sample_period <= x86_pmu.lbr_nr)
-			return -EINVAL;
-
-		/*
-		 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
-		 */
-		ret = amd_brs_setup_filter(event);
-
-		/* only set in case of success */
-		if (!ret)
-			event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
-	}
-	return ret;
+	return 0;
 }
 
 static inline int amd_is_nb_event(struct hw_perf_event *hwc)
@@ -582,8 +520,14 @@ static struct amd_nb *amd_alloc_nb(int cpu)
 	return nb;
 }
 
+typedef void (amd_pmu_branch_reset_t)(void);
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t);
+
 static void amd_pmu_cpu_reset(int cpu)
 {
+	if (x86_pmu.lbr_nr)
+		static_call(amd_pmu_branch_reset)();
+
 	if (x86_pmu.version < 2)
 		return;
 
@@ -598,16 +542,24 @@ static int amd_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
+	cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL,
+				     cpu_to_node(cpu));
+	if (!cpuc->lbr_sel)
+		return -ENOMEM;
+
 	WARN_ON_ONCE(cpuc->amd_nb);
 
 	if (!x86_pmu.amd_nb_constraints)
 		return 0;
 
 	cpuc->amd_nb = amd_alloc_nb(cpu);
-	if (!cpuc->amd_nb)
-		return -ENOMEM;
+	if (cpuc->amd_nb)
+		return 0;
 
-	return 0;
+	kfree(cpuc->lbr_sel);
+	cpuc->lbr_sel = NULL;
+
+	return -ENOMEM;
 }
 
 static void amd_pmu_cpu_starting(int cpu)
@@ -640,19 +592,19 @@ static void amd_pmu_cpu_starting(int cpu)
 	cpuc->amd_nb->nb_id = nb_id;
 	cpuc->amd_nb->refcnt++;
 
-	amd_brs_reset();
 	amd_pmu_cpu_reset(cpu);
 }
 
 static void amd_pmu_cpu_dead(int cpu)
 {
-	struct cpu_hw_events *cpuhw;
+	struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+
+	kfree(cpuhw->lbr_sel);
+	cpuhw->lbr_sel = NULL;
 
 	if (!x86_pmu.amd_nb_constraints)
 		return;
 
-	cpuhw = &per_cpu(cpu_hw_events, cpu);
-
 	if (cpuhw->amd_nb) {
 		struct amd_nb *nb = cpuhw->amd_nb;
 
@@ -677,7 +629,7 @@ static inline u64 amd_pmu_get_global_status(void)
 	/* PerfCntrGlobalStatus is read-only */
 	rdmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status);
 
-	return status & amd_pmu_global_cntr_mask;
+	return status;
 }
 
 static inline void amd_pmu_ack_global_status(u64 status)
@@ -688,8 +640,6 @@ static inline void amd_pmu_ack_global_status(u64 status)
 	 * clears the same bit in PerfCntrGlobalStatus
 	 */
 
-	/* Only allow modifications to PerfCntrGlobalStatus.PerfCntrOvfl */
-	status &= amd_pmu_global_cntr_mask;
 	wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status);
 }
 
@@ -799,11 +749,17 @@ static void amd_pmu_v2_enable_event(struct perf_event *event)
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
-static void amd_pmu_v2_enable_all(int added)
+static __always_inline void amd_pmu_core_enable_all(void)
 {
 	amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask);
 }
 
+static void amd_pmu_v2_enable_all(int added)
+{
+	amd_pmu_lbr_enable_all();
+	amd_pmu_core_enable_all();
+}
+
 static void amd_pmu_disable_event(struct perf_event *event)
 {
 	x86_pmu_disable_event(event);
@@ -828,23 +784,32 @@ static void amd_pmu_disable_all(void)
 	amd_pmu_check_overflow();
 }
 
-static void amd_pmu_v2_disable_all(void)
+static __always_inline void amd_pmu_core_disable_all(void)
 {
-	/* Disable all PMCs */
 	amd_pmu_set_global_ctl(0);
+}
+
+static void amd_pmu_v2_disable_all(void)
+{
+	amd_pmu_core_disable_all();
+	amd_pmu_lbr_disable_all();
 	amd_pmu_check_overflow();
 }
 
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add);
+
 static void amd_pmu_add_event(struct perf_event *event)
 {
 	if (needs_branch_stack(event))
-		amd_pmu_brs_add(event);
+		static_call(amd_pmu_branch_add)(event);
 }
 
+DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del);
+
 static void amd_pmu_del_event(struct perf_event *event)
 {
 	if (needs_branch_stack(event))
-		amd_pmu_brs_del(event);
+		static_call(amd_pmu_branch_del)(event);
 }
 
 /*
@@ -930,8 +895,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	pmu_enabled = cpuc->enabled;
 	cpuc->enabled = 0;
 
-	/* Stop counting */
-	amd_pmu_v2_disable_all();
+	/* Stop counting but do not disable LBR */
+	amd_pmu_core_disable_all();
 
 	status = amd_pmu_get_global_status();
 
@@ -939,6 +904,12 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	if (!status)
 		goto done;
 
+	/* Read branch records before unfreezing */
+	if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+		amd_pmu_lbr_read();
+		status &= ~GLOBAL_STATUS_LBRS_FROZEN;
+	}
+
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -958,6 +929,11 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 		if (!x86_perf_event_set_period(event))
 			continue;
 
+		if (has_branch_stack(event)) {
+			data.br_stack = &cpuc->lbr_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+		}
+
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
 
@@ -971,7 +947,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	 */
 	WARN_ON(status > 0);
 
-	/* Clear overflow bits */
+	/* Clear overflow and freeze bits */
 	amd_pmu_ack_global_status(~status);
 
 	/*
@@ -985,7 +961,7 @@ done:
 
 	/* Resume counting only if PMU is active */
 	if (pmu_enabled)
-		amd_pmu_v2_enable_all(0);
+		amd_pmu_core_enable_all();
 
 	return amd_pmu_adjust_nmi_window(handled);
 }
@@ -1248,23 +1224,14 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
 	return x86_event_sysfs_show(page, config, event);
 }
 
-static void amd_pmu_sched_task(struct perf_event_context *ctx,
-				 bool sched_in)
-{
-	if (sched_in && x86_pmu.lbr_nr)
-		amd_pmu_brs_sched_task(ctx, sched_in);
-}
-
-static u64 amd_pmu_limit_period(struct perf_event *event, u64 left)
+static void amd_pmu_limit_period(struct perf_event *event, s64 *left)
 {
 	/*
 	 * Decrease period by the depth of the BRS feature to get the last N
 	 * taken branches and approximate the desired period
 	 */
-	if (has_branch_stack(event) && left > x86_pmu.lbr_nr)
-		left -= x86_pmu.lbr_nr;
-
-	return left;
+	if (has_branch_stack(event) && *left > x86_pmu.lbr_nr)
+		*left -= x86_pmu.lbr_nr;
 }
 
 static __initconst const struct x86_pmu amd_pmu = {
@@ -1311,23 +1278,25 @@ static ssize_t branches_show(struct device *cdev,
 
 static DEVICE_ATTR_RO(branches);
 
-static struct attribute *amd_pmu_brs_attrs[] = {
+static struct attribute *amd_pmu_branches_attrs[] = {
 	&dev_attr_branches.attr,
 	NULL,
 };
 
 static umode_t
-amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
 	return x86_pmu.lbr_nr ? attr->mode : 0;
 }
 
-static struct attribute_group group_caps_amd_brs = {
+static struct attribute_group group_caps_amd_branches = {
 	.name  = "caps",
-	.attrs = amd_pmu_brs_attrs,
-	.is_visible = amd_brs_is_visible,
+	.attrs = amd_pmu_branches_attrs,
+	.is_visible = amd_branches_is_visible,
 };
 
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
+
 EVENT_ATTR_STR(branch-brs, amd_branch_brs,
 	       "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n");
 
@@ -1336,15 +1305,26 @@ static struct attribute *amd_brs_events_attrs[] = {
 	NULL,
 };
 
+static umode_t
+amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ?
+	       attr->mode : 0;
+}
+
 static struct attribute_group group_events_amd_brs = {
 	.name       = "events",
 	.attrs      = amd_brs_events_attrs,
 	.is_visible = amd_brs_is_visible,
 };
 
+#endif	/* CONFIG_PERF_EVENTS_AMD_BRS */
+
 static const struct attribute_group *amd_attr_update[] = {
-	&group_caps_amd_brs,
+	&group_caps_amd_branches,
+#ifdef CONFIG_PERF_EVENTS_AMD_BRS
 	&group_events_amd_brs,
+#endif
 	NULL,
 };
 
@@ -1421,13 +1401,27 @@ static int __init amd_core_pmu_init(void)
 		x86_pmu.flags |= PMU_FL_PAIR;
 	}
 
-	/*
-	 * BRS requires special event constraints and flushing on ctxsw.
-	 */
-	if (boot_cpu_data.x86 >= 0x19 && !amd_brs_init()) {
+	/* LBR and BRS are mutually exclusive features */
+	if (!amd_pmu_lbr_init()) {
+		/* LBR requires flushing on context switch */
+		x86_pmu.sched_task = amd_pmu_lbr_sched_task;
+		static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config);
+		static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset);
+		static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add);
+		static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del);
+	} else if (!amd_brs_init()) {
+		/*
+		 * BRS requires special event constraints and flushing on ctxsw.
+		 */
 		x86_pmu.get_event_constraints = amd_get_event_constraints_f19h;
-		x86_pmu.sched_task = amd_pmu_sched_task;
+		x86_pmu.sched_task = amd_pmu_brs_sched_task;
 		x86_pmu.limit_period = amd_pmu_limit_period;
+
+		static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config);
+		static_call_update(amd_pmu_branch_reset, amd_brs_reset);
+		static_call_update(amd_pmu_branch_add, amd_pmu_brs_add);
+		static_call_update(amd_pmu_branch_del, amd_pmu_brs_del);
+
 		/*
 		 * put_event_constraints callback same as Fam17h, set above
 		 */
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index c251bc44c088..3271735f0070 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -300,16 +300,6 @@ static int perf_ibs_init(struct perf_event *event)
 	hwc->config_base = perf_ibs->msr;
 	hwc->config = config;
 
-	/*
-	 * rip recorded by IbsOpRip will not be consistent with rsp and rbp
-	 * recorded as part of interrupt regs. Thus we need to use rip from
-	 * interrupt regs while unwinding call stack. Setting _EARLY flag
-	 * makes sure we unwind call-stack before perf sample rip is set to
-	 * IbsOpRip.
-	 */
-	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-		event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
-
 	return 0;
 }
 
@@ -688,6 +678,341 @@ static struct perf_ibs perf_ibs_op = {
 	.get_count		= get_ibs_op_count,
 };
 
+static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3,
+				struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->mem_op = PERF_MEM_OP_NA;
+
+	if (op_data3->ld_op)
+		data_src->mem_op = PERF_MEM_OP_LOAD;
+	else if (op_data3->st_op)
+		data_src->mem_op = PERF_MEM_OP_STORE;
+}
+
+/*
+ * Processors with CPUID_Fn8000001B_EAX[11] (IBS_CAPS_ZEN4) have
+ * finer-grained DataSrc encodings; others only have coarse ones.
+ */
+static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
+{
+	if (ibs_caps & IBS_CAPS_ZEN4)
+		return (op_data2->data_src_hi << 3) | op_data2->data_src_lo;
+
+	return op_data2->data_src_lo;
+}
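
For illustration, the Zen4 stitching above can be exercised standalone; the example field values here are made up:

#include <stdio.h>
#include <stdint.h>

/* Mirrors perf_ibs_data_src(): Zen4 splits DataSrc across two fields. */
static uint8_t data_src(uint8_t hi, uint8_t lo, int zen4)
{
	if (zen4)
		return (uint8_t)((hi << 3) | lo);
	return lo;
}

int main(void)
{
	/* hi = 0x1, lo = 0x2 -> (1 << 3) | 2 = 0xa on Zen4 */
	printf("%#x\n", data_src(0x1, 0x2, 1));
	return 0;
}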
+
+static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+				 union ibs_op_data3 *op_data3,
+				 struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+	u8 ibs_data_src = perf_ibs_data_src(op_data2);
+
+	data_src->mem_lvl = 0;
+
+	/*
+	 * DcMiss, L2Miss, DataSrc, DcMissLat, etc. are all invalid for
+	 * uncached memory accesses, so check the DcUcMemAcc bit early.
+	 */
+	if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
+		data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* L1 Hit */
+	if (op_data3->dc_miss == 0) {
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* L2 Hit */
+	if (op_data3->l2_miss == 0) {
+		/* Erratum #1293 */
+		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
+		    !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
+			data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	}
+
+	/*
+	 * OP_DATA2 is valid only for load ops. Skip all checks that
+	 * use OP_DATA2[DataSrc].
+	 */
+	if (data_src->mem_op != PERF_MEM_OP_LOAD)
+		goto check_mab;
+
+	/* L3 Hit */
+	if (ibs_caps & IBS_CAPS_ZEN4) {
+		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	} else {
+		if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
+					    PERF_MEM_LVL_HIT;
+			return;
+		}
+	}
+
+	/* A peer cache in a near CCX */
+	if (ibs_caps & IBS_CAPS_ZEN4 &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* A peer cache in a far CCX */
+	if (ibs_caps & IBS_CAPS_ZEN4) {
+		if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	} else {
+		if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	}
+
+	/* DRAM */
+	if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
+		if (op_data2->rmt_node == 0)
+			data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		else
+			data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* PMEM */
+	if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
+		if (op_data2->rmt_node) {
+			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+			/* IBS doesn't provide Remote socket detail */
+			data_src->mem_hops = PERF_MEM_HOPS_1;
+		}
+		return;
+	}
+
+	/* Extension Memory */
+	if (ibs_caps & IBS_CAPS_ZEN4 &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM;
+		if (op_data2->rmt_node) {
+			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+			/* IBS doesn't provide Remote socket detail */
+			data_src->mem_hops = PERF_MEM_HOPS_1;
+		}
+		return;
+	}
+
+	/* IO */
+	if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
+		data_src->mem_lvl = PERF_MEM_LVL_IO;
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+		if (op_data2->rmt_node) {
+			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+			/* IBS doesn't provide Remote socket detail */
+			data_src->mem_hops = PERF_MEM_HOPS_1;
+		}
+		return;
+	}
+
+check_mab:
+	/*
+	 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
+	 * DC misses. However, such data may come from any level in the
+	 * memory hierarchy. IBS reports both the MAB state and the actual
+	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
+	 * MAB only when IBS fails to provide a DataSrc.
+	 */
+	if (op_data3->dc_miss_no_mab_alloc) {
+		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	data_src->mem_lvl = PERF_MEM_LVL_NA;
+}
+
+static bool perf_ibs_cache_hit_st_valid(void)
+{
+	/* 0: Uninitialized, 1: Valid, -1: Invalid */
+	static int cache_hit_st_valid;
+
+	if (unlikely(!cache_hit_st_valid)) {
+		if (boot_cpu_data.x86 == 0x19 &&
+		    (boot_cpu_data.x86_model <= 0xF ||
+		    (boot_cpu_data.x86_model >= 0x20 &&
+		     boot_cpu_data.x86_model <= 0x5F))) {
+			cache_hit_st_valid = -1;
+		} else {
+			cache_hit_st_valid = 1;
+		}
+	}
+
+	return cache_hit_st_valid == 1;
+}
+
+static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2,
+				   struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+	u8 ibs_data_src;
+
+	data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+
+	if (!perf_ibs_cache_hit_st_valid() ||
+	    data_src->mem_op != PERF_MEM_OP_LOAD ||
+	    data_src->mem_lvl & PERF_MEM_LVL_L1 ||
+	    data_src->mem_lvl & PERF_MEM_LVL_L2 ||
+	    op_data2->cache_hit_st)
+		return;
+
+	ibs_data_src = perf_ibs_data_src(op_data2);
+
+	if (ibs_caps & IBS_CAPS_ZEN4) {
+		if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
+		    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
+		    ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE)
+			data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+	} else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+	}
+}
+
+static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3,
+				 struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->mem_dtlb = PERF_MEM_TLB_NA;
+
+	if (!op_data3->dc_lin_addr_valid)
+		return;
+
+	if (!op_data3->dc_l1tlb_miss) {
+		data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
+		return;
+	}
+
+	if (!op_data3->dc_l2tlb_miss) {
+		data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
+		return;
+	}
+
+	data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;
+}
+
+static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3,
+				  struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->mem_lock = PERF_MEM_LOCK_NA;
+
+	if (op_data3->dc_locked_op)
+		data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
+}
+
+#define ibs_op_msr_idx(msr)	(msr - MSR_AMD64_IBSOPCTL)
+
+static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
+				  struct perf_sample_data *data,
+				  union ibs_op_data2 *op_data2,
+				  union ibs_op_data3 *op_data3)
+{
+	perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+	perf_ibs_get_mem_snoop(op_data2, data);
+	perf_ibs_get_tlb_lvl(op_data3, data);
+	perf_ibs_get_mem_lock(op_data3, data);
+}
+
+static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data,
+				   union ibs_op_data3 *op_data3)
+{
+	__u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];
+
+	/* Erratum #1293 */
+	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
+	    (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
+		/*
+		 * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
+		 * DataSrc=0 is 'No valid status' and RmtNode is invalid when
+		 * DataSrc=0.
+		 */
+		val = 0;
+	}
+	return val;
+}
+
+static void perf_ibs_parse_ld_st_data(__u64 sample_type,
+				      struct perf_ibs_data *ibs_data,
+				      struct perf_sample_data *data)
+{
+	union ibs_op_data3 op_data3;
+	union ibs_op_data2 op_data2;
+	union ibs_op_data op_data;
+
+	data->data_src.val = PERF_MEM_NA;
+	op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+
+	perf_ibs_get_mem_op(&op_data3, data);
+	if (data->data_src.mem_op != PERF_MEM_OP_LOAD &&
+	    data->data_src.mem_op != PERF_MEM_OP_STORE)
+		return;
+
+	op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3);
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC) {
+		perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3);
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss &&
+	    data->data_src.mem_op == PERF_MEM_OP_LOAD) {
+		op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)];
+
+		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
+			data->weight.var1_dw = op_data3.dc_miss_lat;
+			data->weight.var2_w = op_data.tag_to_ret_ctr;
+		} else if (sample_type & PERF_SAMPLE_WEIGHT) {
+			data->weight.full = op_data3.dc_miss_lat;
+		}
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) {
+		data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)];
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
+
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) {
+		data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)];
+		data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
+	}
+}
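
A pattern worth noting throughout this series: a field in struct perf_sample_data is only trustworthy once the matching PERF_SAMPLE_* bit is set in data->sample_flags. A reduced consumer-side sketch of that protocol, with simplified types:

#include <stdio.h>
#include <stdint.h>

#define SAMPLE_ADDR	(1u << 0)
#define SAMPLE_WEIGHT	(1u << 1)

struct sample {
	uint32_t flags;		/* which fields below are valid */
	uint64_t addr;
	uint64_t weight;
};

static void consume(const struct sample *s)
{
	if (s->flags & SAMPLE_ADDR)
		printf("addr   = %#llx\n", (unsigned long long)s->addr);
	if (s->flags & SAMPLE_WEIGHT)
		printf("weight = %llu\n", (unsigned long long)s->weight);
}

int main(void)
{
	struct sample s = { .flags = SAMPLE_ADDR, .addr = 0x1000 };

	consume(&s);	/* weight is skipped: its flag is not set */
	return 0;
}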
+
+static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
+				   int check_rip)
+{
+	if (sample_type & PERF_SAMPLE_RAW ||
+	    (perf_ibs == &perf_ibs_op &&
+	     (sample_type & PERF_SAMPLE_DATA_SRC ||
+	      sample_type & PERF_SAMPLE_WEIGHT_TYPE ||
+	      sample_type & PERF_SAMPLE_ADDR ||
+	      sample_type & PERF_SAMPLE_PHYS_ADDR)))
+		return perf_ibs->offset_max;
+	else if (check_rip)
+		return 3;
+	return 1;
+}
+
 static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 {
 	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -735,12 +1058,9 @@ fail:
 	size = 1;
 	offset = 1;
 	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
-	if (event->attr.sample_type & PERF_SAMPLE_RAW)
-		offset_max = perf_ibs->offset_max;
-	else if (check_rip)
-		offset_max = 3;
-	else
-		offset_max = 1;
+
+	offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
+
 	do {
 		rdmsrl(msr + offset, *buf++);
 		size++;
@@ -791,15 +1111,21 @@ fail:
 			},
 		};
 		data.raw = &raw;
+		data.sample_flags |= PERF_SAMPLE_RAW;
 	}
 
+	if (perf_ibs == &perf_ibs_op)
+		perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data);
+
 	/*
 	 * rip recorded by IbsOpRip will not be consistent with rsp and rbp
 	 * recorded as part of interrupt regs. Thus we need to use rip from
 	 * interrupt regs while unwinding call stack.
 	 */
-	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+	if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 		data.callchain = perf_callchain(event, iregs);
+		data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
+	}
 
 	throttle = perf_event_overflow(event, &data, &regs);
 out:
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
new file mode 100644
index 000000000000..38a75216c12c
--- /dev/null
+++ b/arch/x86/events/amd/lbr.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <asm/perf_event.h>
+
+#include "../perf_event.h"
+
+/* LBR Branch Select valid bits */
+#define LBR_SELECT_MASK		0x1ff
+
+/*
+ * LBR Branch Select filter bits which when set, ensures that the
+ * corresponding type of branches are not recorded
+ */
+#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
+#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
+#define LBR_SELECT_JCC			2	/* Conditional branches */
+#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
+#define LBR_SELECT_CALL_NEAR_IND	4	/* Indirect relative calls */
+#define LBR_SELECT_RET_NEAR		5	/* Near returns */
+#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
+#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
+#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */
+
+#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
+#define LBR_USER	BIT(LBR_SELECT_USER)
+#define LBR_JCC		BIT(LBR_SELECT_JCC)
+#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
+#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
+#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
+#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
+#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
+#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)
+#define LBR_NOT_SUPP	-1	/* unsupported filter */
+#define LBR_IGNORE	0
+
+#define LBR_ANY		\
+	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
+	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
+
+struct branch_entry {
+	union {
+		struct {
+			u64	ip:58;
+			u64	ip_sign_ext:5;
+			u64	mispredict:1;
+		} split;
+		u64		full;
+	} from;
+
+	union {
+		struct {
+			u64	ip:58;
+			u64	ip_sign_ext:3;
+			u64	reserved:1;
+			u64	spec:1;
+			u64	valid:1;
+		} split;
+		u64		full;
+	} to;
+};
+
+static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
+{
+	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+}
+
+static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
+{
+	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+}
+
+static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
+{
+	u64 val;
+
+	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
+
+	return val;
+}
+
+static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
+{
+	u64 val;
+
+	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
+
+	return val;
+}
+
+static __always_inline u64 sign_ext_branch_ip(u64 ip)
+{
+	u32 shift = 64 - boot_cpu_data.x86_virt_bits;
+
+	return (u64)(((s64)ip << shift) >> shift);
+}
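
As a worked example of sign_ext_branch_ip() above, assuming x86_virt_bits = 48 (a common value): shift = 16, and an IP with bit 47 set is widened into the canonical upper half:

#include <stdio.h>
#include <stdint.h>

static uint64_t sign_ext_ip(uint64_t ip, unsigned int virt_bits)
{
	unsigned int shift = 64 - virt_bits;

	/* Arithmetic right shift replicates the top valid bit. */
	return (uint64_t)(((int64_t)ip << shift) >> shift);
}

int main(void)
{
	/* Bit 47 set -> kernel-half address after extension. */
	printf("%#llx\n",
	       (unsigned long long)sign_ext_ip(0x0000800000001234ULL, 48));
	/* prints 0xffff800000001234 */
	return 0;
}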
+
+static void amd_pmu_lbr_filter(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int br_sel = cpuc->br_sel, offset, type, i, j;
+	bool compress = false;
+	bool fused_only = false;
+	u64 from, to;
+
+	/* If sampling all branches, only fused entries need fixing up */
+	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
+		fused_only = true;
+
+	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+		from = cpuc->lbr_entries[i].from;
+		to = cpuc->lbr_entries[i].to;
+		type = branch_type_fused(from, to, 0, &offset);
+
+		/*
+		 * Adjust the branch from address in case of instruction
+		 * fusion where it points to an instruction preceding the
+		 * actual branch
+		 */
+		if (offset) {
+			cpuc->lbr_entries[i].from += offset;
+			if (fused_only)
+				continue;
+		}
+
+		/* If type does not correspond, then discard */
+		if (type == X86_BR_NONE || (br_sel & type) != type) {
+			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
+			compress = true;
+		}
+
+		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
+			cpuc->lbr_entries[i].type = common_branch_type(type);
+	}
+
+	if (!compress)
+		return;
+
+	/* Remove all invalid entries */
+	for (i = 0; i < cpuc->lbr_stack.nr; ) {
+		if (!cpuc->lbr_entries[i].from) {
+			j = i;
+			while (++j < cpuc->lbr_stack.nr)
+				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
+			cpuc->lbr_stack.nr--;
+			if (!cpuc->lbr_entries[i].from)
+				continue;
+		}
+		i++;
+	}
+}
+
+static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
+	PERF_BR_SPEC_NA,
+	PERF_BR_SPEC_WRONG_PATH,
+	PERF_BR_NON_SPEC_CORRECT_PATH,
+	PERF_BR_SPEC_CORRECT_PATH,
+};
+
+void amd_pmu_lbr_read(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct perf_branch_entry *br = cpuc->lbr_entries;
+	struct branch_entry entry;
+	int out = 0, idx, i;
+
+	if (!cpuc->lbr_users)
+		return;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		entry.from.full	= amd_pmu_lbr_get_from(i);
+		entry.to.full	= amd_pmu_lbr_get_to(i);
+
+		/*
+		 * Check if a branch has been logged; if valid = 0 and
+		 * spec = 0, then no branch was recorded
+		 */
+		if (!entry.to.split.valid && !entry.to.split.spec)
+			continue;
+
+		perf_clear_branch_entry_bitfields(br + out);
+
+		br[out].from	= sign_ext_branch_ip(entry.from.split.ip);
+		br[out].to	= sign_ext_branch_ip(entry.to.split.ip);
+		br[out].mispred	= entry.from.split.mispredict;
+		br[out].predicted = !br[out].mispred;
+
+		/*
+		 * Set branch speculation information using the status of
+		 * the valid and spec bits.
+		 *
+		 * When valid = 0, spec = 0, no branch was recorded and the
+		 * entry is discarded as seen above.
+		 *
+		 * When valid = 0, spec = 1, the recorded branch was
+		 * speculative but took the wrong path.
+		 *
+		 * When valid = 1, spec = 0, the recorded branch was
+		 * non-speculative and took the correct path.
+		 *
+		 * When valid = 1, spec = 1, the recorded branch was
+		 * speculative and took the correct path.
+		 */
+		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
+		br[out].spec = lbr_spec_map[idx];
+		out++;
+	}
+
+	cpuc->lbr_stack.nr = out;
+
+	/*
+	 * Internal register renaming ensures that LBR From[0] and
+	 * LBR To[0] always represent the TOS
+	 */
+	cpuc->lbr_stack.hw_idx = 0;
+
+	/* Perform further software filtering */
+	amd_pmu_lbr_filter();
+}
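
The (valid, spec) encoding documented in the comment above maps straight onto lbr_spec_map; a table-driven sketch with the enum values replaced by strings for readability:

#include <stdio.h>

static const char * const spec_map[4] = {
	"NA (discarded earlier)",	/* valid=0 spec=0 */
	"speculative, wrong path",	/* valid=0 spec=1 */
	"non-speculative, correct path",/* valid=1 spec=0 */
	"speculative, correct path",	/* valid=1 spec=1 */
};

int main(void)
{
	unsigned int valid = 1, spec = 1;
	unsigned int idx = (valid << 1) | spec;

	printf("%s\n", spec_map[idx]);	/* speculative, correct path */
	return 0;
}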
+
+static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,
+
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
+	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
+	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
+
+	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,
+
+	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
+	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
+};
+
+static int amd_pmu_lbr_setup_filter(struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
+	u64 br_type = event->attr.branch_sample_type;
+	u64 mask = 0, v;
+	int i;
+
+	/* No LBR support */
+	if (!x86_pmu.lbr_nr)
+		return -EOPNOTSUPP;
+
+	if (br_type & PERF_SAMPLE_BRANCH_USER)
+		mask |= X86_BR_USER;
+
+	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+		mask |= X86_BR_KERNEL;
+
+	/* Ignore BRANCH_HV here */
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY)
+		mask |= X86_BR_ANY;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+		mask |= X86_BR_ANY_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		mask |= X86_BR_IND_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_COND)
+		mask |= X86_BR_JCC;
+
+	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+		mask |= X86_BR_IND_JMP;
+
+	if (br_type & PERF_SAMPLE_BRANCH_CALL)
+		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+		mask |= X86_BR_TYPE_SAVE;
+
+	reg->reg = mask;
+	mask = 0;
+
+	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+		if (!(br_type & BIT_ULL(i)))
+			continue;
+
+		v = lbr_select_map[i];
+		if (v == LBR_NOT_SUPP)
+			return -EOPNOTSUPP;
+
+		if (v != LBR_IGNORE)
+			mask |= v;
+	}
+
+	/* Filter bits operate in suppress mode */
+	reg->config = mask ^ LBR_SELECT_MASK;
+
+	return 0;
+}
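
Since LBR_SELECT bits operate in suppress mode, the requested branch classes are inverted within the nine valid bits. A worked example, assuming a request for user-mode conditional branches only:

#include <stdio.h>

#define LBR_SELECT_MASK	0x1ff
#define LBR_USER	(1u << 1)
#define LBR_JCC		(1u << 2)

int main(void)
{
	unsigned int want = LBR_USER | LBR_JCC;	/* record these */

	/* Set bits suppress; invert the request within the valid mask. */
	unsigned int config = want ^ LBR_SELECT_MASK;

	printf("%#x\n", config);	/* 0x1f9: everything else suppressed */
	return 0;
}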
+
+int amd_pmu_lbr_hw_config(struct perf_event *event)
+{
+	int ret = 0;
+
+	/* LBR is not recommended in counting mode */
+	if (!is_sampling_event(event))
+		return -EINVAL;
+
+	ret = amd_pmu_lbr_setup_filter(event);
+	if (!ret)
+		event->attach_state |= PERF_ATTACH_SCHED_CB;
+
+	return ret;
+}
+
+void amd_pmu_lbr_reset(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	/* Reset all branch records individually */
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		amd_pmu_lbr_set_from(i, 0);
+		amd_pmu_lbr_set_to(i, 0);
+	}
+
+	cpuc->last_task_ctx = NULL;
+	cpuc->last_log_id = 0;
+	wrmsrl(MSR_AMD64_LBR_SELECT, 0);
+}
+
+void amd_pmu_lbr_add(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	if (has_branch_stack(event)) {
+		cpuc->lbr_select = 1;
+		cpuc->lbr_sel->config = reg->config;
+		cpuc->br_sel = reg->reg;
+	}
+
+	perf_sched_cb_inc(event->ctx->pmu);
+
+	if (!cpuc->lbr_users++ && !event->total_time_running)
+		amd_pmu_lbr_reset();
+}
+
+void amd_pmu_lbr_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	if (has_branch_stack(event))
+		cpuc->lbr_select = 0;
+
+	cpuc->lbr_users--;
+	WARN_ON_ONCE(cpuc->lbr_users < 0);
+	perf_sched_cb_dec(event->ctx->pmu);
+}
+
+void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/*
+	 * A context switch can flip the address space and LBR entries are
+	 * not tagged with an identifier. Hence, branches cannot be resolved
+	 * from the old address space and the LBR records should be wiped.
+	 */
+	if (cpuc->lbr_users && sched_in)
+		amd_pmu_lbr_reset();
+}
+
+void amd_pmu_lbr_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 lbr_select, dbg_ctl, dbg_extn_cfg;
+
+	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
+		return;
+
+	/* Set hardware branch filter */
+	if (cpuc->lbr_select) {
+		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
+		wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
+	}
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
+}
+
+void amd_pmu_lbr_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u64 dbg_ctl, dbg_extn_cfg;
+
+	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
+		return;
+
+	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+
+	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+}
+
+__init int amd_pmu_lbr_init(void)
+{
+	union cpuid_0x80000022_ebx ebx;
+
+	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
+		return -EOPNOTSUPP;
+
+	/* Set number of entries */
+	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
+	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
+
+	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
+
+	return 0;
+}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f969410d0c90..b30b8bbcd1e2 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -72,6 +72,10 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_add,  *x86_pmu.add);
 DEFINE_STATIC_CALL_NULL(x86_pmu_del,  *x86_pmu.del);
 DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_set_period,   *x86_pmu.set_period);
+DEFINE_STATIC_CALL_NULL(x86_pmu_update,       *x86_pmu.update);
+DEFINE_STATIC_CALL_NULL(x86_pmu_limit_period, *x86_pmu.limit_period);
+
 DEFINE_STATIC_CALL_NULL(x86_pmu_schedule_events,       *x86_pmu.schedule_events);
 DEFINE_STATIC_CALL_NULL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
 DEFINE_STATIC_CALL_NULL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
@@ -116,9 +120,6 @@ u64 x86_perf_event_update(struct perf_event *event)
 	if (unlikely(!hwc->event_base))
 		return 0;
 
-	if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
-		return x86_pmu.update_topdown_event(event);
-
 	/*
 	 * Careful: an NMI might modify the previous event value.
 	 *
@@ -621,8 +622,9 @@ int x86_pmu_hw_config(struct perf_event *event)
 		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
 
 	if (event->attr.sample_period && x86_pmu.limit_period) {
-		if (x86_pmu.limit_period(event, event->attr.sample_period) >
-				event->attr.sample_period)
+		s64 left = event->attr.sample_period;
+		x86_pmu.limit_period(event, &left);
+		if (left > event->attr.sample_period)
 			return -EINVAL;
 	}
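
The reworked contract: limit_period now clamps the period through a pointer instead of returning a new value, and the caller rejects a request the hardware would silently raise. A reduced sketch of that check (names are illustrative):

#include <stdio.h>

typedef long long s64;

/* Example clamp in the new style: adjust *left in place. */
static void demo_limit_period(s64 *left)
{
	if (*left < 32)
		*left = 32;
}

int main(void)
{
	s64 requested = 10, left = requested;

	demo_limit_period(&left);
	if (left > requested)
		printf("rejected: hw minimum is %lld\n", left);
	return 0;
}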
 
@@ -1354,7 +1356,7 @@ static void x86_pmu_enable(struct pmu *pmu)
 	static_call(x86_pmu_enable_all)(added);
 }
 
-static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
+DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
@@ -1370,10 +1372,6 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (unlikely(!hwc->event_base))
 		return 0;
 
-	if (unlikely(is_topdown_count(event)) &&
-	    x86_pmu.set_topdown_event_period)
-		return x86_pmu.set_topdown_event_period(event);
-
 	/*
 	 * If we are way outside a reasonable range then just skip forward:
 	 */
@@ -1399,10 +1397,9 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
-	if (x86_pmu.limit_period)
-		left = x86_pmu.limit_period(event, left);
+	static_call_cond(x86_pmu_limit_period)(event, &left);
 
-	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
+	this_cpu_write(pmc_prev_left[idx], left);
 
 	/*
 	 * The hw event starts counting from this event offset,
@@ -1419,16 +1416,6 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (is_counter_pair(hwc))
 		wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
 
-	/*
-	 * Due to erratum on certan cpu we need
-	 * a second write to be sure the register
-	 * is updated properly
-	 */
-	if (x86_pmu.perfctr_second_write) {
-		wrmsrl(hwc->event_base,
-			(u64)(-left) & x86_pmu.cntval_mask);
-	}
-
 	perf_event_update_userpage(event);
 
 	return ret;
@@ -1518,7 +1505,7 @@ static void x86_pmu_start(struct perf_event *event, int flags)
 
 	if (flags & PERF_EF_RELOAD) {
 		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
-		x86_perf_event_set_period(event);
+		static_call(x86_pmu_set_period)(event);
 	}
 
 	event->hw.state = 0;
@@ -1610,7 +1597,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 		 * Drain the remaining delta count out of a event
 		 * that we are disabling:
 		 */
-		x86_perf_event_update(event);
+		static_call(x86_pmu_update)(event);
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 }
@@ -1700,7 +1687,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 
 		event = cpuc->events[idx];
 
-		val = x86_perf_event_update(event);
+		val = static_call(x86_pmu_update)(event);
 		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
@@ -1709,13 +1696,15 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 		 */
 		handled++;
 
-		if (!x86_perf_event_set_period(event))
+		if (!static_call(x86_pmu_set_period)(event))
 			continue;
 
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
-		if (has_branch_stack(event))
+		if (has_branch_stack(event)) {
 			data.br_stack = &cpuc->lbr_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+		}
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
@@ -2023,6 +2012,10 @@ static void x86_pmu_static_call_update(void)
 	static_call_update(x86_pmu_del, x86_pmu.del);
 	static_call_update(x86_pmu_read, x86_pmu.read);
 
+	static_call_update(x86_pmu_set_period, x86_pmu.set_period);
+	static_call_update(x86_pmu_update, x86_pmu.update);
+	static_call_update(x86_pmu_limit_period, x86_pmu.limit_period);
+
 	static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
 	static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
 	static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
@@ -2042,7 +2035,7 @@ static void x86_pmu_static_call_update(void)
 
 static void _x86_pmu_read(struct perf_event *event)
 {
-	x86_perf_event_update(event);
+	static_call(x86_pmu_update)(event);
 }
 
 void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
@@ -2149,6 +2142,12 @@ static int __init init_hw_perf_events(void)
 	if (!x86_pmu.guest_get_msrs)
 		x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
 
+	if (!x86_pmu.set_period)
+		x86_pmu.set_period = x86_perf_event_set_period;
+
+	if (!x86_pmu.update)
+		x86_pmu.update = x86_perf_event_update;
+
 	x86_pmu_static_call_update();
 
 	/*
@@ -2670,7 +2669,9 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
 		return -EINVAL;
 
 	if (value && x86_pmu.limit_period) {
-		if (x86_pmu.limit_period(event, value) > value)
+		s64 left = value;
+		x86_pmu.limit_period(event, &left);
+		if (left > value)
 			return -EINVAL;
 	}
 
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index c20d8cd47c48..a646a5f9a235 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2199,6 +2199,12 @@ static void __intel_pmu_enable_all(int added, bool pmi)
 	u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
 
 	intel_pmu_lbr_enable_all(pmi);
+
+	if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
+		wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
+		cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
+	}
+
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
 	       intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
 
@@ -2311,7 +2317,7 @@ static void intel_pmu_nhm_workaround(void)
 	for (i = 0; i < 4; i++) {
 		event = cpuc->events[i];
 		if (event)
-			x86_perf_event_update(event);
+			static_call(x86_pmu_update)(event);
 	}
 
 	for (i = 0; i < 4; i++) {
@@ -2326,7 +2332,7 @@ static void intel_pmu_nhm_workaround(void)
 		event = cpuc->events[i];
 
 		if (event) {
-			x86_perf_event_set_period(event);
+			static_call(x86_pmu_set_period)(event);
 			__x86_pmu_enable_event(&event->hw,
 					ARCH_PERFMON_EVENTSEL_ENABLE);
 		} else
@@ -2416,9 +2422,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
 
 static void intel_pmu_disable_fixed(struct perf_event *event)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 ctrl_val, mask;
 	int idx = hwc->idx;
+	u64 mask;
 
 	if (is_topdown_idx(idx)) {
 		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -2435,9 +2442,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
 	intel_clear_masks(event, idx);
 
 	mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	wrmsrl(hwc->config_base, ctrl_val);
+	cpuc->fixed_ctrl_val &= ~mask;
 }
 
 static void intel_pmu_disable_event(struct perf_event *event)
@@ -2530,6 +2535,8 @@ static int adl_set_topdown_event_period(struct perf_event *event)
 	return icl_set_topdown_event_period(event);
 }
 
+DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period);
+
 static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
 {
 	u32 val;
@@ -2680,6 +2687,7 @@ static u64 adl_update_topdown_event(struct perf_event *event)
 	return icl_update_topdown_event(event);
 }
 
+DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
 
 static void intel_pmu_read_topdown_event(struct perf_event *event)
 {
@@ -2691,7 +2699,7 @@ static void intel_pmu_read_topdown_event(struct perf_event *event)
 		return;
 
 	perf_pmu_disable(event->pmu);
-	x86_pmu.update_topdown_event(event);
+	static_call(intel_pmu_update_topdown_event)(event);
 	perf_pmu_enable(event->pmu);
 }
 
@@ -2699,7 +2707,7 @@ static void intel_pmu_read_event(struct perf_event *event)
 {
 	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
 		intel_pmu_auto_reload_read(event);
-	else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
+	else if (is_topdown_count(event))
 		intel_pmu_read_topdown_event(event);
 	else
 		x86_perf_event_update(event);
@@ -2707,8 +2715,9 @@ static void intel_pmu_read_event(struct perf_event *event)
 
 static void intel_pmu_enable_fixed(struct perf_event *event)
 {
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	u64 ctrl_val, mask, bits = 0;
+	u64 mask, bits = 0;
 	int idx = hwc->idx;
 
 	if (is_topdown_idx(idx)) {
@@ -2752,10 +2761,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 		mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
 	}
 
-	rdmsrl(hwc->config_base, ctrl_val);
-	ctrl_val &= ~mask;
-	ctrl_val |= bits;
-	wrmsrl(hwc->config_base, ctrl_val);
+	cpuc->fixed_ctrl_val &= ~mask;
+	cpuc->fixed_ctrl_val |= bits;
 }
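
Both intel_pmu_disable_fixed() and intel_pmu_enable_fixed() now only edit a cached control value; __intel_pmu_enable_all() flushes it to the MSR once, and only if it changed. A sketch of that batching idea, with the MSR write replaced by a stub:

#include <stdio.h>
#include <stdint.h>

static uint64_t fixed_ctrl_val, active_fixed_ctrl_val;

static void wrmsr_stub(uint64_t val)
{
	printf("MSR write: %#llx\n", (unsigned long long)val);
}

/* "bits" is assumed to be pre-shifted into the counter's 4-bit field. */
static void enable_fixed(int idx, uint64_t bits)
{
	uint64_t mask = 0xfULL << (idx * 4);

	fixed_ctrl_val = (fixed_ctrl_val & ~mask) | bits;	/* cache only */
}

static void enable_all(void)
{
	if (fixed_ctrl_val != active_fixed_ctrl_val) {		/* flush once */
		wrmsr_stub(fixed_ctrl_val);
		active_fixed_ctrl_val = fixed_ctrl_val;
	}
}

int main(void)
{
	enable_fixed(0, 0x3);	/* no MSR traffic yet */
	enable_fixed(1, 0x30);
	enable_all();		/* single write: 0x33 */
	enable_all();		/* unchanged: no write */
	return 0;
}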
 
 static void intel_pmu_enable_event(struct perf_event *event)
@@ -2803,7 +2810,7 @@ static void intel_pmu_add_event(struct perf_event *event)
  */
 int intel_pmu_save_and_restart(struct perf_event *event)
 {
-	x86_perf_event_update(event);
+	static_call(x86_pmu_update)(event);
 	/*
 	 * For a checkpointed counter always reset back to 0.  This
 	 * avoids a situation where the counter overflows, aborts the
@@ -2815,9 +2822,25 @@ int intel_pmu_save_and_restart(struct perf_event *event)
 		wrmsrl(event->hw.event_base, 0);
 		local64_set(&event->hw.prev_count, 0);
 	}
+	return static_call(x86_pmu_set_period)(event);
+}
+
+static int intel_pmu_set_period(struct perf_event *event)
+{
+	if (unlikely(is_topdown_count(event)))
+		return static_call(intel_pmu_set_topdown_event_period)(event);
+
 	return x86_perf_event_set_period(event);
 }
 
+static u64 intel_pmu_update(struct perf_event *event)
+{
+	if (unlikely(is_topdown_count(event)))
+		return static_call(intel_pmu_update_topdown_event)(event);
+
+	return x86_perf_event_update(event);
+}
+
 static void intel_pmu_reset(void)
 {
 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
@@ -2980,8 +3003,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	 */
 	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
 		handled++;
-		if (x86_pmu.update_topdown_event)
-			x86_pmu.update_topdown_event(NULL);
+		static_call(intel_pmu_update_topdown_event)(NULL);
 	}
 
 	/*
@@ -3004,8 +3026,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 
 		perf_sample_data_init(&data, 0, event->hw.last_period);
 
-		if (has_branch_stack(event))
+		if (has_branch_stack(event)) {
 			data.br_stack = &cpuc->lbr_stack;
+			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+		}
 
 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
@@ -3853,9 +3877,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
-
-		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
-			event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
 	}
 
 	if (needs_branch_stack(event)) {
@@ -4334,28 +4355,25 @@ static u8 adl_get_hybrid_cpu_type(void)
  * Therefore the effective (average) period matches the requested period,
  * despite coarser hardware granularity.
  */
-static u64 bdw_limit_period(struct perf_event *event, u64 left)
+static void bdw_limit_period(struct perf_event *event, s64 *left)
 {
 	if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
 			X86_CONFIG(.event=0xc0, .umask=0x01)) {
-		if (left < 128)
-			left = 128;
-		left &= ~0x3fULL;
+		if (*left < 128)
+			*left = 128;
+		*left &= ~0x3fULL;
 	}
-	return left;
 }
 
-static u64 nhm_limit_period(struct perf_event *event, u64 left)
+static void nhm_limit_period(struct perf_event *event, s64 *left)
 {
-	return max(left, 32ULL);
+	*left = max(*left, 32LL);
 }
 
-static u64 spr_limit_period(struct perf_event *event, u64 left)
+static void spr_limit_period(struct perf_event *event, s64 *left)
 {
 	if (event->attr.precise_ip == 3)
-		return max(left, 128ULL);
-
-	return left;
+		*left = max(*left, 128LL);
 }
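
For the Broadwell workaround above, a small worked example (requested periods assumed): anything below 128 is raised to 128, then rounded down to a multiple of 64:

#include <stdio.h>

typedef long long s64;

static void bdw_quantize(s64 *left)
{
	if (*left < 128)
		*left = 128;
	*left &= ~0x3fLL;
}

int main(void)
{
	s64 a = 70, b = 200;

	bdw_quantize(&a);	/* 70  -> 128 */
	bdw_quantize(&b);	/* 200 -> 192 (multiple of 64) */
	printf("%lld %lld\n", a, b);
	return 0;
}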
 
 PMU_FORMAT_ATTR(event,	"config:0-7"	);
@@ -4794,6 +4812,8 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.add			= intel_pmu_add_event,
 	.del			= intel_pmu_del_event,
 	.read			= intel_pmu_read_event,
+	.set_period		= intel_pmu_set_period,
+	.update			= intel_pmu_update,
 	.hw_config		= intel_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
@@ -6312,8 +6332,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.lbr_pt_coexist = true;
 		intel_pmu_pebs_data_source_skl(pmem);
 		x86_pmu.num_topdown_events = 4;
-		x86_pmu.update_topdown_event = icl_update_topdown_event;
-		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
+		static_call_update(intel_pmu_update_topdown_event,
+				   &icl_update_topdown_event);
+		static_call_update(intel_pmu_set_topdown_event_period,
+				   &icl_set_topdown_event_period);
 		pr_cont("Icelake events, ");
 		name = "icelake";
 		break;
@@ -6348,8 +6370,10 @@ __init int intel_pmu_init(void)
 		x86_pmu.lbr_pt_coexist = true;
 		intel_pmu_pebs_data_source_skl(pmem);
 		x86_pmu.num_topdown_events = 8;
-		x86_pmu.update_topdown_event = icl_update_topdown_event;
-		x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
+		static_call_update(intel_pmu_update_topdown_event,
+				   &icl_update_topdown_event);
+		static_call_update(intel_pmu_set_topdown_event_period,
+				   &icl_set_topdown_event_period);
 		pr_cont("Sapphire Rapids events, ");
 		name = "sapphire_rapids";
 		break;
@@ -6358,6 +6382,7 @@ __init int intel_pmu_init(void)
 	case INTEL_FAM6_ALDERLAKE_L:
 	case INTEL_FAM6_RAPTORLAKE:
 	case INTEL_FAM6_RAPTORLAKE_P:
+	case INTEL_FAM6_RAPTORLAKE_S:
 		/*
 		 * Alder Lake has 2 types of CPU, core and atom.
 		 *
@@ -6382,8 +6407,10 @@ __init int intel_pmu_init(void)
 		intel_pmu_pebs_data_source_adl();
 		x86_pmu.pebs_latency_data = adl_latency_data_small;
 		x86_pmu.num_topdown_events = 8;
-		x86_pmu.update_topdown_event = adl_update_topdown_event;
-		x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;
+		static_call_update(intel_pmu_update_topdown_event,
+				   &adl_update_topdown_event);
+		static_call_update(intel_pmu_set_topdown_event_period,
+				   &adl_set_topdown_event_period);
 
 		x86_pmu.filter_match = intel_pmu_filter_match;
 		x86_pmu.get_event_constraints = adl_get_event_constraints;
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 8ec23f47fee9..a2834bc93149 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -685,6 +685,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_cstates),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_cstates),
+	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&adl_cstates),
 	{ },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ac973c6f82ad..7839507b3844 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1540,14 +1540,18 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	/*
 	 * Use latency for weight (only avail with PEBS-LL)
 	 */
-	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE))
+	if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) {
 		data->weight.full = pebs->lat;
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
 
 	/*
 	 * data.data_src encodes the data source
 	 */
-	if (sample_type & PERF_SAMPLE_DATA_SRC)
+	if (sample_type & PERF_SAMPLE_DATA_SRC) {
 		data->data_src.val = get_data_src(event, pebs->dse);
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
 
 	/*
 	 * We must however always use iregs for the unwinder to stay sane; the
@@ -1555,8 +1559,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	 * previous PMI context or an (I)RET happened between the record and
 	 * PMI.
 	 */
-	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		data->callchain = perf_callchain(event, iregs);
+		data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+	}
 
 	/*
 	 * We use the interrupt regs as a base because the PEBS record does not
@@ -1628,17 +1634,22 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 
 
 	if ((sample_type & PERF_SAMPLE_ADDR_TYPE) &&
-	    x86_pmu.intel_cap.pebs_format >= 1)
+	    x86_pmu.intel_cap.pebs_format >= 1) {
 		data->addr = pebs->dla;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
 
 	if (x86_pmu.intel_cap.pebs_format >= 2) {
 		/* Only set the TSX weight when no memory weight. */
-		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll)
+		if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) {
 			data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning);
-
-		if (sample_type & PERF_SAMPLE_TRANSACTION)
+			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+		}
+		if (sample_type & PERF_SAMPLE_TRANSACTION) {
 			data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
 							      pebs->ax);
+			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+		}
 	}
 
 	/*
@@ -1648,11 +1659,15 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
 	 * We can only do this for the default trace clock.
 	 */
 	if (x86_pmu.intel_cap.pebs_format >= 3 &&
-		event->attr.use_clockid == 0)
+		event->attr.use_clockid == 0) {
 		data->time = native_sched_clock_from_tsc(pebs->tsc);
+		data->sample_flags |= PERF_SAMPLE_TIME;
+	}
 
-	if (has_branch_stack(event))
+	if (has_branch_stack(event)) {
 		data->br_stack = &cpuc->lbr_stack;
+		data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
+	}
 }
 
 static void adaptive_pebs_save_regs(struct pt_regs *regs,
@@ -1710,8 +1725,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	perf_sample_data_init(data, 0, event->hw.last_period);
 	data->period = event->hw.last_period;
 
-	if (event->attr.use_clockid == 0)
+	if (event->attr.use_clockid == 0) {
 		data->time = native_sched_clock_from_tsc(basic->tsc);
+		data->sample_flags |= PERF_SAMPLE_TIME;
+	}
 
 	/*
 	 * We must however always use iregs for the unwinder to stay sane; the
@@ -1719,8 +1736,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 	 * previous PMI context or an (I)RET happened between the record and
 	 * PMI.
 	 */
-	if (sample_type & PERF_SAMPLE_CALLCHAIN)
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		data->callchain = perf_callchain(event, iregs);
+		data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
+	}
 
 	*regs = *iregs;
 	/* The ip in basic is EventingIP */
@@ -1771,17 +1790,24 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 				data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?:
 					intel_get_tsx_weight(meminfo->tsx_tuning);
 			}
+			data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
 		}
 
-		if (sample_type & PERF_SAMPLE_DATA_SRC)
+		if (sample_type & PERF_SAMPLE_DATA_SRC) {
 			data->data_src.val = get_data_src(event, meminfo->aux);
+			data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+		}
 
-		if (sample_type & PERF_SAMPLE_ADDR_TYPE)
+		if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
 			data->addr = meminfo->address;
+			data->sample_flags |= PERF_SAMPLE_ADDR;
+		}
 
-		if (sample_type & PERF_SAMPLE_TRANSACTION)
+		if (sample_type & PERF_SAMPLE_TRANSACTION) {
 			data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
 							  gprs ? gprs->ax : 0);
+			data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+		}
 	}
 
 	if (format_size & PEBS_DATACFG_XMMS) {
@@ -1800,6 +1826,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 		if (has_branch_stack(event)) {
 			intel_pmu_store_pebs_lbrs(lbr);
 			data->br_stack = &cpuc->lbr_stack;
+			data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
 		}
 	}
 
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 47fca6a7a8bc..4fce1a4226e3 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -4,7 +4,6 @@
 
 #include <asm/perf_event.h>
 #include <asm/msr.h>
-#include <asm/insn.h>
 
 #include "../perf_event.h"
 
@@ -66,65 +65,6 @@
 #define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))
 
 /*
- * x86control flow change classification
- * x86control flow changes include branches, interrupts, traps, faults
- */
-enum {
-	X86_BR_NONE		= 0,      /* unknown */
-
-	X86_BR_USER		= 1 << 0, /* branch target is user */
-	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */
-
-	X86_BR_CALL		= 1 << 2, /* call */
-	X86_BR_RET		= 1 << 3, /* return */
-	X86_BR_SYSCALL		= 1 << 4, /* syscall */
-	X86_BR_SYSRET		= 1 << 5, /* syscall return */
-	X86_BR_INT		= 1 << 6, /* sw interrupt */
-	X86_BR_IRET		= 1 << 7, /* return from interrupt */
-	X86_BR_JCC		= 1 << 8, /* conditional */
-	X86_BR_JMP		= 1 << 9, /* jump */
-	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
-	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
-	X86_BR_ABORT		= 1 << 12,/* transaction abort */
-	X86_BR_IN_TX		= 1 << 13,/* in transaction */
-	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
-	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
-	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
-	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
-
-	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
-
-};
-
-#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
-#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
-
-#define X86_BR_ANY       \
-	(X86_BR_CALL    |\
-	 X86_BR_RET     |\
-	 X86_BR_SYSCALL |\
-	 X86_BR_SYSRET  |\
-	 X86_BR_INT     |\
-	 X86_BR_IRET    |\
-	 X86_BR_JCC     |\
-	 X86_BR_JMP	 |\
-	 X86_BR_IRQ	 |\
-	 X86_BR_ABORT	 |\
-	 X86_BR_IND_CALL |\
-	 X86_BR_IND_JMP  |\
-	 X86_BR_ZERO_CALL)
-
-#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
-
-#define X86_BR_ANY_CALL		 \
-	(X86_BR_CALL		|\
-	 X86_BR_IND_CALL	|\
-	 X86_BR_ZERO_CALL	|\
-	 X86_BR_SYSCALL		|\
-	 X86_BR_IRQ		|\
-	 X86_BR_INT)
-
-/*
  * Intel LBR_CTL bits
  *
  * Hardware branch filter for Arch LBR
@@ -1151,219 +1091,6 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
 	return ret;
 }
 
-/*
- * return the type of control flow change at address "from"
- * instruction is not necessarily a branch (in case of interrupt).
- *
- * The branch type returned also includes the priv level of the
- * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
- *
- * If a branch type is unknown OR the instruction cannot be
- * decoded (e.g., text page not present), then X86_BR_NONE is
- * returned.
- */
-static int branch_type(unsigned long from, unsigned long to, int abort)
-{
-	struct insn insn;
-	void *addr;
-	int bytes_read, bytes_left;
-	int ret = X86_BR_NONE;
-	int ext, to_plm, from_plm;
-	u8 buf[MAX_INSN_SIZE];
-	int is64 = 0;
-
-	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
-	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
-
-	/*
-	 * maybe zero if lbr did not fill up after a reset by the time
-	 * we get a PMU interrupt
-	 */
-	if (from == 0 || to == 0)
-		return X86_BR_NONE;
-
-	if (abort)
-		return X86_BR_ABORT | to_plm;
-
-	if (from_plm == X86_BR_USER) {
-		/*
-		 * can happen if measuring at the user level only
-		 * and we interrupt in a kernel thread, e.g., idle.
-		 */
-		if (!current->mm)
-			return X86_BR_NONE;
-
-		/* may fail if text not present */
-		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
-						MAX_INSN_SIZE);
-		bytes_read = MAX_INSN_SIZE - bytes_left;
-		if (!bytes_read)
-			return X86_BR_NONE;
-
-		addr = buf;
-	} else {
-		/*
-		 * The LBR logs any address in the IP, even if the IP just
-		 * faulted. This means userspace can control the from address.
-		 * Ensure we don't blindly read any address by validating it is
-		 * a known text address.
-		 */
-		if (kernel_text_address(from)) {
-			addr = (void *)from;
-			/*
-			 * Assume we can get the maximum possible size
-			 * when grabbing kernel data.  This is not
-			 * _strictly_ true since we could possibly be
-			 * executing up next to a memory hole, but
-			 * it is very unlikely to be a problem.
-			 */
-			bytes_read = MAX_INSN_SIZE;
-		} else {
-			return X86_BR_NONE;
-		}
-	}
-
-	/*
-	 * decoder needs to know the ABI especially
-	 * on 64-bit systems running 32-bit apps
-	 */
-#ifdef CONFIG_X86_64
-	is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
-#endif
-	insn_init(&insn, addr, bytes_read, is64);
-	if (insn_get_opcode(&insn))
-		return X86_BR_ABORT;
-
-	switch (insn.opcode.bytes[0]) {
-	case 0xf:
-		switch (insn.opcode.bytes[1]) {
-		case 0x05: /* syscall */
-		case 0x34: /* sysenter */
-			ret = X86_BR_SYSCALL;
-			break;
-		case 0x07: /* sysret */
-		case 0x35: /* sysexit */
-			ret = X86_BR_SYSRET;
-			break;
-		case 0x80 ... 0x8f: /* conditional */
-			ret = X86_BR_JCC;
-			break;
-		default:
-			ret = X86_BR_NONE;
-		}
-		break;
-	case 0x70 ... 0x7f: /* conditional */
-		ret = X86_BR_JCC;
-		break;
-	case 0xc2: /* near ret */
-	case 0xc3: /* near ret */
-	case 0xca: /* far ret */
-	case 0xcb: /* far ret */
-		ret = X86_BR_RET;
-		break;
-	case 0xcf: /* iret */
-		ret = X86_BR_IRET;
-		break;
-	case 0xcc ... 0xce: /* int */
-		ret = X86_BR_INT;
-		break;
-	case 0xe8: /* call near rel */
-		if (insn_get_immediate(&insn) || insn.immediate1.value == 0) {
-			/* zero length call */
-			ret = X86_BR_ZERO_CALL;
-			break;
-		}
-		fallthrough;
-	case 0x9a: /* call far absolute */
-		ret = X86_BR_CALL;
-		break;
-	case 0xe0 ... 0xe3: /* loop jmp */
-		ret = X86_BR_JCC;
-		break;
-	case 0xe9 ... 0xeb: /* jmp */
-		ret = X86_BR_JMP;
-		break;
-	case 0xff: /* call near absolute, call far absolute ind */
-		if (insn_get_modrm(&insn))
-			return X86_BR_ABORT;
-
-		ext = (insn.modrm.bytes[0] >> 3) & 0x7;
-		switch (ext) {
-		case 2: /* near ind call */
-		case 3: /* far ind call */
-			ret = X86_BR_IND_CALL;
-			break;
-		case 4:
-		case 5:
-			ret = X86_BR_IND_JMP;
-			break;
-		}
-		break;
-	default:
-		ret = X86_BR_NONE;
-	}
-	/*
-	 * interrupts, traps, faults (and thus ring transition) may
-	 * occur on any instructions. Thus, to classify them correctly,
-	 * we need to first look at the from and to priv levels. If they
-	 * are different and to is in the kernel, then it indicates
-	 * a ring transition. If the from instruction is not a ring
-	 * transition instr (syscall, systenter, int), then it means
-	 * it was a irq, trap or fault.
-	 *
-	 * we have no way of detecting kernel to kernel faults.
-	 */
-	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
-	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
-		ret = X86_BR_IRQ;
-
-	/*
-	 * branch priv level determined by target as
-	 * is done by HW when LBR_SELECT is implemented
-	 */
-	if (ret != X86_BR_NONE)
-		ret |= to_plm;
-
-	return ret;
-}
-
-#define X86_BR_TYPE_MAP_MAX	16
-
-static int branch_map[X86_BR_TYPE_MAP_MAX] = {
-	PERF_BR_CALL,		/* X86_BR_CALL */
-	PERF_BR_RET,		/* X86_BR_RET */
-	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
-	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
-	PERF_BR_UNKNOWN,	/* X86_BR_INT */
-	PERF_BR_ERET,		/* X86_BR_IRET */
-	PERF_BR_COND,		/* X86_BR_JCC */
-	PERF_BR_UNCOND,		/* X86_BR_JMP */
-	PERF_BR_IRQ,		/* X86_BR_IRQ */
-	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
-	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
-	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
-	PERF_BR_UNKNOWN,	/* X86_BR_NO_TX */
-	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
-	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
-	PERF_BR_IND,		/* X86_BR_IND_JMP */
-};
-
-static int
-common_branch_type(int type)
-{
-	int i;
-
-	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
-
-	if (type) {
-		i = __ffs(type);
-		if (i < X86_BR_TYPE_MAP_MAX)
-			return branch_map[i];
-	}
-
-	return PERF_BR_UNKNOWN;
-}
-
 enum {
 	ARCH_LBR_BR_TYPE_JCC			= 0,
 	ARCH_LBR_BR_TYPE_NEAR_IND_JMP		= 1,
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index 7951a5dc73b6..03bbcc2fa2ff 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1006,6 +1006,29 @@ static void p4_pmu_enable_all(int added)
 	}
 }
 
+static int p4_pmu_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = this_cpu_read(pmc_prev_left[hwc->idx]);
+	int ret;
+
+	ret = x86_perf_event_set_period(event);
+
+	if (hwc->event_base) {
+		/*
+		 * This handles erratum N15 in Intel doc 249199-029:
+		 * the counter may not be updated correctly on write,
+		 * so we need a second write operation to do the trick
+		 * (the official workaround didn't work).
+		 *
+		 * The idea is taken from the OProfile code.
+		 */
+		wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+	}
+
+	return ret;
+}
+
 static int p4_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
@@ -1044,7 +1067,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
 		/* event overflow for sure */
 		perf_sample_data_init(&data, 0, hwc->last_period);
 
-		if (!x86_perf_event_set_period(event))
+		if (!static_call(x86_pmu_set_period)(event))
 			continue;
 
 
@@ -1316,6 +1339,9 @@ static __initconst const struct x86_pmu p4_pmu = {
 	.enable_all		= p4_pmu_enable_all,
 	.enable			= p4_pmu_enable_event,
 	.disable		= p4_pmu_disable_event,
+
+	.set_period		= p4_pmu_set_period,
+
 	.eventsel		= MSR_P4_BPU_CCCR0,
 	.perfctr		= MSR_P4_BPU_PERFCTR0,
 	.event_map		= p4_pmu_event_map,
@@ -1334,15 +1360,6 @@ static __initconst const struct x86_pmu p4_pmu = {
 	.max_period		= (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
 	.hw_config		= p4_hw_config,
 	.schedule_events	= p4_pmu_schedule_events,
-	/*
-	 * This handles erratum N15 in intel doc 249199-029,
-	 * the counter may not be updated correctly on write
-	 * so we need a second write operation to do the trick
-	 * (the official workaround didn't work)
-	 *
-	 * the former idea is taken from OProfile code
-	 */
-	.perfctr_second_write	= 1,
 
 	.format_attrs		= intel_p4_formats_attr,
 };
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index db6c31bca809..6f1ccc57a692 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1831,6 +1831,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,		&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,	&adl_uncore_init),
+	X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,	&adl_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&spr_uncore_init),
 	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&snr_uncore_init),
 	{},
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index ac542f98c070..ecced3a52668 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -106,6 +106,7 @@ static bool test_intel(int idx, void *data)
 	case INTEL_FAM6_ALDERLAKE_N:
 	case INTEL_FAM6_RAPTORLAKE:
 	case INTEL_FAM6_RAPTORLAKE_P:
+	case INTEL_FAM6_RAPTORLAKE_S:
 		if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
 			return true;
 		break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 266143abcbd8..332d2e6d8ae4 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -64,27 +64,25 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 	return ((ecode & c->cmask) - c->code) <= (u64)c->size;
 }
 
+#define PERF_ARCH(name, val)	\
+	PERF_X86_EVENT_##name = val,
+
 /*
  * struct hw_perf_event.flags flags
  */
-#define PERF_X86_EVENT_PEBS_LDLAT	0x00001 /* ld+ldlat data address sampling */
-#define PERF_X86_EVENT_PEBS_ST		0x00002 /* st data address sampling */
-#define PERF_X86_EVENT_PEBS_ST_HSW	0x00004 /* haswell style datala, store */
-#define PERF_X86_EVENT_PEBS_LD_HSW	0x00008 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW	0x00010 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL		0x00020 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC		0x00040 /* dynamic alloc'd constraint */
-
-#define PERF_X86_EVENT_EXCL_ACCT	0x00100 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD	0x00200 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS	0x00400 /* use large PEBS */
-#define PERF_X86_EVENT_PEBS_VIA_PT	0x00800 /* use PT buffer for PEBS */
-#define PERF_X86_EVENT_PAIR		0x01000 /* Large Increment per Cycle */
-#define PERF_X86_EVENT_LBR_SELECT	0x02000 /* Save/Restore MSR_LBR_SELECT */
-#define PERF_X86_EVENT_TOPDOWN		0x04000 /* Count Topdown slots/metrics events */
-#define PERF_X86_EVENT_PEBS_STLAT	0x08000 /* st+stlat data address sampling */
-#define PERF_X86_EVENT_AMD_BRS		0x10000 /* AMD Branch Sampling */
-#define PERF_X86_EVENT_PEBS_LAT_HYBRID	0x20000 /* ld and st lat for hybrid */
+enum {
+#include "perf_event_flags.h"
+};
+
+#undef PERF_ARCH
+
+#define PERF_ARCH(name, val)						\
+	static_assert((PERF_X86_EVENT_##name & PERF_EVENT_FLAG_ARCH) ==	\
+		      PERF_X86_EVENT_##name);
+
+#include "perf_event_flags.h"
+
+#undef PERF_ARCH
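
The PERF_ARCH() construct above is an X-macro: perf_event_flags.h is included twice, first to build the enum and then to static_assert each flag against PERF_EVENT_FLAG_ARCH. A minimal standalone sketch of a closely related variant, using a list macro and invented flag names:

#include <assert.h>

/* Stand-in for perf_event_flags.h: one line per flag. */
#define DEMO_FLAGS(X)		\
	X(FIRST,  0x0001)	\
	X(SECOND, 0x0002)

/* First expansion: the enum. */
#define X_ENUM(name, val)	DEMO_##name = val,
enum { DEMO_FLAGS(X_ENUM) };
#undef X_ENUM

/* Second expansion: compile-time checks against a reserved mask. */
#define DEMO_ARCH_MASK	0x00ff
#define X_CHECK(name, val)	\
	static_assert((DEMO_##name & DEMO_ARCH_MASK) == DEMO_##name, #name);
DEMO_FLAGS(X_CHECK)
#undef X_CHECK

int main(void) { return DEMO_FIRST + DEMO_SECOND - 3; }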
 
 static inline bool is_topdown_count(struct perf_event *event)
 {
@@ -272,6 +270,10 @@ struct cpu_hw_events {
 	u64			active_pebs_data_cfg;
 	int			pebs_record_size;
 
+	/* Intel Fixed counter configuration */
+	u64			fixed_ctrl_val;
+	u64			active_fixed_ctrl_val;
+
 	/*
 	 * Intel LBR bits
 	 */
@@ -745,6 +747,8 @@ struct x86_pmu {
 	void		(*add)(struct perf_event *);
 	void		(*del)(struct perf_event *);
 	void		(*read)(struct perf_event *event);
+	int		(*set_period)(struct perf_event *event);
+	u64		(*update)(struct perf_event *event);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -780,8 +784,7 @@ struct x86_pmu {
 
 	struct event_constraint *event_constraints;
 	struct x86_pmu_quirk *quirks;
-	int		perfctr_second_write;
-	u64		(*limit_period)(struct perf_event *event, u64 l);
+	void		(*limit_period)(struct perf_event *event, s64 *l);
 
 	/* PMI handler bits */
 	unsigned int	late_ack		:1,
@@ -889,8 +892,6 @@ struct x86_pmu {
 	 * Intel perf metrics
 	 */
 	int		num_topdown_events;
-	u64		(*update_topdown_event)(struct perf_event *event);
-	int		(*set_topdown_event_period)(struct perf_event *event);
 
 	/*
 	 * perf task context (i.e. struct perf_event_context::task_ctx_data)
@@ -1044,6 +1045,9 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
 struct pmu *x86_get_pmu(unsigned int cpu);
 extern struct x86_pmu x86_pmu __read_mostly;
 
+DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
+DECLARE_STATIC_CALL(x86_pmu_update,     *x86_pmu.update);
+
 static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
 {
 	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
@@ -1059,6 +1063,7 @@ static inline bool x86_pmu_has_lbr_callstack(void)
 }
 
 DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 int x86_perf_event_set_period(struct perf_event *event);
 
@@ -1210,6 +1215,70 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
 	regs->ip = ip;
 }
 
+/*
+ * x86 control flow change classification
+ * x86 control flow changes include branches, interrupts, traps and faults
+ */
+enum {
+	X86_BR_NONE		= 0,      /* unknown */
+
+	X86_BR_USER		= 1 << 0, /* branch target is user */
+	X86_BR_KERNEL		= 1 << 1, /* branch target is kernel */
+
+	X86_BR_CALL		= 1 << 2, /* call */
+	X86_BR_RET		= 1 << 3, /* return */
+	X86_BR_SYSCALL		= 1 << 4, /* syscall */
+	X86_BR_SYSRET		= 1 << 5, /* syscall return */
+	X86_BR_INT		= 1 << 6, /* sw interrupt */
+	X86_BR_IRET		= 1 << 7, /* return from interrupt */
+	X86_BR_JCC		= 1 << 8, /* conditional */
+	X86_BR_JMP		= 1 << 9, /* jump */
+	X86_BR_IRQ		= 1 << 10,/* hw interrupt or trap or fault */
+	X86_BR_IND_CALL		= 1 << 11,/* indirect calls */
+	X86_BR_ABORT		= 1 << 12,/* transaction abort */
+	X86_BR_IN_TX		= 1 << 13,/* in transaction */
+	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
+	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
+	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
+	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
+
+	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
+
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
+
+#define X86_BR_ANY       \
+	(X86_BR_CALL    |\
+	 X86_BR_RET     |\
+	 X86_BR_SYSCALL |\
+	 X86_BR_SYSRET  |\
+	 X86_BR_INT     |\
+	 X86_BR_IRET    |\
+	 X86_BR_JCC     |\
+	 X86_BR_JMP     |\
+	 X86_BR_IRQ     |\
+	 X86_BR_ABORT   |\
+	 X86_BR_IND_CALL |\
+	 X86_BR_IND_JMP  |\
+	 X86_BR_ZERO_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL		 \
+	(X86_BR_CALL		|\
+	 X86_BR_IND_CALL	|\
+	 X86_BR_ZERO_CALL	|\
+	 X86_BR_SYSCALL		|\
+	 X86_BR_IRQ		|\
+	 X86_BR_INT)
+
+int common_branch_type(int type);
+int branch_type(unsigned long from, unsigned long to, int abort);
+int branch_type_fused(unsigned long from, unsigned long to, int abort,
+		      int *offset);
+
 ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
 ssize_t intel_event_sysfs_show(char *page, u64 config);
 
@@ -1232,7 +1301,20 @@ static inline bool fixed_counter_disabled(int i, struct pmu *pmu)
 
 int amd_pmu_init(void);
 
+int amd_pmu_lbr_init(void);
+void amd_pmu_lbr_reset(void);
+void amd_pmu_lbr_read(void);
+void amd_pmu_lbr_add(struct perf_event *event);
+void amd_pmu_lbr_del(struct perf_event *event);
+void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+void amd_pmu_lbr_enable_all(void);
+void amd_pmu_lbr_disable_all(void);
+int amd_pmu_lbr_hw_config(struct perf_event *event);
+
 #ifdef CONFIG_PERF_EVENTS_AMD_BRS
+
+#define AMD_FAM19H_BRS_EVENT 0xc4 /* RETIRED_TAKEN_BRANCH_INSTRUCTIONS */
+
 int amd_brs_init(void);
 void amd_brs_disable(void);
 void amd_brs_enable(void);
@@ -1241,7 +1323,7 @@ void amd_brs_disable_all(void);
 void amd_brs_drain(void);
 void amd_brs_lopwr_init(void);
 void amd_brs_disable_all(void);
-int amd_brs_setup_filter(struct perf_event *event);
+int amd_brs_hw_config(struct perf_event *event);
 void amd_brs_reset(void);
 
 static inline void amd_pmu_brs_add(struct perf_event *event)
@@ -1277,7 +1359,7 @@ static inline void amd_brs_enable(void) {}
 static inline void amd_brs_drain(void) {}
 static inline void amd_brs_lopwr_init(void) {}
 static inline void amd_brs_disable_all(void) {}
-static inline int amd_brs_setup_filter(struct perf_event *event)
+static inline int amd_brs_hw_config(struct perf_event *event)
 {
 	return 0;
 }
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
new file mode 100644
index 000000000000..1dc19b9b4426
--- /dev/null
+++ b/arch/x86/events/perf_event_flags.h
@@ -0,0 +1,22 @@
+
+/*
+ * struct hw_perf_event.flags flags
+ */
+PERF_ARCH(PEBS_LDLAT,		0x00001) /* ld+ldlat data address sampling */
+PERF_ARCH(PEBS_ST,		0x00002) /* st data address sampling */
+PERF_ARCH(PEBS_ST_HSW,		0x00004) /* haswell style datala, store */
+PERF_ARCH(PEBS_LD_HSW,		0x00008) /* haswell style datala, load */
+PERF_ARCH(PEBS_NA_HSW,		0x00010) /* haswell style datala, unknown */
+PERF_ARCH(EXCL,			0x00020) /* HT exclusivity on counter */
+PERF_ARCH(DYNAMIC,		0x00040) /* dynamic alloc'd constraint */
+			/*	0x00080	*/
+PERF_ARCH(EXCL_ACCT,		0x00100) /* accounted EXCL event */
+PERF_ARCH(AUTO_RELOAD,		0x00200) /* use PEBS auto-reload */
+PERF_ARCH(LARGE_PEBS,		0x00400) /* use large PEBS */
+PERF_ARCH(PEBS_VIA_PT,		0x00800) /* use PT buffer for PEBS */
+PERF_ARCH(PAIR,			0x01000) /* Large Increment per Cycle */
+PERF_ARCH(LBR_SELECT,		0x02000) /* Save/Restore MSR_LBR_SELECT */
+PERF_ARCH(TOPDOWN,		0x04000) /* Count Topdown slots/metrics events */
+PERF_ARCH(PEBS_STLAT,		0x08000) /* st+stlat data address sampling */
+PERF_ARCH(AMD_BRS,		0x10000) /* AMD Branch Sampling */
+PERF_ARCH(PEBS_LAT_HYBRID,	0x20000) /* ld and st lat for hybrid */
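
The two PERF_ARCH expansions above are a classic X-macro: the flag list lives in a single header that is included twice with different PERF_ARCH definitions, once to emit the enumerators and once to emit compile-time checks against PERF_EVENT_FLAG_ARCH. Below is a minimal userspace sketch of the same technique, with hypothetical FLAG_* names and the list held in a macro rather than a separate header:

#include <assert.h>

#define FLAG_MASK 0xffff

#define FLAG_LIST(X)	\
	X(READ,  0x0001)	\
	X(WRITE, 0x0002)	\
	X(EXEC,  0x0004)

/* First expansion: generate the enumerators. */
#define DEFINE_FLAG(name, val) FLAG_##name = val,
enum { FLAG_LIST(DEFINE_FLAG) };
#undef DEFINE_FLAG

/* Second expansion: check each value fits in FLAG_MASK at compile time. */
#define CHECK_FLAG(name, val) \
	_Static_assert((FLAG_##name & FLAG_MASK) == FLAG_##name, #name);
FLAG_LIST(CHECK_FLAG)
#undef CHECK_FLAG

int main(void)
{
	return (FLAG_READ + FLAG_WRITE + FLAG_EXEC == 7) ? 0 : 1;
}

Keeping the list in one place guarantees the enum and the assertions can never drift apart.
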
diff --git a/arch/x86/events/utils.c b/arch/x86/events/utils.c
new file mode 100644
index 000000000000..76b1f8bb0fd5
--- /dev/null
+++ b/arch/x86/events/utils.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/insn.h>
+
+#include "perf_event.h"
+
+static int decode_branch_type(struct insn *insn)
+{
+	int ext;
+
+	if (insn_get_opcode(insn))
+		return X86_BR_ABORT;
+
+	switch (insn->opcode.bytes[0]) {
+	case 0xf:
+		switch (insn->opcode.bytes[1]) {
+		case 0x05: /* syscall */
+		case 0x34: /* sysenter */
+			return X86_BR_SYSCALL;
+		case 0x07: /* sysret */
+		case 0x35: /* sysexit */
+			return X86_BR_SYSRET;
+		case 0x80 ... 0x8f: /* conditional */
+			return X86_BR_JCC;
+		}
+		return X86_BR_NONE;
+	case 0x70 ... 0x7f: /* conditional */
+		return X86_BR_JCC;
+	case 0xc2: /* near ret */
+	case 0xc3: /* near ret */
+	case 0xca: /* far ret */
+	case 0xcb: /* far ret */
+		return X86_BR_RET;
+	case 0xcf: /* iret */
+		return X86_BR_IRET;
+	case 0xcc ... 0xce: /* int */
+		return X86_BR_INT;
+	case 0xe8: /* call near rel */
+		if (insn_get_immediate(insn) || insn->immediate1.value == 0) {
+			/* zero length call */
+			return X86_BR_ZERO_CALL;
+		}
+		fallthrough;
+	case 0x9a: /* call far absolute */
+		return X86_BR_CALL;
+	case 0xe0 ... 0xe3: /* loop jmp */
+		return X86_BR_JCC;
+	case 0xe9 ... 0xeb: /* jmp */
+		return X86_BR_JMP;
+	case 0xff: /* call near absolute, call far absolute ind */
+		if (insn_get_modrm(insn))
+			return X86_BR_ABORT;
+
+		ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+		switch (ext) {
+		case 2: /* near ind call */
+		case 3: /* far ind call */
+			return X86_BR_IND_CALL;
+		case 4:
+		case 5:
+			return X86_BR_IND_JMP;
+		}
+		return X86_BR_NONE;
+	}
+
+	return X86_BR_NONE;
+}
+
+/*
+ * return the type of control flow change at address "from"; the
+ * instruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ *
+ * While recording branches, some processors can report the "from"
+ * address to be that of an instruction preceding the actual branch
+ * when instruction fusion occurs. If fusion is expected, attempt to
+ * find the type of the first branch instruction within the next
+ * MAX_INSN_SIZE bytes and if found, provide the offset between the
+ * reported "from" address and the actual branch instruction address.
+ */
+static int get_branch_type(unsigned long from, unsigned long to, int abort,
+			   bool fused, int *offset)
+{
+	struct insn insn;
+	void *addr;
+	int bytes_read, bytes_left, insn_offset;
+	int ret = X86_BR_NONE;
+	int to_plm, from_plm;
+	u8 buf[MAX_INSN_SIZE];
+	int is64 = 0;
+
+	/* make sure we initialize offset */
+	if (offset)
+		*offset = 0;
+
+	to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+	from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+	/*
+	 * may be zero if the LBR did not fill up after a reset by the
+	 * time we get a PMU interrupt
+	 */
+	if (from == 0 || to == 0)
+		return X86_BR_NONE;
+
+	if (abort)
+		return X86_BR_ABORT | to_plm;
+
+	if (from_plm == X86_BR_USER) {
+		/*
+		 * can happen if measuring at the user level only
+		 * and we interrupt in a kernel thread, e.g., idle.
+		 */
+		if (!current->mm)
+			return X86_BR_NONE;
+
+		/* may fail if text not present */
+		bytes_left = copy_from_user_nmi(buf, (void __user *)from,
+						MAX_INSN_SIZE);
+		bytes_read = MAX_INSN_SIZE - bytes_left;
+		if (!bytes_read)
+			return X86_BR_NONE;
+
+		addr = buf;
+	} else {
+		/*
+		 * The LBR logs any address in the IP, even if the IP just
+		 * faulted. This means userspace can control the from address.
+		 * Ensure we don't blindly read any address by validating it is
+		 * a known text address.
+		 */
+		if (kernel_text_address(from)) {
+			addr = (void *)from;
+			/*
+			 * Assume we can get the maximum possible size
+			 * when grabbing kernel data.  This is not
+			 * _strictly_ true since we could possibly be
+			 * executing up next to a memory hole, but
+			 * it is very unlikely to be a problem.
+			 */
+			bytes_read = MAX_INSN_SIZE;
+		} else {
+			return X86_BR_NONE;
+		}
+	}
+
+	/*
+	 * decoder needs to know the ABI especially
+	 * on 64-bit systems running 32-bit apps
+	 */
+#ifdef CONFIG_X86_64
+	is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs());
+#endif
+	insn_init(&insn, addr, bytes_read, is64);
+	ret = decode_branch_type(&insn);
+	insn_offset = 0;
+
+	/* Check for the possibility of branch fusion */
+	while (fused && ret == X86_BR_NONE) {
+		/* Check for decoding errors */
+		if (insn_get_length(&insn) || !insn.length)
+			break;
+
+		insn_offset += insn.length;
+		bytes_read -= insn.length;
+		if (bytes_read < 0)
+			break;
+
+		insn_init(&insn, addr + insn_offset, bytes_read, is64);
+		ret = decode_branch_type(&insn);
+	}
+
+	if (offset)
+		*offset = insn_offset;
+
+	/*
+	 * Interrupts, traps and faults (and thus ring transitions) may
+	 * occur on any instruction. Thus, to classify them correctly,
+	 * we need to first look at the from and to priv levels. If they
+	 * are different and the target is in the kernel, that indicates
+	 * a ring transition. If the from instruction is not a ring
+	 * transition instr (syscall, sysenter, int), then it means
+	 * it was an irq, trap or fault.
+	 *
+	 * We have no way of detecting kernel-to-kernel faults.
+	 */
+	if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+	    && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+		ret = X86_BR_IRQ;
+
+	/*
+	 * branch priv level determined by target as
+	 * is done by HW when LBR_SELECT is implemented
+	 */
+	if (ret != X86_BR_NONE)
+		ret |= to_plm;
+
+	return ret;
+}
+
+int branch_type(unsigned long from, unsigned long to, int abort)
+{
+	return get_branch_type(from, to, abort, false, NULL);
+}
+
+int branch_type_fused(unsigned long from, unsigned long to, int abort,
+		      int *offset)
+{
+	return get_branch_type(from, to, abort, true, offset);
+}
+
+#define X86_BR_TYPE_MAP_MAX	16
+
+static int branch_map[X86_BR_TYPE_MAP_MAX] = {
+	PERF_BR_CALL,		/* X86_BR_CALL */
+	PERF_BR_RET,		/* X86_BR_RET */
+	PERF_BR_SYSCALL,	/* X86_BR_SYSCALL */
+	PERF_BR_SYSRET,		/* X86_BR_SYSRET */
+	PERF_BR_UNKNOWN,	/* X86_BR_INT */
+	PERF_BR_ERET,		/* X86_BR_IRET */
+	PERF_BR_COND,		/* X86_BR_JCC */
+	PERF_BR_UNCOND,		/* X86_BR_JMP */
+	PERF_BR_IRQ,		/* X86_BR_IRQ */
+	PERF_BR_IND_CALL,	/* X86_BR_IND_CALL */
+	PERF_BR_UNKNOWN,	/* X86_BR_ABORT */
+	PERF_BR_UNKNOWN,	/* X86_BR_IN_TX */
+	PERF_BR_NO_TX,		/* X86_BR_NO_TX */
+	PERF_BR_CALL,		/* X86_BR_ZERO_CALL */
+	PERF_BR_UNKNOWN,	/* X86_BR_CALL_STACK */
+	PERF_BR_IND,		/* X86_BR_IND_JMP */
+};
+
+int common_branch_type(int type)
+{
+	int i;
+
+	type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
+
+	if (type) {
+		i = __ffs(type);
+		if (i < X86_BR_TYPE_MAP_MAX)
+			return branch_map[i];
+	}
+
+	return PERF_BR_UNKNOWN;
+}
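
For reference, common_branch_type() works by shifting away the two privilege bits and indexing a lookup table with the position of the lowest remaining set bit. A small standalone sketch of that mapping (hypothetical BR_* names, using the GCC/Clang __builtin_ctz() builtin in place of the kernel's __ffs()):

#include <stdio.h>

#define BR_USER   (1 << 0)
#define BR_KERNEL (1 << 1)
#define BR_CALL   (1 << 2)
#define BR_RET    (1 << 3)

static const char *map[] = { "call", "ret" };

static const char *classify(int type)
{
	type >>= 2;				/* drop BR_USER/BR_KERNEL */
	if (type) {
		int i = __builtin_ctz(type);	/* index of lowest set bit */
		if (i < 2)
			return map[i];
	}
	return "unknown";
}

int main(void)
{
	printf("%s\n", classify(BR_CALL | BR_KERNEL));	/* prints "call" */
	return 0;
}
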
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 3de6d8b53367..29774126e931 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -426,7 +426,7 @@ void __init hyperv_init(void)
 	 * 1. Register the guest ID
 	 * 2. Enable the hypercall and register the hypercall page
 	 */
-	guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
+	guest_id = hv_generate_guest_id(LINUX_VERSION_CODE);
 	wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
 
 	/* Hyper-V requires to write guest os id via ghcb in SNP IVM. */
@@ -459,13 +459,13 @@ void __init hyperv_init(void)
 		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
 
 		pg = vmalloc_to_page(hv_hypercall_pg);
-		dst = kmap(pg);
+		dst = kmap_local_page(pg);
 		src = memremap(hypercall_msr.guest_physical_address << PAGE_SHIFT, PAGE_SIZE,
 				MEMREMAP_WB);
 		BUG_ON(!(src && dst));
 		memcpy(dst, src, HV_HYP_PAGE_SIZE);
 		memunmap(src);
-		kunmap(pg);
+		kunmap_local(dst);
 	} else {
 		hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
 		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
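
The kmap()/kunmap() pair above becomes kmap_local_page()/kunmap_local(). A hedged sketch of the new pattern (illustrative helper, not from this patch); note that kunmap_local() takes the address returned by the mapping call, not the struct page:

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy from a (possibly highmem) page via a CPU-local mapping. */
static void copy_from_page(struct page *pg, void *buf, size_t len)
{
	void *src = kmap_local_page(pg);	/* mapping is local to this CPU */

	memcpy(buf, src, len);
	kunmap_local(src);			/* unmap with the returned address */
}
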
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
index f3eb098d63d4..cb2a5e113daa 100644
--- a/arch/x86/include/asm/amd-ibs.h
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -6,6 +6,22 @@
 
 #include <asm/msr-index.h>
 
+/* IBS_OP_DATA2 DataSrc */
+#define IBS_DATA_SRC_LOC_CACHE			 2
+#define IBS_DATA_SRC_DRAM			 3
+#define IBS_DATA_SRC_REM_CACHE			 4
+#define IBS_DATA_SRC_IO				 7
+
+/* IBS_OP_DATA2 DataSrc Extension */
+#define IBS_DATA_SRC_EXT_LOC_CACHE		 1
+#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE		 2
+#define IBS_DATA_SRC_EXT_DRAM			 3
+#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE		 5
+#define IBS_DATA_SRC_EXT_PMEM			 6
+#define IBS_DATA_SRC_EXT_IO			 7
+#define IBS_DATA_SRC_EXT_EXT_MEM		 8
+#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM		12
+
 /*
  * IBS Hardware MSRs
  */
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index bca625a60186..6df6ece8a28e 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,9 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#define  _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
-#define HAVE_CSUM_COPY_USER
-#define _HAVE_ARCH_CSUM_AND_COPY
-#ifdef CONFIG_X86_32
-# include <asm/checksum_32.h>
+#ifdef CONFIG_GENERIC_CSUM
+# include <asm-generic/checksum.h>
 #else
-# include <asm/checksum_64.h>
+# define  _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
+# define HAVE_CSUM_COPY_USER
+# define _HAVE_ARCH_CSUM_AND_COPY
+# ifdef CONFIG_X86_32
+#  include <asm/checksum_32.h>
+# else
+#  include <asm/checksum_64.h>
+# endif
 #endif
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ef4775c6db01..b71f4f2ecdd5 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-/* FREE!                                ( 3*32+17) */
+#define X86_FEATURE_AMD_LBR_V2		( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b5ef474be858..908d99b127d3 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -23,7 +23,6 @@
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
 #ifndef __ASSEMBLY__
-extern atomic_t modifying_ftrace_code;
 extern void __fentry__(void);
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index a1f0e90d0818..0bc931cd0698 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -44,10 +44,7 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
+#define hw_breakpoint_slots(type) (HBP_NUM)
 
 struct perf_event_attr;
 struct perf_event;
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
new file mode 100644
index 000000000000..8fa6ac0e2d76
--- /dev/null
+++ b/arch/x86/include/asm/kmsan.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KMSAN support.
+ *
+ * Copyright (C) 2022, Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#ifndef _ASM_X86_KMSAN_H
+#define _ASM_X86_KMSAN_H
+
+#ifndef MODULE
+
+#include <asm/cpu_entry_area.h>
+#include <asm/processor.h>
+#include <linux/mmzone.h>
+
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DECLARE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
+
+/*
+ * Functions below are declared in the header to make sure they are inlined.
+ * They all are called from kmsan_get_metadata() for every memory access in
+ * the kernel, so speed is important here.
+ */
+
+/*
+ * Compute metadata addresses for the CPU entry area on x86.
+ */
+static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
+{
+	unsigned long addr64 = (unsigned long)addr;
+	char *metadata_array;
+	unsigned long off;
+	int cpu;
+
+	if ((addr64 < CPU_ENTRY_AREA_BASE) ||
+	    (addr64 >= (CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE)))
+		return NULL;
+	cpu = (addr64 - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE;
+	off = addr64 - (unsigned long)get_cpu_entry_area(cpu);
+	if ((off < 0) || (off >= CPU_ENTRY_AREA_SIZE))
+		return NULL;
+	metadata_array = is_origin ? cpu_entry_area_origin :
+				     cpu_entry_area_shadow;
+	return &per_cpu(metadata_array[off], cpu);
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.h to avoid using an instrumented version.
+ */
+static inline bool kmsan_phys_addr_valid(unsigned long addr)
+{
+	if (IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+		return !(addr >> boot_cpu_data.x86_phys_bits);
+	else
+		return true;
+}
+
+/*
+ * Taken from arch/x86/mm/physaddr.c to avoid using an instrumented version.
+ */
+static inline bool kmsan_virt_addr_valid(void *addr)
+{
+	unsigned long x = (unsigned long)addr;
+	unsigned long y = x - __START_KERNEL_map;
+
+	/* use the carry flag to determine if x was < __START_KERNEL_map */
+	if (unlikely(x > y)) {
+		x = y + phys_base;
+
+		if (y >= KERNEL_IMAGE_SIZE)
+			return false;
+	} else {
+		x = y + (__START_KERNEL_map - PAGE_OFFSET);
+
+		/* carry flag will be set if starting x was >= PAGE_OFFSET */
+		if ((x > y) || !kmsan_phys_addr_valid(x))
+			return false;
+	}
+
+	return pfn_valid(x >> PAGE_SHIFT);
+}
+
+#endif /* !MODULE */
+
+#endif /* _ASM_X86_KMSAN_H */
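
The `x > y` test in kmsan_virt_addr_valid() relies on unsigned wraparound: `y = x - BASE` underflows to a huge value exactly when `x < BASE`, so a single compare stands in for the range check. A tiny standalone demonstration with a made-up BASE:

#include <assert.h>

#define BASE 0x1000UL

int main(void)
{
	unsigned long x1 = 0x1800UL, y1 = x1 - BASE;
	unsigned long x2 = 0x0800UL, y2 = x2 - BASE;

	assert(x1 > y1);	/* x1 >= BASE: no wraparound */
	assert(!(x2 > y2));	/* x2 <  BASE: subtraction wrapped */
	return 0;
}
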
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 71ea2eab43d5..a2e9317aad49 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -50,8 +50,6 @@ extern const int kretprobe_blacklist_size;
 
 void arch_remove_kprobe(struct kprobe *p);
 
-extern void arch_kprobe_override_function(struct pt_regs *regs);
-
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
 	/* copy of the original instruction */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 61b9dd34d333..7551b6f9c31c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1280,8 +1280,8 @@ struct kvm_arch {
 	bool tdp_mmu_enabled;
 
 	/*
-	 * List of struct kvm_mmu_pages being used as roots.
-	 * All struct kvm_mmu_pages in the list should have
+	 * List of kvm_mmu_page structs being used as roots.
+	 * All kvm_mmu_page structs in the list should have
 	 * tdp_mmu_page set.
 	 *
 	 * For reads, this list is protected by:
@@ -1300,8 +1300,8 @@ struct kvm_arch {
 	struct list_head tdp_mmu_roots;
 
 	/*
-	 * List of struct kvmp_mmu_pages not being used as roots.
-	 * All struct kvm_mmu_pages in the list should have
+	 * List of kvm_mmu_page structs not being used as roots.
+	 * All kvm_mmu_page structs in the list should have
 	 * tdp_mmu_page set and a tdp_mmu_root_count of 0.
 	 */
 	struct list_head tdp_mmu_pages;
@@ -1311,9 +1311,9 @@ struct kvm_arch {
 	 * is held in read mode:
 	 *  - tdp_mmu_roots (above)
 	 *  - tdp_mmu_pages (above)
-	 *  - the link field of struct kvm_mmu_pages used by the TDP MMU
+	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
 	 *  - lpage_disallowed_mmu_pages
-	 *  - the lpage_disallowed_link field of struct kvm_mmu_pages used
+	 *  - the lpage_disallowed_link field of kvm_mmu_page structs used
 	 *    by the TDP MMU
 	 * It is acceptable, but not necessary, to acquire this lock when
 	 * the thread holds the MMU lock in write mode.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1e086b37a307..10ac52705892 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -590,6 +590,9 @@
 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL		0xc0000301
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR	0xc0000302
 
+/* AMD Last Branch Record MSRs */
+#define MSR_AMD64_LBR_SELECT			0xc000010e
+
 /* Fam 17h MSRs */
 #define MSR_F17H_IRPERF			0xc00000e9
 
@@ -761,6 +764,8 @@
 #define MSR_AMD_DBG_EXTN_CFG		0xc000010f
 #define MSR_AMD_SAMP_BR_FROM		0xc0010300
 
+#define DBG_EXTN_CFG_LBRV2EN		BIT_ULL(6)
+
 #define MSR_IA32_MPERF			0x000000e7
 #define MSR_IA32_APERF			0x000000e8
 
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index baa70451b8df..198e03e59ca1 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -8,6 +8,8 @@
 #include <asm/cpufeatures.h>
 #include <asm/alternative.h>
 
+#include <linux/kmsan-checks.h>
+
 /* duplicated to the one in bootmem.h */
 extern unsigned long max_pfn;
 extern unsigned long phys_base;
@@ -47,6 +49,11 @@ void clear_page_erms(void *page);
 
 static inline void clear_page(void *page)
 {
+	/*
+	 * Clean up KMSAN metadata for the page being cleared. The assembly call
+	 * below clobbers @page, so we perform unpoisoning before it.
+	 */
+	kmsan_unpoison_memory(page, PAGE_SIZE);
 	alternative_call_2(clear_page_orig,
 			   clear_page_rep, X86_FEATURE_REP_GOOD,
 			   clear_page_erms, X86_FEATURE_ERMS,
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f6fc8dd51ef4..9ac46dbe57d4 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -207,7 +207,8 @@ union cpuid_0x80000022_ebx {
 	struct {
 		/* Number of Core Performance Counters */
 		unsigned int	num_core_pmc:4;
-		unsigned int	reserved:6;
+		/* Number of available LBR Stack Entries */
+		unsigned int	lbr_v2_stack_sz:6;
 		/* Number of Data Fabric Counters */
 		unsigned int	num_df_pmc:6;
 	} split;
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index e896ebef8c24..28421a887209 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -256,10 +256,10 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
 /* We always extract/encode the offset by shifting it all the way up, and then down again */
 #define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS)
 
-#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
-#define __swp_type(x)			(((x).val) & 0x1f)
-#define __swp_offset(x)			((x).val >> 5)
-#define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << 5})
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
+#define __swp_type(x)			(((x).val) & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)			((x).val >> SWP_TYPE_BITS)
+#define __swp_entry(type, offset)	((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS})
 
 /*
  * Normally, __swp_entry() converts from arch-independent swp_entry_t to
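
The rewritten swap-entry macros pack the type into the low SWP_TYPE_BITS bits and the offset above them. A standalone round-trip sketch, assuming SWP_TYPE_BITS is 5 as in the x86 PAE layout above:

#include <assert.h>

#define SWP_TYPE_BITS 5

static unsigned long swp_entry(unsigned long type, unsigned long offset)
{
	return type | (offset << SWP_TYPE_BITS);
}

static unsigned long swp_type(unsigned long val)
{
	return val & ((1UL << SWP_TYPE_BITS) - 1);
}

static unsigned long swp_offset(unsigned long val)
{
	return val >> SWP_TYPE_BITS;
}

int main(void)
{
	unsigned long e = swp_entry(3, 0x1234);

	assert(swp_type(e) == 3);	/* type survives the round trip */
	assert(swp_offset(e) == 0x1234);
	return 0;
}
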
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 44e2d6f1dbaa..5059799bebe3 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 
 static inline int pmd_bad(pmd_t pmd)
 {
-	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) !=
+	       (_KERNPG_TABLE & ~_PAGE_ACCESSED);
 }
 
 static inline unsigned long pages_to_mb(unsigned long npg)
@@ -1431,10 +1432,10 @@ static inline bool arch_has_pfn_modify_check(void)
 	return boot_cpu_has_bug(X86_BUG_L1TF);
 }
 
-#define arch_faults_on_old_pte arch_faults_on_old_pte
-static inline bool arch_faults_on_old_pte(void)
+#define arch_has_hw_pte_young arch_has_hw_pte_young
+static inline bool arch_has_hw_pte_young(void)
 {
-	return false;
+	return true;
 }
 
 #ifdef CONFIG_PAGE_TABLE_CHECK
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 70e360a2e5fb..04f36063ad54 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -139,7 +139,52 @@ extern unsigned int ptrs_per_p4d;
 # define VMEMMAP_START		__VMEMMAP_BASE_L4
 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
 
-#define VMALLOC_END		(VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+/*
+ * End of the region for which vmalloc page tables are pre-allocated.
+ * For non-KMSAN builds, this is the same as VMALLOC_END.
+ * For KMSAN builds, VMALLOC_START..VMEMORY_END is 4 times bigger than
+ * VMALLOC_START..VMALLOC_END (see below).
+ */
+#define VMEMORY_END		(VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+
+#ifndef CONFIG_KMSAN
+#define VMALLOC_END		VMEMORY_END
+#else
+/*
+ * In KMSAN builds the vmalloc area is four times smaller, and the remaining
+ * 3/4 are used to keep the metadata for virtual pages. The memory formerly
+ * belonging to the vmalloc area is now laid out as follows:
+ *
+ * 1st quarter: VMALLOC_START to VMALLOC_END - new vmalloc area
+ * 2nd quarter: KMSAN_VMALLOC_SHADOW_START to
+ *              VMALLOC_END+KMSAN_VMALLOC_SHADOW_OFFSET - vmalloc area shadow
+ * 3rd quarter: KMSAN_VMALLOC_ORIGIN_START to
+ *              VMALLOC_END+KMSAN_VMALLOC_ORIGIN_OFFSET - vmalloc area origins
+ * 4th quarter: KMSAN_MODULES_SHADOW_START to KMSAN_MODULES_ORIGIN_START
+ *              - shadow for modules,
+ *              KMSAN_MODULES_ORIGIN_START to
+ *              KMSAN_MODULES_ORIGIN_START + MODULES_LEN - origins for modules.
+ */
+#define VMALLOC_QUARTER_SIZE	((VMALLOC_SIZE_TB << 40) >> 2)
+#define VMALLOC_END		(VMALLOC_START + VMALLOC_QUARTER_SIZE - 1)
+
+/*
+ * vmalloc metadata addresses are calculated by adding shadow/origin offsets
+ * to vmalloc address.
+ */
+#define KMSAN_VMALLOC_SHADOW_OFFSET	VMALLOC_QUARTER_SIZE
+#define KMSAN_VMALLOC_ORIGIN_OFFSET	(VMALLOC_QUARTER_SIZE << 1)
+
+#define KMSAN_VMALLOC_SHADOW_START	(VMALLOC_START + KMSAN_VMALLOC_SHADOW_OFFSET)
+#define KMSAN_VMALLOC_ORIGIN_START	(VMALLOC_START + KMSAN_VMALLOC_ORIGIN_OFFSET)
+
+/*
+ * The shadow and origin regions for modules are placed one after another
+ * in the last quarter of the vmalloc space.
+ */
+#define KMSAN_MODULES_SHADOW_START	(VMALLOC_END + KMSAN_VMALLOC_ORIGIN_OFFSET + 1)
+#define KMSAN_MODULES_ORIGIN_START	(KMSAN_MODULES_SHADOW_START + MODULES_LEN)
+#endif /* CONFIG_KMSAN */
 
 #define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections end with the start of the fixmap */
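
A toy model of the KMSAN quartering above: shadow metadata lives one quarter-size offset above an address, origin metadata two quarter-size offsets above it (made-up numbers, not the real x86-64 layout):

#include <stdio.h>

#define VSTART       0x1000UL
#define VSIZE        0x4000UL
#define QUARTER      (VSIZE >> 2)
#define SHADOW_OFF   QUARTER		/* 2nd quarter holds shadow */
#define ORIGIN_OFF   (QUARTER << 1)	/* 3rd quarter holds origins */

int main(void)
{
	unsigned long addr = VSTART + 0x10;	/* some vmalloc address */

	printf("shadow of %#lx is %#lx\n", addr, addr + SHADOW_OFF);
	printf("origin of %#lx is %#lx\n", addr, addr + ORIGIN_OFF);
	return 0;
}
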
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 356308c73951..67c9d73b31fa 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -587,9 +587,6 @@ static inline void load_sp0(unsigned long sp0)
 
 #endif /* CONFIG_PARAVIRT_XXL */
 
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-
 unsigned long __get_wchan(struct task_struct *p);
 
 /*
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 892fd8c3a6f7..60ece592b220 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -12,7 +12,7 @@
  */
 #ifdef CONFIG_64BIT
 
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
 #define __pv_queued_spin_unlock	__pv_queued_spin_unlock
 #define PV_UNLOCK		"__raw_callee_save___pv_queued_spin_unlock"
 #define PV_UNLOCK_SLOWPATH	"__raw_callee_save___pv_queued_spin_unlock_slowpath"
@@ -20,9 +20,10 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
 /*
  * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
  * which combines the register-saving thunk and the body of the following
- * C code:
+ * C code.  Note that it puts the code in the .spinlock.text section which
+ * is equivalent to adding __lockfunc in the C code:
  *
- * void __pv_queued_spin_unlock(struct qspinlock *lock)
+ * void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
  * {
  *	u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
  *
@@ -36,7 +37,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
  *   rsi = lockval           (second argument)
  *   rdx = internal variable (set to 0)
  */
-asm    (".pushsection .text;"
+asm    (".pushsection .spinlock.text;"
 	".globl " PV_UNLOCK ";"
 	".type " PV_UNLOCK ", @function;"
 	".align 4,0x90;"
@@ -65,8 +66,8 @@ asm    (".pushsection .text;"
 
 #else /* CONFIG_64BIT */
 
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock);
+extern void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock);
+__PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock, ".spinlock.text");
 
 #endif /* CONFIG_64BIT */
 #endif
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 6a9ccc1b2be5..64df897c0ee3 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_SPARSEMEM_H
 #define _ASM_X86_SPARSEMEM_H
 
+#include <linux/types.h>
+
 #ifdef CONFIG_SPARSEMEM
 /*
  * generic non-linear memory support:
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 6e450827f677..3b87d889b6e1 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -11,11 +11,23 @@
    function. */
 
 #define __HAVE_ARCH_MEMCPY 1
+#if defined(__SANITIZE_MEMORY__)
+#undef memcpy
+void *__msan_memcpy(void *dst, const void *src, size_t size);
+#define memcpy __msan_memcpy
+#else
 extern void *memcpy(void *to, const void *from, size_t len);
+#endif
 extern void *__memcpy(void *to, const void *from, size_t len);
 
 #define __HAVE_ARCH_MEMSET
+#if defined(__SANITIZE_MEMORY__)
+extern void *__msan_memset(void *s, int c, size_t n);
+#undef memset
+#define memset __msan_memset
+#else
 void *memset(void *s, int c, size_t n);
+#endif
 void *__memset(void *s, int c, size_t n);
 
 #define __HAVE_ARCH_MEMSET16
@@ -55,7 +67,13 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
 }
 
 #define __HAVE_ARCH_MEMMOVE
+#if defined(__SANITIZE_MEMORY__)
+#undef memmove
+void *__msan_memmove(void *dest, const void *src, size_t len);
+#define memmove __msan_memmove
+#else
 void *memmove(void *dest, const void *src, size_t count);
+#endif
 void *__memmove(void *dest, const void *src, size_t count);
 
 int memcmp(const void *cs, const void *ct, size_t count);
@@ -64,8 +82,7 @@ char *strcpy(char *dest, const char *src);
 char *strcat(char *dest, const char *src);
 int strcmp(const char *cs, const char *ct);
 
-#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
-
+#if (defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__))
 /*
  * For files that are not instrumented (e.g. mm/slub.c) we
  * should use the not-instrumented versions of mem* functions.
@@ -73,7 +90,9 @@ int strcmp(const char *cs, const char *ct);
 
 #undef memcpy
 #define memcpy(dst, src, len) __memcpy(dst, src, len)
+#undef memmove
 #define memmove(dst, src, len) __memmove(dst, src, len)
+#undef memset
 #define memset(s, c, n) __memset(s, c, n)
 
 #ifndef __NO_FORTIFY
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1ec6a9ea2328..8bc614cfe21b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -5,6 +5,7 @@
  * User space memory access functions
  */
 #include <linux/compiler.h>
+#include <linux/instrumented.h>
 #include <linux/kasan-checks.h>
 #include <linux/string.h>
 #include <asm/asm.h>
@@ -103,6 +104,7 @@ extern int __get_user_bad(void);
 		     : "=a" (__ret_gu), "=r" (__val_gu),		\
 			ASM_CALL_CONSTRAINT				\
 		     : "0" (ptr), "i" (sizeof(*(ptr))));		\
+	instrument_get_user(__val_gu);					\
 	(x) = (__force __typeof__(*(ptr))) __val_gu;			\
 	__builtin_expect(__ret_gu, 0);					\
 })
@@ -192,9 +194,11 @@ extern void __put_user_nocheck_8(void);
 	int __ret_pu;							\
 	void __user *__ptr_pu;						\
 	register __typeof__(*(ptr)) __val_pu asm("%"_ASM_AX);		\
-	__chk_user_ptr(ptr);						\
-	__ptr_pu = (ptr);						\
-	__val_pu = (x);							\
+	__typeof__(*(ptr)) __x = (x); /* eval x once */			\
+	__typeof__(ptr) __ptr = (ptr); /* eval ptr once */		\
+	__chk_user_ptr(__ptr);						\
+	__ptr_pu = __ptr;						\
+	__val_pu = __x;							\
 	asm volatile("call __" #fn "_%P[size]"				\
 		     : "=c" (__ret_pu),					\
 			ASM_CALL_CONSTRAINT				\
@@ -202,6 +206,7 @@ extern void __put_user_nocheck_8(void);
 		       "r" (__val_pu),					\
 		       [size] "i" (sizeof(*(ptr)))			\
 		     :"ebx");						\
+	instrument_put_user(__x, __ptr, sizeof(*(ptr)));		\
 	__builtin_expect(__ret_pu, 0);					\
 })
 
@@ -248,23 +253,25 @@ extern void __put_user_nocheck_8(void);
 
 #define __put_user_size(x, ptr, size, label)				\
 do {									\
+	__typeof__(*(ptr)) __x = (x); /* eval x once */			\
 	__chk_user_ptr(ptr);						\
 	switch (size) {							\
 	case 1:								\
-		__put_user_goto(x, ptr, "b", "iq", label);		\
+		__put_user_goto(__x, ptr, "b", "iq", label);		\
 		break;							\
 	case 2:								\
-		__put_user_goto(x, ptr, "w", "ir", label);		\
+		__put_user_goto(__x, ptr, "w", "ir", label);		\
 		break;							\
 	case 4:								\
-		__put_user_goto(x, ptr, "l", "ir", label);		\
+		__put_user_goto(__x, ptr, "l", "ir", label);		\
 		break;							\
 	case 8:								\
-		__put_user_goto_u64(x, ptr, label);			\
+		__put_user_goto_u64(__x, ptr, label);			\
 		break;							\
 	default:							\
 		__put_user_bad();					\
 	}								\
+	instrument_put_user(__x, ptr, size);				\
 } while (0)
 
 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
@@ -305,6 +312,7 @@ do {									\
 	default:							\
 		(x) = __get_user_bad();					\
 	}								\
+	instrument_get_user(x);						\
 } while (0)
 
 #define __get_user_asm(x, addr, itype, ltype, label)			\
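
The __put_user_size()/do_put_user() changes capture `x` and `ptr` in local temporaries so the macro arguments are evaluated exactly once. A standalone sketch of the double-evaluation hazard being avoided (GNU C statement expression, illustrative names):

#include <stdio.h>

static int calls;

static int next(void)
{
	return ++calls;
}

/* BAD expands its argument twice; GOOD captures it once. */
#define BAD(x)  ((x) + (x))
#define GOOD(x) ({ __typeof__(x) __x = (x); __x + __x; })

int main(void)
{
	int r;

	calls = 0;
	r = BAD(next());
	printf("BAD:  r=%d calls=%d\n", r, calls);	/* calls == 2 */

	calls = 0;
	r = GOOD(next());
	printf("GOOD: r=%d calls=%d\n", r, calls);	/* calls == 1 */
	return 0;
}
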
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index c371ef695fcc..498dc600bd5c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -309,7 +309,7 @@ enum vmcs_field {
 	GUEST_LDTR_AR_BYTES             = 0x00004820,
 	GUEST_TR_AR_BYTES               = 0x00004822,
 	GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
-	GUEST_ACTIVITY_STATE            = 0X00004826,
+	GUEST_ACTIVITY_STATE            = 0x00004826,
 	GUEST_SYSENTER_CS               = 0x0000482A,
 	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
 	HOST_IA32_SYSENTER_CS           = 0x00004c00,
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1286a73ebdbc..f901658d9f7c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -3,10 +3,6 @@
 # Makefile for the linux kernel.
 #
 
-extra-y	:= head_$(BITS).o
-extra-y	+= head$(BITS).o
-extra-y	+= ebda.o
-extra-y	+= platform-quirks.o
 extra-y	+= vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
@@ -33,6 +29,8 @@ KASAN_SANITIZE_sev.o					:= n
 # With some compiler versions the generated code results in boot hangs, caused
 # by several compilation units. To be safe, disable all instrumentation.
 KCSAN_SANITIZE := n
+KMSAN_SANITIZE_head$(BITS).o				:= n
+KMSAN_SANITIZE_nmi.o					:= n
 
 # If instrumentation of this dir is enabled, boot hangs during first second.
 # Probably could be more selective here, but note that files related to irqs,
@@ -42,7 +40,11 @@ KCOV_INSTRUMENT		:= n
 
 CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
 
-obj-y			:= process_$(BITS).o signal.o
+obj-y			+= head_$(BITS).o
+obj-y			+= head$(BITS).o
+obj-y			+= ebda.o
+obj-y			+= platform-quirks.o
+obj-y			+= process_$(BITS).o signal.o
 obj-$(CONFIG_COMPAT)	+= signal_compat.o
 obj-y			+= traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y			+= time.o ioport.o dumpstack.o nmi.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 9661e3e802be..f10a921ee756 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -12,6 +12,7 @@ endif
 # If these files are instrumented, boot hangs during the first second.
 KCOV_INSTRUMENT_common.o := n
 KCOV_INSTRUMENT_perf_event.o := n
+KMSAN_SANITIZE_common.o := n
 
 # As above, instrumenting secondary CPU boot code causes boot hangs.
 KCSAN_SANITIZE_common.o := n
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index fd44b54c90d5..fc01f81f6e2a 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -45,6 +45,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },
 	{ X86_FEATURE_MBA,		CPUID_EBX,  6, 0x80000008, 0 },
 	{ X86_FEATURE_PERFMON_V2,	CPUID_EAX,  0, 0x80000022, 0 },
+	{ X86_FEATURE_AMD_LBR_V2,	CPUID_EAX,  1, 0x80000022, 0 },
 	{ 0, 0, 0, 0, 0 }
 };
 
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index b3dba35f466e..0bf6779187dd 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -177,6 +177,12 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
 	}
 }
 
+/*
+ * This function reads pointers from the stack and dereferences them. The
+ * pointers may not have their KMSAN shadow set up properly, which may result
+ * in false positive reports. Disable instrumentation to avoid those.
+ */
+__no_kmsan_checks
 static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			unsigned long *stack, const char *log_lvl)
 {
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4c3c27b6aea3..eb8bc82846b9 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -59,8 +59,6 @@
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-#define stack_addr(regs) ((unsigned long *)regs->sp)
-
 #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
 	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) |   \
 	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) |   \
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1962008fe743..6b3418bff326 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -553,6 +553,7 @@ void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32)
  * Kprobes not supported here. Set the probe on schedule instead.
  * Function graph tracer not supported too.
  */
+__no_kmsan_checks
 __visible __notrace_funcgraph struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f24227bc3220..3f3ea0287f69 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1316,7 +1316,7 @@ static void __init smp_sanity_check(void)
 			nr++;
 		}
 
-		nr_cpu_ids = 8;
+		set_nr_cpu_ids(8);
 	}
 #endif
 
@@ -1569,7 +1569,7 @@ __init void prefill_possible_map(void)
 		possible = i;
 	}
 
-	nr_cpu_ids = possible;
+	set_nr_cpu_ids(possible);
 
 	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
 		possible, max_t(int, possible - num_processors, 0));
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 3bacd935f840..4c1bcb6053fc 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -95,7 +95,7 @@ void __init tboot_probe(void)
 
 static pgd_t *tboot_pg_dir;
 static struct mm_struct tboot_mm = {
-	.mm_rb          = RB_ROOT,
+	.mm_mt          = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock),
 	.pgd            = swapper_pg_dir,
 	.mm_users       = ATOMIC_INIT(2),
 	.mm_count       = ATOMIC_INIT(1),
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index 8e1c50c86e5d..d8ba93778ae3 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -183,6 +183,16 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 }
 #endif
 
+/*
+ * While walking the stack, KMSAN may stomp on stale locals from other
+ * functions that were marked as uninitialized upon function exit, and
+ * now hold the call frame information for the current function (e.g. the frame
+ * pointer). Because KMSAN does not specifically mark call frames as
+ * initialized, false positive reports are possible. To prevent such reports,
+ * we mark the functions scanning the stack (here and below) with
+ * __no_kmsan_checks.
+ */
+__no_kmsan_checks
 static bool update_stack_state(struct unwind_state *state,
 			       unsigned long *next_bp)
 {
@@ -250,6 +260,7 @@ static bool update_stack_state(struct unwind_state *state,
 	return true;
 }
 
+__no_kmsan_checks
 bool unwind_next_frame(struct unwind_state *state)
 {
 	struct pt_regs *regs;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index e3cbd7706136..67be7f217e37 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -28,7 +28,8 @@ config KVM
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_PFNCACHE
 	select HAVE_KVM_IRQFD
-	select HAVE_KVM_DIRTY_RING
+	select HAVE_KVM_DIRTY_RING_TSO
+	select HAVE_KVM_DIRTY_RING_ACQ_REL
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 02f9e4f245bd..d9b9a0f0db17 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -106,9 +106,19 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 		return;
 
 	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
-		/* Indicate PEBS overflow PMI to guest. */
-		skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
-					      (unsigned long *)&pmu->global_status);
+		if (!in_pmi) {
+			/*
+			 * TODO: KVM currently _chooses_ not to generate records
+			 * for emulated instructions, avoiding a BUFFER_OVF PMI
+			 * when there are no records. Strictly speaking, the PMI
+			 * should also be injected in the right context to
+			 * improve sampling accuracy.
+			 */
+			skip_pmi = true;
+		} else {
+			/* Indicate PEBS overflow PMI to guest. */
+			skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT,
+						      (unsigned long *)&pmu->global_status);
+		}
 	} else {
 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
 	}
@@ -227,8 +237,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 			      get_sample_period(pmc, pmc->counter)))
 		return false;
 
-	if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) &&
-	    pmc->perf_event->attr.precise_ip)
+	if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) !=
+	    (!!pmc->perf_event->attr.precise_ip))
 		return false;
 
 	/* reuse perf_event to serve as pmc_reprogram_counter() does */
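
The reworked check above only lets a perf_event be reused when the counter's pebs_enable bit and attr.precise_ip agree, i.e. both set or both clear. A minimal sketch of that boolean-agreement idiom (hypothetical helper name):

#include <assert.h>

static int mismatch(int pebs_enabled, int precise_ip)
{
	/* !! normalizes precise_ip (0..3) to a 0/1 boolean first. */
	return pebs_enabled != !!precise_ip;
}

int main(void)
{
	assert(!mismatch(0, 0));	/* both clear: reuse allowed */
	assert(!mismatch(1, 3));	/* both set:   reuse allowed */
	assert(mismatch(1, 0));		/* disagree:   recreate event */
	assert(mismatch(0, 2));
	return 0;
}
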
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index f24613a108c5..b68956299fa8 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -23,107 +23,52 @@ enum pmu_type {
 	PMU_TYPE_EVNTSEL,
 };
 
-enum index {
-	INDEX_ZERO = 0,
-	INDEX_ONE,
-	INDEX_TWO,
-	INDEX_THREE,
-	INDEX_FOUR,
-	INDEX_FIVE,
-	INDEX_ERROR,
-};
-
-static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
+static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 {
-	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+	unsigned int num_counters = pmu->nr_arch_gp_counters;
 
-	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
-		if (type == PMU_TYPE_COUNTER)
-			return MSR_F15H_PERF_CTR;
-		else
-			return MSR_F15H_PERF_CTL;
-	} else {
-		if (type == PMU_TYPE_COUNTER)
-			return MSR_K7_PERFCTR0;
-		else
-			return MSR_K7_EVNTSEL0;
-	}
-}
+	if (pmc_idx >= num_counters)
+		return NULL;
 
-static enum index msr_to_index(u32 msr)
-{
-	switch (msr) {
-	case MSR_F15H_PERF_CTL0:
-	case MSR_F15H_PERF_CTR0:
-	case MSR_K7_EVNTSEL0:
-	case MSR_K7_PERFCTR0:
-		return INDEX_ZERO;
-	case MSR_F15H_PERF_CTL1:
-	case MSR_F15H_PERF_CTR1:
-	case MSR_K7_EVNTSEL1:
-	case MSR_K7_PERFCTR1:
-		return INDEX_ONE;
-	case MSR_F15H_PERF_CTL2:
-	case MSR_F15H_PERF_CTR2:
-	case MSR_K7_EVNTSEL2:
-	case MSR_K7_PERFCTR2:
-		return INDEX_TWO;
-	case MSR_F15H_PERF_CTL3:
-	case MSR_F15H_PERF_CTR3:
-	case MSR_K7_EVNTSEL3:
-	case MSR_K7_PERFCTR3:
-		return INDEX_THREE;
-	case MSR_F15H_PERF_CTL4:
-	case MSR_F15H_PERF_CTR4:
-		return INDEX_FOUR;
-	case MSR_F15H_PERF_CTL5:
-	case MSR_F15H_PERF_CTR5:
-		return INDEX_FIVE;
-	default:
-		return INDEX_ERROR;
-	}
+	return &pmu->gp_counters[array_index_nospec(pmc_idx, num_counters)];
 }
 
 static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
 					     enum pmu_type type)
 {
 	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+	unsigned int idx;
 
 	if (!vcpu->kvm->arch.enable_pmu)
 		return NULL;
 
 	switch (msr) {
-	case MSR_F15H_PERF_CTL0:
-	case MSR_F15H_PERF_CTL1:
-	case MSR_F15H_PERF_CTL2:
-	case MSR_F15H_PERF_CTL3:
-	case MSR_F15H_PERF_CTL4:
-	case MSR_F15H_PERF_CTL5:
+	case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
 		if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
 			return NULL;
-		fallthrough;
+		/*
+		 * Each PMU counter has a pair of CTL and CTR MSRs. CTLn
+		 * MSRs (accessed via EVNTSEL) are even, CTRn MSRs are odd.
+		 */
+		idx = (unsigned int)((msr - MSR_F15H_PERF_CTL0) / 2);
+		if (!(msr & 0x1) != (type == PMU_TYPE_EVNTSEL))
+			return NULL;
+		break;
 	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
 		if (type != PMU_TYPE_EVNTSEL)
 			return NULL;
+		idx = msr - MSR_K7_EVNTSEL0;
 		break;
-	case MSR_F15H_PERF_CTR0:
-	case MSR_F15H_PERF_CTR1:
-	case MSR_F15H_PERF_CTR2:
-	case MSR_F15H_PERF_CTR3:
-	case MSR_F15H_PERF_CTR4:
-	case MSR_F15H_PERF_CTR5:
-		if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
-			return NULL;
-		fallthrough;
 	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
 		if (type != PMU_TYPE_COUNTER)
 			return NULL;
+		idx = msr - MSR_K7_PERFCTR0;
 		break;
 	default:
 		return NULL;
 	}
 
-	return &pmu->gp_counters[msr_to_index(msr)];
+	return amd_pmc_idx_to_pmc(pmu, idx);
 }
 
 static bool amd_hw_event_available(struct kvm_pmc *pmc)
@@ -139,22 +84,6 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
 	return true;
 }
 
-static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
-{
-	unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER);
-	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
-
-	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
-		/*
-		 * The idx is contiguous. The MSRs are not. The counter MSRs
-		 * are interleaved with the event select MSRs.
-		 */
-		pmc_idx *= 2;
-	}
-
-	return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
-}
-
 static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -168,15 +97,7 @@ static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
 	unsigned int idx, u64 *mask)
 {
-	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	struct kvm_pmc *counters;
-
-	idx &= ~(3u << 30);
-	if (idx >= pmu->nr_arch_gp_counters)
-		return NULL;
-	counters = pmu->gp_counters;
-
-	return &counters[idx];
+	return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30));
 }
 
 static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
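
The new MSR decoding exploits the interleaved layout of the core PMU MSRs: starting at MSR_F15H_PERF_CTL0, even offsets are event-select (CTL) registers and odd offsets are counter (CTR) registers, so one division recovers the counter index. A standalone sketch (MSR value assumed from msr-index.h):

#include <assert.h>

#define PERF_CTL0 0xc0010200u	/* MSR_F15H_PERF_CTL0, assumed */

static unsigned int msr_to_idx(unsigned int msr)
{
	return (msr - PERF_CTL0) / 2;	/* one CTL/CTR pair per counter */
}

static int msr_is_ctl(unsigned int msr)
{
	return !((msr - PERF_CTL0) & 1);	/* even offset => CTL */
}

int main(void)
{
	assert(msr_to_idx(PERF_CTL0) == 0 && msr_is_ctl(PERF_CTL0));
	assert(msr_to_idx(PERF_CTL0 + 1) == 0 && !msr_is_ctl(PERF_CTL0 + 1));
	assert(msr_to_idx(PERF_CTL0 + 4) == 2 && msr_is_ctl(PERF_CTL0 + 4));
	return 0;
}
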
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index c399637a3a79..25b70a85bef5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -68,15 +68,11 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 	}
 }
 
-/* function is called when global control register has been updated. */
-static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 {
 	int bit;
-	u64 diff = pmu->global_ctrl ^ data;
 	struct kvm_pmc *pmc;
 
-	pmu->global_ctrl = data;
-
 	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
 		pmc = intel_pmc_idx_to_pmc(pmu, bit);
 		if (pmc)
@@ -397,7 +393,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	struct kvm_pmc *pmc;
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
-	u64 reserved_bits;
+	u64 reserved_bits, diff;
 
 	switch (msr) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -418,7 +414,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (pmu->global_ctrl == data)
 			return 0;
 		if (kvm_valid_perf_global_ctrl(pmu, data)) {
-			global_ctrl_changed(pmu, data);
+			diff = pmu->global_ctrl ^ data;
+			pmu->global_ctrl = data;
+			reprogram_counters(pmu, diff);
 			return 0;
 		}
 		break;
@@ -433,7 +431,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (pmu->pebs_enable == data)
 			return 0;
 		if (!(data & pmu->pebs_enable_mask)) {
+			diff = pmu->pebs_enable ^ data;
 			pmu->pebs_enable = data;
+			reprogram_counters(pmu, diff);
 			return 0;
 		}
 		break;
@@ -776,20 +776,23 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
 void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
 {
 	struct kvm_pmc *pmc = NULL;
-	int bit;
+	int bit, hw_idx;
 
 	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
 			 X86_PMC_IDX_MAX) {
 		pmc = intel_pmc_idx_to_pmc(pmu, bit);
 
 		if (!pmc || !pmc_speculative_in_use(pmc) ||
-		    !intel_pmc_is_enabled(pmc))
+		    !intel_pmc_is_enabled(pmc) || !pmc->perf_event)
 			continue;
 
-		if (pmc->perf_event && pmc->idx != pmc->perf_event->hw.idx) {
-			pmu->host_cross_mapped_mask |=
-				BIT_ULL(pmc->perf_event->hw.idx);
-		}
+		/*
+		 * A negative index indicates the event isn't mapped to a
+		 * physical counter in the host, e.g. due to contention.
+		 */
+		hw_idx = pmc->perf_event->hw.idx;
+		if (hw_idx != pmc->idx && hw_idx > -1)
+			pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
 	}
 }
 
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f76747862bd2..7ba5f61d7273 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -65,7 +65,9 @@ ifneq ($(CONFIG_X86_CMPXCHG64),y)
 endif
 else
         obj-y += iomap_copy_64.o
+ifneq ($(CONFIG_GENERIC_CSUM),y)
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
+endif
         lib-y += clear_page_64.o copy_page_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o
diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c
index 3e2f33fc33de..e0411a3774d4 100644
--- a/arch/x86/lib/iomem.c
+++ b/arch/x86/lib/iomem.c
@@ -1,6 +1,7 @@
 #include <linux/string.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/kmsan-checks.h>
 
 #define movs(type,to,from) \
 	asm volatile("movs" type:"=&D" (to), "=&S" (from):"0" (to), "1" (from):"memory")
@@ -37,6 +38,8 @@ static void string_memcpy_fromio(void *to, const volatile void __iomem *from, si
 		n-=2;
 	}
 	rep_movs(to, (const void *)from, n);
+	/* KMSAN must treat values read from devices as initialized. */
+	kmsan_unpoison_memory(to, n);
 }
 
 static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n)
@@ -44,6 +47,8 @@ static void string_memcpy_toio(volatile void __iomem *to, const void *from, size
 	if (unlikely(!n))
 		return;
 
+	/* Make sure uninitialized memory isn't copied to devices. */
+	kmsan_check_memory(from, n);
 	/* Align any unaligned destination IO */
 	if (unlikely(1 & (unsigned long)to)) {
 		movs("b", to, from);
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 829c1409ffbd..c80febc44cd2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -14,6 +14,8 @@ KASAN_SANITIZE_pgprot.o		:= n
 # Disable KCSAN entirely, because otherwise we get warnings that some functions
 # reference __initdata sections.
 KCSAN_SANITIZE := n
+# Avoid recursion by not calling KMSAN hooks for CEA code.
+KMSAN_SANITIZE_cpu_entry_area.o := n
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_mem_encrypt.o		= -pg
@@ -44,6 +46,9 @@ obj-$(CONFIG_HIGHMEM)		+= highmem_32.o
 KASAN_SANITIZE_kasan_init_$(BITS).o := n
 obj-$(CONFIG_KASAN)		+= kasan_init_$(BITS).o
 
+KMSAN_SANITIZE_kmsan_shadow.o	:= n
+obj-$(CONFIG_KMSAN)		+= kmsan_shadow.o
+
 obj-$(CONFIG_MMIOTRACE)		+= mmiotrace.o
 mmiotrace-y			:= kmmio.o pf_in.o mmio-mod.o
 obj-$(CONFIG_MMIOTRACE_TEST)	+= testmmiotrace.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a498ae1fbe66..7b0d4ab894c8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -260,7 +260,7 @@ static noinline int vmalloc_fault(unsigned long address)
 }
 NOKPROBE_SYMBOL(vmalloc_fault);
 
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+static void __arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
 	unsigned long addr;
 
@@ -284,6 +284,27 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 	}
 }
 
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+	__arch_sync_kernel_mappings(start, end);
+#ifdef CONFIG_KMSAN
+	/*
+	 * KMSAN maintains two additional metadata page mappings for the
+	 * [VMALLOC_START, VMALLOC_END) range. These mappings start at
+	 * KMSAN_VMALLOC_SHADOW_START and KMSAN_VMALLOC_ORIGIN_START and
+	 * have to be synced together with the vmalloc memory mapping.
+	 */
+	if (start >= VMALLOC_START && end < VMALLOC_END) {
+		__arch_sync_kernel_mappings(
+			start - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START,
+			end - VMALLOC_START + KMSAN_VMALLOC_SHADOW_START);
+		__arch_sync_kernel_mappings(
+			start - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START,
+			end - VMALLOC_START + KMSAN_VMALLOC_ORIGIN_START);
+	}
+#endif
+}
+
 static bool low_pfn(unsigned long pfn)
 {
 	return pfn < max_low_pfn;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82a042c03824..9121bc1b9453 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -1054,7 +1054,7 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 }
 
 #ifdef CONFIG_SWAP
-unsigned long max_swapfile_size(void)
+unsigned long arch_max_swapfile_size(void)
 {
 	unsigned long pages;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0fe690ebc269..3f040c6e5d13 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init)
 DEFINE_ENTRY(pmd, pmd, init)
 DEFINE_ENTRY(pte, pte, init)
 
+static inline pgprot_t prot_sethuge(pgprot_t prot)
+{
+	WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT);
+
+	return __pgprot(pgprot_val(prot) | _PAGE_PSE);
+}
 
 /*
  * NOTE: pagetable_init allocates all the fixmap pagetables contiguously on the
@@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-			set_pte_init((pte_t *)pmd,
-				     pfn_pte((paddr & PMD_MASK) >> PAGE_SHIFT,
-					     __pgprot(pgprot_val(prot) | _PAGE_PSE)),
+			set_pmd_init(pmd,
+				     pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
 				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
@@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
 		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			spin_lock(&init_mm.page_table_lock);
-
-			prot = __pgprot(pgprot_val(prot) | _PAGE_PSE);
-
-			set_pte_init((pte_t *)pud,
-				     pfn_pte((paddr & PUD_MASK) >> PAGE_SHIFT,
-					     prot),
+			set_pud_init(pud,
+				     pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
 				     init);
 			spin_unlock(&init_mm.page_table_lock);
 			paddr_last = paddr_next;
@@ -1287,7 +1288,7 @@ static void __init preallocate_vmalloc_pages(void)
 	unsigned long addr;
 	const char *lvl;
 
-	for (addr = VMALLOC_START; addr <= VMALLOC_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+	for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
 		pgd_t *pgd = pgd_offset_k(addr);
 		p4d_t *p4d;
 		pud_t *pud;
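
The WARN_ON_ONCE() in prot_sethuge() guards a genuine x86 encoding quirk:
in a 4K PTE the PAT bit is bit 7, which is exactly the PSE bit in 2M/1G
entries (large pages carry PAT at bit 12 instead). A 4K-style protection
with PAT set would therefore be silently reinterpreted as "huge" here. A
standalone model with the real bit positions (the demo itself is ours):

	#include <assert.h>
	#include <stdint.h>

	#define _PAGE_PSE (1ULL << 7)	/* huge-page bit in PMD/PUD entries */
	#define _PAGE_PAT (1ULL << 7)	/* PAT bit in 4K PTEs: same bit! */

	int main(void)
	{
		uint64_t prot_4k_with_pat = _PAGE_PAT;

		/* ORing in PSE cannot be told apart from PAT being set. */
		assert((prot_4k_with_pat | _PAGE_PSE) == _PAGE_PSE);
		return 0;
	}
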
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 1ad0228f8ceb..78c5bc654cff 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -17,6 +17,7 @@
 #include <linux/cc_platform.h>
 #include <linux/efi.h>
 #include <linux/pgtable.h>
+#include <linux/kmsan.h>
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
@@ -479,6 +480,8 @@ void iounmap(volatile void __iomem *addr)
 		return;
 	}
 
+	kmsan_iounmap_page_range((unsigned long)addr,
+		(unsigned long)addr + get_vm_area_size(p));
 	memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
 
 	/* Finally remove it */
diff --git a/arch/x86/mm/kmsan_shadow.c b/arch/x86/mm/kmsan_shadow.c
new file mode 100644
index 000000000000..bee2ec4a3bfa
--- /dev/null
+++ b/arch/x86/mm/kmsan_shadow.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86-specific bits of KMSAN shadow implementation.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ */
+
+#include <asm/cpu_entry_area.h>
+#include <linux/percpu-defs.h>
+
+/*
+ * Addresses within the CPU entry area (including e.g. exception stacks) do not
+ * have struct page entries corresponding to them, so they need separate
+ * handling.
+ * arch_kmsan_get_meta_or_null() (declared in the header) maps the addresses in
+ * CPU entry area to addresses in cpu_entry_area_shadow/cpu_entry_area_origin.
+ */
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_shadow);
+DEFINE_PER_CPU(char[CPU_ENTRY_AREA_SIZE], cpu_entry_area_origin);
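
To see how these arrays get used: arch_kmsan_get_meta_or_null() has to turn
a CPU-entry-area address into a slot of the matching per-CPU array. A
sketch of the arithmetic, assuming the kernel's CPU_ENTRY_AREA_BASE /
CPU_ENTRY_AREA_SIZE layout (this body is our reconstruction, not the
declared implementation):

	static void *cea_meta_or_null(unsigned long addr, bool is_origin)
	{
		int cpu = (addr - CPU_ENTRY_AREA_BASE) / CPU_ENTRY_AREA_SIZE;
		unsigned long off = addr -
			(unsigned long)get_cpu_entry_area(cpu);
		char *meta = is_origin ? cpu_entry_area_origin
				       : cpu_entry_area_shadow;

		if (off >= CPU_ENTRY_AREA_SIZE)
			return NULL;	/* not a CEA address after all */
		return &per_cpu(meta[off], cpu);
	}
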
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 1abd5438f126..97342c42dda8 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -580,6 +580,46 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
 }
 
 /*
+ * Validate strict W^X semantics.
+ */
+static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
+				  unsigned long pfn, unsigned long npg)
+{
+	unsigned long end;
+
+	/*
+	 * 32-bit has some unfixable W+X issues, like EFI code
+	 * and writeable data being in the same page.  Disable
+	 * detection and enforcement there.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32))
+		return new;
+
+	/* Only verify when NX is supported: */
+	if (!(__supported_pte_mask & _PAGE_NX))
+		return new;
+
+	if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX)))
+		return new;
+
+	if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
+		return new;
+
+	end = start + npg * PAGE_SIZE - 1;
+	WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
+		  (unsigned long long)pgprot_val(old),
+		  (unsigned long long)pgprot_val(new),
+		  start, end, pfn);
+
+	/*
+	 * For now, allow all permission change attempts by returning the
+	 * attempted permissions.  This could be changed to 'return old'
+	 * to actively refuse the permission change at a later time.
+	 */
+	return new;
+}
+
+/*
  * Lookup the page table entry for a virtual address in a specific pgd.
  * Return a pointer to the entry and the level of the mapping.
  */
@@ -885,6 +925,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
 	new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
 				      psize, CPA_DETECT);
 
+	new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+
 	/*
 	 * If there is a conflict, split the large page.
 	 *
@@ -1525,6 +1567,7 @@ repeat:
 
 	if (level == PG_LEVEL_4K) {
 		pte_t new_pte;
+		pgprot_t old_prot = pte_pgprot(old_pte);
 		pgprot_t new_prot = pte_pgprot(old_pte);
 		unsigned long pfn = pte_pfn(old_pte);
 
@@ -1536,6 +1579,8 @@ repeat:
 		new_prot = static_protections(new_prot, address, pfn, 1, 0,
 					      CPA_PROTECT);
 
+		new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+
 		new_prot = pgprot_clear_protnone_bits(new_prot);
 
 		/*
@@ -1944,7 +1989,7 @@ int set_mce_nospec(unsigned long pfn)
 	return rc;
 }
 
-static int set_memory_present(unsigned long *addr, int numpages)
+static int set_memory_p(unsigned long *addr, int numpages)
 {
 	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0);
 }
@@ -1954,7 +1999,7 @@ int clear_mce_nospec(unsigned long pfn)
 {
 	unsigned long addr = (unsigned long) pfn_to_kaddr(pfn);
 
-	return set_memory_present(&addr, 1);
+	return set_memory_p(&addr, 1);
 }
 EXPORT_SYMBOL_GPL(clear_mce_nospec);
 #endif /* CONFIG_X86_64 */
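
The core predicate in verify_rwx() deserves spelling out: an entry violates
W^X exactly when it is writable (_PAGE_RW set) and executable (_PAGE_NX
clear). A standalone model using the real x86 bit positions (the test
values are ours):

	#include <stdio.h>
	#include <stdint.h>

	#define _PAGE_RW (1ULL << 1)
	#define _PAGE_NX (1ULL << 63)

	static int is_wx(uint64_t prot)
	{
		/* Writable and not no-exec: what verify_rwx() warns on. */
		return (prot & (_PAGE_RW | _PAGE_NX)) == _PAGE_RW;
	}

	int main(void)
	{
		printf("%d\n", is_wx(_PAGE_RW));		/* 1: W+X */
		printf("%d\n", is_wx(_PAGE_RW | _PAGE_NX));	/* 0: W only */
		printf("%d\n", is_wx(0));			/* 0: X only */
		return 0;
	}

The XOR test before it skips the warning whenever neither RW nor NX is
being changed, so a pre-existing W+X mapping does not warn on unrelated
attribute updates.
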
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a932d7712d85..8525f2876fb4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return ret;
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)
 int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pmd_t *pmdp)
 {
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 
 	return ret;
 }
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 int pudp_test_and_clear_young(struct vm_area_struct *vma,
 			      unsigned long addr, pud_t *pudp)
 {
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 83f1b6a56449..f614009d3e4e 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -10,6 +10,7 @@
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
+KMSAN_SANITIZE			:= n
 OBJECT_FILES_NON_STANDARD	:= y
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
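
Both flavors of the kbuild convention appear in this series: a
directory-wide switch (as here) and a per-object one (as for
cpu_entry_area.o earlier); foo.o below is a placeholder:

	# Disable KMSAN for every object built by this Makefile:
	KMSAN_SANITIZE := n

	# Disable it for a single object only:
	KMSAN_SANITIZE_foo.o := n

KASAN_SANITIZE and KCSAN_SANITIZE follow the same pattern.
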
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 85246dd9faa1..9b1ec5d8c99c 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -92,3 +92,12 @@ config XEN_DOM0
 	select X86_X2APIC if XEN_PVH && X86_64
 	help
 	  Support running as a Xen Dom0 guest.
+
+config XEN_PV_MSR_SAFE
+	bool "Always use safe MSR accesses in PV guests"
+	default y
+	depends on XEN_PV
+	help
+	  Use safe (non-faulting) MSR access functions even for MSR accesses
+	  that are not expected to fault.
+	  The default can be changed with the "xen_msr_safe" boot parameter.
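
parse_xen_msr_safe() below feeds the value to strtobool(), so the parameter
takes the usual boolean spellings. For example, to force raw (potentially
faulting) MSR accesses on a kernel built with XEN_PV_MSR_SAFE=y, boot with:

	xen_msr_safe=0
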
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index 1c1ac418484b..c1cd28e915a3 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -212,7 +212,7 @@ static void __init xen_hvm_guest_init(void)
 		return;
 
 	if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT))
-		virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
+		virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
 
 	init_hvm_pv_info();
 
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 9b1a58dda935..f82857e48815 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -108,11 +108,21 @@ struct tls_descs {
  */
 static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 
+static __read_mostly bool xen_msr_safe = IS_ENABLED(CONFIG_XEN_PV_MSR_SAFE);
+
+static int __init parse_xen_msr_safe(char *str)
+{
+	if (str)
+		return strtobool(str, &xen_msr_safe);
+	return -EINVAL;
+}
+early_param("xen_msr_safe", parse_xen_msr_safe);
+
 static void __init xen_pv_init_platform(void)
 {
 	/* PV guests can't operate virtio devices without grants. */
 	if (IS_ENABLED(CONFIG_XEN_VIRTIO))
-		virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc);
+		virtio_set_mem_acc_cb(xen_virtio_restricted_mem_acc);
 
 	populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
 
@@ -917,14 +927,18 @@ static void xen_write_cr4(unsigned long cr4)
 	native_write_cr4(cr4);
 }
 
-static u64 xen_read_msr_safe(unsigned int msr, int *err)
+static u64 xen_do_read_msr(unsigned int msr, int *err)
 {
-	u64 val;
+	u64 val = 0;	/* Avoid uninitialized value for safe variant. */
 
 	if (pmu_msr_read(msr, &val, err))
 		return val;
 
-	val = native_read_msr_safe(msr, err);
+	if (err)
+		val = native_read_msr_safe(msr, err);
+	else
+		val = native_read_msr(msr);
+
 	switch (msr) {
 	case MSR_IA32_APICBASE:
 		val &= ~X2APIC_ENABLE;
@@ -933,23 +947,39 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
 	return val;
 }
 
-static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
+static void set_seg(unsigned int which, unsigned int low, unsigned int high,
+		    int *err)
 {
-	int ret;
-	unsigned int which;
-	u64 base;
+	u64 base = ((u64)high << 32) | low;
 
-	ret = 0;
+	if (HYPERVISOR_set_segment_base(which, base) == 0)
+		return;
 
+	if (err)
+		*err = -EIO;
+	else
+		WARN(1, "Xen set_segment_base(%u, %llx) failed\n", which, base);
+}
+
+/*
+ * Support both write_msr_safe() and write_msr() semantics.
+ * With err == NULL, write_msr() semantics are selected.
+ * If an err pointer is supplied, it must be pre-initialized to 0.
+ */
+static void xen_do_write_msr(unsigned int msr, unsigned int low,
+			     unsigned int high, int *err)
+{
 	switch (msr) {
-	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
-	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
-	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;
-
-	set:
-		base = ((u64)high << 32) | low;
-		if (HYPERVISOR_set_segment_base(which, base) != 0)
-			ret = -EIO;
+	case MSR_FS_BASE:
+		set_seg(SEGBASE_FS, low, high, err);
+		break;
+
+	case MSR_KERNEL_GS_BASE:
+		set_seg(SEGBASE_GS_USER, low, high, err);
+		break;
+
+	case MSR_GS_BASE:
+		set_seg(SEGBASE_GS_KERNEL, low, high, err);
 		break;
 
 	case MSR_STAR:
@@ -965,31 +995,42 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 		break;
 
 	default:
-		if (!pmu_msr_write(msr, low, high, &ret))
-			ret = native_write_msr_safe(msr, low, high);
+		if (!pmu_msr_write(msr, low, high, err)) {
+			if (err)
+				*err = native_write_msr_safe(msr, low, high);
+			else
+				native_write_msr(msr, low, high);
+		}
 	}
+}
 
-	return ret;
+static u64 xen_read_msr_safe(unsigned int msr, int *err)
+{
+	return xen_do_read_msr(msr, err);
+}
+
+static int xen_write_msr_safe(unsigned int msr, unsigned int low,
+			      unsigned int high)
+{
+	int err = 0;
+
+	xen_do_write_msr(msr, low, high, &err);
+
+	return err;
 }
 
 static u64 xen_read_msr(unsigned int msr)
 {
-	/*
-	 * This will silently swallow a #GP from RDMSR.  It may be worth
-	 * changing that.
-	 */
 	int err;
 
-	return xen_read_msr_safe(msr, &err);
+	return xen_do_read_msr(msr, xen_msr_safe ? &err : NULL);
 }
 
 static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
 {
-	/*
-	 * This will silently swallow a #GP from WRMSR.  It may be worth
-	 * changing that.
-	 */
-	xen_write_msr_safe(msr, low, high);
+	int err;
+
+	xen_do_write_msr(msr, low, high, xen_msr_safe ? &err : NULL);
 }
 
 /* This is called once we have the cpu_possible_mask */
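
The rework funnels both MSR paths through one worker and overloads the err
pointer as the mode switch: non-NULL gives *_safe() semantics, NULL gives
the raw semantics that let a fault be loud. The shape of that pattern as a
standalone model (the names and the fake faulting MSR are ours):

	#include <stdio.h>

	/* err != NULL: report failure via *err (safe variant).
	 * err == NULL: treat failure as a programming error (raw variant). */
	static unsigned long long do_read(unsigned int msr, int *err)
	{
		int faults = (msr == 0xdead);	/* stand-in for a faulting RDMSR */

		if (faults) {
			if (err)
				*err = -5;	/* an -EIO-style error code */
			else
				fprintf(stderr, "unexpected fault on %#x\n", msr);
			return 0;
		}
		if (err)
			*err = 0;
		return 42;
	}

	int main(void)
	{
		int err = 0;

		do_read(0xdead, &err);	/* safe: err becomes -5 */
		do_read(0xdead, NULL);	/* raw: complains on stderr */
		printf("err=%d\n", err);
		return 0;
	}

xen_msr_safe then decides at boot whether the ordinary read_msr() and
write_msr() paravirt hooks run with the safe or the raw semantics.
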
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 21ecbe754cb2..68aff1382872 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -131,6 +131,10 @@ static inline uint32_t get_fam15h_addr(u32 addr)
 
 static inline bool is_amd_pmu_msr(unsigned int msr)
 {
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+		return false;
+
 	if ((msr >= MSR_F15H_PERF_CTL &&
 	     msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
 	    (msr >= MSR_K7_EVNTSEL0 &&
@@ -140,10 +144,15 @@ static inline bool is_amd_pmu_msr(unsigned int msr)
 	return false;
 }
 
-static int is_intel_pmu_msr(u32 msr_index, int *type, int *index)
+static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
 {
 	u32 msr_index_pmc;
 
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
+		return false;
+
 	switch (msr_index) {
 	case MSR_CORE_PERF_FIXED_CTR_CTRL:
 	case MSR_IA32_DS_AREA:
@@ -290,48 +299,52 @@ static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
 	return false;
 }
 
+static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read,
+				 bool *emul)
+{
+	int type, index;
+
+	if (is_amd_pmu_msr(msr))
+		*emul = xen_amd_pmu_emulate(msr, val, is_read);
+	else if (is_intel_pmu_msr(msr, &type, &index))
+		*emul = xen_intel_pmu_emulate(msr, val, type, index, is_read);
+	else
+		return false;
+
+	return true;
+}
+
 bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
 {
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
-		if (is_amd_pmu_msr(msr)) {
-			if (!xen_amd_pmu_emulate(msr, val, 1))
-				*val = native_read_msr_safe(msr, err);
-			return true;
-		}
-	} else {
-		int type, index;
+	bool emulated;
 
-		if (is_intel_pmu_msr(msr, &type, &index)) {
-			if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
-				*val = native_read_msr_safe(msr, err);
-			return true;
-		}
+	if (!pmu_msr_chk_emulated(msr, val, true, &emulated))
+		return false;
+
+	if (!emulated) {
+		*val = err ? native_read_msr_safe(msr, err)
+			   : native_read_msr(msr);
 	}
 
-	return false;
+	return true;
 }
 
 bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
 {
 	uint64_t val = ((uint64_t)high << 32) | low;
+	bool emulated;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
-		if (is_amd_pmu_msr(msr)) {
-			if (!xen_amd_pmu_emulate(msr, &val, 0))
-				*err = native_write_msr_safe(msr, low, high);
-			return true;
-		}
-	} else {
-		int type, index;
+	if (!pmu_msr_chk_emulated(msr, &val, false, &emulated))
+		return false;
 
-		if (is_intel_pmu_msr(msr, &type, &index)) {
-			if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
-				*err = native_write_msr_safe(msr, low, high);
-			return true;
-		}
+	if (!emulated) {
+		if (err)
+			*err = native_write_msr_safe(msr, low, high);
+		else
+			native_write_msr(msr, low, high);
 	}
 
-	return false;
+	return true;
 }
 
 static unsigned long long xen_amd_read_pmc(int counter)
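
pmu_msr_chk_emulated() collapses the old nested vendor/MSR tests into one
call with two separate outputs: the return value answers "is this a PMU MSR
at all" (false means fall back to a native access), while *emul reports
whether the access was already emulated. The calling convention, condensed
from pmu_msr_read() above:

	bool emulated;

	if (!pmu_msr_chk_emulated(msr, val, true, &emulated))
		return false;		/* not a PMU MSR: caller goes native */

	if (!emulated)			/* PMU MSR, but not emulated */
		*val = err ? native_read_msr_safe(msr, err)
			   : native_read_msr(msr);

	return true;

The added vendor tests also tighten the classification: previously any
non-Intel vendor took the AMD branch, whereas the explicit lists (AMD and
Hygon on one side; Intel, Centaur and Zhaoxin on the other) now make each
classifier refuse MSRs from the wrong vendor family up front.
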
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index ba7af2eca755..480be82e9b7b 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -179,7 +179,7 @@ static void __init _get_smp_config(unsigned int early)
 	 * hypercall to expand the max number of VCPUs an already
 	 * running guest has. So cap it up to X. */
 	if (subtract)
-		nr_cpu_ids = nr_cpu_ids - subtract;
+		set_nr_cpu_ids(nr_cpu_ids - subtract);
 #endif
 
 }
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 12ac277282ba..bcb0c5d2abc2 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -771,7 +771,7 @@ config HIGHMEM
 
 	  If unsure, say Y.
 
-config FORCE_MAX_ZONEORDER
+config ARCH_FORCE_MAX_ORDER
 	int "Maximum zone order"
 	default "11"
 	help
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile
index 5097caa7bf0c..bfd8e433ed62 100644
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -55,8 +55,6 @@ KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(vardirs) $(plfdirs))
 
 KBUILD_DEFCONFIG := iss_defconfig
 
-head-y		:= arch/xtensa/kernel/head.o
-
 libs-y		+= arch/xtensa/lib/
 
 boot		:= arch/xtensa/boot
diff --git a/arch/xtensa/configs/audio_kc705_defconfig b/arch/xtensa/configs/audio_kc705_defconfig
index 3be62da8089b..ef0ebcfbccf9 100644
--- a/arch/xtensa/configs/audio_kc705_defconfig
+++ b/arch/xtensa/configs/audio_kc705_defconfig
@@ -120,7 +120,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/xtensa/configs/cadence_csp_defconfig b/arch/xtensa/configs/cadence_csp_defconfig
index fc240737b14d..2665962d247a 100644
--- a/arch/xtensa/configs/cadence_csp_defconfig
+++ b/arch/xtensa/configs/cadence_csp_defconfig
@@ -100,7 +100,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/xtensa/configs/generic_kc705_defconfig b/arch/xtensa/configs/generic_kc705_defconfig
index e9d6b6f6eca1..236c7f23cc10 100644
--- a/arch/xtensa/configs/generic_kc705_defconfig
+++ b/arch/xtensa/configs/generic_kc705_defconfig
@@ -107,7 +107,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/xtensa/configs/nommu_kc705_defconfig b/arch/xtensa/configs/nommu_kc705_defconfig
index fcb620ef3799..8263da9e078d 100644
--- a/arch/xtensa/configs/nommu_kc705_defconfig
+++ b/arch/xtensa/configs/nommu_kc705_defconfig
@@ -105,7 +105,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 # CONFIG_FRAME_POINTER is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_VM=y
diff --git a/arch/xtensa/configs/smp_lx200_defconfig b/arch/xtensa/configs/smp_lx200_defconfig
index a47c85638ec1..7bdffa3a69c6 100644
--- a/arch/xtensa/configs/smp_lx200_defconfig
+++ b/arch/xtensa/configs/smp_lx200_defconfig
@@ -111,7 +111,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_VM=y
 CONFIG_LOCKUP_DETECTOR=y
diff --git a/arch/xtensa/configs/virt_defconfig b/arch/xtensa/configs/virt_defconfig
index 6d1387dfa96f..98acb7191cb7 100644
--- a/arch/xtensa/configs/virt_defconfig
+++ b/arch/xtensa/configs/virt_defconfig
@@ -97,7 +97,7 @@ CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_FONTS=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
diff --git a/arch/xtensa/configs/xip_kc705_defconfig b/arch/xtensa/configs/xip_kc705_defconfig
index 062148e17135..1c3cebaaa71b 100644
--- a/arch/xtensa/configs/xip_kc705_defconfig
+++ b/arch/xtensa/configs/xip_kc705_defconfig
@@ -102,7 +102,7 @@ CONFIG_CRYPTO_LZO=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/xtensa/include/asm/elf.h b/arch/xtensa/include/asm/elf.h
index 909a6ab4f22b..ffcf1ada19c6 100644
--- a/arch/xtensa/include/asm/elf.h
+++ b/arch/xtensa/include/asm/elf.h
@@ -93,6 +93,10 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 #define elf_check_arch(x) ( ( (x)->e_machine == EM_XTENSA )  || \
 			    ( (x)->e_machine == EM_XTENSA_OLD ) )
 
+#define ELFOSABI_XTENSA_FDPIC 65
+#define elf_check_fdpic(x) ((x)->e_ident[EI_OSABI] == ELFOSABI_XTENSA_FDPIC)
+#define ELF_FDPIC_CORE_EFLAGS 0
+
 /*
  * These are used to set parameters in the core dumps.
  */
@@ -153,10 +157,22 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
  */
 
 #define ELF_PLAT_INIT(_r, load_addr) \
-	do { _r->areg[0]=0; /*_r->areg[1]=0;*/ _r->areg[2]=0;  _r->areg[3]=0;  \
-	     _r->areg[4]=0;  _r->areg[5]=0;    _r->areg[6]=0;  _r->areg[7]=0;  \
-	     _r->areg[8]=0;  _r->areg[9]=0;    _r->areg[10]=0; _r->areg[11]=0; \
-	     _r->areg[12]=0; _r->areg[13]=0;   _r->areg[14]=0; _r->areg[15]=0; \
+	do { \
+		(_r)->areg[0]  = 0; /*(_r)->areg[1] = 0;*/ \
+		(_r)->areg[2]  = 0; (_r)->areg[3]  = 0; \
+		(_r)->areg[4]  = 0; (_r)->areg[5]  = 0; \
+		(_r)->areg[6]  = 0; (_r)->areg[7]  = 0; \
+		(_r)->areg[8]  = 0; (_r)->areg[9]  = 0; \
+		(_r)->areg[10] = 0; (_r)->areg[11] = 0; \
+		(_r)->areg[12] = 0; (_r)->areg[13] = 0; \
+		(_r)->areg[14] = 0; (_r)->areg[15] = 0; \
+	} while (0)
+
+#define ELF_FDPIC_PLAT_INIT(_r, _exec_map_addr, _interp_map_addr, dynamic_addr) \
+	do { \
+		(_r)->areg[4] = _exec_map_addr; \
+		(_r)->areg[5] = _interp_map_addr; \
+		(_r)->areg[6] = dynamic_addr; \
 	} while (0)
 
 typedef struct {
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 76bc63127c66..228e4dff5fb2 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -205,9 +205,12 @@ struct thread_struct {
 #define start_thread(regs, new_pc, new_sp) \
 	do { \
 		unsigned long syscall = (regs)->syscall; \
+		unsigned long current_aregs[16]; \
+		memcpy(current_aregs, (regs)->areg, sizeof(current_aregs)); \
 		memset((regs), 0, sizeof(*(regs))); \
 		(regs)->pc = (new_pc); \
 		(regs)->ps = USER_PS_VALUE; \
+		memcpy((regs)->areg, current_aregs, sizeof(current_aregs)); \
 		(regs)->areg[1] = (new_sp); \
 		(regs)->areg[0] = 0; \
 		(regs)->wmask = 1; \
@@ -221,9 +224,6 @@ struct thread_struct {
 struct task_struct;
 struct mm_struct;
 
-/* Free all resources held by a thread. */
-#define release_thread(thread) do { } while(0)
-
 extern unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 7966a58af472..1ff0c858544f 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -111,6 +111,8 @@
 
 #define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
 
+#define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/arch/xtensa/include/uapi/asm/ptrace.h b/arch/xtensa/include/uapi/asm/ptrace.h
index 50db3e0a6341..9115e86ebc75 100644
--- a/arch/xtensa/include/uapi/asm/ptrace.h
+++ b/arch/xtensa/include/uapi/asm/ptrace.h
@@ -37,6 +37,10 @@
 #define PTRACE_SETXTREGS	19
 #define PTRACE_GETHBPREGS	20
 #define PTRACE_SETHBPREGS	21
+#define PTRACE_GETFDPIC		22
+
+#define PTRACE_GETFDPIC_EXEC	0
+#define PTRACE_GETFDPIC_INTERP	1
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index 897c1c741058..f28b8e3d717e 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -3,9 +3,9 @@
 # Makefile for the Linux/Xtensa kernel.
 #
 
-extra-y := head.o vmlinux.lds
+extra-y := vmlinux.lds
 
-obj-y := align.o coprocessor.o entry.o irq.o platform.o process.o \
+obj-y := head.o align.o coprocessor.o entry.o irq.o platform.o process.o \
 	 ptrace.o setup.o signal.o stacktrace.o syscall.o time.o traps.o \
 	 vectors.o
 
diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c
index 201356faa7e6..b3c2450d6f23 100644
--- a/arch/xtensa/kernel/syscall.c
+++ b/arch/xtensa/kernel/syscall.c
@@ -58,6 +58,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
 	struct vm_area_struct *vmm;
+	struct vma_iterator vmi;
 
 	if (flags & MAP_FIXED) {
 		/* We do not accept a shared mapping if it would violate
@@ -79,15 +80,20 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	else
 		addr = PAGE_ALIGN(addr);
 
-	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
-		/* At this point:  (!vmm || addr < vmm->vm_end). */
-		if (TASK_SIZE - len < addr)
-			return -ENOMEM;
-		if (!vmm || addr + len <= vm_start_gap(vmm))
-			return addr;
+	vma_iter_init(&vmi, current->mm, addr);
+	for_each_vma(vmi, vmm) {
+		/* At this point:  (addr < vmm->vm_end). */
+		if (addr + len <= vm_start_gap(vmm))
+			break;
+
 		addr = vmm->vm_end;
 		if (flags & MAP_SHARED)
 			addr = COLOUR_ALIGN(addr, pgoff);
 	}
+
+	if (TASK_SIZE - len < addr)
+		return -ENOMEM;
+
+	return addr;
 }
 #endif
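
This is the stock conversion from vm_next list walking to the VMA iterator
(maple tree) API: for_each_vma() visits VMAs in address order starting from
the position set by vma_iter_init(), and running out of VMAs now means
falling off the loop instead of seeing a NULL vmm. The skeleton of the
idiom (kernel context assumed):

	struct vma_iterator vmi;
	struct vm_area_struct *vma;

	vma_iter_init(&vmi, current->mm, addr);
	for_each_vma(vmi, vma) {
		if (addr + len <= vm_start_gap(vma))
			break;		/* found a large-enough gap */
		addr = vma->vm_end;	/* keep searching above this VMA */
	}
	/* addr is usable only if it still fits below TASK_SIZE. */

Note the TASK_SIZE check moved out of the loop: the old code reached it via
the NULL-vmm iteration, while the new code must run it after the walk to
cover both the "gap found" and "past the last VMA" outcomes.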