-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 7
-rw-r--r--  Documentation/admin-guide/perf/alibaba_pmu.rst | 100
-rw-r--r--  Documentation/admin-guide/perf/index.rst | 1
-rw-r--r--  Documentation/arm64/elf_hwcaps.rst | 3
-rw-r--r--  Documentation/arm64/silicon-errata.rst | 2
-rw-r--r--  Documentation/arm64/sme.rst | 3
-rw-r--r--  Documentation/arm64/sve.rst | 22
-rw-r--r--  MAINTAINERS | 6
-rw-r--r--  arch/arm64/Kconfig | 18
-rw-r--r--  arch/arm64/configs/defconfig | 17
-rw-r--r--  arch/arm64/include/asm/alternative-macros.h | 66
-rw-r--r--  arch/arm64/include/asm/assembler.h | 20
-rw-r--r--  arch/arm64/include/asm/atomic_ll_sc.h | 58
-rw-r--r--  arch/arm64/include/asm/atomic_lse.h | 46
-rw-r--r--  arch/arm64/include/asm/cache.h | 4
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 83
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 18
-rw-r--r--  arch/arm64/include/asm/exception.h | 9
-rw-r--r--  arch/arm64/include/asm/hw_breakpoint.h | 4
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 1
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 5
-rw-r--r--  arch/arm64/include/asm/kvm_pgtable.h | 6
-rw-r--r--  arch/arm64/include/asm/lse.h | 5
-rw-r--r--  arch/arm64/include/asm/module.h | 15
-rw-r--r--  arch/arm64/include/asm/mte.h | 5
-rw-r--r--  arch/arm64/include/asm/processor.h | 2
-rw-r--r--  arch/arm64/include/asm/sdei.h | 17
-rw-r--r--  arch/arm64/include/asm/stacktrace.h | 71
-rw-r--r--  arch/arm64/include/asm/stacktrace/common.h | 221
-rw-r--r--  arch/arm64/include/asm/stacktrace/nvhe.h | 4
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 211
-rw-r--r--  arch/arm64/include/asm/system_misc.h | 2
-rw-r--r--  arch/arm64/include/asm/uaccess.h | 22
-rw-r--r--  arch/arm64/include/asm/vdso.h | 3
-rw-r--r--  arch/arm64/include/asm/vdso/gettimeofday.h | 19
-rw-r--r--  arch/arm64/include/uapi/asm/hwcap.h | 1
-rw-r--r--  arch/arm64/include/uapi/asm/perf_regs.h | 7
-rw-r--r--  arch/arm64/kernel/Makefile | 1
-rw-r--r--  arch/arm64/kernel/alternative.c | 92
-rw-r--r--  arch/arm64/kernel/compat_alignment.c | 387
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 26
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 325
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 1
-rw-r--r--  arch/arm64/kernel/debug-monitors.c | 2
-rw-r--r--  arch/arm64/kernel/entry-common.c | 32
-rw-r--r--  arch/arm64/kernel/entry.S | 8
-rw-r--r--  arch/arm64/kernel/ftrace.c | 17
-rw-r--r--  arch/arm64/kernel/head.S | 10
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 8
-rw-r--r--  arch/arm64/kernel/idreg-override.c | 10
-rw-r--r--  arch/arm64/kernel/image-vars.h | 5
-rw-r--r--  arch/arm64/kernel/irq.c | 14
-rw-r--r--  arch/arm64/kernel/module-plts.c | 3
-rw-r--r--  arch/arm64/kernel/module.c | 15
-rw-r--r--  arch/arm64/kernel/mte.c | 51
-rw-r--r--  arch/arm64/kernel/perf_event.c | 8
-rw-r--r--  arch/arm64/kernel/perf_regs.c | 30
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 27
-rw-r--r--  arch/arm64/kernel/proton-pack.c | 16
-rw-r--r--  arch/arm64/kernel/ptrace.c | 27
-rw-r--r--  arch/arm64/kernel/reloc_test_core.c | 4
-rw-r--r--  arch/arm64/kernel/sdei.c | 32
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 66
-rw-r--r--  arch/arm64/kernel/suspend.c | 2
-rw-r--r--  arch/arm64/kernel/traps.c | 48
-rw-r--r--  arch/arm64/kernel/vdso.c | 3
-rw-r--r--  arch/arm64/kernel/vdso/vdso.lds.S | 7
-rw-r--r--  arch/arm64/kvm/debug.c | 4
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S | 4
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 60
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 38
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/stacktrace.c | 40
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/sys_regs.c | 10
-rw-r--r--  arch/arm64/kvm/hyp/pgtable.c | 2
-rw-r--r--  arch/arm64/kvm/pmu-emul.c | 16
-rw-r--r--  arch/arm64/kvm/reset.c | 12
-rw-r--r--  arch/arm64/kvm/stacktrace.c | 137
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 48
-rw-r--r--  arch/arm64/kvm/va_layout.c | 5
-rw-r--r--  arch/arm64/mm/context.c | 6
-rw-r--r--  arch/arm64/mm/dma-mapping.c | 2
-rw-r--r--  arch/arm64/mm/fault.c | 3
-rw-r--r--  arch/arm64/mm/init.c | 2
-rw-r--r--  arch/arm64/mm/mmu.c | 23
-rw-r--r--  arch/arm64/mm/pageattr.c | 8
-rw-r--r--  arch/arm64/mm/proc.S | 46
-rw-r--r--  arch/arm64/tools/cpucaps | 3
-rw-r--r--  arch/arm64/tools/sysreg | 449
-rw-r--r--  drivers/firmware/efi/libstub/arm64-stub.c | 4
-rw-r--r--  drivers/hwtracing/coresight/coresight-etm4x-core.c | 4
-rw-r--r--  drivers/hwtracing/coresight/coresight-trbe.h | 3
-rw-r--r--  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6
-rw-r--r--  drivers/irqchip/irq-gic-v4.c | 2
-rw-r--r--  drivers/perf/Kconfig | 7
-rw-r--r--  drivers/perf/Makefile | 1
-rw-r--r--  drivers/perf/alibaba_uncore_drw_pmu.c | 810
-rw-r--r--  drivers/perf/arm_pmu.c | 2
-rw-r--r--  drivers/perf/arm_spe_pmu.c | 6
-rw-r--r--  tools/testing/selftests/arm64/abi/.gitignore | 2
-rw-r--r--  tools/testing/selftests/arm64/abi/Makefile | 2
-rw-r--r--  tools/testing/selftests/arm64/abi/hwcap.c | 336
-rw-r--r--  tools/testing/selftests/arm64/abi/ptrace.c | 241
-rw-r--r--  tools/testing/selftests/arm64/abi/syscall-abi.c | 61
-rw-r--r--  tools/testing/selftests/arm64/fp/.gitignore | 1
-rw-r--r--  tools/testing/selftests/arm64/fp/Makefile | 5
-rw-r--r--  tools/testing/selftests/arm64/fp/asm-offsets.h | 1
-rw-r--r--  tools/testing/selftests/arm64/fp/fp-stress.c | 555
-rw-r--r--  tools/testing/selftests/arm64/fp/fpsimd-test.S | 51
-rw-r--r--  tools/testing/selftests/arm64/fp/sve-test.S | 51
-rw-r--r--  tools/testing/selftests/arm64/fp/za-test.S | 58
-rw-r--r--  tools/testing/selftests/arm64/mte/Makefile | 5
-rw-r--r--  tools/testing/selftests/arm64/mte/mte_helper.S | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/test_signals_utils.c | 59
-rw-r--r--  tools/testing/selftests/arm64/signal/test_signals_utils.h | 5
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/sme_vl.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/ssve_regs.c | 25
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/sve_regs.c | 23
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/sve_vl.c | 2
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/testcases.c | 48
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/testcases.h | 9
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/za_no_regs.c | 119
-rw-r--r--  tools/testing/selftests/arm64/signal/testcases/za_regs.c | 44
130 files changed, 4608 insertions, 1259 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e92d63d3e878..faa9f4137679 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3203,6 +3203,7 @@
 					       spectre_v2_user=off [X86]
 					       spec_store_bypass_disable=off [X86,PPC]
 					       ssbd=force-off [ARM64]
+					       nospectre_bhb [ARM64]
 					       l1tf=off [X86]
 					       mds=off [X86]
 					       tsx_async_abort=off [X86]
@@ -3609,7 +3610,7 @@
 
 	nohugeiomap	[KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
 
-	nohugevmalloc	[PPC] Disable kernel huge vmalloc mappings.
+	nohugevmalloc	[KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings.
 
 	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
 			Equivalent to smt=1.
@@ -3627,6 +3628,10 @@
 			vulnerability. System may allow data leaks with this
 			option.
 
+	nospectre_bhb	[ARM64] Disable all mitigations for Spectre-BHB (branch
+			history injection) vulnerability. System may allow data leaks
+			with this option.
+
 	nospec_store_bypass_disable
 			[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
 
diff --git a/Documentation/admin-guide/perf/alibaba_pmu.rst b/Documentation/admin-guide/perf/alibaba_pmu.rst
new file mode 100644
index 000000000000..11de998bb480
--- /dev/null
+++ b/Documentation/admin-guide/perf/alibaba_pmu.rst
@@ -0,0 +1,100 @@
+=============================================================
+Alibaba's T-Head SoC Uncore Performance Monitoring Unit (PMU)
+=============================================================
+
+The Yitian 710, custom-built by Alibaba Group's chip development business,
+T-Head, implements uncore PMUs for performance and functional debugging to
+facilitate system maintenance.
+
+DDR Sub-System Driveway (DRW) PMU Driver
+=========================================
+
+Yitian 710 employs eight DDR5/4 channels, four on each die. Each DDR5 channel
+is independent of the others and services system memory requests. Each DDR5
+channel is further split into two independent sub-channels. The DDR Sub-System
+Driveway implements separate PMUs for each sub-channel to monitor various
+performance metrics.
+
+The Driveway PMU devices are named ali_drw_<sys_base_addr> in perf. For
+example, ali_drw_21000 and ali_drw_21080 are the two PMU devices for the two
+sub-channels of the same channel in die 0, and the PMU devices of die 1 are
+prefixed with ali_drw_400XXXXX, e.g. ali_drw_40021000.
+
+Each sub-channel has 36 PMU counters in total, which are classified into
+four groups:
+
+- Group 0: PMU Cycle Counter. This group has one pair of counters,
+  pmu_cycle_cnt_low and pmu_cycle_cnt_high, which together provide the cycle
+  count based on the DDRC core clock.
+
+- Group 1: PMU Bandwidth Counters. This group has 8 counters that count the
+  total number of accesses, either for the eight bank groups of a selected
+  rank, or, using the first 4 counters, for the four ranks separately. The
+  base transfer unit is 64B.
+
+- Group 2: PMU Retry Counters. This group has 10 counters that count the
+  total number of retries for each type of uncorrectable error.
+
+- Group 3: PMU Common Counters. This group has 16 counters that count
+  common events.
+
+For now, the Driveway PMU driver only uses counters in group 0 and group 3.
+
+The DDR Controller (DDRCTL) and DDR PHY combine to create a complete solution
+for connecting an SoC application bus to DDR memory devices. The DDRCTL
+receives transactions over the Host Interface (HIF), which is custom-defined
+by Synopsys.
+These transactions are queued internally and scheduled for access while
+satisfying the SDRAM protocol timing requirements, transaction priorities, and
+dependencies between the transactions. The DDRCTL in turn issues commands on
+the DDR PHY Interface (DFI) to the PHY module, which launches and captures data
+to and from the SDRAM. The Driveway PMUs have hardware logic to gather
+statistics and performance-logging signals on the HIF, DFI, etc.
+
+By counting the READ, WRITE and RMW commands sent to the DDRC through the HIF
+interface, we can calculate the bandwidth. Example usage for counting memory
+data bandwidth::
+
+  perf stat \
+    -e ali_drw_21000/hif_wr/ \
+    -e ali_drw_21000/hif_rd/ \
+    -e ali_drw_21000/hif_rmw/ \
+    -e ali_drw_21000/cycle/ \
+    -e ali_drw_21080/hif_wr/ \
+    -e ali_drw_21080/hif_rd/ \
+    -e ali_drw_21080/hif_rmw/ \
+    -e ali_drw_21080/cycle/ \
+    -e ali_drw_23000/hif_wr/ \
+    -e ali_drw_23000/hif_rd/ \
+    -e ali_drw_23000/hif_rmw/ \
+    -e ali_drw_23000/cycle/ \
+    -e ali_drw_23080/hif_wr/ \
+    -e ali_drw_23080/hif_rd/ \
+    -e ali_drw_23080/hif_rmw/ \
+    -e ali_drw_23080/cycle/ \
+    -e ali_drw_25000/hif_wr/ \
+    -e ali_drw_25000/hif_rd/ \
+    -e ali_drw_25000/hif_rmw/ \
+    -e ali_drw_25000/cycle/ \
+    -e ali_drw_25080/hif_wr/ \
+    -e ali_drw_25080/hif_rd/ \
+    -e ali_drw_25080/hif_rmw/ \
+    -e ali_drw_25080/cycle/ \
+    -e ali_drw_27000/hif_wr/ \
+    -e ali_drw_27000/hif_rd/ \
+    -e ali_drw_27000/hif_rmw/ \
+    -e ali_drw_27000/cycle/ \
+    -e ali_drw_27080/hif_wr/ \
+    -e ali_drw_27080/hif_rd/ \
+    -e ali_drw_27080/hif_rmw/ \
+    -e ali_drw_27080/cycle/ -- sleep 10
+
+The average DRAM bandwidth can be calculated as follows (an illustrative
+sketch is shown after the formulas):
+
+- Read Bandwidth =  perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle
+- Write Bandwidth = (perf_hif_wr + perf_hif_rmw) * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle
+
+Here, DDRC_WIDTH = 64 bytes.
+
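+For illustration only, a rough user-space sketch of this calculation might
+look as follows, assuming the counter deltas (hif_rd, hif_wr, hif_rmw and
+cycle) have already been read from the events above with "perf stat" and that
+the DDRC core clock frequency (ddrc_freq_hz, an assumed platform-provided
+value) is known::
+
+  /* Illustrative sketch, not part of the driver: bandwidth in bytes/s. */
+  double seconds  = (double)cycle / ddrc_freq_hz;      /* sampling window  */
+  double read_bw  = (double)hif_rd * 64.0 / seconds;   /* DDRC_WIDTH = 64B */
+  double write_bw = ((double)hif_wr + (double)hif_rmw) * 64.0 / seconds;
+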
+The current driver does not support sampling, so "perf record" is
+unsupported. Attaching to a task is also unsupported, as the events are all
+uncore.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 9c9ece88ce53..793e1970bc05 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -18,3 +18,4 @@ Performance monitor support
    xgene-pmu
    arm_dsu_pmu
    thunderx2-pmu
+   alibaba_pmu
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 311021f2e560..bb34287c8e01 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -272,6 +272,9 @@ HWCAP2_WFXT
 HWCAP2_EBF16
     Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
 
+HWCAP2_SVE_EBF16
+    Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0010.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index fda97b3fcf01..17d9fc5d14fb 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -110,6 +110,8 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
diff --git a/Documentation/arm64/sme.rst b/Documentation/arm64/sme.rst
index 937147f58cc5..16d2db4c2e2e 100644
--- a/Documentation/arm64/sme.rst
+++ b/Documentation/arm64/sme.rst
@@ -331,6 +331,9 @@ The regset data starts with struct user_za_header, containing:
   been read if a PTRACE_GETREGSET of NT_ARM_ZA were executed for each thread
   when the coredump was generated.
 
+* The NT_ARM_TLS note will be extended to two registers; the second register
+  will contain TPIDR2_EL0 on systems that support SME, and will read as zero
+  with writes ignored otherwise.
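+
+  As an illustrative sketch only (not taken from the kernel sources), a
+  tracer could read both values through this regset, where pid is an
+  already-stopped tracee::
+
+      /* needs <sys/ptrace.h>, <sys/uio.h>, <elf.h>, <stdint.h> */
+      uint64_t tls[2] = { 0, 0 };
+      struct iovec iov = { .iov_base = tls, .iov_len = sizeof(tls) };
+
+      if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_TLS, &iov) == 0) {
+              /* tls[0] is TPIDR_EL0; tls[1] is TPIDR2_EL0, which reads
+               * as zero on systems without SME. */
+      }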
 
 9.  System runtime configuration
 --------------------------------
diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst
index 93c2c2990584..f338ee2df46d 100644
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arm64/sve.rst
@@ -111,7 +111,7 @@ the SVE instruction set architecture.
 
 * On syscall, V0..V31 are preserved (as without SVE).  Thus, bits [127:0] of
   Z0..Z31 are preserved.  All other bits of Z0..Z31, and all of P0..P15 and FFR
-  become unspecified on return from a syscall.
+  become zero on return from a syscall.
 
 * The SVE registers are not used to pass arguments to or receive results from
   any syscall.
@@ -452,6 +452,24 @@ The regset data starts with struct user_sve_header, containing:
 * Modifying the system default vector length does not affect the vector length
   of any existing process or thread that does not make an execve() call.
 
+10.  Perf extensions
+--------------------------------
+
+* The arm64-specific DWARF standard [5] added the VG (Vector Granule) register
+  at index 46. This register is used for DWARF unwinding when variable-length
+  SVE registers are pushed onto the stack.
+
+* Its value is equivalent to the current SVE vector length (VL) in bits divided
+  by 64.
+
+* The value is included in Perf samples in the regs[46] field if
+  PERF_SAMPLE_REGS_USER is set and the sample_regs_user mask has bit 46 set.
+
+* The value is the current value at the time the sample was taken, and it can
+  change over time.
+
+* If the system doesn't support SVE when perf_event_open() is called with
+  these settings, the event will fail to open (see the sketch below).
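+
+For illustration only (this snippet is not taken from the kernel sources),
+requesting VG in samples amounts to setting bit 46 in sample_regs_user before
+calling perf_event_open()::
+
+  /* needs <linux/perf_event.h> and <string.h> */
+  struct perf_event_attr attr;
+
+  memset(&attr, 0, sizeof(attr));
+  attr.size = sizeof(attr);
+  attr.type = PERF_TYPE_HARDWARE;
+  attr.config = PERF_COUNT_HW_CPU_CYCLES;
+  attr.sample_period = 100000;               /* arbitrary example period */
+  attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_USER;
+  attr.sample_regs_user = 1ULL << 46;        /* bit 46 selects VG */
+  /* pass &attr to perf_event_open(); it fails if SVE is unsupported */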
 
 Appendix A.  SVE programmer's model (informative)
 =================================================
@@ -593,3 +611,5 @@ References
     http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055c/IHI0055C_beta_aapcs64.pdf
     http://infocenter.arm.com/help/topic/com.arm.doc.subset.swdev.abi/index.html
     Procedure Call Standard for the ARM 64-bit Architecture (AArch64)
+
+[5] https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index 4a7164afdb3d..b66789eee32e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -748,6 +748,12 @@ S:	Supported
 F:	drivers/infiniband/hw/erdma
 F:	include/uapi/rdma/erdma-abi.h
 
+ALIBABA PMU DRIVER
+M:	Shuai Xue <xueshuai@linux.alibaba.com>
+S:	Supported
+F:	Documentation/admin-guide/perf/alibaba_pmu.rst
+F:	drivers/perf/alibaba_uncore_drw_pmu.c
+
 ALIENWARE WMI DRIVER
 L:	Dell.Client.Kernel@dell.com
 S:	Maintained
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1ce7685ad5de..0e48d1e4c578 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -149,6 +149,7 @@ config ARM64
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_COMPILER_H
+	select HAVE_ARCH_HUGE_VMALLOC
 	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
@@ -230,6 +231,7 @@ config ARM64
 	select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
 	select TRACE_IRQFLAGS_SUPPORT
 	select TRACE_IRQFLAGS_NMI_SUPPORT
+	select HAVE_SOFTIRQ_ON_OWN_STACK
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
@@ -733,6 +735,19 @@ config ARM64_ERRATUM_2077057
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_2658417
+	bool "Cortex-A510: 2658417: remove BF16 support due to incorrect result"
+	default y
+	help
+	  This option adds the workaround for ARM Cortex-A510 erratum 2658417.
+	  Affected Cortex-A510 (r0p0 to r1p1) may produce the wrong result for
+	  BFMMLA or VMMLA instructions in rare circumstances when a pair of
+	  A510 CPUs are using shared neon hardware. As the sharing is not
+	  discoverable by the kernel, hide the BF16 HWCAP to indicate that
+	  user-space should not be using these instructions.
+
+	  If unsure, say Y.
+
 config ARM64_ERRATUM_2119858
 	bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode"
 	default y
@@ -1562,6 +1577,9 @@ config THUMB2_COMPAT_VDSO
 	  Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y,
 	  otherwise with '-marm'.
 
+config COMPAT_ALIGNMENT_FIXUPS
+	bool "Fix up misaligned multi-word loads and stores in user space"
+
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
 	depends on SYSCTL
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 297ad5b7939b..7b49c80286be 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -18,6 +18,7 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_DEVICE=y
@@ -102,6 +103,8 @@ CONFIG_ARM_SCMI_CPUFREQ=y
 CONFIG_ARM_TEGRA186_CPUFREQ=y
 CONFIG_QORIQ_CPUFREQ=y
 CONFIG_ACPI=y
+CONFIG_ACPI_HOTPLUG_MEMORY=y
+CONFIG_ACPI_HMAT=y
 CONFIG_ACPI_APEI=y
 CONFIG_ACPI_APEI_GHES=y
 CONFIG_ACPI_APEI_PCIEAER=y
@@ -126,6 +129,8 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_COMPAT_BRK is not set
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
 CONFIG_MEMORY_FAILURE=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
@@ -139,12 +144,16 @@ CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_IPV6=m
 CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NETFILTER_XT_MARK=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
 CONFIG_NETFILTER_XT_TARGET_LOG=m
 CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_IPVS=m
+CONFIG_IP_VS=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
@@ -1349,4 +1358,12 @@ CONFIG_DEBUG_FS=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_FTRACE is not set
+CONFIG_CORESIGHT=m
+CONFIG_CORESIGHT_LINK_AND_SINK_TMC=m
+CONFIG_CORESIGHT_CATU=m
+CONFIG_CORESIGHT_SINK_TPIU=m
+CONFIG_CORESIGHT_SINK_ETBV10=m
+CONFIG_CORESIGHT_STM=m
+CONFIG_CORESIGHT_CPU_DEBUG=m
+CONFIG_CORESIGHT_CTI=m
 CONFIG_MEMTEST=y
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 7e157ab6cd50..3622e9f4fb44 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -2,10 +2,22 @@
 #ifndef __ASM_ALTERNATIVE_MACROS_H
 #define __ASM_ALTERNATIVE_MACROS_H
 
+#include <linux/const.h>
+#include <vdso/bits.h>
+
 #include <asm/cpucaps.h>
 #include <asm/insn-def.h>
 
-#define ARM64_CB_PATCH ARM64_NCAPS
+/*
+ * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly
+ * macros below we must use `(1 << ARM64_CB_SHIFT)`.
+ */
+#define ARM64_CB_SHIFT	15
+#define ARM64_CB_BIT	BIT(ARM64_CB_SHIFT)
+
+#if ARM64_NCAPS >= ARM64_CB_BIT
+#error "cpucaps have overflown ARM64_CB_BIT"
+#endif
 
 #ifndef __ASSEMBLY__
 
@@ -73,8 +85,8 @@
 #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)	\
 	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
 
-#define ALTERNATIVE_CB(oldinstr, cb) \
-	__ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb)
+#define ALTERNATIVE_CB(oldinstr, feature, cb) \
+	__ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb)
 #else
 
 #include <asm/assembler.h>
@@ -82,7 +94,7 @@
 .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
 	.word \orig_offset - .
 	.word \alt_offset - .
-	.hword \feature
+	.hword (\feature)
 	.byte \orig_len
 	.byte \alt_len
 .endm
@@ -141,10 +153,10 @@
 661:
 .endm
 
-.macro alternative_cb cb
+.macro alternative_cb cap, cb
 	.set .Lasm_alt_mode, 0
 	.pushsection .altinstructions, "a"
-	altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+	altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0
 	.popsection
 661:
 .endm
@@ -207,4 +219,46 @@ alternative_endif
 #define ALTERNATIVE(oldinstr, newinstr, ...)   \
 	_ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
 
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+static __always_inline bool
+alternative_has_feature_likely(unsigned long feature)
+{
+	compiletime_assert(feature < ARM64_NCAPS,
+			   "feature must be < ARM64_NCAPS");
+
+	asm_volatile_goto(
+	ALTERNATIVE_CB("b	%l[l_no]", %[feature], alt_cb_patch_nops)
+	:
+	: [feature] "i" (feature)
+	:
+	: l_no);
+
+	return true;
+l_no:
+	return false;
+}
+
+static __always_inline bool
+alternative_has_feature_unlikely(unsigned long feature)
+{
+	compiletime_assert(feature < ARM64_NCAPS,
+			   "feature must be < ARM64_NCAPS");
+
+	asm_volatile_goto(
+	ALTERNATIVE("nop", "b	%l[l_yes]", %[feature])
+	:
+	: [feature] "i" (feature)
+	:
+	: l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* __ASM_ALTERNATIVE_MACROS_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 5846145be523..e5957a53be39 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -293,7 +293,7 @@ alternative_endif
 alternative_if_not ARM64_KVM_PROTECTED_MODE
 	ASM_BUG()
 alternative_else_nop_endif
-alternative_cb kvm_compute_final_ctr_el0
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_compute_final_ctr_el0
 	movz	\reg, #0
 	movk	\reg, #0, lsl #16
 	movk	\reg, #0, lsl #32
@@ -384,8 +384,8 @@ alternative_cb_end
 	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
 	mrs	\tmp0, ID_AA64MMFR0_EL1
 	// Narrow PARange to fit the PS field in TCR_ELx
-	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
-	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
+	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_EL1_PARANGE_SHIFT, #3
+	mov	\tmp1, #ID_AA64MMFR0_EL1_PARANGE_MAX
 	cmp	\tmp0, \tmp1
 	csel	\tmp0, \tmp1, \tmp0, hi
 	bfi	\tcr, \tmp0, \pos, #3
@@ -512,7 +512,7 @@ alternative_endif
  */
 	.macro	reset_pmuserenr_el0, tmpreg
 	mrs	\tmpreg, id_aa64dfr0_el1
-	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
+	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
 	cmp	\tmpreg, #1			// Skip if no PMU present
 	b.lt	9000f
 	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
@@ -524,7 +524,7 @@ alternative_endif
  */
 	.macro	reset_amuserenr_el0, tmpreg
 	mrs	\tmpreg, id_aa64pfr0_el1	// Check ID_AA64PFR0_EL1
-	ubfx	\tmpreg, \tmpreg, #ID_AA64PFR0_AMU_SHIFT, #4
+	ubfx	\tmpreg, \tmpreg, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
 	cbz	\tmpreg, .Lskip_\@		// Skip if no AMU present
 	msr_s	SYS_AMUSERENR_EL0, xzr		// Disable AMU access from EL0
 .Lskip_\@:
@@ -612,7 +612,7 @@ alternative_endif
 	.macro	offset_ttbr1, ttbr, tmp
 #ifdef CONFIG_ARM64_VA_BITS_52
 	mrs_s	\tmp, SYS_ID_AA64MMFR2_EL1
-	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+	and	\tmp, \tmp, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
 	cbnz	\tmp, .Lskipoffs_\@
 	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
 .Lskipoffs_\@ :
@@ -877,7 +877,7 @@ alternative_endif
 
 	.macro __mitigate_spectre_bhb_loop      tmp
 #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
-alternative_cb  spectre_bhb_patch_loop_iter
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_iter
 	mov	\tmp, #32		// Patched to correct the immediate
 alternative_cb_end
 .Lspectre_bhb_loop\@:
@@ -890,7 +890,7 @@ alternative_cb_end
 
 	.macro mitigate_spectre_bhb_loop	tmp
 #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
-alternative_cb	spectre_bhb_patch_loop_mitigation_enable
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_loop_mitigation_enable
 	b	.L_spectre_bhb_loop_done\@	// Patched to NOP
 alternative_cb_end
 	__mitigate_spectre_bhb_loop	\tmp
@@ -904,7 +904,7 @@ alternative_cb_end
 	stp	x0, x1, [sp, #-16]!
 	stp	x2, x3, [sp, #-16]!
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_3
-alternative_cb	smccc_patch_fw_mitigation_conduit
+alternative_cb ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit
 	nop					// Patched to SMC/HVC #0
 alternative_cb_end
 	ldp	x2, x3, [sp], #16
@@ -914,7 +914,7 @@ alternative_cb_end
 
 	.macro mitigate_spectre_bhb_clear_insn
 #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY
-alternative_cb	spectre_bhb_patch_clearbhb
+alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_clearbhb
 	/* Patched to NOP when not supported */
 	clearbhb
 	isb
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index fe0db8d416fb..0890e4f568fb 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -12,19 +12,6 @@
 
 #include <linux/stringify.h>
 
-#ifdef CONFIG_ARM64_LSE_ATOMICS
-#define __LL_SC_FALLBACK(asm_ops)					\
-"	b	3f\n"							\
-"	.subsection	1\n"						\
-"3:\n"									\
-asm_ops "\n"								\
-"	b	4f\n"							\
-"	.previous\n"							\
-"4:\n"
-#else
-#define __LL_SC_FALLBACK(asm_ops) asm_ops
-#endif
-
 #ifndef CONFIG_CC_HAS_K_CONSTRAINT
 #define K
 #endif
@@ -36,38 +23,36 @@ asm_ops "\n"								\
  */
 
 #define ATOMIC_OP(op, asm_op, constraint)				\
-static inline void							\
+static __always_inline void						\
 __ll_sc_atomic_##op(int i, atomic_t *v)					\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
 	asm volatile("// atomic_" #op "\n"				\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxr	%w0, %2\n"					\
 	"	" #asm_op "	%w0, %w0, %w3\n"			\
 	"	stxr	%w1, %w0, %2\n"					\
-	"	cbnz	%w1, 1b\n")					\
+	"	cbnz	%w1, 1b\n"					\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: __stringify(constraint) "r" (i));				\
 }
 
 #define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline int							\
+static __always_inline int						\
 __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)			\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
 	asm volatile("// atomic_" #op "_return" #name "\n"		\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %2\n"				\
 	"1:	ld" #acq "xr	%w0, %2\n"				\
 	"	" #asm_op "	%w0, %w0, %w3\n"			\
 	"	st" #rel "xr	%w1, %w0, %2\n"				\
 	"	cbnz	%w1, 1b\n"					\
-	"	" #mb )							\
+	"	" #mb							\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: __stringify(constraint) "r" (i)				\
 	: cl);								\
@@ -76,20 +61,19 @@ __ll_sc_atomic_##op##_return##name(int i, atomic_t *v)			\
 }
 
 #define ATOMIC_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint) \
-static inline int							\
+static __always_inline int						\
 __ll_sc_atomic_fetch_##op##name(int i, atomic_t *v)			\
 {									\
 	unsigned long tmp;						\
 	int val, result;						\
 									\
 	asm volatile("// atomic_fetch_" #op #name "\n"			\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %3\n"				\
 	"1:	ld" #acq "xr	%w0, %3\n"				\
 	"	" #asm_op "	%w1, %w0, %w4\n"			\
 	"	st" #rel "xr	%w2, %w1, %3\n"				\
 	"	cbnz	%w2, 1b\n"					\
-	"	" #mb )							\
+	"	" #mb							\
 	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)	\
 	: __stringify(constraint) "r" (i)				\
 	: cl);								\
@@ -135,38 +119,36 @@ ATOMIC_OPS(andnot, bic, )
 #undef ATOMIC_OP
 
 #define ATOMIC64_OP(op, asm_op, constraint)				\
-static inline void							\
+static __always_inline void						\
 __ll_sc_atomic64_##op(s64 i, atomic64_t *v)				\
 {									\
 	s64 result;							\
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_" #op "\n"				\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxr	%0, %2\n"					\
 	"	" #asm_op "	%0, %0, %3\n"				\
 	"	stxr	%w1, %0, %2\n"					\
-	"	cbnz	%w1, 1b")					\
+	"	cbnz	%w1, 1b"					\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: __stringify(constraint) "r" (i));				\
 }
 
 #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline long							\
+static __always_inline long						\
 __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)		\
 {									\
 	s64 result;							\
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_" #op "_return" #name "\n"		\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %2\n"				\
 	"1:	ld" #acq "xr	%0, %2\n"				\
 	"	" #asm_op "	%0, %0, %3\n"				\
 	"	st" #rel "xr	%w1, %0, %2\n"				\
 	"	cbnz	%w1, 1b\n"					\
-	"	" #mb )							\
+	"	" #mb							\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: __stringify(constraint) "r" (i)				\
 	: cl);								\
@@ -175,20 +157,19 @@ __ll_sc_atomic64_##op##_return##name(s64 i, atomic64_t *v)		\
 }
 
 #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op, constraint)\
-static inline long							\
+static __always_inline long						\
 __ll_sc_atomic64_fetch_##op##name(s64 i, atomic64_t *v)			\
 {									\
 	s64 result, val;						\
 	unsigned long tmp;						\
 									\
 	asm volatile("// atomic64_fetch_" #op #name "\n"		\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %3\n"				\
 	"1:	ld" #acq "xr	%0, %3\n"				\
 	"	" #asm_op "	%1, %0, %4\n"				\
 	"	st" #rel "xr	%w2, %1, %3\n"				\
 	"	cbnz	%w2, 1b\n"					\
-	"	" #mb )							\
+	"	" #mb							\
 	: "=&r" (result), "=&r" (val), "=&r" (tmp), "+Q" (v->counter)	\
 	: __stringify(constraint) "r" (i)				\
 	: cl);								\
@@ -233,14 +214,13 @@ ATOMIC64_OPS(andnot, bic, )
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline s64
+static __always_inline s64
 __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 result;
 	unsigned long tmp;
 
 	asm volatile("// atomic64_dec_if_positive\n"
-	__LL_SC_FALLBACK(
 	"	prfm	pstl1strm, %2\n"
 	"1:	ldxr	%0, %2\n"
 	"	subs	%0, %0, #1\n"
@@ -248,7 +228,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
 	"	stlxr	%w1, %0, %2\n"
 	"	cbnz	%w1, 1b\n"
 	"	dmb	ish\n"
-	"2:")
+	"2:"
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
 	:
 	: "cc", "memory");
@@ -257,7 +237,7 @@ __ll_sc_atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl, constraint)	\
-static inline u##sz							\
+static __always_inline u##sz						\
 __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
 					 unsigned long old,		\
 					 u##sz new)			\
@@ -274,7 +254,6 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
 		old = (u##sz)old;					\
 									\
 	asm volatile(							\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %[v]\n"				\
 	"1:	ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n"		\
 	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
@@ -282,7 +261,7 @@ __ll_sc__cmpxchg_case_##name##sz(volatile void *ptr,			\
 	"	st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n"	\
 	"	cbnz	%w[tmp], 1b\n"					\
 	"	" #mb "\n"						\
-	"2:")								\
+	"2:"								\
 	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
 	  [v] "+Q" (*(u##sz *)ptr)					\
 	: [old] __stringify(constraint) "r" (old), [new] "r" (new)	\
@@ -316,7 +295,7 @@ __CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory", L)
 #undef __CMPXCHG_CASE
 
 #define __CMPXCHG_DBL(name, mb, rel, cl)				\
-static inline long							\
+static __always_inline long						\
 __ll_sc__cmpxchg_double##name(unsigned long old1,			\
 				      unsigned long old2,		\
 				      unsigned long new1,		\
@@ -326,7 +305,6 @@ __ll_sc__cmpxchg_double##name(unsigned long old1,			\
 	unsigned long tmp, ret;						\
 									\
 	asm volatile("// __cmpxchg_double" #name "\n"			\
-	__LL_SC_FALLBACK(						\
 	"	prfm	pstl1strm, %2\n"				\
 	"1:	ldxp	%0, %1, %2\n"					\
 	"	eor	%0, %0, %3\n"					\
@@ -336,7 +314,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1,			\
 	"	st" #rel "xp	%w0, %5, %6, %2\n"			\
 	"	cbnz	%w0, 1b\n"					\
 	"	" #mb "\n"						\
-	"2:")								\
+	"2:"								\
 	: "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)	\
 	: "r" (old1), "r" (old2), "r" (new1), "r" (new2)		\
 	: cl);								\
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 5d460f6b7675..52075e93de6c 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -11,7 +11,8 @@
 #define __ASM_ATOMIC_LSE_H
 
 #define ATOMIC_OP(op, asm_op)						\
-static inline void __lse_atomic_##op(int i, atomic_t *v)		\
+static __always_inline void						\
+__lse_atomic_##op(int i, atomic_t *v)					\
 {									\
 	asm volatile(							\
 	__LSE_PREAMBLE							\
@@ -25,7 +26,7 @@ ATOMIC_OP(or, stset)
 ATOMIC_OP(xor, steor)
 ATOMIC_OP(add, stadd)
 
-static inline void __lse_atomic_sub(int i, atomic_t *v)
+static __always_inline void __lse_atomic_sub(int i, atomic_t *v)
 {
 	__lse_atomic_add(-i, v);
 }
@@ -33,7 +34,8 @@ static inline void __lse_atomic_sub(int i, atomic_t *v)
 #undef ATOMIC_OP
 
 #define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...)			\
-static inline int __lse_atomic_fetch_##op##name(int i, atomic_t *v)	\
+static __always_inline int						\
+__lse_atomic_fetch_##op##name(int i, atomic_t *v)			\
 {									\
 	int old;							\
 									\
@@ -63,7 +65,8 @@ ATOMIC_FETCH_OPS(add, ldadd)
 #undef ATOMIC_FETCH_OPS
 
 #define ATOMIC_FETCH_OP_SUB(name)					\
-static inline int __lse_atomic_fetch_sub##name(int i, atomic_t *v)	\
+static __always_inline int						\
+__lse_atomic_fetch_sub##name(int i, atomic_t *v)			\
 {									\
 	return __lse_atomic_fetch_add##name(-i, v);			\
 }
@@ -76,12 +79,14 @@ ATOMIC_FETCH_OP_SUB(        )
 #undef ATOMIC_FETCH_OP_SUB
 
 #define ATOMIC_OP_ADD_SUB_RETURN(name)					\
-static inline int __lse_atomic_add_return##name(int i, atomic_t *v)	\
+static __always_inline int						\
+__lse_atomic_add_return##name(int i, atomic_t *v)			\
 {									\
 	return __lse_atomic_fetch_add##name(i, v) + i;			\
 }									\
 									\
-static inline int __lse_atomic_sub_return##name(int i, atomic_t *v)	\
+static __always_inline int						\
+__lse_atomic_sub_return##name(int i, atomic_t *v)			\
 {									\
 	return __lse_atomic_fetch_sub(i, v) - i;			\
 }
@@ -93,13 +98,14 @@ ATOMIC_OP_ADD_SUB_RETURN(        )
 
 #undef ATOMIC_OP_ADD_SUB_RETURN
 
-static inline void __lse_atomic_and(int i, atomic_t *v)
+static __always_inline void __lse_atomic_and(int i, atomic_t *v)
 {
 	return __lse_atomic_andnot(~i, v);
 }
 
 #define ATOMIC_FETCH_OP_AND(name, mb, cl...)				\
-static inline int __lse_atomic_fetch_and##name(int i, atomic_t *v)	\
+static __always_inline int						\
+__lse_atomic_fetch_and##name(int i, atomic_t *v)			\
 {									\
 	return __lse_atomic_fetch_andnot##name(~i, v);			\
 }
@@ -112,7 +118,8 @@ ATOMIC_FETCH_OP_AND(        , al, "memory")
 #undef ATOMIC_FETCH_OP_AND
 
 #define ATOMIC64_OP(op, asm_op)						\
-static inline void __lse_atomic64_##op(s64 i, atomic64_t *v)		\
+static __always_inline void						\
+__lse_atomic64_##op(s64 i, atomic64_t *v)				\
 {									\
 	asm volatile(							\
 	__LSE_PREAMBLE							\
@@ -126,7 +133,7 @@ ATOMIC64_OP(or, stset)
 ATOMIC64_OP(xor, steor)
 ATOMIC64_OP(add, stadd)
 
-static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
 {
 	__lse_atomic64_add(-i, v);
 }
@@ -134,7 +141,8 @@ static inline void __lse_atomic64_sub(s64 i, atomic64_t *v)
 #undef ATOMIC64_OP
 
 #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...)			\
-static inline long __lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)\
+static __always_inline long						\
+__lse_atomic64_fetch_##op##name(s64 i, atomic64_t *v)			\
 {									\
 	s64 old;							\
 									\
@@ -164,7 +172,8 @@ ATOMIC64_FETCH_OPS(add, ldadd)
 #undef ATOMIC64_FETCH_OPS
 
 #define ATOMIC64_FETCH_OP_SUB(name)					\
-static inline long __lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v)	\
+static __always_inline long						\
+__lse_atomic64_fetch_sub##name(s64 i, atomic64_t *v)			\
 {									\
 	return __lse_atomic64_fetch_add##name(-i, v);			\
 }
@@ -177,12 +186,14 @@ ATOMIC64_FETCH_OP_SUB(        )
 #undef ATOMIC64_FETCH_OP_SUB
 
 #define ATOMIC64_OP_ADD_SUB_RETURN(name)				\
-static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
+static __always_inline long						\
+__lse_atomic64_add_return##name(s64 i, atomic64_t *v)			\
 {									\
 	return __lse_atomic64_fetch_add##name(i, v) + i;		\
 }									\
 									\
-static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\
+static __always_inline long						\
+__lse_atomic64_sub_return##name(s64 i, atomic64_t *v)			\
 {									\
 	return __lse_atomic64_fetch_sub##name(i, v) - i;		\
 }
@@ -194,13 +205,14 @@ ATOMIC64_OP_ADD_SUB_RETURN(        )
 
 #undef ATOMIC64_OP_ADD_SUB_RETURN
 
-static inline void __lse_atomic64_and(s64 i, atomic64_t *v)
+static __always_inline void __lse_atomic64_and(s64 i, atomic64_t *v)
 {
 	return __lse_atomic64_andnot(~i, v);
 }
 
 #define ATOMIC64_FETCH_OP_AND(name, mb, cl...)				\
-static inline long __lse_atomic64_fetch_and##name(s64 i, atomic64_t *v)	\
+static __always_inline long						\
+__lse_atomic64_fetch_and##name(s64 i, atomic64_t *v)			\
 {									\
 	return __lse_atomic64_fetch_andnot##name(~i, v);		\
 }
@@ -212,7 +224,7 @@ ATOMIC64_FETCH_OP_AND(        , al, "memory")
 
 #undef ATOMIC64_FETCH_OP_AND
 
-static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
 {
 	unsigned long tmp;
 
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 34256bda0da9..c0b178d1bb4f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -45,10 +45,6 @@ static inline unsigned int arch_slab_minalign(void)
 #define arch_slab_minalign() arch_slab_minalign()
 #endif
 
-#define CTR_CACHE_MINLINE_MASK	\
-	(0xf << CTR_EL0_DMINLINE_SHIFT | \
-	 CTR_EL0_IMINLINE_MASK << CTR_EL0_IMINLINE_SHIFT)
-
 #define CTR_L1IP(ctr)		SYS_FIELD_GET(CTR_EL0, L1Ip, ctr)
 
 #define ICACHEF_ALIASING	0
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index fd7d75a275f6..f73f11b55042 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -6,6 +6,7 @@
 #ifndef __ASM_CPUFEATURE_H
 #define __ASM_CPUFEATURE_H
 
+#include <asm/alternative-macros.h>
 #include <asm/cpucaps.h>
 #include <asm/cputype.h>
 #include <asm/hwcap.h>
@@ -419,12 +420,8 @@ static __always_inline bool is_hyp_code(void)
 }
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-extern struct static_key_false arm64_const_caps_ready;
 
-/* ARM64 CAPS + alternative_cb */
-#define ARM64_NPATCHABLE (ARM64_NCAPS + 1)
-extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+extern DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS);
 
 #define for_each_available_cap(cap)		\
 	for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
@@ -440,7 +437,7 @@ unsigned long cpu_get_elf_hwcap2(void);
 
 static __always_inline bool system_capabilities_finalized(void)
 {
-	return static_branch_likely(&arm64_const_caps_ready);
+	return alternative_has_feature_likely(ARM64_ALWAYS_SYSTEM);
 }
 
 /*
@@ -448,11 +445,11 @@ static __always_inline bool system_capabilities_finalized(void)
  *
  * Before the capability is detected, this returns false.
  */
-static inline bool cpus_have_cap(unsigned int num)
+static __always_inline bool cpus_have_cap(unsigned int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
-	return test_bit(num, cpu_hwcaps);
+	return arch_test_bit(num, cpu_hwcaps);
 }
 
 /*
@@ -467,7 +464,7 @@ static __always_inline bool __cpus_have_const_cap(int num)
 {
 	if (num >= ARM64_NCAPS)
 		return false;
-	return static_branch_unlikely(&cpu_hwcap_keys[num]);
+	return alternative_has_feature_unlikely(num);
 }
 
 /*
@@ -553,7 +550,7 @@ cpuid_feature_cap_perfmon_field(u64 features, int field, u64 cap)
 	u64 mask = GENMASK_ULL(field + 3, field);
 
 	/* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
-	if (val == ID_AA64DFR0_PMUVER_IMP_DEF)
+	if (val == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
 		val = 0;
 
 	if (val > cap) {
@@ -597,43 +594,43 @@ static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
 
 static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
 {
-	return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
-		cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+	return cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGEND_SHIFT) == 0x1 ||
+		cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT) == 0x1;
 }
 
 static inline bool id_aa64pfr0_32bit_el1(u64 pfr0)
 {
-	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
+	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL1_SHIFT);
 
-	return val == ID_AA64PFR0_ELx_32BIT_64BIT;
+	return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
 }
 
 static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
 {
-	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
+	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_EL0_SHIFT);
 
-	return val == ID_AA64PFR0_ELx_32BIT_64BIT;
+	return val == ID_AA64PFR0_EL1_ELx_32BIT_64BIT;
 }
 
 static inline bool id_aa64pfr0_sve(u64 pfr0)
 {
-	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_SVE_SHIFT);
+	u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SVE_SHIFT);
 
 	return val > 0;
 }
 
 static inline bool id_aa64pfr1_sme(u64 pfr1)
 {
-	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_SME_SHIFT);
+	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_SME_SHIFT);
 
 	return val > 0;
 }
 
 static inline bool id_aa64pfr1_mte(u64 pfr1)
 {
-	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
+	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_EL1_MTE_SHIFT);
 
-	return val >= ID_AA64PFR1_MTE;
+	return val >= ID_AA64PFR1_EL1_MTE_MTE2;
 }
 
 void __init setup_cpu_features(void);
@@ -659,7 +656,7 @@ static inline bool supports_csv2p3(int scope)
 		pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 
 	csv2_val = cpuid_feature_extract_unsigned_field(pfr0,
-							ID_AA64PFR0_CSV2_SHIFT);
+							ID_AA64PFR0_EL1_CSV2_SHIFT);
 	return csv2_val == 3;
 }
 
@@ -694,10 +691,10 @@ static inline bool system_supports_4kb_granule(void)
 
 	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
-						ID_AA64MMFR0_TGRAN4_SHIFT);
+						ID_AA64MMFR0_EL1_TGRAN4_SHIFT);
 
-	return (val >= ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN) &&
-	       (val <= ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX);
+	return (val >= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN) &&
+	       (val <= ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX);
 }
 
 static inline bool system_supports_64kb_granule(void)
@@ -707,10 +704,10 @@ static inline bool system_supports_64kb_granule(void)
 
 	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
-						ID_AA64MMFR0_TGRAN64_SHIFT);
+						ID_AA64MMFR0_EL1_TGRAN64_SHIFT);
 
-	return (val >= ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN) &&
-	       (val <= ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX);
+	return (val >= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN) &&
+	       (val <= ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX);
 }
 
 static inline bool system_supports_16kb_granule(void)
@@ -720,10 +717,10 @@ static inline bool system_supports_16kb_granule(void)
 
 	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
-						ID_AA64MMFR0_TGRAN16_SHIFT);
+						ID_AA64MMFR0_EL1_TGRAN16_SHIFT);
 
-	return (val >= ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN) &&
-	       (val <= ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX);
+	return (val >= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN) &&
+	       (val <= ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX);
 }
 
 static inline bool system_supports_mixed_endian_el0(void)
@@ -738,7 +735,7 @@ static inline bool system_supports_mixed_endian(void)
 
 	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	val = cpuid_feature_extract_unsigned_field(mmfr0,
-						ID_AA64MMFR0_BIGENDEL_SHIFT);
+						ID_AA64MMFR0_EL1_BIGEND_SHIFT);
 
 	return val == 0x1;
 }
@@ -840,13 +837,13 @@ extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
 static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
 {
 	switch (parange) {
-	case ID_AA64MMFR0_PARANGE_32: return 32;
-	case ID_AA64MMFR0_PARANGE_36: return 36;
-	case ID_AA64MMFR0_PARANGE_40: return 40;
-	case ID_AA64MMFR0_PARANGE_42: return 42;
-	case ID_AA64MMFR0_PARANGE_44: return 44;
-	case ID_AA64MMFR0_PARANGE_48: return 48;
-	case ID_AA64MMFR0_PARANGE_52: return 52;
+	case ID_AA64MMFR0_EL1_PARANGE_32: return 32;
+	case ID_AA64MMFR0_EL1_PARANGE_36: return 36;
+	case ID_AA64MMFR0_EL1_PARANGE_40: return 40;
+	case ID_AA64MMFR0_EL1_PARANGE_42: return 42;
+	case ID_AA64MMFR0_EL1_PARANGE_44: return 44;
+	case ID_AA64MMFR0_EL1_PARANGE_48: return 48;
+	case ID_AA64MMFR0_EL1_PARANGE_52: return 52;
 	/*
 	 * A future PE could use a value unknown to the kernel.
 	 * However, by the "D10.1.4 Principles of the ID scheme
@@ -868,14 +865,14 @@ static inline bool cpu_has_hw_af(void)
 
 	mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
 	return cpuid_feature_extract_unsigned_field(mmfr1,
-						ID_AA64MMFR1_HADBS_SHIFT);
+						ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
 }
 
 static inline bool cpu_has_pan(void)
 {
 	u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
 	return cpuid_feature_extract_unsigned_field(mmfr1,
-						    ID_AA64MMFR1_PAN_SHIFT);
+						    ID_AA64MMFR1_EL1_PAN_SHIFT);
 }
 
 #ifdef CONFIG_ARM64_AMU_EXTN
@@ -896,8 +893,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 	int vmid_bits;
 
 	vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1,
-						ID_AA64MMFR1_VMIDBITS_SHIFT);
-	if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16)
+						ID_AA64MMFR1_EL1_VMIDBits_SHIFT);
+	if (vmid_bits == ID_AA64MMFR1_EL1_VMIDBits_16)
 		return 16;
 
 	/*
@@ -907,6 +904,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 	return 8;
 }
 
+struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id);
+
 extern struct arm64_ftr_override id_aa64mmfr1_override;
 extern struct arm64_ftr_override id_aa64pfr0_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 2630faa5bc08..668569adf4d3 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -40,7 +40,7 @@
 
 .macro __init_el2_debug
 	mrs	x1, id_aa64dfr0_el1
-	sbfx	x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
+	sbfx	x0, x1, #ID_AA64DFR0_EL1_PMUVer_SHIFT, #4
 	cmp	x0, #1
 	b.lt	.Lskip_pmu_\@			// Skip if no PMU present
 	mrs	x0, pmcr_el0			// Disable debug access traps
@@ -49,7 +49,7 @@
 	csel	x2, xzr, x0, lt			// all PMU counters from EL1
 
 	/* Statistical profiling */
-	ubfx	x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+	ubfx	x0, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
 	cbz	x0, .Lskip_spe_\@		// Skip if SPE not present
 
 	mrs_s	x0, SYS_PMBIDR_EL1              // If SPE available at EL2,
@@ -65,7 +65,7 @@
 
 .Lskip_spe_\@:
 	/* Trace buffer */
-	ubfx	x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4
+	ubfx	x0, x1, #ID_AA64DFR0_EL1_TraceBuffer_SHIFT, #4
 	cbz	x0, .Lskip_trace_\@		// Skip if TraceBuffer is not present
 
 	mrs_s	x0, SYS_TRBIDR_EL1
@@ -83,7 +83,7 @@
 /* LORegions */
 .macro __init_el2_lor
 	mrs	x1, id_aa64mmfr1_el1
-	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+	ubfx	x0, x1, #ID_AA64MMFR1_EL1_LO_SHIFT, 4
 	cbz	x0, .Lskip_lor_\@
 	msr_s	SYS_LORC_EL1, xzr
 .Lskip_lor_\@:
@@ -97,7 +97,7 @@
 /* GICv3 system register access */
 .macro __init_el2_gicv3
 	mrs	x0, id_aa64pfr0_el1
-	ubfx	x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
+	ubfx	x0, x0, #ID_AA64PFR0_EL1_GIC_SHIFT, #4
 	cbz	x0, .Lskip_gicv3_\@
 
 	mrs_s	x0, SYS_ICC_SRE_EL2
@@ -132,12 +132,12 @@
 /* Disable any fine grained traps */
 .macro __init_el2_fgt
 	mrs	x1, id_aa64mmfr0_el1
-	ubfx	x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4
+	ubfx	x1, x1, #ID_AA64MMFR0_EL1_FGT_SHIFT, #4
 	cbz	x1, .Lskip_fgt_\@
 
 	mov	x0, xzr
 	mrs	x1, id_aa64dfr0_el1
-	ubfx	x1, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
+	ubfx	x1, x1, #ID_AA64DFR0_EL1_PMSVer_SHIFT, #4
 	cmp	x1, #3
 	b.lt	.Lset_debug_fgt_\@
 	/* Disable PMSNEVFR_EL1 read and write traps */
@@ -149,7 +149,7 @@
 
 	mov	x0, xzr
 	mrs	x1, id_aa64pfr1_el1
-	ubfx	x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
+	ubfx	x1, x1, #ID_AA64PFR1_EL1_SME_SHIFT, #4
 	cbz	x1, .Lset_fgt_\@
 
 	/* Disable nVHE traps of TPIDR2 and SMPRI */
@@ -162,7 +162,7 @@
 	msr_s	SYS_HFGITR_EL2, xzr
 
 	mrs	x1, id_aa64pfr0_el1		// AMU traps UNDEF without AMU
-	ubfx	x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4
+	ubfx	x1, x1, #ID_AA64PFR0_EL1_AMU_SHIFT, #4
 	cbz	x1, .Lskip_fgt_\@
 
 	msr_s	SYS_HAFGRTR_EL2, xzr
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d94aecff9690..19713d0f013b 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -58,8 +58,9 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 asmlinkage void asm_exit_to_user_mode(struct pt_regs *regs);
 
 void do_mem_abort(unsigned long far, unsigned long esr, struct pt_regs *regs);
-void do_undefinstr(struct pt_regs *regs);
-void do_bti(struct pt_regs *regs);
+void do_undefinstr(struct pt_regs *regs, unsigned long esr);
+void do_el0_bti(struct pt_regs *regs);
+void do_el1_bti(struct pt_regs *regs, unsigned long esr);
 void do_debug_exception(unsigned long addr_if_watchpoint, unsigned long esr,
 			struct pt_regs *regs);
 void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs);
@@ -70,9 +71,11 @@ void do_sysinstr(unsigned long esr, struct pt_regs *regs);
 void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs);
 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr);
 void do_cp15instr(unsigned long esr, struct pt_regs *regs);
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
-void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr);
+void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
+void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
 void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index bc7aaed4b34e..fa4c6ff3aa9b 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -142,7 +142,7 @@ static inline int get_num_brps(void)
 	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
 	return 1 +
 		cpuid_feature_extract_unsigned_field(dfr0,
-						ID_AA64DFR0_BRPS_SHIFT);
+						ID_AA64DFR0_EL1_BRPs_SHIFT);
 }
 
 /* Determine number of WRP registers available. */
@@ -151,7 +151,7 @@ static inline int get_num_wrps(void)
 	u64 dfr0 = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
 	return 1 +
 		cpuid_feature_extract_unsigned_field(dfr0,
-						ID_AA64DFR0_WRPS_SHIFT);
+						ID_AA64DFR0_EL1_WRPs_SHIFT);
 }
 
 #endif	/* __ASM_BREAKPOINT_H */
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index cef4ae7a3d8b..298b386d3ebe 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -119,6 +119,7 @@
 #define KERNEL_HWCAP_SME_FA64		__khwcap2_feature(SME_FA64)
 #define KERNEL_HWCAP_WFXT		__khwcap2_feature(WFXT)
 #define KERNEL_HWCAP_EBF16		__khwcap2_feature(EBF16)
+#define KERNEL_HWCAP_SVE_EBF16		__khwcap2_feature(SVE_EBF16)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b208da3bebec..7784081088e7 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -63,7 +63,7 @@
  * specific registers encoded in the instructions).
  */
 .macro kern_hyp_va	reg
-alternative_cb kvm_update_va_mask
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_update_va_mask
 	and     \reg, \reg, #1		/* mask with va_mask */
 	ror	\reg, \reg, #1		/* rotate to the first tag bit */
 	add	\reg, \reg, #0		/* insert the low 12 bits of the tag */
@@ -97,7 +97,7 @@ alternative_cb_end
 	hyp_pa	\reg, \tmp
 
 	/* Load kimage_voffset. */
-alternative_cb kvm_get_kimage_voffset
+alternative_cb ARM64_ALWAYS_SYSTEM, kvm_get_kimage_voffset
 	movz	\tmp, #0
 	movk	\tmp, #0, lsl #16
 	movk	\tmp, #0, lsl #32
@@ -131,6 +131,7 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v)
 				    "add %0, %0, #0\n"
 				    "add %0, %0, #0, lsl 12\n"
 				    "ror %0, %0, #63\n",
+				    ARM64_ALWAYS_SYSTEM,
 				    kvm_update_va_mask)
 		     : "+r" (v));
 	return v;
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 9f339dffbc1a..1b098bd4cd37 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -16,9 +16,9 @@
 static inline u64 kvm_get_parange(u64 mmfr0)
 {
 	u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
-				ID_AA64MMFR0_PARANGE_SHIFT);
-	if (parange > ID_AA64MMFR0_PARANGE_MAX)
-		parange = ID_AA64MMFR0_PARANGE_MAX;
+				ID_AA64MMFR0_EL1_PARANGE_SHIFT);
+	if (parange > ID_AA64MMFR0_EL1_PARANGE_MAX)
+		parange = ID_AA64MMFR0_EL1_PARANGE_MAX;
 
 	return parange;
 }
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 29c85810ae69..c503db8e73b0 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -13,14 +13,13 @@
 #include <linux/jump_label.h>
 #include <linux/stringify.h>
 #include <asm/alternative.h>
+#include <asm/alternative-macros.h>
 #include <asm/atomic_lse.h>
 #include <asm/cpucaps.h>
 
-extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
-
 static __always_inline bool system_uses_lse_atomics(void)
 {
-	return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]);
+	return alternative_has_feature_likely(ARM64_HAS_LSE_ATOMICS);
 }
 
 #define __lse_ll_sc_body(op, ...)					\
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 4e7fa2623896..18734fed3bdd 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -58,11 +58,20 @@ static inline bool is_forbidden_offset_for_adrp(void *place)
 }
 
 struct plt_entry get_plt_entry(u64 dst, void *pc);
-bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
 
-static inline bool plt_entry_is_initialized(const struct plt_entry *e)
+static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+				    const Elf_Shdr *sechdrs,
+				    const char *name)
 {
-	return e->adrp || e->add || e->br;
+	const Elf_Shdr *s, *se;
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+		if (strcmp(name, secstrs + s->sh_name) == 0)
+			return s;
+	}
+
+	return NULL;
 }
 
 #endif /* __ASM_MODULE_H */
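
The find_section() helper added above is a plain name lookup over the ELF section headers; later in this patch, apply_alternatives_vdso() uses it to locate ".altinstructions" inside the vDSO image. As a reference point, here is a stand-alone user-space sketch of the same walk using <elf.h>; the main() driver, file handling and program name are illustrative assumptions, not part of the kernel change.

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/* Same lookup as the kernel helper: compare each section's name against
 * the section-name string table indexed by e_shstrndx. */
static const Elf64_Shdr *find_section(const Elf64_Ehdr *hdr,
				      const Elf64_Shdr *sechdrs,
				      const char *name)
{
	const char *secstrs = (const char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
	const Elf64_Shdr *s;

	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++)
		if (strcmp(name, secstrs + s->sh_name) == 0)
			return s;
	return NULL;
}

int main(int argc, char **argv)
{
	struct stat st;
	const Elf64_Ehdr *hdr;
	const Elf64_Shdr *shdr, *alt;
	void *img;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || fstat(fd, &st))
		return 1;
	img = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (img == MAP_FAILED)
		return 1;
	hdr = img;
	shdr = (const Elf64_Shdr *)((const char *)img + hdr->e_shoff);
	alt = find_section(hdr, shdr, argc > 2 ? argv[2] : ".altinstructions");
	printf("%s\n", alt ? "section found" : "section not found");
	return 0;
}

Run against a shared object, e.g. "./find_section vdso.so .altinstructions", it prints whether the named section exists.
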
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index aa523591a44e..760c62f8e22f 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -42,7 +42,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
+void mte_cpu_setup(void);
 void mte_suspend_enter(void);
+void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
@@ -72,6 +74,9 @@ static inline void mte_thread_switch(struct task_struct *next)
 static inline void mte_suspend_enter(void)
 {
 }
+static inline void mte_suspend_exit(void)
+{
+}
 static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	return 0;
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 86eb0bfe3b38..61883518fc50 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -410,7 +410,7 @@ long get_tagged_addr_ctrl(struct task_struct *task);
  * The top of the current task's task stack
  */
 #define current_top_of_stack()	((unsigned long)current->stack + THREAD_SIZE)
-#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, 1, NULL))
+#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, 1))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 7bea1d705dd6..4292d9bafb9d 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -43,22 +43,5 @@ unsigned long do_sdei_event(struct pt_regs *regs,
 unsigned long sdei_arch_get_entry_point(int conduit);
 #define sdei_arch_get_entry_point(x)	sdei_arch_get_entry_point(x)
 
-struct stack_info;
-
-bool _on_sdei_stack(unsigned long sp, unsigned long size,
-		    struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp, unsigned long size,
-				struct stack_info *info)
-{
-	if (!IS_ENABLED(CONFIG_VMAP_STACK))
-		return false;
-	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
-		return false;
-	if (in_nmi())
-		return _on_sdei_stack(sp, size, info);
-
-	return false;
-}
-
 #endif /* __ASSEMBLY__ */
 #endif	/* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 6ebdcdff77f5..5a0edb064ea4 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -22,39 +22,86 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 
 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
 
-static inline bool on_irq_stack(unsigned long sp, unsigned long size,
-				struct stack_info *info)
+static inline struct stack_info stackinfo_get_irq(void)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
 	unsigned long high = low + IRQ_STACK_SIZE;
 
-	return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info);
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
 }
 
-static inline bool on_task_stack(const struct task_struct *tsk,
-				 unsigned long sp, unsigned long size,
-				 struct stack_info *info)
+static inline bool on_irq_stack(unsigned long sp, unsigned long size)
+{
+	struct stack_info info = stackinfo_get_irq();
+	return stackinfo_on_stack(&info, sp, size);
+}
+
+static inline struct stack_info stackinfo_get_task(const struct task_struct *tsk)
 {
 	unsigned long low = (unsigned long)task_stack_page(tsk);
 	unsigned long high = low + THREAD_SIZE;
 
-	return on_stack(sp, size, low, high, STACK_TYPE_TASK, info);
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static inline bool on_task_stack(const struct task_struct *tsk,
+				 unsigned long sp, unsigned long size)
+{
+	struct stack_info info = stackinfo_get_task(tsk);
+	return stackinfo_on_stack(&info, sp, size);
 }
 
 #ifdef CONFIG_VMAP_STACK
 DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
 
-static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
-				struct stack_info *info)
+static inline struct stack_info stackinfo_get_overflow(void)
 {
 	unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
 	unsigned long high = low + OVERFLOW_STACK_SIZE;
 
-	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+#else
+#define stackinfo_get_overflow()	stackinfo_get_unknown()
+#endif
+
+#if defined(CONFIG_ARM_SDE_INTERFACE) && defined(CONFIG_VMAP_STACK)
+DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+
+static inline struct stack_info stackinfo_get_sdei_normal(void)
+{
+	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
+	unsigned long high = low + SDEI_STACK_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static inline struct stack_info stackinfo_get_sdei_critical(void)
+{
+	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
+	unsigned long high = low + SDEI_STACK_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
 }
 #else
-static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
-			struct stack_info *info) { return false; }
+#define stackinfo_get_sdei_normal()	stackinfo_get_unknown()
+#define stackinfo_get_sdei_critical()	stackinfo_get_unknown()
 #endif
 
 #endif	/* __ASM_STACKTRACE_H */
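
Each of the on_*_stack() helpers above is now the composition of a stackinfo_get_*() getter with the generic stackinfo_on_stack() range check from the common header, which also rejects sp + size wrap-around. A minimal stand-alone illustration of that check, using made-up stack bounds:

#include <stdbool.h>
#include <stdio.h>

struct stack_info { unsigned long low, high; };

/* Same test as the kernel's stackinfo_on_stack(): the object [sp, sp + size)
 * must lie within the stack's bounds, and low == 0 means "no such stack". */
static bool stackinfo_on_stack(const struct stack_info *info,
			       unsigned long sp, unsigned long size)
{
	if (!info->low)
		return false;
	if (sp < info->low || sp + size < sp || sp + size > info->high)
		return false;
	return true;
}

int main(void)
{
	struct stack_info task = { .low = 0x1000, .high = 0x5000 }; /* made-up bounds */

	printf("%d\n", stackinfo_on_stack(&task, 0x2000, 16));		/* 1: fully in range */
	printf("%d\n", stackinfo_on_stack(&task, 0x4ff8, 16));		/* 0: runs past high */
	printf("%d\n", stackinfo_on_stack(&task, ~0UL - 8, 32));	/* 0: sp + size wraps */
	return 0;
}
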
diff --git a/arch/arm64/include/asm/stacktrace/common.h b/arch/arm64/include/asm/stacktrace/common.h
index f58eb944c46f..508f734de46e 100644
--- a/arch/arm64/include/asm/stacktrace/common.h
+++ b/arch/arm64/include/asm/stacktrace/common.h
@@ -2,13 +2,6 @@
 /*
  * Common arm64 stack unwinder code.
  *
- * To implement a new arm64 stack unwinder:
- *     1) Include this header
- *
- *     2) Call into unwind_next_common() from your top level unwind
- *        function, passing it the validation and translation callbacks
- *        (though the later can be NULL if no translation is required).
- *
  * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
  *
  * Copyright (C) 2012 ARM Ltd.
@@ -16,78 +9,60 @@
 #ifndef __ASM_STACKTRACE_COMMON_H
 #define __ASM_STACKTRACE_COMMON_H
 
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
 #include <linux/kprobes.h>
 #include <linux/types.h>
 
-enum stack_type {
-	STACK_TYPE_UNKNOWN,
-	STACK_TYPE_TASK,
-	STACK_TYPE_IRQ,
-	STACK_TYPE_OVERFLOW,
-	STACK_TYPE_SDEI_NORMAL,
-	STACK_TYPE_SDEI_CRITICAL,
-	STACK_TYPE_HYP,
-	__NR_STACK_TYPES
-};
-
 struct stack_info {
 	unsigned long low;
 	unsigned long high;
-	enum stack_type type;
 };
 
-/*
- * A snapshot of a frame record or fp/lr register values, along with some
- * accounting information necessary for robust unwinding.
+/**
+ * struct unwind_state - state used for robust unwinding.
  *
  * @fp:          The fp value in the frame record (or the real fp)
  * @pc:          The lr value in the frame record (or the real lr)
  *
- * @stacks_done: Stacks which have been entirely unwound, for which it is no
- *               longer valid to unwind to.
- *
- * @prev_fp:     The fp that pointed to this frame record, or a synthetic value
- *               of 0. This is used to ensure that within a stack, each
- *               subsequent frame record is at an increasing address.
- * @prev_type:   The type of stack this frame record was on, or a synthetic
- *               value of STACK_TYPE_UNKNOWN. This is used to detect a
- *               transition from one stack to another.
- *
  * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
  *               associated with the most recently encountered replacement lr
  *               value.
  *
  * @task:        The task being unwound.
+ *
+ * @stack:       The stack currently being unwound.
+ * @stacks:      An array of stacks which can be unwound.
+ * @nr_stacks:   The number of stacks in @stacks.
  */
 struct unwind_state {
 	unsigned long fp;
 	unsigned long pc;
-	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
-	unsigned long prev_fp;
-	enum stack_type prev_type;
 #ifdef CONFIG_KRETPROBES
 	struct llist_node *kr_cur;
 #endif
 	struct task_struct *task;
+
+	struct stack_info stack;
+	struct stack_info *stacks;
+	int nr_stacks;
 };
 
-static inline bool on_stack(unsigned long sp, unsigned long size,
-			    unsigned long low, unsigned long high,
-			    enum stack_type type, struct stack_info *info)
+static inline struct stack_info stackinfo_get_unknown(void)
+{
+	return (struct stack_info) {
+		.low = 0,
+		.high = 0,
+	};
+}
+
+static inline bool stackinfo_on_stack(const struct stack_info *info,
+				      unsigned long sp, unsigned long size)
 {
-	if (!low)
+	if (!info->low)
 		return false;
 
-	if (sp < low || sp + size < sp || sp + size > high)
+	if (sp < info->low || sp + size < sp || sp + size > info->high)
 		return false;
 
-	if (info) {
-		info->low = low;
-		info->high = high;
-		info->type = type;
-	}
 	return true;
 }
 
@@ -99,99 +74,101 @@ static inline void unwind_init_common(struct unwind_state *state,
 	state->kr_cur = NULL;
 #endif
 
-	/*
-	 * Prime the first unwind.
-	 *
-	 * In unwind_next() we'll check that the FP points to a valid stack,
-	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
-	 * treated as a transition to whichever stack that happens to be. The
-	 * prev_fp value won't be used, but we set it to 0 such that it is
-	 * definitely not an accessible stack address.
-	 */
-	bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
-	state->prev_fp = 0;
-	state->prev_type = STACK_TYPE_UNKNOWN;
+	state->stack = stackinfo_get_unknown();
 }
 
-/*
- * stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
- * a kernel address.
- *
- * @fp:   the frame pointer to be updated to its kernel address.
- * @type: the stack type associated with frame pointer @fp
- *
- * Returns true and success and @fp is updated to the corresponding
- * kernel virtual address; otherwise returns false.
- */
-typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
-					    enum stack_type type);
+static struct stack_info *unwind_find_next_stack(const struct unwind_state *state,
+						 unsigned long sp,
+						 unsigned long size)
+{
+	for (int i = 0; i < state->nr_stacks; i++) {
+		struct stack_info *info = &state->stacks[i];
 
-/*
- * on_accessible_stack_fn() - Check whether a stack range is on any
- * of the possible stacks.
+		if (stackinfo_on_stack(info, sp, size))
+			return info;
+	}
+
+	return NULL;
+}
+
+/**
+ * unwind_consume_stack() - Check if an object is on an accessible stack,
+ * updating stack boundaries so that future unwind steps cannot consume this
+ * object again.
  *
- * @tsk:  task whose stack is being unwound
- * @sp:   stack address being checked
- * @size: size of the stack range being checked
- * @info: stack unwinding context
+ * @state: the current unwind state.
+ * @sp:    the base address of the object.
+ * @size:  the size of the object.
+ *
+ * Return: 0 upon success, an error code otherwise.
  */
-typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
-				       unsigned long sp, unsigned long size,
-				       struct stack_info *info);
-
-static inline int unwind_next_common(struct unwind_state *state,
-				     struct stack_info *info,
-				     on_accessible_stack_fn accessible,
-				     stack_trace_translate_fp_fn translate_fp)
+static inline int unwind_consume_stack(struct unwind_state *state,
+				       unsigned long sp,
+				       unsigned long size)
 {
-	unsigned long fp = state->fp, kern_fp = fp;
-	struct task_struct *tsk = state->task;
+	struct stack_info *next;
 
-	if (fp & 0x7)
-		return -EINVAL;
+	if (stackinfo_on_stack(&state->stack, sp, size))
+		goto found;
 
-	if (!accessible(tsk, fp, 16, info))
-		return -EINVAL;
-
-	if (test_bit(info->type, state->stacks_done))
+	next = unwind_find_next_stack(state, sp, size);
+	if (!next)
 		return -EINVAL;
 
 	/*
-	 * If fp is not from the current address space perform the necessary
-	 * translation before dereferencing it to get the next fp.
-	 */
-	if (translate_fp && !translate_fp(&kern_fp, info->type))
-		return -EINVAL;
-
-	/*
-	 * As stacks grow downward, any valid record on the same stack must be
-	 * at a strictly higher address than the prior record.
+	 * Stack transitions are strictly one-way, and once we've
+	 * transitioned from one stack to another, it's never valid to
+	 * unwind back to the old stack.
+	 *
+	 * Remove the current stack from the list of stacks so that it cannot
+	 * be found on a subsequent transition.
 	 *
-	 * Stacks can nest in several valid orders, e.g.
+	 * Note that stacks can nest in several valid orders, e.g.
 	 *
-	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
-	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
-	 * HYP -> OVERFLOW
+	 *   TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+	 *   TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+	 *   HYP -> OVERFLOW
 	 *
-	 * ... but the nesting itself is strict. Once we transition from one
-	 * stack to another, it's never valid to unwind back to that first
-	 * stack.
+	 * ... so we do not check the specific order of stack
+	 * transitions.
 	 */
-	if (info->type == state->prev_type) {
-		if (fp <= state->prev_fp)
-			return -EINVAL;
-	} else {
-		__set_bit(state->prev_type, state->stacks_done);
-	}
+	state->stack = *next;
+	*next = stackinfo_get_unknown();
+
+found:
+	/*
+	 * Future unwind steps can only consume stack above this frame record.
+	 * Update the current stack to start immediately above it.
+	 */
+	state->stack.low = sp + size;
+	return 0;
+}
+
+/**
+ * unwind_next_frame_record() - Unwind to the next frame record.
+ *
+ * @state:        the current unwind state.
+ *
+ * Return: 0 upon success, an error code otherwise.
+ */
+static inline int
+unwind_next_frame_record(struct unwind_state *state)
+{
+	unsigned long fp = state->fp;
+	int err;
+
+	if (fp & 0x7)
+		return -EINVAL;
+
+	err = unwind_consume_stack(state, fp, 16);
+	if (err)
+		return err;
 
 	/*
-	 * Record this frame record's values and location. The prev_fp and
-	 * prev_type are only meaningful to the next unwind_next() invocation.
+	 * Record this frame record's values.
 	 */
-	state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
-	state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
-	state->prev_fp = fp;
-	state->prev_type = info->type;
+	state->fp = READ_ONCE(*(unsigned long *)(fp));
+	state->pc = READ_ONCE(*(unsigned long *)(fp + 8));
 
 	return 0;
 }
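
The effect of unwind_consume_stack() is easiest to see end to end: each consumed record shrinks the current stack to the range above it, and when the unwinder transitions to another stack, the stack it left is erased from the candidate list, so a later frame pointer back into it is rejected. Below is a stand-alone model of that bookkeeping; the stack bounds, record addresses and the consume() driver are invented for illustration and are not part of the patch.

#include <stdbool.h>
#include <stdio.h>

struct stack_info { unsigned long low, high; };

struct unwind_state {
	unsigned long fp, pc;
	struct stack_info stack;	/* stack currently being unwound */
	struct stack_info *stacks;	/* remaining candidate stacks */
	int nr_stacks;
};

static bool on_stack(const struct stack_info *info, unsigned long sp, unsigned long size)
{
	return info->low && sp >= info->low && sp + size >= sp && sp + size <= info->high;
}

/* Mirrors unwind_consume_stack(): find the stack holding [sp, sp + size),
 * drop the stack being left from the candidate list, and forbid re-reading
 * anything at or below the record just consumed. */
static int consume(struct unwind_state *state, unsigned long sp, unsigned long size)
{
	if (!on_stack(&state->stack, sp, size)) {
		int i;

		for (i = 0; i < state->nr_stacks; i++)
			if (on_stack(&state->stacks[i], sp, size))
				break;
		if (i == state->nr_stacks)
			return -1;

		state->stack = state->stacks[i];
		state->stacks[i] = (struct stack_info){ 0, 0 };
	}

	state->stack.low = sp + size;
	return 0;
}

int main(void)
{
	/* Two invented stacks: a "task" stack and an "irq" stack. */
	struct stack_info stacks[] = { { 0x1000, 0x2000 }, { 0x8000, 0x9000 } };
	struct unwind_state state = { .stacks = stacks, .nr_stacks = 2 };

	printf("%d\n", consume(&state, 0x1f00, 16));	/*  0: first record, task stack  */
	printf("%d\n", consume(&state, 0x8100, 16));	/*  0: transition to irq stack   */
	printf("%d\n", consume(&state, 0x1f80, 16));	/* -1: task stack already erased */
	return 0;
}
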
diff --git a/arch/arm64/include/asm/stacktrace/nvhe.h b/arch/arm64/include/asm/stacktrace/nvhe.h
index d5527b600390..25ab83a315a7 100644
--- a/arch/arm64/include/asm/stacktrace/nvhe.h
+++ b/arch/arm64/include/asm/stacktrace/nvhe.h
@@ -20,8 +20,8 @@
 
 #include <asm/stacktrace/common.h>
 
-/*
- * kvm_nvhe_unwind_init - Start an unwind from the given nVHE HYP fp and pc
+/**
+ * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc
  *
  * @state : unwind_state to initialize
  * @fp    : frame pointer at which to start the unwinding.
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 818df938a7ad..debc1c0b2b7f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -190,19 +190,6 @@
 #define SYS_MVFR1_EL1			sys_reg(3, 0, 0, 3, 1)
 #define SYS_MVFR2_EL1			sys_reg(3, 0, 0, 3, 2)
 
-#define SYS_ID_AA64PFR0_EL1		sys_reg(3, 0, 0, 4, 0)
-#define SYS_ID_AA64PFR1_EL1		sys_reg(3, 0, 0, 4, 1)
-
-#define SYS_ID_AA64DFR0_EL1		sys_reg(3, 0, 0, 5, 0)
-#define SYS_ID_AA64DFR1_EL1		sys_reg(3, 0, 0, 5, 1)
-
-#define SYS_ID_AA64AFR0_EL1		sys_reg(3, 0, 0, 5, 4)
-#define SYS_ID_AA64AFR1_EL1		sys_reg(3, 0, 0, 5, 5)
-
-#define SYS_ID_AA64MMFR0_EL1		sys_reg(3, 0, 0, 7, 0)
-#define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
-#define SYS_ID_AA64MMFR2_EL1		sys_reg(3, 0, 0, 7, 2)
-
 #define SYS_ACTLR_EL1			sys_reg(3, 0, 1, 0, 1)
 #define SYS_RGSR_EL1			sys_reg(3, 0, 1, 0, 5)
 #define SYS_GCR_EL1			sys_reg(3, 0, 1, 0, 6)
@@ -436,19 +423,11 @@
 #define SYS_ICC_IGRPEN0_EL1		sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1		sys_reg(3, 0, 12, 12, 7)
 
-#define SYS_TPIDR_EL1			sys_reg(3, 0, 13, 0, 4)
-
-#define SYS_SCXTNUM_EL1			sys_reg(3, 0, 13, 0, 7)
-
 #define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_CCSIDR_EL1			sys_reg(3, 1, 0, 0, 0)
 #define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
 
-#define SMIDR_EL1_IMPLEMENTER_SHIFT	24
-#define SMIDR_EL1_SMPS_SHIFT	15
-#define SMIDR_EL1_AFFINITY_SHIFT	0
-
 #define SYS_RNDR_EL0			sys_reg(3, 3, 2, 4, 0)
 #define SYS_RNDRRS_EL0			sys_reg(3, 3, 2, 4, 1)
 
@@ -537,7 +516,6 @@
 #define SYS_HFGWTR_EL2			sys_reg(3, 4, 1, 1, 5)
 #define SYS_HFGITR_EL2			sys_reg(3, 4, 1, 1, 6)
 #define SYS_TRFCR_EL2			sys_reg(3, 4, 1, 2, 1)
-#define SYS_HCRX_EL2			sys_reg(3, 4, 1, 2, 2)
 #define SYS_HDFGRTR_EL2			sys_reg(3, 4, 3, 1, 4)
 #define SYS_HDFGWTR_EL2			sys_reg(3, 4, 3, 1, 5)
 #define SYS_HAFGRTR_EL2			sys_reg(3, 4, 3, 1, 6)
@@ -690,164 +668,30 @@
 #define MAIR_ATTRIDX(attr, idx)		((attr) << ((idx) * 8))
 
 /* id_aa64pfr0 */
-#define ID_AA64PFR0_CSV3_SHIFT		60
-#define ID_AA64PFR0_CSV2_SHIFT		56
-#define ID_AA64PFR0_DIT_SHIFT		48
-#define ID_AA64PFR0_AMU_SHIFT		44
-#define ID_AA64PFR0_MPAM_SHIFT		40
-#define ID_AA64PFR0_SEL2_SHIFT		36
-#define ID_AA64PFR0_SVE_SHIFT		32
-#define ID_AA64PFR0_RAS_SHIFT		28
-#define ID_AA64PFR0_GIC_SHIFT		24
-#define ID_AA64PFR0_ASIMD_SHIFT		20
-#define ID_AA64PFR0_FP_SHIFT		16
-#define ID_AA64PFR0_EL3_SHIFT		12
-#define ID_AA64PFR0_EL2_SHIFT		8
-#define ID_AA64PFR0_EL1_SHIFT		4
-#define ID_AA64PFR0_EL0_SHIFT		0
-
-#define ID_AA64PFR0_AMU			0x1
-#define ID_AA64PFR0_SVE			0x1
-#define ID_AA64PFR0_RAS_V1		0x1
-#define ID_AA64PFR0_RAS_V1P1		0x2
-#define ID_AA64PFR0_FP_NI		0xf
-#define ID_AA64PFR0_FP_SUPPORTED	0x0
-#define ID_AA64PFR0_ASIMD_NI		0xf
-#define ID_AA64PFR0_ASIMD_SUPPORTED	0x0
-#define ID_AA64PFR0_ELx_64BIT_ONLY	0x1
-#define ID_AA64PFR0_ELx_32BIT_64BIT	0x2
-
-/* id_aa64pfr1 */
-#define ID_AA64PFR1_SME_SHIFT		24
-#define ID_AA64PFR1_MPAMFRAC_SHIFT	16
-#define ID_AA64PFR1_RASFRAC_SHIFT	12
-#define ID_AA64PFR1_MTE_SHIFT		8
-#define ID_AA64PFR1_SSBS_SHIFT		4
-#define ID_AA64PFR1_BT_SHIFT		0
-
-#define ID_AA64PFR1_SSBS_PSTATE_NI	0
-#define ID_AA64PFR1_SSBS_PSTATE_ONLY	1
-#define ID_AA64PFR1_SSBS_PSTATE_INSNS	2
-#define ID_AA64PFR1_BT_BTI		0x1
-#define ID_AA64PFR1_SME			1
-
-#define ID_AA64PFR1_MTE_NI		0x0
-#define ID_AA64PFR1_MTE_EL0		0x1
-#define ID_AA64PFR1_MTE			0x2
-#define ID_AA64PFR1_MTE_ASYMM		0x3
+#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY		0x1
+#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT		0x2
 
 /* id_aa64mmfr0 */
-#define ID_AA64MMFR0_ECV_SHIFT		60
-#define ID_AA64MMFR0_FGT_SHIFT		56
-#define ID_AA64MMFR0_EXS_SHIFT		44
-#define ID_AA64MMFR0_TGRAN4_2_SHIFT	40
-#define ID_AA64MMFR0_TGRAN64_2_SHIFT	36
-#define ID_AA64MMFR0_TGRAN16_2_SHIFT	32
-#define ID_AA64MMFR0_TGRAN4_SHIFT	28
-#define ID_AA64MMFR0_TGRAN64_SHIFT	24
-#define ID_AA64MMFR0_TGRAN16_SHIFT	20
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
-#define ID_AA64MMFR0_SNSMEM_SHIFT	12
-#define ID_AA64MMFR0_BIGENDEL_SHIFT	8
-#define ID_AA64MMFR0_ASID_SHIFT		4
-#define ID_AA64MMFR0_PARANGE_SHIFT	0
-
-#define ID_AA64MMFR0_ASID_8		0x0
-#define ID_AA64MMFR0_ASID_16		0x2
-
-#define ID_AA64MMFR0_TGRAN4_NI			0xf
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN	0x0
-#define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX	0x7
-#define ID_AA64MMFR0_TGRAN64_NI			0xf
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN	0x0
-#define ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX	0x7
-#define ID_AA64MMFR0_TGRAN16_NI			0x0
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN	0x1
-#define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX	0xf
-
-#define ID_AA64MMFR0_PARANGE_32		0x0
-#define ID_AA64MMFR0_PARANGE_36		0x1
-#define ID_AA64MMFR0_PARANGE_40		0x2
-#define ID_AA64MMFR0_PARANGE_42		0x3
-#define ID_AA64MMFR0_PARANGE_44		0x4
-#define ID_AA64MMFR0_PARANGE_48		0x5
-#define ID_AA64MMFR0_PARANGE_52		0x6
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN	0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN	0x0
+#define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN	0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX	0xf
 
 #define ARM64_MIN_PARANGE_BITS		32
 
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT	0x0
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE	0x1
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN	0x2
-#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX	0x7
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT	0x0
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE		0x1
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN		0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX		0x7
 
 #ifdef CONFIG_ARM64_PA_BITS_52
-#define ID_AA64MMFR0_PARANGE_MAX	ID_AA64MMFR0_PARANGE_52
+#define ID_AA64MMFR0_EL1_PARANGE_MAX	ID_AA64MMFR0_EL1_PARANGE_52
 #else
-#define ID_AA64MMFR0_PARANGE_MAX	ID_AA64MMFR0_PARANGE_48
+#define ID_AA64MMFR0_EL1_PARANGE_MAX	ID_AA64MMFR0_EL1_PARANGE_48
 #endif
 
-/* id_aa64mmfr1 */
-#define ID_AA64MMFR1_ECBHB_SHIFT	60
-#define ID_AA64MMFR1_TIDCP1_SHIFT	52
-#define ID_AA64MMFR1_HCX_SHIFT		40
-#define ID_AA64MMFR1_AFP_SHIFT		44
-#define ID_AA64MMFR1_ETS_SHIFT		36
-#define ID_AA64MMFR1_TWED_SHIFT		32
-#define ID_AA64MMFR1_XNX_SHIFT		28
-#define ID_AA64MMFR1_SPECSEI_SHIFT	24
-#define ID_AA64MMFR1_PAN_SHIFT		20
-#define ID_AA64MMFR1_LOR_SHIFT		16
-#define ID_AA64MMFR1_HPD_SHIFT		12
-#define ID_AA64MMFR1_VHE_SHIFT		8
-#define ID_AA64MMFR1_VMIDBITS_SHIFT	4
-#define ID_AA64MMFR1_HADBS_SHIFT	0
-
-#define ID_AA64MMFR1_VMIDBITS_8		0
-#define ID_AA64MMFR1_VMIDBITS_16	2
-
-#define ID_AA64MMFR1_TIDCP1_NI		0
-#define ID_AA64MMFR1_TIDCP1_IMP		1
-
-/* id_aa64mmfr2 */
-#define ID_AA64MMFR2_E0PD_SHIFT		60
-#define ID_AA64MMFR2_EVT_SHIFT		56
-#define ID_AA64MMFR2_BBM_SHIFT		52
-#define ID_AA64MMFR2_TTL_SHIFT		48
-#define ID_AA64MMFR2_FWB_SHIFT		40
-#define ID_AA64MMFR2_IDS_SHIFT		36
-#define ID_AA64MMFR2_AT_SHIFT		32
-#define ID_AA64MMFR2_ST_SHIFT		28
-#define ID_AA64MMFR2_NV_SHIFT		24
-#define ID_AA64MMFR2_CCIDX_SHIFT	20
-#define ID_AA64MMFR2_LVA_SHIFT		16
-#define ID_AA64MMFR2_IESB_SHIFT		12
-#define ID_AA64MMFR2_LSM_SHIFT		8
-#define ID_AA64MMFR2_UAO_SHIFT		4
-#define ID_AA64MMFR2_CNP_SHIFT		0
-
-/* id_aa64dfr0 */
-#define ID_AA64DFR0_MTPMU_SHIFT		48
-#define ID_AA64DFR0_TRBE_SHIFT		44
-#define ID_AA64DFR0_TRACE_FILT_SHIFT	40
-#define ID_AA64DFR0_DOUBLELOCK_SHIFT	36
-#define ID_AA64DFR0_PMSVER_SHIFT	32
-#define ID_AA64DFR0_CTX_CMPS_SHIFT	28
-#define ID_AA64DFR0_WRPS_SHIFT		20
-#define ID_AA64DFR0_BRPS_SHIFT		12
-#define ID_AA64DFR0_PMUVER_SHIFT	8
-#define ID_AA64DFR0_TRACEVER_SHIFT	4
-#define ID_AA64DFR0_DEBUGVER_SHIFT	0
-
-#define ID_AA64DFR0_PMUVER_8_0		0x1
-#define ID_AA64DFR0_PMUVER_8_1		0x4
-#define ID_AA64DFR0_PMUVER_8_4		0x5
-#define ID_AA64DFR0_PMUVER_8_5		0x6
-#define ID_AA64DFR0_PMUVER_8_7		0x7
-#define ID_AA64DFR0_PMUVER_IMP_DEF	0xf
-
-#define ID_AA64DFR0_PMSVER_8_2		0x1
-#define ID_AA64DFR0_PMSVER_8_3		0x2
-
 #define ID_DFR0_PERFMON_SHIFT		24
 
 #define ID_DFR0_PERFMON_8_0		0x3
@@ -955,20 +799,20 @@
 #define ID_PFR1_PROGMOD_SHIFT		0
 
 #if defined(CONFIG_ARM64_4K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT		ID_AA64MMFR0_TGRAN4_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
 #elif defined(CONFIG_ARM64_16K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT		ID_AA64MMFR0_TGRAN16_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
 #elif defined(CONFIG_ARM64_64K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT		ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
-#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
-#define ID_AA64MMFR0_TGRAN_2_SHIFT		ID_AA64MMFR0_TGRAN64_2_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN64_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN
+#define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX
+#define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT
 #endif
 
 #define MVFR2_FPMISC_SHIFT		4
@@ -1028,9 +872,6 @@
 #define TRFCR_ELx_ExTRE			BIT(1)
 #define TRFCR_ELx_E0TRE			BIT(0)
 
-/* HCRX_EL2 definitions */
-#define HCRX_EL2_SMPME_MASK		(1 << 5)
-
 /* GIC Hypervisor interface registers */
 /* ICH_MISR_EL2 bit definitions */
 #define ICH_MISR_EOI		(1 << 0)
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 0eb7709422e2..c34344256762 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -18,7 +18,7 @@
 
 struct pt_regs;
 
-void die(const char *msg, struct pt_regs *regs, int err);
+void die(const char *msg, struct pt_regs *regs, long err);
 
 struct siginfo;
 void arm64_notify_die(const char *str, struct pt_regs *regs,
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 2fc9f0861769..5c7b2f9d5913 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -203,9 +203,11 @@ static inline void uaccess_enable_privileged(void)
 }
 
 /*
- * Sanitise a uaccess pointer such that it becomes NULL if above the maximum
- * user address. In case the pointer is tagged (has the top byte set), untag
- * the pointer before checking.
+ * Sanitize a uaccess pointer such that it cannot reach any kernel address.
+ *
+ * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1
+ * address range (i.e. any kernel address); the result either falls within the
+ * TTBR0 address range or must cause a fault.
  */
 #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr)
 static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
@@ -213,14 +215,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr)
 	void __user *safe_ptr;
 
 	asm volatile(
-	"	bics	xzr, %3, %2\n"
-	"	csel	%0, %1, xzr, eq\n"
-	: "=&r" (safe_ptr)
-	: "r" (ptr), "r" (TASK_SIZE_MAX - 1),
-	  "r" (untagged_addr(ptr))
-	: "cc");
-
-	csdb();
+	"	bic	%0, %1, %2\n"
+	: "=r" (safe_ptr)
+	: "r" (ptr),
+	  "i" (BIT(55))
+	);
+
 	return safe_ptr;
 }
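
A quick numeric check of the new masking (the addresses below are examples only): a TTBR1 (kernel) pointer always has bit 55 set, so clearing that bit pushes it out of the kernel address range, while a genuine TTBR0 (user) pointer is left unchanged.

#include <stdio.h>

int main(void)
{
	unsigned long user_ptr   = 0x0000aaaabbbb0000UL;	/* example TTBR0 address */
	unsigned long kernel_ptr = 0xffff800008001000UL;	/* example TTBR1 address */
	unsigned long mask       = ~(1UL << 55);		/* what "bic %0, %1, #BIT(55)" computes */

	printf("user:   %016lx -> %016lx\n", user_ptr, user_ptr & mask);
	printf("kernel: %016lx -> %016lx\n", kernel_ptr, kernel_ptr & mask);
	/* The user pointer is unchanged; the kernel pointer becomes
	 * 0xff7f800008001000, which is no longer a valid TTBR1 address. */
	return 0;
}
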
 
diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h
index f99dcb94b438..b4ae32109932 100644
--- a/arch/arm64/include/asm/vdso.h
+++ b/arch/arm64/include/asm/vdso.h
@@ -26,6 +26,9 @@
 	(void *)(vdso_offset_##name - VDSO_LBASE + (unsigned long)(base)); \
 })
 
+extern char vdso_start[], vdso_end[];
+extern char vdso32_start[], vdso32_end[];
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_H */
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 4f7a629df81f..764d13e2916c 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -7,8 +7,10 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/alternative.h>
 #include <asm/barrier.h>
 #include <asm/unistd.h>
+#include <asm/sysreg.h>
 
 #define VDSO_HAS_CLOCK_GETRES		1
 
@@ -78,11 +80,20 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
 		return 0;
 
 	/*
-	 * This isb() is required to prevent that the counter value
+	 * If FEAT_ECV is available, use the self-synchronizing counter.
+	 * Otherwise the isb is required to prevent that the counter value
 	 * is speculated.
-	 */
-	isb();
-	asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
+	*/
+	asm volatile(
+	ALTERNATIVE("isb\n"
+		    "mrs %0, cntvct_el0",
+		    "nop\n"
+		    __mrs_s("%0", SYS_CNTVCTSS_EL0),
+		    ARM64_HAS_ECV)
+	: "=r" (res)
+	:
+	: "memory");
+
 	arch_counter_enforce_ordering(res);
 
 	return res;
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 1ad2568a2569..9b245da6f507 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -92,5 +92,6 @@
 #define HWCAP2_SME_FA64		(1 << 30)
 #define HWCAP2_WFXT		(1UL << 31)
 #define HWCAP2_EBF16		(1UL << 32)
+#define HWCAP2_SVE_EBF16	(1UL << 33)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h
index d54daafa89e3..86e556429e0e 100644
--- a/arch/arm64/include/uapi/asm/perf_regs.h
+++ b/arch/arm64/include/uapi/asm/perf_regs.h
@@ -37,5 +37,12 @@ enum perf_event_arm_regs {
 	PERF_REG_ARM64_SP,
 	PERF_REG_ARM64_PC,
 	PERF_REG_ARM64_MAX,
+
+	/* Extended/pseudo registers */
+	PERF_REG_ARM64_VG = 46,				/* SVE Vector Granule */
+	PERF_REG_ARM64_EXTENDED_MAX
 };
+
+#define PERF_REG_EXTENDED_MASK	(1ULL << PERF_REG_ARM64_VG)
+
 #endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 1add7b01efa7..38a0b0291edb 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -45,6 +45,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
 obj-$(CONFIG_COMPAT)			+= sys32.o signal32.o			\
 					   sys_compat.o
 obj-$(CONFIG_COMPAT)			+= sigreturn32.o
+obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS)	+= compat_alignment.o
 obj-$(CONFIG_KUSER_HELPERS)		+= kuser32.o
 obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o entry-ftrace.o
 obj-$(CONFIG_MODULES)			+= module.o
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d2c66507398d..91263d09ea65 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -10,17 +10,23 @@
 
 #include <linux/init.h>
 #include <linux/cpu.h>
+#include <linux/elf.h>
 #include <asm/cacheflush.h>
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/insn.h>
+#include <asm/module.h>
 #include <asm/sections.h>
+#include <asm/vdso.h>
 #include <linux/stop_machine.h>
 
 #define __ALT_PTR(a, f)		((void *)&(a)->f + (a)->f)
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
 
+#define ALT_CAP(a)		((a)->cpufeature & ~ARM64_CB_BIT)
+#define ALT_HAS_CB(a)		((a)->cpufeature & ARM64_CB_BIT)
+
 /* Volatile, as we may be patching the guts of READ_ONCE() */
 static volatile int all_alternatives_applied;
 
@@ -133,7 +139,8 @@ static void clean_dcache_range_nopatch(u64 start, u64 end)
 	} while (cur += d_size, cur < end);
 }
 
-static void __apply_alternatives(struct alt_region *region, bool is_module,
+static void __apply_alternatives(const struct alt_region *region,
+				 bool is_module,
 				 unsigned long *feature_mask)
 {
 	struct alt_instr *alt;
@@ -142,30 +149,27 @@ static void __apply_alternatives(struct alt_region *region, bool is_module,
 
 	for (alt = region->begin; alt < region->end; alt++) {
 		int nr_inst;
+		int cap = ALT_CAP(alt);
 
-		if (!test_bit(alt->cpufeature, feature_mask))
+		if (!test_bit(cap, feature_mask))
 			continue;
 
-		/* Use ARM64_CB_PATCH as an unconditional patch */
-		if (alt->cpufeature < ARM64_CB_PATCH &&
-		    !cpus_have_cap(alt->cpufeature))
+		if (!cpus_have_cap(cap))
 			continue;
 
-		if (alt->cpufeature == ARM64_CB_PATCH)
+		if (ALT_HAS_CB(alt))
 			BUG_ON(alt->alt_len != 0);
 		else
 			BUG_ON(alt->alt_len != alt->orig_len);
 
-		pr_info_once("patching kernel code\n");
-
 		origptr = ALT_ORIG_PTR(alt);
 		updptr = is_module ? origptr : lm_alias(origptr);
 		nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
 
-		if (alt->cpufeature < ARM64_CB_PATCH)
-			alt_cb = patch_alternative;
-		else
+		if (ALT_HAS_CB(alt))
 			alt_cb  = ALT_REPL_PTR(alt);
+		else
+			alt_cb = patch_alternative;
 
 		alt_cb(alt, origptr, updptr, nr_inst);
 
@@ -192,30 +196,55 @@ static void __apply_alternatives(struct alt_region *region, bool is_module,
 	}
 }
 
+void apply_alternatives_vdso(void)
+{
+	struct alt_region region;
+	const struct elf64_hdr *hdr;
+	const struct elf64_shdr *shdr;
+	const struct elf64_shdr *alt;
+	DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);
+
+	bitmap_fill(all_capabilities, ARM64_NCAPS);
+
+	hdr = (struct elf64_hdr *)vdso_start;
+	shdr = (void *)hdr + hdr->e_shoff;
+	alt = find_section(hdr, shdr, ".altinstructions");
+	if (!alt)
+		return;
+
+	region = (struct alt_region){
+		.begin	= (void *)hdr + alt->sh_offset,
+		.end	= (void *)hdr + alt->sh_offset + alt->sh_size,
+	};
+
+	__apply_alternatives(&region, false, &all_capabilities[0]);
+}
+
+static const struct alt_region kernel_alternatives = {
+	.begin	= (struct alt_instr *)__alt_instructions,
+	.end	= (struct alt_instr *)__alt_instructions_end,
+};
+
 /*
  * We might be patching the stop_machine state machine, so implement a
  * really simple polling protocol here.
  */
 static int __apply_alternatives_multi_stop(void *unused)
 {
-	struct alt_region region = {
-		.begin	= (struct alt_instr *)__alt_instructions,
-		.end	= (struct alt_instr *)__alt_instructions_end,
-	};
-
 	/* We always have a CPU 0 at this point (__init) */
 	if (smp_processor_id()) {
 		while (!all_alternatives_applied)
 			cpu_relax();
 		isb();
 	} else {
-		DECLARE_BITMAP(remaining_capabilities, ARM64_NPATCHABLE);
+		DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS);
 
 		bitmap_complement(remaining_capabilities, boot_capabilities,
-				  ARM64_NPATCHABLE);
+				  ARM64_NCAPS);
 
 		BUG_ON(all_alternatives_applied);
-		__apply_alternatives(&region, false, remaining_capabilities);
+		__apply_alternatives(&kernel_alternatives, false,
+				     remaining_capabilities);
 		/* Barriers provided by the cache flushing */
 		all_alternatives_applied = 1;
 	}
@@ -225,6 +254,9 @@ static int __apply_alternatives_multi_stop(void *unused)
 
 void __init apply_alternatives_all(void)
 {
+	pr_info("applying system-wide alternatives\n");
+
+	apply_alternatives_vdso();
 	/* better not try code patching on a live SMP system */
 	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
 }
@@ -236,15 +268,13 @@ void __init apply_alternatives_all(void)
  */
 void __init apply_boot_alternatives(void)
 {
-	struct alt_region region = {
-		.begin	= (struct alt_instr *)__alt_instructions,
-		.end	= (struct alt_instr *)__alt_instructions_end,
-	};
-
 	/* If called on non-boot cpu things could go wrong */
 	WARN_ON(smp_processor_id() != 0);
 
-	__apply_alternatives(&region, false, &boot_capabilities[0]);
+	pr_info("applying boot alternatives\n");
+
+	__apply_alternatives(&kernel_alternatives, false,
+			     &boot_capabilities[0]);
 }
 
 #ifdef CONFIG_MODULES
@@ -254,10 +284,18 @@ void apply_alternatives_module(void *start, size_t length)
 		.begin	= start,
 		.end	= start + length,
 	};
-	DECLARE_BITMAP(all_capabilities, ARM64_NPATCHABLE);
+	DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);
 
-	bitmap_fill(all_capabilities, ARM64_NPATCHABLE);
+	bitmap_fill(all_capabilities, ARM64_NCAPS);
 
 	__apply_alternatives(&region, true, &all_capabilities[0]);
 }
 #endif
+
+noinstr void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
+			       __le32 *updptr, int nr_inst)
+{
+	for (int i = 0; i < nr_inst; i++)
+		updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+}
+EXPORT_SYMBOL(alt_cb_patch_nops);
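
The new ALT_CAP()/ALT_HAS_CB() macros imply that callback alternatives now mark themselves with a flag bit OR'd into the cpufeature field rather than using a magic capability number. A stand-alone sketch of that packing; the kernel macros operate on alt->cpufeature, and ARM64_CB_BIT is defined in <asm/alternative-macros.h> (not shown in this hunk), so the bit position below is an assumption for illustration.

#include <stdio.h>

/* Assumed flag position, for illustration only. */
#define ARM64_CB_BIT	(1U << 15)
#define ALT_CAP(c)	((c) & ~ARM64_CB_BIT)	/* capability the entry depends on */
#define ALT_HAS_CB(c)	((c) & ARM64_CB_BIT)	/* entry is patched via a callback */

int main(void)
{
	unsigned int plain = 42;			/* ordinary capability number    */
	unsigned int cb    = 42 | ARM64_CB_BIT;		/* same capability, callback form */

	printf("plain: cap=%u has_cb=%d\n", ALT_CAP(plain), !!ALT_HAS_CB(plain));
	printf("cb:    cap=%u has_cb=%d\n", ALT_CAP(cb), !!ALT_HAS_CB(cb));
	return 0;
}
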
diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c
new file mode 100644
index 000000000000..5edec2f49ec9
--- /dev/null
+++ b/arch/arm64/kernel/compat_alignment.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// based on arch/arm/mm/alignment.c
+
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/exception.h>
+#include <asm/ptrace.h>
+#include <asm/traps.h>
+
+/*
+ * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
+ *
+ * Speed optimisations and better fault handling by Russell King.
+ */
+#define CODING_BITS(i)	(i & 0x0e000000)
+
+#define LDST_P_BIT(i)	(i & (1 << 24))		/* Preindex		*/
+#define LDST_U_BIT(i)	(i & (1 << 23))		/* Add offset		*/
+#define LDST_W_BIT(i)	(i & (1 << 21))		/* Writeback		*/
+#define LDST_L_BIT(i)	(i & (1 << 20))		/* Load			*/
+
+#define LDST_P_EQ_U(i)	((((i) ^ ((i) >> 1)) & (1 << 23)) == 0)
+
+#define LDSTHD_I_BIT(i)	(i & (1 << 22))		/* double/half-word immed */
+
+#define RN_BITS(i)	((i >> 16) & 15)	/* Rn			*/
+#define RD_BITS(i)	((i >> 12) & 15)	/* Rd			*/
+#define RM_BITS(i)	(i & 15)		/* Rm			*/
+
+#define REGMASK_BITS(i)	(i & 0xffff)
+
+#define BAD_INSTR 	0xdeadc0de
+
+/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either e800h,f000h,f800h */
+#define IS_T32(hi16) \
+	(((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
+
+union offset_union {
+	unsigned long un;
+	  signed long sn;
+};
+
+#define TYPE_ERROR	0
+#define TYPE_FAULT	1
+#define TYPE_LDST	2
+#define TYPE_DONE	3
+
+static void
+do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs,
+			 union offset_union offset)
+{
+	if (!LDST_U_BIT(instr))
+		offset.un = -offset.un;
+
+	if (!LDST_P_BIT(instr))
+		addr += offset.un;
+
+	if (!LDST_P_BIT(instr) || LDST_W_BIT(instr))
+		regs->regs[RN_BITS(instr)] = addr;
+}
+
+static int
+do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs)
+{
+	unsigned int rd = RD_BITS(instr);
+	unsigned int rd2;
+	int load;
+
+	if ((instr & 0xfe000000) == 0xe8000000) {
+		/* ARMv7 Thumb-2 32-bit LDRD/STRD */
+		rd2 = (instr >> 8) & 0xf;
+		load = !!(LDST_L_BIT(instr));
+	} else if (((rd & 1) == 1) || (rd == 14)) {
+		return TYPE_ERROR;
+	} else {
+		load = ((instr & 0xf0) == 0xd0);
+		rd2 = rd + 1;
+	}
+
+	if (load) {
+		unsigned int val, val2;
+
+		if (get_user(val, (u32 __user *)addr) ||
+		    get_user(val2, (u32 __user *)(addr + 4)))
+			return TYPE_FAULT;
+		regs->regs[rd] = val;
+		regs->regs[rd2] = val2;
+	} else {
+		if (put_user(regs->regs[rd], (u32 __user *)addr) ||
+		    put_user(regs->regs[rd2], (u32 __user *)(addr + 4)))
+			return TYPE_FAULT;
+	}
+	return TYPE_LDST;
+}
+
+/*
+ * LDM/STM alignment handler.
+ *
+ * There are 4 variants of this instruction:
+ *
+ * B = rn pointer before instruction, A = rn pointer after instruction
+ *              ------ increasing address ----->
+ *	        |    | r0 | r1 | ... | rx |    |
+ * PU = 01             B                    A
+ * PU = 11        B                    A
+ * PU = 00        A                    B
+ * PU = 10             A                    B
+ */
+static int
+do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs)
+{
+	unsigned int rd, rn, nr_regs, regbits;
+	unsigned long eaddr, newaddr;
+	unsigned int val;
+
+	/* count the number of registers in the mask to be transferred */
+	nr_regs = hweight16(REGMASK_BITS(instr)) * 4;
+
+	rn = RN_BITS(instr);
+	newaddr = eaddr = regs->regs[rn];
+
+	if (!LDST_U_BIT(instr))
+		nr_regs = -nr_regs;
+	newaddr += nr_regs;
+	if (!LDST_U_BIT(instr))
+		eaddr = newaddr;
+
+	if (LDST_P_EQ_U(instr))	/* U = P */
+		eaddr += 4;
+
+	for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
+	     regbits >>= 1, rd += 1)
+		if (regbits & 1) {
+			if (LDST_L_BIT(instr)) {
+				if (get_user(val, (u32 __user *)eaddr))
+					return TYPE_FAULT;
+				if (rd < 15)
+					regs->regs[rd] = val;
+				else
+					regs->pc = val;
+			} else {
+				/*
+				 * The PC register has a bias of +8 in ARM mode
+				 * and +4 in Thumb mode. This means that a read
+				 * of the value of PC should account for this.
+				 * Since Thumb does not permit STM instructions
+				 * to refer to PC, just add 8 here.
+				 */
+				val = (rd < 15) ? regs->regs[rd] : regs->pc + 8;
+				if (put_user(val, (u32 __user *)eaddr))
+					return TYPE_FAULT;
+			}
+			eaddr += 4;
+		}
+
+	if (LDST_W_BIT(instr))
+		regs->regs[rn] = newaddr;
+
+	return TYPE_DONE;
+}
+
+/*
+ * Convert Thumb multi-word load/store instruction forms to equivalent ARM
+ * instructions so we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * This implementation was initially based on the algorithm found in
+ * gdb/sim/arm/thumbemu.c. It is basically just a reduced version of that
+ * code, converting only Thumb ld/st instruction forms to equivalent ARM forms.
+ *
+ * NOTES:
+ * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections.
+ * 2. If for some reason we're passed an non-ld/st Thumb instruction to
+ *    decode, we return 0xdeadc0de. This should never happen under normal
+ *    circumstances but if it does, we've got other problems to deal with
+ *    elsewhere and we obviously can't fix those problems here.
+ */
+
+static unsigned long thumb2arm(u16 tinstr)
+{
+	u32 L = (tinstr & (1<<11)) >> 11;
+
+	switch ((tinstr & 0xf800) >> 11) {
+	/* 6.6.1 Format 1: */
+	case 0xc000 >> 11:				/* 7.1.51 STMIA */
+	case 0xc800 >> 11:				/* 7.1.25 LDMIA */
+		{
+			u32 Rn = (tinstr & (7<<8)) >> 8;
+			u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21;
+
+			return 0xe8800000 | W | (L<<20) | (Rn<<16) |
+				(tinstr&255);
+		}
+
+	/* 6.6.1 Format 2: */
+	case 0xb000 >> 11:				/* 7.1.48 PUSH */
+	case 0xb800 >> 11:				/* 7.1.47 POP */
+		if ((tinstr & (3 << 9)) == 0x0400) {
+			static const u32 subset[4] = {
+				0xe92d0000,	/* STMDB sp!,{registers} */
+				0xe92d4000,	/* STMDB sp!,{registers,lr} */
+				0xe8bd0000,	/* LDMIA sp!,{registers} */
+				0xe8bd8000	/* LDMIA sp!,{registers,pc} */
+			};
+			return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] |
+			    (tinstr & 255);		/* register_list */
+		}
+		fallthrough;	/* for illegal instruction case */
+
+	default:
+		return BAD_INSTR;
+	}
+}
+
+/*
+ * Convert a Thumb-2 32 bit LDM, STM, LDRD or STRD to an equivalent instruction
+ * that the ARM alignment handler can deal with, and find the corresponding
+ * handler, so that we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * @pinstr: original Thumb-2 instruction; returns new handlable instruction
+ * @regs: register context.
+ * @poffset: return offset from faulted addr for later writeback
+ *
+ * NOTES:
+ * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections.
+ * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
+ */
+static void *
+do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs,
+			    union offset_union *poffset)
+{
+	u32 instr = *pinstr;
+	u16 tinst1 = (instr >> 16) & 0xffff;
+	u16 tinst2 = instr & 0xffff;
+
+	switch (tinst1 & 0xffe0) {
+	/* A6.3.5 Load/Store multiple */
+	case 0xe880:		/* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */
+	case 0xe8a0:		/* ...above writeback version */
+	case 0xe900:		/* STMDB/STMFD, LDMDB/LDMEA */
+	case 0xe920:		/* ...above writeback version */
+		/* no offset decision needed since the handler calculates it */
+		return do_alignment_ldmstm;
+
+	case 0xf840:		/* POP/PUSH T3 (single register) */
+		if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) {
+			u32 L = !!(LDST_L_BIT(instr));
+			const u32 subset[2] = {
+				0xe92d0000,	/* STMDB sp!,{registers} */
+				0xe8bd0000,	/* LDMIA sp!,{registers} */
+			};
+			*pinstr = subset[L] | (1<<RD_BITS(instr));
+			return do_alignment_ldmstm;
+		}
+		/* Else fall through for illegal instruction case */
+		break;
+
+	/* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */
+	case 0xe860:
+	case 0xe960:
+	case 0xe8e0:
+	case 0xe9e0:
+		poffset->un = (tinst2 & 0xff) << 2;
+		fallthrough;
+
+	case 0xe940:
+	case 0xe9c0:
+		return do_alignment_ldrdstrd;
+
+	/*
+	 * No need to handle load/store instructions up to word size
+	 * since ARMv6 and later CPUs can perform unaligned accesses.
+	 */
+	default:
+		break;
+	}
+	return NULL;
+}
+
+static int alignment_get_arm(struct pt_regs *regs, __le32 __user *ip, u32 *inst)
+{
+	__le32 instr = 0;
+	int fault;
+
+	fault = get_user(instr, ip);
+	if (fault)
+		return fault;
+
+	*inst = __le32_to_cpu(instr);
+	return 0;
+}
+
+static int alignment_get_thumb(struct pt_regs *regs, __le16 __user *ip, u16 *inst)
+{
+	__le16 instr = 0;
+	int fault;
+
+	fault = get_user(instr, ip);
+	if (fault)
+		return fault;
+
+	*inst = __le16_to_cpu(instr);
+	return 0;
+}
+
+int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
+{
+	union offset_union offset;
+	unsigned long instrptr;
+	int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
+	unsigned int type;
+	u32 instr = 0;
+	u16 tinstr = 0;
+	int isize = 4;
+	int thumb2_32b = 0;
+	int fault;
+
+	instrptr = instruction_pointer(regs);
+
+	if (compat_thumb_mode(regs)) {
+		__le16 __user *ptr = (__le16 __user *)(instrptr & ~1);
+
+		fault = alignment_get_thumb(regs, ptr, &tinstr);
+		if (!fault) {
+			if (IS_T32(tinstr)) {
+				/* Thumb-2 32-bit */
+				u16 tinst2;
+				fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
+				instr = ((u32)tinstr << 16) | tinst2;
+				thumb2_32b = 1;
+			} else {
+				isize = 2;
+				instr = thumb2arm(tinstr);
+			}
+		}
+	} else {
+		fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr);
+	}
+
+	if (fault)
+		return 1;
+
+	switch (CODING_BITS(instr)) {
+	case 0x00000000:	/* 3.13.4 load/store instruction extensions */
+		if (LDSTHD_I_BIT(instr))
+			offset.un = (instr & 0xf00) >> 4 | (instr & 15);
+		else
+			offset.un = regs->regs[RM_BITS(instr)];
+
+		if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */
+		    (instr & 0x001000f0) == 0x000000f0)   /* STRD */
+			handler = do_alignment_ldrdstrd;
+		else
+			return 1;
+		break;
+
+	case 0x08000000:	/* ldm or stm, or thumb-2 32bit instruction */
+		if (thumb2_32b) {
+			offset.un = 0;
+			handler = do_alignment_t32_to_handler(&instr, regs, &offset);
+		} else {
+			offset.un = 0;
+			handler = do_alignment_ldmstm;
+		}
+		break;
+
+	default:
+		return 1;
+	}
+
+	type = handler(addr, instr, regs);
+
+	if (type == TYPE_ERROR || type == TYPE_FAULT)
+		return 1;
+
+	if (type == TYPE_LDST)
+		do_alignment_finish_ldst(addr, instr, regs, offset);
+
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc);
+	arm64_skip_faulting_instruction(regs, isize);
+
+	return 0;
+}
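
As a worked example of the Thumb-to-ARM conversion above: the Thumb encoding 0xb510 (PUSH {r4, lr}) hits the Format 2 case, selects the "STMDB sp!,{registers,lr}" template because L is 0 (store) and bit 8 (the LR/M bit) is set, and ORs in the low register list, giving the ARM instruction 0xe92d4010, i.e. STMDB sp!, {r4, lr}. A stand-alone check of just that branch (the function name and the reduction to the PUSH/POP case are editorial):

#include <stdio.h>

/* Only the PUSH/POP branch of thumb2arm() above, reduced for illustration. */
static unsigned long thumb_pushpop_to_arm(unsigned short tinstr)
{
	unsigned int L = (tinstr >> 11) & 1;	/* 0 = PUSH (store), 1 = POP (load) */
	static const unsigned long subset[4] = {
		0xe92d0000,	/* STMDB sp!,{registers}    */
		0xe92d4000,	/* STMDB sp!,{registers,lr} */
		0xe8bd0000,	/* LDMIA sp!,{registers}    */
		0xe8bd8000,	/* LDMIA sp!,{registers,pc} */
	};

	return subset[(L << 1) | ((tinstr >> 8) & 1)] | (tinstr & 255);
}

int main(void)
{
	/* PUSH {r4, lr} (0xb510) -> STMDB sp!, {r4, lr} */
	printf("0x%08lx\n", thumb_pushpop_to_arm(0xb510));	/* prints 0xe92d4010 */
	return 0;
}
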
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 53b973b6059f..58ca4f6b25d6 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -121,6 +121,22 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 	sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCI, 0);
 }
 
+static DEFINE_RAW_SPINLOCK(reg_user_mask_modification);
+static void __maybe_unused
+cpu_clear_bf16_from_user_emulation(const struct arm64_cpu_capabilities *__unused)
+{
+	struct arm64_ftr_reg *regp;
+
+	regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+	if (!regp)
+		return;
+
+	raw_spin_lock(&reg_user_mask_modification);
+	if (regp->user_mask & ID_AA64ISAR1_EL1_BF16_MASK)
+		regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK;
+	raw_spin_unlock(&reg_user_mask_modification);
+}
+
 #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)	\
 	.matches = is_affected_midr_range,			\
 	.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@@ -692,6 +708,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_2658417
+	{
+		.desc = "ARM erratum 2658417",
+		.capability = ARM64_WORKAROUND_2658417,
+		/* Cortex-A510 r0p0 - r1p1 */
+		ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
+		MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)),
+		.cpu_enable = cpu_clear_bf16_from_user_emulation,
+	},
+#endif
 	{
 	}
 };
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d8361691efeb..6062454a9067 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -108,8 +108,7 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 EXPORT_SYMBOL(cpu_hwcaps);
 static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS];
 
-/* Need also bit for ARM64_CB_PATCH */
-DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
+DECLARE_BITMAP(boot_capabilities, ARM64_NCAPS);
 
 bool arm64_use_ng_mappings = false;
 EXPORT_SYMBOL(arm64_use_ng_mappings);
@@ -134,31 +133,12 @@ DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
  */
 static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
 
-/*
- * Flag to indicate if we have computed the system wide
- * capabilities based on the boot time active CPUs. This
- * will be used to determine if a new booting CPU should
- * go through the verification process to make sure that it
- * supports the system capabilities, without using a hotplug
- * notifier. This is also used to decide if we could use
- * the fast path for checking constant CPU caps.
- */
-DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready);
-EXPORT_SYMBOL(arm64_const_caps_ready);
-static inline void finalize_system_capabilities(void)
-{
-	static_branch_enable(&arm64_const_caps_ready);
-}
-
 void dump_cpu_features(void)
 {
 	/* file-wide pr_fmt adds "CPU features: " prefix */
 	pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
 }
 
-DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
-EXPORT_SYMBOL(cpu_hwcap_keys);
-
 #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\
 		.sign = SIGNED,				\
@@ -243,35 +223,35 @@ static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_DIT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AMU_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_MPAM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SEL2_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
-				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
-	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
-	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
+				   FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SVE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_RAS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_GIC_SHIFT, 4, 0),
+	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, ID_AA64PFR0_EL1_AdvSIMD_NI),
+	S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME),
-		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SME_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RAS_frac_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
-		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_NI),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
-				    FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
+				    FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_BT_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -316,9 +296,9 @@ static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_FGT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_EXS_SHIFT, 4, 0),
 	/*
 	 * Page size not being supported at Stage-2 is not fatal. You
 	 * just give up KVM if PAGE_SIZE isn't supported there. Go fix
@@ -334,9 +314,9 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 	 * fields are inconsistent across vCPUs, then it isn't worth
 	 * trying to bring KVM up.
 	 */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT, 4, 1),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT, 4, 1),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT, 4, 1),
 	/*
 	 * We already refuse to boot CPUs that don't support our configured
 	 * page size, so we can only detect mismatches for a page size other
@@ -344,55 +324,55 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 	 * exist in the wild so, even though we don't like it, we'll have to go
 	 * along with it and treat them as non-strict.
 	 */
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN4_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN4_NI),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN64_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN64_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN16_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN16_NI),
 
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT, 4, 0),
 	/* Linux shouldn't care about secure memory */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGEND_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT, 4, 0),
 	/*
 	 * Differing PARange is fine as long as all peripherals and memory are mapped
 	 * within the minimum PARange of all CPUs
 	 */
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_PARANGE_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TIDCP1_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_SPECSEI_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1_SpecSEI_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_PAN_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_LO_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HPDS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VH_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VMIDBits_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_E0PD_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EVT_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_BBM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_TTL_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IDS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_ST_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_NV_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CCIDX_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_E0PD_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_EVT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_BBM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_TTL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_FWB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IDS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_AT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_ST_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_NV_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CCIDX_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_VARange_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CnP_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
 
@@ -434,17 +414,17 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_DOUBLELOCK_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_DoubleLock_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_PMSVer_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_CTX_CMPs_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_WRPs_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_BRPs_SHIFT, 4, 0),
 	/*
 	 * We can instantiate multiple PMU instances with different levels
 	 * of support.
 	 */
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_EL1_DebugVer_SHIFT, 4, 0x6),
 	ARM64_FTR_END,
 };
 
@@ -750,7 +730,7 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id)
  * returns - Upon success, matching ftr_reg entry for id.
  *         - NULL on failure but with a WARN_ON().
  */
-static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
 {
 	struct arm64_ftr_reg *reg;
 
@@ -1392,6 +1372,12 @@ u64 __read_sysreg_by_encoding(u32 sys_id)
 #include <linux/irqchip/arm-gic-v3.h>
 
 static bool
+has_always(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	return true;
+}
+
+static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
 	int val = cpuid_feature_extract_field_width(reg, entry->field_pos,
@@ -1401,17 +1387,40 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 	return val >= entry->min_field_value;
 }
 
-static bool
-has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+static u64
+read_scoped_sysreg(const struct arm64_cpu_capabilities *entry, int scope)
 {
-	u64 val;
-
 	WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible());
 	if (scope == SCOPE_SYSTEM)
-		val = read_sanitised_ftr_reg(entry->sys_reg);
+		return read_sanitised_ftr_reg(entry->sys_reg);
 	else
-		val = __read_sysreg_by_encoding(entry->sys_reg);
+		return __read_sysreg_by_encoding(entry->sys_reg);
+}
 
+static bool
+has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	int mask;
+	struct arm64_ftr_reg *regp;
+	u64 val = read_scoped_sysreg(entry, scope);
+
+	regp = get_arm64_ftr_reg(entry->sys_reg);
+	if (!regp)
+		return false;
+
+	mask = cpuid_feature_extract_unsigned_field_width(regp->user_mask,
+							  entry->field_pos,
+							  entry->field_width);
+	if (!mask)
+		return false;
+
+	return feature_matches(val, entry);
+}
+
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	u64 val = read_scoped_sysreg(entry, scope);
 	return feature_matches(val, entry);
 }
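
As a rough, freestanding sketch of the gating that has_user_cpuid_feature() performs (extract_field() and field_matches() are illustrative names rather than kernel API, the field is assumed to be unsigned and 4 bits wide, and the kernel additionally handles signed fields via feature_matches()):

	/* Illustrative only: a capability matches when the field is both exposed
	 * in the register's user-visible mask and at least the minimum value. */
	static inline unsigned long extract_field(unsigned long long reg,
						  int shift, int width)
	{
		return (unsigned long)((reg >> shift) & ((1ULL << width) - 1));
	}

	static int field_matches(unsigned long long sanitised_reg,
				 unsigned long long user_mask,
				 int shift, int width, unsigned long min)
	{
		if (!extract_field(user_mask, shift, width))
			return 0;	/* field not exposed to userspace */

		return extract_field(sanitised_reg, shift, width) >= min;
	}
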
 
@@ -1492,7 +1501,7 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
 	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 
 	return cpuid_feature_extract_signed_field(pfr0,
-					ID_AA64PFR0_FP_SHIFT) < 0;
+					ID_AA64PFR0_EL1_FP_SHIFT) < 0;
 }
 
 static bool has_cache_idc(const struct arm64_cpu_capabilities *entry,
@@ -1571,7 +1580,7 @@ bool kaslr_requires_kpti(void)
 	if (IS_ENABLED(CONFIG_ARM64_E0PD)) {
 		u64 mmfr2 = read_sysreg_s(SYS_ID_AA64MMFR2_EL1);
 		if (cpuid_feature_extract_unsigned_field(mmfr2,
-						ID_AA64MMFR2_E0PD_SHIFT))
+						ID_AA64MMFR2_EL1_E0PD_SHIFT))
 			return false;
 	}
 
@@ -2034,7 +2043,8 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
 static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 {
 	sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
-	isb();
+
+	mte_cpu_setup();
 
 	/*
 	 * Clear the tags in the zero page. This needs to be done via the
@@ -2088,12 +2098,22 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
 
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
+		.capability = ARM64_ALWAYS_BOOT,
+		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
+		.matches = has_always,
+	},
+	{
+		.capability = ARM64_ALWAYS_SYSTEM,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_always,
+	},
+	{
 		.desc = "GIC system register CPU interface",
 		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
 		.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
 		.matches = has_useable_gicv3_cpuif,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
-		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_GIC_SHIFT,
 		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
@@ -2104,7 +2124,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR0_EL1,
-		.field_pos = ID_AA64MMFR0_ECV_SHIFT,
+		.field_pos = ID_AA64MMFR0_EL1_ECV_SHIFT,
 		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
@@ -2116,7 +2136,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
-		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT,
 		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
@@ -2130,7 +2150,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
-		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.field_pos = ID_AA64MMFR1_EL1_PAN_SHIFT,
 		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 3,
@@ -2168,9 +2188,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_32bit_el0,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64PFR0_EL0_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_EL0_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
+		.min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT,
 	},
 #ifdef CONFIG_KVM
 	{
@@ -2180,9 +2200,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64PFR0_EL1_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_EL1_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
+		.min_field_value = ID_AA64PFR0_EL1_ELx_32BIT_64BIT,
 	},
 	{
 		.desc = "Protected KVM",
@@ -2201,7 +2221,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		 * more details.
 		 */
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
-		.field_pos = ID_AA64PFR0_CSV3_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_CSV3_SHIFT,
 		.field_width = 4,
 		.min_field_value = 1,
 		.matches = unmap_kernel_at_el0,
@@ -2244,9 +2264,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_SVE,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64PFR0_SVE_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_SVE_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR0_SVE,
+		.min_field_value = ID_AA64PFR0_EL1_SVE_IMP,
 		.matches = has_cpuid_feature,
 		.cpu_enable = sve_kernel_enable,
 	},
@@ -2259,9 +2279,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64PFR0_RAS_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_RAS_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR0_RAS_V1,
+		.min_field_value = ID_AA64PFR0_EL1_RAS_IMP,
 		.cpu_enable = cpu_clear_disr,
 	},
 #endif /* CONFIG_ARM64_RAS_EXTN */
@@ -2278,9 +2298,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_amu,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64PFR0_AMU_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_AMU_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR0_AMU,
+		.min_field_value = ID_AA64PFR0_EL1_AMU_IMP,
 		.cpu_enable = cpu_amu_enable,
 	},
 #endif /* CONFIG_ARM64_AMU_EXTN */
@@ -2303,7 +2323,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_STAGE2_FWB,
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64MMFR2_FWB_SHIFT,
+		.field_pos = ID_AA64MMFR2_EL1_FWB_SHIFT,
 		.field_width = 4,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
@@ -2314,7 +2334,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_ARMv8_4_TTL,
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64MMFR2_TTL_SHIFT,
+		.field_pos = ID_AA64MMFR2_EL1_TTL_SHIFT,
 		.field_width = 4,
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
@@ -2344,7 +2364,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HW_DBM,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64MMFR1_HADBS_SHIFT,
+		.field_pos = ID_AA64MMFR1_EL1_HAFDBS_SHIFT,
 		.field_width = 4,
 		.min_field_value = 2,
 		.matches = has_hw_dbm,
@@ -2367,10 +2387,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
-		.field_pos = ID_AA64PFR1_SSBS_SHIFT,
+		.field_pos = ID_AA64PFR1_EL1_SSBS_SHIFT,
 		.field_width = 4,
 		.sign = FTR_UNSIGNED,
-		.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
+		.min_field_value = ID_AA64PFR1_EL1_SSBS_IMP,
 	},
 #ifdef CONFIG_ARM64_CNP
 	{
@@ -2380,7 +2400,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_useable_cnp,
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64MMFR2_CNP_SHIFT,
+		.field_pos = ID_AA64MMFR2_EL1_CnP_SHIFT,
 		.field_width = 4,
 		.min_field_value = 1,
 		.cpu_enable = cpu_enable_cnp,
@@ -2485,7 +2505,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
 		.matches = can_use_gic_priorities,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
-		.field_pos = ID_AA64PFR0_GIC_SHIFT,
+		.field_pos = ID_AA64PFR0_EL1_GIC_SHIFT,
 		.field_width = 4,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
@@ -2499,7 +2519,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sys_reg = SYS_ID_AA64MMFR2_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_width = 4,
-		.field_pos = ID_AA64MMFR2_E0PD_SHIFT,
+		.field_pos = ID_AA64MMFR2_EL1_E0PD_SHIFT,
 		.matches = has_cpuid_feature,
 		.min_field_value = 1,
 		.cpu_enable = cpu_enable_e0pd,
@@ -2528,9 +2548,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = has_cpuid_feature,
 		.cpu_enable = bti_enable,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
-		.field_pos = ID_AA64PFR1_BT_SHIFT,
+		.field_pos = ID_AA64PFR1_EL1_BT_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR1_BT_BTI,
+		.min_field_value = ID_AA64PFR1_EL1_BT_IMP,
 		.sign = FTR_UNSIGNED,
 	},
 #endif
@@ -2541,9 +2561,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
-		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.field_pos = ID_AA64PFR1_EL1_MTE_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR1_MTE,
+		.min_field_value = ID_AA64PFR1_EL1_MTE_MTE2,
 		.sign = FTR_UNSIGNED,
 		.cpu_enable = cpu_enable_mte,
 	},
@@ -2553,9 +2573,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
-		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.field_pos = ID_AA64PFR1_EL1_MTE_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR1_MTE_ASYMM,
+		.min_field_value = ID_AA64PFR1_EL1_MTE_MTE3,
 		.sign = FTR_UNSIGNED,
 	},
 #endif /* CONFIG_ARM64_MTE */
@@ -2577,9 +2597,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_SME,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64PFR1_SME_SHIFT,
+		.field_pos = ID_AA64PFR1_EL1_SME_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64PFR1_SME,
+		.min_field_value = ID_AA64PFR1_EL1_SME_IMP,
 		.matches = has_cpuid_feature,
 		.cpu_enable = sme_kernel_enable,
 	},
@@ -2614,9 +2634,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.sys_reg = SYS_ID_AA64MMFR1_EL1,
 		.sign = FTR_UNSIGNED,
-		.field_pos = ID_AA64MMFR1_TIDCP1_SHIFT,
+		.field_pos = ID_AA64MMFR1_EL1_TIDCP1_SHIFT,
 		.field_width = 4,
-		.min_field_value = ID_AA64MMFR1_TIDCP1_IMP,
+		.min_field_value = ID_AA64MMFR1_EL1_TIDCP1_IMP,
 		.matches = has_cpuid_feature,
 		.cpu_enable = cpu_trap_el0_impdef,
 	},
@@ -2624,7 +2644,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 };
 
 #define HWCAP_CPUID_MATCH(reg, field, width, s, min_value)			\
-		.matches = has_cpuid_feature,					\
+		.matches = has_user_cpuid_feature,					\
 		.sys_reg = reg,							\
 		.field_pos = field,						\
 		.field_width = width,						\
@@ -2708,11 +2728,11 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
@@ -2725,38 +2745,39 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_EBF16),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM),
-	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
+	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_EL1_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
-	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_EL1_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_EL1_SVE_IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SVEver_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_AES_PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BitPerm_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16),
+	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_BF16_EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SHA3_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_SM4_IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_I8MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F32MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM),
 	HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_EL1_F64MM_IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM),
 #endif
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SSBS_SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS),
 #ifdef CONFIG_ARM64_BTI
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_BT_IMP, CAP_HWCAP, KERNEL_HWCAP_BTI),
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
 #endif
 #ifdef CONFIG_ARM64_MTE
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_MTE_MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3),
 #endif /* CONFIG_ARM64_MTE */
-	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
-	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
+	HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV),
+	HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP),
 	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES),
 	HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, FTR_UNSIGNED, ID_AA64ISAR2_EL1_WFxT_IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
 #ifdef CONFIG_ARM64_SME
-	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SME, CAP_HWCAP, KERNEL_HWCAP_SME),
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_EL1_SME_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_EL1_SME_IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
 	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_FA64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
 	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, FTR_UNSIGNED, ID_AA64SMFR0_EL1_I16I64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64),
 	HWCAP_CAP(SYS_ID_AA64SMFR0_EL1, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, FTR_UNSIGNED, ID_AA64SMFR0_EL1_F64F64_IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64),
@@ -2929,9 +2950,6 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
 		if (!cpus_have_cap(num))
 			continue;
 
-		/* Ensure cpus_have_const_cap(num) works */
-		static_branch_enable(&cpu_hwcap_keys[num]);
-
 		if (boot_scope && caps->cpu_enable)
 			/*
 			 * Capabilities with SCOPE_BOOT_CPU scope are finalised
@@ -3102,7 +3120,7 @@ static void verify_hyp_capabilities(void)
 
 	/* Verify IPA range */
 	parange = cpuid_feature_extract_unsigned_field(mmfr0,
-				ID_AA64MMFR0_PARANGE_SHIFT);
+				ID_AA64MMFR0_EL1_PARANGE_SHIFT);
 	ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
 	if (ipa_max < get_kvm_ipa_limit()) {
 		pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id());
@@ -3253,9 +3271,6 @@ void __init setup_cpu_features(void)
 	sme_setup();
 	minsigstksz_setup();
 
-	/* Advertise that we have computed the system capabilities */
-	finalize_system_capabilities();
-
 	/*
 	 * Check for sane CTR_EL0.CWG value.
 	 */
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index d7702f39b4d3..28d4f442b0bc 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -115,6 +115,7 @@ static const char *const hwcap_str[] = {
 	[KERNEL_HWCAP_SME_FA64]		= "smefa64",
 	[KERNEL_HWCAP_WFXT]		= "wfxt",
 	[KERNEL_HWCAP_EBF16]		= "ebf16",
+	[KERNEL_HWCAP_SVE_EBF16]	= "sveebf16",
 };
 
 #ifdef CONFIG_COMPAT
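
For reference, a hedged userspace check for the new capability could look like the following; HWCAP2_SVE_EBF16 is assumed to be the matching bit added to the uapi <asm/hwcap.h> header, and the #ifdef guard keeps the sketch building against older headers:

	#include <stdio.h>
	#include <sys/auxv.h>
	#include <asm/hwcap.h>

	int main(void)
	{
	#ifdef HWCAP2_SVE_EBF16
		/* Mirrors the new "sveebf16" entry reported in /proc/cpuinfo. */
		if (getauxval(AT_HWCAP2) & HWCAP2_SVE_EBF16)
			printf("sveebf16: SVE BFloat16 extended instructions present\n");
	#endif
		return 0;
	}
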
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index bf9fe71589bc..3da09778267e 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -28,7 +28,7 @@
 u8 debug_monitors_arch(void)
 {
 	return cpuid_feature_extract_unsigned_field(read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1),
-						ID_AA64DFR0_DEBUGVER_SHIFT);
+						ID_AA64DFR0_EL1_DebugVer_SHIFT);
 }
 
 /*
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index c75ca36b4a49..9173fad279af 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -379,11 +379,20 @@ static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr)
 	exit_to_kernel_mode(regs);
 }
 
-static void noinstr el1_undef(struct pt_regs *regs)
+static void noinstr el1_undef(struct pt_regs *regs, unsigned long esr)
 {
 	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
-	do_undefinstr(regs);
+	do_undefinstr(regs, esr);
+	local_daif_mask();
+	exit_to_kernel_mode(regs);
+}
+
+static void noinstr el1_bti(struct pt_regs *regs, unsigned long esr)
+{
+	enter_from_kernel_mode(regs);
+	local_daif_inherit(regs);
+	do_el1_bti(regs, esr);
 	local_daif_mask();
 	exit_to_kernel_mode(regs);
 }
@@ -402,7 +411,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
 {
 	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
-	do_ptrauth_fault(regs, esr);
+	do_el1_fpac(regs, esr);
 	local_daif_mask();
 	exit_to_kernel_mode(regs);
 }
@@ -425,7 +434,10 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
 		break;
 	case ESR_ELx_EC_SYS64:
 	case ESR_ELx_EC_UNKNOWN:
-		el1_undef(regs);
+		el1_undef(regs, esr);
+		break;
+	case ESR_ELx_EC_BTI:
+		el1_bti(regs, esr);
 		break;
 	case ESR_ELx_EC_BREAKPT_CUR:
 	case ESR_ELx_EC_SOFTSTP_CUR:
@@ -582,11 +594,11 @@ static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr)
 	exit_to_user_mode(regs);
 }
 
-static void noinstr el0_undef(struct pt_regs *regs)
+static void noinstr el0_undef(struct pt_regs *regs, unsigned long esr)
 {
 	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
-	do_undefinstr(regs);
+	do_undefinstr(regs, esr);
 	exit_to_user_mode(regs);
 }
 
@@ -594,7 +606,7 @@ static void noinstr el0_bti(struct pt_regs *regs)
 {
 	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
-	do_bti(regs);
+	do_el0_bti(regs);
 	exit_to_user_mode(regs);
 }
 
@@ -629,7 +641,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 {
 	enter_from_user_mode(regs);
 	local_daif_restore(DAIF_PROCCTX);
-	do_ptrauth_fault(regs, esr);
+	do_el0_fpac(regs, esr);
 	exit_to_user_mode(regs);
 }
 
@@ -670,7 +682,7 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
 		el0_pc(regs, esr);
 		break;
 	case ESR_ELx_EC_UNKNOWN:
-		el0_undef(regs);
+		el0_undef(regs, esr);
 		break;
 	case ESR_ELx_EC_BTI:
 		el0_bti(regs);
@@ -788,7 +800,7 @@ asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
 	case ESR_ELx_EC_CP14_MR:
 	case ESR_ELx_EC_CP14_LS:
 	case ESR_ELx_EC_CP14_64:
-		el0_undef(regs);
+		el0_undef(regs, esr);
 		break;
 	case ESR_ELx_EC_CP15_32:
 	case ESR_ELx_EC_CP15_64:
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2d73b3e793b2..e28137d64b76 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -114,7 +114,7 @@
 	 * them if required.
 	 */
 	.macro	apply_ssbd, state, tmp1, tmp2
-alternative_cb	spectre_v4_patch_fw_mitigation_enable
+alternative_cb	ARM64_ALWAYS_SYSTEM, spectre_v4_patch_fw_mitigation_enable
 	b	.L__asm_ssbd_skip\@		// Patched to NOP
 alternative_cb_end
 	ldr_this_cpu	\tmp2, arm64_ssbd_callback_required, \tmp1
@@ -123,7 +123,7 @@ alternative_cb_end
 	tbnz	\tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 	mov	w1, #\state
-alternative_cb	smccc_patch_fw_mitigation_conduit
+alternative_cb	ARM64_ALWAYS_SYSTEM, smccc_patch_fw_mitigation_conduit
 	nop					// Patched to SMC/HVC #0
 alternative_cb_end
 .L__asm_ssbd_skip\@:
@@ -175,7 +175,7 @@ alternative_else_nop_endif
 
 	.macro mte_set_kernel_gcr, tmp, tmp2
 #ifdef CONFIG_KASAN_HW_TAGS
-alternative_cb	kasan_hw_tags_enable
+alternative_cb	ARM64_ALWAYS_SYSTEM, kasan_hw_tags_enable
 	b	1f
 alternative_cb_end
 	mov	\tmp, KERNEL_GCR_EL1
@@ -186,7 +186,7 @@ alternative_cb_end
 
 	.macro mte_set_user_gcr, tsk, tmp, tmp2
 #ifdef CONFIG_KASAN_HW_TAGS
-alternative_cb	kasan_hw_tags_enable
+alternative_cb	ARM64_ALWAYS_SYSTEM, kasan_hw_tags_enable
 	b	1f
 alternative_cb_end
 	ldr	\tmp, [\tsk, #THREAD_MTE_CTRL]
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 26789865748c..8745175f4a75 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -217,11 +217,26 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	unsigned long pc = rec->ip;
 	u32 old = 0, new;
 
+	new = aarch64_insn_gen_nop();
+
+	/*
+	 * When using mcount, callsites in modules may have been initialized to
+	 * call an arbitrary module PLT (which redirects to the _mcount stub)
+	 * rather than the ftrace PLT we'll use at runtime (which redirects to
+	 * the ftrace trampoline). We can ignore the old PLT when initializing
+	 * the callsite.
+	 *
+	 * Note: 'mod' is only set at module load time.
+	 */
+	if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) &&
+	    IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) {
+		return aarch64_insn_patch_text_nosync((void *)pc, new);
+	}
+
 	if (!ftrace_find_callable_addr(rec, mod, &addr))
 		return -EINVAL;
 
 	old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
-	new = aarch64_insn_gen_nop();
 
 	return ftrace_modify_code(pc, old, new, true);
 }
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 814b6587ccb7..2196aad7b55b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -99,7 +99,7 @@ SYM_CODE_START(primary_entry)
 	 */
 #if VA_BITS > 48
 	mrs_s	x0, SYS_ID_AA64MMFR2_EL1
-	tst	x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
+	tst	x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT
 	mov	x0, #VA_BITS
 	mov	x25, #VA_BITS_MIN
 	csel	x25, x25, x0, eq
@@ -658,10 +658,10 @@ SYM_FUNC_END(__secondary_too_slow)
  */
 SYM_FUNC_START(__enable_mmu)
 	mrs	x3, ID_AA64MMFR0_EL1
-	ubfx	x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
-	cmp     x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+	ubfx	x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+	cmp     x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN
 	b.lt    __no_granule_support
-	cmp     x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+	cmp     x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX
 	b.gt    __no_granule_support
 	phys_to_ttbr x2, x2
 	msr	ttbr0_el1, x2			// load TTBR0
@@ -679,7 +679,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
 	b.ne	2f
 
 	mrs_s	x0, SYS_ID_AA64MMFR2_EL1
-	and	x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+	and	x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
 	cbnz	x0, 2f
 
 	update_early_cpu_boot_status \
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 12c7fad02ae5..2ee18c860f2a 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -98,7 +98,7 @@ SYM_CODE_START_LOCAL(elx_sync)
 SYM_CODE_END(elx_sync)
 
 SYM_CODE_START_LOCAL(__finalise_el2)
-	check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
+	check_override id_aa64pfr0 ID_AA64PFR0_EL1_SVE_SHIFT .Linit_sve .Lskip_sve
 
 .Linit_sve:	/* SVE register access */
 	mrs	x0, cptr_el2			// Disable SVE traps
@@ -109,7 +109,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
 	msr_s	SYS_ZCR_EL2, x1			// length for EL1.
 
 .Lskip_sve:
-	check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
+	check_override id_aa64pfr1 ID_AA64PFR1_EL1_SME_SHIFT .Linit_sme .Lskip_sme
 
 .Linit_sme:	/* SME register access and priority mapping */
 	mrs	x0, cptr_el2			// Disable SME traps
@@ -142,7 +142,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
 	msr_s	SYS_SMPRIMAP_EL2, xzr		// Make all priorities equal
 
 	mrs	x1, id_aa64mmfr1_el1		// HCRX_EL2 present?
-	ubfx	x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+	ubfx	x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
 	cbz	x1, .Lskip_sme
 
 	mrs_s	x1, SYS_HCRX_EL2
@@ -157,7 +157,7 @@ SYM_CODE_START_LOCAL(__finalise_el2)
 	tbnz	x1, #0, 1f
 
 	// Needs to be VHE capable, obviously
-	check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
+	check_override id_aa64mmfr1 ID_AA64MMFR1_EL1_VH_SHIFT 2f 1f
 
 1:	mov_q	x0, HVC_STUB_ERR
 	eret
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 1b0542c69738..95133765ed29 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -50,7 +50,7 @@ static const struct ftr_set_desc mmfr1 __initconst = {
 	.name		= "id_aa64mmfr1",
 	.override	= &id_aa64mmfr1_override,
 	.fields		= {
-		FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
+		FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter),
 		{}
 	},
 };
@@ -74,7 +74,7 @@ static const struct ftr_set_desc pfr0 __initconst = {
 	.name		= "id_aa64pfr0",
 	.override	= &id_aa64pfr0_override,
 	.fields		= {
-	        FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
+	        FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
 		{}
 	},
 };
@@ -98,9 +98,9 @@ static const struct ftr_set_desc pfr1 __initconst = {
 	.name		= "id_aa64pfr1",
 	.override	= &id_aa64pfr1_override,
 	.fields		= {
-		FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
-		FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
-		FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
+		FIELD("bt", ID_AA64PFR1_EL1_BT_SHIFT, NULL ),
+		FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
+		FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
 		{}
 	},
 };
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index afa69e04e75e..4aaa5f3d1f65 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -73,6 +73,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter);
 KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
 KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
 KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
+KVM_NVHE_ALIAS(alt_cb_patch_nops);
 
 /* Global kernel state accessed by nVHE hyp code. */
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
@@ -89,10 +90,6 @@ KVM_NVHE_ALIAS(__icache_flags);
 /* VMID bits set by the KVM VMID allocator */
 KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
 
-/* Kernel symbols needed for cpus_have_final/const_caps checks. */
-KVM_NVHE_ALIAS(arm64_const_caps_ready);
-KVM_NVHE_ALIAS(cpu_hwcap_keys);
-
 /* Static keys which are set if a vGIC trap should be handled in hyp. */
 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
 KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index bda49430c9ea..38dbd3828f13 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,7 +21,9 @@
 #include <linux/seq_file.h>
 #include <linux/vmalloc.h>
 #include <asm/daifflags.h>
+#include <asm/exception.h>
 #include <asm/vmap_stack.h>
+#include <asm/softirq_stack.h>
 
 /* Only access this in an NMI enter/exit */
 DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts);
@@ -71,6 +73,18 @@ static void init_irq_stacks(void)
 }
 #endif
 
+#ifndef CONFIG_PREEMPT_RT
+static void ____do_softirq(struct pt_regs *regs)
+{
+	__do_softirq();
+}
+
+void do_softirq_own_stack(void)
+{
+	call_on_irq_stack(NULL, ____do_softirq);
+}
+#endif
+
 static void default_handle_irq(struct pt_regs *regs)
 {
 	panic("IRQ taken without a root IRQ handler\n");
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index a3d0494f25a9..5a0a8f552a61 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -37,7 +37,8 @@ struct plt_entry get_plt_entry(u64 dst, void *pc)
 	return plt;
 }
 
-bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
+static bool plt_entries_equal(const struct plt_entry *a,
+			      const struct plt_entry *b)
 {
 	u64 p, q;
 
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f2d4bb14bfab..76b41e4ca9fa 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -476,21 +476,6 @@ overflow:
 	return -ENOEXEC;
 }
 
-static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
-				    const Elf_Shdr *sechdrs,
-				    const char *name)
-{
-	const Elf_Shdr *s, *se;
-	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
-
-	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
-		if (strcmp(name, secstrs + s->sh_name) == 0)
-			return s;
-	}
-
-	return NULL;
-}
-
 static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
 {
 	*plt = get_plt_entry(addr, plt);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index b2b730233274..aca88470fb69 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -285,6 +285,49 @@ void mte_thread_switch(struct task_struct *next)
 	mte_check_tfsr_el1();
 }
 
+void mte_cpu_setup(void)
+{
+	u64 rgsr;
+
+	/*
+	 * CnP must be enabled only after the MAIR_EL1 register has been set
+	 * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may
+	 * lead to the wrong memory type being used for a brief window during
+	 * CPU power-up.
+	 *
+	 * CnP is not a boot feature so MTE gets enabled before CnP, but let's
+	 * make sure that is the case.
+	 */
+	BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT);
+	BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT);
+
+	/* Normal Tagged memory type at the corresponding MAIR index */
+	sysreg_clear_set(mair_el1,
+			 MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED),
+			 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED,
+				      MT_NORMAL_TAGGED));
+
+	write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1);
+
+	/*
+	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+	 * RGSR_EL1.SEED must be non-zero for IRG to produce
+	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+	 * must initialize it.
+	 */
+	rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) <<
+	       SYS_RGSR_EL1_SEED_SHIFT;
+	if (rgsr == 0)
+		rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT;
+	write_sysreg_s(rgsr, SYS_RGSR_EL1);
+
+	/* clear any pending tag check faults in TFSR*_EL1 */
+	write_sysreg_s(0, SYS_TFSR_EL1);
+	write_sysreg_s(0, SYS_TFSRE0_EL1);
+
+	local_flush_tlb_all();
+}
+
 void mte_suspend_enter(void)
 {
 	if (!system_supports_mte())
@@ -301,6 +344,14 @@ void mte_suspend_enter(void)
 	mte_check_tfsr_el1();
 }
 
+void mte_suspend_exit(void)
+{
+	if (!system_supports_mte())
+		return;
+
+	mte_cpu_setup();
+}
+
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
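
The seed selection in mte_cpu_setup() only has to guarantee a non-zero RGSR_EL1.SEED. As a hedged sketch with illustrative parameter names (the mask is applied before the shift, exactly as above, and 1 is substituted only when the masked counter happens to be zero):

	static unsigned long long pick_rgsr_seed(unsigned long long cntvct,
						 unsigned long long seed_mask,
						 unsigned int seed_shift)
	{
		unsigned long long rgsr = (cntvct & seed_mask) << seed_shift;

		/* Any non-zero seed is acceptable for IRG. */
		return rgsr ? rgsr : (1ULL << seed_shift);
	}
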
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index cb69ff1e6138..7b0643fe2f13 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -390,7 +390,7 @@ static const struct attribute_group armv8_pmuv3_caps_attr_group = {
  */
 static bool armv8pmu_has_long_event(struct arm_pmu *cpu_pmu)
 {
-	return (cpu_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_5);
+	return (cpu_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P5);
 }
 
 static inline bool armv8pmu_event_has_user_read(struct perf_event *event)
@@ -1145,8 +1145,8 @@ static void __armv8pmu_probe_pmu(void *info)
 
 	dfr0 = read_sysreg(id_aa64dfr0_el1);
 	pmuver = cpuid_feature_extract_unsigned_field(dfr0,
-			ID_AA64DFR0_PMUVER_SHIFT);
-	if (pmuver == ID_AA64DFR0_PMUVER_IMP_DEF || pmuver == 0)
+			ID_AA64DFR0_EL1_PMUVer_SHIFT);
+	if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == 0)
 		return;
 
 	cpu_pmu->pmuver = pmuver;
@@ -1172,7 +1172,7 @@ static void __armv8pmu_probe_pmu(void *info)
 			     pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 
 	/* store PMMIR_EL1 register for sysfs */
-	if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31)))
+	if (pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4 && (pmceid_raw[1] & BIT(31)))
 		cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
 	else
 		cpu_pmu->reg_pmmir = 0;
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index f6f58e6265df..b4eece3eb17d 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -9,9 +9,27 @@
 #include <asm/perf_regs.h>
 #include <asm/ptrace.h>
 
+static u64 perf_ext_regs_value(int idx)
+{
+	switch (idx) {
+	case PERF_REG_ARM64_VG:
+		if (WARN_ON_ONCE(!system_supports_sve()))
+			return 0;
+
+		/*
+		 * The vector granule is the current length of the SVE
+		 * registers in bits, divided by 64.
+		 */
+		return (task_get_sve_vl(current) * 8) / 64;
+	default:
+		WARN_ON_ONCE(true);
+		return 0;
+	}
+}
+
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
-	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX))
+	if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_EXTENDED_MAX))
 		return 0;
 
 	/*
@@ -51,6 +69,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 	if ((u32)idx == PERF_REG_ARM64_PC)
 		return regs->pc;
 
+	if ((u32)idx >= PERF_REG_ARM64_MAX)
+		return perf_ext_regs_value(idx);
+
 	return regs->regs[idx];
 }
 
@@ -58,7 +79,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 
 int perf_reg_validate(u64 mask)
 {
-	if (!mask || mask & REG_RESERVED)
+	u64 reserved_mask = REG_RESERVED;
+
+	if (system_supports_sve())
+		reserved_mask &= ~(1ULL << PERF_REG_ARM64_VG);
+
+	if (!mask || mask & reserved_mask)
 		return -EINVAL;
 
 	return 0;
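
A worked example of the PERF_REG_ARM64_VG value (not kernel code, and the numbers are hypothetical): with a 256-bit SVE vector length, task_get_sve_vl() returns 32 bytes, so userspace sees a granule count of (32 * 8) / 64 = 4.

	#include <assert.h>

	int main(void)
	{
		unsigned int vl_bytes = 32;		/* 256-bit vectors */
		unsigned int vg = (vl_bytes * 8) / 64;	/* length in bits / 64 */

		assert(vg == 4);
		return 0;
	}
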
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d1d182320245..c9e4d0720285 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -44,13 +44,28 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
 	kprobe_opcode_t *addr = p->ainsn.api.insn;
-	void *addrs[] = {addr, addr + 1};
-	u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS};
 
-	/* prepare insn slot */
-	aarch64_insn_patch_text(addrs, insns, 2);
-
-	flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE));
+	/*
+	 * Prepare the insn slot. As Mark Rutland points out, this depends on
+	 * a couple of subtleties:
+	 *
+	 * - That the I-cache maintenance for these instructions is complete
+	 *   *before* the kprobe BRK is written (and aarch64_insn_patch_text_nosync()
+	 *   ensures this, but just omits causing a Context-Synchronization-Event
+	 *   on all CPUs).
+	 *
+	 * - That the kprobe BRK results in an exception (and consequently a
+	 *   Context-Synchronization-Event), which ensures that the CPU will
+	 *   fetch the single-step slot instructions *after* this, ensuring that
+	 *   the new instructions are used.
+	 *
+	 * Normally an ISB would be placed after patching to guarantee that the
+	 * I-cache maintenance is observed on all CPUs; however, since the
+	 * single-step slot is installed from the BRK exception handler, it is
+	 * unnecessary to generate another Context-Synchronization-Event via an
+	 * ISB here.
+	 */
+	aarch64_insn_patch_text_nosync(addr, p->opcode);
+	aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS);
 
 	/*
 	 * Needs restoring of return address after stepping xol.
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 40be3a7c2c53..a8ea1637b137 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -168,7 +168,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
 
 	/* If the CPU has CSV2 set, we're safe */
 	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
-	if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+	if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_CSV2_SHIFT))
 		return SPECTRE_UNAFFECTED;
 
 	/* Alternatively, we have a list of unaffected CPUs */
@@ -586,7 +586,7 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
 	if (spectre_v4_mitigations_off())
 		return;
 
-	if (cpus_have_final_cap(ARM64_SSBS))
+	if (cpus_have_cap(ARM64_SSBS))
 		return;
 
 	if (spectre_v4_mitigations_dynamic())
@@ -945,7 +945,7 @@ static bool supports_ecbhb(int scope)
 		mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 
 	return cpuid_feature_extract_unsigned_field(mmfr1,
-						    ID_AA64MMFR1_ECBHB_SHIFT);
+						    ID_AA64MMFR1_EL1_ECBHB_SHIFT);
 }
 
 bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry,
@@ -988,6 +988,14 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot)
 	isb();
 }
 
+static bool __read_mostly __nospectre_bhb;
+static int __init parse_spectre_bhb_param(char *str)
+{
+	__nospectre_bhb = true;
+	return 0;
+}
+early_param("nospectre_bhb", parse_spectre_bhb_param);
+
 void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
 {
 	bp_hardening_cb_t cpu_cb;
@@ -1001,7 +1009,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry)
 		/* No point mitigating Spectre-BHB alone. */
 	} else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) {
 		pr_info_once("spectre-bhb mitigation disabled by compile time option\n");
-	} else if (cpu_mitigations_off()) {
+	} else if (cpu_mitigations_off() || __nospectre_bhb) {
 		pr_info_once("spectre-bhb mitigation disabled by command line option\n");
 	} else if (supports_ecbhb(SCOPE_LOCAL_CPU)) {
 		state = SPECTRE_MITIGATED;
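
With the new early parameter, the Spectre-BHB mitigation can be disabled on its own rather than only through mitigations=off. A usage sketch (the surrounding command line is illustrative; only the nospectre_bhb token comes from this patch):

	console=ttyAMA0 root=/dev/vda1 ro nospectre_bhb
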
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 041d2ae5c30a..c2fb5755bbec 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -121,7 +121,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
 {
 	return ((addr & ~(THREAD_SIZE - 1))  ==
 		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
-		on_irq_stack(addr, sizeof(unsigned long), NULL);
+		on_irq_stack(addr, sizeof(unsigned long));
 }
 
 /**
@@ -666,10 +666,18 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset,
 static int tls_get(struct task_struct *target, const struct user_regset *regset,
 		   struct membuf to)
 {
+	int ret;
+
 	if (target == current)
 		tls_preserve_current_state();
 
-	return membuf_store(&to, target->thread.uw.tp_value);
+	ret = membuf_store(&to, target->thread.uw.tp_value);
+	if (system_supports_tpidr2())
+		ret = membuf_store(&to, target->thread.tpidr2_el0);
+	else
+		ret = membuf_zero(&to, sizeof(u64));
+
+	return ret;
 }
 
 static int tls_set(struct task_struct *target, const struct user_regset *regset,
@@ -677,13 +685,20 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
 		   const void *kbuf, const void __user *ubuf)
 {
 	int ret;
-	unsigned long tls = target->thread.uw.tp_value;
+	unsigned long tls[2];
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+	tls[0] = target->thread.uw.tp_value;
+	if (system_supports_sme())
+		tls[1] = target->thread.tpidr2_el0;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, tls, 0, count);
 	if (ret)
 		return ret;
 
-	target->thread.uw.tp_value = tls;
+	target->thread.uw.tp_value = tls[0];
+	if (system_supports_sme())
+		target->thread.tpidr2_el0 = tls[1];
+
 	return ret;
 }
 
@@ -1390,7 +1405,7 @@ static const struct user_regset aarch64_regsets[] = {
 	},
 	[REGSET_TLS] = {
 		.core_note_type = NT_ARM_TLS,
-		.n = 1,
+		.n = 2,
 		.size = sizeof(void *),
 		.align = sizeof(void *),
 		.regset_get = tls_get,
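
A hedged userspace sketch of the enlarged regset (error handling omitted, and the target thread is assumed to be ptrace-stopped): with .n = 2, PTRACE_GETREGSET on NT_ARM_TLS can now return TPIDR_EL0 followed by TPIDR2_EL0, the latter reading as zero when TPIDR2 is not supported.

	#include <elf.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	static void dump_tls(pid_t pid)
	{
		uint64_t tls[2] = { 0, 0 };
		struct iovec iov = { .iov_base = tls, .iov_len = sizeof(tls) };

		/* The kernel trims iov_len to the amount actually written. */
		if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_TLS, &iov) == 0)
			printf("TPIDR_EL0=%#llx TPIDR2_EL0=%#llx (%zu bytes)\n",
			       (unsigned long long)tls[0],
			       (unsigned long long)tls[1], iov.iov_len);
	}
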
diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c
index e87a2b7f20f6..99f2ffe9fc05 100644
--- a/arch/arm64/kernel/reloc_test_core.c
+++ b/arch/arm64/kernel/reloc_test_core.c
@@ -48,7 +48,7 @@ static struct {
 	{ "R_AARCH64_PREL16",		relative_data16, (u64)&sym64_rel },
 };
 
-static int reloc_test_init(void)
+static int __init reloc_test_init(void)
 {
 	int i;
 
@@ -67,7 +67,7 @@ static int reloc_test_init(void)
 	return 0;
 }
 
-static void reloc_test_exit(void)
+static void __exit reloc_test_exit(void)
 {
 }
 
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d20620a1c51a..d56e170e1ca7 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -162,38 +162,6 @@ static int init_sdei_scs(void)
 	return err;
 }
 
-static bool on_sdei_normal_stack(unsigned long sp, unsigned long size,
-				 struct stack_info *info)
-{
-	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
-	unsigned long high = low + SDEI_STACK_SIZE;
-
-	return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info);
-}
-
-static bool on_sdei_critical_stack(unsigned long sp, unsigned long size,
-				   struct stack_info *info)
-{
-	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
-	unsigned long high = low + SDEI_STACK_SIZE;
-
-	return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info);
-}
-
-bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info)
-{
-	if (!IS_ENABLED(CONFIG_VMAP_STACK))
-		return false;
-
-	if (on_sdei_critical_stack(sp, size, info))
-		return true;
-
-	if (on_sdei_normal_stack(sp, size, info))
-		return true;
-
-	return false;
-}
-
 unsigned long sdei_arch_get_entry_point(int conduit)
 {
 	/*
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index ce190ee18a20..634279b3b03d 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,31 +68,6 @@ static inline void unwind_init_from_task(struct unwind_state *state,
 }
 
 /*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static bool on_accessible_stack(const struct task_struct *tsk,
-				unsigned long sp, unsigned long size,
-				struct stack_info *info)
-{
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	if (on_task_stack(tsk, sp, size, info))
-		return true;
-	if (tsk != current || preemptible())
-		return false;
-	if (on_irq_stack(sp, size, info))
-		return true;
-	if (on_overflow_stack(sp, size, info))
-		return true;
-	if (on_sdei_stack(sp, size, info))
-		return true;
-
-	return false;
-}
-
-/*
  * Unwind from one frame record (A) to the next frame record (B).
  *
  * We terminate early if the location of B indicates a malformed chain of frame
@@ -103,14 +78,13 @@ static int notrace unwind_next(struct unwind_state *state)
 {
 	struct task_struct *tsk = state->task;
 	unsigned long fp = state->fp;
-	struct stack_info info;
 	int err;
 
 	/* Final frame; nothing to unwind */
 	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
 		return -ENOENT;
 
-	err = unwind_next_common(state, &info, on_accessible_stack, NULL);
+	err = unwind_next_frame_record(state);
 	if (err)
 		return err;
 
@@ -190,11 +164,47 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
 	barrier();
 }
 
+/*
+ * Per-cpu stacks are only accessible when unwinding the current task in a
+ * non-preemptible context.
+ */
+#define STACKINFO_CPU(name)					\
+	({							\
+		((task == current) && !preemptible())		\
+			? stackinfo_get_##name()		\
+			: stackinfo_get_unknown();		\
+	})
+
+/*
+ * SDEI stacks are only accessible when unwinding the current task in an NMI
+ * context.
+ */
+#define STACKINFO_SDEI(name)					\
+	({							\
+		((task == current) && in_nmi())			\
+			? stackinfo_get_sdei_##name()		\
+			: stackinfo_get_unknown();		\
+	})
+
 noinline notrace void arch_stack_walk(stack_trace_consume_fn consume_entry,
 			      void *cookie, struct task_struct *task,
 			      struct pt_regs *regs)
 {
-	struct unwind_state state;
+	struct stack_info stacks[] = {
+		stackinfo_get_task(task),
+		STACKINFO_CPU(irq),
+#if defined(CONFIG_VMAP_STACK)
+		STACKINFO_CPU(overflow),
+#endif
+#if defined(CONFIG_VMAP_STACK) && defined(CONFIG_ARM_SDE_INTERFACE)
+		STACKINFO_SDEI(normal),
+		STACKINFO_SDEI(critical),
+#endif
+	};
+	struct unwind_state state = {
+		.stacks = stacks,
+		.nr_stacks = ARRAY_SIZE(stacks),
+	};
 
 	if (regs) {
 		if (task != current)
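
The stack_info array above effectively replaces the open-coded on_accessible_stack() checks. As a rough, freestanding sketch of the idea (struct range and on_any_stack() are illustrative names, and the real helpers also record which stack was hit so the unwinder can refuse to revisit it):

	struct range {
		unsigned long low;
		unsigned long high;
	};

	/* An SP/size pair is only accepted if it lies entirely inside one of
	 * the candidate stacks; the wrap-around test mirrors what any real
	 * range check must do. */
	static int on_any_stack(unsigned long sp, unsigned long size,
				const struct range *stacks, int nr)
	{
		int i;

		for (i = 0; i < nr; i++) {
			if (sp < stacks[i].low || sp + size < sp ||
			    sp + size > stacks[i].high)
				continue;
			return 1;
		}
		return 0;
	}
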
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 9135fe0f3df5..8b02d310838f 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -43,6 +43,8 @@ void notrace __cpu_suspend_exit(void)
 {
 	unsigned int cpu = smp_processor_id();
 
+	mte_suspend_exit();
+
 	/*
 	 * We are resuming from reset with the idmap active in TTBR0_EL1.
 	 * We must uninstall the idmap and restore the expected MMU
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 3c026da95bbc..23d281ed7621 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -181,12 +181,12 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
 
 #define S_SMP " SMP"
 
-static int __die(const char *str, int err, struct pt_regs *regs)
+static int __die(const char *str, long err, struct pt_regs *regs)
 {
 	static int die_counter;
 	int ret;
 
-	pr_emerg("Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n",
+	pr_emerg("Internal error: %s: %016lx [#%d]" S_PREEMPT S_SMP "\n",
 		 str, err, ++die_counter);
 
 	/* trap and error numbers are mostly meaningless on ARM */
@@ -207,7 +207,7 @@ static DEFINE_RAW_SPINLOCK(die_lock);
 /*
  * This function is protected against re-entrancy.
  */
-void die(const char *str, struct pt_regs *regs, int err)
+void die(const char *str, struct pt_regs *regs, long err)
 {
 	int ret;
 	unsigned long flags;
@@ -486,7 +486,7 @@ void arm64_notify_segfault(unsigned long addr)
 	force_signal_inject(SIGSEGV, code, addr, 0);
 }
 
-void do_undefinstr(struct pt_regs *regs)
+void do_undefinstr(struct pt_regs *regs, unsigned long esr)
 {
 	/* check for AArch32 breakpoint instructions */
 	if (!aarch32_break_handler(regs))
@@ -495,28 +495,38 @@ void do_undefinstr(struct pt_regs *regs)
 	if (call_undef_hook(regs) == 0)
 		return;
 
-	BUG_ON(!user_mode(regs));
+	if (!user_mode(regs))
+		die("Oops - Undefined instruction", regs, esr);
+
 	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 }
 NOKPROBE_SYMBOL(do_undefinstr);
 
-void do_bti(struct pt_regs *regs)
+void do_el0_bti(struct pt_regs *regs)
 {
-	BUG_ON(!user_mode(regs));
 	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 }
-NOKPROBE_SYMBOL(do_bti);
 
-void do_ptrauth_fault(struct pt_regs *regs, unsigned long esr)
+void do_el1_bti(struct pt_regs *regs, unsigned long esr)
+{
+	die("Oops - BTI", regs, esr);
+}
+NOKPROBE_SYMBOL(do_el1_bti);
+
+void do_el0_fpac(struct pt_regs *regs, unsigned long esr)
+{
+	force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
+}
+
+void do_el1_fpac(struct pt_regs *regs, unsigned long esr)
 {
 	/*
-	 * Unexpected FPAC exception or pointer authentication failure in
-	 * the kernel: kill the task before it does any more harm.
+	 * Unexpected FPAC exception in the kernel: kill the task before it
+	 * does any more harm.
 	 */
-	BUG_ON(!user_mode(regs));
-	force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
+	die("Oops - FPAC", regs, esr);
 }
-NOKPROBE_SYMBOL(do_ptrauth_fault);
+NOKPROBE_SYMBOL(do_el1_fpac);
 
 #define __user_cache_maint(insn, address, res)			\
 	if (address >= TASK_SIZE_MAX) {				\
@@ -759,7 +769,7 @@ void do_cp15instr(unsigned long esr, struct pt_regs *regs)
 		hook_base = cp15_64_hooks;
 		break;
 	default:
-		do_undefinstr(regs);
+		do_undefinstr(regs, esr);
 		return;
 	}
 
@@ -774,7 +784,7 @@ void do_cp15instr(unsigned long esr, struct pt_regs *regs)
 	 * EL0. Fall back to our usual undefined instruction handler
 	 * so that we handle these consistently.
 	 */
-	do_undefinstr(regs);
+	do_undefinstr(regs, esr);
 }
 NOKPROBE_SYMBOL(do_cp15instr);
 #endif
@@ -794,7 +804,7 @@ void do_sysinstr(unsigned long esr, struct pt_regs *regs)
 	 * back to our usual undefined instruction handler so that we handle
 	 * these consistently.
 	 */
-	do_undefinstr(regs);
+	do_undefinstr(regs, esr);
 }
 NOKPROBE_SYMBOL(do_sysinstr);
 
@@ -971,7 +981,7 @@ static int bug_handler(struct pt_regs *regs, unsigned long esr)
 {
 	switch (report_bug(regs->pc, regs)) {
 	case BUG_TRAP_TYPE_BUG:
-		die("Oops - BUG", regs, 0);
+		die("Oops - BUG", regs, esr);
 		break;
 
 	case BUG_TRAP_TYPE_WARN:
@@ -1071,7 +1081,7 @@ static int kasan_handler(struct pt_regs *regs, unsigned long esr)
 	 * This is something that might be fixed at some point in the future.
 	 */
 	if (!recover)
-		die("Oops - KASAN", regs, 0);
+		die("Oops - KASAN", regs, esr);
 
 	/* If thread survives, skip over the brk instruction and continue: */
 	arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a61fc4f989b3..ac93a2ee9c07 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -29,9 +29,6 @@
 #include <asm/signal32.h>
 #include <asm/vdso.h>
 
-extern char vdso_start[], vdso_end[];
-extern char vdso32_start[], vdso32_end[];
-
 enum vdso_abi {
 	VDSO_ABI_AA64,
 	VDSO_ABI_AA32,
diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S
index e69fb4aaaf3e..6028f1fe2d1c 100644
--- a/arch/arm64/kernel/vdso/vdso.lds.S
+++ b/arch/arm64/kernel/vdso/vdso.lds.S
@@ -48,6 +48,13 @@ SECTIONS
 	PROVIDE (_etext = .);
 	PROVIDE (etext = .);
 
+	. = ALIGN(4);
+	.altinstructions : {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
 	.dynamic	: { *(.dynamic) }		:text	:dynamic
 
 	.rela.dyn	: ALIGN(8) { *(.rela .rela*) }
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 0b28d7db7c76..3f7563d768e2 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -295,12 +295,12 @@ void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
 	 * If SPE is present on this CPU and is available at current EL,
 	 * we may need to check if the host state needs to be saved.
 	 */
-	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
+	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_PMSVer_SHIFT) &&
 	    !(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
 		vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_SPE);
 
 	/* Check if we have TRBE implemented and available at the host */
-	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
+	if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
 	    !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
 		vcpu_set_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
 }
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 7839d075729b..8f3f93fa119e 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -196,7 +196,7 @@ SYM_CODE_END(__kvm_hyp_vector)
 	sub	sp, sp, #(8 * 4)
 	stp	x2, x3, [sp, #(8 * 0)]
 	stp	x0, x1, [sp, #(8 * 2)]
-	alternative_cb spectre_bhb_patch_wa3
+	alternative_cb ARM64_ALWAYS_SYSTEM, spectre_bhb_patch_wa3
 	/* Patched to mov WA3 when supported */
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_1
 	alternative_cb_end
@@ -216,7 +216,7 @@ SYM_CODE_END(__kvm_hyp_vector)
 	mitigate_spectre_bhb_clear_insn
 	.endif
 	.if \indirect != 0
-	alternative_cb  kvm_patch_vector_branch
+	alternative_cb ARM64_ALWAYS_SYSTEM, kvm_patch_vector_branch
 	/*
 	 * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with:
 	 *
diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
index fa6e466ed57f..07edfc7524c9 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h
@@ -35,9 +35,9 @@
  * - Data Independent Timing
  */
 #define PVM_ID_AA64PFR0_ALLOW (\
-	ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \
-	ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \
-	ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \
+	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \
+	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \
+	ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) \
 	)
 
 /*
@@ -49,11 +49,11 @@
  *	Supported by KVM
  */
 #define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \
 	)
 
 /*
@@ -62,8 +62,8 @@
  * - Speculative Store Bypassing
  */
 #define PVM_ID_AA64PFR1_ALLOW (\
-	ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \
-	ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \
+	ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \
+	ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \
 	)
 
 /*
@@ -74,10 +74,10 @@
  * - Non-context synchronizing exception entry and exit
  */
 #define PVM_ID_AA64MMFR0_ALLOW (\
-	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \
+	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \
 	)
 
 /*
@@ -86,8 +86,8 @@
  * - 16-bit ASID
  */
 #define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \
-	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \
+	FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \
 	)
 
 /*
@@ -100,12 +100,12 @@
  * - Enhanced Translation Synchronization
  */
 #define PVM_ID_AA64MMFR1_ALLOW (\
-	ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \
+	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \
 	)
 
 /*
@@ -120,14 +120,14 @@
  * - E0PDx mechanism
  */
 #define PVM_ID_AA64MMFR2_ALLOW (\
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \
-	ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \
+	ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \
 	)
 
 /*
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 99c8d8b73e70..85d3b7ae720f 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -20,35 +20,35 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
 	u64 cptr_set = 0;
 
 	/* Protected KVM does not support AArch32 guests. */
-	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0),
-		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
-	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
-		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY);
+	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0),
+		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
+		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
 
 	/*
 	 * Linux guests assume support for floating-point and Advanced SIMD. Do
 	 * not change the trapping behavior for these from the KVM default.
 	 */
-	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP),
+	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP),
 				PVM_ID_AA64PFR0_ALLOW));
-	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD),
+	BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD),
 				PVM_ID_AA64PFR0_ALLOW));
 
 	/* Trap RAS unless all current versions are supported */
-	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) <
-	    ID_AA64PFR0_RAS_V1P1) {
+	if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), feature_ids) <
+	    ID_AA64PFR0_EL1_RAS_V1P1) {
 		hcr_set |= HCR_TERR | HCR_TEA;
 		hcr_clear |= HCR_FIEN;
 	}
 
 	/* Trap AMU */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) {
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU), feature_ids)) {
 		hcr_clear |= HCR_AMVOFFEN;
 		cptr_set |= CPTR_EL2_TAM;
 	}
 
 	/* Trap SVE */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids))
 		cptr_set |= CPTR_EL2_TZ;
 
 	vcpu->arch.hcr_el2 |= hcr_set;
@@ -66,7 +66,7 @@ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu)
 	u64 hcr_clear = 0;
 
 	/* Memory Tagging: Trap and Treat as Untagged if not supported. */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) {
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), feature_ids)) {
 		hcr_set |= HCR_TID5;
 		hcr_clear |= HCR_DCT | HCR_ATA;
 	}
@@ -86,32 +86,32 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu)
 	u64 cptr_set = 0;
 
 	/* Trap/constrain PMU */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) {
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), feature_ids)) {
 		mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR;
 		mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME |
 			      MDCR_EL2_HPMN_MASK;
 	}
 
 	/* Trap Debug */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), feature_ids))
 		mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE;
 
 	/* Trap OS Double Lock */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DoubleLock), feature_ids))
 		mdcr_set |= MDCR_EL2_TDOSA;
 
 	/* Trap SPE */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) {
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer), feature_ids)) {
 		mdcr_set |= MDCR_EL2_TPMS;
 		mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
 	}
 
 	/* Trap Trace Filter */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceFilt), feature_ids))
 		mdcr_set |= MDCR_EL2_TTRF;
 
 	/* Trap Trace */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_TraceVer), feature_ids))
 		cptr_set |= CPTR_EL2_TTA;
 
 	vcpu->arch.mdcr_el2 |= mdcr_set;
@@ -128,7 +128,7 @@ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu)
 	u64 mdcr_set = 0;
 
 	/* Trap Debug Communications Channel registers */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_FGT), feature_ids))
 		mdcr_set |= MDCR_EL2_TDCC;
 
 	vcpu->arch.mdcr_el2 |= mdcr_set;
@@ -143,7 +143,7 @@ static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu)
 	u64 hcr_set = 0;
 
 	/* Trap LOR */
-	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids))
+	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_LO), feature_ids))
 		hcr_set |= HCR_TLOR;
 
 	vcpu->arch.hcr_el2 |= hcr_set;
diff --git a/arch/arm64/kvm/hyp/nvhe/stacktrace.c b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
index 58f645ad66bc..ed6b58b19cfa 100644
--- a/arch/arm64/kvm/hyp/nvhe/stacktrace.c
+++ b/arch/arm64/kvm/hyp/nvhe/stacktrace.c
@@ -39,41 +39,32 @@ static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
 
 DEFINE_PER_CPU(unsigned long [NVHE_STACKTRACE_SIZE/sizeof(long)], pkvm_stacktrace);
 
-static bool on_overflow_stack(unsigned long sp, unsigned long size,
-			      struct stack_info *info)
+static struct stack_info stackinfo_get_overflow(void)
 {
 	unsigned long low = (unsigned long)this_cpu_ptr(overflow_stack);
 	unsigned long high = low + OVERFLOW_STACK_SIZE;
 
-	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
 }
 
-static bool on_hyp_stack(unsigned long sp, unsigned long size,
-			      struct stack_info *info)
+static struct stack_info stackinfo_get_hyp(void)
 {
 	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
 	unsigned long high = params->stack_hyp_va;
 	unsigned long low = high - PAGE_SIZE;
 
-	return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
-}
-
-static bool on_accessible_stack(const struct task_struct *tsk,
-				unsigned long sp, unsigned long size,
-				struct stack_info *info)
-{
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	return (on_overflow_stack(sp, size, info) ||
-		on_hyp_stack(sp, size, info));
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
 }
 
 static int unwind_next(struct unwind_state *state)
 {
-	struct stack_info info;
-
-	return unwind_next_common(state, &info, on_accessible_stack, NULL);
+	return unwind_next_frame_record(state);
 }
 
 static void notrace unwind(struct unwind_state *state,
@@ -129,7 +120,14 @@ static bool pkvm_save_backtrace_entry(void *arg, unsigned long where)
  */
 static void pkvm_save_backtrace(unsigned long fp, unsigned long pc)
 {
-	struct unwind_state state;
+	struct stack_info stacks[] = {
+		stackinfo_get_overflow(),
+		stackinfo_get_hyp(),
+	};
+	struct unwind_state state = {
+		.stacks = stacks,
+		.nr_stacks = ARRAY_SIZE(stacks),
+	};
 	int idx = 0;
 
 	kvm_nvhe_unwind_init(&state, fp, pc);
diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
index e20fa4475dac..0f9ac25afdf4 100644
--- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c
+++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c
@@ -92,9 +92,9 @@ static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu)
 		PVM_ID_AA64PFR0_RESTRICT_UNSIGNED);
 
 	/* Spectre and Meltdown mitigation in KVM */
-	set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2),
+	set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2),
 			       (u64)kvm->arch.pfr0_csv2);
-	set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3),
+	set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3),
 			       (u64)kvm->arch.pfr0_csv3);
 
 	return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask;
@@ -106,7 +106,7 @@ static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu)
 	u64 allow_mask = PVM_ID_AA64PFR1_ALLOW;
 
 	if (!kvm_has_mte(kvm))
-		allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+		allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
 
 	return id_aa64pfr1_el1_sys_val & allow_mask;
 }
@@ -281,8 +281,8 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu,
 	 * No support for AArch32 guests, therefore, pKVM has no sanitized copy
 	 * of AArch32 feature id registers.
 	 */
-	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1),
-		     PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY);
+	BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1),
+		     PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
 
 	return pvm_access_raz_wi(vcpu, p, r);
 }
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 2cb3867eb7c2..cdf8e76b0be1 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -61,7 +61,7 @@ struct kvm_pgtable_walk_data {
 
 static bool kvm_phys_is_valid(u64 phys)
 {
-	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
+	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
 }
 
 static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 11c43bed5f97..0003c7d37533 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -33,12 +33,12 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
 	pmuver = kvm->arch.arm_pmu->pmuver;
 
 	switch (pmuver) {
-	case ID_AA64DFR0_PMUVER_8_0:
+	case ID_AA64DFR0_EL1_PMUVer_IMP:
 		return GENMASK(9, 0);
-	case ID_AA64DFR0_PMUVER_8_1:
-	case ID_AA64DFR0_PMUVER_8_4:
-	case ID_AA64DFR0_PMUVER_8_5:
-	case ID_AA64DFR0_PMUVER_8_7:
+	case ID_AA64DFR0_EL1_PMUVer_V3P1:
+	case ID_AA64DFR0_EL1_PMUVer_V3P4:
+	case ID_AA64DFR0_EL1_PMUVer_V3P5:
+	case ID_AA64DFR0_EL1_PMUVer_V3P7:
 		return GENMASK(15, 0);
 	default:		/* Shouldn't be here, just for sanity */
 		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
@@ -774,7 +774,7 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 {
 	struct arm_pmu_entry *entry;
 
-	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
+	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
 		return;
 
 	mutex_lock(&arm_pmus_lock);
@@ -828,7 +828,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
 	if (event->pmu) {
 		pmu = to_arm_pmu(event->pmu);
 		if (pmu->pmuver == 0 ||
-		    pmu->pmuver == ID_AA64DFR0_PMUVER_IMP_DEF)
+		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
 			pmu = NULL;
 	}
 
@@ -856,7 +856,7 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
 		 * as RAZ
 		 */
-		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_PMUVER_8_4)
+		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
 			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
 		base = 32;
 	}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0e08fbe68715..5ae18472205a 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -359,7 +359,7 @@ int kvm_set_ipa_limit(void)
 
 	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	parange = cpuid_feature_extract_unsigned_field(mmfr0,
-				ID_AA64MMFR0_PARANGE_SHIFT);
+				ID_AA64MMFR0_EL1_PARANGE_SHIFT);
 	/*
 	 * IPA size beyond 48 bits could not be supported
 	 * on either 4K or 16K page size. Hence let's cap
@@ -367,20 +367,20 @@ int kvm_set_ipa_limit(void)
 	 * on the system.
 	 */
 	if (PAGE_SIZE != SZ_64K)
-		parange = min(parange, (unsigned int)ID_AA64MMFR0_PARANGE_48);
+		parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48);
 
 	/*
 	 * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
 	 * Stage-2. If not, things will stop very quickly.
 	 */
-	switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_TGRAN_2_SHIFT)) {
-	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
+	switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_2_SHIFT)) {
+	case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE:
 		kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
 		return -EINVAL;
-	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT:
+	case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT:
 		kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
 		break;
-	case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX:
+	case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX:
 		kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
 		break;
 	default:
diff --git a/arch/arm64/kvm/stacktrace.c b/arch/arm64/kvm/stacktrace.c
index 949d19d603fb..3ace5b75813b 100644
--- a/arch/arm64/kvm/stacktrace.c
+++ b/arch/arm64/kvm/stacktrace.c
@@ -21,6 +21,54 @@
 
 #include <asm/stacktrace/nvhe.h>
 
+static struct stack_info stackinfo_get_overflow(void)
+{
+	struct kvm_nvhe_stacktrace_info *stacktrace_info
+				= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+	unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
+	unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static struct stack_info stackinfo_get_overflow_kern_va(void)
+{
+	unsigned long low = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
+	unsigned long high = low + OVERFLOW_STACK_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static struct stack_info stackinfo_get_hyp(void)
+{
+	struct kvm_nvhe_stacktrace_info *stacktrace_info
+				= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+	unsigned long low = (unsigned long)stacktrace_info->stack_base;
+	unsigned long high = low + PAGE_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
+static struct stack_info stackinfo_get_hyp_kern_va(void)
+{
+	unsigned long low = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
+	unsigned long high = low + PAGE_SIZE;
+
+	return (struct stack_info) {
+		.low = low,
+		.high = high,
+	};
+}
+
 /*
 * kvm_nvhe_stack_kern_va - Convert KVM nVHE HYP stack addresses to kernel VAs
  *
@@ -34,73 +82,45 @@
  * Returns true on success and updates @addr to its corresponding kernel VA;
  * otherwise returns false.
  */
-static bool kvm_nvhe_stack_kern_va(unsigned long *addr,
-				   enum stack_type type)
+static bool kvm_nvhe_stack_kern_va(unsigned long *addr, unsigned long size)
 {
-	struct kvm_nvhe_stacktrace_info *stacktrace_info;
-	unsigned long hyp_base, kern_base, hyp_offset;
+	struct stack_info stack_hyp, stack_kern;
 
-	stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
+	stack_hyp = stackinfo_get_hyp();
+	stack_kern = stackinfo_get_hyp_kern_va();
+	if (stackinfo_on_stack(&stack_hyp, *addr, size))
+		goto found;
 
-	switch (type) {
-	case STACK_TYPE_HYP:
-		kern_base = (unsigned long)*this_cpu_ptr(&kvm_arm_hyp_stack_page);
-		hyp_base = (unsigned long)stacktrace_info->stack_base;
-		break;
-	case STACK_TYPE_OVERFLOW:
-		kern_base = (unsigned long)this_cpu_ptr_nvhe_sym(overflow_stack);
-		hyp_base = (unsigned long)stacktrace_info->overflow_stack_base;
-		break;
-	default:
-		return false;
-	}
+	stack_hyp = stackinfo_get_overflow();
+	stack_kern = stackinfo_get_overflow_kern_va();
+	if (stackinfo_on_stack(&stack_hyp, *addr, size))
+		goto found;
 
-	hyp_offset = *addr - hyp_base;
-
-	*addr = kern_base + hyp_offset;
+	return false;
 
+found:
+	*addr = *addr - stack_hyp.low + stack_kern.low;
 	return true;
 }
 
-static bool on_overflow_stack(unsigned long sp, unsigned long size,
-			      struct stack_info *info)
-{
-	struct kvm_nvhe_stacktrace_info *stacktrace_info
-				= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
-	unsigned long low = (unsigned long)stacktrace_info->overflow_stack_base;
-	unsigned long high = low + OVERFLOW_STACK_SIZE;
-
-	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
-}
-
-static bool on_hyp_stack(unsigned long sp, unsigned long size,
-			 struct stack_info *info)
-{
-	struct kvm_nvhe_stacktrace_info *stacktrace_info
-				= this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
-	unsigned long low = (unsigned long)stacktrace_info->stack_base;
-	unsigned long high = low + PAGE_SIZE;
-
-	return on_stack(sp, size, low, high, STACK_TYPE_HYP, info);
-}
-
-static bool on_accessible_stack(const struct task_struct *tsk,
-				unsigned long sp, unsigned long size,
-				struct stack_info *info)
+/*
+ * Convert a KVM nVHE HYP frame record address to a kernel VA
+ */
+static bool kvm_nvhe_stack_kern_record_va(unsigned long *addr)
 {
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	return (on_overflow_stack(sp, size, info) ||
-		on_hyp_stack(sp, size, info));
+	return kvm_nvhe_stack_kern_va(addr, 16);
 }
 
 static int unwind_next(struct unwind_state *state)
 {
-	struct stack_info info;
-
-	return unwind_next_common(state, &info, on_accessible_stack,
-				  kvm_nvhe_stack_kern_va);
+	/*
+	 * The FP is in the hypervisor VA space. Convert it to the kernel VA
+	 * space so it can be unwound by the regular unwind functions.
+	 */
+	if (!kvm_nvhe_stack_kern_record_va(&state->fp))
+		return -EINVAL;
+
+	return unwind_next_frame_record(state);
 }
 
 static void unwind(struct unwind_state *state,
@@ -158,7 +178,14 @@ static void kvm_nvhe_dump_backtrace_end(void)
 static void hyp_dump_backtrace(unsigned long hyp_offset)
 {
 	struct kvm_nvhe_stacktrace_info *stacktrace_info;
-	struct unwind_state state;
+	struct stack_info stacks[] = {
+		stackinfo_get_overflow_kern_va(),
+		stackinfo_get_hyp_kern_va(),
+	};
+	struct unwind_state state = {
+		.stacks = stacks,
+		.nr_stacks = ARRAY_SIZE(stacks),
+	};
 
 	stacktrace_info = this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 3234f50b8c4b..2ef1121ab844 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -273,7 +273,7 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
 	u64 val = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	u32 sr = reg_to_encoding(r);
 
-	if (!(val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))) {
+	if (!(val & (0xfUL << ID_AA64MMFR1_EL1_LO_SHIFT))) {
 		kvm_inject_undefined(vcpu);
 		return false;
 	}
@@ -1077,22 +1077,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 	switch (id) {
 	case SYS_ID_AA64PFR0_EL1:
 		if (!vcpu_has_sve(vcpu))
-			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE);
-		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_AMU);
-		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2);
-		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
-		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3);
-		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
+			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3);
 		if (kvm_vgic_global_state.type == VGIC_V3) {
-			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC);
-			val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1);
+			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC);
+			val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1);
 		}
 		break;
 	case SYS_ID_AA64PFR1_EL1:
 		if (!kvm_has_mte(vcpu->kvm))
-			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE);
+			val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE);
 
-		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_SME);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SME);
 		break;
 	case SYS_ID_AA64ISAR1_EL1:
 		if (!vcpu_has_ptrauth(vcpu))
@@ -1110,14 +1110,14 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		break;
 	case SYS_ID_AA64DFR0_EL1:
 		/* Limit debug to ARMv8.0 */
-		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER);
-		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), 6);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6);
 		/* Limit guests to PMUv3 for ARMv8.4 */
 		val = cpuid_feature_cap_perfmon_field(val,
-						      ID_AA64DFR0_PMUVER_SHIFT,
-						      kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
+						      ID_AA64DFR0_EL1_PMUVer_SHIFT,
+						      kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0);
 		/* Hide SPE from guests */
-		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER);
+		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer);
 		break;
 	case SYS_ID_DFR0_EL1:
 		/* Limit guests to PMUv3 for ARMv8.4 */
@@ -1196,21 +1196,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
 	 * it doesn't promise more than what is actually provided (the
 	 * guest could otherwise be covered in ectoplasmic residue).
 	 */
-	csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT);
+	csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV2_SHIFT);
 	if (csv2 > 1 ||
 	    (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED))
 		return -EINVAL;
 
 	/* Same thing for CSV3 */
-	csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT);
+	csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_EL1_CSV3_SHIFT);
 	if (csv3 > 1 ||
 	    (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED))
 		return -EINVAL;
 
 	/* We can only differ with CSV[23], and anything else is an error */
 	val ^= read_id_reg(vcpu, rd, false);
-	val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) |
-		 (0xFUL << ID_AA64PFR0_CSV3_SHIFT));
+	val &= ~((0xFUL << ID_AA64PFR0_EL1_CSV2_SHIFT) |
+		 (0xFUL << ID_AA64PFR0_EL1_CSV3_SHIFT));
 	if (val)
 		return -EINVAL;
 
@@ -1825,11 +1825,11 @@ static bool trap_dbgdidr(struct kvm_vcpu *vcpu,
 	} else {
 		u64 dfr = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
 		u64 pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
-		u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL3_SHIFT);
+		u32 el3 = !!cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR0_EL1_EL3_SHIFT);
 
-		p->regval = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
-			     (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
-			     (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20)
+		p->regval = ((((dfr >> ID_AA64DFR0_EL1_WRPs_SHIFT) & 0xf) << 28) |
+			     (((dfr >> ID_AA64DFR0_EL1_BRPs_SHIFT) & 0xf) << 24) |
+			     (((dfr >> ID_AA64DFR0_EL1_CTX_CMPs_SHIFT) & 0xf) << 20)
 			     | (6 << 16) | (1 << 15) | (el3 << 14) | (el3 << 12));
 		return true;
 	}
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
index acdb7b3cc97d..91b22a014610 100644
--- a/arch/arm64/kvm/va_layout.c
+++ b/arch/arm64/kvm/va_layout.c
@@ -169,7 +169,7 @@ void __init kvm_update_va_mask(struct alt_instr *alt,
 		 * dictates it and we don't have any spare bits in the
 		 * address), NOP everything after masking the kernel VA.
 		 */
-		if (has_vhe() || (!tag_val && i > 0)) {
+		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN) || (!tag_val && i > 0)) {
 			updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
 			continue;
 		}
@@ -193,7 +193,8 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
 
 	BUG_ON(nr_inst != 4);
 
-	if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe()))
+	if (!cpus_have_cap(ARM64_SPECTRE_V3A) ||
+	    WARN_ON_ONCE(cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)))
 		return;
 
 	/*
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index b8b4cf0bcf39..e1e0dca01839 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -43,17 +43,17 @@ static u32 get_cpu_asid_bits(void)
 {
 	u32 asid;
 	int fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64MMFR0_EL1),
-						ID_AA64MMFR0_ASID_SHIFT);
+						ID_AA64MMFR0_EL1_ASIDBITS_SHIFT);
 
 	switch (fld) {
 	default:
 		pr_warn("CPU%d: Unknown ASID size (%d); assuming 8-bit\n",
 					smp_processor_id(),  fld);
 		fallthrough;
-	case ID_AA64MMFR0_ASID_8:
+	case ID_AA64MMFR0_EL1_ASIDBITS_8:
 		asid = 8;
 		break;
-	case ID_AA64MMFR0_ASID_16:
+	case ID_AA64MMFR0_EL1_ASIDBITS_16:
 		asid = 16;
 	}
 
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 599cf81f5685..83a512a6ff0d 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -36,7 +36,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	unsigned long start = (unsigned long)page_address(page);
 
-	dcache_clean_inval_poc(start, start + size);
+	dcache_clean_poc(start, start + size);
 }
 
 #ifdef CONFIG_IOMMU_DMA
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c33f1fad2745..5b391490e045 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -691,6 +691,9 @@ static int __kprobes do_translation_fault(unsigned long far,
 static int do_alignment_fault(unsigned long far, unsigned long esr,
 			      struct pt_regs *regs)
 {
+	if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) &&
+	    compat_user_mode(regs))
+		return do_compat_alignment_fixup(far, regs);
 	do_bad_area(far, esr, regs);
 	return 0;
 }
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index b9af30be813e..4b4651ee47f2 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -360,7 +360,7 @@ void __init arm64_memblock_init(void)
 		extern u16 memstart_offset_seed;
 		u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
 		int parange = cpuid_feature_extract_unsigned_field(
-					mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
+					mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
 		s64 range = linear_region_size -
 			    BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index eb489302c28a..9a7c38965154 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -539,7 +539,7 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
-	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
+	if (can_set_direct_map())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
@@ -690,7 +690,7 @@ static bool arm64_early_this_cpu_has_bti(void)
 
 	pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
 	return cpuid_feature_extract_unsigned_field(pfr1,
-						    ID_AA64PFR1_BT_SHIFT);
+						    ID_AA64PFR1_EL1_BT_SHIFT);
 }
 
 /*
@@ -1184,14 +1184,6 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
 }
 #endif
 
-#if !ARM64_KERNEL_USES_PMD_MAPS
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-		struct vmem_altmap *altmap)
-{
-	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
-	return vmemmap_populate_basepages(start, end, node, altmap);
-}
-#else	/* !ARM64_KERNEL_USES_PMD_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
@@ -1203,6 +1195,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	pmd_t *pmdp;
 
 	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
+
+	if (!ARM64_KERNEL_USES_PMD_MAPS)
+		return vmemmap_populate_basepages(start, end, node, altmap);
+
 	do {
 		next = pmd_addr_end(addr, end);
 
@@ -1236,7 +1232,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 
 	return 0;
 }
-#endif	/* !ARM64_KERNEL_USES_PMD_MAPS */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end,
@@ -1551,11 +1546,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 
 	VM_BUG_ON(!mhp_range_allowed(start, size, true));
 
-	/*
-	 * KFENCE requires linear map to be mapped at page granularity, so that
-	 * it is possible to protect/unprotect single pages in the KFENCE pool.
-	 */
-	if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE))
+	if (can_set_direct_map())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 64e985eaa52d..d107c3d434e2 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -21,7 +21,13 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
 
 bool can_set_direct_map(void)
 {
-	return rodata_full || debug_pagealloc_enabled();
+	/*
+	 * rodata_full, DEBUG_PAGEALLOC and KFENCE require the linear map to be
+	 * mapped at page granularity, so that it is possible to
+	 * protect/unprotect single pages.
+	 */
+	return rodata_full || debug_pagealloc_enabled() ||
+		IS_ENABLED(CONFIG_KFENCE);
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 8b9f419fcad9..b9ecbbae1e1a 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -49,17 +49,19 @@
 
 #ifdef CONFIG_KASAN_HW_TAGS
 #define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1
-#else
+#elif defined(CONFIG_ARM64_MTE)
 /*
  * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
  * TBI being enabled at EL1.
  */
 #define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
+#else
+#define TCR_MTE_FLAGS 0
 #endif
 
 /*
  * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and
- * changed during __cpu_setup to Normal Tagged if the system supports MTE.
+ * changed during mte_cpu_setup to Normal Tagged if the system supports MTE.
  */
 #define MAIR_EL1_SET							\
 	(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |	\
@@ -427,46 +429,8 @@ SYM_FUNC_START(__cpu_setup)
 	mov_q	mair, MAIR_EL1_SET
 	mov_q	tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
-			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
-
-#ifdef CONFIG_ARM64_MTE
-	/*
-	 * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
-	 * (ID_AA64PFR1_EL1[11:8] > 1).
-	 */
-	mrs	x10, ID_AA64PFR1_EL1
-	ubfx	x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4
-	cmp	x10, #ID_AA64PFR1_MTE
-	b.lt	1f
-
-	/* Normal Tagged memory type at the corresponding MAIR index */
-	mov	x10, #MAIR_ATTR_NORMAL_TAGGED
-	bfi	mair, x10, #(8 *  MT_NORMAL_TAGGED), #8
+			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS
 
-	mov	x10, #KERNEL_GCR_EL1
-	msr_s	SYS_GCR_EL1, x10
-
-	/*
-	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
-	 * RGSR_EL1.SEED must be non-zero for IRG to produce
-	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
-	 * must initialize it.
-	 */
-	mrs	x10, CNTVCT_EL0
-	ands	x10, x10, #SYS_RGSR_EL1_SEED_MASK
-	csinc	x10, x10, xzr, ne
-	lsl	x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
-	msr_s	SYS_RGSR_EL1, x10
-
-	/* clear any pending tag check faults in TFSR*_EL1 */
-	msr_s	SYS_TFSR_EL1, xzr
-	msr_s	SYS_TFSRE0_EL1, xzr
-
-	/* set the TCR_EL1 bits */
-	mov_q	x10, TCR_MTE_FLAGS
-	orr	tcr, tcr, x10
-1:
-#endif
 	tcr_clear_errata_bits tcr, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 63b2484ce6c3..f1c0347ec31a 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -2,6 +2,8 @@
 #
 # Internal CPU capabilities constants, keep this list sorted
 
+ALWAYS_BOOT
+ALWAYS_SYSTEM
 BTI
 # Unreliable: use system_supports_32bit_el0() instead.
 HAS_32BIT_EL0_DO_NOT_USE
@@ -68,6 +70,7 @@ WORKAROUND_2038923
 WORKAROUND_2064142
 WORKAROUND_2077057
 WORKAROUND_2457168
+WORKAROUND_2658417
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
 WORKAROUND_TRBE_WRITE_OUT_OF_RANGE
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 9ae483ec1e56..7f1fb36f208c 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -46,6 +46,127 @@
 # feature that introduces them (eg, FEAT_LS64_ACCDATA introduces enumeration
 # item ACCDATA) though it may be more tasteful to do something else.
 
+Sysreg	ID_AA64PFR0_EL1	3	0	0	4	0
+Enum	63:60	CSV3
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	59:56	CSV2
+	0b0000	NI
+	0b0001	IMP
+	0b0010	CSV2_2
+	0b0011	CSV2_3
+EndEnum
+Enum	55:52	RME
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	51:48	DIT
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	47:44	AMU
+	0b0000	NI
+	0b0001	IMP
+	0b0010	V1P1
+EndEnum
+Enum	43:40	MPAM
+	0b0000	0
+	0b0001	1
+EndEnum
+Enum	39:36	SEL2
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	35:32	SVE
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	31:28	RAS
+	0b0000	NI
+	0b0001	IMP
+	0b0010	V1P1
+EndEnum
+Enum	27:24	GIC
+	0b0000	NI
+	0b0001	IMP
+	0b0010	V4P1
+EndEnum
+Enum	23:20	AdvSIMD
+	0b0000	IMP
+	0b0001	FP16
+	0b1111	NI
+EndEnum
+Enum	19:16	FP
+	0b0000	IMP
+	0b0001	FP16
+	0b1111	NI
+EndEnum
+Enum	15:12	EL3
+	0b0000	NI
+	0b0001	IMP
+	0b0010	AARCH32
+EndEnum
+Enum	11:8	EL2
+	0b0000	NI
+	0b0001	IMP
+	0b0010	AARCH32
+EndEnum
+Enum	7:4	EL1
+	0b0001	IMP
+	0b0010	AARCH32
+EndEnum
+Enum	3:0	EL0
+	0b0001	IMP
+	0b0010	AARCH32
+EndEnum
+EndSysreg
+
+Sysreg	ID_AA64PFR1_EL1	3	0	0	4	1
+Res0	63:40
+Enum	39:36	NMI
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	35:32	CSV2_frac
+	0b0000	NI
+	0b0001	CSV2_1p1
+	0b0010	CSV2_1p2
+EndEnum
+Enum	31:28	RNDR_trap
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	27:24	SME
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Res0	23:20
+Enum	19:16	MPAM_frac
+	0b0000	MINOR_0
+	0b0001	MINOR_1
+EndEnum
+Enum	15:12	RAS_frac
+	0b0000	NI
+	0b0001	RASv1p1
+EndEnum
+Enum	11:8	MTE
+	0b0000	NI
+	0b0001	IMP
+	0b0010	MTE2
+	0b0011	MTE3
+EndEnum
+Enum	7:4	SSBS
+	0b0000	NI
+	0b0001	IMP
+	0b0010	SSBS2
+EndEnum
+Enum	3:0	BT
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+EndSysreg
+
 Sysreg	ID_AA64ZFR0_EL1	3	0	0	4	4
 Res0	63:60
 Enum	59:56	F64MM
@@ -98,7 +219,9 @@ Enum	63	FA64
 	0b1	IMP
 EndEnum
 Res0	62:60
-Field	59:56	SMEver
+Enum	59:56	SMEver
+	0b0000	IMP
+EndEnum
 Enum	55:52	I16I64
 	0b0000	NI
 	0b1111	IMP
@@ -129,6 +252,89 @@ EndEnum
 Res0	31:0
 EndSysreg
 
+Sysreg	ID_AA64DFR0_EL1	3	0	0	5	0
+Enum	63:60	HPMN0
+	0b0000	UNPREDICTABLE
+	0b0001	DEF
+EndEnum
+Res0	59:56
+Enum	55:52	BRBE
+	0b0000	NI
+	0b0001	IMP
+	0b0010	BRBE_V1P1
+EndEnum
+Enum	51:48	MTPMU
+	0b0000	NI_IMPDEF
+	0b0001	IMP
+	0b1111	NI
+EndEnum
+Enum	47:44	TraceBuffer
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	43:40	TraceFilt
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	39:36	DoubleLock
+	0b0000	IMP
+	0b1111	NI
+EndEnum
+Enum	35:32	PMSVer
+	0b0000	NI
+	0b0001	IMP
+	0b0010	V1P1
+	0b0011	V1P2
+	0b0100	V1P3
+EndEnum
+Field	31:28	CTX_CMPs
+Res0	27:24
+Field	23:20	WRPs
+Res0	19:16
+Field	15:12	BRPs
+Enum	11:8	PMUVer
+	0b0000	NI
+	0b0001	IMP
+	0b0100	V3P1
+	0b0101	V3P4
+	0b0110	V3P5
+	0b0111	V3P7
+	0b1000	V3P8
+	0b1111	IMP_DEF
+EndEnum
+Enum	7:4	TraceVer
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	3:0	DebugVer
+	0b0110	IMP
+	0b0111	VHE
+	0b1000	V8P2
+	0b1001	V8P4
+	0b1010	V8P8
+EndEnum
+EndSysreg
+
+Sysreg	ID_AA64DFR1_EL1	3	0	0	5	1
+Res0	63:0
+EndSysreg
+
+Sysreg	ID_AA64AFR0_EL1	3	0	0	5	4
+Res0	63:32
+Field	31:28	IMPDEF7
+Field	27:24	IMPDEF6
+Field	23:20	IMPDEF5
+Field	19:16	IMPDEF4
+Field	15:12	IMPDEF3
+Field	11:8	IMPDEF2
+Field	7:4	IMPDEF1
+Field	3:0	IMPDEF0
+EndSysreg
+
+Sysreg	ID_AA64AFR1_EL1	3	0	0	5	5
+Res0	63:0
+EndSysreg
+
 Sysreg	ID_AA64ISAR0_EL1	3	0	0	6	0
 Enum	63:60	RNDR
 	0b0000	NI
@@ -313,6 +519,217 @@ Enum	3:0	WFxT
 EndEnum
 EndSysreg
 
+Sysreg	ID_AA64MMFR0_EL1	3	0	0	7	0
+Enum	63:60	ECV
+	0b0000	NI
+	0b0001	IMP
+	0b0010	CNTPOFF
+EndEnum
+Enum	59:56	FGT
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Res0	55:48
+Enum	47:44	EXS
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	43:40	TGRAN4_2
+	0b0000	TGRAN4
+	0b0001	NI
+	0b0010	IMP
+	0b0011	52_BIT
+EndEnum
+Enum	39:36	TGRAN64_2
+	0b0000	TGRAN64
+	0b0001	NI
+	0b0010	IMP
+EndEnum
+Enum	35:32	TGRAN16_2
+	0b0000	TGRAN16
+	0b0001	NI
+	0b0010	IMP
+	0b0011	52_BIT
+EndEnum
+Enum	31:28	TGRAN4
+	0b0000	IMP
+	0b0001	52_BIT
+	0b1111	NI
+EndEnum
+Enum	27:24	TGRAN64
+	0b0000	IMP
+	0b1111	NI
+EndEnum
+Enum	23:20	TGRAN16
+	0b0000	NI
+	0b0001	IMP
+	0b0010	52_BIT
+EndEnum
+Enum	19:16	BIGENDEL0
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	15:12	SNSMEM
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	11:8	BIGEND
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	7:4	ASIDBITS
+	0b0000	8
+	0b0010	16
+EndEnum
+Enum	3:0	PARANGE
+	0b0000	32
+	0b0001	36
+	0b0010	40
+	0b0011	42
+	0b0100	44
+	0b0101	48
+	0b0110	52
+EndEnum
+EndSysreg
+
+Sysreg	ID_AA64MMFR1_EL1	3	0	0	7	1
+Enum	63:60	ECBHB
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	59:56	CMOW
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	55:52	TIDCP1
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	51:48	nTLBPA
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	47:44	AFP
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	43:40	HCX
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	39:36	ETS
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	35:32	TWED
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	31:28	XNX
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	27:24	SpecSEI
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	23:20	PAN
+	0b0000	NI
+	0b0001	IMP
+	0b0010	PAN2
+	0b0011	PAN3
+EndEnum
+Enum	19:16	LO
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	15:12	HPDS
+	0b0000	NI
+	0b0001	IMP
+	0b0010	HPDS2
+EndEnum
+Enum	11:8	VH
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	7:4	VMIDBits
+	0b0000	8
+	0b0010	16
+EndEnum
+Enum	3:0	HAFDBS
+	0b0000	NI
+	0b0001	AF
+	0b0010	DBM
+EndEnum
+EndSysreg
+
+Sysreg	ID_AA64MMFR2_EL1	3	0	0	7	2
+Enum	63:60	E0PD
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	59:56	EVT
+	0b0000	NI
+	0b0001	IMP
+	0b0010	TTLBxS
+EndEnum
+Enum	55:52	BBM
+	0b0000	0
+	0b0001	1
+	0b0010	2
+EndEnum
+Enum	51:48	TTL
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Res0	47:44
+Enum	43:40	FWB
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	39:36	IDS
+	0b0000	0x0
+	0b0001	0x18
+EndEnum
+Enum	35:32	AT
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	31:28	ST
+	0b0000	39
+	0b0001	48_47
+EndEnum
+Enum	27:24	NV
+	0b0000	NI
+	0b0001	IMP
+	0b0010	NV2
+EndEnum
+Enum	23:20	CCIDX
+	0b0000	32
+	0b0001	64
+EndEnum
+Enum	19:16	VARange
+	0b0000	48
+	0b0001	52
+EndEnum
+Enum	15:12	IESB
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	11:8	LSM
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	7:4	UAO
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+Enum	3:0	CnP
+	0b0000	NI
+	0b0001	IMP
+EndEnum
+EndSysreg
+
 Sysreg	SCTLR_EL1	3	0	1	0	0
 Field	63	TIDCP
 Field	62	SPINMASK
@@ -427,6 +844,12 @@ Sysreg	SMCR_EL1	3	0	1	2	6
 Fields	SMCR_ELx
 EndSysreg
 
+Sysreg	ALLINT	3	0	4	3	0
+Res0	63:14
+Field	13	ALLINT
+Res0	12:0
+EndSysreg
+
 Sysreg	FAR_EL1	3	0	6	0	0
 Field	63:0	ADDR
 EndSysreg
@@ -440,6 +863,14 @@ Sysreg	CONTEXTIDR_EL1	3	0	13	0	1
 Fields	CONTEXTIDR_ELx
 EndSysreg
 
+Sysreg	TPIDR_EL1	3	0	13	0	4
+Field	63:0	ThreadID
+EndSysreg
+
+Sysreg	SCXTNUM_EL1	3	0	13	0	7
+Field	63:0	SoftwareContextNumber
+EndSysreg
+
 Sysreg	CLIDR_EL1	3	1	0	0	1
 Res0	63:47
 Field	46:33	Ttypen
@@ -514,6 +945,22 @@ Sysreg	ZCR_EL2	3	4	1	2	0
 Fields	ZCR_ELx
 EndSysreg
 
+Sysreg	HCRX_EL2	3	4	1	2	2
+Res0	63:12
+Field	11	MSCEn
+Field	10	MCE2
+Field	9	CMOW
+Field	8	VFNMI
+Field	7	VINMI
+Field	6	TALLINT
+Field	5	SMPME
+Field	4	FGTnXS
+Field	3	FnXS
+Field	2	EnASR
+Field	1	EnALS
+Field	0	EnAS0
+EndSysreg
+
 Sysreg	SMPRIMAP_EL2	3	4	1	2	5
 Field	63:60	P15
 Field	59:56	P14
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 577173ee1f83..60973e84d7ab 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -23,8 +23,8 @@ efi_status_t check_platform_features(void)
 	if (IS_ENABLED(CONFIG_ARM64_4K_PAGES))
 		return EFI_SUCCESS;
 
-	tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;
-	if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) {
+	tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_EL1_TGRAN_SHIFT) & 0xf;
+	if (tg < ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX) {
 		if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
 			efi_err("This 64 KB granular kernel is not supported by your CPU\n");
 		else
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index d39660a3e50c..80fefaba58ee 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -966,7 +966,7 @@ static inline bool cpu_supports_sysreg_trace(void)
 {
 	u64 dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
 
-	return ((dfr0 >> ID_AA64DFR0_TRACEVER_SHIFT) & 0xfUL) > 0;
+	return ((dfr0 >> ID_AA64DFR0_EL1_TraceVer_SHIFT) & 0xfUL) > 0;
 }
 
 static bool etm4_init_sysreg_access(struct etmv4_drvdata *drvdata,
@@ -1054,7 +1054,7 @@ static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata)
 	u64 trfcr;
 
 	drvdata->trfcr = 0;
-	if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRACE_FILT_SHIFT))
+	if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT))
 		return;
 
 	/*
diff --git a/drivers/hwtracing/coresight/coresight-trbe.h b/drivers/hwtracing/coresight/coresight-trbe.h
index 30e4d7db4f8e..98ff1b17ad07 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.h
+++ b/drivers/hwtracing/coresight/coresight-trbe.h
@@ -20,7 +20,8 @@
 static inline bool is_trbe_available(void)
 {
 	u64 aa64dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
-	unsigned int trbe = cpuid_feature_extract_unsigned_field(aa64dfr0, ID_AA64DFR0_TRBE_SHIFT);
+	unsigned int trbe = cpuid_feature_extract_unsigned_field(aa64dfr0,
+								 ID_AA64DFR0_EL1_TraceBuffer_SHIFT);
 
 	return trbe >= 0b0001;
 }
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 1ef7bbb4acf3..5968a568aae2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -150,7 +150,7 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
 	}
 
 	reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
-	par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT);
+	par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
 	tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par);
 
 	cd->ttbr = virt_to_phys(mm->pgd);
@@ -425,13 +425,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
 	 * addresses larger than what we support.
 	 */
 	reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
-	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT);
+	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_PARANGE_SHIFT);
 	oas = id_aa64mmfr0_parange_to_phys_shift(fld);
 	if (smmu->oas < oas)
 		return false;
 
 	/* We can support bigger ASIDs than the CPU, but not smaller */
-	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_ASID_SHIFT);
+	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT);
 	asid_bits = fld ? 16 : 8;
 	if (smmu->asid_bits < asid_bits)
 		return false;
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 4ea71b28f9f5..a6277dea4c7a 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -94,7 +94,7 @@ bool gic_cpuif_has_vsgi(void)
 {
 	unsigned long fld, reg = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 
-	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64PFR0_GIC_SHIFT);
+	fld = cpuid_feature_extract_unsigned_field(reg, ID_AA64PFR0_EL1_GIC_SHIFT);
 
 	return fld >= 0x3;
 }
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 1e2d69453771..44c07ea487f4 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -183,6 +183,13 @@ config APPLE_M1_CPU_PMU
 	  Provides support for the non-architectural CPU PMUs present on
 	  the Apple M1 SoCs and derivatives.
 
+config ALIBABA_UNCORE_DRW_PMU
+	tristate "Alibaba T-Head Yitian 710 DDR Sub-system Driveway PMU driver"
+	depends on ARM64 || COMPILE_TEST
+	help
+	  Support for Driveway PMU events monitoring on Yitian 710 DDR
+	  Sub-system.
+
 source "drivers/perf/hisilicon/Kconfig"
 
 config MARVELL_CN10K_DDR_PMU
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 57a279c61df5..050d04ee19dd 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
 obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
+obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
diff --git a/drivers/perf/alibaba_uncore_drw_pmu.c b/drivers/perf/alibaba_uncore_drw_pmu.c
new file mode 100644
index 000000000000..82729b874f09
--- /dev/null
+++ b/drivers/perf/alibaba_uncore_drw_pmu.c
@@ -0,0 +1,810 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Alibaba DDR Sub-System Driveway PMU driver
+ *
+ * Copyright (C) 2022 Alibaba Inc
+ */
+
+#define ALI_DRW_PMUNAME		"ali_drw"
+#define ALI_DRW_DRVNAME		ALI_DRW_PMUNAME "_pmu"
+#define pr_fmt(fmt)		ALI_DRW_DRVNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/refcount.h>
+
+
+#define ALI_DRW_PMU_COMMON_MAX_COUNTERS			16
+#define ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE	19
+
+#define ALI_DRW_PMU_PA_SHIFT			12
+#define ALI_DRW_PMU_CNT_INIT			0x00000000
+#define ALI_DRW_CNT_MAX_PERIOD			0xffffffff
+#define ALI_DRW_PMU_CYCLE_EVT_ID		0x80
+
+#define ALI_DRW_PMU_CNT_CTRL			0xC00
+#define ALI_DRW_PMU_CNT_RST			BIT(2)
+#define ALI_DRW_PMU_CNT_STOP			BIT(1)
+#define ALI_DRW_PMU_CNT_START			BIT(0)
+
+#define ALI_DRW_PMU_CNT_STATE			0xC04
+#define ALI_DRW_PMU_TEST_CTRL			0xC08
+#define ALI_DRW_PMU_CNT_PRELOAD			0xC0C
+
+#define ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK		GENMASK(23, 0)
+#define ALI_DRW_PMU_CYCLE_CNT_LOW_MASK		GENMASK(31, 0)
+#define ALI_DRW_PMU_CYCLE_CNT_HIGH		0xC10
+#define ALI_DRW_PMU_CYCLE_CNT_LOW		0xC14
+
+/* PMU EVENT SEL 0-3 are paired in 32-bit registers on a 4-byte stride */
+#define ALI_DRW_PMU_EVENT_SEL0			0xC68
+/* counters 0-3 use sel0, counters 4-7 use sel1... */
+#define ALI_DRW_PMU_EVENT_SELn(n) \
+	(ALI_DRW_PMU_EVENT_SEL0 + (n / 4) * 0x4)
+#define ALI_DRW_PMCOM_CNT_EN			BIT(7)
+#define ALI_DRW_PMCOM_CNT_EVENT_MASK		GENMASK(5, 0)
+#define ALI_DRW_PMCOM_CNT_EVENT_OFFSET(n) \
+	(8 * (n % 4))
+
+/* PMU common counters 0-15: one 32-bit register per counter, on a 4-byte stride */
+#define ALI_DRW_PMU_COMMON_COUNTER0		0xC78
+#define ALI_DRW_PMU_COMMON_COUNTERn(n) \
+	(ALI_DRW_PMU_COMMON_COUNTER0 + 0x4 * (n))
+
+#define ALI_DRW_PMU_OV_INTR_ENABLE_CTL		0xCB8
+#define ALI_DRW_PMU_OV_INTR_DISABLE_CTL		0xCBC
+#define ALI_DRW_PMU_OV_INTR_ENABLE_STATUS	0xCC0
+#define ALI_DRW_PMU_OV_INTR_CLR			0xCC4
+#define ALI_DRW_PMU_OV_INTR_STATUS		0xCC8
+#define ALI_DRW_PMCOM_CNT_OV_INTR_MASK		GENMASK(23, 8)
+#define ALI_DRW_PMBW_CNT_OV_INTR_MASK		GENMASK(7, 0)
+#define ALI_DRW_PMU_OV_INTR_MASK		GENMASK_ULL(63, 0)
+
+static int ali_drw_cpuhp_state_num;
+
+static LIST_HEAD(ali_drw_pmu_irqs);
+static DEFINE_MUTEX(ali_drw_pmu_irqs_lock);
+
+struct ali_drw_pmu_irq {
+	struct hlist_node node;
+	struct list_head irqs_node;
+	struct list_head pmus_node;
+	int irq_num;
+	int cpu;
+	refcount_t refcount;
+};
+
+struct ali_drw_pmu {
+	void __iomem *cfg_base;
+	struct device *dev;
+
+	struct list_head pmus_node;
+	struct ali_drw_pmu_irq *irq;
+	int irq_num;
+	int cpu;
+	DECLARE_BITMAP(used_mask, ALI_DRW_PMU_COMMON_MAX_COUNTERS);
+	struct perf_event *events[ALI_DRW_PMU_COMMON_MAX_COUNTERS];
+	int evtids[ALI_DRW_PMU_COMMON_MAX_COUNTERS];
+
+	struct pmu pmu;
+};
+
+#define to_ali_drw_pmu(p) (container_of(p, struct ali_drw_pmu, pmu))
+
+#define DRW_CONFIG_EVENTID		GENMASK(7, 0)
+#define GET_DRW_EVENTID(event)	FIELD_GET(DRW_CONFIG_EVENTID, (event)->attr.config)
+
+static ssize_t ali_drw_pmu_format_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(buf, "%s\n", (char *)eattr->var);
+}
+
+/*
+ * PMU event attributes
+ */
+static ssize_t ali_drw_pmu_event_show(struct device *dev,
+			       struct device_attribute *attr, char *page)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+#define ALI_DRW_PMU_ATTR(_name, _func, _config)                            \
+		(&((struct dev_ext_attribute[]) {                               \
+				{ __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
+		})[0].attr.attr)
+
+#define ALI_DRW_PMU_FORMAT_ATTR(_name, _config)            \
+	ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_format_show, (void *)_config)
+#define ALI_DRW_PMU_EVENT_ATTR(_name, _config)             \
+	ALI_DRW_PMU_ATTR(_name, ali_drw_pmu_event_show, (unsigned long)_config)
+
+static struct attribute *ali_drw_pmu_events_attrs[] = {
+	ALI_DRW_PMU_EVENT_ATTR(hif_rd_or_wr,			0x0),
+	ALI_DRW_PMU_EVENT_ATTR(hif_wr,				0x1),
+	ALI_DRW_PMU_EVENT_ATTR(hif_rd,				0x2),
+	ALI_DRW_PMU_EVENT_ATTR(hif_rmw,				0x3),
+	ALI_DRW_PMU_EVENT_ATTR(hif_hi_pri_rd,			0x4),
+	ALI_DRW_PMU_EVENT_ATTR(dfi_wr_data_cycles,		0x7),
+	ALI_DRW_PMU_EVENT_ATTR(dfi_rd_data_cycles,		0x8),
+	ALI_DRW_PMU_EVENT_ATTR(hpr_xact_when_critical,		0x9),
+	ALI_DRW_PMU_EVENT_ATTR(lpr_xact_when_critical,		0xA),
+	ALI_DRW_PMU_EVENT_ATTR(wr_xact_when_critical,		0xB),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_activate,			0xC),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_rd_or_wr,			0xD),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_rd_activate,		0xE),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_rd,			0xF),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_wr,			0x10),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_mwr,			0x11),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_precharge,			0x12),
+	ALI_DRW_PMU_EVENT_ATTR(precharge_for_rdwr,		0x13),
+	ALI_DRW_PMU_EVENT_ATTR(precharge_for_other,		0x14),
+	ALI_DRW_PMU_EVENT_ATTR(rdwr_transitions,		0x15),
+	ALI_DRW_PMU_EVENT_ATTR(write_combine,			0x16),
+	ALI_DRW_PMU_EVENT_ATTR(war_hazard,			0x17),
+	ALI_DRW_PMU_EVENT_ATTR(raw_hazard,			0x18),
+	ALI_DRW_PMU_EVENT_ATTR(waw_hazard,			0x19),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk0,		0x1A),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk1,		0x1B),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk2,		0x1C),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_selfref_rk3,		0x1D),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk0,	0x1E),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk1,	0x1F),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk2,	0x20),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_enter_powerdown_rk3,	0x21),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk0,		0x26),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk1,		0x27),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk2,		0x28),
+	ALI_DRW_PMU_EVENT_ATTR(selfref_mode_rk3,		0x29),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_refresh,			0x2A),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_crit_ref,			0x2B),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_load_mode,			0x2D),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_zqcl,			0x2E),
+	ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_rd, 0x30),
+	ALI_DRW_PMU_EVENT_ATTR(visible_window_limit_reached_wr, 0x31),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mpc,		0x34),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_dqsosc_mrr,		0x35),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_tcr_mrr,			0x36),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_zqstart,			0x37),
+	ALI_DRW_PMU_EVENT_ATTR(op_is_zqlatch,			0x38),
+	ALI_DRW_PMU_EVENT_ATTR(chi_txreq,			0x39),
+	ALI_DRW_PMU_EVENT_ATTR(chi_txdat,			0x3A),
+	ALI_DRW_PMU_EVENT_ATTR(chi_rxdat,			0x3B),
+	ALI_DRW_PMU_EVENT_ATTR(chi_rxrsp,			0x3C),
+	ALI_DRW_PMU_EVENT_ATTR(tsz_vio,				0x3D),
+	ALI_DRW_PMU_EVENT_ATTR(cycle,				0x80),
+	NULL,
+};
+
+static struct attribute_group ali_drw_pmu_events_attr_group = {
+	.name = "events",
+	.attrs = ali_drw_pmu_events_attrs,
+};
+
+static struct attribute *ali_drw_pmu_format_attr[] = {
+	ALI_DRW_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL,
+};
+
+static const struct attribute_group ali_drw_pmu_format_group = {
+	.name = "format",
+	.attrs = ali_drw_pmu_format_attr,
+};
+
+static ssize_t ali_drw_pmu_cpumask_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(drw_pmu->cpu));
+}
+
+static struct device_attribute ali_drw_pmu_cpumask_attr =
+		__ATTR(cpumask, 0444, ali_drw_pmu_cpumask_show, NULL);
+
+static struct attribute *ali_drw_pmu_cpumask_attrs[] = {
+	&ali_drw_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
+	.attrs = ali_drw_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
+	&ali_drw_pmu_events_attr_group,
+	&ali_drw_pmu_cpumask_attr_group,
+	&ali_drw_pmu_format_group,
+	NULL,
+};
+
+/* Find a free counter for the event; the add callback records it in hw.idx */
+static int ali_drw_get_counter_idx(struct perf_event *event)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	int idx;
+
+	for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; ++idx) {
+		if (!test_and_set_bit(idx, drw_pmu->used_mask))
+			return idx;
+	}
+
+	/* The counters are all in use. */
+	return -EBUSY;
+}
+
+static u64 ali_drw_pmu_read_counter(struct perf_event *event)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	u64 cycle_high, cycle_low;
+
+	if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) {
+		cycle_high = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_HIGH);
+		cycle_high &= ALI_DRW_PMU_CYCLE_CNT_HIGH_MASK;
+		cycle_low = readl(drw_pmu->cfg_base + ALI_DRW_PMU_CYCLE_CNT_LOW);
+		cycle_low &= ALI_DRW_PMU_CYCLE_CNT_LOW_MASK;
+		return (cycle_high << 32 | cycle_low);
+	}
+
+	return readl(drw_pmu->cfg_base +
+		     ALI_DRW_PMU_COMMON_COUNTERn(event->hw.idx));
+}
+
+static void ali_drw_pmu_event_update(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev, now;
+
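+	/*
+	 * Re-read until prev_count is advanced atomically so a concurrent
+	 * update (e.g. from the overflow interrupt) is not lost.
+	 */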
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = ali_drw_pmu_read_counter(event);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/* handle overflow. */
+	delta = now - prev;
+	if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID)
+		delta &= ALI_DRW_PMU_OV_INTR_MASK;
+	else
+		delta &= ALI_DRW_CNT_MAX_PERIOD;
+	local64_add(delta, &event->count);
+}
+
+static void ali_drw_pmu_event_set_period(struct perf_event *event)
+{
+	u64 pre_val;
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	/* select which common counter the preload value will be loaded into */
+	writel(ALI_DRW_PMU_TEST_SEL_COMMON_COUNTER_BASE + event->hw.idx,
+	       drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL);
+
+	/* set counter initial value */
+	pre_val = ALI_DRW_PMU_CNT_INIT;
+	writel(pre_val, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD);
+	local64_set(&event->hw.prev_count, pre_val);
+
+	/* set sel mode to zero to start test */
+	writel(0x0, drw_pmu->cfg_base + ALI_DRW_PMU_TEST_CTRL);
+}
+
+static void ali_drw_pmu_enable_counter(struct perf_event *event)
+{
+	u32 val, subval, reg, shift;
+	int counter = event->hw.idx;
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	reg = ALI_DRW_PMU_EVENT_SELn(counter);
+	val = readl(drw_pmu->cfg_base + reg);
+	subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 1) |
+		 FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, drw_pmu->evtids[counter]);
+
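+	/* Each counter uses an 8-bit slot within the shared EVENT_SELn register */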
+	shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter);
+	val &= ~(GENMASK(7, 0) << shift);
+	val |= subval << shift;
+
+	writel(val, drw_pmu->cfg_base + reg);
+}
+
+static void ali_drw_pmu_disable_counter(struct perf_event *event)
+{
+	u32 val, reg, subval, shift;
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	int counter = event->hw.idx;
+
+	reg = ALI_DRW_PMU_EVENT_SELn(counter);
+	val = readl(drw_pmu->cfg_base + reg);
+	subval = FIELD_PREP(ALI_DRW_PMCOM_CNT_EN, 0) |
+		 FIELD_PREP(ALI_DRW_PMCOM_CNT_EVENT_MASK, 0);
+
+	shift = ALI_DRW_PMCOM_CNT_EVENT_OFFSET(counter);
+	val &= ~(GENMASK(7, 0) << shift);
+	val |= subval << shift;
+
+	writel(val, drw_pmu->cfg_base + reg);
+}
+
+static irqreturn_t ali_drw_pmu_isr(int irq_num, void *data)
+{
+	struct ali_drw_pmu_irq *irq = data;
+	struct ali_drw_pmu *drw_pmu;
+	irqreturn_t ret = IRQ_NONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(drw_pmu, &irq->pmus_node, pmus_node) {
+		unsigned long status, clr_status;
+		struct perf_event *event;
+		unsigned int idx;
+
+		for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) {
+			event = drw_pmu->events[idx];
+			if (!event)
+				continue;
+			ali_drw_pmu_disable_counter(event);
+		}
+
+		/* common counter intr status */
+		status = readl(drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_STATUS);
+		status = FIELD_GET(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, status);
+		if (status) {
+			for_each_set_bit(idx, &status,
+					 ALI_DRW_PMU_COMMON_MAX_COUNTERS) {
+				event = drw_pmu->events[idx];
+				if (WARN_ON_ONCE(!event))
+					continue;
+				ali_drw_pmu_event_update(event);
+				ali_drw_pmu_event_set_period(event);
+			}
+
+			/* clear common counter intr status */
+			clr_status = FIELD_PREP(ALI_DRW_PMCOM_CNT_OV_INTR_MASK, 1);
+			writel(clr_status,
+			       drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR);
+		}
+
+		for (idx = 0; idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS; idx++) {
+			event = drw_pmu->events[idx];
+			if (!event)
+				continue;
+			if (!(event->hw.state & PERF_HES_STOPPED))
+				ali_drw_pmu_enable_counter(event);
+		}
+		if (status)
+			ret = IRQ_HANDLED;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+static struct ali_drw_pmu_irq *__ali_drw_pmu_init_irq(struct platform_device
+						      *pdev, int irq_num)
+{
+	int ret;
+	struct ali_drw_pmu_irq *irq;
+
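+	/* Reuse an IRQ already requested by another PMU instance, if any */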
+	list_for_each_entry(irq, &ali_drw_pmu_irqs, irqs_node) {
+		if (irq->irq_num == irq_num
+		    && refcount_inc_not_zero(&irq->refcount))
+			return irq;
+	}
+
+	irq = kzalloc(sizeof(*irq), GFP_KERNEL);
+	if (!irq)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&irq->pmus_node);
+
+	/* Pick one CPU to be the preferred one to use */
+	irq->cpu = smp_processor_id();
+	refcount_set(&irq->refcount, 1);
+
+	/*
+	 * FIXME: one of the DDRSS Driveway PMU overflow interrupts shares its
+	 * irq number with the MPAM ERR_IRQ. Use IRQF_SHARED so that both the
+	 * DDRSS PMU and MPAM drivers can be registered successfully, even
+	 * though a PMU interrupt should not be shared with other components.
+	 */
+	ret = devm_request_irq(&pdev->dev, irq_num, ali_drw_pmu_isr,
+			       IRQF_SHARED, dev_name(&pdev->dev), irq);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Fail to request IRQ:%d ret:%d\n", irq_num, ret);
+		goto out_free;
+	}
+
+	ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu));
+	if (ret)
+		goto out_free;
+
+	ret = cpuhp_state_add_instance_nocalls(ali_drw_cpuhp_state_num,
+					     &irq->node);
+	if (ret)
+		goto out_free;
+
+	irq->irq_num = irq_num;
+	list_add(&irq->irqs_node, &ali_drw_pmu_irqs);
+
+	return irq;
+
+out_free:
+	kfree(irq);
+	return ERR_PTR(ret);
+}
+
+static int ali_drw_pmu_init_irq(struct ali_drw_pmu *drw_pmu,
+				struct platform_device *pdev)
+{
+	int irq_num;
+	struct ali_drw_pmu_irq *irq;
+
+	/* Read and init IRQ */
+	irq_num = platform_get_irq(pdev, 0);
+	if (irq_num < 0)
+		return irq_num;
+
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	irq = __ali_drw_pmu_init_irq(pdev, irq_num);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	if (IS_ERR(irq))
+		return PTR_ERR(irq);
+
+	drw_pmu->irq = irq;
+
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	list_add_rcu(&drw_pmu->pmus_node, &irq->pmus_node);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	return 0;
+}
+
+static void ali_drw_pmu_uninit_irq(struct ali_drw_pmu *drw_pmu)
+{
+	struct ali_drw_pmu_irq *irq = drw_pmu->irq;
+
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	list_del_rcu(&drw_pmu->pmus_node);
+
+	if (!refcount_dec_and_test(&irq->refcount)) {
+		mutex_unlock(&ali_drw_pmu_irqs_lock);
+		return;
+	}
+
+	list_del(&irq->irqs_node);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL));
+	cpuhp_state_remove_instance_nocalls(ali_drw_cpuhp_state_num,
+					    &irq->node);
+	kfree(irq);
+}
+
+static int ali_drw_pmu_event_init(struct perf_event *event)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct perf_event *sibling;
+	struct device *dev = drw_pmu->pmu.dev;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event)) {
+		dev_err(dev, "Sampling not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->attach_state & PERF_ATTACH_TASK) {
+		dev_err(dev, "Per-task counter cannot allocate!\n");
+		return -EOPNOTSUPP;
+	}
+
+	event->cpu = drw_pmu->cpu;
+	if (event->cpu < 0) {
+		dev_err(dev, "Per-task mode not supported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (event->group_leader != event &&
+	    !is_software_event(event->group_leader)) {
+		dev_err(dev, "driveway only allow one event!\n");
+		return -EINVAL;
+	}
+
+	for_each_sibling_event(sibling, event->group_leader) {
+		if (sibling != event && !is_software_event(sibling)) {
+			dev_err(dev, "driveway event not allowed!\n");
+			return -EINVAL;
+		}
+	}
+
+	/* reset all the pmu counters */
+	writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+
+	hwc->idx = -1;
+
+	return 0;
+}
+
+static void ali_drw_pmu_start(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	event->hw.state = 0;
+
+	if (GET_DRW_EVENTID(event) == ALI_DRW_PMU_CYCLE_EVT_ID) {
+		writel(ALI_DRW_PMU_CNT_START,
+		       drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+		return;
+	}
+
+	ali_drw_pmu_event_set_period(event);
+	if (flags & PERF_EF_RELOAD) {
+		unsigned long prev_raw_count =
+		    local64_read(&event->hw.prev_count);
+		writel(prev_raw_count,
+		       drw_pmu->cfg_base + ALI_DRW_PMU_CNT_PRELOAD);
+	}
+
+	ali_drw_pmu_enable_counter(event);
+
+	writel(ALI_DRW_PMU_CNT_START, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+}
+
+static void ali_drw_pmu_stop(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+
+	if (event->hw.state & PERF_HES_STOPPED)
+		return;
+
+	if (GET_DRW_EVENTID(event) != ALI_DRW_PMU_CYCLE_EVT_ID)
+		ali_drw_pmu_disable_counter(event);
+
+	writel(ALI_DRW_PMU_CNT_STOP, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+
+	ali_drw_pmu_event_update(event);
+	event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int ali_drw_pmu_add(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = -1;
+	int evtid;
+
+	evtid = GET_DRW_EVENTID(event);
+
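+	/* The cycle event uses the dedicated cycle counter rather than a common one */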
+	if (evtid != ALI_DRW_PMU_CYCLE_EVT_ID) {
+		idx = ali_drw_get_counter_idx(event);
+		if (idx < 0)
+			return idx;
+		drw_pmu->events[idx] = event;
+		drw_pmu->evtids[idx] = evtid;
+	}
+	hwc->idx = idx;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		ali_drw_pmu_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+static void ali_drw_pmu_del(struct perf_event *event, int flags)
+{
+	struct ali_drw_pmu *drw_pmu = to_ali_drw_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	ali_drw_pmu_stop(event, PERF_EF_UPDATE);
+
+	if (idx >= 0 && idx < ALI_DRW_PMU_COMMON_MAX_COUNTERS) {
+		drw_pmu->events[idx] = NULL;
+		drw_pmu->evtids[idx] = 0;
+		clear_bit(idx, drw_pmu->used_mask);
+	}
+
+	perf_event_update_userpage(event);
+}
+
+static void ali_drw_pmu_read(struct perf_event *event)
+{
+	ali_drw_pmu_event_update(event);
+}
+
+static int ali_drw_pmu_probe(struct platform_device *pdev)
+{
+	struct ali_drw_pmu *drw_pmu;
+	struct resource *res;
+	char *name;
+	int ret;
+
+	drw_pmu = devm_kzalloc(&pdev->dev, sizeof(*drw_pmu), GFP_KERNEL);
+	if (!drw_pmu)
+		return -ENOMEM;
+
+	drw_pmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, drw_pmu);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	drw_pmu->cfg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(drw_pmu->cfg_base))
+		return PTR_ERR(drw_pmu->cfg_base);
+
+	name = devm_kasprintf(drw_pmu->dev, GFP_KERNEL, "ali_drw_%llx",
+			      (u64) (res->start >> ALI_DRW_PMU_PA_SHIFT));
+	if (!name)
+		return -ENOMEM;
+
+	writel(ALI_DRW_PMU_CNT_RST, drw_pmu->cfg_base + ALI_DRW_PMU_CNT_CTRL);
+
+	/* enable interrupt generation by all common counters */
+	writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK,
+	       drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_ENABLE_CTL);
+
+	/* clear any stale interrupt status */
+	writel(0xffffff, drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_CLR);
+
+	drw_pmu->cpu = smp_processor_id();
+
+	ret = ali_drw_pmu_init_irq(drw_pmu, pdev);
+	if (ret)
+		return ret;
+
+	drw_pmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= ali_drw_pmu_event_init,
+		.add		= ali_drw_pmu_add,
+		.del		= ali_drw_pmu_del,
+		.start		= ali_drw_pmu_start,
+		.stop		= ali_drw_pmu_stop,
+		.read		= ali_drw_pmu_read,
+		.attr_groups	= ali_drw_pmu_attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	ret = perf_pmu_register(&drw_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(drw_pmu->dev, "DRW Driveway PMU PMU register failed!\n");
+		ali_drw_pmu_uninit_irq(drw_pmu);
+	}
+
+	return ret;
+}
+
+static int ali_drw_pmu_remove(struct platform_device *pdev)
+{
+	struct ali_drw_pmu *drw_pmu = platform_get_drvdata(pdev);
+
+	/* disable interrupt generation by all common counters */
+	writel(ALI_DRW_PMCOM_CNT_OV_INTR_MASK,
+	       drw_pmu->cfg_base + ALI_DRW_PMU_OV_INTR_DISABLE_CTL);
+
+	ali_drw_pmu_uninit_irq(drw_pmu);
+	perf_pmu_unregister(&drw_pmu->pmu);
+
+	return 0;
+}
+
+static int ali_drw_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ali_drw_pmu_irq *irq;
+	struct ali_drw_pmu *drw_pmu;
+	unsigned int target;
+	int ret;
+	cpumask_t node_online_cpus;
+
+	irq = hlist_entry_safe(node, struct ali_drw_pmu_irq, node);
+	if (cpu != irq->cpu)
+		return 0;
+
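+	/* Prefer migrating to a CPU on the same NUMA node as the outgoing one */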
+	ret = cpumask_and(&node_online_cpus,
+			  cpumask_of_node(cpu_to_node(cpu)), cpu_online_mask);
+	if (ret)
+		target = cpumask_any_but(&node_online_cpus, cpu);
+	else
+		target = cpumask_any_but(cpu_online_mask, cpu);
+
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	/* We're only reading, but this isn't the place to be involving RCU */
+	mutex_lock(&ali_drw_pmu_irqs_lock);
+	list_for_each_entry(drw_pmu, &irq->pmus_node, pmus_node)
+		perf_pmu_migrate_context(&drw_pmu->pmu, irq->cpu, target);
+	mutex_unlock(&ali_drw_pmu_irqs_lock);
+
+	WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target)));
+	irq->cpu = target;
+
+	return 0;
+}
+
+/*
+ * For historical reasons the HID used in the production environment is
+ * ARMHD700, so keep ARMHD700 as a compatible ID.
+ */
+static const struct acpi_device_id ali_drw_acpi_match[] = {
+	{"BABA5000", 0},
+	{"ARMHD700", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(acpi, ali_drw_acpi_match);
+
+static struct platform_driver ali_drw_pmu_driver = {
+	.driver = {
+		   .name = "ali_drw_pmu",
+		   .acpi_match_table = ali_drw_acpi_match,
+		   },
+	.probe = ali_drw_pmu_probe,
+	.remove = ali_drw_pmu_remove,
+};
+
+static int __init ali_drw_pmu_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "ali_drw_pmu:online",
+				      NULL, ali_drw_pmu_offline_cpu);
+
+	if (ret < 0) {
+		pr_err("DRW Driveway PMU: setup hotplug failed, ret = %d\n",
+		       ret);
+		return ret;
+	}
+	ali_drw_cpuhp_state_num = ret;
+
+	ret = platform_driver_register(&ali_drw_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(ali_drw_cpuhp_state_num);
+
+	return ret;
+}
+
+static void __exit ali_drw_pmu_exit(void)
+{
+	platform_driver_unregister(&ali_drw_pmu_driver);
+	cpuhp_remove_multi_state(ali_drw_cpuhp_state_num);
+}
+
+module_init(ali_drw_pmu_init);
+module_exit(ali_drw_pmu_exit);
+
+MODULE_AUTHOR("Hongbo Yao <yaohongbo@linux.alibaba.com>");
+MODULE_AUTHOR("Neng Chen <nengchen@linux.alibaba.com>");
+MODULE_AUTHOR("Shuai Xue <xueshuai@linux.alibaba.com>");
+MODULE_DESCRIPTION("Alibaba DDR Sub-System Driveway PMU driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 59d3980b8ca2..3f07df5a7e95 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -894,7 +894,7 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
 		 * pmu::filter_match callback and pmu::event_init group
 		 * validation).
 		 */
-		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS,
+		.capabilities	= PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
 	};
 
 	pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index b65a7d9640e1..6ce05ef4844d 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -674,9 +674,9 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev)
 static u64 arm_spe_pmsevfr_res0(u16 pmsver)
 {
 	switch (pmsver) {
-	case ID_AA64DFR0_PMSVER_8_2:
+	case ID_AA64DFR0_EL1_PMSVer_IMP:
 		return SYS_PMSEVFR_EL1_RES0_8_2;
-	case ID_AA64DFR0_PMSVER_8_3:
+	case ID_AA64DFR0_EL1_PMSVer_V1P1:
 	/* Return the highest version we support in default */
 	default:
 		return SYS_PMSEVFR_EL1_RES0_8_3;
@@ -958,7 +958,7 @@ static void __arm_spe_pmu_dev_probe(void *info)
 	struct device *dev = &spe_pmu->pdev->dev;
 
 	fld = cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64DFR0_EL1),
-						   ID_AA64DFR0_PMSVER_SHIFT);
+						   ID_AA64DFR0_EL1_PMSVer_SHIFT);
 	if (!fld) {
 		dev_err(dev,
 			"unsupported ID_AA64DFR0_EL1.PMSVer [%d] on CPU %d\n",
diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore
index b9e54417250d..44f8b80f37e3 100644
--- a/tools/testing/selftests/arm64/abi/.gitignore
+++ b/tools/testing/selftests/arm64/abi/.gitignore
@@ -1,2 +1,4 @@
+hwcap
+ptrace
 syscall-abi
 tpidr2
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
index c8d7f2495eb2..a6d30c620908 100644
--- a/tools/testing/selftests/arm64/abi/Makefile
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2021 ARM Limited
 
-TEST_GEN_PROGS := syscall-abi tpidr2
+TEST_GEN_PROGS := hwcap ptrace syscall-abi tpidr2
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
new file mode 100644
index 000000000000..9f1a7b5c6193
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+
+#include "../../kselftest.h"
+
+#define TESTS_PER_HWCAP 2
+
+/*
+ * Function expected to generate SIGILL when the feature is not
+ * supported and return when it is supported. If SIGILL is generated
+ * then the handler must be able to skip over the instruction safely.
+ *
+ * Note that for many architecture extensions no new architectural
+ * state is added, so there is no specific trap and we may not fault
+ * when running on a kernel which doesn't know to add the hwcap.
+ */
+typedef void (*sigill_fn)(void);
+
+static void rng_sigill(void)
+{
+	asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
+}
+
+static void sme_sigill(void)
+{
+	/* RDSVL x0, #0 */
+	asm volatile(".inst 0x04bf5800" : : : "x0");
+}
+
+static void sve_sigill(void)
+{
+	/* RDVL x0, #0 */
+	asm volatile(".inst 0x04bf5000" : : : "x0");
+}
+
+static void sve2_sigill(void)
+{
+	/* SQABS Z0.B, P0/M, Z0.B */
+	asm volatile(".inst 0x4408A000" : : : "z0");
+}
+
+static void sveaes_sigill(void)
+{
+	/* AESD z0.b, z0.b, z0.b */
+	asm volatile(".inst 0x4522e400" : : : "z0");
+}
+
+static void svepmull_sigill(void)
+{
+	/* PMULLB Z0.Q, Z0.D, Z0.D */
+	asm volatile(".inst 0x45006800" : : : "z0");
+}
+
+static void svebitperm_sigill(void)
+{
+	/* BDEP Z0.B, Z0.B, Z0.B */
+	asm volatile(".inst 0x4500b400" : : : "z0");
+}
+
+static void svesha3_sigill(void)
+{
+	/* EOR3 Z0.D, Z0.D, Z0.D, Z0.D */
+	asm volatile(".inst 0x4203800" : : : "z0");
+}
+
+static void svesm4_sigill(void)
+{
+	/* SM4E Z0.S, Z0.S, Z0.S */
+	asm volatile(".inst 0x4523e000" : : : "z0");
+}
+
+static void svei8mm_sigill(void)
+{
+	/* USDOT Z0.S, Z0.B, Z0.B[0] */
+	asm volatile(".inst 0x44a01800" : : : "z0");
+}
+
+static void svef32mm_sigill(void)
+{
+	/* FMMLA Z0.S, Z0.S, Z0.S */
+	asm volatile(".inst 0x64a0e400" : : : "z0");
+}
+
+static void svef64mm_sigill(void)
+{
+	/* FMMLA Z0.D, Z0.D, Z0.D */
+	asm volatile(".inst 0x64e0e400" : : : "z0");
+}
+
+static void svebf16_sigill(void)
+{
+	/* BFCVT Z0.H, P0/M, Z0.S */
+	asm volatile(".inst 0x658aa000" : : : "z0");
+}
+
+static const struct hwcap_data {
+	const char *name;
+	unsigned long at_hwcap;
+	unsigned long hwcap_bit;
+	const char *cpuinfo;
+	sigill_fn sigill_fn;
+	bool sigill_reliable;
+} hwcaps[] = {
+	{
+		.name = "RNG",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_RNG,
+		.cpuinfo = "rng",
+		.sigill_fn = rng_sigill,
+	},
+	{
+		.name = "SME",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SME,
+		.cpuinfo = "sme",
+		.sigill_fn = sme_sigill,
+		.sigill_reliable = true,
+	},
+	{
+		.name = "SVE",
+		.at_hwcap = AT_HWCAP,
+		.hwcap_bit = HWCAP_SVE,
+		.cpuinfo = "sve",
+		.sigill_fn = sve_sigill,
+		.sigill_reliable = true,
+	},
+	{
+		.name = "SVE 2",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVE2,
+		.cpuinfo = "sve2",
+		.sigill_fn = sve2_sigill,
+	},
+	{
+		.name = "SVE AES",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEAES,
+		.cpuinfo = "sveaes",
+		.sigill_fn = sveaes_sigill,
+	},
+	{
+		.name = "SVE2 PMULL",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEPMULL,
+		.cpuinfo = "svepmull",
+		.sigill_fn = svepmull_sigill,
+	},
+	{
+		.name = "SVE2 BITPERM",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEBITPERM,
+		.cpuinfo = "svebitperm",
+		.sigill_fn = svebitperm_sigill,
+	},
+	{
+		.name = "SVE2 SHA3",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVESHA3,
+		.cpuinfo = "svesha3",
+		.sigill_fn = svesha3_sigill,
+	},
+	{
+		.name = "SVE2 SM4",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVESM4,
+		.cpuinfo = "svesm4",
+		.sigill_fn = svesm4_sigill,
+	},
+	{
+		.name = "SVE2 I8MM",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEI8MM,
+		.cpuinfo = "svei8mm",
+		.sigill_fn = svei8mm_sigill,
+	},
+	{
+		.name = "SVE2 F32MM",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEF32MM,
+		.cpuinfo = "svef32mm",
+		.sigill_fn = svef32mm_sigill,
+	},
+	{
+		.name = "SVE2 F64MM",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEF64MM,
+		.cpuinfo = "svef64mm",
+		.sigill_fn = svef64mm_sigill,
+	},
+	{
+		.name = "SVE2 BF16",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVEBF16,
+		.cpuinfo = "svebf16",
+		.sigill_fn = svebf16_sigill,
+	},
+	{
+		.name = "SVE2 EBF16",
+		.at_hwcap = AT_HWCAP2,
+		.hwcap_bit = HWCAP2_SVE_EBF16,
+		.cpuinfo = "sveebf16",
+	},
+};
+
+static bool seen_sigill;
+
+static void handle_sigill(int sig, siginfo_t *info, void *context)
+{
+	ucontext_t *uc = context;
+
+	seen_sigill = true;
+
+	/* Skip over the offending instruction */
+	uc->uc_mcontext.pc += 4;
+}
+
+bool cpuinfo_present(const char *name)
+{
+	FILE *f;
+	char buf[2048], name_space[30], name_newline[30];
+	char *s;
+
+	/*
+	 * The feature should appear with a leading space and either a
+	 * trailing space or a newline.
+	 */
+	snprintf(name_space, sizeof(name_space), " %s ", name);
+	snprintf(name_newline, sizeof(name_newline), " %s\n", name);
+
+	f = fopen("/proc/cpuinfo", "r");
+	if (!f) {
+		ksft_print_msg("Failed to open /proc/cpuinfo\n");
+		return false;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		/* Features: line? */
+		if (strncmp(buf, "Features\t:", strlen("Features\t:")) != 0)
+			continue;
+
+		/* All CPUs should be symmetric, don't read any more */
+		fclose(f);
+
+		s = strstr(buf, name_space);
+		if (s)
+			return true;
+		s = strstr(buf, name_newline);
+		if (s)
+			return true;
+
+		return false;
+	}
+
+	ksft_print_msg("Failed to find Features in /proc/cpuinfo\n");
+	fclose(f);
+	return false;
+}
+
+int main(void)
+{
+	const struct hwcap_data *hwcap;
+	int i, ret;
+	bool have_cpuinfo, have_hwcap;
+	struct sigaction sa;
+
+	ksft_print_header();
+	ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handle_sigill;
+	sa.sa_flags = SA_RESTART | SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	ret = sigaction(SIGILL, &sa, NULL);
+	if (ret < 0)
+		ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n",
+				   strerror(errno), errno);
+
+	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+		hwcap = &hwcaps[i];
+
+		have_hwcap = getauxval(hwcap->at_hwcap) & hwcap->hwcap_bit;
+		have_cpuinfo = cpuinfo_present(hwcap->cpuinfo);
+
+		if (have_hwcap)
+			ksft_print_msg("%s present\n", hwcap->name);
+
+		ksft_test_result(have_hwcap == have_cpuinfo,
+				 "cpuinfo_match_%s\n", hwcap->name);
+
+		if (hwcap->sigill_fn) {
+			seen_sigill = false;
+			hwcap->sigill_fn();
+
+			if (have_hwcap) {
+				/* Should be able to use the extension */
+				ksft_test_result(!seen_sigill, "sigill_%s\n",
+						 hwcap->name);
+			} else if (hwcap->sigill_reliable) {
+				/* Guaranteed a SIGILL */
+				ksft_test_result(seen_sigill, "sigill_%s\n",
+						 hwcap->name);
+			} else {
+				/* Missing SIGILL might be fine */
+				ksft_print_msg("SIGILL %sreported for %s\n",
+					       seen_sigill ? "" : "not ",
+					       hwcap->name);
+				ksft_test_result_skip("sigill_%s\n",
+						      hwcap->name);
+			}
+		} else {
+			ksft_test_result_skip("sigill_%s\n",
+					      hwcap->name);
+		}
+	}
+
+	ksft_print_cnts();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
new file mode 100644
index 000000000000..be952511af22
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+#define EXPECTED_TESTS 7
+
+#define MAX_TPIDRS 2
+
+static bool have_sme(void)
+{
+	return getauxval(AT_HWCAP2) & HWCAP2_SME;
+}
+
+static void test_tpidr(pid_t child)
+{
+	uint64_t read_val[MAX_TPIDRS];
+	uint64_t write_val[MAX_TPIDRS];
+	struct iovec read_iov, write_iov;
+	bool test_tpidr2 = false;
+	int ret, i;
+
+	read_iov.iov_base = read_val;
+	write_iov.iov_base = write_val;
+
+	/* Should be able to read a single TPIDR... */
+	read_iov.iov_len = sizeof(uint64_t);
+	ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+	ksft_test_result(ret == 0, "read_tpidr_one\n");
+
+	/* ...write a new value.. */
+	write_iov.iov_len = sizeof(uint64_t);
+	write_val[0] = read_val[0] + 1;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+	ksft_test_result(ret == 0, "write_tpidr_one\n");
+
+	/* ...then read it back */
+	ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+	ksft_test_result(ret == 0 && write_val[0] == read_val[0],
+			 "verify_tpidr_one\n");
+
+	/* If we have TPIDR2 we should be able to read it */
+	read_iov.iov_len = sizeof(read_val);
+	ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+	if (ret == 0) {
+		/* If we have SME there should be two TPIDRs */
+		if (read_iov.iov_len >= sizeof(read_val))
+			test_tpidr2 = true;
+
+		if (have_sme() && test_tpidr2) {
+			ksft_test_result(test_tpidr2, "count_tpidrs\n");
+		} else {
+			ksft_test_result(read_iov.iov_len % sizeof(uint64_t) == 0,
+					 "count_tpidrs\n");
+		}
+	} else {
+		ksft_test_result_fail("count_tpidrs\n");
+	}
+
+	if (test_tpidr2) {
+		/* Try to write new values to all known TPIDRs... */
+		write_iov.iov_len = sizeof(write_val);
+		for (i = 0; i < MAX_TPIDRS; i++)
+			write_val[i] = read_val[i] + 1;
+		ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+
+		ksft_test_result(ret == 0 &&
+				 write_iov.iov_len == sizeof(write_val),
+				 "tpidr2_write\n");
+
+		/* ...then read them back */
+		read_iov.iov_len = sizeof(read_val);
+		ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+
+		if (have_sme()) {
+			/* Should read back the written value */
+			ksft_test_result(ret == 0 &&
+					 read_iov.iov_len >= sizeof(read_val) &&
+					 memcmp(read_val, write_val,
+						sizeof(read_val)) == 0,
+					 "tpidr2_read\n");
+		} else {
+			/* TPIDR2 should read as zero */
+			ksft_test_result(ret == 0 &&
+					 read_iov.iov_len >= sizeof(read_val) &&
+					 read_val[0] == write_val[0] &&
+					 read_val[1] == 0,
+					 "tpidr2_read\n");
+		}
+
+		/* Writing only TPIDR... */
+		write_iov.iov_len = sizeof(uint64_t);
+		memcpy(write_val, read_val, sizeof(read_val));
+		write_val[0] += 1;
+		ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+
+		if (ret == 0) {
+			/* ...should leave TPIDR2 untouched */
+			read_iov.iov_len = sizeof(read_val);
+			ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS,
+				     &read_iov);
+
+			ksft_test_result(ret == 0 &&
+					 read_iov.iov_len >= sizeof(read_val) &&
+					 memcmp(read_val, write_val,
+						sizeof(read_val)) == 0,
+					 "write_tpidr_only\n");
+		} else {
+			ksft_test_result_fail("write_tpidr_only\n");
+		}
+	} else {
+		ksft_test_result_skip("tpidr2_write\n");
+		ksft_test_result_skip("tpidr2_read\n");
+		ksft_test_result_skip("write_tpidr_only\n");
+	}
+}
+
+static int do_child(void)
+{
+	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+		ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+	if (raise(SIGSTOP))
+		ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+	return EXIT_SUCCESS;
+}
+
+static int do_parent(pid_t child)
+{
+	int ret = EXIT_FAILURE;
+	pid_t pid;
+	int status;
+	siginfo_t si;
+
+	/* Attach to the child */
+	while (1) {
+		int sig;
+
+		pid = wait(&status);
+		if (pid == -1) {
+			perror("wait");
+			goto error;
+		}
+
+		/*
+		 * This should never happen but it's hard to flag in
+		 * the framework.
+		 */
+		if (pid != child)
+			continue;
+
+		if (WIFEXITED(status) || WIFSIGNALED(status))
+			ksft_exit_fail_msg("Child died unexpectedly\n");
+
+		if (!WIFSTOPPED(status))
+			goto error;
+
+		sig = WSTOPSIG(status);
+
+		if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			if (errno == EINVAL) {
+				sig = 0; /* bust group-stop */
+				goto cont;
+			}
+
+			ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+
+		if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+		    si.si_pid == pid)
+			break;
+
+	cont:
+		if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			ksft_test_result_fail("PTRACE_CONT: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+	}
+
+	ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+	test_tpidr(child);
+
+	ret = EXIT_SUCCESS;
+
+error:
+	kill(child, SIGKILL);
+
+disappeared:
+	return ret;
+}
+
+int main(void)
+{
+	int ret = EXIT_SUCCESS;
+	pid_t child;
+
+	srandom(getpid());
+
+	ksft_print_header();
+
+	ksft_set_plan(EXPECTED_TESTS);
+
+	child = fork();
+	if (!child)
+		return do_child();
+
+	if (do_parent(child))
+		ret = EXIT_FAILURE;
+
+	ksft_print_cnts();
+
+	return ret;
+}
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
index b632bfe9e022..dd7ebe536d05 100644
--- a/tools/testing/selftests/arm64/abi/syscall-abi.c
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -112,9 +112,11 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	return errors;
 }
 
+#define SVE_Z_SHARED_BYTES (128 / 8)
+
 static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
-uint8_t z_in[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
-uint8_t z_out[SVE_NUM_PREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
 
 static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 		    uint64_t svcr)
@@ -133,22 +135,39 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	if (!sve_vl)
 		return 0;
 
-	/*
-	 * After a syscall the low 128 bits of the Z registers should
-	 * be preserved and the rest be zeroed or preserved, except if
-	 * we were in streaming mode in which case the low 128 bits may
-	 * also be cleared by the transition out of streaming mode.
-	 */
 	for (i = 0; i < SVE_NUM_ZREGS; i++) {
-		void *in = &z_in[reg_size * i];
-		void *out = &z_out[reg_size * i];
-
-		if ((memcmp(in, out, SVE_VQ_BYTES) != 0) &&
-		    !((svcr & SVCR_SM_MASK) &&
-		      memcmp(z_zero, out, SVE_VQ_BYTES) == 0)) {
-			ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
-				       cfg->name, sve_vl, i);
-			errors++;
+		uint8_t *in = &z_in[reg_size * i];
+		uint8_t *out = &z_out[reg_size * i];
+
+		if (svcr & SVCR_SM_MASK) {
+			/*
+			 * In streaming mode the whole register should
+			 * be cleared by the transition out of
+			 * streaming mode.
+			 */
+			if (memcmp(z_zero, out, reg_size) != 0) {
+				ksft_print_msg("%s SVE VL %d Z%d non-zero\n",
+					       cfg->name, sve_vl, i);
+				errors++;
+			}
+		} else {
+			/*
+			 * For standard SVE the low 128 bits should be
+			 * preserved and any additional bits cleared.
+			 */
+			if (memcmp(in, out, SVE_Z_SHARED_BYTES) != 0) {
+				ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
+					       cfg->name, sve_vl, i);
+				errors++;
+			}
+
+			if (reg_size > SVE_Z_SHARED_BYTES &&
+			    (memcmp(z_zero, out + SVE_Z_SHARED_BYTES,
+				    reg_size - SVE_Z_SHARED_BYTES) != 0)) {
+				ksft_print_msg("%s SVE VL %d Z%d high bits non-zero\n",
+					       cfg->name, sve_vl, i);
+				errors++;
+			}
 		}
 	}
 
@@ -176,9 +195,9 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	if (!sve_vl)
 		return 0;
 
-	/* After a syscall the P registers should be preserved or zeroed */
+	/* After a syscall the P registers should be zeroed */
 	for (i = 0; i < SVE_NUM_PREGS * reg_size; i++)
-		if (p_out[i] && (p_in[i] != p_out[i]))
+		if (p_out[i])
 			errors++;
 	if (errors)
 		ksft_print_msg("%s SVE VL %d predicate registers non-zero\n",
@@ -226,9 +245,9 @@ static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 	    !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64))
 		return 0;
 
-	/* After a syscall the P registers should be preserved or zeroed */
+	/* After a syscall FFR should be zeroed */
 	for (i = 0; i < reg_size; i++)
-		if (ffr_out[i] && (ffr_in[i] != ffr_out[i]))
+		if (ffr_out[i])
 			errors++;
 	if (errors)
 		ksft_print_msg("%s SVE VL %d FFR non-zero\n",
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index ea947af63882..df79d29664a1 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,4 +1,5 @@
 fp-pidbench
+fp-stress
 fpsimd-test
 rdvl-sme
 rdvl-sve
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a7c2286bf65b..36db61358ed5 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -5,7 +5,10 @@ top_srcdir = $(realpath ../../../../../)
 
 CFLAGS += -I$(top_srcdir)/usr/include/
 
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg za-fork za-ptrace
+TEST_GEN_PROGS := fp-stress \
+	sve-ptrace sve-probe-vls \
+	vec-syscfg \
+	za-fork za-ptrace
 TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
 	rdvl-sme rdvl-sve \
 	sve-test \
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
index a180851496ec..757b2fd75dd7 100644
--- a/tools/testing/selftests/arm64/fp/asm-offsets.h
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -3,6 +3,7 @@
 #define sa_handler 0
 #define sa_mask_sz 8
 #define SIGUSR1 10
+#define SIGUSR2 12
 #define SIGTERM 15
 #define SIGINT 2
 #define SIGABRT 6
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
new file mode 100644
index 000000000000..4e62a9199f97
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+#define _POSIX_C_SOURCE 199309L
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/epoll.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/hwcap.h>
+
+#include "../../kselftest.h"
+
+#define MAX_VLS 16
+
+struct child_data {
+	char *name, *output;
+	pid_t pid;
+	int stdout;
+	bool output_seen;
+	bool exited;
+	int exit_status;
+};
+
+static int epoll_fd;
+static struct child_data *children;
+static int num_children;
+static bool terminate;
+
+static void drain_output(bool flush);
+
+static int num_processors(void)
+{
+	long nproc = sysconf(_SC_NPROCESSORS_CONF);
+
+	if (nproc < 0) {
+		perror("Unable to read number of processors");
+		exit(EXIT_FAILURE);
+	}
+
+	return nproc;
+}
+
+static void child_start(struct child_data *child, const char *program)
+{
+	int ret, pipefd[2], i;
+	struct epoll_event ev;
+
+	ret = pipe(pipefd);
+	if (ret != 0)
+		ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n",
+				   strerror(errno), errno);
+
+	child->pid = fork();
+	if (child->pid == -1)
+		ksft_exit_fail_msg("fork() failed: %s (%d)\n",
+				   strerror(errno), errno);
+
+	if (!child->pid) {
+		/*
+		 * In child, replace stdout with the pipe, errors to
+		 * stderr from here as kselftest prints to stdout.
+		 */
+		ret = dup2(pipefd[1], 1);
+		if (ret == -1) {
+			fprintf(stderr, "dup2() %d\n", errno);
+			exit(EXIT_FAILURE);
+		}
+
+		/*
+		 * Very dumb mechanism to clean open FDs other than
+		 * stdio. We don't want O_CLOEXEC for the pipes...
+		 */
+		for (i = 3; i < 8192; i++)
+			close(i);
+
+		ret = execl(program, program, NULL);
+		fprintf(stderr, "execl(%s) failed: %d (%s)\n",
+			program, errno, strerror(errno));
+
+		exit(EXIT_FAILURE);
+	} else {
+		/*
+		 * In parent, remember the child and close our copy of the
+		 * write side of stdout.
+		 */
+		close(pipefd[1]);
+		child->stdout = pipefd[0];
+		child->output = NULL;
+		child->exited = false;
+		child->output_seen = false;
+
+		ev.events = EPOLLIN | EPOLLHUP;
+		ev.data.ptr = child;
+
+		ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev);
+		if (ret < 0) {
+			ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n",
+					   child->name, strerror(errno), errno);
+		}
+
+		/*
+		 * Keep output flowing during child startup so logs
+		 * are more timely, which can help debugging.
+		 */
+		drain_output(false);
+	}
+}
+
+static bool child_output_read(struct child_data *child)
+{
+	char read_data[1024];
+	char work[1024];
+	int ret, len, cur_work, cur_read;
+
+	ret = read(child->stdout, read_data, sizeof(read_data));
+	if (ret < 0) {
+		if (errno == EINTR)
+			return true;
+
+		ksft_print_msg("%s: read() failed: %s (%d)\n",
+			       child->name, strerror(errno),
+			       errno);
+		return false;
+	}
+	len = ret;
+
+	child->output_seen = true;
+
+	/* Pick up any partial read */
+	if (child->output) {
+		strncpy(work, child->output, sizeof(work) - 1);
+		cur_work = strnlen(work, sizeof(work));
+		free(child->output);
+		child->output = NULL;
+	} else {
+		cur_work = 0;
+	}
+
+	cur_read = 0;
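+	/* Split the new data into lines, printing each completed line */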
+	while (cur_read < len) {
+		work[cur_work] = read_data[cur_read++];
+
+		if (work[cur_work] == '\n') {
+			work[cur_work] = '\0';
+			ksft_print_msg("%s: %s\n", child->name, work);
+			cur_work = 0;
+		} else {
+			cur_work++;
+		}
+	}
+
+	if (cur_work) {
+		work[cur_work] = '\0';
+		ret = asprintf(&child->output, "%s", work);
+		if (ret == -1)
+			ksft_exit_fail_msg("Out of memory\n");
+	}
+
+	return false;
+}
+
+static void child_output(struct child_data *child, uint32_t events,
+			 bool flush)
+{
+	bool read_more;
+
+	if (events & EPOLLIN) {
+		do {
+			read_more = child_output_read(child);
+		} while (read_more);
+	}
+
+	if (events & EPOLLHUP) {
+		close(child->stdout);
+		child->stdout = -1;
+		flush = true;
+	}
+
+	if (flush && child->output) {
+		ksft_print_msg("%s: %s<EOF>\n", child->name, child->output);
+		free(child->output);
+		child->output = NULL;
+	}
+}
+
+static void child_tickle(struct child_data *child)
+{
+	if (child->output_seen && !child->exited)
+		kill(child->pid, SIGUSR2);
+}
+
+static void child_stop(struct child_data *child)
+{
+	if (!child->exited)
+		kill(child->pid, SIGTERM);
+}
+
+static void child_cleanup(struct child_data *child)
+{
+	pid_t ret;
+	int status;
+	bool fail = false;
+
+	if (!child->exited) {
+		do {
+			ret = waitpid(child->pid, &status, 0);
+			if (ret == -1 && errno == EINTR)
+				continue;
+
+			if (ret == -1) {
+				ksft_print_msg("waitpid(%d) failed: %s (%d)\n",
+					       child->pid, strerror(errno),
+					       errno);
+				fail = true;
+				break;
+			}
+		} while (!WIFEXITED(status));
+		child->exit_status = WEXITSTATUS(status);
+	}
+
+	if (!child->output_seen) {
+		ksft_print_msg("%s no output seen\n", child->name);
+		fail = true;
+	}
+
+	if (child->exit_status != 0) {
+		ksft_print_msg("%s exited with error code %d\n",
+			       child->name, child->exit_status);
+		fail = true;
+	}
+
+	ksft_test_result(!fail, "%s\n", child->name);
+}
+
+static void handle_child_signal(int sig, siginfo_t *info, void *context)
+{
+	int i;
+	bool found = false;
+
+	for (i = 0; i < num_children; i++) {
+		if (children[i].pid == info->si_pid) {
+			children[i].exited = true;
+			children[i].exit_status = info->si_status;
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n",
+			       info->si_pid, info->si_status);
+}
+
+static void handle_exit_signal(int sig, siginfo_t *info, void *context)
+{
+	int i;
+
+	/* If we're already exiting then don't signal again */
+	if (terminate)
+		return;
+
+	ksft_print_msg("Got signal, exiting...\n");
+
+	terminate = true;
+
+	/*
+	 * This should be redundant, the main loop should clean up
+	 * after us, but for safety stop everything we can here.
+	 */
+	for (i = 0; i < num_children; i++)
+		child_stop(&children[i]);
+}
+
+static void start_fpsimd(struct child_data *child, int cpu, int copy)
+{
+	int ret;
+
+	child_start(child, "./fpsimd-test");
+
+	ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy);
+	if (ret == -1)
+		ksft_exit_fail_msg("asprintf() failed\n");
+
+	ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_sve(struct child_data *child, int vl, int cpu)
+{
+	int ret;
+
+	ret = prctl(PR_SVE_SET_VL, vl | PR_SVE_VL_INHERIT);
+	if (ret < 0)
+		ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl);
+
+	child_start(child, "./sve-test");
+
+	ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu);
+	if (ret == -1)
+		ksft_exit_fail_msg("asprintf() failed\n");
+
+	ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_ssve(struct child_data *child, int vl, int cpu)
+{
+	int ret;
+
+	ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT);
+	if (ret < 0)
+		ksft_exit_fail_msg("Failed to set SME VL %d\n", ret);
+
+	child_start(child, "./ssve-test");
+
+	ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu);
+	if (ret == -1)
+		ksft_exit_fail_msg("asprintf() failed\n");
+
+	ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_za(struct child_data *child, int vl, int cpu)
+{
+	int ret;
+
+	ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT);
+	if (ret < 0)
+		ksft_exit_fail_msg("Failed to set SME VL %d\n", vl);
+
+	child_start(child, "./za-test");
+
+	ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu);
+	if (ret == -1)
+		ksft_exit_fail_msg("asprintf() failed\n");
+
+	ksft_print_msg("Started %s\n", child->name);
+}
+
+static void probe_vls(int vls[], int *vl_count, int set_vl)
+{
+	unsigned int vq;
+	int vl;
+
+	*vl_count = 0;
+
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(set_vl, vq * 16);
+		if (vl == -1)
+			ksft_exit_fail_msg("SET_VL failed: %s (%d)\n",
+					   strerror(errno), errno);
+
+		vl &= PR_SVE_VL_LEN_MASK;
+
+		vq = sve_vq_from_vl(vl);
+
+		vls[*vl_count] = vl;
+		*vl_count += 1;
+	}
+}
+
+/* Handle any pending output without blocking */
+static void drain_output(bool flush)
+{
+	struct epoll_event ev;
+	int ret = 1;
+
+	while (ret > 0) {
+		ret = epoll_wait(epoll_fd, &ev, 1, 0);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			ksft_print_msg("epoll_wait() failed: %s (%d)\n",
+				       strerror(errno), errno);
+		}
+
+		if (ret == 1)
+			child_output(ev.data.ptr, ev.events, flush);
+	}
+}
+
+static const struct option options[] = {
+	{ "timeout",	required_argument, NULL, 't' },
+	{ }
+};
+
+int main(int argc, char **argv)
+{
+	int ret;
+	int timeout = 10;
+	int cpus, tests, i, j, c;
+	int sve_vl_count, sme_vl_count, fpsimd_per_cpu;
+	int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
+	struct epoll_event ev;
+	struct sigaction sa;
+
+	while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
+		switch (c) {
+		case 't':
+			ret = sscanf(optarg, "%d", &timeout);
+			if (ret != 1)
+				ksft_exit_fail_msg("Failed to parse timeout %s\n",
+						   optarg);
+			break;
+		default:
+			ksft_exit_fail_msg("Unknown argument\n");
+		}
+	}
+
+	cpus = num_processors();
+	tests = 0;
+
+	if (getauxval(AT_HWCAP) & HWCAP_SVE) {
+		probe_vls(sve_vls, &sve_vl_count, PR_SVE_SET_VL);
+		tests += sve_vl_count * cpus;
+	} else {
+		sve_vl_count = 0;
+	}
+
+	if (getauxval(AT_HWCAP2) & HWCAP2_SME) {
+		probe_vls(sme_vls, &sme_vl_count, PR_SME_SET_VL);
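+		/* One streaming SVE and one ZA stress test per SME VL per CPU */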
+		tests += sme_vl_count * cpus * 2;
+	} else {
+		sme_vl_count = 0;
+	}
+
+	/* Force context switching if we only have FPSIMD */
+	if (!sve_vl_count && !sme_vl_count)
+		fpsimd_per_cpu = 2;
+	else
+		fpsimd_per_cpu = 1;
+	tests += cpus * fpsimd_per_cpu;
+
+	ksft_print_header();
+	ksft_set_plan(tests);
+
+	ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n",
+		       cpus, sve_vl_count, sme_vl_count);
+
+	if (timeout > 0)
+		ksft_print_msg("Will run for %ds\n", timeout);
+	else
+		ksft_print_msg("Will run until terminated\n");
+
+	children = calloc(sizeof(*children), tests);
+	if (!children)
+		ksft_exit_fail_msg("Unable to allocate child data\n");
+
+	ret = epoll_create1(EPOLL_CLOEXEC);
+	if (ret < 0)
+		ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n",
+				   strerror(errno), ret);
+	epoll_fd = ret;
+
+	/* Get signal handlers ready before we start any children */
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handle_exit_signal;
+	sa.sa_flags = SA_RESTART | SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	ret = sigaction(SIGINT, &sa, NULL);
+	if (ret < 0)
+		ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n",
+			       strerror(errno), errno);
+	ret = sigaction(SIGTERM, &sa, NULL);
+	if (ret < 0)
+		ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n",
+			       strerror(errno), errno);
+	sa.sa_sigaction = handle_child_signal;
+	ret = sigaction(SIGCHLD, &sa, NULL);
+	if (ret < 0)
+		ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n",
+			       strerror(errno), errno);
+
+	for (i = 0; i < cpus; i++) {
+		for (j = 0; j < fpsimd_per_cpu; j++)
+			start_fpsimd(&children[num_children++], i, j);
+
+		for (j = 0; j < sve_vl_count; j++)
+			start_sve(&children[num_children++], sve_vls[j], i);
+
+		for (j = 0; j < sme_vl_count; j++) {
+			start_ssve(&children[num_children++], sme_vls[j], i);
+			start_za(&children[num_children++], sme_vls[j], i);
+		}
+	}
+
+	for (;;) {
+		/* Did we get a signal asking us to exit? */
+		if (terminate)
+			break;
+
+		/*
+		 * The timeout is counted in seconds with no output; the
+		 * tests print during startup and are then silent while
+		 * running, so this should ensure they have all run long
+		 * enough to install their signal handlers. This is
+		 * especially useful in emulation, where we will both be
+		 * slow and likely to have a large set of VLs.
+		 */
+		ret = epoll_wait(epoll_fd, &ev, 1, 1000);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n",
+					   strerror(errno), errno);
+		}
+
+		/* Output? */
+		if (ret == 1) {
+			child_output(ev.data.ptr, ev.events, false);
+			continue;
+		}
+
+		/* Otherwise epoll_wait() timed out */
+
+		for (i = 0; i < num_children; i++)
+			child_tickle(&children[i]);
+
+		/* Negative timeout means run indefinitely */
+		if (timeout < 0)
+			continue;
+		if (--timeout == 0)
+			break;
+	}
+
+	ksft_print_msg("Finishing up...\n");
+	terminate = true;
+
+	for (i = 0; i < tests; i++)
+		child_stop(&children[i]);
+
+	drain_output(false);
+
+	for (i = 0; i < tests; i++)
+		child_cleanup(&children[i]);
+
+	drain_output(true);
+
+	ksft_print_cnts();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
index e21e8ea52c7e..918d04885a33 100644
--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -151,6 +151,15 @@ function irritator_handler
 	ret
 endfunction
 
+function tickle_handler
+	// Increment the signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	ret
+endfunction
+
 function terminate_handler
 	mov	w21, w0
 	mov	x20, x2
@@ -207,6 +216,30 @@ endfunction
 .globl _start
 function _start
 _start:
+	mov	x23, #0		// signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	w0, #SIGUSR2
+	adr	x1, tickle_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
 	// Sanity-check and report the vector length
 
 	mov	x19, #128
@@ -237,24 +270,6 @@ _start:
 	mov	x0, x20
 	bl	putdecn
 
-	mov	x23, #0		// Irritation signal count
-
-	mov	w0, #SIGINT
-	adr	x1, terminate_handler
-	mov	w2, #SA_SIGINFO
-	bl	setsignal
-
-	mov	w0, #SIGTERM
-	adr	x1, terminate_handler
-	mov	w2, #SA_SIGINFO
-	bl	setsignal
-
-	mov	w0, #SIGUSR1
-	adr	x1, irritator_handler
-	mov	w2, #SA_SIGINFO
-	orr	w2, w2, #SA_NODEFER
-	bl	setsignal
-
 	mov	x22, #0		// generation number, increments per iteration
 .Ltest_loop:
 
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 589264231a2d..2a18cb4c528c 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -314,6 +314,15 @@ function irritator_handler
 	ret
 endfunction
 
+function tickle_handler
+	// Increment the signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	ret
+endfunction
+
 function terminate_handler
 	mov	w21, w0
 	mov	x20, x2
@@ -370,6 +379,30 @@ endfunction
 .globl _start
 function _start
 _start:
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	w0, #SIGUSR2
+	adr	x1, tickle_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
 #ifdef SSVE
 	puts	"Streaming mode "
 	smstart_sm
@@ -405,24 +438,6 @@ _start:
 	mov	x0, x20
 	bl	putdecn
 
-	mov	x23, #0		// Irritation signal count
-
-	mov	w0, #SIGINT
-	adr	x1, terminate_handler
-	mov	w2, #SA_SIGINFO
-	bl	setsignal
-
-	mov	w0, #SIGTERM
-	adr	x1, terminate_handler
-	mov	w2, #SA_SIGINFO
-	bl	setsignal
-
-	mov	w0, #SIGUSR1
-	adr	x1, irritator_handler
-	mov	w2, #SA_SIGINFO
-	orr	w2, w2, #SA_NODEFER
-	bl	setsignal
-
 #ifdef SSVE
 	smstart_sm		// syscalls will have exited streaming mode
 #endif
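Both test programs now install their handlers at the very top of _start, before the vector length is probed and the banner printed, so a SIGUSR2 tickle arriving while a slow (for example emulated) target is still starting up is already handled. For readers less fluent in the assembly, the setsignal helper being branched to behaves roughly like the following C; the function name and error handling here are an approximation, not the selftest helper's definition:

/* Rough C analogue of the setsignal calls in the _start prologue above. */
#include <signal.h>
#include <stdlib.h>
#include <string.h>

static void setsignal_approx(int signum,
			     void (*handler)(int, siginfo_t *, void *),
			     int extra_flags)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO | extra_flags;	/* SA_NODEFER for the tickles */
	sigemptyset(&sa.sa_mask);

	if (sigaction(signum, &sa, NULL) < 0)
		abort();
}

/*
 * Mirroring the assembly:
 *	setsignal_approx(SIGINT,  terminate_handler, 0);
 *	setsignal_approx(SIGTERM, terminate_handler, 0);
 *	setsignal_approx(SIGUSR1, irritator_handler, SA_NODEFER);
 *	setsignal_approx(SIGUSR2, tickle_handler,    SA_NODEFER);
 */

SA_NODEFER leaves the signal unblocked during its own handler, so deliveries can nest rather than being deferred until the handler returns.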
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
index 9ab6f9cd9623..53c54af65704 100644
--- a/tools/testing/selftests/arm64/fp/za-test.S
+++ b/tools/testing/selftests/arm64/fp/za-test.S
@@ -167,6 +167,15 @@ function irritator_handler
 	ret
 endfunction
 
+function tickle_handler
+	// Increment the signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	ret
+endfunction
+
 function terminate_handler
 	mov	w21, w0
 	mov	x20, x2
@@ -223,6 +232,30 @@ endfunction
 .globl _start
 function _start
 _start:
+	mov	x23, #0		// signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	w0, #SIGUSR2
+	adr	x1, tickle_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
 	puts	"Streaming mode "
 	smstart_za
 
@@ -255,24 +288,6 @@ _start:
 	mov	x0, x20
 	bl	putdecn
 
-	mov	x23, #0		// Irritation signal count
-
-	mov	w0, #SIGINT
-	adr	x1, terminate_handler
-	mov	w2, #SA_SIGINFO
-	bl	setsignal
-
-	mov	w0, #SIGTERM
-	adr	x1, terminate_handler
-	mov	w2, #SA_SIGINFO
-	bl	setsignal
-
-	mov	w0, #SIGUSR1
-	adr	x1, irritator_handler
-	mov	w2, #SA_SIGINFO
-	orr	w2, w2, #SA_NODEFER
-	bl	setsignal
-
 	mov	x22, #0		// generation number, increments per iteration
 .Ltest_loop:
 	rdsvl	0, 8
@@ -287,12 +302,7 @@ _start:
 	subs	x21, x21, #1
 	b.ne	0b
 
-	and	x8, x22, #127		// Every 128 interations...
-	cbz	x8, 0f
-	mov	x8, #__NR_getpid	// (otherwise minimal syscall)
-	b	1f
-0:
-	mov	x8, #__NR_sched_yield	// ...encourage preemption
+	mov	x8, #__NR_sched_yield	// encourage preemption
 1:
 	svc	#0
 
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index a5a0744423d8..037046f5784e 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -11,11 +11,8 @@ LDFLAGS += -pthread
 SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
 PROGS := $(patsubst %.c,%,$(SRCS))
 
-#Add mte compiler option
-CFLAGS += -march=armv8.5-a+memtag
-
 #check if the compiler works well
-mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.5-a+memtag -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
 
 ifeq ($(mte_cc_support),1)
 # Generated binaries to be installed by top KSFT script
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
index a02c04cd0aac..a55dbbc56ed1 100644
--- a/tools/testing/selftests/arm64/mte/mte_helper.S
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -3,6 +3,8 @@
 
 #include "mte_def.h"
 
+.arch	armv8.5-a+memtag
+
 #define ENTRY(name) \
 	.globl name ;\
 	.p2align 2;\
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index b588d10afd5b..308e229e58ab 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -165,15 +165,64 @@ static bool handle_signal_ok(struct tdescr *td,
 }
 
 static bool handle_signal_copyctx(struct tdescr *td,
-				  siginfo_t *si, void *uc)
+				  siginfo_t *si, void *uc_in)
 {
+	ucontext_t *uc = uc_in;
+	struct _aarch64_ctx *head;
+	struct extra_context *extra, *copied_extra;
+	size_t offset = 0;
+	size_t to_copy;
+
+	ASSERT_GOOD_CONTEXT(uc);
+
 	/* Mangling PC to avoid loops on original BRK instr */
-	((ucontext_t *)uc)->uc_mcontext.pc += 4;
-	memcpy(td->live_uc, uc, td->live_sz);
-	ASSERT_GOOD_CONTEXT(td->live_uc);
+	uc->uc_mcontext.pc += 4;
+
+	/*
+	 * Check for and preserve any extra data too, with fixups.
+	 */
+	head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+	head = get_header(head, EXTRA_MAGIC, td->live_sz, &offset);
+	if (head) {
+		extra = (struct extra_context *)head;
+
+		/*
+		 * The extra buffer must sit immediately after the
+		 * extra_context and its 16 byte terminator. Include it
+		 * in the copy; this layout was already validated in
+		 * ASSERT_GOOD_CONTEXT().
+		 */
+		to_copy = offset + sizeof(struct extra_context) + 16 +
+			extra->size;
+		copied_extra = (struct extra_context *)&(td->live_uc->uc_mcontext.__reserved[offset]);
+	} else {
+		copied_extra = NULL;
+		to_copy = sizeof(ucontext_t);
+	}
+
+	if (to_copy > td->live_sz) {
+		fprintf(stderr,
+			"Not enough space to grab context, %lu/%lu bytes\n",
+			td->live_sz, to_copy);
+		return false;
+	}
+
+	memcpy(td->live_uc, uc, to_copy);
+
+	/*
+	 * If there was an EXTRA_CONTEXT, fix up the copied size to cover
+	 * only the struct extra_context and the following terminator
+	 * record. This means the rest of the code does not need special
+	 * handling for the record, and we don't need to fix up datap for
+	 * the new location.
+	 */
+	if (copied_extra)
+		copied_extra->head.size = sizeof(*copied_extra) + 16;
+
 	td->live_uc_valid = 1;
 	fprintf(stderr,
-		"GOOD CONTEXT grabbed from sig_copyctx handler\n");
+		"%lu byte GOOD CONTEXT grabbed from sig_copyctx handler\n",
+		to_copy);
 
 	return true;
 }
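The size arithmetic in handle_signal_copyctx() above relies on the fixed layout the kernel uses for EXTRA_MAGIC: the out-of-line data sits immediately after the extra_context record and its 16 byte terminator, which ASSERT_GOOD_CONTEXT() has already verified. Assuming the uapi definitions from <asm/sigcontext.h>, the amount that must land in td->live_uc works out as in this small sketch (the helper name is illustrative):

/*
 * __reserved[] layout once an EXTRA_MAGIC record is reached at 'offset':
 *
 *   offset                         struct extra_context (head.magic == EXTRA_MAGIC)
 *   offset + sizeof(extra)         16 byte terminator record
 *   offset + sizeof(extra) + 16    extra->size bytes of out-of-line records,
 *                                  also pointed to by extra->datap
 */
#include <stddef.h>
#include <asm/sigcontext.h>

static size_t context_copy_size(size_t offset, struct extra_context *extra)
{
	return offset + sizeof(struct extra_context) + 16 + extra->size;
}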
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index f3aa99ba67bb..222093f51b67 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -56,7 +56,8 @@ static inline bool feats_ok(struct tdescr *td)
  * at sizeof(ucontext_t).
  */
 static __always_inline bool get_current_context(struct tdescr *td,
-						ucontext_t *dest_uc)
+						ucontext_t *dest_uc,
+						size_t dest_sz)
 {
 	static volatile bool seen_already;
 
@@ -64,7 +65,7 @@ static __always_inline bool get_current_context(struct tdescr *td,
 	/* it's a genuine invocation..reinit */
 	seen_already = 0;
 	td->live_uc_valid = 0;
-	td->live_sz = sizeof(*dest_uc);
+	td->live_sz = dest_sz;
 	memset(dest_uc, 0x00, td->live_sz);
 	td->live_uc = dest_uc;
 	/*
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
index 8dc600a7d4fd..8c7f00ea9823 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_bad_magic_run(struct tdescr *td,
 	struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
 
 	/* just to fill the ucontext_t with something real */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	/* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
index b3c362100666..1c03f6b638e0 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
@@ -24,7 +24,7 @@ static int fake_sigreturn_bad_size_run(struct tdescr *td,
 	struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
 
 	/* just to fill the ucontext_t with something real */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
index a44b88bfc81a..bc22f64b544e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td,
 	struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
 
 	/* just to fill the ucontext_t with something real */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	/* at least HDR_SZ for the badly sized terminator. */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
index afe8915f0998..63e3906b631c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td,
 	struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
 
 	/* just to fill the ucontext_t with something real */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ,
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
index 1e089e66f9f3..d00625ff12c2 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
@@ -19,7 +19,7 @@ static int fake_sigreturn_misaligned_run(struct tdescr *td,
 					 siginfo_t *si, ucontext_t *uc)
 {
 	/* just to fill the ucontext_t with something real */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	/* Forcing sigframe on misaligned SP (16 + 3) */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
index 08ecd8073a1a..f805138cb20d 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
@@ -23,7 +23,7 @@ static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td,
 	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
 
 	/* just to fill the ucontext_t with something real */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
index 7ed762b7202f..ebd5815b54bb 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -54,7 +54,7 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
 	struct sve_context *sve;
 
 	/* Get a signal context with a SME ZA frame in it */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
index 915821375b0a..e2a452190511 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -56,7 +56,7 @@ static int fake_sigreturn_sve_change_vl(struct tdescr *td,
 	struct sve_context *sve;
 
 	/* Get a signal context with a SVE frame in it */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
index 13ff3b35cbaf..75f387f2db81 100644
--- a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
@@ -34,7 +34,7 @@ static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	struct za_context *za;
 
 	/* Get a signal context which should have a ZA frame in it */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
index 9022a6cab4b3..d0a178945b1a 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -13,7 +13,10 @@
 #include "test_signals_utils.h"
 #include "testcases.h"
 
-struct fake_sigframe sf;
+static union {
+	ucontext_t uc;
+	char buf[1024 * 64];
+} context;
 static unsigned int vls[SVE_VQ_MAX];
 unsigned int nvls = 0;
 
@@ -55,8 +58,8 @@ static void setup_ssve_regs(void)
 static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 			 unsigned int vl)
 {
-	size_t resv_sz, offset;
-	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	size_t offset;
+	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
 	struct sve_context *ssve;
 	int ret;
 
@@ -73,11 +76,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 	 * in it.
 	 */
 	setup_ssve_regs();
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &context.uc, sizeof(context)))
 		return 1;
 
-	resv_sz = GET_SF_RESV_SIZE(sf);
-	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+			  &offset);
 	if (!head) {
 		fprintf(stderr, "No SVE context\n");
 		return 1;
@@ -101,16 +104,6 @@ static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	int i;
 
 	for (i = 0; i < nvls; i++) {
-		/*
-		 * TODO: the signal test helpers can't currently cope
-		 * with signal frames bigger than struct sigcontext,
-		 * skip VLs that will trigger that.
-		 */
-		if (vls[i] > 64) {
-			printf("Skipping VL %u due to stack size\n", vls[i]);
-			continue;
-		}
-
 		if (do_one_sme_vl(td, si, uc, vls[i]))
 			return 1;
 	}
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
index 4b2418aa08a9..8b16eabbb769 100644
--- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -13,7 +13,10 @@
 #include "test_signals_utils.h"
 #include "testcases.h"
 
-struct fake_sigframe sf;
+static union {
+	ucontext_t uc;
+	char buf[1024 * 64];
+} context;
 static unsigned int vls[SVE_VQ_MAX];
 unsigned int nvls = 0;
 
@@ -55,8 +58,8 @@ static void setup_sve_regs(void)
 static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 			 unsigned int vl)
 {
-	size_t resv_sz, offset;
-	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	size_t offset;
+	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
 	struct sve_context *sve;
 
 	fprintf(stderr, "Testing VL %d\n", vl);
@@ -71,11 +74,11 @@ static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 	 * in it.
 	 */
 	setup_sve_regs();
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &context.uc, sizeof(context)))
 		return 1;
 
-	resv_sz = GET_SF_RESV_SIZE(sf);
-	head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+	head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+			  &offset);
 	if (!head) {
 		fprintf(stderr, "No SVE context\n");
 		return 1;
@@ -99,14 +102,6 @@ static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	int i;
 
 	for (i = 0; i < nvls; i++) {
-		/*
-		 * TODO: the signal test helpers can't currently cope
-		 * with signal frames bigger than struct sigcontext,
-		 * skip VLs that will trigger that.
-		 */
-		if (vls[i] > 64)
-			continue;
-
 		if (do_one_sve_vl(td, si, uc, vls[i]))
 			return 1;
 	}
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
index 92904653add1..aa835acec062 100644
--- a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
@@ -34,7 +34,7 @@ static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	struct sve_context *sve;
 
 	/* Get a signal context which should have a SVE frame in it */
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
 		return 1;
 
 	resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 84c36bee4d82..e1c625b20ac4 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -25,7 +25,8 @@ struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
 	return found;
 }
 
-bool validate_extra_context(struct extra_context *extra, char **err)
+bool validate_extra_context(struct extra_context *extra, char **err,
+			    void **extra_data, size_t *extra_size)
 {
 	struct _aarch64_ctx *term;
 
@@ -33,7 +34,7 @@ bool validate_extra_context(struct extra_context *extra, char **err)
 		return false;
 
 	fprintf(stderr, "Validating EXTRA...\n");
-	term = GET_RESV_NEXT_HEAD(extra);
+	term = GET_RESV_NEXT_HEAD(&extra->head);
 	if (!term || term->magic || term->size) {
 		*err = "Missing terminator after EXTRA context";
 		return false;
@@ -42,11 +43,14 @@ bool validate_extra_context(struct extra_context *extra, char **err)
 		*err = "Extra DATAP misaligned";
 	else if (extra->size & 0x0fUL)
 		*err = "Extra SIZE misaligned";
-	else if (extra->datap != (uint64_t)term + sizeof(*term))
+	else if (extra->datap != (uint64_t)term + 0x10UL)
 		*err = "Extra DATAP misplaced (not contiguous)";
 	if (*err)
 		return false;
 
+	*extra_data = (void *)extra->datap;
+	*extra_size = extra->size;
+
 	return true;
 }
 
@@ -105,11 +109,14 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 	bool terminated = false;
 	size_t offs = 0;
 	int flags = 0;
+	int new_flags;
 	struct extra_context *extra = NULL;
 	struct sve_context *sve = NULL;
 	struct za_context *za = NULL;
 	struct _aarch64_ctx *head =
 		(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+	void *extra_data = NULL;
+	size_t extra_sz = 0;
 
 	if (!err)
 		return false;
@@ -120,12 +127,24 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 			return false;
 		}
 
+		new_flags = 0;
+
 		switch (head->magic) {
 		case 0:
-			if (head->size)
+			if (head->size) {
 				*err = "Bad size for terminator";
-			else
+			} else if (extra_data) {
+				/* End of main data, walking the extra data */
+				head = extra_data;
+				resv_sz = extra_sz;
+				offs = 0;
+
+				extra_data = NULL;
+				extra_sz = 0;
+				continue;
+			} else {
 				terminated = true;
+			}
 			break;
 		case FPSIMD_MAGIC:
 			if (flags & FPSIMD_CTX)
@@ -133,7 +152,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 			else if (head->size !=
 				 sizeof(struct fpsimd_context))
 				*err = "Bad size for fpsimd_context";
-			flags |= FPSIMD_CTX;
+			new_flags |= FPSIMD_CTX;
 			break;
 		case ESR_MAGIC:
 			if (head->size != sizeof(struct esr_context))
@@ -144,14 +163,14 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 				*err = "Multiple SVE_MAGIC";
 			/* Size is validated in validate_sve_context() */
 			sve = (struct sve_context *)head;
-			flags |= SVE_CTX;
+			new_flags |= SVE_CTX;
 			break;
 		case ZA_MAGIC:
 			if (flags & ZA_CTX)
 				*err = "Multiple ZA_MAGIC";
 			/* Size is validated in validate_za_context() */
 			za = (struct za_context *)head;
-			flags |= ZA_CTX;
+			new_flags |= ZA_CTX;
 			break;
 		case EXTRA_MAGIC:
 			if (flags & EXTRA_CTX)
@@ -159,7 +178,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 			else if (head->size !=
 				 sizeof(struct extra_context))
 				*err = "Bad size for extra_context";
-			flags |= EXTRA_CTX;
+			new_flags |= EXTRA_CTX;
 			extra = (struct extra_context *)head;
 			break;
 		case KSFT_BAD_MAGIC:
@@ -192,16 +211,19 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
 			return false;
 		}
 
-		if (flags & EXTRA_CTX)
-			if (!validate_extra_context(extra, err))
+		if (new_flags & EXTRA_CTX)
+			if (!validate_extra_context(extra, err,
+						    &extra_data, &extra_sz))
 				return false;
-		if (flags & SVE_CTX)
+		if (new_flags & SVE_CTX)
 			if (!validate_sve_context(sve, err))
 				return false;
-		if (flags & ZA_CTX)
+		if (new_flags & ZA_CTX)
 			if (!validate_za_context(za, err))
 				return false;
 
+		flags |= new_flags;
+
 		head = GET_RESV_NEXT_HEAD(head);
 	}
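The new_flags bookkeeping and the EXTRA handoff above turn the record walk into two passes over one logical list: when the terminator of the in-frame __reserved[] area is reached and an EXTRA_MAGIC record has supplied an out-of-line buffer, the walk restarts at offset zero inside that buffer, and only the second terminator actually ends it. A condensed, self-contained sketch of that control flow, with a stand-in record type and the per-record validation elided:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct rec { uint32_t magic; uint32_t size; };	/* stand-in for _aarch64_ctx */

/*
 * Walk a record list that may hand off to an out-of-line buffer, as
 * validate_reserved() now does for EXTRA_MAGIC.  In the real code
 * extra_data/extra_sz are discovered while parsing the EXTRA record;
 * here they are passed in for brevity.
 */
static bool walk(const char *base, size_t resv_sz,
		 const char *extra_data, size_t extra_sz)
{
	size_t offs = 0;

	while (offs < resv_sz) {
		const struct rec *head = (const struct rec *)(base + offs);

		if (head->magic == 0) {		/* terminator */
			if (extra_data) {
				/* restart inside the out-of-line buffer */
				base = extra_data;
				resv_sz = extra_sz;
				offs = 0;
				extra_data = NULL;
				continue;
			}
			return true;		/* properly terminated */
		}

		/* ... per-record checks go here ... */

		if (!head->size)
			return false;		/* malformed record */
		offs += head->size;
	}

	return false;				/* ran off the end */
}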
 
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index 49f1d5de7b5b..040afded0b76 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -30,6 +30,13 @@
 #define GET_SF_RESV_SIZE(sf) \
 	sizeof((sf).uc.uc_mcontext.__reserved)
 
+#define GET_BUF_RESV_HEAD(buf) \
+	(struct _aarch64_ctx *)(&(buf).uc.uc_mcontext.__reserved)
+
+#define GET_BUF_RESV_SIZE(buf) \
+	(sizeof(buf) - sizeof(buf.uc) +	\
+	 sizeof((buf).uc.uc_mcontext.__reserved))
+
 #define GET_UCP_RESV_SIZE(ucp) \
 	sizeof((ucp)->uc_mcontext.__reserved)
 
@@ -79,8 +86,6 @@ struct fake_sigframe {
 
 bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
 
-bool validate_extra_context(struct extra_context *extra, char **err);
-
 struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
 				size_t resv_sz, size_t *offset);
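The GET_BUF_RESV_* macros are what let the converted testcases replace struct fake_sigframe with a much larger anonymous union while still telling get_header() how much space is usable: everything from the start of __reserved[] to the end of the buffer, i.e. the in-struct array plus the slack the oversized buffer adds beyond sizeof(ucontext_t). A minimal usage sketch following the testcases converted in this series (the 64k buffer size is the one they use; the function itself is illustrative):

/* Sketch: capture a context into an oversized buffer and find a record. */
#include <stddef.h>
#include <ucontext.h>

#include "test_signals_utils.h"
#include "testcases.h"

static union {
	ucontext_t uc;
	char buf[1024 * 64];
} context;

static int find_sve_record(struct tdescr *td)
{
	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
	size_t offset;

	if (!get_current_context(td, &context.uc, sizeof(context)))
		return 1;

	head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
	return head ? 0 : 1;
}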
 
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
new file mode 100644
index 000000000000..4d6f94b6178f
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+	ucontext_t uc;
+	char buf[1024 * 128];
+} context;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+	int vq, vl;
+
+	/*
+	 * Enumerate up to SME_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SME_SET_VL, vq * 16);
+		if (vl == -1)
+			return false;
+
+		vl &= PR_SME_VL_LEN_MASK;
+
+		/* Skip missing VLs */
+		vq = sve_vq_from_vl(vl);
+
+		vls[nvls++] = vl;
+	}
+
+	/* We need at least one VL */
+	if (nvls < 1) {
+		fprintf(stderr, "Only %d VL supported\n", nvls);
+		return false;
+	}
+
+	return true;
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+			 unsigned int vl)
+{
+	size_t offset;
+	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+	struct za_context *za;
+
+	fprintf(stderr, "Testing VL %d\n", vl);
+
+	if (prctl(PR_SME_SET_VL, vl) != vl) {
+		fprintf(stderr, "Failed to set VL\n");
+		return 1;
+	}
+
+	/*
+	 * Get a signal context which should have a ZA frame (with no
+	 * register data, since ZA is disabled) in it.
+	 */
+	if (!get_current_context(td, &context.uc, sizeof(context)))
+		return 1;
+
+	head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+	if (!head) {
+		fprintf(stderr, "No ZA context\n");
+		return 1;
+	}
+
+	za = (struct za_context *)head;
+	if (za->vl != vl) {
+		fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+		return 1;
+	}
+
+	if (head->size != ZA_SIG_REGS_OFFSET) {
+		fprintf(stderr, "Context size %u, expected %lu\n",
+			head->size, ZA_SIG_REGS_OFFSET);
+		return 1;
+	}
+
+	/* The actual size validation is done in get_current_context() */
+	fprintf(stderr, "Got expected size %u and VL %d\n",
+		head->size, za->vl);
+
+	return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+	int i;
+
+	for (i = 0; i < nvls; i++) {
+		if (do_one_sme_vl(td, si, uc, vls[i]))
+			return 1;
+	}
+
+	td->pass = 1;
+
+	return 0;
+}
+
+struct tdescr tde = {
+	.name = "ZA registers - ZA disabled",
+	.descr = "Check ZA context with ZA disabled",
+	.feats_required = FEAT_SME,
+	.timeout = 3,
+	.init = sme_get_vls,
+	.run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
index b94e4f99fcac..ea45acb115d5 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -13,7 +13,10 @@
 #include "test_signals_utils.h"
 #include "testcases.h"
 
-struct fake_sigframe sf;
+static union {
+	ucontext_t uc;
+	char buf[1024 * 128];
+} context;
 static unsigned int vls[SVE_VQ_MAX];
 unsigned int nvls = 0;
 
@@ -22,10 +25,10 @@ static bool sme_get_vls(struct tdescr *td)
 	int vq, vl;
 
 	/*
-	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 * Enumerate up to SME_VQ_MAX vector lengths
 	 */
 	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
-		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		vl = prctl(PR_SME_SET_VL, vq * 16);
 		if (vl == -1)
 			return false;
 
@@ -52,11 +55,13 @@ static void setup_za_regs(void)
 	asm volatile(".inst 0xd503457f" : : : );
 }
 
+static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+
 static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 			 unsigned int vl)
 {
-	size_t resv_sz, offset;
-	struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+	size_t offset;
+	struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
 	struct za_context *za;
 
 	fprintf(stderr, "Testing VL %d\n", vl);
@@ -71,11 +76,10 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 	 * in it.
 	 */
 	setup_za_regs();
-	if (!get_current_context(td, &sf.uc))
+	if (!get_current_context(td, &context.uc, sizeof(context)))
 		return 1;
 
-	resv_sz = GET_SF_RESV_SIZE(sf);
-	head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+	head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
 	if (!head) {
 		fprintf(stderr, "No ZA context\n");
 		return 1;
@@ -87,10 +91,22 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
 		return 1;
 	}
 
-	/* The actual size validation is done in get_current_context() */
+	if (head->size != ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl))) {
+		fprintf(stderr, "ZA context size %u, expected %lu\n",
+			head->size, ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl)));
+		return 1;
+	}
+
 	fprintf(stderr, "Got expected size %u and VL %d\n",
 		head->size, za->vl);
 
+	/* We didn't load any data into ZA so it should be all zeros */
+	if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET,
+		   ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) {
+		fprintf(stderr, "ZA data invalid\n");
+		return 1;
+	}
+
 	return 0;
 }
 
@@ -99,16 +115,6 @@ static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
 	int i;
 
 	for (i = 0; i < nvls; i++) {
-		/*
-		 * TODO: the signal test helpers can't currently cope
-		 * with signal frames bigger than struct sigcontext,
-		 * skip VLs that will trigger that.
-		 */
-		if (vls[i] > 32) {
-			printf("Skipping VL %u due to stack size\n", vls[i]);
-			continue;
-		}
-
 		if (do_one_sme_vl(td, si, uc, vls[i]))
 			return 1;
 	}