-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 3
-rw-r--r--  Documentation/admin-guide/perf/hisi-pmu.rst | 54
-rw-r--r--  Documentation/arm64/booting.rst | 13
-rw-r--r--  Documentation/arm64/pointer-authentication.rst | 34
-rw-r--r--  Documentation/arm64/tagged-address-abi.rst | 2
-rw-r--r--  Documentation/dev-tools/kasan.rst | 9
-rw-r--r--  arch/arm64/Kconfig | 41
-rw-r--r--  arch/arm64/configs/defconfig | 1
-rw-r--r--  arch/arm64/crypto/aes-modes.S | 2
-rw-r--r--  arch/arm64/crypto/sha1-ce-core.S | 2
-rw-r--r--  arch/arm64/crypto/sha2-ce-core.S | 2
-rw-r--r--  arch/arm64/crypto/sha3-ce-core.S | 4
-rw-r--r--  arch/arm64/crypto/sha512-ce-core.S | 2
-rw-r--r--  arch/arm64/include/asm/arch_gicv3.h | 2
-rw-r--r--  arch/arm64/include/asm/arch_timer.h | 21
-rw-r--r--  arch/arm64/include/asm/asm_pointer_auth.h | 20
-rw-r--r--  arch/arm64/include/asm/assembler.h | 114
-rw-r--r--  arch/arm64/include/asm/barrier.h | 23
-rw-r--r--  arch/arm64/include/asm/cpucaps.h | 3
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 17
-rw-r--r--  arch/arm64/include/asm/daifflags.h | 10
-rw-r--r--  arch/arm64/include/asm/el2_setup.h | 21
-rw-r--r--  arch/arm64/include/asm/fpsimd.h | 1
-rw-r--r--  arch/arm64/include/asm/irq.h | 4
-rw-r--r--  arch/arm64/include/asm/irq_work.h | 2
-rw-r--r--  arch/arm64/include/asm/irqflags.h | 16
-rw-r--r--  arch/arm64/include/asm/memory.h | 4
-rw-r--r--  arch/arm64/include/asm/mte-kasan.h | 9
-rw-r--r--  arch/arm64/include/asm/mte.h | 54
-rw-r--r--  arch/arm64/include/asm/pgalloc.h | 19
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 15
-rw-r--r--  arch/arm64/include/asm/pgtable-prot.h | 5
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 31
-rw-r--r--  arch/arm64/include/asm/pointer_auth.h | 61
-rw-r--r--  arch/arm64/include/asm/processor.h | 13
-rw-r--r--  arch/arm64/include/asm/ptdump.h | 2
-rw-r--r--  arch/arm64/include/asm/smp.h | 1
-rw-r--r--  arch/arm64/include/asm/stacktrace.h | 24
-rw-r--r--  arch/arm64/include/asm/sysreg.h | 13
-rw-r--r--  arch/arm64/include/asm/uaccess.h | 22
-rw-r--r--  arch/arm64/include/asm/vdso/gettimeofday.h | 6
-rw-r--r--  arch/arm64/include/asm/word-at-a-time.h | 4
-rw-r--r--  arch/arm64/kernel/asm-offsets.c | 7
-rw-r--r--  arch/arm64/kernel/cpufeature.c | 22
-rw-r--r--  arch/arm64/kernel/entry-common.c | 6
-rw-r--r--  arch/arm64/kernel/entry-fpsimd.S | 5
-rw-r--r--  arch/arm64/kernel/entry.S | 174
-rw-r--r--  arch/arm64/kernel/fpsimd.c | 39
-rw-r--r--  arch/arm64/kernel/head.S | 39
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 10
-rw-r--r--  arch/arm64/kernel/idreg-override.c | 26
-rw-r--r--  arch/arm64/kernel/irq.c | 35
-rw-r--r--  arch/arm64/kernel/kaslr.c | 18
-rw-r--r--  arch/arm64/kernel/module.c | 16
-rw-r--r--  arch/arm64/kernel/mte.c | 121
-rw-r--r--  arch/arm64/kernel/perf_event.c | 5
-rw-r--r--  arch/arm64/kernel/pointer_auth.c | 63
-rw-r--r--  arch/arm64/kernel/probes/kprobes.c | 3
-rw-r--r--  arch/arm64/kernel/process.c | 35
-rw-r--r--  arch/arm64/kernel/ptrace.c | 41
-rw-r--r--  arch/arm64/kernel/smp.c | 1
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 24
-rw-r--r--  arch/arm64/kernel/suspend.c | 6
-rw-r--r--  arch/arm64/kernel/vdso.c | 26
-rw-r--r--  arch/arm64/mm/fault.c | 18
-rw-r--r--  arch/arm64/mm/kasan_init.c | 29
-rw-r--r--  arch/arm64/mm/mmu.c | 41
-rw-r--r--  arch/arm64/mm/proc.S | 48
-rw-r--r--  arch/arm64/mm/ptdump.c | 4
-rw-r--r--  arch/arm64/mm/ptdump_debugfs.c | 2
-rw-r--r--  drivers/perf/arm-cci.c | 12
-rw-r--r--  drivers/perf/arm-ccn.c | 31
-rw-r--r--  drivers/perf/arm-cmn.c | 22
-rw-r--r--  drivers/perf/arm_dmc620_pmu.c | 2
-rw-r--r--  drivers/perf/arm_dsu_pmu.c | 5
-rw-r--r--  drivers/perf/arm_pmu_platform.c | 54
-rw-r--r--  drivers/perf/arm_smmuv3_pmu.c | 36
-rw-r--r--  drivers/perf/arm_spe_pmu.c | 3
-rw-r--r--  drivers/perf/fsl_imx8_ddr_perf.c | 7
-rw-r--r--  drivers/perf/hisilicon/Makefile | 3
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 348
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 301
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 355
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 500
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_pmu.c | 79
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_pmu.h | 20
-rw-r--r--  drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 530
-rw-r--r--  drivers/perf/qcom_l2_pmu.c | 2
-rw-r--r--  drivers/perf/qcom_l3_pmu.c | 4
-rw-r--r--  drivers/perf/thunderx2_pmu.c | 4
-rw-r--r--  drivers/perf/xgene_pmu.c | 4
-rw-r--r--  include/linux/cpuhotplug.h | 2
-rw-r--r--  include/linux/irq.h | 2
-rw-r--r--  include/linux/kasan.h | 6
-rw-r--r--  include/uapi/linux/elf.h | 1
-rw-r--r--  include/uapi/linux/prctl.h | 4
-rw-r--r--  kernel/sys.c | 16
-rw-r--r--  lib/test_kasan.c | 19
-rw-r--r--  mm/kasan/hw_tags.c | 66
-rw-r--r--  mm/kasan/kasan.h | 40
-rw-r--r--  mm/kasan/report.c | 22
-rw-r--r--  mm/mmap.c | 6
-rw-r--r--  tools/testing/selftests/arm64/Makefile | 2
-rw-r--r--  tools/testing/selftests/arm64/bti/.gitignore | 2
-rw-r--r--  tools/testing/selftests/arm64/bti/Makefile | 61
-rw-r--r--  tools/testing/selftests/arm64/bti/assembler.h | 80
-rw-r--r--  tools/testing/selftests/arm64/bti/btitest.h | 23
-rw-r--r--  tools/testing/selftests/arm64/bti/compiler.h | 21
-rw-r--r--  tools/testing/selftests/arm64/bti/gen/.gitignore | 2
-rw-r--r--  tools/testing/selftests/arm64/bti/signal.c | 37
-rw-r--r--  tools/testing/selftests/arm64/bti/signal.h | 21
-rw-r--r--  tools/testing/selftests/arm64/bti/start.S | 14
-rw-r--r--  tools/testing/selftests/arm64/bti/syscall.S | 23
-rw-r--r--  tools/testing/selftests/arm64/bti/system.c | 22
-rw-r--r--  tools/testing/selftests/arm64/bti/system.h | 28
-rw-r--r--  tools/testing/selftests/arm64/bti/test.c | 234
-rw-r--r--  tools/testing/selftests/arm64/bti/teststubs.S | 39
-rw-r--r--  tools/testing/selftests/arm64/bti/trampoline.S | 29
-rw-r--r--  tools/testing/selftests/arm64/mte/Makefile | 15
-rw-r--r--  tools/testing/selftests/arm64/mte/check_ksm_options.c | 5
-rw-r--r--  tools/testing/selftests/arm64/mte/check_user_mem.c | 3
-rw-r--r--  tools/testing/selftests/arm64/mte/mte_common_util.c | 39
122 files changed, 3862 insertions, 914 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0136cdf7e55d..9b3c086d4266 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2279,8 +2279,7 @@
 				   state is kept private from the host.
 				   Not valid if the kernel is running in EL2.
 
-			Defaults to VHE/nVHE based on hardware support and
-			the value of CONFIG_ARM64_VHE.
+			Defaults to VHE/nVHE based on hardware support.
 
 	kvm-arm.vgic_v3_group0_trap=
 			[KVM,ARM] Trap guest accesses to GICv3 group-0
diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index 404a5c3d9d00..546979360513 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -53,6 +53,60 @@ Example usage of perf::
   $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
   $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
 
+For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same
+as PMU v1, but some new functions are added to the hardware.
+
+(a) L3C PMU supports filtering by core/thread within the cluster, which can
+be specified as a bitmap::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5
+
+This will only count the operations from core/thread 0 and 1 in this cluster.
+
+(b) Tracetag allows the user to choose to count only read, write or atomic
+operations via the tt_req parameter in perf. The default value counts all
+operations. tt_req is 3 bits: 3'b100 represents read operations, 3'b101
+represents write operations, 3'b110 represents atomic store operations and
+3'b111 represents atomic non-store operations; other values are reserved::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5
+
+This will only count the read operations in this cluster.
+
+(c) Datasrc allows the user to check where the data comes from. It is 5 bits.
+Some important codes are as follows:
+5'b00001: comes from L3C in this die;
+5'b01000: comes from L3C in the cross-die;
+5'b01001: comes from L3C which is in another socket;
+5'b01110: comes from the local DDR;
+5'b01111: comes from the cross-die DDR;
+5'b10000: comes from cross-socket DDR;
+etc. It is mainly helpful for determining how near the data source is to the
+CPU cores. If datasrc_cfg is used on a multi-chip system, datasrc_skt shall
+also be configured in the perf command::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/,
+  hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5
+
+(d) Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
+contains several Compute Clusters (CCLs). The I/O dies are called Super I/O
+clusters (SICLs), each containing multiple I/O clusters (ICLs). Each CCL/ICL
+in the SoC has a unique ID. Each ID is 11 bits, comprising a 6-bit SCCL-ID
+and a 5-bit CCL/ICL-ID. For an I/O die, the ICL-ID is one of the following:
+5'b00000: I/O_MGMT_ICL;
+5'b00001: Network_ICL;
+5'b00011: HAC_ICL;
+5'b10000: PCIe_ICL;
+
+Users can configure IDs to count data coming from a specific CCL/ICL by
+setting srcid_cmd & srcid_msk, and data destined for a specific CCL/ICL by
+setting tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the PMU
+will not check that bit when matching against the srcid_cmd/tgtid_cmd.
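+
+For example (the ID values here are hypothetical and depend on the SoC),
+counting only data sourced from the CCL/ICL whose full ID is 0x38, with no
+bits ignored during matching, might look like::
+
+  $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,srcid_cmd=0x38,srcid_msk=0x0/ sleep 5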
+
+If all of these options are disabled, the PMU works with its default values:
+it does not distinguish by filter condition or ID information, and returns
+the total counter values in the PMU counters.
+
 The current driver does not support sampling. So "perf record" is unsupported.
 Also attach to a task is unsupported as the events are all uncore.
 
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 7552dbc1cc54..4fcc00add117 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -202,9 +202,10 @@ Before jumping into the kernel, the following conditions must be met:
 
 - System registers
 
-  All writable architected system registers at the exception level where
-  the kernel image will be entered must be initialised by software at a
-  higher exception level to prevent execution in an UNKNOWN state.
+  All writable architected system registers at or below the exception
+  level where the kernel image will be entered must be initialised by
+  software at a higher exception level to prevent execution in an UNKNOWN
+  state.
 
   - SCR_EL3.FIQ must have the same value across all CPUs the kernel is
     executing on.
@@ -270,6 +271,12 @@ Before jumping into the kernel, the following conditions must be met:
       having 0b1 set for the corresponding bit for each of the auxiliary
       counters present.
 
+  For CPUs with the Fine Grained Traps (FEAT_FGT) extension present:
+
+  - If EL3 is present and the kernel is entered at EL2:
+
+    - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.
diff --git a/Documentation/arm64/pointer-authentication.rst b/Documentation/arm64/pointer-authentication.rst
index 30b2ab06526b..f127666ea3a8 100644
--- a/Documentation/arm64/pointer-authentication.rst
+++ b/Documentation/arm64/pointer-authentication.rst
@@ -107,3 +107,37 @@ filter out the Pointer Authentication system key registers from
 KVM_GET/SET_REG_* ioctls and mask those features from cpufeature ID
 register. Any attempt to use the Pointer Authentication instructions will
 result in an UNDEFINED exception being injected into the guest.
+
+
+Enabling and disabling keys
+---------------------------
+
+The prctl PR_PAC_SET_ENABLED_KEYS allows the user program to control which
+PAC keys are enabled in a particular task. It takes two arguments, the
+first being a bitmask of PR_PAC_APIAKEY, PR_PAC_APIBKEY, PR_PAC_APDAKEY
+and PR_PAC_APDBKEY specifying which keys shall be affected by this prctl,
+and the second being a bitmask of the same bits specifying whether the key
+should be enabled or disabled. For example::
+
+  prctl(PR_PAC_SET_ENABLED_KEYS,
+        PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY,
+        PR_PAC_APIBKEY, 0, 0);
+
+disables all keys except the IB key.
+
+The main reason why this is useful is to enable a userspace ABI that uses PAC
+instructions to sign and authenticate function pointers and other pointers
+exposed outside of the function, while still allowing binaries conforming to
+the ABI to interoperate with legacy binaries that do not sign or authenticate
+pointers.
+
+The idea is that a dynamic loader or early startup code would issue this
+prctl very early after establishing that a process may load legacy binaries,
+but before executing any PAC instructions.
+
+For compatibility with previous kernel versions, processes start up with IA,
+IB, DA and DB enabled, and are reset to this state on exec(). Processes created
+via fork() and clone() inherit the key enabled state from the calling process.
+
+It is recommended to avoid disabling the IA key, as this has higher performance
+overhead than disabling any of the other keys.
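+
+The current set of enabled keys can be queried with the
+PR_PAC_GET_ENABLED_KEYS prctl, which takes no further arguments and returns
+a bitmask of the same bits. A minimal sketch (error handling omitted)::
+
+  int enabled = prctl(PR_PAC_GET_ENABLED_KEYS, 0, 0, 0, 0);
+
+  if (enabled & PR_PAC_APIBKEY)
+          /* the IB key is enabled for this task */;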
diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst
index 4a9d9c794ee5..cbc4d4500241 100644
--- a/Documentation/arm64/tagged-address-abi.rst
+++ b/Documentation/arm64/tagged-address-abi.rst
@@ -40,7 +40,7 @@ space obtained in one of the following ways:
   during creation and with the same restrictions as for ``mmap()`` above
   (e.g. data, bss, stack).
 
-The AArch64 Tagged Address ABI has two stages of relaxation depending
+The AArch64 Tagged Address ABI has two stages of relaxation depending on
 how the user addresses are used by the kernel:
 
 1. User addresses not accessed by the kernel but used for address space
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index ddf4239a5890..6f6ab3ed7b79 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -161,6 +161,15 @@ particular KASAN features.
 
 - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
 
+- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in
+  synchronous or asynchronous mode of execution (default: ``sync``).
+  Synchronous mode: a bad access is detected immediately when a tag
+  check fault occurs.
+  Asynchronous mode: a bad access detection is delayed. When a tag check
+  fault occurs, the information is stored in hardware (in the TFSR_EL1
+  register for arm64). The kernel periodically checks the hardware and
+  only reports tag faults during these checks.
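+
+  For example, booting with KASAN in asynchronous mode might use the
+  following kernel command-line fragment (illustrative)::
+
+      kasan=on kasan.mode=async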
+
 - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
   traces collection (default: ``on``).
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f1a032ed2274..406b42c05ee1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -108,9 +108,9 @@ config ARM64
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_IPI
-	select GENERIC_IRQ_MULTI_HANDLER
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
@@ -138,6 +138,7 @@ config ARM64
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
+	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
 	select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
 	select HAVE_ARCH_KFENCE
@@ -195,6 +196,7 @@ config ARM64
 	select IOMMU_DMA if IOMMU_SUPPORT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
+	select KASAN_VMALLOC if KASAN_GENERIC
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
@@ -1069,6 +1071,9 @@ config SYS_SUPPORTS_HUGETLBFS
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+	def_bool y
+
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y if PGTABLE_LEVELS > 2
 
@@ -1430,19 +1435,6 @@ config ARM64_USE_LSE_ATOMICS
 	  built with binutils >= 2.25 in order for the new instructions
 	  to be used.
 
-config ARM64_VHE
-	bool "Enable support for Virtualization Host Extensions (VHE)"
-	default y
-	help
-	  Virtualization Host Extensions (VHE) allow the kernel to run
-	  directly at EL2 (instead of EL1) on processors that support
-	  it. This leads to better performance for KVM, as they reduce
-	  the cost of the world switch.
-
-	  Selecting this option allows the VHE feature to be detected
-	  at runtime, and does not affect processors that do not
-	  implement this feature.
-
 endmenu
 
 menu "ARMv8.2 architectural features"
@@ -1696,10 +1688,23 @@ config ARM64_MTE
 
 endmenu
 
+menu "ARMv8.7 architectural features"
+
+config ARM64_EPAN
+	bool "Enable support for Enhanced Privileged Access Never (EPAN)"
+	default y
+	depends on ARM64_PAN
+	help
+	 Enhanced Privileged Access Never (EPAN) allows Privileged
+	 Access Never to be used with Execute-only mappings.
+
+	 The feature is detected at runtime, and will remain disabled
+	 if the cpu does not implement the feature.
+endmenu
+
 config ARM64_SVE
 	bool "ARM Scalable Vector Extension support"
 	default y
-	depends on !KVM || ARM64_VHE
 	help
 	  The Scalable Vector Extension (SVE) is an extension to the AArch64
 	  execution state which complements and extends the SIMD functionality
@@ -1728,12 +1733,6 @@ config ARM64_SVE
 	  booting the kernel.  If unsure and you are not observing these
 	  symptoms, you should assume that it is safe to say Y.
 
-	  CPUs that support SVE are architecturally required to support the
-	  Virtualization Host Extensions (VHE), so the kernel makes no
-	  provision for supporting SVE alongside KVM without VHE enabled.
-	  Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
-	  KVM in the same kernel image.
-
 config ARM64_MODULE_PLTS
 	bool "Use PLTs to allow module memory to spill over into vmalloc area"
 	depends on MODULES
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index d612f633b771..8793a9cb9d4b 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1156,6 +1156,7 @@ CONFIG_CRYPTO_DEV_HISI_TRNG=m
 CONFIG_CMA_SIZE_MBYTES=32
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 247011356d11..b495de22bb38 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -701,7 +701,7 @@ AES_FUNC_START(aes_mac_update)
 	cbz		w5, .Lmacout
 	encrypt_block	v0, w2, x1, x7, w8
 	st1		{v0.16b}, [x4]			/* return dg */
-	cond_yield	.Lmacout, x7
+	cond_yield	.Lmacout, x7, x8
 	b		.Lmacloop4x
 .Lmac1x:
 	add		w3, w3, #4
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 8c02bbc2684e..889ca0f8972b 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -121,7 +121,7 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	add		dgav.4s, dgav.4s, dg0v.4s
 
 	cbz		w2, 2f
-	cond_yield	3f, x5
+	cond_yield	3f, x5, x6
 	b		0b
 
 	/*
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 6cdea7d56059..491179922f49 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -129,7 +129,7 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 
 	/* handled all input blocks? */
 	cbz		w2, 2f
-	cond_yield	3f, x5
+	cond_yield	3f, x5, x6
 	b		0b
 
 	/*
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 6f5208414fe3..9c77313f5a60 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -184,11 +184,11 @@ SYM_FUNC_START(sha3_ce_transform)
 	eor	 v0.16b,  v0.16b, v31.16b
 
 	cbnz	w8, 3b
-	cond_yield 3f, x8
+	cond_yield 4f, x8, x9
 	cbnz	w2, 0b
 
 	/* save state */
-3:	st1	{ v0.1d- v3.1d}, [x0], #32
+4:	st1	{ v0.1d- v3.1d}, [x0], #32
 	st1	{ v4.1d- v7.1d}, [x0], #32
 	st1	{ v8.1d-v11.1d}, [x0], #32
 	st1	{v12.1d-v15.1d}, [x0], #32
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index d6e7f6c95fa6..b6a3a36e15f5 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -195,7 +195,7 @@ CPU_LE(	rev64		v19.16b, v19.16b	)
 	add		v10.2d, v10.2d, v2.2d
 	add		v11.2d, v11.2d, v3.2d
 
-	cond_yield	3f, x4
+	cond_yield	3f, x4, x5
 	/* handled all input blocks? */
 	cbnz		w2, 0b
 
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 880b9054d75c..934b9be582d2 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void)
 
 static inline void gic_arch_enable_irqs(void)
 {
-	asm volatile ("msr daifclr, #2" : : : "memory");
+	asm volatile ("msr daifclr, #3" : : : "memory");
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index 9f0ec21d6327..88d20f04c64a 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -165,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
 	isb();
 }
 
-/*
- * Ensure that reads of the counter are treated the same as memory reads
- * for the purposes of ordering by subsequent memory barriers.
- *
- * This insanity brought to you by speculative system register reads,
- * out-of-order memory accesses, sequence locks and Thomas Gleixner.
- *
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
- */
-#define arch_counter_enforce_ordering(val) do {				\
-	u64 tmp, _val = (val);						\
-									\
-	asm volatile(							\
-	"	eor	%0, %1, %1\n"					\
-	"	add	%0, sp, %0\n"					\
-	"	ldr	xzr, [%0]"					\
-	: "=r" (tmp) : "r" (_val));					\
-} while (0)
-
 static __always_inline u64 __arch_counter_get_cntpct_stable(void)
 {
 	u64 cnt;
@@ -224,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void)
 	return cnt;
 }
 
-#undef arch_counter_enforce_ordering
-
 static inline int arch_timer_arch_init(void)
 {
 	return 0;
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index 52dead2a8640..8ca2dc0661ee 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -13,30 +13,12 @@
  * so use the base value of ldp as thread.keys_user and offset as
  * thread.keys_user.ap*.
  */
-	.macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+	.macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
 	mov	\tmp1, #THREAD_KEYS_USER
 	add	\tmp1, \tsk, \tmp1
-alternative_if_not ARM64_HAS_ADDRESS_AUTH
-	b	.Laddr_auth_skip_\@
-alternative_else_nop_endif
 	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
 	msr_s	SYS_APIAKEYLO_EL1, \tmp2
 	msr_s	SYS_APIAKEYHI_EL1, \tmp3
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIB]
-	msr_s	SYS_APIBKEYLO_EL1, \tmp2
-	msr_s	SYS_APIBKEYHI_EL1, \tmp3
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDA]
-	msr_s	SYS_APDAKEYLO_EL1, \tmp2
-	msr_s	SYS_APDAKEYHI_EL1, \tmp3
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APDB]
-	msr_s	SYS_APDBKEYLO_EL1, \tmp2
-	msr_s	SYS_APDBKEYHI_EL1, \tmp3
-.Laddr_auth_skip_\@:
-alternative_if ARM64_HAS_GENERIC_AUTH
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APGA]
-	msr_s	SYS_APGAKEYLO_EL1, \tmp2
-	msr_s	SYS_APGAKEYHI_EL1, \tmp3
-alternative_else_nop_endif
 	.endm
 
 	.macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ca31594d3d6c..ab569b0b45fc 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -15,6 +15,7 @@
 #include <asm-generic/export.h>
 
 #include <asm/asm-offsets.h>
+#include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/cputype.h>
 #include <asm/debug-monitors.h>
@@ -23,6 +24,14 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
+	/*
+	 * Provide a wxN alias for each wN register so that we can paste an xN
+	 * reference after a 'w' to obtain the 32-bit version (e.g. 'w\tmp'
+	 * with \tmp set to x0 produces wx0, an alias for w0).
+	 */
+	.irp	n,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
+	wx\n	.req	w\n
+	.endr
+
 	.macro save_and_disable_daif, flags
 	mrs	\flags, daif
 	msr	daifset, #0xf
@@ -40,9 +49,9 @@
 	msr	daif, \flags
 	.endm
 
-	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
-	.macro enable_da_f
-	msr	daifclr, #(8 | 4 | 1)
+	/* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */
+	.macro enable_da
+	msr	daifclr, #(8 | 4)
 	.endm
 
 /*
@@ -50,7 +59,7 @@
  */
 	.macro	save_and_disable_irq, flags
 	mrs	\flags, daif
-	msr	daifset, #2
+	msr	daifset, #3
 	.endm
 
 	.macro	restore_irq, flags
@@ -692,90 +701,33 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	isb
 .endm
 
-/*
- * Check whether to yield to another runnable task from kernel mode NEON code
- * (which runs with preemption disabled).
- *
- * if_will_cond_yield_neon
- *        // pre-yield patchup code
- * do_cond_yield_neon
- *        // post-yield patchup code
- * endif_yield_neon    <label>
- *
- * where <label> is optional, and marks the point where execution will resume
- * after a yield has been performed. If omitted, execution resumes right after
- * the endif_yield_neon invocation. Note that the entire sequence, including
- * the provided patchup code, will be omitted from the image if
- * CONFIG_PREEMPTION is not defined.
- *
- * As a convenience, in the case where no patchup code is required, the above
- * sequence may be abbreviated to
- *
- * cond_yield_neon <label>
- *
- * Note that the patchup code does not support assembler directives that change
- * the output section, any use of such directives is undefined.
- *
- * The yield itself consists of the following:
- * - Check whether the preempt count is exactly 1 and a reschedule is also
- *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
- *   trigger a reschedule. If it is not the case, yielding is pointless.
- * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
- *   code.
- *
- * This macro sequence may clobber all CPU state that is not guaranteed by the
- * AAPCS to be preserved across an ordinary function call.
- */
-
-	.macro		cond_yield_neon, lbl
-	if_will_cond_yield_neon
-	do_cond_yield_neon
-	endif_yield_neon	\lbl
-	.endm
-
-	.macro		if_will_cond_yield_neon
-#ifdef CONFIG_PREEMPTION
-	get_current_task	x0
-	ldr		x0, [x0, #TSK_TI_PREEMPT]
-	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
-	cbz		x0, .Lyield_\@
-	/* fall through to endif_yield_neon */
-	.subsection	1
-.Lyield_\@ :
-#else
-	.section	".discard.cond_yield_neon", "ax"
-#endif
-	.endm
-
-	.macro		do_cond_yield_neon
-	bl		kernel_neon_end
-	bl		kernel_neon_begin
-	.endm
-
-	.macro		endif_yield_neon, lbl
-	.ifnb		\lbl
-	b		\lbl
-	.else
-	b		.Lyield_out_\@
-	.endif
-	.previous
-.Lyield_out_\@ :
-	.endm
-
 	/*
-	 * Check whether preempt-disabled code should yield as soon as it
-	 * is able. This is the case if re-enabling preemption a single
-	 * time results in a preempt count of zero, and the TIF_NEED_RESCHED
-	 * flag is set. (Note that the latter is stored negated in the
-	 * top word of the thread_info::preempt_count field)
+	 * Check whether preempt/bh-disabled asm code should yield as soon as
+	 * it is able. This is the case if we are currently running in task
+	 * context, and either a softirq is pending, or the TIF_NEED_RESCHED
+	 * flag is set and re-enabling preemption a single time would result in
+	 * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is
+	 * stored negated in the top word of the thread_info::preempt_count
+	 * field)
 	 */
-	.macro		cond_yield, lbl:req, tmp:req
-#ifdef CONFIG_PREEMPTION
+	.macro		cond_yield, lbl:req, tmp:req, tmp2:req
 	get_current_task \tmp
 	ldr		\tmp, [\tmp, #TSK_TI_PREEMPT]
+	/*
+	 * If we are serving a softirq, there is no point in yielding: the
+	 * softirq will not be preempted no matter what we do, so we should
+	 * run to completion as quickly as we can.
+	 */
+	tbnz		\tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@
+#ifdef CONFIG_PREEMPTION
 	sub		\tmp, \tmp, #PREEMPT_DISABLE_OFFSET
 	cbz		\tmp, \lbl
 #endif
+	adr_l		\tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
+	this_cpu_offset	\tmp2
+	ldr		w\tmp, [\tmp, \tmp2]
+	cbnz		w\tmp, \lbl	// yield on pending softirq in task context
+.Lnoyield_\@:
 	.endm
 
 /*
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index c3009b0e5239..065ba482daf0 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,10 +25,6 @@
 #define psb_csync()	asm volatile("hint #17" : : : "memory")
 #define csdb()		asm volatile("hint #20" : : : "memory")
 
-#define spec_bar()	asm volatile(ALTERNATIVE("dsb nsh\nisb\n",		\
-						 SB_BARRIER_INSN"nop\n",	\
-						 ARM64_HAS_SB))
-
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 #define pmr_sync()						\
 	do {							\
@@ -70,6 +66,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
 	return mask;
 }
 
+/*
+ * Ensure that reads of the counter are treated the same as memory reads
+ * for the purposes of ordering by subsequent memory barriers.
+ *
+ * This insanity brought to you by speculative system register reads,
+ * out-of-order memory accesses, sequence locks and Thomas Gleixner.
+ *
+ * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ */
+#define arch_counter_enforce_ordering(val) do {				\
+	u64 tmp, _val = (val);						\
+									\
+	asm volatile(							\
+	"	eor	%0, %1, %1\n"					\
+	"	add	%0, sp, %0\n"					\
+	"	ldr	xzr, [%0]"					\
+	: "=r" (tmp) : "r" (_val));					\
+} while (0)
+
 #define __smp_mb()	dmb(ish)
 #define __smp_rmb()	dmb(ishld)
 #define __smp_wmb()	dmb(ishst)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index c40f2490cd7b..b0c5eda0498f 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -67,7 +67,8 @@
 #define ARM64_HAS_LDAPR				59
 #define ARM64_KVM_PROTECTED_MODE		60
 #define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP	61
+#define ARM64_HAS_EPAN				62
 
-#define ARM64_NCAPS				62
+#define ARM64_NCAPS				63
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 61177bac49fa..338840c00e8e 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -63,6 +63,23 @@ struct arm64_ftr_bits {
 	s64		safe_val; /* safe value for FTR_EXACT features */
 };
 
+/*
+ * Describe the early feature override to the core override code:
+ *
+ * @val			Values that are to be merged into the final
+ *			sanitised value of the register. Only the bitfields
+ *			set to 1 in @mask are valid.
+ * @mask		Mask of the features that are overridden by @val
+ *
+ * A @mask field set to full-1 indicates that the corresponding field
+ * in @val is a valid override.
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-0 denotes that this field has no override.
+ *
+ * A @mask field set to full-0 with the corresponding @val field set
+ * to full-1 denotes that this field has an invalid override.
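+ *
+ * For example (illustrative): to override a 4-bit field at bits [31:28]
+ * to the value 1, set @mask bits [31:28] to 0b1111 and @val bits [31:28]
+ * to 0b0001.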
+ */
 struct arm64_ftr_override {
 	u64		val;
 	u64		mask;
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 1c26d7baa67f..5eb7af9c4557 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -13,8 +13,8 @@
 #include <asm/ptrace.h>
 
 #define DAIF_PROCCTX		0
-#define DAIF_PROCCTX_NOIRQ	PSR_I_BIT
-#define DAIF_ERRCTX		(PSR_I_BIT | PSR_A_BIT)
+#define DAIF_PROCCTX_NOIRQ	(PSR_I_BIT | PSR_F_BIT)
+#define DAIF_ERRCTX		(PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
 #define DAIF_MASK		(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
 
 
@@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void)
 	if (system_uses_irq_prio_masking()) {
 		/* If IRQs are masked with PMR, reflect it in the flags */
 		if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON)
-			flags |= PSR_I_BIT;
+			flags |= PSR_I_BIT | PSR_F_BIT;
 	}
 
 	return flags;
@@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags)
 	bool irq_disabled = flags & PSR_I_BIT;
 
 	WARN_ON(system_has_prio_mask_debugging() &&
-		!(read_sysreg(daif) & PSR_I_BIT));
+		(read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT));
 
 	if (!irq_disabled) {
 		trace_hardirqs_on();
@@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags)
 			 * If interrupts are disabled but we can take
 			 * asynchronous errors, we can take NMIs
 			 */
-			flags &= ~PSR_I_BIT;
+			flags &= ~(PSR_I_BIT | PSR_F_BIT);
 			pmr = GIC_PRIO_IRQOFF;
 		} else {
 			pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET;
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index d77d358f9395..b3f2d3bb0938 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -131,6 +131,26 @@
 .Lskip_sve_\@:
 .endm
 
+/* Disable any fine grained traps */
+.macro __init_el2_fgt
+	mrs	x1, id_aa64mmfr0_el1
+	ubfx	x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4
+	cbz	x1, .Lskip_fgt_\@
+
+	msr_s	SYS_HDFGRTR_EL2, xzr
+	msr_s	SYS_HDFGWTR_EL2, xzr
+	msr_s	SYS_HFGRTR_EL2, xzr
+	msr_s	SYS_HFGWTR_EL2, xzr
+	msr_s	SYS_HFGITR_EL2, xzr
+
+	mrs	x1, id_aa64pfr0_el1		// AMU traps UNDEF without AMU
+	ubfx	x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4
+	cbz	x1, .Lskip_fgt_\@
+
+	msr_s	SYS_HAFGRTR_EL2, xzr
+.Lskip_fgt_\@:
+.endm
+
 .macro __init_el2_nvhe_prepare_eret
 	mov	x0, #INIT_PSTATE_EL1
 	msr	spsr_el2, x0
@@ -155,6 +175,7 @@
 	__init_el2_nvhe_idregs
 	__init_el2_nvhe_cptr
 	__init_el2_nvhe_sve
+	__init_el2_fgt
 	__init_el2_nvhe_prepare_eret
 .endm
 
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index bec5f14b622a..ebb263b2d3b1 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -73,6 +73,7 @@ extern void sve_flush_live(void);
 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
 				       unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
+extern void sve_set_vq(unsigned long vq_minus_1);
 
 struct arm64_cpu_capabilities;
 extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index b2b0c6405eb0..fac08e18bcd5 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -8,6 +8,10 @@
 
 struct pt_regs;
 
+int set_handle_irq(void (*handle_irq)(struct pt_regs *));
+#define set_handle_irq	set_handle_irq
+int set_handle_fiq(void (*handle_fiq)(struct pt_regs *));
+
 static inline int nr_legacy_irqs(void)
 {
 	return 0;
diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h
index a1020285ea75..81bbfa3a035b 100644
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@@ -2,6 +2,8 @@
 #ifndef __ASM_IRQ_WORK_H
 #define __ASM_IRQ_WORK_H
 
+extern void arch_irq_work_raise(void);
+
 static inline bool arch_irq_work_has_interrupt(void)
 {
 	return true;
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index ff328e5bbb75..b57b9b1e4344 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -12,15 +12,13 @@
 
 /*
  * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and
- * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'dai'
+ * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif'
  * order:
  * Masking debug exceptions causes all other exceptions to be masked too/
- * Masking SError masks irq, but not debug exceptions. Masking irqs has no
- * side effects for other flags. Keeping to this order makes it easier for
- * entry.S to know which exceptions should be unmasked.
- *
- * FIQ is never expected, but we mask it when we disable debug exceptions, and
- * unmask it at all other times.
+ * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are
+ * always masked and unmasked together, and have no side effects for other
+ * flags. Keeping to this order makes it easier for entry.S to know which
+ * exceptions should be unmasked.
  */
 
 /*
@@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void)
 	}
 
 	asm volatile(ALTERNATIVE(
-		"msr	daifclr, #2		// arch_local_irq_enable",
+		"msr	daifclr, #3		// arch_local_irq_enable",
 		__msr_s(SYS_ICC_PMR_EL1, "%0"),
 		ARM64_HAS_IRQ_PRIO_MASKING)
 		:
@@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void)
 	}
 
 	asm volatile(ALTERNATIVE(
-		"msr	daifset, #2		// arch_local_irq_disable",
+		"msr	daifset, #3		// arch_local_irq_disable",
 		__msr_s(SYS_ICC_PMR_EL1, "%0"),
 		ARM64_HAS_IRQ_PRIO_MASKING)
 		:
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..b943879c1c24 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -243,8 +243,10 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 }
 
 #ifdef CONFIG_KASAN_HW_TAGS
-#define arch_enable_tagging()			mte_enable_kernel()
+#define arch_enable_tagging_sync()		mte_enable_kernel_sync()
+#define arch_enable_tagging_async()		mte_enable_kernel_async()
 #define arch_set_tagging_report_once(state)	mte_set_report_once(state)
+#define arch_force_async_tag_fault()		mte_check_tfsr_exit()
 #define arch_init_tags(max_tag)			mte_init_tags(max_tag)
 #define arch_get_random_tag()			mte_get_random_tag()
 #define arch_get_mem_tag(addr)			mte_get_mem_tag(addr)
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 7ab500e2ad17..4acf8bf41cad 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -77,7 +77,8 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
 	} while (curr != end);
 }
 
-void mte_enable_kernel(void);
+void mte_enable_kernel_sync(void);
+void mte_enable_kernel_async(void);
 void mte_init_tags(u64 max_tag);
 
 void mte_set_report_once(bool state);
@@ -104,7 +105,11 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
 {
 }
 
-static inline void mte_enable_kernel(void)
+static inline void mte_enable_kernel_sync(void)
+{
+}
+
+static inline void mte_enable_kernel_async(void)
 {
 }
 
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 9b557a457f24..bc88a1ced0d7 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -39,16 +39,15 @@ void mte_free_tag_storage(char *storage);
 
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
-void flush_mte_state(void);
+void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
+void mte_suspend_enter(void);
 void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
 int mte_ptrace_copy_tags(struct task_struct *child, long request,
 			 unsigned long addr, unsigned long data);
 
-void mte_assign_mem_tag_range(void *addr, size_t size);
-
 #else /* CONFIG_ARM64_MTE */
 
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
@@ -60,12 +59,15 @@ static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 {
 }
-static inline void flush_mte_state(void)
+static inline void mte_thread_init_user(void)
 {
 }
 static inline void mte_thread_switch(struct task_struct *next)
 {
 }
+static inline void mte_suspend_enter(void)
+{
+}
 static inline void mte_suspend_exit(void)
 {
 }
@@ -84,11 +86,51 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child,
 	return -EIO;
 }
 
-static inline void mte_assign_mem_tag_range(void *addr, size_t size)
+#endif /* CONFIG_ARM64_MTE */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Whether the MTE asynchronous mode is enabled. */
+DECLARE_STATIC_KEY_FALSE(mte_async_mode);
+
+static inline bool system_uses_mte_async_mode(void)
 {
+	return static_branch_unlikely(&mte_async_mode);
 }
 
-#endif /* CONFIG_ARM64_MTE */
+void mte_check_tfsr_el1(void);
+
+static inline void mte_check_tfsr_entry(void)
+{
+	mte_check_tfsr_el1();
+}
+
+static inline void mte_check_tfsr_exit(void)
+{
+	/*
+	 * The asynchronous faults are sync'ed automatically with
+	 * TFSR_EL1 on kernel entry but for exit an explicit dsb()
+	 * is required.
+	 */
+	dsb(nsh);
+	isb();
+
+	mte_check_tfsr_el1();
+}
+#else
+static inline bool system_uses_mte_async_mode(void)
+{
+	return false;
+}
+static inline void mte_check_tfsr_el1(void)
+{
+}
+static inline void mte_check_tfsr_entry(void)
+{
+}
+static inline void mte_check_tfsr_exit(void)
+{
+}
+#endif /* CONFIG_KASAN_HW_TAGS */
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_MTE_H  */
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 3c6a7f5988b1..31fbab3d6f99 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -27,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-	__pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
+	pudval_t pudval = PUD_TYPE_TABLE;
+
+	pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN;
+	__pud_populate(pudp, __pa(pmdp), pudval);
 }
 #else
 static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
@@ -45,7 +48,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
 
 static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
 {
-	__p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE);
+	p4dval_t p4dval = P4D_TYPE_TABLE;
+
+	p4dval |= (mm == &init_mm) ? P4D_TABLE_UXN : P4D_TABLE_PXN;
+	__p4d_populate(p4dp, __pa(pudp), p4dval);
 }
 #else
 static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
@@ -70,16 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 {
-	/*
-	 * The pmd must be loaded with the physical address of the PTE table
-	 */
-	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE);
+	VM_BUG_ON(mm != &init_mm);
+	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
 }
 
 static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
 {
-	__pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE);
+	VM_BUG_ON(mm == &init_mm);
+	__pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 42442a0ae2ab..b82575a33f8b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -94,6 +94,17 @@
 /*
  * Hardware page table definitions.
  *
+ * Level 0 descriptor (P4D).
+ */
+#define P4D_TYPE_TABLE		(_AT(p4dval_t, 3) << 0)
+#define P4D_TABLE_BIT		(_AT(p4dval_t, 1) << 1)
+#define P4D_TYPE_MASK		(_AT(p4dval_t, 3) << 0)
+#define P4D_TYPE_SECT		(_AT(p4dval_t, 1) << 0)
+#define P4D_SECT_RDONLY		(_AT(p4dval_t, 1) << 7)		/* AP[2] */
+#define P4D_TABLE_PXN		(_AT(p4dval_t, 1) << 59)
+#define P4D_TABLE_UXN		(_AT(p4dval_t, 1) << 60)
+
+/*
  * Level 1 descriptor (PUD).
  */
 #define PUD_TYPE_TABLE		(_AT(pudval_t, 3) << 0)
@@ -101,6 +112,8 @@
 #define PUD_TYPE_MASK		(_AT(pudval_t, 3) << 0)
 #define PUD_TYPE_SECT		(_AT(pudval_t, 1) << 0)
 #define PUD_SECT_RDONLY		(_AT(pudval_t, 1) << 7)		/* AP[2] */
+#define PUD_TABLE_PXN		(_AT(pudval_t, 1) << 59)
+#define PUD_TABLE_UXN		(_AT(pudval_t, 1) << 60)
 
 /*
  * Level 2 descriptor (PMD).
@@ -122,6 +135,8 @@
 #define PMD_SECT_CONT		(_AT(pmdval_t, 1) << 52)
 #define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 53)
 #define PMD_SECT_UXN		(_AT(pmdval_t, 1) << 54)
+#define PMD_TABLE_PXN		(_AT(pmdval_t, 1) << 59)
+#define PMD_TABLE_UXN		(_AT(pmdval_t, 1) << 60)
 
 /*
  * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9a65fb528110..fab2f573f7a4 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings;
 #define PAGE_SHARED_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
 #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
 
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_READONLY
 #define __P011  PAGE_READONLY
-#define __P100  PAGE_READONLY_EXEC
+#define __P100  PAGE_EXECONLY
 #define __P101  PAGE_READONLY_EXEC
 #define __P110  PAGE_READONLY_EXEC
 #define __P111  PAGE_READONLY_EXEC
@@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings;
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_READONLY_EXEC
+#define __S100  PAGE_EXECONLY
 #define __S101  PAGE_READONLY_EXEC
 #define __S110  PAGE_SHARED_EXEC
 #define __S111  PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 47027796c2f9..0b10204e72fc 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -113,11 +113,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
+/*
+ * Execute-only user mappings do not have the PTE_USER bit set. All valid
+ * kernel mappings have the PTE_UXN bit set.
+ */
 #define pte_valid_not_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
-#define pte_valid_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
-
+	((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
 /*
  * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
  * so that we don't erroneously return false for pages that have been
@@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
 
 /*
- * p??_access_permitted() is true for valid user mappings (subject to the
- * write permission check). PROT_NONE mappings do not have the PTE_VALID bit
- * set.
+ * p??_access_permitted() is true for valid user mappings (PTE_USER
+ * bit set, subject to the write permission check). Execute-only
+ * mappings, such as PROT_EXEC with EPAN (where both the PTE_USER and
+ * PTE_UXN bits are clear), must return false. PROT_NONE mappings do
+ * not have the PTE_VALID bit set.
  */
 #define pte_access_permitted(pte, write) \
-	(pte_valid_user(pte) && (!(write) || pte_write(pte)))
+	(((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte)))
 #define pmd_access_permitted(pmd, write) \
 	(pte_access_permitted(pmd_pte(pmd), (write)))
 #define pud_access_permitted(pud, write) \
@@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void)
 }
 #define arch_wants_old_prefaulted_pte	arch_wants_old_prefaulted_pte
 
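+/*
+ * Execute-only user mappings are only supported when the CPU has EPAN;
+ * otherwise fall back to a readable executable mapping.
+ */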
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+	if (cpus_have_const_cap(ARM64_HAS_EPAN))
+		return prot;
+
+	if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY))
+		return prot;
+
+	return PAGE_READONLY_EXEC;
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index b112a11e9302..d50416be99be 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -3,6 +3,7 @@
 #define __ASM_POINTER_AUTH_H
 
 #include <linux/bitops.h>
+#include <linux/prctl.h>
 #include <linux/random.h>
 
 #include <asm/cpufeature.h>
@@ -34,6 +35,25 @@ struct ptrauth_keys_kernel {
 	struct ptrauth_key apia;
 };
 
+#define __ptrauth_key_install_nosync(k, v)			\
+do {								\
+	struct ptrauth_key __pki_v = (v);			\
+	write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1);	\
+	write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1);	\
+} while (0)
+
+static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
+{
+	if (system_supports_address_auth()) {
+		__ptrauth_key_install_nosync(APIB, keys->apib);
+		__ptrauth_key_install_nosync(APDA, keys->apda);
+		__ptrauth_key_install_nosync(APDB, keys->apdb);
+	}
+
+	if (system_supports_generic_auth())
+		__ptrauth_key_install_nosync(APGA, keys->apga);
+}
+
 static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
 {
 	if (system_supports_address_auth()) {
@@ -45,14 +65,9 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
 
 	if (system_supports_generic_auth())
 		get_random_bytes(&keys->apga, sizeof(keys->apga));
-}
 
-#define __ptrauth_key_install_nosync(k, v)			\
-do {								\
-	struct ptrauth_key __pki_v = (v);			\
-	write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1);	\
-	write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1);	\
-} while (0)
+	ptrauth_keys_install_user(keys);
+}
 
 static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
 {
@@ -71,6 +86,10 @@ static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kerne
 
 extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
 
+extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+				    unsigned long enabled);
+extern int ptrauth_get_enabled_keys(struct task_struct *tsk);
+
 static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 {
 	return ptrauth_clear_pac(ptr);
@@ -85,8 +104,23 @@ static __always_inline void ptrauth_enable(void)
 	isb();
 }
 
-#define ptrauth_thread_init_user(tsk)					\
-	ptrauth_keys_init_user(&(tsk)->thread.keys_user)
+#define ptrauth_suspend_exit()                                                 \
+	ptrauth_keys_install_user(&current->thread.keys_user)
+
+#define ptrauth_thread_init_user()                                             \
+	do {                                                                   \
+		ptrauth_keys_init_user(&current->thread.keys_user);            \
+									       \
+		/* enable all keys */                                          \
+		if (system_supports_address_auth())                            \
+			set_task_sctlr_el1(current->thread.sctlr_user |        \
+					   SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |   \
+					   SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);   \
+	} while (0)
+
+#define ptrauth_thread_switch_user(tsk)                                        \
+	ptrauth_keys_install_user(&(tsk)->thread.keys_user)
+
 #define ptrauth_thread_init_kernel(tsk)					\
 	ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
 #define ptrauth_thread_switch_kernel(tsk)				\
@@ -95,10 +129,17 @@ static __always_inline void ptrauth_enable(void)
 #else /* CONFIG_ARM64_PTR_AUTH */
 #define ptrauth_enable()
 #define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
+#define ptrauth_set_enabled_keys(tsk, keys, enabled)	(-EINVAL)
+#define ptrauth_get_enabled_keys(tsk)	(-EINVAL)
 #define ptrauth_strip_insn_pac(lr)	(lr)
-#define ptrauth_thread_init_user(tsk)
+#define ptrauth_suspend_exit()
+#define ptrauth_thread_init_user()
 #define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_user(tsk)
 #define ptrauth_thread_switch_kernel(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#define PR_PAC_ENABLED_KEYS_MASK                                               \
+	(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
+
 #endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index efc10e9041a0..9df3feeee890 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,11 +151,15 @@ struct thread_struct {
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
 #ifdef CONFIG_ARM64_MTE
-	u64			sctlr_tcf0;
 	u64			gcr_user_excl;
 #endif
+	u64			sctlr_user;
 };
 
+#define SCTLR_USER_MASK                                                        \
+	(SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB |   \
+	 SCTLR_EL1_TCF0_MASK)
+
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
 						unsigned long *size)
 {
@@ -247,6 +251,8 @@ extern void release_thread(struct task_struct *);
 
 unsigned long get_wchan(struct task_struct *p);
 
+void set_task_sctlr_el1(u64 sctlr);
+
 /* Thread switching */
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 					 struct task_struct *next);
@@ -303,6 +309,11 @@ extern void __init minsigstksz_setup(void);
 /* PR_PAC_RESET_KEYS prctl */
 #define PAC_RESET_KEYS(tsk, arg)	ptrauth_prctl_reset_keys(tsk, arg)
 
+/* PR_PAC_{SET,GET}_ENABLED_KEYS prctl */
+#define PAC_SET_ENABLED_KEYS(tsk, keys, enabled)				\
+	ptrauth_set_enabled_keys(tsk, keys, enabled)
+#define PAC_GET_ENABLED_KEYS(tsk) ptrauth_get_enabled_keys(tsk)
+
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
 /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
 long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 38187f74e089..b1dd7ecff7ef 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -23,7 +23,7 @@ struct ptdump_info {
 
 void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
 #ifdef CONFIG_PTDUMP_DEBUGFS
-void ptdump_debugfs_register(struct ptdump_info *info, const char *name);
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name);
 #else
 static inline void ptdump_debugfs_register(struct ptdump_info *info,
 					   const char *name) { }
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index bcb01ca15325..0e357757c0cc 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -145,6 +145,7 @@ bool cpus_are_stuck_in_kernel(void);
 
 extern void crash_smp_send_stop(void);
 extern bool smp_crash_stop_failed(void);
+extern void panic_smp_self_stop(void);
 
 #endif /* ifndef __ASSEMBLY__ */
 
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index eb29b1fe8255..4b33ca620679 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -148,27 +148,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
 	return false;
 }
 
-static inline void start_backtrace(struct stackframe *frame,
-				   unsigned long fp, unsigned long pc)
-{
-	frame->fp = fp;
-	frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame->graph = 0;
-#endif
-
-	/*
-	 * Prime the first unwind.
-	 *
-	 * In unwind_frame() we'll check that the FP points to a valid stack,
-	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
-	 * treated as a transition to whichever stack that happens to be. The
-	 * prev_fp value won't be used, but we set it to 0 such that it is
-	 * definitely not an accessible stack address.
-	 */
-	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
-	frame->prev_fp = 0;
-	frame->prev_type = STACK_TYPE_UNKNOWN;
-}
+void start_backtrace(struct stackframe *frame, unsigned long fp,
+		     unsigned long pc);
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d4a5fca984c3..b31ac5ccc8ab 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -475,9 +475,15 @@
 #define SYS_PMCCFILTR_EL0		sys_reg(3, 3, 14, 15, 7)
 
 #define SYS_SCTLR_EL2			sys_reg(3, 4, 1, 0, 0)
+#define SYS_HFGRTR_EL2			sys_reg(3, 4, 1, 1, 4)
+#define SYS_HFGWTR_EL2			sys_reg(3, 4, 1, 1, 5)
+#define SYS_HFGITR_EL2			sys_reg(3, 4, 1, 1, 6)
 #define SYS_ZCR_EL2			sys_reg(3, 4, 1, 2, 0)
 #define SYS_TRFCR_EL2			sys_reg(3, 4, 1, 2, 1)
 #define SYS_DACR32_EL2			sys_reg(3, 4, 3, 0, 0)
+#define SYS_HDFGRTR_EL2			sys_reg(3, 4, 3, 1, 4)
+#define SYS_HDFGWTR_EL2			sys_reg(3, 4, 3, 1, 5)
+#define SYS_HAFGRTR_EL2			sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2			sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2			sys_reg(3, 4, 4, 0, 1)
 #define SYS_IFSR32_EL2			sys_reg(3, 4, 5, 0, 1)
@@ -565,8 +571,10 @@
 #define SCTLR_ELx_TCF_ASYNC	(UL(0x2) << SCTLR_ELx_TCF_SHIFT)
 #define SCTLR_ELx_TCF_MASK	(UL(0x3) << SCTLR_ELx_TCF_SHIFT)
 
+#define SCTLR_ELx_ENIA_SHIFT	31
+
 #define SCTLR_ELx_ITFSB	(BIT(37))
-#define SCTLR_ELx_ENIA	(BIT(31))
+#define SCTLR_ELx_ENIA	(BIT(SCTLR_ELx_ENIA_SHIFT))
 #define SCTLR_ELx_ENIB	(BIT(30))
 #define SCTLR_ELx_ENDA	(BIT(27))
 #define SCTLR_ELx_EE    (BIT(25))
@@ -597,6 +605,7 @@
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
 
 /* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_EPAN		(BIT(57))
 #define SCTLR_EL1_ATA0		(BIT(42))
 
 #define SCTLR_EL1_TCF0_SHIFT	38
@@ -637,7 +646,7 @@
 	 SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
 	 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
 	 SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
-	 SCTLR_EL1_RES1)
+	 SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
 
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 0deb88467111..b5f08621fa29 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -20,6 +20,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/mmu.h>
+#include <asm/mte.h>
 #include <asm/ptrace.h>
 #include <asm/memory.h>
 #include <asm/extable.h>
@@ -188,6 +189,23 @@ static inline void __uaccess_enable_tco(void)
 				 ARM64_MTE, CONFIG_KASAN_HW_TAGS));
 }
 
+/*
+ * These functions disable tag checking only if in MTE async mode,
+ * since in sync mode tag check faults are raised synchronously and
+ * the nofault accessors or load_unaligned_zeropad can handle them.
+ */
+static inline void __uaccess_disable_tco_async(void)
+{
+	if (system_uses_mte_async_mode())
+		__uaccess_disable_tco();
+}
+
+static inline void __uaccess_enable_tco_async(void)
+{
+	if (system_uses_mte_async_mode())
+		__uaccess_enable_tco();
+}
+
 static inline void uaccess_disable_privileged(void)
 {
 	__uaccess_disable_tco();
@@ -307,8 +325,10 @@ do {									\
 do {									\
 	int __gkn_err = 0;						\
 									\
+	__uaccess_enable_tco_async();					\
 	__raw_get_mem("ldr", *((type *)(dst)),				\
 		      (__force type *)(src), __gkn_err);		\
+	__uaccess_disable_tco_async();					\
 	if (unlikely(__gkn_err))					\
 		goto err_label;						\
 } while (0)
@@ -380,8 +400,10 @@ do {									\
 do {									\
 	int __pkn_err = 0;						\
 									\
+	__uaccess_enable_tco_async();					\
 	__raw_put_mem("str", *((type *)(src)),				\
 		      (__force type *)(dst), __pkn_err);		\
+	__uaccess_disable_tco_async();					\
 	if (unlikely(__pkn_err))					\
 		goto err_label;						\
 } while(0)
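
For context, the TCO toggle that these _async variants wrap flips
PSTATE.TCO through the alternatives framework, so it costs a NOP on
non-MTE hardware. A sketch of the enable side, consistent with the
partial definition visible in the hunk above:

static inline void __uaccess_enable_tco(void)
{
	/* Patched to an MSR of PSTATE.TCO when ARM64_MTE is present. */
	asm volatile(ALTERNATIVE("nop", SET_PSTATE_TCO(1),
				 ARM64_MTE, CONFIG_KASAN_HW_TAGS));
}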
diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h
index 631ab1281633..4b4c0dac0e14 100644
--- a/arch/arm64/include/asm/vdso/gettimeofday.h
+++ b/arch/arm64/include/asm/vdso/gettimeofday.h
@@ -83,11 +83,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
 	 */
 	isb();
 	asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
-	/*
-	 * This isb() is required to prevent that the seq lock is
-	 * speculated.#
-	 */
-	isb();
+	arch_counter_enforce_ordering(res);
 
 	return res;
 }
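
arch_counter_enforce_ordering() achieves what the removed isb() did,
but more cheaply: it builds a bogus address dependency on the counter
value so that later loads cannot be speculated past the counter read.
A sketch of the idea, assuming res holds the freshly read counter:

	u64 tmp;

	asm volatile(
	"	eor	%0, %1, %1\n"	/* tmp = res ^ res = 0 */
	"	add	%0, sp, %0\n"	/* tmp = sp, dependent on res */
	"	ldr	xzr, [%0]"	/* dummy load orders later accesses */
	: "=r" (tmp) : "r" (res));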
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index ea487218db79..2dcb104c645b 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -55,6 +55,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 {
 	unsigned long ret, tmp;
 
+	__uaccess_enable_tco_async();
+
 	/* Load word from unaligned pointer addr */
 	asm(
 	"1:	ldr	%0, %3\n"
@@ -76,6 +78,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 	: "=&r" (ret), "=&r" (tmp)
 	: "r" (addr), "Q" (*(unsigned long *)addr));
 
+	__uaccess_disable_tco_async();
+
 	return ret;
 }
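
The TCO bracketing matters here because load_unaligned_zeropad()
deliberately over-reads: the last word of a buffer may extend into an
adjacent 16-byte MTE granule carrying a different tag. In sync mode the
resulting fault is handled via the fixup above; in async mode the fault
could not be attributed to this access, so checking is suppressed
instead. A minimal caller sketch, assuming a NUL-terminated string:

static unsigned long first_word(const char *name)
{
	/*
	 * May read up to 7 bytes past the terminator; a fault from
	 * crossing a page, or a mismatching tag granule, is handled
	 * as described above.
	 */
	return load_unaligned_zeropad(name);
}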
 
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a36e2fc330d4..e797603e55b7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
 #endif
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
+  DEFINE(THREAD_SCTLR_USER,	offsetof(struct task_struct, thread.sctlr_user));
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(THREAD_KEYS_USER,	offsetof(struct task_struct, thread.keys_user));
   DEFINE(THREAD_KEYS_KERNEL,	offsetof(struct task_struct, thread.keys_kernel));
@@ -95,6 +96,8 @@ int main(void)
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
   BLANK();
   DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
+  DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
+  DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
   BLANK();
   DEFINE(CPU_BOOT_STACK,	offsetof(struct secondary_data, stack));
   DEFINE(CPU_BOOT_TASK,		offsetof(struct secondary_data, task));
@@ -147,10 +150,6 @@ int main(void)
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(PTRAUTH_USER_KEY_APIA,		offsetof(struct ptrauth_keys_user, apia));
-  DEFINE(PTRAUTH_USER_KEY_APIB,		offsetof(struct ptrauth_keys_user, apib));
-  DEFINE(PTRAUTH_USER_KEY_APDA,		offsetof(struct ptrauth_keys_user, apda));
-  DEFINE(PTRAUTH_USER_KEY_APDB,		offsetof(struct ptrauth_keys_user, apdb));
-  DEFINE(PTRAUTH_USER_KEY_APGA,		offsetof(struct ptrauth_keys_user, apga));
   DEFINE(PTRAUTH_KERNEL_KEY_APIA,	offsetof(struct ptrauth_keys_kernel, apia));
   BLANK();
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e5281e1c8f1d..76c60b3cda53 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -808,6 +808,12 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
 					reg->name,
 					ftrp->shift + ftrp->width - 1,
 					ftrp->shift, str, tmp);
+		} else if ((ftr_mask & reg->override->val) == ftr_mask) {
+			reg->override->val &= ~ftr_mask;
+			pr_warn("%s[%d:%d]: impossible override, ignored\n",
+				reg->name,
+				ftrp->shift + ftrp->width - 1,
+				ftrp->shift);
 		}
 
 		val = arm64_ftr_set_value(ftrp, val, ftr_new);
@@ -1619,7 +1625,6 @@ int get_cpu_with_amu_feat(void)
 }
 #endif
 
-#ifdef CONFIG_ARM64_VHE
 static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused)
 {
 	return is_kernel_in_hyp_mode();
@@ -1638,7 +1643,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 	if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN))
 		write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
 }
-#endif
 
 static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
 {
@@ -1823,6 +1827,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_pan,
 	},
 #endif /* CONFIG_ARM64_PAN */
+#ifdef CONFIG_ARM64_EPAN
+	{
+		.desc = "Enhanced Privileged Access Never",
+		.capability = ARM64_HAS_EPAN,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 3,
+	},
+#endif /* CONFIG_ARM64_EPAN */
 #ifdef CONFIG_ARM64_LSE_ATOMICS
 	{
 		.desc = "LSE atomic instructions",
@@ -1841,7 +1857,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
 		.matches = has_no_hw_prefetch,
 	},
-#ifdef CONFIG_ARM64_VHE
 	{
 		.desc = "Virtualization Host Extensions",
 		.capability = ARM64_HAS_VIRT_HOST_EXTN,
@@ -1849,7 +1864,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.matches = runs_at_el2,
 		.cpu_enable = cpu_copy_el2regs,
 	},
-#endif	/* CONFIG_ARM64_VHE */
 	{
 		.desc = "32-bit EL0 Support",
 		.capability = ARM64_HAS_32BIT_EL0,
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 9d3588450473..a1ec351c36bd 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -37,6 +37,8 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs)
 	lockdep_hardirqs_off(CALLER_ADDR0);
 	rcu_irq_enter_check_tick();
 	trace_hardirqs_off_finish();
+
+	mte_check_tfsr_entry();
 }
 
 /*
@@ -47,6 +49,8 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
 {
 	lockdep_assert_irqs_disabled();
 
+	mte_check_tfsr_exit();
+
 	if (interrupts_enabled(regs)) {
 		if (regs->exit_rcu) {
 			trace_hardirqs_on_prepare();
@@ -293,6 +297,8 @@ asmlinkage void noinstr enter_from_user_mode(void)
 
 asmlinkage void noinstr exit_to_user_mode(void)
 {
+	mte_check_tfsr_exit();
+
 	trace_hardirqs_on_prepare();
 	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
 	user_enter_irqoff();
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 2ca395c25448..3ecec60d3295 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -48,6 +48,11 @@ SYM_FUNC_START(sve_get_vl)
 	ret
 SYM_FUNC_END(sve_get_vl)
 
+SYM_FUNC_START(sve_set_vq)
+	sve_load_vq x0, x1, x2
+	ret
+SYM_FUNC_END(sve_set_vq)
+
 /*
  * Load SVE state from FPSIMD state.
  *
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6acfc5e6b5e0..4ac5455c0ead 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -249,7 +249,29 @@ alternative_else_nop_endif
 	check_mte_async_tcf x22, x23
 	apply_ssbd 1, x22, x23
 
-	ptrauth_keys_install_kernel tsk, x20, x22, x23
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	/*
+	 * Enable IA for in-kernel PAC if the task had it disabled. Although
+	 * this could be implemented with an unconditional MRS which would avoid
+	 * a load, this was measured to be slower on Cortex-A75 and Cortex-A76.
+	 *
+	 * Install the kernel IA key only if IA was enabled in the task. If IA
+	 * was disabled on kernel exit then we would have left the kernel IA
+	 * installed so there is no need to install it again.
+	 */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	__ptrauth_keys_install_kernel_nosync tsk, x20, x22, x23
+	b	2f
+1:
+	mrs	x0, sctlr_el1
+	orr	x0, x0, SCTLR_ELx_ENIA
+	msr	sctlr_el1, x0
+2:
+	isb
+alternative_else_nop_endif
+#endif
 
 	mte_set_kernel_gcr x22, x23
 
@@ -353,8 +375,26 @@ alternative_else_nop_endif
 3:
 	scs_save tsk, x0
 
-	/* No kernel C function calls after this as user keys are set. */
-	ptrauth_keys_install_user tsk, x0, x1, x2
+#ifdef CONFIG_ARM64_PTR_AUTH
+alternative_if ARM64_HAS_ADDRESS_AUTH
+	/*
+	 * IA was enabled for in-kernel PAC. Disable it now if needed, or
+	 * alternatively install the user's IA. All other per-task keys and
+	 * SCTLR bits were updated on task switch.
+	 *
+	 * No kernel C function calls after this.
+	 */
+	ldr	x0, [tsk, THREAD_SCTLR_USER]
+	tbz	x0, SCTLR_ELx_ENIA_SHIFT, 1f
+	__ptrauth_keys_install_user tsk, x0, x1, x2
+	b	2f
+1:
+	mrs	x0, sctlr_el1
+	bic	x0, x0, SCTLR_ELx_ENIA
+	msr	sctlr_el1, x0
+2:
+alternative_else_nop_endif
+#endif
 
 	mte_set_user_gcr tsk, x0, x1
 
@@ -493,28 +533,14 @@ tsk	.req	x28		// current thread_info
 /*
  * Interrupt handling.
  */
-	.macro	irq_handler
-	ldr_l	x1, handle_arch_irq
+	.macro	irq_handler, handler:req
+	ldr_l	x1, \handler
 	mov	x0, sp
 	irq_stack_entry
 	blr	x1
 	irq_stack_exit
 	.endm
 
-#ifdef CONFIG_ARM64_PSEUDO_NMI
-	/*
-	 * Set res to 0 if irqs were unmasked in interrupted context.
-	 * Otherwise set res to non-0 value.
-	 */
-	.macro	test_irqs_unmasked res:req, pmr:req
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
-	sub	\res, \pmr, #GIC_PRIO_IRQON
-alternative_else
-	mov	\res, xzr
-alternative_endif
-	.endm
-#endif
-
 	.macro	gic_prio_kentry_setup, tmp:req
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
@@ -533,6 +559,47 @@ alternative_endif
 #endif
 	.endm
 
+	.macro el1_interrupt_handler, handler:req
+	gic_prio_irq_setup pmr=x20, tmp=x1
+	enable_da
+
+	mov	x0, sp
+	bl	enter_el1_irq_or_nmi
+
+	irq_handler	\handler
+
+#ifdef CONFIG_PREEMPTION
+	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
+alternative_if ARM64_HAS_IRQ_PRIO_MASKING
+	/*
+	 * DA were cleared at the start of handling, and IF are cleared
+	 * by the GIC irqchip driver using gic_arch_enable_irqs() for
+	 * normal IRQs. If anything is still set, we came back from an
+	 * NMI instead of a normal IRQ, so skip preemption.
+	 */
+	mrs	x0, daif
+	orr	x24, x24, x0
+alternative_else_nop_endif
+	cbnz	x24, 1f				// preempt count != 0 || NMI return path
+	bl	arm64_preempt_schedule_irq	// irq en/disable is done inside
+1:
+#endif
+
+	mov	x0, sp
+	bl	exit_el1_irq_or_nmi
+	.endm
+
+	.macro el0_interrupt_handler, handler:req
+	gic_prio_irq_setup pmr=x20, tmp=x0
+	user_exit_irqoff
+	enable_da
+
+	tbz	x22, #55, 1f
+	bl	do_el0_irq_bp_hardening
+1:
+	irq_handler	\handler
+	.endm
+
 	.text
 
 /*
@@ -549,18 +616,18 @@ SYM_CODE_START(vectors)
 
 	kernel_ventry	1, sync				// Synchronous EL1h
 	kernel_ventry	1, irq				// IRQ EL1h
-	kernel_ventry	1, fiq_invalid			// FIQ EL1h
+	kernel_ventry	1, fiq				// FIQ EL1h
 	kernel_ventry	1, error			// Error EL1h
 
 	kernel_ventry	0, sync				// Synchronous 64-bit EL0
 	kernel_ventry	0, irq				// IRQ 64-bit EL0
-	kernel_ventry	0, fiq_invalid			// FIQ 64-bit EL0
+	kernel_ventry	0, fiq				// FIQ 64-bit EL0
 	kernel_ventry	0, error			// Error 64-bit EL0
 
 #ifdef CONFIG_COMPAT
 	kernel_ventry	0, sync_compat, 32		// Synchronous 32-bit EL0
 	kernel_ventry	0, irq_compat, 32		// IRQ 32-bit EL0
-	kernel_ventry	0, fiq_invalid_compat, 32	// FIQ 32-bit EL0
+	kernel_ventry	0, fiq_compat, 32		// FIQ 32-bit EL0
 	kernel_ventry	0, error_compat, 32		// Error 32-bit EL0
 #else
 	kernel_ventry	0, sync_invalid, 32		// Synchronous 32-bit EL0
@@ -626,12 +693,6 @@ SYM_CODE_START_LOCAL(el0_error_invalid)
 	inv_entry 0, BAD_ERROR
 SYM_CODE_END(el0_error_invalid)
 
-#ifdef CONFIG_COMPAT
-SYM_CODE_START_LOCAL(el0_fiq_invalid_compat)
-	inv_entry 0, BAD_FIQ, 32
-SYM_CODE_END(el0_fiq_invalid_compat)
-#endif
-
 SYM_CODE_START_LOCAL(el1_sync_invalid)
 	inv_entry 1, BAD_SYNC
 SYM_CODE_END(el1_sync_invalid)
@@ -662,35 +723,16 @@ SYM_CODE_END(el1_sync)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
 	kernel_entry 1
-	gic_prio_irq_setup pmr=x20, tmp=x1
-	enable_da_f
-
-	mov	x0, sp
-	bl	enter_el1_irq_or_nmi
-
-	irq_handler
-
-#ifdef CONFIG_PREEMPTION
-	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
-	/*
-	 * DA_F were cleared at start of handling. If anything is set in DAIF,
-	 * we come back from an NMI, so skip preemption
-	 */
-	mrs	x0, daif
-	orr	x24, x24, x0
-alternative_else_nop_endif
-	cbnz	x24, 1f				// preempt count != 0 || NMI return path
-	bl	arm64_preempt_schedule_irq	// irq en/disable is done inside
-1:
-#endif
-
-	mov	x0, sp
-	bl	exit_el1_irq_or_nmi
-
+	el1_interrupt_handler handle_arch_irq
 	kernel_exit 1
 SYM_CODE_END(el1_irq)
 
+SYM_CODE_START_LOCAL_NOALIGN(el1_fiq)
+	kernel_entry 1
+	el1_interrupt_handler handle_arch_fiq
+	kernel_exit 1
+SYM_CODE_END(el1_fiq)
+
 /*
  * EL0 mode handlers.
  */
@@ -717,6 +759,11 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
 	b	el0_irq_naked
 SYM_CODE_END(el0_irq_compat)
 
+SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat)
+	kernel_entry 0, 32
+	b	el0_fiq_naked
+SYM_CODE_END(el0_fiq_compat)
+
 SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
 	kernel_entry 0, 32
 	b	el0_error_naked
@@ -727,18 +774,17 @@ SYM_CODE_END(el0_error_compat)
 SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
 	kernel_entry 0
 el0_irq_naked:
-	gic_prio_irq_setup pmr=x20, tmp=x0
-	user_exit_irqoff
-	enable_da_f
-
-	tbz	x22, #55, 1f
-	bl	do_el0_irq_bp_hardening
-1:
-	irq_handler
-
+	el0_interrupt_handler handle_arch_irq
 	b	ret_to_user
 SYM_CODE_END(el0_irq)
 
+SYM_CODE_START_LOCAL_NOALIGN(el0_fiq)
+	kernel_entry 0
+el0_fiq_naked:
+	el0_interrupt_handler handle_arch_fiq
+	b	ret_to_user
+SYM_CODE_END(el0_fiq)
+
 SYM_CODE_START_LOCAL(el1_error)
 	kernel_entry 1
 	mrs	x1, esr_el1
@@ -759,7 +805,7 @@ el0_error_naked:
 	mov	x0, sp
 	mov	x1, x25
 	bl	do_serror
-	enable_da_f
+	enable_da
 	b	ret_to_user
 SYM_CODE_END(el0_error)
 
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 062b21f30f94..ad3dd34a83cf 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,7 +180,7 @@ static void __get_cpu_fpsimd_context(void)
  */
 static void get_cpu_fpsimd_context(void)
 {
-	preempt_disable();
+	local_bh_disable();
 	__get_cpu_fpsimd_context();
 }
 
@@ -201,7 +201,7 @@ static void __put_cpu_fpsimd_context(void)
 static void put_cpu_fpsimd_context(void)
 {
 	__put_cpu_fpsimd_context();
-	preempt_enable();
+	local_bh_enable();
 }
 
 static bool have_cpu_fpsimd_context(void)
@@ -285,7 +285,7 @@ static void task_fpsimd_load(void)
 	WARN_ON(!system_supports_fpsimd());
 	WARN_ON(!have_cpu_fpsimd_context());
 
-	if (system_supports_sve() && test_thread_flag(TIF_SVE))
+	if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE))
 		sve_load_state(sve_pffr(&current->thread),
 			       &current->thread.uw.fpsimd_state.fpsr,
 			       sve_vq_from_vl(current->thread.sve_vl) - 1);
@@ -307,7 +307,8 @@ static void fpsimd_save(void)
 	WARN_ON(!have_cpu_fpsimd_context());
 
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
+		if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+		    test_thread_flag(TIF_SVE)) {
 			if (WARN_ON(sve_get_vl() != last->sve_vl)) {
 				/*
 				 * Can't save the user regs, so current would
@@ -926,9 +927,8 @@ void fpsimd_release_task(struct task_struct *dead_task)
  * Trapped SVE access
  *
  * Storage is allocated for the full SVE state, the current FPSIMD
- * register contents are migrated across, and TIF_SVE is set so that
- * the SVE access trap will be disabled the next time this task
- * reaches ret_to_user.
+ * register contents are migrated across, and the access trap is
+ * disabled.
  *
  * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
  * would have disabled the SVE access trap for userspace during
@@ -946,15 +946,24 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 
 	get_cpu_fpsimd_context();
 
-	fpsimd_save();
-
-	/* Force ret_to_user to reload the registers: */
-	fpsimd_flush_task_state(current);
-
-	fpsimd_to_sve(current);
 	if (test_and_set_thread_flag(TIF_SVE))
 		WARN_ON(1); /* SVE access shouldn't have trapped */
 
+	/*
+	 * Convert the FPSIMD state to SVE, zeroing all the state that
+	 * is not shared with FPSIMD. If (as is likely) the current
+	 * state is live in the registers then do this there and
+	 * update our metadata for the current task, including
+	 * disabling the trap; otherwise update our in-memory copy.
+	 */
+	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
+		sve_flush_live();
+		fpsimd_bind_task_to_cpu();
+	} else {
+		fpsimd_to_sve(current);
+	}
+
 	put_cpu_fpsimd_context();
 }
 
@@ -1092,7 +1101,7 @@ void fpsimd_preserve_current_state(void)
 void fpsimd_signal_preserve_current_state(void)
 {
 	fpsimd_preserve_current_state();
-	if (system_supports_sve() && test_thread_flag(TIF_SVE))
+	if (test_thread_flag(TIF_SVE))
 		sve_to_fpsimd(current);
 }
 
@@ -1181,7 +1190,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 	get_cpu_fpsimd_context();
 
 	current->thread.uw.fpsimd_state = *state;
-	if (system_supports_sve() && test_thread_flag(TIF_SVE))
+	if (test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
 
 	task_fpsimd_load();
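
Switching get_cpu_fpsimd_context() to local_bh_disable() additionally
excludes softirq-context users of the vector unit while task state is
being manipulated. A sketch of the kernel-mode NEON pattern this
protects, using the existing may_use_simd()/kernel_neon_begin() API:

#include <asm/neon.h>
#include <asm/simd.h>

static void do_simd_work(void)
{
	if (!may_use_simd())
		return;		/* fall back to scalar code */

	kernel_neon_begin();
	/* FPSIMD registers may be used here */
	kernel_neon_end();
}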
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 840bda1869e9..96873dfa67fd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 SYM_FUNC_START(init_kernel_el)
-	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
-	msr	sctlr_el1, x0
-
 	mrs	x0, CurrentEL
 	cmp	x0, #CurrentEL_EL2
 	b.eq	init_el2
 
 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
+	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	msr	sctlr_el1, x0
 	isb
 	mov_q	x0, INIT_PSTATE_EL1
 	msr	spsr_el1, x0
@@ -504,9 +503,43 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	msr	vbar_el2, x0
 	isb
 
+	/*
+	 * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
+	 * making it impossible to start in nVHE mode. Is that
+	 * compliant with the architecture? Absolutely not!
+	 */
+	mrs	x0, hcr_el2
+	and	x0, x0, #HCR_E2H
+	cbz	x0, 1f
+
+	/* Switching to VHE requires a sane SCTLR_EL1 as a start */
+	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	msr_s	SYS_SCTLR_EL12, x0
+
+	/*
+	 * Force an eret into a helper "function", and let it return
+	 * to our original caller... This makes sure that we have
+	 * initialised the basic PSTATE state.
+	 */
+	mov	x0, #INIT_PSTATE_EL2
+	msr	spsr_el1, x0
+	adr	x0, __cpu_stick_to_vhe
+	msr	elr_el1, x0
+	eret
+
+1:
+	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
+	msr	sctlr_el1, x0
+
 	msr	elr_el2, lr
 	mov	w0, #BOOT_CPU_MODE_EL2
 	eret
+
+__cpu_stick_to_vhe:
+	mov	x0, #HVC_VHE_RESTART
+	hvc	#0
+	mov	x0, #BOOT_CPU_MODE_EL2
+	ret
 SYM_FUNC_END(init_kernel_el)
 
 /*
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 5eccbd62fec8..74ad3db061d1 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors)
 	ventry	el2_fiq_invalid			// FIQ EL2t
 	ventry	el2_error_invalid		// Error EL2t
 
-	ventry	el2_sync_invalid		// Synchronous EL2h
+	ventry	elx_sync			// Synchronous EL2h
 	ventry	el2_irq_invalid			// IRQ EL2h
 	ventry	el2_fiq_invalid			// FIQ EL2h
 	ventry	el2_error_invalid		// Error EL2h
 
-	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	elx_sync			// Synchronous 64-bit EL1
 	ventry	el1_irq_invalid			// IRQ 64-bit EL1
 	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
 	ventry	el1_error_invalid		// Error 64-bit EL1
@@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors)
 
 	.align 11
 
-SYM_CODE_START_LOCAL(el1_sync)
+SYM_CODE_START_LOCAL(elx_sync)
 	cmp	x0, #HVC_SET_VECTORS
 	b.ne	1f
 	msr	vbar_el2, x1
@@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync)
 
 9:	mov	x0, xzr
 	eret
-SYM_CODE_END(el1_sync)
+SYM_CODE_END(elx_sync)
 
 // nVHE? No way! Give me the real thing!
 SYM_CODE_START_LOCAL(mutate_to_vhe)
@@ -224,7 +224,6 @@ SYM_FUNC_END(__hyp_reset_vectors)
  * Entry point to switch to VHE if deemed capable
  */
 SYM_FUNC_START(switch_to_vhe)
-#ifdef CONFIG_ARM64_VHE
 	// Need to have booted at EL2
 	adr_l	x1, __boot_cpu_mode
 	ldr	w0, [x1]
@@ -240,6 +239,5 @@ SYM_FUNC_START(switch_to_vhe)
 	mov	x0, #HVC_VHE_RESTART
 	hvc	#0
 1:
-#endif
 	ret
 SYM_FUNC_END(switch_to_vhe)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 83f1c4b92095..e628c8ce1ffe 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -25,14 +25,26 @@ struct ftr_set_desc {
 	struct {
 		char			name[FTR_DESC_FIELD_LEN];
 		u8			shift;
+		bool			(*filter)(u64 val);
 	} 				fields[];
 };
 
+static bool __init mmfr1_vh_filter(u64 val)
+{
+	/*
+	 * If we ever reach this point while running VHE, we're
+	 * guaranteed to be on one of these funky, VHE-stuck CPUs. If
+	 * the user was trying to force nVHE on us, proceed with
+	 * attitude adjustment.
+	 */
+	return !(is_kernel_in_hyp_mode() && val == 0);
+}
+
 static const struct ftr_set_desc mmfr1 __initconst = {
 	.name		= "id_aa64mmfr1",
 	.override	= &id_aa64mmfr1_override,
 	.fields		= {
-	        { "vh", ID_AA64MMFR1_VHE_SHIFT },
+		{ "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter },
 		{}
 	},
 };
@@ -124,6 +136,18 @@ static void __init match_options(const char *cmdline)
 			if (find_field(cmdline, regs[i], f, &v))
 				continue;
 
+			/*
+			 * If an override gets filtered out, advertise
+			 * it by setting the value to 0xf, but
+			 * clearing the mask... Yes, this is fragile.
+			 */
+			if (regs[i]->fields[f].filter &&
+			    !regs[i]->fields[f].filter(v)) {
+				regs[i]->override->val  |= mask;
+				regs[i]->override->mask &= ~mask;
+				continue;
+			}
+
 			regs[i]->override->val  &= ~mask;
 			regs[i]->override->val  |= (v << shift) & mask;
 			regs[i]->override->mask |= mask;
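
Concretely, the filter rejects an attempt to force nVHE on a CPU that
can only run VHE. Hypothetical boot arguments exercising the "vh"
override:

	id_aa64mmfr1.vh=0	# ask for nVHE; on a VHE-stuck CPU the
				# filter clears the mask and sets the
				# value to 0xf, so the override is ignored
	kvm-arm.mode=nvhe	# command-line alias for the same override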
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index dfb1feab867d..bda49430c9ea 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -71,13 +71,44 @@ static void init_irq_stacks(void)
 }
 #endif
 
+static void default_handle_irq(struct pt_regs *regs)
+{
+	panic("IRQ taken without a root IRQ handler\n");
+}
+
+static void default_handle_fiq(struct pt_regs *regs)
+{
+	panic("FIQ taken without a root FIQ handler\n");
+}
+
+void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;
+void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq;
+
+int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
+{
+	if (handle_arch_irq != default_handle_irq)
+		return -EBUSY;
+
+	handle_arch_irq = handle_irq;
+	pr_info("Root IRQ handler: %ps\n", handle_irq);
+	return 0;
+}
+
+int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *))
+{
+	if (handle_arch_fiq != default_handle_fiq)
+		return -EBUSY;
+
+	handle_arch_fiq = handle_fiq;
+	pr_info("Root FIQ handler: %ps\n", handle_fiq);
+	return 0;
+}
+
 void __init init_IRQ(void)
 {
 	init_irq_stacks();
 	init_irq_scs();
 	irqchip_init();
-	if (!handle_arch_irq)
-		panic("No interrupt controller found.");
 
 	if (system_uses_irq_prio_masking()) {
 		/*
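
With dispatch now routed through these pointers, an irqchip driver
claims the root handlers explicitly. A hypothetical driver-side sketch
(my_handle_irq is illustrative; the GIC drivers register theirs the
same way):

static void my_handle_irq(struct pt_regs *regs)
{
	/* acknowledge the interrupt and dispatch into the irqdomain */
}

static int __init my_irqchip_init(void)
{
	/* Fails with -EBUSY if a root handler was already installed. */
	return set_handle_irq(my_handle_irq);
}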
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 27f8939deb1b..341342b207f6 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -128,15 +128,17 @@ u64 __init kaslr_early_init(void)
 	/* use the top 16 bits to randomize the linear region */
 	memstart_offset_seed = seed >> 48;
 
-	if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-	    IS_ENABLED(CONFIG_KASAN_SW_TAGS))
+	if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
+	    (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+	     IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
 		/*
-		 * KASAN does not expect the module region to intersect the
-		 * vmalloc region, since shadow memory is allocated for each
-		 * module at load time, whereas the vmalloc region is shadowed
-		 * by KASAN zero pages. So keep modules out of the vmalloc
-		 * region if KASAN is enabled, and put the kernel well within
-		 * 4 GB of the module region.
+		 * KASAN without KASAN_VMALLOC does not expect the module region
+		 * to intersect the vmalloc region, since shadow memory is
+		 * allocated for each module at load time, whereas the vmalloc
+		 * region is shadowed by KASAN zero pages. So keep modules
+		 * out of the vmalloc region if KASAN is enabled without
+		 * KASAN_VMALLOC, and put the kernel well within 4 GB of the
+		 * module region.
 		 */
 		return offset % SZ_2G;
 
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index fe21e0f06492..b5ec010c481f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -40,14 +40,16 @@ void *module_alloc(unsigned long size)
 				NUMA_NO_NODE, __builtin_return_address(0));
 
 	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
-	    !IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-	    !IS_ENABLED(CONFIG_KASAN_SW_TAGS))
+	    (IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
+	     (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+	      !IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
 		/*
-		 * KASAN can only deal with module allocations being served
-		 * from the reserved module region, since the remainder of
-		 * the vmalloc region is already backed by zero shadow pages,
-		 * and punching holes into it is non-trivial. Since the module
-		 * region is not randomized when KASAN is enabled, it is even
+		 * KASAN without KASAN_VMALLOC can only deal with module
+		 * allocations being served from the reserved module region,
+		 * since the remainder of the vmalloc region is already
+		 * backed by zero shadow pages, and punching holes into it
+		 * is non-trivial. Since the module region is not randomized
+		 * when KASAN is enabled without KASAN_VMALLOC, it is even
 		 * less likely that the module region gets exhausted, so we
 		 * can simply omit this fallback in that case.
 		 */
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index b3c70a612c7a..125a10e413e9 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -26,6 +26,12 @@ u64 gcr_kernel_excl __ro_after_init;
 
 static bool report_fault_once = true;
 
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Whether the MTE asynchronous mode is enabled. */
+DEFINE_STATIC_KEY_FALSE(mte_async_mode);
+EXPORT_SYMBOL_GPL(mte_async_mode);
+#endif
+
 static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
 {
 	pte_t old_pte = READ_ONCE(*ptep);
@@ -107,13 +113,45 @@ void mte_init_tags(u64 max_tag)
 	write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
 }
 
-void mte_enable_kernel(void)
+static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
 {
 	/* Enable MTE Sync Mode for EL1. */
-	sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_SYNC);
+	sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf);
 	isb();
+
+	pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
+}
+
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_enable_kernel_sync(void)
+{
+	/*
+	 * Warn if any PE has already enabled async mode by the
+	 * time we enter this function.
+	 */
+	WARN_ONCE(system_uses_mte_async_mode(),
+			"MTE async mode enabled system wide!");
+
+	__mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC);
 }
 
+void mte_enable_kernel_async(void)
+{
+	__mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC);
+
+	/*
+	 * MTE async mode is set system wide by the first PE that
+	 * executes this function.
+	 *
+	 * Note: if KASAN ever gains the ability to switch between
+	 * sync and async mode at runtime, this strategy will need
+	 * to be revisited.
+	 */
+	if (!system_uses_mte_async_mode())
+		static_branch_enable(&mte_async_mode);
+}
+#endif
+
 void mte_set_report_once(bool state)
 {
 	WRITE_ONCE(report_fault_once, state);
@@ -124,25 +162,28 @@ bool mte_report_once(void)
 	return READ_ONCE(report_fault_once);
 }
 
-static void update_sctlr_el1_tcf0(u64 tcf0)
+#ifdef CONFIG_KASAN_HW_TAGS
+void mte_check_tfsr_el1(void)
 {
-	/* ISB required for the kernel uaccess routines */
-	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
-	isb();
-}
+	u64 tfsr_el1;
 
-static void set_sctlr_el1_tcf0(u64 tcf0)
-{
-	/*
-	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
-	 * optimisation. Disable preemption so that it does not see
-	 * the variable update before the SCTLR_EL1.TCF0 one.
-	 */
-	preempt_disable();
-	current->thread.sctlr_tcf0 = tcf0;
-	update_sctlr_el1_tcf0(tcf0);
-	preempt_enable();
+	if (!system_supports_mte())
+		return;
+
+	tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);
+
+	if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
+		/*
+		 * Note: isb() is not required after this direct write
+		 * because there is no indirect read subsequent to it
+		 * (per ARM DDI 0487F.c table D13-1).
+		 */
+		write_sysreg_s(0, SYS_TFSR_EL1);
+
+		kasan_report_async();
+	}
 }
+#endif
 
 static void update_gcr_el1_excl(u64 excl)
 {
@@ -166,7 +207,7 @@ static void set_gcr_el1_excl(u64 excl)
 	 */
 }
 
-void flush_mte_state(void)
+void mte_thread_init_user(void)
 {
 	if (!system_supports_mte())
 		return;
@@ -176,19 +217,39 @@ void flush_mte_state(void)
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking */
-	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+	set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
+			   SCTLR_EL1_TCF0_NONE);
 	/* reset tag generation mask */
 	set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
 }
 
 void mte_thread_switch(struct task_struct *next)
 {
+	/*
+	 * Check if an async tag exception occurred at EL1.
+	 *
+	 * Note: On the context switch path we rely on the dsb() present
+	 * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
+	 * are synchronized before this point.
+	 */
+	isb();
+	mte_check_tfsr_el1();
+}
+
+void mte_suspend_enter(void)
+{
 	if (!system_supports_mte())
 		return;
 
-	/* avoid expensive SCTLR_EL1 accesses if no change */
-	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
-		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+	/*
+	 * The barriers are required to guarantee that the indirect writes
+	 * to TFSR_EL1 are synchronized before we report the state.
+	 */
+	dsb(nsh);
+	isb();
+
+	/* Report SYS_TFSR_EL1 before suspend entry */
+	mte_check_tfsr_el1();
 }
 
 void mte_suspend_exit(void)
@@ -201,7 +262,7 @@ void mte_suspend_exit(void)
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
-	u64 tcf0;
+	u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
 	u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
 		       SYS_GCR_EL1_EXCL_MASK;
 
@@ -210,23 +271,23 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
 	switch (arg & PR_MTE_TCF_MASK) {
 	case PR_MTE_TCF_NONE:
-		tcf0 = SCTLR_EL1_TCF0_NONE;
+		sctlr |= SCTLR_EL1_TCF0_NONE;
 		break;
 	case PR_MTE_TCF_SYNC:
-		tcf0 = SCTLR_EL1_TCF0_SYNC;
+		sctlr |= SCTLR_EL1_TCF0_SYNC;
 		break;
 	case PR_MTE_TCF_ASYNC:
-		tcf0 = SCTLR_EL1_TCF0_ASYNC;
+		sctlr |= SCTLR_EL1_TCF0_ASYNC;
 		break;
 	default:
 		return -EINVAL;
 	}
 
 	if (task != current) {
-		task->thread.sctlr_tcf0 = tcf0;
+		task->thread.sctlr_user = sctlr;
 		task->thread.gcr_user_excl = gcr_excl;
 	} else {
-		set_sctlr_el1_tcf0(tcf0);
+		set_task_sctlr_el1(sctlr);
 		set_gcr_el1_excl(gcr_excl);
 	}
 
@@ -243,7 +304,7 @@ long get_mte_ctrl(struct task_struct *task)
 
 	ret = incl << PR_MTE_TAG_SHIFT;
 
-	switch (task->thread.sctlr_tcf0) {
+	switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
 	case SCTLR_EL1_TCF0_NONE:
 		ret |= PR_MTE_TCF_NONE;
 		break;
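
From userspace, set_mte_ctrl()/get_mte_ctrl() are reached through the
tagged-address-ABI prctl pair. A hypothetical example, assuming the
PR_MTE_* constants from a recent <linux/prctl.h>:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* Synchronous tag checking; allow IRG to generate tags 0-3. */
	unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
			     (0xfUL << PR_MTE_TAG_SHIFT);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0))
		perror("PR_SET_TAGGED_ADDR_CTRL");

	printf("ctrl: 0x%x\n", prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
	return 0;
}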
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4658fcf88c2b..f594957e29bd 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -470,9 +470,8 @@ static inline u64 armv8pmu_read_evcntr(int idx)
 static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
 {
 	int idx = event->hw.idx;
-	u64 val = 0;
+	u64 val = armv8pmu_read_evcntr(idx);
 
-	val = armv8pmu_read_evcntr(idx);
 	if (armv8pmu_event_is_chained(event))
 		val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
 	return val;
@@ -520,7 +519,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
-	u64 value = 0;
+	u64 value;
 
 	if (idx == ARMV8_IDX_CYCLE_COUNTER)
 		value = read_sysreg(pmccntr_el0);
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index adb955fd9bdd..60901ab0a7fe 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -43,6 +43,69 @@ int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
 		get_random_bytes(&keys->apdb, sizeof(keys->apdb));
 	if (arg & PR_PAC_APGAKEY)
 		get_random_bytes(&keys->apga, sizeof(keys->apga));
+	ptrauth_keys_install_user(keys);
 
 	return 0;
 }
+
+static u64 arg_to_enxx_mask(unsigned long arg)
+{
+	u64 sctlr_enxx_mask = 0;
+
+	WARN_ON(arg & ~PR_PAC_ENABLED_KEYS_MASK);
+	if (arg & PR_PAC_APIAKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENIA;
+	if (arg & PR_PAC_APIBKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENIB;
+	if (arg & PR_PAC_APDAKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENDA;
+	if (arg & PR_PAC_APDBKEY)
+		sctlr_enxx_mask |= SCTLR_ELx_ENDB;
+	return sctlr_enxx_mask;
+}
+
+int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
+			     unsigned long enabled)
+{
+	u64 sctlr = tsk->thread.sctlr_user;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	if (is_compat_thread(task_thread_info(tsk)))
+		return -EINVAL;
+
+	if ((keys & ~PR_PAC_ENABLED_KEYS_MASK) || (enabled & ~keys))
+		return -EINVAL;
+
+	sctlr &= ~arg_to_enxx_mask(keys);
+	sctlr |= arg_to_enxx_mask(enabled);
+	if (tsk == current)
+		set_task_sctlr_el1(sctlr);
+	else
+		tsk->thread.sctlr_user = sctlr;
+
+	return 0;
+}
+
+int ptrauth_get_enabled_keys(struct task_struct *tsk)
+{
+	int retval = 0;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	if (is_compat_thread(task_thread_info(tsk)))
+		return -EINVAL;
+
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIA)
+		retval |= PR_PAC_APIAKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENIB)
+		retval |= PR_PAC_APIBKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENDA)
+		retval |= PR_PAC_APDAKEY;
+	if (tsk->thread.sctlr_user & SCTLR_ELx_ENDB)
+		retval |= PR_PAC_APDBKEY;
+
+	return retval;
+}
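
The matching userspace interface is the PR_PAC_SET_ENABLED_KEYS /
PR_PAC_GET_ENABLED_KEYS prctl pair. A hypothetical example that leaves
only the IB key enabled for the calling thread (constants from a recent
<linux/prctl.h>):

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	unsigned long all = PR_PAC_APIAKEY | PR_PAC_APIBKEY |
			    PR_PAC_APDAKEY | PR_PAC_APDBKEY;

	/* arg2 picks the keys to affect, arg3 which of those to enable. */
	if (prctl(PR_PAC_SET_ENABLED_KEYS, all, PR_PAC_APIBKEY, 0, 0))
		perror("PR_PAC_SET_ENABLED_KEYS");

	printf("enabled keys: 0x%x\n",
	       prctl(PR_PAC_GET_ENABLED_KEYS, 0, 0, 0, 0));
	return 0;
}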
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 85645b2b0c7a..d607c9912025 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -264,8 +264,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 		 * normal page fault.
 		 */
 		instruction_pointer_set(regs, (unsigned long) cur->addr);
-		if (!instruction_pointer(regs))
-			BUG();
+		BUG_ON(!instruction_pointer(regs));
 
 		if (kcb->kprobe_status == KPROBE_REENTER) {
 			restore_previous_kprobe(kcb);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6e60aa3b5ea9..cbf52109583b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -86,7 +86,7 @@ static void noinstr __cpu_do_idle_irqprio(void)
 	unsigned long daif_bits;
 
 	daif_bits = read_sysreg(daif);
-	write_sysreg(daif_bits | PSR_I_BIT, daif);
+	write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
 
 	/*
 	 * Unmask PMR before going idle to make sure interrupts can
@@ -341,7 +341,6 @@ void flush_thread(void)
 	tls_thread_flush();
 	flush_ptrace_hw_breakpoint(current);
 	flush_tagged_addr_state();
-	flush_mte_state();
 }
 
 void release_thread(struct task_struct *dead_task)
@@ -531,6 +530,31 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 	write_sysreg(val, cntkctl_el1);
 }
 
+static void update_sctlr_el1(u64 sctlr)
+{
+	/*
+	 * EnIA must not be cleared while in the kernel as this is necessary for
+	 * in-kernel PAC. It will be cleared on kernel exit if needed.
+	 */
+	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);
+
+	/* ISB required for the kernel uaccess routines when setting TCF0. */
+	isb();
+}
+
+void set_task_sctlr_el1(u64 sctlr)
+{
+	/*
+	 * __switch_to() checks current->thread.sctlr as an
+	 * optimisation. Disable preemption so that it does not see
+	 * the variable update before the SCTLR_EL1 one.
+	 */
+	preempt_disable();
+	current->thread.sctlr_user = sctlr;
+	update_sctlr_el1(sctlr);
+	preempt_enable();
+}
+
 /*
  * Thread switching.
  */
@@ -546,6 +570,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	entry_task_switch(next);
 	ssbs_thread_switch(next);
 	erratum_1418040_thread_switch(prev, next);
+	ptrauth_thread_switch_user(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -561,6 +586,9 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 * registers.
 	 */
 	mte_thread_switch(next);
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (prev->thread.sctlr_user != next->thread.sctlr_user)
+		update_sctlr_el1(next->thread.sctlr_user);
 
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
@@ -610,7 +638,8 @@ void arch_setup_new_exec(void)
 {
 	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
 
-	ptrauth_thread_init_user(current);
+	ptrauth_thread_init_user();
+	mte_thread_init_user();
 
 	if (task_spec_ssb_noexec(current)) {
 		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 170f42fd6101..eb2f73939b7b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -909,6 +909,38 @@ static int pac_mask_get(struct task_struct *target,
 	return membuf_write(&to, &uregs, sizeof(uregs));
 }
 
+static int pac_enabled_keys_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	long enabled_keys = ptrauth_get_enabled_keys(target);
+
+	if (IS_ERR_VALUE(enabled_keys))
+		return enabled_keys;
+
+	return membuf_write(&to, &enabled_keys, sizeof(enabled_keys));
+}
+
+static int pac_enabled_keys_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	long enabled_keys = ptrauth_get_enabled_keys(target);
+
+	if (IS_ERR_VALUE(enabled_keys))
+		return enabled_keys;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &enabled_keys, 0,
+				 sizeof(long));
+	if (ret)
+		return ret;
+
+	return ptrauth_set_enabled_keys(target, PR_PAC_ENABLED_KEYS_MASK,
+					enabled_keys);
+}
+
 #ifdef CONFIG_CHECKPOINT_RESTORE
 static __uint128_t pac_key_to_user(const struct ptrauth_key *key)
 {
@@ -1074,6 +1106,7 @@ enum aarch64_regset {
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	REGSET_PAC_MASK,
+	REGSET_PAC_ENABLED_KEYS,
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	REGSET_PACA_KEYS,
 	REGSET_PACG_KEYS,
@@ -1160,6 +1193,14 @@ static const struct user_regset aarch64_regsets[] = {
 		.regset_get = pac_mask_get,
 		/* this cannot be set dynamically */
 	},
+	[REGSET_PAC_ENABLED_KEYS] = {
+		.core_note_type = NT_ARM_PAC_ENABLED_KEYS,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = pac_enabled_keys_get,
+		.set = pac_enabled_keys_set,
+	},
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	[REGSET_PACA_KEYS] = {
 		.core_note_type = NT_ARM_PACA_KEYS,
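
Debuggers reach the new regset with PTRACE_GETREGSET/PTRACE_SETREGSET
and the NT_ARM_PAC_ENABLED_KEYS note type. A hypothetical tracer-side
sketch (assumes headers new enough to define the note type):

#include <elf.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

static long read_pac_enabled_keys(pid_t pid)
{
	long keys = 0;
	struct iovec iov = { .iov_base = &keys, .iov_len = sizeof(keys) };

	if (ptrace(PTRACE_GETREGSET, pid,
		   (void *)NT_ARM_PAC_ENABLED_KEYS, &iov))
		return -1;

	return keys;	/* bitmask of PR_PAC_AP*KEY */
}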
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 357590beaabb..dcd7041b2b07 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -188,6 +188,7 @@ static void init_gic_priority_masking(void)
 	cpuflags = read_sysreg(daif);
 
 	WARN_ON(!(cpuflags & PSR_I_BIT));
+	WARN_ON(!(cpuflags & PSR_F_BIT));
 
 	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
 }
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d55bdfb7789c..84b676bcf867 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -32,6 +32,30 @@
  *	add	sp, sp, #0x10
  */
 
+
+void start_backtrace(struct stackframe *frame, unsigned long fp,
+		     unsigned long pc)
+{
+	frame->fp = fp;
+	frame->pc = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame->graph = 0;
+#endif
+
+	/*
+	 * Prime the first unwind.
+	 *
+	 * In unwind_frame() we'll check that the FP points to a valid stack,
+	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+	 * treated as a transition to whichever stack that happens to be. The
+	 * prev_fp value won't be used, but we set it to 0 such that it is
+	 * definitely not an accessible stack address.
+	 */
+	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+	frame->prev_fp = 0;
+	frame->prev_type = STACK_TYPE_UNKNOWN;
+}
+
 /*
  * Unwind from one frame record (A) to the next frame record (B).
  *
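
With start_backtrace() out of line, callers prime a stackframe and then
iterate with unwind_frame(). A sketch of the usual walk, mirroring
walk_stackframe() (fn/data stand in for the consumer callback):

	struct stackframe frame;

	start_backtrace(&frame, regs->regs[29], regs->pc);
	do {
		if (!fn(data, frame.pc))	/* consume one entry */
			break;
	} while (!unwind_frame(tsk, &frame));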
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index d7564891ffe1..e3f72df9509d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -74,8 +74,9 @@ void notrace __cpu_suspend_exit(void)
 	 */
 	spectre_v4_enable_mitigation(NULL);
 
-	/* Restore additional MTE-specific configuration */
+	/* Restore additional feature-specific configuration */
 	mte_suspend_exit();
+	ptrauth_suspend_exit();
 }
 
 /*
@@ -91,6 +92,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	unsigned long flags;
 	struct sleep_stack_data state;
 
+	/* Report any MTE async fault before going to suspend */
+	mte_suspend_enter();
+
 	/*
 	 * From this point debug exceptions are disabled to prevent
 	 * updates to mdscr register (saved and restored along with
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index cee5d04ea9ad..a61fc4f989b3 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -86,7 +86,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
 	return 0;
 }
 
-static int __vdso_init(enum vdso_abi abi)
+static int __init __vdso_init(enum vdso_abi abi)
 {
 	int i;
 	struct page **vdso_pagelist;
@@ -271,6 +271,14 @@ enum aarch32_map {
 static struct page *aarch32_vectors_page __ro_after_init;
 static struct page *aarch32_sig_page __ro_after_init;
 
+static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
+				  struct vm_area_struct *new_vma)
+{
+	current->mm->context.sigpage = (void *)new_vma->vm_start;
+
+	return 0;
+}
+
 static struct vm_special_mapping aarch32_vdso_maps[] = {
 	[AA32_MAP_VECTORS] = {
 		.name	= "[vectors]", /* ABI */
@@ -279,6 +287,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
 	[AA32_MAP_SIGPAGE] = {
 		.name	= "[sigpage]", /* ABI */
 		.pages	= &aarch32_sig_page,
+		.mremap	= aarch32_sigpage_mremap,
 	},
 	[AA32_MAP_VVAR] = {
 		.name = "[vvar]",
@@ -299,34 +308,35 @@ static int aarch32_alloc_kuser_vdso_page(void)
 	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
 		return 0;
 
-	vdso_page = get_zeroed_page(GFP_ATOMIC);
+	vdso_page = get_zeroed_page(GFP_KERNEL);
 	if (!vdso_page)
 		return -ENOMEM;
 
 	memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
 	       kuser_sz);
 	aarch32_vectors_page = virt_to_page(vdso_page);
-	flush_dcache_page(aarch32_vectors_page);
 	return 0;
 }
 
+#define COMPAT_SIGPAGE_POISON_WORD	0xe7fddef1
 static int aarch32_alloc_sigpage(void)
 {
 	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
 	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
-	unsigned long sigpage;
+	__le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
+	void *sigpage;
 
-	sigpage = get_zeroed_page(GFP_ATOMIC);
+	sigpage = (void *)__get_free_page(GFP_KERNEL);
 	if (!sigpage)
 		return -ENOMEM;
 
-	memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
+	memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
+	memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
 	aarch32_sig_page = virt_to_page(sigpage);
-	flush_dcache_page(aarch32_sig_page);
 	return 0;
 }
 
-static int __aarch32_alloc_vdso_pages(void)
+static int __init __aarch32_alloc_vdso_pages(void)
 {
 
 	if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
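
The poison value is deliberate: 0xe7fddef1 is a permanently undefined
A32 instruction encoding (a UDF form), so a stray branch into the
unused part of the sigpage now faults with SIGILL instead of sliding
through zero-filled memory, which would decode as harmless-looking
"andeq r0, r0, r0" instructions.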
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f37d4e3830b7..871c82ab0a30 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -527,7 +527,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 	const struct fault_info *inf;
 	struct mm_struct *mm = current->mm;
 	vm_fault_t fault;
-	unsigned long vm_flags = VM_ACCESS_FLAGS;
+	unsigned long vm_flags;
 	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
 	unsigned long addr = untagged_addr(far);
 
@@ -544,12 +544,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
 	if (user_mode(regs))
 		mm_flags |= FAULT_FLAG_USER;
 
+	/*
+	 * vm_flags tells us what bits we must have in vma->vm_flags
+	 * for the fault to be benign; __do_page_fault() checks
+	 * vma->vm_flags & vm_flags and returns an error if the
+	 * intersection is empty.
+	 */
 	if (is_el0_instruction_abort(esr)) {
+		/* It was exec fault */
 		vm_flags = VM_EXEC;
 		mm_flags |= FAULT_FLAG_INSTRUCTION;
 	} else if (is_write_abort(esr)) {
+		/* It was write fault */
 		vm_flags = VM_WRITE;
 		mm_flags |= FAULT_FLAG_WRITE;
+	} else {
+		/* It was read fault */
+		vm_flags = VM_READ;
+		/* Write implies read */
+		vm_flags |= VM_WRITE;
+		/* If EPAN is absent then exec implies read */
+		if (!cpus_have_const_cap(ARM64_HAS_EPAN))
+			vm_flags |= VM_EXEC;
 	}
 
 	if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
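
The read branch has to spell out VM_READ | VM_WRITE (plus VM_EXEC when
EPAN is absent, since without EPAN execute-only permission still
implies read) because the consumer rejects the fault if the VMA grants
none of the required bits; schematically, in __do_page_fault():

	if (!(vma->vm_flags & vm_flags))
		return VM_FAULT_BADACCESS;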
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index d8e66c78440e..61b52a92b8b6 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -79,7 +79,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
 		phys_addr_t pmd_phys = early ?
 				__pa_symbol(kasan_early_shadow_pmd)
 					: kasan_alloc_zeroed_page(node);
-		__pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
+		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
 	}
 
 	return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
@@ -92,7 +92,7 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node,
 		phys_addr_t pud_phys = early ?
 				__pa_symbol(kasan_early_shadow_pud)
 					: kasan_alloc_zeroed_page(node);
-		__p4d_populate(p4dp, pud_phys, PMD_TYPE_TABLE);
+		__p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE);
 	}
 
 	return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr);
@@ -214,15 +214,18 @@ static void __init kasan_init_shadow(void)
 {
 	u64 kimg_shadow_start, kimg_shadow_end;
 	u64 mod_shadow_start, mod_shadow_end;
+	u64 vmalloc_shadow_end;
 	phys_addr_t pa_start, pa_end;
 	u64 i;
 
-	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
-	kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
+	kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK;
+	kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END));
 
 	mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
 	mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
 
+	vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END);
+
 	/*
 	 * We are going to perform proper setup of shadow memory.
 	 * At first we should unmap early shadow (clear_pgds() call below).
@@ -237,16 +240,22 @@ static void __init kasan_init_shadow(void)
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	kasan_map_populate(kimg_shadow_start, kimg_shadow_end,
-			   early_pfn_to_nid(virt_to_pfn(lm_alias(_text))));
+			   early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START))));
 
 	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END),
 				   (void *)mod_shadow_start);
-	kasan_populate_early_shadow((void *)kimg_shadow_end,
-				   (void *)KASAN_SHADOW_END);
 
-	if (kimg_shadow_start > mod_shadow_end)
-		kasan_populate_early_shadow((void *)mod_shadow_end,
-					    (void *)kimg_shadow_start);
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		BUILD_BUG_ON(VMALLOC_START != MODULES_END);
+		kasan_populate_early_shadow((void *)vmalloc_shadow_end,
+					    (void *)KASAN_SHADOW_END);
+	} else {
+		kasan_populate_early_shadow((void *)kimg_shadow_end,
+					    (void *)KASAN_SHADOW_END);
+		if (kimg_shadow_start > mod_shadow_end)
+			kasan_populate_early_shadow((void *)mod_shadow_end,
+						    (void *)kimg_shadow_start);
+	}
 
 	for_each_mem_range(i, &pa_start, &pa_end) {
 		void *start = (void *)__phys_to_virt(pa_start);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5d9550fdb9cf..d563335ad43f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -39,6 +39,7 @@
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
+#define NO_EXEC_MAPPINGS	BIT(2)	/* assumes FEAT_HPDS is not used */
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
 u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
@@ -185,10 +186,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 
 	BUG_ON(pmd_sect(pmd));
 	if (pmd_none(pmd)) {
+		pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN;
 		phys_addr_t pte_phys;
+
+		if (flags & NO_EXEC_MAPPINGS)
+			pmdval |= PMD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc(PAGE_SHIFT);
-		__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+		__pmd_populate(pmdp, pte_phys, pmdval);
 		pmd = READ_ONCE(*pmdp);
 	}
 	BUG_ON(pmd_bad(pmd));
@@ -259,10 +264,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
 	 */
 	BUG_ON(pud_sect(pud));
 	if (pud_none(pud)) {
+		pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN;
 		phys_addr_t pmd_phys;
+
+		if (flags & NO_EXEC_MAPPINGS)
+			pudval |= PUD_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc(PMD_SHIFT);
-		__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+		__pud_populate(pudp, pmd_phys, pudval);
 		pud = READ_ONCE(*pudp);
 	}
 	BUG_ON(pud_bad(pud));
@@ -306,10 +315,14 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	p4d_t p4d = READ_ONCE(*p4dp);
 
 	if (p4d_none(p4d)) {
+		p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN;
 		phys_addr_t pud_phys;
+
+		if (flags & NO_EXEC_MAPPINGS)
+			p4dval |= P4D_TABLE_PXN;
 		BUG_ON(!pgtable_alloc);
 		pud_phys = pgtable_alloc(PUD_SHIFT);
-		__p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE);
+		__p4d_populate(p4dp, pud_phys, p4dval);
 		p4d = READ_ONCE(*p4dp);
 	}
 	BUG_ON(p4d_bad(p4d));
@@ -486,14 +499,24 @@ early_param("crashkernel", enable_crash_mem_map);
 
 static void __init map_mem(pgd_t *pgdp)
 {
+	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
 	phys_addr_t kernel_start = __pa_symbol(_stext);
 	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 	phys_addr_t start, end;
-	int flags = 0;
+	int flags = NO_EXEC_MAPPINGS;
 	u64 i;
 
+	/*
+	 * Setting hierarchical PXNTable attributes on table entries covering
+	 * the linear region is only possible if it is guaranteed that no table
+	 * entries at any level are being shared between the linear region and
+	 * the vmalloc region. Check whether this is true for the PGD level, in
+	 * which case it is guaranteed to be true for all other levels as well.
+	 */
+	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
+
 	if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
-		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
 	 * Take care not to create a writable alias for the
@@ -1210,11 +1233,11 @@ void __init early_fixmap_init(void)
 		pudp = pud_offset_kimg(p4dp, addr);
 	} else {
 		if (p4d_none(p4d))
-			__p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+			__p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);
 		pudp = fixmap_pud(addr);
 	}
 	if (pud_none(READ_ONCE(*pudp)))
-		__pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+		__pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);
 	pmdp = fixmap_pmd(addr);
 	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
 
@@ -1480,7 +1503,7 @@ struct range arch_get_mappable_range(void)
 int arch_add_memory(int nid, u64 start, u64 size,
 		    struct mhp_params *params)
 {
-	int ret, flags = 0;
+	int ret, flags = NO_EXEC_MAPPINGS;
 
 	VM_BUG_ON(!mhp_range_allowed(start, size, true));
 
@@ -1490,7 +1513,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	 */
 	if (rodata_full || debug_pagealloc_enabled() ||
 	    IS_ENABLED(CONFIG_KFENCE))
-		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
+		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
 			     size, params->pgprot, __pgd_pgtable_alloc,
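
The PMD_TABLE_PXN/PUD_TABLE_PXN/P4D_TABLE_PXN attributes used above are
the hierarchical PXNTable bits of VMSAv8-64 table descriptors, with
UXNTable as their unprivileged counterpart. A sketch of the likely
definitions from this series' pgtable-hwdef.h change (not shown here;
PXNTable and UXNTable live in descriptor bits 59 and 60):

#define PMD_TABLE_PXN		(_AT(pmdval_t, 1) << 59)
#define PMD_TABLE_UXN		(_AT(pmdval_t, 1) << 60)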
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c967bfd30d2b..0a48191534ff 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -419,14 +419,17 @@ SYM_FUNC_START(__cpu_setup)
 	reset_amuserenr_el0 x1			// Disable AMU access from EL0
 
 	/*
-	 * Memory region attributes
+	 * Default values for VMSA control registers. These will be adjusted
+	 * below depending on detected CPU features.
 	 */
-	mov_q	x5, MAIR_EL1_SET
-#ifdef CONFIG_ARM64_MTE
-	mte_tcr	.req	x20
-
-	mov	mte_tcr, #0
+	mair	.req	x17
+	tcr	.req	x16
+	mov_q	mair, MAIR_EL1_SET
+	mov_q	tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
+			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
+			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
 
+#ifdef CONFIG_ARM64_MTE
 	/*
 	 * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
 	 * (ID_AA64PFR1_EL1[11:8] > 1).
@@ -438,7 +441,7 @@ SYM_FUNC_START(__cpu_setup)
 
 	/* Normal Tagged memory type at the corresponding MAIR index */
 	mov	x10, #MAIR_ATTR_NORMAL_TAGGED
-	bfi	x5, x10, #(8 *  MT_NORMAL_TAGGED), #8
+	bfi	mair, x10, #(8 *  MT_NORMAL_TAGGED), #8
 
 	/* initialize GCR_EL1: all non-zero tags excluded by default */
 	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
@@ -449,37 +452,26 @@ SYM_FUNC_START(__cpu_setup)
 	msr_s	SYS_TFSRE0_EL1, xzr
 
 	/* set the TCR_EL1 bits */
-	mov_q	mte_tcr, TCR_KASAN_HW_FLAGS
+	mov_q	x10, TCR_KASAN_HW_FLAGS
+	orr	tcr, tcr, x10
 1:
 #endif
-	msr	mair_el1, x5
-	/*
-	 * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further
-	 * adjusted if the kernel is compiled with 52bit VA support.
-	 */
-	mov_q	x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
-			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
-			TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS
-#ifdef CONFIG_ARM64_MTE
-	orr	x10, x10, mte_tcr
-	.unreq	mte_tcr
-#endif
-	tcr_clear_errata_bits x10, x9, x5
+	tcr_clear_errata_bits tcr, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
 	ldr_l		x9, vabits_actual
 	sub		x9, xzr, x9
 	add		x9, x9, #64
-	tcr_set_t1sz	x10, x9
+	tcr_set_t1sz	tcr, x9
 #else
 	ldr_l		x9, idmap_t0sz
 #endif
-	tcr_set_t0sz	x10, x9
+	tcr_set_t0sz	tcr, x9
 
 	/*
 	 * Set the IPS bits in TCR_EL1.
 	 */
-	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
+	tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6
 #ifdef CONFIG_ARM64_HW_AFDBM
 	/*
 	 * Enable hardware update of the Access Flags bit.
@@ -489,13 +481,17 @@ SYM_FUNC_START(__cpu_setup)
 	mrs	x9, ID_AA64MMFR1_EL1
 	and	x9, x9, #0xf
 	cbz	x9, 1f
-	orr	x10, x10, #TCR_HA		// hardware Access flag update
+	orr	tcr, tcr, #TCR_HA		// hardware Access flag update
 1:
 #endif	/* CONFIG_ARM64_HW_AFDBM */
-	msr	tcr_el1, x10
+	msr	mair_el1, mair
+	msr	tcr_el1, tcr
 	/*
 	 * Prepare SCTLR
 	 */
 	mov_q	x0, INIT_SCTLR_EL1_MMU_ON
 	ret					// return to head.S
+
+	.unreq	mair
+	.unreq	tcr
 SYM_FUNC_END(__cpu_setup)
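
The restructuring above assembles the final MAIR_EL1/TCR_EL1 values in the aliased `mair`/`tcr` registers first and performs the `msr` writes only once, after every feature-dependent adjustment. In C terms the flow is roughly the following (a sketch only; the TCR field values are placeholders, not the real bit assignments):

    #include <stdint.h>
    #include <stdio.h>

    /* Placeholder stand-ins for the TCR_* field macros. */
    #define TCR_BASE_FLAGS      0x0000000000000051ULL
    #define TCR_KASAN_HW_FLAGS  0x1000000000000000ULL
    #define TCR_HA              (1ULL << 39)

    int main(void)
    {
        uint64_t tcr = TCR_BASE_FLAGS;     /* defaults set up front */
        int has_mte = 1, has_hw_afdbm = 1; /* pretend feature probes */

        if (has_mte)
            tcr |= TCR_KASAN_HW_FLAGS;     /* MTE adjustment */
        if (has_hw_afdbm)
            tcr |= TCR_HA;                 /* hardware Access flag */

        /* single "msr tcr_el1" equivalent, after all adjustments */
        printf("TCR_EL1 = %#llx\n", (unsigned long long)tcr);
        return 0;
    }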
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 0e050d76b83a..a50e92ea1878 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -337,7 +337,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
 	ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
 }
 
-static void ptdump_initialize(void)
+static void __init ptdump_initialize(void)
 {
 	unsigned i, j;
 
@@ -381,7 +381,7 @@ void ptdump_check_wx(void)
 		pr_info("Checked W+X mappings: passed, no W+X pages found\n");
 }
 
-static int ptdump_init(void)
+static int __init ptdump_init(void)
 {
 	address_markers[PAGE_END_NR].start_address = PAGE_END;
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
index d29d722ec3ec..68bf1a125502 100644
--- a/arch/arm64/mm/ptdump_debugfs.c
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -16,7 +16,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 }
 DEFINE_SHOW_ATTRIBUTE(ptdump);
 
-void ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
 {
 	debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
 }
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index f81e2ec90005..666d8a9b557f 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -306,7 +306,7 @@ static ssize_t cci400_pmu_cycle_event_show(struct device *dev,
 {
 	struct dev_ext_attribute *eattr = container_of(attr,
 				struct dev_ext_attribute, attr);
-	return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var);
+	return sysfs_emit(buf, "config=0x%lx\n", (unsigned long)eattr->var);
 }
 
 static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
@@ -525,8 +525,8 @@ static ssize_t cci5xx_pmu_global_event_show(struct device *dev,
 	struct dev_ext_attribute *eattr = container_of(attr,
 					struct dev_ext_attribute, attr);
 	/* Global events have single fixed source code */
-	return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n",
-				(unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
+	return sysfs_emit(buf, "event=0x%lx,source=0x%x\n",
+			  (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL);
 }
 
 /*
@@ -696,7 +696,7 @@ static ssize_t cci_pmu_format_show(struct device *dev,
 {
 	struct dev_ext_attribute *eattr = container_of(attr,
 				struct dev_ext_attribute, attr);
-	return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
 }
 
 static ssize_t cci_pmu_event_show(struct device *dev,
@@ -705,8 +705,8 @@ static ssize_t cci_pmu_event_show(struct device *dev,
 	struct dev_ext_attribute *eattr = container_of(attr,
 				struct dev_ext_attribute, attr);
 	/* source parameter is mandatory for normal PMU events */
-	return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n",
-					 (unsigned long)eattr->var);
+	return sysfs_emit(buf, "source=?,event=0x%lx\n",
+			  (unsigned long)eattr->var);
 }
 
 static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index a0a71c1df042..96d47cb302dd 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -221,7 +221,7 @@ static ssize_t arm_ccn_pmu_format_show(struct device *dev,
 	struct dev_ext_attribute *ea = container_of(attr,
 			struct dev_ext_attribute, attr);
 
-	return snprintf(buf, PAGE_SIZE, "%s\n", (char *)ea->var);
+	return sysfs_emit(buf, "%s\n", (char *)ea->var);
 }
 
 #define CCN_FORMAT_ATTR(_name, _config) \
@@ -326,43 +326,38 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev,
 	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
 	struct arm_ccn_pmu_event *event = container_of(attr,
 			struct arm_ccn_pmu_event, attr);
-	ssize_t res;
+	int res;
 
-	res = scnprintf(buf, PAGE_SIZE, "type=0x%x", event->type);
+	res = sysfs_emit(buf, "type=0x%x", event->type);
 	if (event->event)
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",event=0x%x",
-				event->event);
+		res += sysfs_emit_at(buf, res, ",event=0x%x", event->event);
 	if (event->def)
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",%s",
-				event->def);
+		res += sysfs_emit_at(buf, res, ",%s", event->def);
 	if (event->mask)
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",mask=0x%x",
-				event->mask);
+		res += sysfs_emit_at(buf, res, ",mask=0x%x", event->mask);
 
 	/* Arguments required by an event */
 	switch (event->type) {
 	case CCN_TYPE_CYCLES:
 		break;
 	case CCN_TYPE_XP:
-		res += scnprintf(buf + res, PAGE_SIZE - res,
-				",xp=?,vc=?");
+		res += sysfs_emit_at(buf, res, ",xp=?,vc=?");
 		if (event->event == CCN_EVENT_WATCHPOINT)
-			res += scnprintf(buf + res, PAGE_SIZE - res,
+			res += sysfs_emit_at(buf, res,
 					",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
 		else
-			res += scnprintf(buf + res, PAGE_SIZE - res,
-					",bus=?");
+			res += sysfs_emit_at(buf, res, ",bus=?");
 
 		break;
 	case CCN_TYPE_MN:
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
+		res += sysfs_emit_at(buf, res, ",node=%d", ccn->mn_id);
 		break;
 	default:
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",node=?");
+		res += sysfs_emit_at(buf, res, ",node=?");
 		break;
 	}
 
-	res += scnprintf(buf + res, PAGE_SIZE - res, "\n");
+	res += sysfs_emit_at(buf, res, "\n");
 
 	return res;
 }
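
sysfs_emit() and sysfs_emit_at() carry the PAGE_SIZE bound internally and sanity-check that the buffer is page-aligned, which is why the explicit `PAGE_SIZE - res` arithmetic disappears from every call site above. The converted pattern, as a minimal sketch (struct foo_event and to_foo_event() are hypothetical; only the two sysfs helpers are real kernel API):

    /* Sketch only: foo_event and to_foo_event() are made up for
     * illustration; sysfs_emit()/sysfs_emit_at() are the real helpers. */
    static ssize_t foo_event_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
    {
        struct foo_event *e = to_foo_event(attr);  /* hypothetical */
        int len;

        len = sysfs_emit(buf, "type=0x%x", e->type);
        if (e->mask)
            len += sysfs_emit_at(buf, len, ",mask=0x%x", e->mask);
        len += sysfs_emit_at(buf, len, "\n");

        return len;
    }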
@@ -476,7 +471,7 @@ static ssize_t arm_ccn_pmu_cmp_mask_show(struct device *dev,
 	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
 	u64 *mask = arm_ccn_pmu_get_cmp_mask(ccn, attr->attr.name);
 
-	return mask ? snprintf(buf, PAGE_SIZE, "0x%016llx\n", *mask) : -EINVAL;
+	return mask ? sysfs_emit(buf, "0x%016llx\n", *mask) : -EINVAL;
 }
 
 static ssize_t arm_ccn_pmu_cmp_mask_store(struct device *dev,
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 1328159fe564..56a5c355701d 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -348,19 +348,19 @@ static ssize_t arm_cmn_event_show(struct device *dev,
 	eattr = container_of(attr, typeof(*eattr), attr);
 
 	if (eattr->type == CMN_TYPE_DTC)
-		return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type);
+		return sysfs_emit(buf, "type=0x%x\n", eattr->type);
 
 	if (eattr->type == CMN_TYPE_WP)
-		return snprintf(buf, PAGE_SIZE,
-				"type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n",
-				eattr->type, eattr->eventid);
+		return sysfs_emit(buf,
+				  "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n",
+				  eattr->type, eattr->eventid);
 
 	if (arm_cmn_is_occup_event(eattr->type, eattr->eventid))
-		return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
-				eattr->type, eattr->eventid, eattr->occupid);
+		return sysfs_emit(buf, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
+				  eattr->type, eattr->eventid, eattr->occupid);
 
-	return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x\n",
-			eattr->type, eattr->eventid);
+	return sysfs_emit(buf, "type=0x%x,eventid=0x%x\n", eattr->type,
+			  eattr->eventid);
 }
 
 static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
@@ -560,12 +560,12 @@ static ssize_t arm_cmn_format_show(struct device *dev,
 	int lo = __ffs(fmt->field), hi = __fls(fmt->field);
 
 	if (lo == hi)
-		return snprintf(buf, PAGE_SIZE, "config:%d\n", lo);
+		return sysfs_emit(buf, "config:%d\n", lo);
 
 	if (!fmt->config)
-		return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, hi);
+		return sysfs_emit(buf, "config:%d-%d\n", lo, hi);
 
-	return snprintf(buf, PAGE_SIZE, "config%d:%d-%d\n", fmt->config, lo, hi);
+	return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi);
 }
 
 #define _CMN_FORMAT_ATTR(_name, _cfg, _fld)				\
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index f2a85500258d..b6c2511d59af 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -113,7 +113,7 @@ dmc620_pmu_event_show(struct device *dev,
 
 	eattr = container_of(attr, typeof(*eattr), attr);
 
-	return sprintf(page, "event=0x%x,clkdiv2=0x%x\n", eattr->eventid, eattr->clkdiv2);
+	return sysfs_emit(page, "event=0x%x,clkdiv2=0x%x\n", eattr->eventid, eattr->clkdiv2);
 }
 
 #define DMC620_PMU_EVENT_ATTR(_name, _eventid, _clkdiv2)		\
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 0459a3403469..196faea074d0 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -136,8 +136,7 @@ static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
 {
 	struct dev_ext_attribute *eattr = container_of(attr,
 					struct dev_ext_attribute, attr);
-	return snprintf(buf, PAGE_SIZE, "event=0x%lx\n",
-					 (unsigned long)eattr->var);
+	return sysfs_emit(buf, "event=0x%lx\n", (unsigned long)eattr->var);
 }
 
 static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
@@ -146,7 +145,7 @@ static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
 {
 	struct dev_ext_attribute *eattr = container_of(attr,
 					struct dev_ext_attribute, attr);
-	return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
 }
 
 static ssize_t dsu_pmu_cpumask_show(struct device *dev,
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 933bd8410fc2..513de1f54e2d 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
+#define dev_fmt pr_fmt
 
 #include <linux/bug.h>
 #include <linux/cpumask.h>
@@ -62,7 +63,7 @@ static bool pmu_has_irq_affinity(struct device_node *node)
 	return !!of_find_property(node, "interrupt-affinity", NULL);
 }
 
-static int pmu_parse_irq_affinity(struct device_node *node, int i)
+static int pmu_parse_irq_affinity(struct device *dev, int i)
 {
 	struct device_node *dn;
 	int cpu;
@@ -72,19 +73,18 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i)
 	 * affinity matches our logical CPU order, as we used to assume.
 	 * This is fragile, so we'll warn in pmu_parse_irqs().
 	 */
-	if (!pmu_has_irq_affinity(node))
+	if (!pmu_has_irq_affinity(dev->of_node))
 		return i;
 
-	dn = of_parse_phandle(node, "interrupt-affinity", i);
+	dn = of_parse_phandle(dev->of_node, "interrupt-affinity", i);
 	if (!dn) {
-		pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n",
-			i, node);
+		dev_warn(dev, "failed to parse interrupt-affinity[%d]\n", i);
 		return -EINVAL;
 	}
 
 	cpu = of_cpu_node_to_id(dn);
 	if (cpu < 0) {
-		pr_warn("failed to find logical CPU for %pOFn\n", dn);
+		dev_warn(dev, "failed to find logical CPU for %pOFn\n", dn);
 		cpu = nr_cpu_ids;
 	}
 
@@ -98,19 +98,18 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 	int i = 0, num_irqs;
 	struct platform_device *pdev = pmu->plat_device;
 	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;
+	struct device *dev = &pdev->dev;
 
 	num_irqs = platform_irq_count(pdev);
-	if (num_irqs < 0) {
-		pr_err("unable to count PMU IRQs\n");
-		return num_irqs;
-	}
+	if (num_irqs < 0)
+		return dev_err_probe(dev, num_irqs, "unable to count PMU IRQs\n");
 
 	/*
 	 * In this case we have no idea which CPUs are covered by the PMU.
 	 * To match our prior behaviour, we assume all CPUs in this case.
 	 */
 	if (num_irqs == 0) {
-		pr_warn("no irqs for PMU, sampling events not supported\n");
+		dev_warn(dev, "no irqs for PMU, sampling events not supported\n");
 		pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 		cpumask_setall(&pmu->supported_cpus);
 		return 0;
@@ -122,10 +121,8 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 			return pmu_parse_percpu_irq(pmu, irq);
 	}
 
-	if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(pdev->dev.of_node)) {
-		pr_warn("no interrupt-affinity property for %pOF, guessing.\n",
-			pdev->dev.of_node);
-	}
+	if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(dev->of_node))
+		dev_warn(dev, "no interrupt-affinity property, guessing.\n");
 
 	for (i = 0; i < num_irqs; i++) {
 		int cpu, irq;
@@ -135,18 +132,18 @@ static int pmu_parse_irqs(struct arm_pmu *pmu)
 			continue;
 
 		if (irq_is_percpu_devid(irq)) {
-			pr_warn("multiple PPIs or mismatched SPI/PPI detected\n");
+			dev_warn(dev, "multiple PPIs or mismatched SPI/PPI detected\n");
 			return -EINVAL;
 		}
 
-		cpu = pmu_parse_irq_affinity(pdev->dev.of_node, i);
+		cpu = pmu_parse_irq_affinity(dev, i);
 		if (cpu < 0)
 			return cpu;
 		if (cpu >= nr_cpu_ids)
 			continue;
 
 		if (per_cpu(hw_events->irq, cpu)) {
-			pr_warn("multiple PMU IRQs for the same CPU detected\n");
+			dev_warn(dev, "multiple PMU IRQs for the same CPU detected\n");
 			return -EINVAL;
 		}
 
@@ -191,9 +188,8 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 			 const struct of_device_id *of_table,
 			 const struct pmu_probe_info *probe_table)
 {
-	const struct of_device_id *of_id;
 	armpmu_init_fn init_fn;
-	struct device_node *node = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
 	struct arm_pmu *pmu;
 	int ret = -ENODEV;
 
@@ -207,15 +203,14 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 	if (ret)
 		goto out_free;
 
-	if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) {
-		init_fn = of_id->data;
-
-		pmu->secure_access = of_property_read_bool(pdev->dev.of_node,
+	init_fn = of_device_get_match_data(dev);
+	if (init_fn) {
+		pmu->secure_access = of_property_read_bool(dev->of_node,
 							   "secure-reg-access");
 
 		/* arm64 systems boot only as non-secure */
 		if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) {
-			pr_warn("ignoring \"secure-reg-access\" property for arm64\n");
+			dev_warn(dev, "ignoring \"secure-reg-access\" property for arm64\n");
 			pmu->secure_access = false;
 		}
 
@@ -226,7 +221,7 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 	}
 
 	if (ret) {
-		pr_info("%pOF: failed to probe PMU!\n", node);
+		dev_err(dev, "failed to probe PMU!\n");
 		goto out_free;
 	}
 
@@ -235,15 +230,16 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 		goto out_free_irqs;
 
 	ret = armpmu_register(pmu);
-	if (ret)
-		goto out_free;
+	if (ret) {
+		dev_err(dev, "failed to register PMU devices!\n");
+		goto out_free_irqs;
+	}
 
 	return 0;
 
 out_free_irqs:
 	armpmu_free_irqs(pmu);
 out_free:
-	pr_info("%pOF: failed to register PMU devices!\n", node);
 	armpmu_free(pmu);
 	return ret;
 }
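
Two device-context conveniences drive this conversion: `#define dev_fmt pr_fmt` keeps the historical "hw perfevents: " prefix on the new dev_*() messages, and dev_err_probe() folds the log-then-return idiom into one call while staying quiet for -EPROBE_DEFER. A minimal sketch of the probe-side pattern (foo_probe() is hypothetical; platform_irq_count() and dev_err_probe() are real API):

    /* Sketch only; headers and the rest of the probe path are omitted. */
    static int foo_probe(struct platform_device *pdev)
    {
        struct device *dev = &pdev->dev;
        int num_irqs;

        num_irqs = platform_irq_count(pdev);
        if (num_irqs < 0)
            /* logs the message (unless -EPROBE_DEFER) and returns num_irqs */
            return dev_err_probe(dev, num_irqs, "unable to count PMU IRQs\n");

        return 0;
    }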
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 8ff7a67f691c..ff6fab4bae30 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -506,30 +506,24 @@ static ssize_t smmu_pmu_event_show(struct device *dev,
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
 
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
-#define SMMU_EVENT_ATTR(name, config) \
-	PMU_EVENT_ATTR(name, smmu_event_attr_##name, \
-		       config, smmu_pmu_event_show)
-SMMU_EVENT_ATTR(cycles, 0);
-SMMU_EVENT_ATTR(transaction, 1);
-SMMU_EVENT_ATTR(tlb_miss, 2);
-SMMU_EVENT_ATTR(config_cache_miss, 3);
-SMMU_EVENT_ATTR(trans_table_walk_access, 4);
-SMMU_EVENT_ATTR(config_struct_access, 5);
-SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6);
-SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7);
+#define SMMU_EVENT_ATTR(name, config)					\
+	(&((struct perf_pmu_events_attr) {				\
+		.attr = __ATTR(name, 0444, smmu_pmu_event_show, NULL),	\
+		.id = config,						\
+	}).attr.attr)
 
 static struct attribute *smmu_pmu_events[] = {
-	&smmu_event_attr_cycles.attr.attr,
-	&smmu_event_attr_transaction.attr.attr,
-	&smmu_event_attr_tlb_miss.attr.attr,
-	&smmu_event_attr_config_cache_miss.attr.attr,
-	&smmu_event_attr_trans_table_walk_access.attr.attr,
-	&smmu_event_attr_config_struct_access.attr.attr,
-	&smmu_event_attr_pcie_ats_trans_rq.attr.attr,
-	&smmu_event_attr_pcie_ats_trans_passed.attr.attr,
+	SMMU_EVENT_ATTR(cycles, 0),
+	SMMU_EVENT_ATTR(transaction, 1),
+	SMMU_EVENT_ATTR(tlb_miss, 2),
+	SMMU_EVENT_ATTR(config_cache_miss, 3),
+	SMMU_EVENT_ATTR(trans_table_walk_access, 4),
+	SMMU_EVENT_ATTR(config_struct_access, 5),
+	SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6),
+	SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7),
 	NULL
 };
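
The rewritten SMMU_EVENT_ATTR() packs each event attribute into an anonymous compound literal and takes the address of its embedded attribute, eliminating the eight named smmu_event_attr_* globals. At file scope a compound literal has static storage duration, so the resulting pointer is a valid address constant. A standalone sketch of the same trick with simplified types:

    #include <stdio.h>

    struct attr { const char *name; };
    struct events_attr { struct attr attr; unsigned long id; };

    /* Same shape as SMMU_EVENT_ATTR: address of a member of an anonymous
     * compound literal; at file scope the literal has static storage. */
    #define EVENT_ATTR(_name, _id)                  \
        (&((struct events_attr) {                   \
            .attr = { .name = #_name },             \
            .id = (_id),                            \
        }).attr)

    static struct attr *events[] = {
        EVENT_ATTR(cycles, 0),
        EVENT_ATTR(tlb_miss, 2),
        NULL,
    };

    int main(void)
    {
        for (struct attr **a = events; *a; a++)
            printf("%s\n", (*a)->name);
        return 0;
    }

Because the objects are now anonymous, the show routine still recovers the enclosing structure with container_of(), exactly as smmu_pmu_event_show() in the context above does.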
 
@@ -560,7 +554,7 @@ static ssize_t smmu_pmu_identifier_attr_show(struct device *dev,
 {
 	struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev));
 
-	return snprintf(page, PAGE_SIZE, "0x%08x\n", smmu_pmu->iidr);
+	return sysfs_emit(page, "0x%08x\n", smmu_pmu->iidr);
 }
 
 static umode_t smmu_pmu_identifier_attr_visible(struct kobject *kobj,
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index d3929ccebfd2..8a1e86ab2d8e 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -126,8 +126,7 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
 		container_of(attr, struct dev_ext_attribute, attr);
 	int cap = (long)ea->var;
 
-	return snprintf(buf, PAGE_SIZE, "%u\n",
-		arm_spe_pmu_cap_get(spe_pmu, cap));
+	return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap));
 }
 
 #define SPE_EXT_ATTR_ENTRY(_name, _func, _var)				\
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index be1f26b62ddb..2bbb93188064 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -110,7 +110,7 @@ static ssize_t ddr_perf_identifier_show(struct device *dev,
 {
 	struct ddr_pmu *pmu = dev_get_drvdata(dev);
 
-	return sprintf(page, "%s\n", pmu->devtype_data->identifier);
+	return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier);
 }
 
 static umode_t ddr_perf_identifier_attr_visible(struct kobject *kobj,
@@ -170,8 +170,7 @@ static ssize_t ddr_perf_filter_cap_show(struct device *dev,
 		container_of(attr, struct dev_ext_attribute, attr);
 	int cap = (long)ea->var;
 
-	return snprintf(buf, PAGE_SIZE, "%u\n",
-			ddr_perf_filter_cap_get(pmu, cap));
+	return sysfs_emit(buf, "%u\n", ddr_perf_filter_cap_get(pmu, cap));
 }
 
 #define PERF_EXT_ATTR_ENTRY(_name, _func, _var)				\
@@ -220,7 +219,7 @@ ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
 	struct perf_pmu_events_attr *pmu_attr;
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
 #define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)				\
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index e8377061845f..7643c9f93e36 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
-			  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o
+			  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
+			  hisi_uncore_pa_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index ac1a8c120a00..7c8a4bc21db4 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -14,12 +14,11 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/list.h>
-#include <linux/platform_device.h>
 #include <linux/smp.h>
 
 #include "hisi_uncore_pmu.h"
 
-/* DDRC register definition */
+/* DDRC register definition in v1 */
 #define DDRC_PERF_CTRL		0x010
 #define DDRC_FLUX_WR		0x380
 #define DDRC_FLUX_RD		0x384
@@ -35,12 +34,24 @@
 #define DDRC_INT_CLEAR		0x6d0
 #define DDRC_VERSION		0x710
 
+/* DDRC register definition in v2 */
+#define DDRC_V2_INT_MASK	0x528
+#define DDRC_V2_INT_STATUS	0x52c
+#define DDRC_V2_INT_CLEAR	0x530
+#define DDRC_V2_EVENT_CNT	0xe00
+#define DDRC_V2_EVENT_CTRL	0xe70
+#define DDRC_V2_EVENT_TYPE	0xe74
+#define DDRC_V2_PERF_CTRL	0xeA0
+
 /* DDRC has 8 counters */
 #define DDRC_NR_COUNTERS	0x8
-#define DDRC_PERF_CTRL_EN	0x2
+#define DDRC_V1_PERF_CTRL_EN	0x2
+#define DDRC_V2_PERF_CTRL_EN	0x1
+#define DDRC_V1_NR_EVENTS	0x7
+#define DDRC_V2_NR_EVENTS	0x90
 
 /*
- * For DDRC PMU, there are eight-events and every event has been mapped
+ * For PMU v1, there are eight events and every event has been mapped
  * to fixed-purpose counters whose register offsets are not consistent.
  * Therefore there is no write event type and we assume that the event
  * code (0 to 7) is equal to the counter index in the PMU driver.
@@ -54,73 +65,85 @@ static const u32 ddrc_reg_off[] = {
 
 /*
  * Select the counter register offset using the counter index.
- * In DDRC there are no programmable counter, the count
- * is readed form the statistics counter register itself.
+ * In PMU v1, there are no programmable counters; the count
+ * is read from the statistics counter register itself.
  */
-static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx)
+static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx)
 {
 	return ddrc_reg_off[cntr_idx];
 }
 
-static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu,
-				      struct hw_perf_event *hwc)
+static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx)
 {
-	/* Use event code as counter index */
-	u32 idx = GET_DDRC_EVENTID(hwc);
-
-	if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) {
-		dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return 0;
-	}
+	return DDRC_V2_EVENT_CNT + cntr_idx * 8;
+}
 
-	return readl(ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx));
+static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu,
+				      struct hw_perf_event *hwc)
+{
+	return readl(ddrc_pmu->base +
+		     hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
 }
 
-static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu,
+static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu,
 					struct hw_perf_event *hwc, u64 val)
 {
-	u32 idx = GET_DDRC_EVENTID(hwc);
+	writel((u32)val,
+	       ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx));
+}
 
-	if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) {
-		dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return;
-	}
+static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu,
+					 struct hw_perf_event *hwc)
+{
+	return readq(ddrc_pmu->base +
+		     hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
+}
 
-	writel((u32)val,
-	       ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx));
+static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu,
+					   struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val,
+	       ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx));
 }
 
 /*
- * For DDRC PMU, event has been mapped to fixed-purpose counter by hardware,
- * so there is no need to write event type.
+ * For DDRC PMU v1, each event has been mapped to a fixed-purpose counter by
+ * hardware, so there is no need to write the event type, while in PMU v2 the
+ * counters are programmable.
  */
 static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
 				       u32 type)
 {
+	u32 offset;
+
+	if (hha_pmu->identifier >= HISI_PMU_V2) {
+		offset = DDRC_V2_EVENT_TYPE + 4 * idx;
+		writel(type, hha_pmu->base + offset);
+	}
 }
 
-static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu)
 {
 	u32 val;
 
 	/* Set perf_enable in DDRC_PERF_CTRL to start event counting */
 	val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
-	val |= DDRC_PERF_CTRL_EN;
+	val |= DDRC_V1_PERF_CTRL_EN;
 	writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
 }
 
-static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu)
+static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu)
 {
 	u32 val;
 
 	/* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */
 	val = readl(ddrc_pmu->base + DDRC_PERF_CTRL);
-	val &= ~DDRC_PERF_CTRL_EN;
+	val &= ~DDRC_V1_PERF_CTRL_EN;
 	writel(val, ddrc_pmu->base + DDRC_PERF_CTRL);
 }
 
-static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu,
-					 struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu,
+					    struct hw_perf_event *hwc)
 {
 	u32 val;
 
@@ -130,8 +153,8 @@ static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu,
 	writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
 }
 
-static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu,
-					  struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu,
+					     struct hw_perf_event *hwc)
 {
 	u32 val;
 
@@ -141,7 +164,7 @@ static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu,
 	writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL);
 }
 
-static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event)
+static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event)
 {
 	struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu);
 	unsigned long *used_mask = ddrc_pmu->pmu_events.used_mask;
@@ -157,87 +180,117 @@ static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event)
 	return idx;
 }
 
-static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event)
+{
+	return hisi_uncore_pmu_get_event_idx(event);
+}
+
+static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+	val |= DDRC_V2_PERF_CTRL_EN;
+	writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+	val &= ~DDRC_V2_PERF_CTRL_EN;
+	writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL);
+}
+
+static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+}
+
+static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu,
 					     struct hw_perf_event *hwc)
 {
 	u32 val;
 
+	val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL);
+}
+
+static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+						struct hw_perf_event *hwc)
+{
+	u32 val;
+
 	/* Write 0 to enable interrupt */
 	val = readl(ddrc_pmu->base + DDRC_INT_MASK);
-	val &= ~(1 << GET_DDRC_EVENTID(hwc));
+	val &= ~(1 << hwc->idx);
 	writel(val, ddrc_pmu->base + DDRC_INT_MASK);
 }
 
-static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu,
-					      struct hw_perf_event *hwc)
+static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+						 struct hw_perf_event *hwc)
 {
 	u32 val;
 
 	/* Write 1 to mask interrupt */
 	val = readl(ddrc_pmu->base + DDRC_INT_MASK);
-	val |= (1 << GET_DDRC_EVENTID(hwc));
+	val |= 1 << hwc->idx;
 	writel(val, ddrc_pmu->base + DDRC_INT_MASK);
 }
 
-static irqreturn_t hisi_ddrc_pmu_isr(int irq, void *dev_id)
+static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu,
+						struct hw_perf_event *hwc)
 {
-	struct hisi_pmu *ddrc_pmu = dev_id;
-	struct perf_event *event;
-	unsigned long overflown;
-	int idx;
-
-	/* Read the DDRC_INT_STATUS register */
-	overflown = readl(ddrc_pmu->base + DDRC_INT_STATUS);
-	if (!overflown)
-		return IRQ_NONE;
+	u32 val;
 
-	/*
-	 * Find the counter index which overflowed if the bit was set
-	 * and handle it
-	 */
-	for_each_set_bit(idx, &overflown, DDRC_NR_COUNTERS) {
-		/* Write 1 to clear the IRQ status flag */
-		writel((1 << idx), ddrc_pmu->base + DDRC_INT_CLEAR);
+	val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+}
 
-		/* Get the corresponding event struct */
-		event = ddrc_pmu->pmu_events.hw_events[idx];
-		if (!event)
-			continue;
+static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu,
+						struct hw_perf_event *hwc)
+{
+	u32 val;
 
-		hisi_uncore_pmu_event_update(event);
-		hisi_uncore_pmu_set_event_period(event);
-	}
+	val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK);
+}
 
-	return IRQ_HANDLED;
+static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu)
+{
+	return readl(ddrc_pmu->base + DDRC_INT_STATUS);
 }
 
-static int hisi_ddrc_pmu_init_irq(struct hisi_pmu *ddrc_pmu,
-				  struct platform_device *pdev)
+static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu,
+					      int idx)
 {
-	int irq, ret;
-
-	/* Read and init IRQ */
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
-
-	ret = devm_request_irq(&pdev->dev, irq, hisi_ddrc_pmu_isr,
-			       IRQF_NOBALANCING | IRQF_NO_THREAD,
-			       dev_name(&pdev->dev), ddrc_pmu);
-	if (ret < 0) {
-		dev_err(&pdev->dev,
-			"Fail to request IRQ:%d ret:%d\n", irq, ret);
-		return ret;
-	}
+	writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR);
+}
 
-	ddrc_pmu->irq = irq;
+static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu)
+{
+	return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS);
+}
 
-	return 0;
+static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu,
+					      int idx)
+{
+	writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR);
 }
 
 static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = {
 	{ "HISI0233", },
-	{},
+	{ "HISI0234", },
+	{}
 };
 MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
 
@@ -269,21 +322,38 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
 	}
 
 	ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION);
+	if (ddrc_pmu->identifier >= HISI_PMU_V2) {
+		if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id",
+					     &ddrc_pmu->sub_id)) {
+			dev_err(&pdev->dev, "Cannot read sub-id!\n");
+			return -EINVAL;
+		}
+	}
 
 	return 0;
 }
 
-static struct attribute *hisi_ddrc_pmu_format_attr[] = {
+static struct attribute *hisi_ddrc_pmu_v1_format_attr[] = {
 	HISI_PMU_FORMAT_ATTR(event, "config:0-4"),
 	NULL,
 };
 
-static const struct attribute_group hisi_ddrc_pmu_format_group = {
+static const struct attribute_group hisi_ddrc_pmu_v1_format_group = {
+	.name = "format",
+	.attrs = hisi_ddrc_pmu_v1_format_attr,
+};
+
+static struct attribute *hisi_ddrc_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	NULL
+};
+
+static const struct attribute_group hisi_ddrc_pmu_v2_format_group = {
 	.name = "format",
-	.attrs = hisi_ddrc_pmu_format_attr,
+	.attrs = hisi_ddrc_pmu_v2_format_attr,
 };
 
-static struct attribute *hisi_ddrc_pmu_events_attr[] = {
+static struct attribute *hisi_ddrc_pmu_v1_events_attr[] = {
 	HISI_PMU_EVENT_ATTR(flux_wr,		0x00),
 	HISI_PMU_EVENT_ATTR(flux_rd,		0x01),
 	HISI_PMU_EVENT_ATTR(flux_wcmd,		0x02),
@@ -295,9 +365,21 @@ static struct attribute *hisi_ddrc_pmu_events_attr[] = {
 	NULL,
 };
 
-static const struct attribute_group hisi_ddrc_pmu_events_group = {
+static const struct attribute_group hisi_ddrc_pmu_v1_events_group = {
 	.name = "events",
-	.attrs = hisi_ddrc_pmu_events_attr,
+	.attrs = hisi_ddrc_pmu_v1_events_attr,
+};
+
+static struct attribute *hisi_ddrc_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(cycles,		0x00),
+	HISI_PMU_EVENT_ATTR(flux_wr,		0x83),
+	HISI_PMU_EVENT_ATTR(flux_rd,		0x84),
+	NULL
+};
+
+static const struct attribute_group hisi_ddrc_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_ddrc_pmu_v2_events_attr,
 };
 
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
@@ -323,25 +405,50 @@ static const struct attribute_group hisi_ddrc_pmu_identifier_group = {
 	.attrs = hisi_ddrc_pmu_identifier_attrs,
 };
 
-static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = {
-	&hisi_ddrc_pmu_format_group,
-	&hisi_ddrc_pmu_events_group,
+static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = {
+	&hisi_ddrc_pmu_v1_format_group,
+	&hisi_ddrc_pmu_v1_events_group,
 	&hisi_ddrc_pmu_cpumask_attr_group,
 	&hisi_ddrc_pmu_identifier_group,
 	NULL,
 };
 
-static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = {
+static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = {
+	&hisi_ddrc_pmu_v2_format_group,
+	&hisi_ddrc_pmu_v2_events_group,
+	&hisi_ddrc_pmu_cpumask_attr_group,
+	&hisi_ddrc_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = {
+	.write_evtype           = hisi_ddrc_pmu_write_evtype,
+	.get_event_idx		= hisi_ddrc_pmu_v1_get_event_idx,
+	.start_counters		= hisi_ddrc_pmu_v1_start_counters,
+	.stop_counters		= hisi_ddrc_pmu_v1_stop_counters,
+	.enable_counter		= hisi_ddrc_pmu_v1_enable_counter,
+	.disable_counter	= hisi_ddrc_pmu_v1_disable_counter,
+	.enable_counter_int	= hisi_ddrc_pmu_v1_enable_counter_int,
+	.disable_counter_int	= hisi_ddrc_pmu_v1_disable_counter_int,
+	.write_counter		= hisi_ddrc_pmu_v1_write_counter,
+	.read_counter		= hisi_ddrc_pmu_v1_read_counter,
+	.get_int_status		= hisi_ddrc_pmu_v1_get_int_status,
+	.clear_int_status	= hisi_ddrc_pmu_v1_clear_int_status,
+};
+
+static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = {
 	.write_evtype           = hisi_ddrc_pmu_write_evtype,
-	.get_event_idx		= hisi_ddrc_pmu_get_event_idx,
-	.start_counters		= hisi_ddrc_pmu_start_counters,
-	.stop_counters		= hisi_ddrc_pmu_stop_counters,
-	.enable_counter		= hisi_ddrc_pmu_enable_counter,
-	.disable_counter	= hisi_ddrc_pmu_disable_counter,
-	.enable_counter_int	= hisi_ddrc_pmu_enable_counter_int,
-	.disable_counter_int	= hisi_ddrc_pmu_disable_counter_int,
-	.write_counter		= hisi_ddrc_pmu_write_counter,
-	.read_counter		= hisi_ddrc_pmu_read_counter,
+	.get_event_idx		= hisi_ddrc_pmu_v2_get_event_idx,
+	.start_counters		= hisi_ddrc_pmu_v2_start_counters,
+	.stop_counters		= hisi_ddrc_pmu_v2_stop_counters,
+	.enable_counter		= hisi_ddrc_pmu_v2_enable_counter,
+	.disable_counter	= hisi_ddrc_pmu_v2_disable_counter,
+	.enable_counter_int	= hisi_ddrc_pmu_v2_enable_counter_int,
+	.disable_counter_int	= hisi_ddrc_pmu_v2_disable_counter_int,
+	.write_counter		= hisi_ddrc_pmu_v2_write_counter,
+	.read_counter		= hisi_ddrc_pmu_v2_read_counter,
+	.get_int_status		= hisi_ddrc_pmu_v2_get_int_status,
+	.clear_int_status	= hisi_ddrc_pmu_v2_clear_int_status,
 };
 
 static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
@@ -353,16 +460,25 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	ret = hisi_ddrc_pmu_init_irq(ddrc_pmu, pdev);
+	ret = hisi_uncore_pmu_init_irq(ddrc_pmu, pdev);
 	if (ret)
 		return ret;
 
+	if (ddrc_pmu->identifier >= HISI_PMU_V2) {
+		ddrc_pmu->counter_bits = 48;
+		ddrc_pmu->check_event = DDRC_V2_NR_EVENTS;
+		ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups;
+		ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops;
+	} else {
+		ddrc_pmu->counter_bits = 32;
+		ddrc_pmu->check_event = DDRC_V1_NR_EVENTS;
+		ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups;
+		ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops;
+	}
+
 	ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
-	ddrc_pmu->counter_bits = 32;
-	ddrc_pmu->ops = &hisi_uncore_ddrc_ops;
 	ddrc_pmu->dev = &pdev->dev;
 	ddrc_pmu->on_cpu = -1;
-	ddrc_pmu->check_event = 7;
 
 	return 0;
 }
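
With v2 the counters become 48 bits wide (`counter_bits = 48`) while reads go through readq(), so the generic update path has to reduce the free-running delta modulo 2^counter_bits; presumably hisi_uncore_pmu_event_update() (not shown in this diff) does exactly that. A standalone sketch of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    /* Delta of a wrapping counter that is only `bits` wide, as the shared
     * update helper presumably computes it (assumption: that helper is
     * not part of this diff). */
    static uint64_t counter_delta(uint64_t prev, uint64_t now, unsigned bits)
    {
        uint64_t mask = (bits == 64) ? ~0ULL : (1ULL << bits) - 1;

        return (now - prev) & mask;
    }

    int main(void)
    {
        /* 48-bit counter wrapped between the two reads */
        uint64_t prev = 0xFFFFFFFFFFF0ULL, now = 0x10ULL;

        printf("delta = %llu\n",
               (unsigned long long)counter_delta(prev, now, 48));
        return 0;   /* prints delta = 32 */
    }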
@@ -390,8 +506,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u",
-			      ddrc_pmu->sccl_id, ddrc_pmu->index_id);
+	if (ddrc_pmu->identifier >= HISI_PMU_V2)
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				      "hisi_sccl%u_ddrc%u_%u",
+				      ddrc_pmu->sccl_id, ddrc_pmu->index_id,
+				      ddrc_pmu->sub_id);
+	else
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				      "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
+				      ddrc_pmu->index_id);
+
 	ddrc_pmu->pmu = (struct pmu) {
 		.name		= name,
 		.module		= THIS_MODULE,
@@ -404,7 +528,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
 		.start		= hisi_uncore_pmu_start,
 		.stop		= hisi_uncore_pmu_stop,
 		.read		= hisi_uncore_pmu_read,
-		.attr_groups	= hisi_ddrc_pmu_attr_groups,
+		.attr_groups	= ddrc_pmu->pmu_events.attr_groups,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 	};
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 3402f1a395a8..0316fabe32f1 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/list.h>
-#include <linux/platform_device.h>
 #include <linux/smp.h>
 
 #include "hisi_uncore_pmu.h"
@@ -26,18 +25,136 @@
 #define HHA_VERSION		0x1cf0
 #define HHA_PERF_CTRL		0x1E00
 #define HHA_EVENT_CTRL		0x1E04
+#define HHA_SRCID_CTRL		0x1E08
+#define HHA_DATSRC_CTRL		0x1BF0
 #define HHA_EVENT_TYPE0		0x1E80
 /*
- * Each counter is 48-bits and [48:63] are reserved
- * which are Read-As-Zero and Writes-Ignored.
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
  */
 #define HHA_CNT0_LOWER		0x1F00
 
-/* HHA has 16-counters */
-#define HHA_NR_COUNTERS		0x10
+/* HHA PMU v1 has 16 counters and v2 only has 8 counters */
+#define HHA_V1_NR_COUNTERS	0x10
+#define HHA_V2_NR_COUNTERS	0x8
 
 #define HHA_PERF_CTRL_EN	0x1
+#define HHA_TRACETAG_EN		BIT(31)
+#define HHA_SRCID_EN		BIT(2)
+#define HHA_SRCID_CMD_SHIFT	6
+#define HHA_SRCID_MSK_SHIFT	20
+#define HHA_SRCID_CMD		GENMASK(16, 6)
+#define HHA_SRCID_MSK		GENMASK(30, 20)
+#define HHA_DATSRC_SKT_EN	BIT(23)
 #define HHA_EVTYPE_NONE		0xff
+#define HHA_V1_NR_EVENT		0x65
+#define HHA_V2_NR_EVENT		0xCE
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 22, 22);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 23, 23);
+
+static void hisi_hha_pmu_enable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+		val |= HHA_TRACETAG_EN;
+		writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_clear_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 val;
+
+	val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+	val &= ~HHA_TRACETAG_EN;
+	writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+}
+
+static void hisi_hha_pmu_config_ds(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_DATSRC_CTRL);
+		val |= HHA_DATSRC_SKT_EN;
+		writel(val, hha_pmu->base + HHA_DATSRC_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_clear_ds(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_DATSRC_CTRL);
+		val &= ~HHA_DATSRC_SKT_EN;
+		writel(val, hha_pmu->base + HHA_DATSRC_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_config_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val, msk;
+
+		msk = hisi_get_srcid_msk(event);
+		val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+		val |= HHA_SRCID_EN | (cmd << HHA_SRCID_CMD_SHIFT) |
+			(msk << HHA_SRCID_MSK_SHIFT);
+		writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_disable_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val;
+
+		val = readl(hha_pmu->base + HHA_SRCID_CTRL);
+		val &= ~(HHA_SRCID_EN | HHA_SRCID_MSK | HHA_SRCID_CMD);
+		writel(val, hha_pmu->base + HHA_SRCID_CTRL);
+	}
+}
+
+static void hisi_hha_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_hha_pmu_enable_tracetag(event);
+		hisi_hha_pmu_config_ds(event);
+		hisi_hha_pmu_config_srcid(event);
+	}
+}
+
+static void hisi_hha_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_hha_pmu_disable_srcid(event);
+		hisi_hha_pmu_clear_ds(event);
+		hisi_hha_pmu_clear_tracetag(event);
+	}
+}
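
The four HISI_PMU_EVENT_ATTR_EXTRACTOR() lines above generate hisi_get_srcid_cmd() and friends, slicing the named bit ranges out of event->attr.config1; the macro itself lives in hisi_uncore_pmu.h, which this diff does not show. A standalone sketch of the same bit slicing, with userspace stand-ins for the kernel's GENMASK_ULL()/FIELD_GET():

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-ins; a guess at what the extractor macro expands to. */
    #define GENMASK_ULL(h, l) \
        (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

    static uint64_t field_get(uint64_t mask, uint64_t reg)
    {
        return (reg & mask) / (mask & -mask);   /* shift down to bit 0 */
    }

    int main(void)
    {
        /* config1 layout from the attrs above:
         * srcid_cmd [10:0], srcid_msk [21:11], tt_en [22], ds_skt [23] */
        uint64_t config1 = 0x2AULL | (0x155ULL << 11) | (1ULL << 22);

        printf("srcid_cmd=%#llx srcid_msk=%#llx tt_en=%llu ds_skt=%llu\n",
               (unsigned long long)field_get(GENMASK_ULL(10, 0), config1),
               (unsigned long long)field_get(GENMASK_ULL(21, 11), config1),
               (unsigned long long)field_get(GENMASK_ULL(22, 22), config1),
               (unsigned long long)field_get(GENMASK_ULL(23, 23), config1));
        return 0;
    }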
 
 /*
  * Select the counter register offset using the counter index
@@ -51,29 +168,15 @@ static u32 hisi_hha_pmu_get_counter_offset(int cntr_idx)
 static u64 hisi_hha_pmu_read_counter(struct hisi_pmu *hha_pmu,
 				     struct hw_perf_event *hwc)
 {
-	u32 idx = hwc->idx;
-
-	if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) {
-		dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return 0;
-	}
-
 	/* Read 64 bits and like L3C, top 16 bits are RAZ */
-	return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx));
+	return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx));
 }
 
 static void hisi_hha_pmu_write_counter(struct hisi_pmu *hha_pmu,
 				       struct hw_perf_event *hwc, u64 val)
 {
-	u32 idx = hwc->idx;
-
-	if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) {
-		dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return;
-	}
-
 	/* Write 64 bits and like L3C, top 16 bits are WI */
-	writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx));
+	writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx));
 }
 
 static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx,
@@ -169,65 +272,20 @@ static void hisi_hha_pmu_disable_counter_int(struct hisi_pmu *hha_pmu,
 	writel(val, hha_pmu->base + HHA_INT_MASK);
 }
 
-static irqreturn_t hisi_hha_pmu_isr(int irq, void *dev_id)
+static u32 hisi_hha_pmu_get_int_status(struct hisi_pmu *hha_pmu)
 {
-	struct hisi_pmu *hha_pmu = dev_id;
-	struct perf_event *event;
-	unsigned long overflown;
-	int idx;
-
-	/* Read HHA_INT_STATUS register */
-	overflown = readl(hha_pmu->base + HHA_INT_STATUS);
-	if (!overflown)
-		return IRQ_NONE;
-
-	/*
-	 * Find the counter index which overflowed if the bit was set
-	 * and handle it
-	 */
-	for_each_set_bit(idx, &overflown, HHA_NR_COUNTERS) {
-		/* Write 1 to clear the IRQ status flag */
-		writel((1 << idx), hha_pmu->base + HHA_INT_CLEAR);
-
-		/* Get the corresponding event struct */
-		event = hha_pmu->pmu_events.hw_events[idx];
-		if (!event)
-			continue;
-
-		hisi_uncore_pmu_event_update(event);
-		hisi_uncore_pmu_set_event_period(event);
-	}
-
-	return IRQ_HANDLED;
+	return readl(hha_pmu->base + HHA_INT_STATUS);
 }
 
-static int hisi_hha_pmu_init_irq(struct hisi_pmu *hha_pmu,
-				 struct platform_device *pdev)
+static void hisi_hha_pmu_clear_int_status(struct hisi_pmu *hha_pmu, int idx)
 {
-	int irq, ret;
-
-	/* Read and init IRQ */
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
-
-	ret = devm_request_irq(&pdev->dev, irq, hisi_hha_pmu_isr,
-			      IRQF_NOBALANCING | IRQF_NO_THREAD,
-			      dev_name(&pdev->dev), hha_pmu);
-	if (ret < 0) {
-		dev_err(&pdev->dev,
-			"Fail to request IRQ:%d ret:%d\n", irq, ret);
-		return ret;
-	}
-
-	hha_pmu->irq = irq;
-
-	return 0;
+	writel(1 << idx, hha_pmu->base + HHA_INT_CLEAR);
 }
 
 static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = {
 	{ "HISI0243", },
-	{},
+	{ "HISI0244", },
+	{}
 };
 MODULE_DEVICE_TABLE(acpi, hisi_hha_pmu_acpi_match);
 
@@ -237,13 +295,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
 	unsigned long long id;
 	acpi_status status;
 
-	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
-				       "_UID", NULL, &id);
-	if (ACPI_FAILURE(status))
-		return -EINVAL;
-
-	hha_pmu->index_id = id;
-
 	/*
 	 * Use SCCL_ID and UID to identify the HHA PMU, while
 	 * SCCL_ID is in MPIDR[aff2].
@@ -253,6 +304,22 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
 		dev_err(&pdev->dev, "Can not read hha sccl-id!\n");
 		return -EINVAL;
 	}
+
+	/*
+	 * Early versions of the BIOS support _UID by mistake, so we support
+	 * both, preferring the "hisilicon,idx-id" property when available.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &hha_pmu->index_id)) {
+		status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
+					       "_UID", NULL, &id);
+		if (ACPI_FAILURE(status)) {
+			dev_err(&pdev->dev, "Cannot read idx-id!\n");
+			return -EINVAL;
+		}
+
+		hha_pmu->index_id = id;
+	}
 	/* HHA PMUs only share the same SCCL */
 	hha_pmu->ccl_id = -1;
 
@@ -267,17 +334,31 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
 	return 0;
 }
 
-static struct attribute *hisi_hha_pmu_format_attr[] = {
+static struct attribute *hisi_hha_pmu_v1_format_attr[] = {
 	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
 	NULL,
 };
 
-static const struct attribute_group hisi_hha_pmu_format_group = {
+static const struct attribute_group hisi_hha_pmu_v1_format_group = {
+	.name = "format",
+	.attrs = hisi_hha_pmu_v1_format_attr,
+};
+
+static struct attribute *hisi_hha_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:0-10"),
+	HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:11-21"),
+	HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:22"),
+	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:23"),
+	NULL
+};
+
+static const struct attribute_group hisi_hha_pmu_v2_format_group = {
 	.name = "format",
-	.attrs = hisi_hha_pmu_format_attr,
+	.attrs = hisi_hha_pmu_v2_format_attr,
 };
 
-static struct attribute *hisi_hha_pmu_events_attr[] = {
+static struct attribute *hisi_hha_pmu_v1_events_attr[] = {
 	HISI_PMU_EVENT_ATTR(rx_ops_num,		0x00),
 	HISI_PMU_EVENT_ATTR(rx_outer,		0x01),
 	HISI_PMU_EVENT_ATTR(rx_sccl,		0x02),
@@ -307,9 +388,23 @@ static struct attribute *hisi_hha_pmu_events_attr[] = {
 	NULL,
 };
 
-static const struct attribute_group hisi_hha_pmu_events_group = {
+static const struct attribute_group hisi_hha_pmu_v1_events_group = {
 	.name = "events",
-	.attrs = hisi_hha_pmu_events_attr,
+	.attrs = hisi_hha_pmu_v1_events_attr,
+};
+
+static struct attribute *hisi_hha_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_ops_num,		0x00),
+	HISI_PMU_EVENT_ATTR(rx_outer,		0x01),
+	HISI_PMU_EVENT_ATTR(rx_sccl,		0x02),
+	HISI_PMU_EVENT_ATTR(hha_retry,		0x2e),
+	HISI_PMU_EVENT_ATTR(cycles,		0x55),
+	NULL
+};
+
+static const struct attribute_group hisi_hha_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_hha_pmu_v2_events_attr,
 };
 
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
@@ -335,14 +430,22 @@ static const struct attribute_group hisi_hha_pmu_identifier_group = {
 	.attrs = hisi_hha_pmu_identifier_attrs,
 };
 
-static const struct attribute_group *hisi_hha_pmu_attr_groups[] = {
-	&hisi_hha_pmu_format_group,
-	&hisi_hha_pmu_events_group,
+static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = {
+	&hisi_hha_pmu_v1_format_group,
+	&hisi_hha_pmu_v1_events_group,
 	&hisi_hha_pmu_cpumask_attr_group,
 	&hisi_hha_pmu_identifier_group,
 	NULL,
 };
 
+static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = {
+	&hisi_hha_pmu_v2_format_group,
+	&hisi_hha_pmu_v2_events_group,
+	&hisi_hha_pmu_cpumask_attr_group,
+	&hisi_hha_pmu_identifier_group,
+	NULL
+};
+
 static const struct hisi_uncore_ops hisi_uncore_hha_ops = {
 	.write_evtype		= hisi_hha_pmu_write_evtype,
 	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
@@ -354,6 +457,10 @@ static const struct hisi_uncore_ops hisi_uncore_hha_ops = {
 	.disable_counter_int	= hisi_hha_pmu_disable_counter_int,
 	.write_counter		= hisi_hha_pmu_write_counter,
 	.read_counter		= hisi_hha_pmu_read_counter,
+	.get_int_status		= hisi_hha_pmu_get_int_status,
+	.clear_int_status	= hisi_hha_pmu_clear_int_status,
+	.enable_filter		= hisi_hha_pmu_enable_filter,
+	.disable_filter		= hisi_hha_pmu_disable_filter,
 };
 
 static int hisi_hha_pmu_dev_probe(struct platform_device *pdev,
@@ -365,16 +472,24 @@ static int hisi_hha_pmu_dev_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	ret = hisi_hha_pmu_init_irq(hha_pmu, pdev);
+	ret = hisi_uncore_pmu_init_irq(hha_pmu, pdev);
 	if (ret)
 		return ret;
 
-	hha_pmu->num_counters = HHA_NR_COUNTERS;
-	hha_pmu->counter_bits = 48;
+	if (hha_pmu->identifier >= HISI_PMU_V2) {
+		hha_pmu->counter_bits = 64;
+		hha_pmu->check_event = HHA_V2_NR_EVENT;
+		hha_pmu->pmu_events.attr_groups = hisi_hha_pmu_v2_attr_groups;
+		hha_pmu->num_counters = HHA_V2_NR_COUNTERS;
+	} else {
+		hha_pmu->counter_bits = 48;
+		hha_pmu->check_event = HHA_V1_NR_EVENT;
+		hha_pmu->pmu_events.attr_groups = hisi_hha_pmu_v1_attr_groups;
+		hha_pmu->num_counters = HHA_V1_NR_COUNTERS;
+	}
 	hha_pmu->ops = &hisi_uncore_hha_ops;
 	hha_pmu->dev = &pdev->dev;
 	hha_pmu->on_cpu = -1;
-	hha_pmu->check_event = 0x65;
 
 	return 0;
 }
@@ -416,7 +531,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
 		.start		= hisi_uncore_pmu_start,
 		.stop		= hisi_uncore_pmu_stop,
 		.read		= hisi_uncore_pmu_read,
-		.attr_groups	= hisi_hha_pmu_attr_groups,
+		.attr_groups	= hha_pmu->pmu_events.attr_groups,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 	};
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 7d792435c2aa..bf9f7772cac9 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/list.h>
-#include <linux/platform_device.h>
 #include <linux/smp.h>
 
 #include "hisi_uncore_pmu.h"
@@ -24,12 +23,17 @@
 #define L3C_INT_MASK		0x0800
 #define L3C_INT_STATUS		0x0808
 #define L3C_INT_CLEAR		0x080c
+#define L3C_CORE_CTRL           0x1b04
+#define L3C_TRACETAG_CTRL       0x1b20
+#define L3C_DATSRC_TYPE         0x1b48
+#define L3C_DATSRC_CTRL         0x1bf0
 #define L3C_EVENT_CTRL	        0x1c00
 #define L3C_VERSION		0x1cf0
 #define L3C_EVENT_TYPE0		0x1d00
 /*
- * Each counter is 48-bits and [48:63] are reserved
- * which are Read-As-Zero and Writes-Ignored.
+ * If the HW version only supports a 48-bit counter, then
+ * bits [63:48] are reserved, which are Read-As-Zero and
+ * Writes-Ignored.
  */
 #define L3C_CNTR0_LOWER		0x1e00
 
@@ -37,7 +41,186 @@
 #define L3C_NR_COUNTERS		0x8
 
 #define L3C_PERF_CTRL_EN	0x10000
+#define L3C_TRACETAG_EN		BIT(31)
+#define L3C_TRACETAG_REQ_SHIFT	7
+#define L3C_TRACETAG_MARK_EN	BIT(0)
+#define L3C_TRACETAG_REQ_EN	(L3C_TRACETAG_MARK_EN | BIT(2))
+#define L3C_TRACETAG_CORE_EN	(L3C_TRACETAG_MARK_EN | BIT(3))
+#define L3C_CORE_EN		BIT(20)
+#define L3C_COER_NONE		0x0
+#define L3C_DATSRC_MASK		0xFF
+#define L3C_DATSRC_SKT_EN	BIT(23)
+#define L3C_DATSRC_NONE		0x0
 #define L3C_EVTYPE_NONE		0xff
+#define L3C_V1_NR_EVENTS	0x59
+#define L3C_V2_NR_EVENTS	0xFF
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16);
+
+static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_req = hisi_get_tt_req(event);
+
+	if (tt_req) {
+		u32 val;
+
+		/* Set request-type for tracetag */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val |= tt_req << L3C_TRACETAG_REQ_SHIFT;
+		val |= L3C_TRACETAG_REQ_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+		/* Enable request-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val |= L3C_TRACETAG_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_req = hisi_get_tt_req(event);
+
+	if (tt_req) {
+		u32 val;
+
+		/* Clear request-type */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT);
+		val &= ~L3C_TRACETAG_REQ_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+
+		/* Disable request-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val &= ~L3C_TRACETAG_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u32 reg, reg_idx, shift, val;
+	int idx = hwc->idx;
+
+	/*
+	 * Select the appropriate datasource register (L3C_DATSRC_TYPE0/1).
+	 * There are two datasource ctrl registers for the eight hardware
+	 * counters. Each datasrc field is 8 bits: the first four hardware
+	 * counters use L3C_DATSRC_TYPE0 and the last four use
+	 * L3C_DATSRC_TYPE1.
+	 */
+	reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	val = readl(l3c_pmu->base + reg);
+	val &= ~(L3C_DATSRC_MASK << shift);
+	val |= ds_cfg << shift;
+	writel(val, l3c_pmu->base + reg);
+}
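
The register/shift arithmetic above packs four 8-bit datasrc fields into each 32-bit register. A standalone check of the mapping (register offset taken from the defines above):

    #include <stdio.h>

    #define L3C_DATSRC_TYPE 0x1b48  /* from the defines above */

    int main(void)
    {
        for (int idx = 0; idx < 8; idx++) {
            unsigned int reg = L3C_DATSRC_TYPE + (idx / 4) * 4;
            unsigned int shift = 8 * (idx % 4);

            /* counters 0-3 land in 0x1b48, counters 4-7 in 0x1b4c */
            printf("counter %d -> reg %#x, bits [%u:%u]\n",
                   idx, reg, shift + 7, shift);
        }
        return 0;
    }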
+
+static void hisi_l3c_pmu_config_ds(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_cfg = hisi_get_datasrc_cfg(event);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_cfg)
+		hisi_l3c_pmu_write_ds(event, ds_cfg);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+		val |= L3C_DATSRC_SKT_EN;
+		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_clear_ds(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 ds_cfg = hisi_get_datasrc_cfg(event);
+	u32 ds_skt = hisi_get_datasrc_skt(event);
+
+	if (ds_cfg)
+		hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE);
+
+	if (ds_skt) {
+		u32 val;
+
+		val = readl(l3c_pmu->base + L3C_DATSRC_CTRL);
+		val &= ~L3C_DATSRC_SKT_EN;
+		writel(val, l3c_pmu->base + L3C_DATSRC_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 core = hisi_get_tt_core(event);
+
+	if (core) {
+		u32 val;
+
+		/* Config and enable core information */
+		writel(core, l3c_pmu->base + L3C_CORE_CTRL);
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val |= L3C_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+		/* Enable core-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val |= L3C_TRACETAG_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu);
+	u32 core = hisi_get_tt_core(event);
+
+	if (core) {
+		u32 val;
+
+		/* Clear core information */
+		writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL);
+		val = readl(l3c_pmu->base + L3C_PERF_CTRL);
+		val &= ~L3C_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_PERF_CTRL);
+
+		/* Disable core-tracetag statistics */
+		val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL);
+		val &= ~L3C_TRACETAG_CORE_EN;
+		writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL);
+	}
+}
+
+static void hisi_l3c_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_l3c_pmu_config_req_tracetag(event);
+		hisi_l3c_pmu_config_core_tracetag(event);
+		hisi_l3c_pmu_config_ds(event);
+	}
+}
+
+static void hisi_l3c_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_l3c_pmu_clear_ds(event);
+		hisi_l3c_pmu_clear_core_tracetag(event);
+		hisi_l3c_pmu_clear_req_tracetag(event);
+	}
+}
 
 /*
  * Select the counter register offset using the counter index
@@ -50,29 +233,13 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
 static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
 				     struct hw_perf_event *hwc)
 {
-	u32 idx = hwc->idx;
-
-	if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
-		dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return 0;
-	}
-
-	/* Read 64-bits and the upper 16 bits are RAZ */
-	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }
 
 static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
 				       struct hw_perf_event *hwc, u64 val)
 {
-	u32 idx = hwc->idx;
-
-	if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
-		dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return;
-	}
-
-	/* Write 64-bits and the upper 16 bits are WI */
-	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }
 
 static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
@@ -168,81 +335,26 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
 	writel(val, l3c_pmu->base + L3C_INT_MASK);
 }
 
-static irqreturn_t hisi_l3c_pmu_isr(int irq, void *dev_id)
+static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu)
 {
-	struct hisi_pmu *l3c_pmu = dev_id;
-	struct perf_event *event;
-	unsigned long overflown;
-	int idx;
-
-	/* Read L3C_INT_STATUS register */
-	overflown = readl(l3c_pmu->base + L3C_INT_STATUS);
-	if (!overflown)
-		return IRQ_NONE;
-
-	/*
-	 * Find the counter index which overflowed if the bit was set
-	 * and handle it.
-	 */
-	for_each_set_bit(idx, &overflown, L3C_NR_COUNTERS) {
-		/* Write 1 to clear the IRQ status flag */
-		writel((1 << idx), l3c_pmu->base + L3C_INT_CLEAR);
-
-		/* Get the corresponding event struct */
-		event = l3c_pmu->pmu_events.hw_events[idx];
-		if (!event)
-			continue;
-
-		hisi_uncore_pmu_event_update(event);
-		hisi_uncore_pmu_set_event_period(event);
-	}
-
-	return IRQ_HANDLED;
+	return readl(l3c_pmu->base + L3C_INT_STATUS);
 }
 
-static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu,
-				 struct platform_device *pdev)
+static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx)
 {
-	int irq, ret;
-
-	/* Read and init IRQ */
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
-
-	ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr,
-			       IRQF_NOBALANCING | IRQF_NO_THREAD,
-			       dev_name(&pdev->dev), l3c_pmu);
-	if (ret < 0) {
-		dev_err(&pdev->dev,
-			"Fail to request IRQ:%d ret:%d\n", irq, ret);
-		return ret;
-	}
-
-	l3c_pmu->irq = irq;
-
-	return 0;
+	writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR);
 }
 
 static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
 	{ "HISI0213", },
-	{},
+	{ "HISI0214", },
+	{}
 };
 MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
 
 static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
 				  struct hisi_pmu *l3c_pmu)
 {
-	unsigned long long id;
-	acpi_status status;
-
-	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
-				       "_UID", NULL, &id);
-	if (ACPI_FAILURE(status))
-		return -EINVAL;
-
-	l3c_pmu->index_id = id;
-
 	/*
 	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
 	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
@@ -270,17 +382,31 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
 	return 0;
 }
 
-static struct attribute *hisi_l3c_pmu_format_attr[] = {
+static struct attribute *hisi_l3c_pmu_v1_format_attr[] = {
 	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
 	NULL,
 };
 
-static const struct attribute_group hisi_l3c_pmu_format_group = {
+static const struct attribute_group hisi_l3c_pmu_v1_format_group = {
+	.name = "format",
+	.attrs = hisi_l3c_pmu_v1_format_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"),
+	HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"),
+	HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"),
+	HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"),
+	NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_format_group = {
 	.name = "format",
-	.attrs = hisi_l3c_pmu_format_attr,
+	.attrs = hisi_l3c_pmu_v2_format_attr,
 };
 
-static struct attribute *hisi_l3c_pmu_events_attr[] = {
+static struct attribute *hisi_l3c_pmu_v1_events_attr[] = {
 	HISI_PMU_EVENT_ATTR(rd_cpipe,		0x00),
 	HISI_PMU_EVENT_ATTR(wr_cpipe,		0x01),
 	HISI_PMU_EVENT_ATTR(rd_hit_cpipe,	0x02),
@@ -297,9 +423,22 @@ static struct attribute *hisi_l3c_pmu_events_attr[] = {
 	NULL,
 };
 
-static const struct attribute_group hisi_l3c_pmu_events_group = {
+static const struct attribute_group hisi_l3c_pmu_v1_events_group = {
+	.name = "events",
+	.attrs = hisi_l3c_pmu_v1_events_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(l3c_hit,		0x48),
+	HISI_PMU_EVENT_ATTR(cycles,		0x7f),
+	HISI_PMU_EVENT_ATTR(l3c_ref,		0xb8),
+	HISI_PMU_EVENT_ATTR(dat_access,		0xb9),
+	NULL
+};
+
+static const struct attribute_group hisi_l3c_pmu_v2_events_group = {
 	.name = "events",
-	.attrs = hisi_l3c_pmu_events_attr,
+	.attrs = hisi_l3c_pmu_v2_events_attr,
 };
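+
+/*
+ * Hypothetical v2 usage sketch (SCCL/L3C instance numbers depend on the
+ * platform topology):
+ *   perf stat -e hisi_sccl1_l3c0/dat_access,tt_core=0x3/
+ * would count dat_access filtered by the tt_core selection in config1
+ * bits [7:0].
+ */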
 
 static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
@@ -325,14 +464,22 @@ static const struct attribute_group hisi_l3c_pmu_identifier_group = {
 	.attrs = hisi_l3c_pmu_identifier_attrs,
 };
 
-static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
-	&hisi_l3c_pmu_format_group,
-	&hisi_l3c_pmu_events_group,
+static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = {
+	&hisi_l3c_pmu_v1_format_group,
+	&hisi_l3c_pmu_v1_events_group,
 	&hisi_l3c_pmu_cpumask_attr_group,
 	&hisi_l3c_pmu_identifier_group,
 	NULL,
 };
 
+static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = {
+	&hisi_l3c_pmu_v2_format_group,
+	&hisi_l3c_pmu_v2_events_group,
+	&hisi_l3c_pmu_cpumask_attr_group,
+	&hisi_l3c_pmu_identifier_group,
+	NULL
+};
+
 static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
 	.write_evtype		= hisi_l3c_pmu_write_evtype,
 	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
@@ -344,6 +491,10 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
 	.disable_counter_int	= hisi_l3c_pmu_disable_counter_int,
 	.write_counter		= hisi_l3c_pmu_write_counter,
 	.read_counter		= hisi_l3c_pmu_read_counter,
+	.get_int_status		= hisi_l3c_pmu_get_int_status,
+	.clear_int_status	= hisi_l3c_pmu_clear_int_status,
+	.enable_filter		= hisi_l3c_pmu_enable_filter,
+	.disable_filter		= hisi_l3c_pmu_disable_filter,
 };
 
 static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
@@ -355,16 +506,24 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
-	ret = hisi_l3c_pmu_init_irq(l3c_pmu, pdev);
+	ret = hisi_uncore_pmu_init_irq(l3c_pmu, pdev);
 	if (ret)
 		return ret;
 
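+	/*
+	 * v2 PMUs widen the counters from 48 to 64 bits and extend the
+	 * valid event code range.
+	 */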
+	if (l3c_pmu->identifier >= HISI_PMU_V2) {
+		l3c_pmu->counter_bits = 64;
+		l3c_pmu->check_event = L3C_V2_NR_EVENTS;
+		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups;
+	} else {
+		l3c_pmu->counter_bits = 48;
+		l3c_pmu->check_event = L3C_V1_NR_EVENTS;
+		l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups;
+	}
+
 	l3c_pmu->num_counters = L3C_NR_COUNTERS;
-	l3c_pmu->counter_bits = 48;
 	l3c_pmu->ops = &hisi_uncore_l3c_ops;
 	l3c_pmu->dev = &pdev->dev;
 	l3c_pmu->on_cpu = -1;
-	l3c_pmu->check_event = 0x59;
 
 	return 0;
 }
@@ -392,8 +551,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	/*
+	 * CCL_ID is used to identify the L3C within an SCCL; earlier
+	 * revisions used _UID for this by mistake.
+	 */
 	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
-			      l3c_pmu->sccl_id, l3c_pmu->index_id);
+			      l3c_pmu->sccl_id, l3c_pmu->ccl_id);
 	l3c_pmu->pmu = (struct pmu) {
 		.name		= name,
 		.module		= THIS_MODULE,
@@ -406,7 +569,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
 		.start		= hisi_uncore_pmu_start,
 		.stop		= hisi_uncore_pmu_stop,
 		.read		= hisi_uncore_pmu_read,
-		.attr_groups	= hisi_l3c_pmu_attr_groups,
+		.attr_groups	= l3c_pmu->pmu_events.attr_groups,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 	};
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
new file mode 100644
index 000000000000..14f23eb31248
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon PA uncore Hardware event counters support
+ *
+ * Copyright (C) 2020 HiSilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* PA register definition */
+#define PA_PERF_CTRL			0x1c00
+#define PA_EVENT_CTRL			0x1c04
+#define PA_TT_CTRL			0x1c08
+#define PA_TGTID_CTRL			0x1c14
+#define PA_SRCID_CTRL			0x1c18
+#define PA_INT_MASK			0x1c70
+#define PA_INT_STATUS			0x1c78
+#define PA_INT_CLEAR			0x1c7c
+#define PA_EVENT_TYPE0			0x1c80
+#define PA_PMU_VERSION			0x1cf0
+#define PA_EVENT_CNT0_L			0x1f00
+
+#define PA_EVTYPE_MASK			0xff
+#define PA_NR_COUNTERS			0x8
+#define PA_PERF_CTRL_EN			BIT(0)
+#define PA_TRACETAG_EN			BIT(4)
+#define PA_TGTID_EN			BIT(11)
+#define PA_SRCID_EN			BIT(11)
+#define PA_TGTID_NONE			0
+#define PA_SRCID_NONE			0
+#define PA_TGTID_MSK_SHIFT		12
+#define PA_SRCID_MSK_SHIFT		12
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_cmd, config1, 10, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_msk, config1, 21, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+
+static void hisi_pa_pmu_enable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(pa_pmu->base + PA_TT_CTRL);
+		val |= PA_TRACETAG_EN;
+		writel(val, pa_pmu->base + PA_TT_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_clear_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(pa_pmu->base + PA_TT_CTRL);
+		val &= ~PA_TRACETAG_EN;
+		writel(val, pa_pmu->base + PA_TT_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_config_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_tgtid_cmd(event);
+
+	if (cmd) {
+		u32 msk = hisi_get_tgtid_msk(event);
+		u32 val = cmd | PA_TGTID_EN | (msk << PA_TGTID_MSK_SHIFT);
+
+		writel(val, pa_pmu->base + PA_TGTID_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_clear_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_tgtid_cmd(event);
+
+	if (cmd)
+		writel(PA_TGTID_NONE, pa_pmu->base + PA_TGTID_CTRL);
+}
+
+static void hisi_pa_pmu_config_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 msk = hisi_get_srcid_msk(event);
+		u32 val = cmd | PA_SRCID_EN | (msk << PA_SRCID_MSK_SHIFT);
+
+		writel(val, pa_pmu->base + PA_SRCID_CTRL);
+	}
+}
+
+static void hisi_pa_pmu_clear_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd)
+		writel(PA_SRCID_NONE, pa_pmu->base + PA_SRCID_CTRL);
+}
+
+static void hisi_pa_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_pa_pmu_enable_tracetag(event);
+		hisi_pa_pmu_config_srcid(event);
+		hisi_pa_pmu_config_tgtid(event);
+	}
+}
+
+static void hisi_pa_pmu_disable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_pa_pmu_clear_tgtid(event);
+		hisi_pa_pmu_clear_srcid(event);
+		hisi_pa_pmu_clear_tracetag(event);
+	}
+}
+
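+/* Counters are 64-bit registers, laid out contiguously from PA_EVENT_CNT0_L */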
+static u32 hisi_pa_pmu_get_counter_offset(int idx)
+{
+	return (PA_EVENT_CNT0_L + idx * 8);
+}
+
+static u64 hisi_pa_pmu_read_counter(struct hisi_pmu *pa_pmu,
+				    struct hw_perf_event *hwc)
+{
+	return readq(pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_pa_pmu_write_counter(struct hisi_pmu *pa_pmu,
+				      struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_pa_pmu_write_evtype(struct hisi_pmu *pa_pmu, int idx,
+				     u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register (PA_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event codes are 8 bits: the first 4 hardware counters use
+	 * PA_EVENT_TYPE0 and the last 4 use PA_EVENT_TYPE1.
+	 */
+	reg = PA_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to the PA_EVENT_TYPEx register */
+	val = readl(pa_pmu->base + reg);
+	val &= ~(PA_EVTYPE_MASK << shift);
+	val |= (type << shift);
+	writel(val, pa_pmu->base + reg);
+}
+
+static void hisi_pa_pmu_start_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val |= PA_PERF_CTRL_EN;
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_stop_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val &= ~(PA_PERF_CTRL_EN);
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter(struct hisi_pmu *pa_pmu,
+				       struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu,
+					   struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(pa_pmu->base + PA_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_INT_MASK);
+}
+
+static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(pa_pmu->base + PA_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_INT_MASK);
+}
+
+static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu)
+{
+	return readl(pa_pmu->base + PA_INT_STATUS);
+}
+
+static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
+{
+	writel(1 << idx, pa_pmu->base + PA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
+	{ "HISI0273", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+
+static int hisi_pa_pmu_init_data(struct platform_device *pdev,
+				   struct hisi_pmu *pa_pmu)
+{
+	/*
+	 * Use the SCCL_ID and the index ID to identify the PA PMU.
+	 * SCCL_ID is the ID of the SCCL nearest to this SICL, and a
+	 * CPU core from that SCCL is chosen to manage this PMU.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &pa_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Cannot read sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &pa_pmu->index_id)) {
+		dev_err(&pdev->dev, "Cannot read idx-id!\n");
+		return -EINVAL;
+	}
+
+	pa_pmu->ccl_id = -1;
+
+	pa_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(pa_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n");
+		return PTR_ERR(pa_pmu->base);
+	}
+
+	pa_pmu->identifier = readl(pa_pmu->base + PA_PMU_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_pa_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tgtid_cmd, "config1:0-10"),
+	HISI_PMU_FORMAT_ATTR(tgtid_msk, "config1:11-21"),
+	HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"),
+	HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"),
+	HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"),
+	NULL,
+};
+
+static const struct attribute_group hisi_pa_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_pa_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_pa_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_req,		0x40),
+	HISI_PMU_EVENT_ATTR(tx_req,             0x5c),
+	HISI_PMU_EVENT_ATTR(cycle,		0x78),
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_pa_pmu_v2_events_attr,
+};
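+
+/*
+ * Hypothetical usage sketch (SICL/PA instance numbers depend on the
+ * platform):
+ *   perf stat -e hisi_sicl0_pa0/rx_req,tracetag_en=1/
+ * would count rx_req restricted to trace-tagged transactions.
+ */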
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_pa_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = {
+	.attrs = hisi_pa_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_pa_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_pa_pmu_identifier_attrs[] = {
+	&hisi_pa_pmu_identifier_attr.attr,
+	NULL
+};
+
+static struct attribute_group hisi_pa_pmu_identifier_group = {
+	.attrs = hisi_pa_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = {
+	&hisi_pa_pmu_v2_format_group,
+	&hisi_pa_pmu_v2_events_group,
+	&hisi_pa_pmu_cpumask_attr_group,
+	&hisi_pa_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_pa_ops = {
+	.write_evtype		= hisi_pa_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_pa_pmu_start_counters,
+	.stop_counters		= hisi_pa_pmu_stop_counters,
+	.enable_counter		= hisi_pa_pmu_enable_counter,
+	.disable_counter	= hisi_pa_pmu_disable_counter,
+	.enable_counter_int	= hisi_pa_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_pa_pmu_disable_counter_int,
+	.write_counter		= hisi_pa_pmu_write_counter,
+	.read_counter		= hisi_pa_pmu_read_counter,
+	.get_int_status		= hisi_pa_pmu_get_int_status,
+	.clear_int_status	= hisi_pa_pmu_clear_int_status,
+	.enable_filter		= hisi_pa_pmu_enable_filter,
+	.disable_filter		= hisi_pa_pmu_disable_filter,
+};
+
+static int hisi_pa_pmu_dev_probe(struct platform_device *pdev,
+				 struct hisi_pmu *pa_pmu)
+{
+	int ret;
+
+	ret = hisi_pa_pmu_init_data(pdev, pa_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(pa_pmu, pdev);
+	if (ret)
+		return ret;
+
+	pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups;
+	pa_pmu->num_counters = PA_NR_COUNTERS;
+	pa_pmu->ops = &hisi_uncore_pa_ops;
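+	/* Upper bound of the valid event code range, checked by common code */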
+	pa_pmu->check_event = 0xB0;
+	pa_pmu->counter_bits = 64;
+	pa_pmu->dev = &pdev->dev;
+	pa_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_pa_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *pa_pmu;
+	char *name;
+	int ret;
+
+	pa_pmu = devm_kzalloc(&pdev->dev, sizeof(*pa_pmu), GFP_KERNEL);
+	if (!pa_pmu)
+		return -ENOMEM;
+
+	ret = hisi_pa_pmu_dev_probe(pdev, pa_pmu);
+	if (ret)
+		return ret;
+	/*
+	 * The PA is attached to a SICL, and a CPU core from the nearest
+	 * SCCL is chosen to manage this PMU. That SCCL's SCCL_ID is the
+	 * SICL_ID plus one, hence the "sccl_id - 1" below.
+	 */
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u",
+			      pa_pmu->sccl_id - 1, pa_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+				       &pa_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	pa_pmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= hisi_uncore_pmu_event_init,
+		.pmu_enable	= hisi_uncore_pmu_enable,
+		.pmu_disable	= hisi_uncore_pmu_disable,
+		.add		= hisi_uncore_pmu_add,
+		.del		= hisi_uncore_pmu_del,
+		.start		= hisi_uncore_pmu_start,
+		.stop		= hisi_uncore_pmu_stop,
+		.read		= hisi_uncore_pmu_read,
+		.attr_groups    = pa_pmu->pmu_events.attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	ret = perf_pmu_register(&pa_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+					    &pa_pmu->node);
+		irq_set_affinity_hint(pa_pmu->irq, NULL);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, pa_pmu);
+	return ret;
+}
+
+static int hisi_pa_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pa_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+					    &pa_pmu->node);
+	irq_set_affinity_hint(pa_pmu->irq, NULL);
+
+	return 0;
+}
+
+static struct platform_driver hisi_pa_pmu_driver = {
+	.driver = {
+		.name = "hisi_pa_pmu",
+		.acpi_match_table = hisi_pa_pmu_acpi_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_pa_pmu_probe,
+	.remove = hisi_pa_pmu_remove,
+};
+
+static int __init hisi_pa_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+				      "AP_PERF_ARM_HISI_PA_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("PA PMU: cpuhp state setup failed, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_pa_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE);
+
+	return ret;
+}
+module_init(hisi_pa_pmu_module_init);
+
+static void __exit hisi_pa_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_pa_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE);
+}
+module_exit(hisi_pa_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 9dbdc3fc3bb4..13c68b5e39c4 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -21,7 +21,7 @@
 #include "hisi_uncore_pmu.h"
 
 #define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff)
-#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1)
+#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0))
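+/* e.g. HISI_MAX_PERIOD(48) == GENMASK_ULL(47, 0) == 2^48 - 1 */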
 
 /*
  * PMU format attributes
@@ -33,7 +33,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev,
 
 	eattr = container_of(attr, struct dev_ext_attribute, attr);
 
-	return sprintf(buf, "%s\n", (char *)eattr->var);
+	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
 }
 EXPORT_SYMBOL_GPL(hisi_format_sysfs_show);
 
@@ -47,7 +47,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev,
 
 	eattr = container_of(attr, struct dev_ext_attribute, attr);
 
-	return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var);
+	return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var);
 }
 EXPORT_SYMBOL_GPL(hisi_event_sysfs_show);
 
@@ -59,7 +59,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev,
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
 
-	return sprintf(buf, "%d\n", hisi_pmu->on_cpu);
+	return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu);
 }
 EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show);
 
@@ -96,12 +96,6 @@ static bool hisi_validate_event_group(struct perf_event *event)
 	return counters <= hisi_pmu->num_counters;
 }
 
-int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx)
-{
-	return idx >= 0 && idx < hisi_pmu->num_counters;
-}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_counter_valid);
-
 int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
@@ -125,19 +119,68 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));
 
-	return snprintf(page, PAGE_SIZE, "0x%08x\n", hisi_pmu->identifier);
+	return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
 }
 EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);
 
 static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
 {
-	if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) {
-		dev_err(hisi_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return;
+	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
+}
+
+static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
+{
+	struct hisi_pmu *hisi_pmu = data;
+	struct perf_event *event;
+	unsigned long overflown;
+	int idx;
+
+	overflown = hisi_pmu->ops->get_int_status(hisi_pmu);
+	if (!overflown)
+		return IRQ_NONE;
+
+	/*
+	 * Find each counter whose overflow bit is set and handle the
+	 * corresponding event.
+	 */
+	for_each_set_bit(idx, &overflown, hisi_pmu->num_counters) {
+		/* Write 1 to clear the IRQ status flag */
+		hisi_pmu->ops->clear_int_status(hisi_pmu, idx);
+		/* Get the corresponding event struct */
+		event = hisi_pmu->pmu_events.hw_events[idx];
+		if (!event)
+			continue;
+
+		hisi_uncore_pmu_event_update(event);
+		hisi_uncore_pmu_set_event_period(event);
 	}
 
-	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
+	return IRQ_HANDLED;
+}
+
+int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
+			     struct platform_device *pdev)
+{
+	int irq, ret;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(&pdev->dev), hisi_pmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Failed to request IRQ %d, ret = %d.\n", irq, ret);
+		return ret;
+	}
+
+	hisi_pmu->irq = irq;
+
+	return 0;
 }
+EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq);
 
 int hisi_uncore_pmu_event_init(struct perf_event *event)
 {
@@ -202,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event)
 	hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx,
 				    HISI_GET_EVENTID(event));
 
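+	/*
+	 * The filter callbacks are optional; only PMUs that support
+	 * filtering provide them.
+	 */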
+	if (hisi_pmu->ops->enable_filter)
+		hisi_pmu->ops->enable_filter(event);
+
 	hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc);
 	hisi_pmu->ops->enable_counter(hisi_pmu, hwc);
 }
@@ -216,6 +262,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event)
 
 	hisi_pmu->ops->disable_counter(hisi_pmu, hwc);
 	hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc);
+
+	if (hisi_pmu->ops->disable_filter)
+		hisi_pmu->ops->disable_filter(event);
 }
 
 void hisi_uncore_pmu_set_event_period(struct perf_event *event)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 25b7cbe1f818..ea9d89bbc1ea 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -11,16 +11,19 @@
 #ifndef __HISI_UNCORE_PMU_H__
 #define __HISI_UNCORE_PMU_H__
 
+#include <linux/bitfield.h>
 #include <linux/cpumask.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/perf_event.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt)     "hisi_pmu: " fmt
 
+#define HISI_PMU_V2		0x30
 #define HISI_MAX_COUNTERS 0x10
 #define to_hisi_pmu(p)	(container_of(p, struct hisi_pmu, pmu))
 
@@ -34,6 +37,12 @@
 #define HISI_PMU_EVENT_ATTR(_name, _config)		\
 	HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config)
 
+#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo)        \
+	static inline u32 hisi_get_##name(struct perf_event *event)            \
+	{                                                                  \
+		return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config);  \
+	}
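+
+/*
+ * For example, HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44)
+ * defines hisi_get_tracetag_en(), returning bit 44 of the event's
+ * attr.config1.
+ */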
+
 struct hisi_pmu;
 
 struct hisi_uncore_ops {
@@ -47,11 +56,16 @@ struct hisi_uncore_ops {
 	void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *);
 	void (*start_counters)(struct hisi_pmu *);
 	void (*stop_counters)(struct hisi_pmu *);
+	u32 (*get_int_status)(struct hisi_pmu *hisi_pmu);
+	void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx);
+	void (*enable_filter)(struct perf_event *event);
+	void (*disable_filter)(struct perf_event *event);
 };
 
 struct hisi_pmu_hwevents {
 	struct perf_event *hw_events[HISI_MAX_COUNTERS];
 	DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS);
+	const struct attribute_group **attr_groups;
 };
 
 /* Generic pmu struct for different pmu types */
@@ -71,6 +85,8 @@ struct hisi_pmu {
 	void __iomem *base;
 	/* the ID of the PMU modules */
 	u32 index_id;
+	/* For DDRC PMU v2: each DDRC has more than one DMC */
+	u32 sub_id;
 	int num_counters;
 	int counter_bits;
 	/* check event code range */
@@ -78,7 +94,6 @@ struct hisi_pmu {
 	u32 identifier;
 };
 
-int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx);
 int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
 void hisi_uncore_pmu_read(struct perf_event *event);
 int hisi_uncore_pmu_add(struct perf_event *event, int flags);
@@ -102,6 +117,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node);
 ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
 					     struct device_attribute *attr,
 					     char *page);
-
+int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu,
+			     struct platform_device *pdev);
 
 #endif /* __HISI_UNCORE_PMU_H__ */
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
new file mode 100644
index 000000000000..46be312fa126
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon SLLC uncore Hardware event counters support
+ *
+ * Copyright (C) 2020 HiSilicon Limited
+ * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */
+#include <linux/acpi.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* SLLC register definition */
+#define SLLC_INT_MASK			0x0814
+#define SLLC_INT_STATUS			0x0818
+#define SLLC_INT_CLEAR			0x081c
+#define SLLC_PERF_CTRL			0x1c00
+#define SLLC_SRCID_CTRL			0x1c04
+#define SLLC_TGTID_CTRL			0x1c08
+#define SLLC_EVENT_CTRL			0x1c14
+#define SLLC_EVENT_TYPE0		0x1c18
+#define SLLC_VERSION			0x1cf0
+#define SLLC_EVENT_CNT0_L		0x1d00
+
+#define SLLC_EVTYPE_MASK		0xff
+#define SLLC_PERF_CTRL_EN		BIT(0)
+#define SLLC_FILT_EN			BIT(1)
+#define SLLC_TRACETAG_EN		BIT(2)
+#define SLLC_SRCID_EN			BIT(4)
+#define SLLC_SRCID_NONE			0x0
+#define SLLC_TGTID_EN			BIT(5)
+#define SLLC_TGTID_NONE			0x0
+#define SLLC_TGTID_MIN_SHIFT		1
+#define SLLC_TGTID_MAX_SHIFT		12
+#define SLLC_SRCID_CMD_SHIFT		1
+#define SLLC_SRCID_MSK_SHIFT		12
+#define SLLC_NR_EVENTS			0x80
+
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33);
+HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44);
+
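+/* A tgtid window is programmed only when max is non-zero and not below min */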
+static bool tgtid_is_valid(u32 max, u32 min)
+{
+	return max > 0 && max >= min;
+}
+
+static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val |= SLLC_TRACETAG_EN | SLLC_FILT_EN;
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 tt_en = hisi_get_tracetag_en(event);
+
+	if (tt_en) {
+		u32 val;
+
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN);
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_config_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 min = hisi_get_tgtid_min(event);
+	u32 max = hisi_get_tgtid_max(event);
+
+	if (tgtid_is_valid(max, min)) {
+		u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT);
+
+		writel(val, sllc_pmu->base + SLLC_TGTID_CTRL);
+		/* Enable the tgtid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val |= SLLC_TGTID_EN | SLLC_FILT_EN;
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 min = hisi_get_tgtid_min(event);
+	u32 max = hisi_get_tgtid_max(event);
+
+	if (tgtid_is_valid(max, min)) {
+		u32 val;
+
+		writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL);
+		/* Disable the tgtid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN);
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_config_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val, msk;
+
+		msk = hisi_get_srcid_msk(event);
+		val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT);
+		writel(val, sllc_pmu->base + SLLC_SRCID_CTRL);
+		/* Enable the srcid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val |= SLLC_SRCID_EN | SLLC_FILT_EN;
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_clear_srcid(struct perf_event *event)
+{
+	struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu);
+	u32 cmd = hisi_get_srcid_cmd(event);
+
+	if (cmd) {
+		u32 val;
+
+		writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL);
+		/* Disable the srcid */
+		val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+		val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN);
+		writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+	}
+}
+
+static void hisi_sllc_pmu_enable_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_sllc_pmu_enable_tracetag(event);
+		hisi_sllc_pmu_config_srcid(event);
+		hisi_sllc_pmu_config_tgtid(event);
+	}
+}
+
+static void hisi_sllc_pmu_clear_filter(struct perf_event *event)
+{
+	if (event->attr.config1 != 0x0) {
+		hisi_sllc_pmu_disable_tracetag(event);
+		hisi_sllc_pmu_clear_srcid(event);
+		hisi_sllc_pmu_clear_tgtid(event);
+	}
+}
+
+static u32 hisi_sllc_pmu_get_counter_offset(int idx)
+{
+	return (SLLC_EVENT_CNT0_L + idx * 8);
+}
+
+static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu,
+				      struct hw_perf_event *hwc)
+{
+	return readq(sllc_pmu->base +
+		     hisi_sllc_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu,
+					struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, sllc_pmu->base +
+	       hisi_sllc_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx,
+				       u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register (SLLC_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event codes are 8 bits: the first 4 hardware counters use
+	 * SLLC_EVENT_TYPE0 and the last 4 use SLLC_EVENT_TYPE1.
+	 */
+	reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to SLLC_EVENT_TYPEx Register */
+	val = readl(sllc_pmu->base + reg);
+	val &= ~(SLLC_EVTYPE_MASK << shift);
+	val |= (type << shift);
+	writel(val, sllc_pmu->base + reg);
+}
+
+static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+	val |= SLLC_PERF_CTRL_EN;
+	writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+}
+
+static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_PERF_CTRL);
+	val &= ~(SLLC_PERF_CTRL_EN);
+	writel(val, sllc_pmu->base + SLLC_PERF_CTRL);
+}
+
+static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu,
+					 struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+}
+
+static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu,
+					  struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, sllc_pmu->base + SLLC_EVENT_CTRL);
+}
+
+static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_INT_MASK);
+	/* Write 0 to enable interrupt */
+	val &= ~(1 << hwc->idx);
+	writel(val, sllc_pmu->base + SLLC_INT_MASK);
+}
+
+static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu,
+					      struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	val = readl(sllc_pmu->base + SLLC_INT_MASK);
+	/* Write 1 to mask interrupt */
+	val |= 1 << hwc->idx;
+	writel(val, sllc_pmu->base + SLLC_INT_MASK);
+}
+
+static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu)
+{
+	return readl(sllc_pmu->base + SLLC_INT_STATUS);
+}
+
+static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx)
+{
+	writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = {
+	{ "HISI0263", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match);
+
+static int hisi_sllc_pmu_init_data(struct platform_device *pdev,
+				   struct hisi_pmu *sllc_pmu)
+{
+	/*
+	 * Use the SCCL_ID and the index ID to identify the SLLC PMU;
+	 * the SCCL_ID of the managing CPU comes from its MPIDR_EL1.
+	 */
+	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
+				     &sllc_pmu->sccl_id)) {
+		dev_err(&pdev->dev, "Cannot read sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id",
+				     &sllc_pmu->index_id)) {
+		dev_err(&pdev->dev, "Cannot read idx-id!\n");
+		return -EINVAL;
+	}
+
+	/* The SLLC is shared by the whole SCCL rather than one CCL */
+	sllc_pmu->ccl_id = -1;
+
+	sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(sllc_pmu->base)) {
+		dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n");
+		return PTR_ERR(sllc_pmu->base);
+	}
+
+	sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION);
+
+	return 0;
+}
+
+static struct attribute *hisi_sllc_pmu_v2_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),
+	HISI_PMU_FORMAT_ATTR(tgtid_min, "config1:0-10"),
+	HISI_PMU_FORMAT_ATTR(tgtid_max, "config1:11-21"),
+	HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"),
+	HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"),
+	HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"),
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_v2_format_group = {
+	.name = "format",
+	.attrs = hisi_sllc_pmu_v2_format_attr,
+};
+
+static struct attribute *hisi_sllc_pmu_v2_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rx_req,             0x30),
+	HISI_PMU_EVENT_ATTR(rx_data,            0x31),
+	HISI_PMU_EVENT_ATTR(tx_req,             0x34),
+	HISI_PMU_EVENT_ATTR(tx_data,            0x35),
+	HISI_PMU_EVENT_ATTR(cycles,             0x09),
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_v2_events_group = {
+	.name = "events",
+	.attrs = hisi_sllc_pmu_v2_events_attr,
+};
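+
+/*
+ * Hypothetical usage sketch (SCCL/SLLC instance numbers depend on the
+ * platform):
+ *   perf stat -e hisi_sccl1_sllc0/rx_data,tgtid_min=0x1,tgtid_max=0x2/
+ * would count rx_data only for targets whose ID falls in the given window.
+ */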
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL
+};
+
+static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = {
+	.attrs = hisi_sllc_pmu_cpumask_attrs,
+};
+
+static struct device_attribute hisi_sllc_pmu_identifier_attr =
+	__ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL);
+
+static struct attribute *hisi_sllc_pmu_identifier_attrs[] = {
+	&hisi_sllc_pmu_identifier_attr.attr,
+	NULL
+};
+
+static struct attribute_group hisi_sllc_pmu_identifier_group = {
+	.attrs = hisi_sllc_pmu_identifier_attrs,
+};
+
+static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = {
+	&hisi_sllc_pmu_v2_format_group,
+	&hisi_sllc_pmu_v2_events_group,
+	&hisi_sllc_pmu_cpumask_attr_group,
+	&hisi_sllc_pmu_identifier_group,
+	NULL
+};
+
+static const struct hisi_uncore_ops hisi_uncore_sllc_ops = {
+	.write_evtype		= hisi_sllc_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx,
+	.start_counters		= hisi_sllc_pmu_start_counters,
+	.stop_counters		= hisi_sllc_pmu_stop_counters,
+	.enable_counter		= hisi_sllc_pmu_enable_counter,
+	.disable_counter	= hisi_sllc_pmu_disable_counter,
+	.enable_counter_int	= hisi_sllc_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_sllc_pmu_disable_counter_int,
+	.write_counter		= hisi_sllc_pmu_write_counter,
+	.read_counter		= hisi_sllc_pmu_read_counter,
+	.get_int_status		= hisi_sllc_pmu_get_int_status,
+	.clear_int_status	= hisi_sllc_pmu_clear_int_status,
+	.enable_filter		= hisi_sllc_pmu_enable_filter,
+	.disable_filter		= hisi_sllc_pmu_clear_filter,
+};
+
+static int hisi_sllc_pmu_dev_probe(struct platform_device *pdev,
+				   struct hisi_pmu *sllc_pmu)
+{
+	int ret;
+
+	ret = hisi_sllc_pmu_init_data(pdev, sllc_pmu);
+	if (ret)
+		return ret;
+
+	ret = hisi_uncore_pmu_init_irq(sllc_pmu, pdev);
+	if (ret)
+		return ret;
+
+	sllc_pmu->pmu_events.attr_groups = hisi_sllc_pmu_v2_attr_groups;
+	sllc_pmu->ops = &hisi_uncore_sllc_ops;
+	sllc_pmu->check_event = SLLC_NR_EVENTS;
+	sllc_pmu->counter_bits = 64;
+	sllc_pmu->num_counters = 8;
+	sllc_pmu->dev = &pdev->dev;
+	sllc_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+static int hisi_sllc_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *sllc_pmu;
+	char *name;
+	int ret;
+
+	sllc_pmu = devm_kzalloc(&pdev->dev, sizeof(*sllc_pmu), GFP_KERNEL);
+	if (!sllc_pmu)
+		return -ENOMEM;
+
+	ret = hisi_sllc_pmu_dev_probe(pdev, sllc_pmu);
+	if (ret)
+		return ret;
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u",
+			      sllc_pmu->sccl_id, sllc_pmu->index_id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+				       &sllc_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	sllc_pmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= hisi_uncore_pmu_event_init,
+		.pmu_enable	= hisi_uncore_pmu_enable,
+		.pmu_disable	= hisi_uncore_pmu_disable,
+		.add		= hisi_uncore_pmu_add,
+		.del		= hisi_uncore_pmu_del,
+		.start		= hisi_uncore_pmu_start,
+		.stop		= hisi_uncore_pmu_stop,
+		.read		= hisi_uncore_pmu_read,
+		.attr_groups    = sllc_pmu->pmu_events.attr_groups,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	ret = perf_pmu_register(&sllc_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+					    &sllc_pmu->node);
+		irq_set_affinity_hint(sllc_pmu->irq, NULL);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, sllc_pmu);
+
+	return ret;
+}
+
+static int hisi_sllc_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&sllc_pmu->pmu);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+					    &sllc_pmu->node);
+	irq_set_affinity_hint(sllc_pmu->irq, NULL);
+
+	return 0;
+}
+
+static struct platform_driver hisi_sllc_pmu_driver = {
+	.driver = {
+		.name = "hisi_sllc_pmu",
+		.acpi_match_table = hisi_sllc_pmu_acpi_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = hisi_sllc_pmu_probe,
+	.remove = hisi_sllc_pmu_remove,
+};
+
+static int __init hisi_sllc_pmu_module_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
+				      "AP_PERF_ARM_HISI_SLLC_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (ret) {
+		pr_err("SLLC PMU: cpuhp state setup failed, ret = %d\n", ret);
+		return ret;
+	}
+
+	ret = platform_driver_register(&hisi_sllc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE);
+
+	return ret;
+}
+module_init(hisi_sllc_pmu_module_init);
+
+static void __exit hisi_sllc_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_sllc_pmu_driver);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE);
+}
+module_exit(hisi_sllc_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
+MODULE_AUTHOR("Qi Liu <liuqi115@huawei.com>");
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 8883af955a2a..fc54a80f9c5c 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -676,7 +676,7 @@ static ssize_t l2cache_pmu_event_show(struct device *dev,
 	struct perf_pmu_events_attr *pmu_attr;
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
 #define L2CACHE_EVENT_ATTR(_name, _id)					     \
diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index fb34b87b9471..bba078077c93 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -615,7 +615,7 @@ static ssize_t l3cache_pmu_format_show(struct device *dev,
 	struct dev_ext_attribute *eattr;
 
 	eattr = container_of(attr, struct dev_ext_attribute, attr);
-	return sprintf(buf, "%s\n", (char *) eattr->var);
+	return sysfs_emit(buf, "%s\n", (char *) eattr->var);
 }
 
 #define L3CACHE_PMU_FORMAT_ATTR(_name, _config)				      \
@@ -643,7 +643,7 @@ static ssize_t l3cache_pmu_event_show(struct device *dev,
 	struct perf_pmu_events_attr *pmu_attr;
 
 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
 #define L3CACHE_EVENT_ATTR(_name, _id)					     \
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index e116815fa809..06a6d569b0b5 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -128,7 +128,7 @@ __tx2_pmu_##_var##_show(struct device *dev,				\
 			       char *page)				\
 {									\
 	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\
-	return sprintf(page, _format "\n");				\
+	return sysfs_emit(page, _format "\n");				\
 }									\
 									\
 static struct device_attribute format_attr_##_var =			\
@@ -176,7 +176,7 @@ static ssize_t tx2_pmu_event_show(struct device *dev,
 	struct dev_ext_attribute *eattr;
 
 	eattr = container_of(attr, struct dev_ext_attribute, attr);
-	return sprintf(buf, "event=0x%lx\n", (unsigned long) eattr->var);
+	return sysfs_emit(buf, "event=0x%lx\n", (unsigned long) eattr->var);
 }
 
 #define TX2_EVENT_ATTR(name, config) \
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 44faa51ba799..ffe3bdeec845 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -170,7 +170,7 @@ static ssize_t xgene_pmu_format_show(struct device *dev,
 	struct dev_ext_attribute *eattr;
 
 	eattr = container_of(attr, struct dev_ext_attribute, attr);
-	return sprintf(buf, "%s\n", (char *) eattr->var);
+	return sysfs_emit(buf, "%s\n", (char *) eattr->var);
 }
 
 #define XGENE_PMU_FORMAT_ATTR(_name, _config)		\
@@ -281,7 +281,7 @@ static ssize_t xgene_pmu_event_show(struct device *dev,
 	struct dev_ext_attribute *eattr;
 
 	eattr = container_of(attr, struct dev_ext_attribute, attr);
-	return sprintf(buf, "config=0x%lx\n", (unsigned long) eattr->var);
+	return sysfs_emit(buf, "config=0x%lx\n", (unsigned long) eattr->var);
 }
 
 #define XGENE_PMU_EVENT_ATTR(_name, _config)		\
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index cc7c3fda2aa6..3d4442397bf9 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -176,6 +176,8 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
 	CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
 	CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index bee82809107c..31b347c9f8dd 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1258,11 +1258,13 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *));
  */
 extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
 #else
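+/* Architectures may provide their own set_handle_irq() implementation */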
+#ifndef set_handle_irq
 #define set_handle_irq(handle_irq)		\
 	do {					\
 		(void)handle_irq;		\
 		WARN_ON(1);			\
 	} while (0)
 #endif
+#endif
 
 #endif /* _LINUX_IRQ_H */
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 14f72ec96492..d53ea3c047bc 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -376,6 +376,12 @@ static inline void *kasan_reset_tag(const void *addr)
 
 #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/
 
+#ifdef CONFIG_KASAN_HW_TAGS
+
+void kasan_report_async(void);
+
+#endif /* CONFIG_KASAN_HW_TAGS */
+
 #ifdef CONFIG_KASAN_SW_TAGS
 void __init kasan_init_sw_tags(void);
 #else
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 30f68b42eeb5..61bf4774b8f2 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -426,6 +426,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
 #define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
 #define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* arm64 ptr auth enabled keys (prctl()) */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 667f1aed091c..18a9f59dc067 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -255,4 +255,8 @@ struct prctl_mm_map {
 # define SYSCALL_DISPATCH_FILTER_ALLOW	0
 # define SYSCALL_DISPATCH_FILTER_BLOCK	1
 
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS		60
+#define PR_PAC_GET_ENABLED_KEYS		61
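+/*
+ * A sketch: prctl(PR_PAC_SET_ENABLED_KEYS, PR_PAC_APIBKEY, 0, 0, 0)
+ * would mark the IB key as disabled for the calling task; see the
+ * pointer-authentication documentation updated by this series.
+ */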
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 2e2e3f378d97..3d62c9599dc0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -119,6 +119,12 @@
 #ifndef PAC_RESET_KEYS
 # define PAC_RESET_KEYS(a, b)	(-EINVAL)
 #endif
+#ifndef PAC_SET_ENABLED_KEYS
+# define PAC_SET_ENABLED_KEYS(a, b, c)	(-EINVAL)
+#endif
+#ifndef PAC_GET_ENABLED_KEYS
+# define PAC_GET_ENABLED_KEYS(a)	(-EINVAL)
+#endif
 #ifndef SET_TAGGED_ADDR_CTRL
 # define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
 #endif
@@ -2497,6 +2503,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		error = PAC_RESET_KEYS(me, arg2);
 		break;
+	case PR_PAC_SET_ENABLED_KEYS:
+		if (arg4 || arg5)
+			return -EINVAL;
+		error = PAC_SET_ENABLED_KEYS(me, arg2, arg3);
+		break;
+	case PR_PAC_GET_ENABLED_KEYS:
+		if (arg2 || arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = PAC_GET_ENABLED_KEYS(me);
+		break;
 	case PR_SET_TAGGED_ADDR_CTRL:
 		if (arg3 || arg4 || arg5)
 			return -EINVAL;
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index e5647d147b35..785e724ce0d8 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -69,10 +69,10 @@ static void kasan_test_exit(struct kunit *test)
  * resource named "kasan_data". Do not use this name for KUnit resources
  * outside of KASAN tests.
  *
- * For hardware tag-based KASAN, when a tag fault happens, tag checking is
- * normally auto-disabled. When this happens, this test handler reenables
- * tag checking. As tag checking can be only disabled or enabled per CPU, this
- * handler disables migration (preemption).
+ * For hardware tag-based KASAN in sync mode, when a tag fault happens, tag
+ * checking is auto-disabled. When this happens, this test handler reenables
+ * tag checking. As tag checking can only be disabled or enabled per CPU,
+ * this handler disables migration (preemption).
  *
  * Since the compiler doesn't see that the expression can change the fail_data
  * fields, it can reorder or optimize away the accesses to those fields.
@@ -80,7 +80,8 @@ static void kasan_test_exit(struct kunit *test)
  * expression to prevent that.
  */
 #define KUNIT_EXPECT_KASAN_FAIL(test, expression) do {		\
-	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS))			\
+	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) &&			\
+	    !kasan_async_mode_enabled())			\
 		migrate_disable();				\
 	WRITE_ONCE(fail_data.report_expected, true);		\
 	WRITE_ONCE(fail_data.report_found, false);		\
@@ -92,12 +93,16 @@ static void kasan_test_exit(struct kunit *test)
 	barrier();						\
 	expression;						\
 	barrier();						\
+	if (kasan_async_mode_enabled())				\
+		kasan_force_async_fault();			\
+	barrier();						\
 	KUNIT_EXPECT_EQ(test,					\
 			READ_ONCE(fail_data.report_expected),	\
 			READ_ONCE(fail_data.report_found));	\
-	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) {			\
+	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) &&			\
+	    !kasan_async_mode_enabled()) {			\
 		if (READ_ONCE(fail_data.report_found))		\
-			kasan_enable_tagging();			\
+			kasan_enable_tagging_sync();		\
 		migrate_enable();				\
 	}							\
 } while (0)
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 2aad21fda156..4004388b4e4b 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -25,6 +25,12 @@ enum kasan_arg {
 	KASAN_ARG_ON,
 };
 
+enum kasan_arg_mode {
+	KASAN_ARG_MODE_DEFAULT,
+	KASAN_ARG_MODE_SYNC,
+	KASAN_ARG_MODE_ASYNC,
+};
+
 enum kasan_arg_stacktrace {
 	KASAN_ARG_STACKTRACE_DEFAULT,
 	KASAN_ARG_STACKTRACE_OFF,
@@ -38,6 +44,7 @@ enum kasan_arg_fault {
 };
 
 static enum kasan_arg kasan_arg __ro_after_init;
+static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
 static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
 static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
 
@@ -45,6 +52,10 @@ static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
 DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
 EXPORT_SYMBOL(kasan_flag_enabled);
 
+/* Whether the asynchronous mode is enabled. */
+bool kasan_flag_async __ro_after_init;
+EXPORT_SYMBOL_GPL(kasan_flag_async);
+
 /* Whether to collect alloc/free stack traces. */
 DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
 
@@ -68,6 +79,23 @@ static int __init early_kasan_flag(char *arg)
 }
 early_param("kasan", early_kasan_flag);
 
+/* kasan.mode=sync/async */
+static int __init early_kasan_mode(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	if (!strcmp(arg, "sync"))
+		kasan_arg_mode = KASAN_ARG_MODE_SYNC;
+	else if (!strcmp(arg, "async"))
+		kasan_arg_mode = KASAN_ARG_MODE_ASYNC;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+early_param("kasan.mode", early_kasan_mode);
+
 /* kasan.stacktrace=off/on */
 static int __init early_kasan_flag_stacktrace(char *arg)
 {
@@ -115,7 +143,15 @@ void kasan_init_hw_tags_cpu(void)
 		return;
 
 	hw_init_tags(KASAN_TAG_MAX);
-	hw_enable_tagging();
+
+	/*
+	 * Enable async mode only when explicitly requested through
+	 * the command line.
+	 */
+	if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC)
+		hw_enable_tagging_async();
+	else
+		hw_enable_tagging_sync();
 }
 
 /* kasan_init_hw_tags() is called once on boot CPU. */
@@ -132,6 +168,22 @@ void __init kasan_init_hw_tags(void)
 	/* Enable KASAN. */
 	static_branch_enable(&kasan_flag_enabled);
 
+	switch (kasan_arg_mode) {
+	case KASAN_ARG_MODE_DEFAULT:
+		/*
+		 * Default to sync mode.
+		 * Do nothing, kasan_flag_async keeps its default value.
+		 */
+		break;
+	case KASAN_ARG_MODE_SYNC:
+		/* Do nothing, kasan_flag_async keeps its default value. */
+		break;
+	case KASAN_ARG_MODE_ASYNC:
+		/* Async mode enabled. */
+		kasan_flag_async = true;
+		break;
+	}
+
 	switch (kasan_arg_stacktrace) {
 	case KASAN_ARG_STACKTRACE_DEFAULT:
 		/* Default to enabling stack trace collection. */
@@ -194,10 +246,16 @@ void kasan_set_tagging_report_once(bool state)
 }
 EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once);
 
-void kasan_enable_tagging(void)
+void kasan_enable_tagging_sync(void)
+{
+	hw_enable_tagging_sync();
+}
+EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync);
+
+void kasan_force_async_fault(void)
 {
-	hw_enable_tagging();
+	hw_force_async_tag_fault();
 }
-EXPORT_SYMBOL_GPL(kasan_enable_tagging);
+EXPORT_SYMBOL_GPL(kasan_force_async_fault);
 
 #endif
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 3436c6bf7c0c..c1581e8a9b8e 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -7,20 +7,37 @@
 #include <linux/stackdepot.h>
 
 #ifdef CONFIG_KASAN_HW_TAGS
+
 #include <linux/static_key.h>
+
 DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
+extern bool kasan_flag_async __ro_after_init;
+
 static inline bool kasan_stack_collection_enabled(void)
 {
 	return static_branch_unlikely(&kasan_flag_stacktrace);
 }
+
+static inline bool kasan_async_mode_enabled(void)
+{
+	return kasan_flag_async;
+}
 #else
+
 static inline bool kasan_stack_collection_enabled(void)
 {
 	return true;
 }
+
+static inline bool kasan_async_mode_enabled(void)
+{
+	return false;
+}
+
 #endif
 
 extern bool kasan_flag_panic __ro_after_init;
+extern bool kasan_flag_async __ro_after_init;
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 #define KASAN_GRANULE_SIZE	(1UL << KASAN_SHADOW_SCALE_SHIFT)
@@ -275,8 +292,11 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 
 #ifdef CONFIG_KASAN_HW_TAGS
 
-#ifndef arch_enable_tagging
-#define arch_enable_tagging()
+#ifndef arch_enable_tagging_sync
+#define arch_enable_tagging_sync()
+#endif
+#ifndef arch_enable_tagging_async
+#define arch_enable_tagging_async()
 #endif
 #ifndef arch_init_tags
 #define arch_init_tags(max_tag)
@@ -284,6 +304,9 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #ifndef arch_set_tagging_report_once
 #define arch_set_tagging_report_once(state)
 #endif
+#ifndef arch_force_async_tag_fault
+#define arch_force_async_tag_fault()
+#endif
 #ifndef arch_get_random_tag
 #define arch_get_random_tag()	(0xFF)
 #endif
@@ -294,16 +317,19 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #define arch_set_mem_tag_range(addr, size, tag) ((void *)(addr))
 #endif
 
-#define hw_enable_tagging()			arch_enable_tagging()
+#define hw_enable_tagging_sync()		arch_enable_tagging_sync()
+#define hw_enable_tagging_async()		arch_enable_tagging_async()
 #define hw_init_tags(max_tag)			arch_init_tags(max_tag)
 #define hw_set_tagging_report_once(state)	arch_set_tagging_report_once(state)
+#define hw_force_async_tag_fault()		arch_force_async_tag_fault()
 #define hw_get_random_tag()			arch_get_random_tag()
 #define hw_get_mem_tag(addr)			arch_get_mem_tag(addr)
 #define hw_set_mem_tag_range(addr, size, tag)	arch_set_mem_tag_range((addr), (size), (tag))
 
 #else /* CONFIG_KASAN_HW_TAGS */
 
-#define hw_enable_tagging()
+#define hw_enable_tagging_sync()
+#define hw_enable_tagging_async()
 #define hw_set_tagging_report_once(state)
 
 #endif /* CONFIG_KASAN_HW_TAGS */
@@ -311,12 +337,14 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
 
 void kasan_set_tagging_report_once(bool state);
-void kasan_enable_tagging(void);
+void kasan_enable_tagging_sync(void);
+void kasan_force_async_fault(void);
 
 #else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
 
 static inline void kasan_set_tagging_report_once(bool state) { }
-static inline void kasan_enable_tagging(void) { }
+static inline void kasan_enable_tagging_sync(void) { }
+static inline void kasan_force_async_fault(void) { }
 
 #endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
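
The hw_*() wrappers above are a thin indirection layer: every arch_*() hook defaults to a no-op, so generic KASAN code can call them unconditionally and an architecture opts in simply by defining the macros before this header is processed. For illustration only (the helper names follow the arm64 MTE convention and are an assumption here, not part of this hunk), the arch side might wire the new hooks up as:

	/* Illustrative arm64-style wiring; not part of this patch. */
	#define arch_enable_tagging_sync()	mte_enable_kernel_sync()
	#define arch_enable_tagging_async()	mte_enable_kernel_async()
	#define arch_force_async_tag_fault()	mte_check_tfsr_exit()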
 
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 87b271206163..14bd51ea2348 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -87,7 +87,8 @@ static void start_report(unsigned long *flags)
 
 static void end_report(unsigned long *flags, unsigned long addr)
 {
-	trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
+	if (!kasan_async_mode_enabled())
+		trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
 	pr_err("==================================================================\n");
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
 	spin_unlock_irqrestore(&report_lock, *flags);
@@ -360,6 +361,25 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
 	end_report(&flags, (unsigned long)object);
 }
 
+#ifdef CONFIG_KASAN_HW_TAGS
+void kasan_report_async(void)
+{
+	unsigned long flags;
+
+#if IS_ENABLED(CONFIG_KUNIT)
+	if (current->kunit_test)
+		kasan_update_kunit_status(current->kunit_test);
+#endif /* IS_ENABLED(CONFIG_KUNIT) */
+
+	start_report(&flags);
+	pr_err("BUG: KASAN: invalid-access\n");
+	pr_err("Asynchronous mode enabled: no access details available\n");
+	pr_err("\n");
+	dump_stack();
+	end_report(&flags, 0);
+}
+#endif /* CONFIG_KASAN_HW_TAGS */
+
 static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 				unsigned long ip)
 {
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f287599a7a3..1d96a21acb2f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -93,6 +93,12 @@ static void unmap_region(struct mm_struct *mm,
  * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
  *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
  *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
+ *
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE (with Enhanced PAN supported):
+ *								r: (no) no
+ *								w: (no) no
+ *								x: (yes) yes
  */
 pgprot_t protection_map[16] __ro_after_init = {
 	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
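
With Enhanced PAN, a PROT_EXEC-only mapping becomes genuinely execute-only: it is unreadable even through the kernel's user-access routines. A userspace sketch of the observable effect (illustrative; assumes EPAN-capable hardware and elides most error handling):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/mman.h>

	int main(void)
	{
		/* Executable, but neither readable nor writable. */
		void *p = mmap(NULL, 4096, PROT_EXEC,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		int fd = open("/dev/zero", O_RDONLY);

		if (p == MAP_FAILED || fd < 0)
			return 1;

		/* Kernel uaccess into the region should fail with EFAULT. */
		if (read(fd, p, 16) < 0)
			perror("read into exec-only mapping");
		return 0;
	}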
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 2c9d012797a7..ced910fb4019 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal pauth fp mte
+ARM64_SUBTARGETS ?= tags signal pauth fp mte bti
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
new file mode 100644
index 000000000000..73e013c082a6
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := btitest nobtitest
+
+PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
+
+# These tests are built as freestanding binaries, since they would
+# otherwise require BTI support in ld.so, which is not yet widespread;
+# once that support is available it will still be useful to test the
+# freestanding case separately, as the cases for statically linked and
+# dynamically linked binaries are slightly different.
+
+CFLAGS_NOBTI = -DBTI=0
+CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
+
+CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
+
+BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
+NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
+
+%-bti.o: %.c
+	$(BTI_CC_COMMAND)
+
+%-bti.o: %.S
+	$(BTI_CC_COMMAND)
+
+%-nobti.o: %.c
+	$(NOBTI_CC_COMMAND)
+
+%-nobti.o: %.S
+	$(NOBTI_CC_COMMAND)
+
+BTI_OBJS =                                      \
+	test-bti.o                              \
+	signal-bti.o                            \
+	start-bti.o                             \
+	syscall-bti.o                           \
+	system-bti.o                            \
+	teststubs-bti.o                         \
+	trampoline-bti.o
+gen/btitest: $(BTI_OBJS)
+	$(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+
+NOBTI_OBJS =                                    \
+	test-nobti.o                            \
+	signal-nobti.o                          \
+	start-nobti.o                           \
+	syscall-nobti.o                         \
+	system-nobti.o                          \
+	teststubs-nobti.o                       \
+	trampoline-nobti.o
+gen/nobtitest: $(NOBTI_OBJS)
+	$(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(PROGS)
+	cp $(PROGS) $(OUTPUT)/
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
new file mode 100644
index 000000000000..04e7b72880ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#define NT_GNU_PROPERTY_TYPE_0	5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI	(1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC	(1U << 1)
+
+
+.macro startfn name:req
+	.globl \name
+\name:
+	.macro endfn
+		.size \name, . - \name
+		.type \name, @function
+		.purgem endfn
+	.endm
+.endm
+
+.macro emit_aarch64_feature_1_and
+	.pushsection .note.gnu.property, "a"
+	.align	3
+	.long	2f - 1f
+	.long	6f - 3f
+	.long	NT_GNU_PROPERTY_TYPE_0
+1:	.string	"GNU"
+2:
+	.align	3
+3:	.long	GNU_PROPERTY_AARCH64_FEATURE_1_AND
+	.long	5f - 4f
+4:
+#if BTI
+	.long	GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \
+		GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+	.long	0
+#endif
+5:
+	.align	3
+6:
+	.popsection
+.endm
+
+.macro paciasp
+	hint	0x19
+.endm
+
+.macro autiasp
+	hint	0x1d
+.endm
+
+.macro __bti_
+	hint	0x20
+.endm
+
+.macro __bti_c
+	hint	0x22
+.endm
+
+.macro __bti_j
+	hint	0x24
+.endm
+
+.macro __bti_jc
+	hint	0x26
+.endm
+
+.macro bti what=
+	__bti_\what
+.endm
+
+#endif /* ! ASSEMBLER_H */
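
The emit_aarch64_feature_1_and macro hand-assembles the ELF note that BTI-aware loaders inspect. Seen from C, the emitted blob corresponds roughly to the following layout (a sketch assuming the standard GNU property note format with 8-byte alignment):

	struct gnu_property_note {
		unsigned int n_namesz;	/* 4: "GNU" plus NUL */
		unsigned int n_descsz;	/* size of the property payload */
		unsigned int n_type;	/* NT_GNU_PROPERTY_TYPE_0 (5) */
		char	     n_name[4];	/* "GNU" */
		/* desc, 8-byte aligned: */
		unsigned int pr_type;	/* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
		unsigned int pr_datasz;	/* 4 */
		unsigned int pr_data;	/* BTI|PAC bits, or 0 for non-BTI builds */
		/* padded out to 8-byte alignment */
	};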
diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h
new file mode 100644
index 000000000000..2aff9b10336e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/btitest.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef BTITEST_H
+#define BTITEST_H
+
+/* Trampolines for calling the test stubs: */
+void call_using_br_x0(void (*)(void));
+void call_using_br_x16(void (*)(void));
+void call_using_blr(void (*)(void));
+
+/* Test stubs: */
+void nohint_func(void);
+void bti_none_func(void);
+void bti_c_func(void);
+void bti_j_func(void);
+void bti_jc_func(void);
+void paciasp_func(void);
+
+#endif /* !BTITEST_H */
diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h
new file mode 100644
index 000000000000..ebb6204f447a
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/compiler.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#define __always_unused __attribute__((__unused__))
+#define __noreturn __attribute__((__noreturn__))
+#define __unreachable() __builtin_unreachable()
+
+/* curse(e) has value e, but the compiler cannot assume so */
+#define curse(e) ({				\
+	__typeof__(e) __curse_e = (e);		\
+	asm ("" : "+r" (__curse_e));		\
+	__curse_e;				\
+})
+
+#endif /* ! COMPILER_H */
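
curse() is a value-preserving optimization barrier: routing the value through an inline asm operand stops the compiler from constant-folding it or proving anything about it. That matters here because the test stubs must be reached through a real indirect branch. A sketch of typical use:

	#include "compiler.h"

	extern void bti_c_func(void);	/* one of the test stubs */

	static void demo(void)
	{
		void (*fn)(void) = bti_c_func;

		/*
		 * Without curse() the compiler could see the target and emit
		 * a direct branch, bypassing the BTI landing pad entirely.
		 */
		fn = curse(fn);
		fn();	/* remains a genuine indirect call (blr) */
	}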
diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/gen/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c
new file mode 100644
index 000000000000..f3fd29b91141
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+#include "signal.h"
+
+int sigemptyset(sigset_t *s)
+{
+	unsigned int i;
+
+	for (i = 0; i < _NSIG_WORDS; ++i)
+		s->sig[i] = 0;
+
+	return 0;
+}
+
+int sigaddset(sigset_t *s, int n)
+{
+	if (n < 1 || n > _NSIG)
+		return -EINVAL;
+
+	s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW;
+	return 0;
+}
+
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old)
+{
+	return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask));
+}
+
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old)
+{
+	return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask));
+}
diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h
new file mode 100644
index 000000000000..103457dc880e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SIGNAL_H
+#define SIGNAL_H
+
+#include <linux/signal.h>
+
+#include "system.h"
+
+typedef __sighandler_t sighandler_t;
+
+int sigemptyset(sigset_t *s);
+int sigaddset(sigset_t *s, int n);
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old);
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old);
+
+#endif /* ! SIGNAL_H */
diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S
new file mode 100644
index 000000000000..831f952e0572
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/start.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn _start
+	mov	x0, sp
+	b	start
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S
new file mode 100644
index 000000000000..8dde8b6f3db1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/syscall.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn syscall
+	bti	c
+	mov	w8, w0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	mov	x3, x4
+	mov	x4, x5
+	mov	x5, x6
+	mov	x6, x7
+	svc	#0
+	ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c
new file mode 100644
index 000000000000..6385d8d4973b
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <asm/unistd.h>
+
+#include "compiler.h"
+
+void __noreturn exit(int n)
+{
+	syscall(__NR_exit, n);
+	__unreachable();
+}
+
+ssize_t write(int fd, const void *buf, size_t size)
+{
+	return syscall(__NR_write, fd, buf, size);
+}
diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h
new file mode 100644
index 000000000000..aca118589705
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+typedef __kernel_size_t size_t;
+typedef __kernel_ssize_t ssize_t;
+
+#include <linux/errno.h>
+#include <asm/hwcap.h>
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+
+#include "compiler.h"
+
+long syscall(int nr, ...);
+
+void __noreturn exit(int n);
+ssize_t write(int fd, const void *buf, size_t size);
+
+#endif /* ! SYSTEM_H */
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
new file mode 100644
index 000000000000..656b04976ccc
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019,2021  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <linux/errno.h>
+#include <linux/auxvec.h>
+#include <linux/signal.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+
+typedef struct ucontext ucontext_t;
+
+#include "btitest.h"
+#include "compiler.h"
+#include "signal.h"
+
+#define EXPECTED_TESTS 18
+
+static volatile unsigned int test_num = 1;
+static unsigned int test_passed;
+static unsigned int test_failed;
+static unsigned int test_skipped;
+
+static void fdputs(int fd, const char *str)
+{
+	size_t len = 0;
+	const char *p = str;
+
+	while (*p++)
+		++len;
+
+	write(fd, str, len);
+}
+
+static void putstr(const char *str)
+{
+	fdputs(1, str);
+}
+
+static void putnum(unsigned int num)
+{
+	char c;
+
+	if (num / 10)
+		putnum(num / 10);
+
+	c = '0' + (num % 10);
+	write(1, &c, 1);
+}
+
+#define puttestname(test_name, trampoline_name) do {	\
+	putstr(test_name);				\
+	putstr("/");					\
+	putstr(trampoline_name);			\
+} while (0)
+
+void print_summary(void)
+{
+	putstr("# Totals: pass:");
+	putnum(test_passed);
+	putstr(" fail:");
+	putnum(test_failed);
+	putstr(" xfail:0 xpass:0 skip:");
+	putnum(test_skipped);
+	putstr(" error:0\n");
+}
+
+static const char *volatile current_test_name;
+static const char *volatile current_trampoline_name;
+static volatile int sigill_expected, sigill_received;
+
+static void handler(int n, siginfo_t *si __always_unused,
+		    void *uc_ __always_unused)
+{
+	ucontext_t *uc = uc_;
+
+	putstr("# \t[SIGILL in ");
+	puttestname(current_test_name, current_trampoline_name);
+	putstr(", BTYPE=");
+	write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK)
+			      >> PSR_BTYPE_SHIFT) * 2], 2);
+	if (!sigill_expected) {
+		putstr("]\n");
+		putstr("not ok ");
+		putnum(test_num);
+		putstr(" ");
+		puttestname(current_test_name, current_trampoline_name);
+		putstr("(unexpected SIGILL)\n");
+		print_summary();
+		exit(128 + n);
+	}
+
+	putstr(" (expected)]\n");
+	sigill_received = 1;
+	/* zap BTYPE so that resuming the faulting code will work */
+	uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK;
+}
+
+static int skip_all;
+
+static void __do_test(void (*trampoline)(void (*)(void)),
+		      void (*fn)(void),
+		      const char *trampoline_name,
+		      const char *name,
+		      int expect_sigill)
+{
+	if (skip_all) {
+		test_skipped++;
+		putstr("ok ");
+		putnum(test_num);
+		putstr(" ");
+		puttestname(name, trampoline_name);
+		putstr(" # SKIP\n");
+
+		return;
+	}
+
+	/* Branch Target exceptions should only happen in BTI binaries: */
+	if (!BTI)
+		expect_sigill = 0;
+
+	sigill_expected = expect_sigill;
+	sigill_received = 0;
+	current_test_name = name;
+	current_trampoline_name = trampoline_name;
+
+	trampoline(fn);
+
+	if (expect_sigill && !sigill_received) {
+		putstr("not ok ");
+		test_failed++;
+	} else {
+		putstr("ok ");
+		test_passed++;
+	}
+	putnum(test_num++);
+	putstr(" ");
+	puttestname(name, trampoline_name);
+	putstr("\n");
+}
+
+#define do_test(expect_sigill_br_x0,					\
+		expect_sigill_br_x16,					\
+		expect_sigill_blr,					\
+		name)							\
+do {									\
+	__do_test(call_using_br_x0, name, "call_using_br_x0", #name,	\
+		  expect_sigill_br_x0);					\
+	__do_test(call_using_br_x16, name, "call_using_br_x16", #name,	\
+		  expect_sigill_br_x16);				\
+	__do_test(call_using_blr, name, "call_using_blr", #name,	\
+		  expect_sigill_blr);					\
+} while (0)
+
+void start(int *argcp)
+{
+	struct sigaction sa;
+	void *const *p;
+	const struct auxv_entry {
+		unsigned long type;
+		unsigned long val;
+	} *auxv;
+	unsigned long hwcap = 0, hwcap2 = 0;
+
+	putstr("TAP version 13\n");
+	putstr("1..");
+	putnum(EXPECTED_TESTS);
+	putstr("\n");
+
+	/* Gross hack for finding AT_HWCAP2 from the initial process stack: */
+	p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */
+	/* step over environment */
+	while (*p++)
+		;
+	for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) {
+		switch (auxv->type) {
+		case AT_HWCAP:
+			hwcap = auxv->val;
+			break;
+		case AT_HWCAP2:
+			hwcap2 = auxv->val;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (hwcap & HWCAP_PACA)
+		putstr("# HWCAP_PACA present\n");
+	else
+		putstr("# HWCAP_PACA not present\n");
+
+	if (hwcap2 & HWCAP2_BTI) {
+		putstr("# HWCAP2_BTI present\n");
+		if (!(hwcap & HWCAP_PACA))
+			putstr("# Bad hardware?  Expect problems.\n");
+	} else {
+		putstr("# HWCAP2_BTI not present\n");
+		skip_all = 1;
+	}
+
+	putstr("# Test binary");
+	if (!BTI)
+		putstr(" not");
+	putstr(" built for BTI\n");
+
+	sa.sa_handler = (sighandler_t)(void *)handler;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	sigaction(SIGILL, &sa, NULL);
+	sigaddset(&sa.sa_mask, SIGILL);
+	sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL);
+
+	do_test(1, 1, 1, nohint_func);
+	do_test(1, 1, 1, bti_none_func);
+	do_test(1, 0, 0, bti_c_func);
+	do_test(0, 0, 1, bti_j_func);
+	do_test(0, 0, 0, bti_jc_func);
+	do_test(1, 0, 0, paciasp_func);
+
+	print_summary();
+
+	if (test_num - 1 != EXPECTED_TESTS)
+		putstr("# WARNING - EXPECTED TEST COUNT WRONG\n");
+
+	if (test_failed)
+		exit(1);
+	else
+		exit(0);
+}
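
The auxv walk in start() is only needed because these binaries are freestanding; a normal hosted program can obtain the same bits with getauxval(3). For comparison (illustrative; the fallback define is guarded in case older headers lack it):

	#include <stdbool.h>
	#include <sys/auxv.h>
	#include <asm/hwcap.h>

	#ifndef HWCAP2_BTI
	#define HWCAP2_BTI	(1 << 17)
	#endif

	static bool cpu_has_bti(void)
	{
		return getauxval(AT_HWCAP2) & HWCAP2_BTI;
	}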
diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S
new file mode 100644
index 000000000000..b62c8c35f67e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/teststubs.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn bti_none_func
+	bti
+	ret
+endfn
+
+startfn bti_c_func
+	bti	c
+	ret
+endfn
+
+startfn bti_j_func
+	bti	j
+	ret
+endfn
+
+startfn bti_jc_func
+	bti	jc
+	ret
+endfn
+
+startfn paciasp_func
+	paciasp
+	autiasp
+	ret
+endfn
+
+startfn nohint_func
+	ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S
new file mode 100644
index 000000000000..09beb3f361f1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/trampoline.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019  Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn call_using_br_x0
+	bti	c
+	br	x0
+endfn
+
+startfn call_using_br_x16
+	bti	c
+	mov	x16, x0
+	br	x16
+endfn
+
+startfn call_using_blr
+	paciasp
+	stp	x29, x30, [sp, #-16]!
+	blr	x0
+	ldp	x29, x30, [sp], #16
+	autiasp
+	ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index 0b3af552632a..409e3e53d00a 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -1,14 +1,18 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2020 ARM Limited
 
-CFLAGS += -std=gnu99 -I. -lpthread
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -std=gnu99 -I. -pthread
+LDFLAGS += -pthread
 SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
 PROGS := $(patsubst %.c,%,$(SRCS))
 
 #Add mte compiler option
-ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
 CFLAGS += -march=armv8.5-a+memtag
-endif
 
 #check if the compiler works well
 mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
@@ -19,11 +23,14 @@ TEST_GEN_PROGS := $(PROGS)
 
 # Get Kernel headers installed and use them.
 KSFT_KHDR_INSTALL := 1
+else
+    $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.)
+    $(warning test program "mte" will not be created.)
 endif
 
 # Include KSFT lib.mk.
 include ../../lib.mk
 
 ifeq ($(mte_cc_support),1)
-$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S
 endif
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 3b23c4d61d38..88c74bc46d4f 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str)
 		ksft_print_msg("ERR: missing %s\n", str);
 		return 0;
 	}
-	fscanf(f, "%lu", &val);
+	if (fscanf(f, "%lu", &val) != 1) {
+		ksft_print_msg("ERR: parsing %s\n", str);
+		val = 0;
+	}
 	fclose(f);
 	return val;
 }
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 4bfa80f2a8c3..1de7a0abd0ae 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -33,7 +33,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
 	if (fd == -1)
 		return KSFT_FAIL;
 	for (i = 0; i < len; i++)
-		write(fd, &val, sizeof(val));
+		if (write(fd, &val, sizeof(val)) != sizeof(val))
+			return KSFT_FAIL;
 	lseek(fd, 0, 0);
 	ptr = mte_allocate_memory(len, mem_type, mapping, true);
 	if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 39f8908988ea..f50ac31920d1 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -181,10 +181,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags
 	}
 	/* Initialize the file for mappable size */
 	lseek(fd, 0, SEEK_SET);
-	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
-		write(fd, buffer, INIT_BUFFER_SIZE);
+	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) {
+		if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+			perror("initialising buffer");
+			return NULL;
+		}
+	}
 	index -= INIT_BUFFER_SIZE;
-	write(fd, buffer, size - index);
+	if (write(fd, buffer, size - index) != size - index) {
+		perror("initialising buffer");
+		return NULL;
+	}
 	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
 }
 
@@ -202,9 +209,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
 	/* Initialize the file for mappable size */
 	lseek(fd, 0, SEEK_SET);
-	for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
-		write(fd, buffer, INIT_BUFFER_SIZE);
+	for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) {
+		if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+			perror("initialising buffer");
+			return NULL;
+		}
+	}
 	index -= INIT_BUFFER_SIZE;
-	write(fd, buffer, map_size - index);
+	if (write(fd, buffer, map_size - index) != map_size - index) {
+		perror("initialising buffer");
+		return NULL;
+	}
 	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
 					   range_after, true, fd);
 }
@@ -271,29 +284,20 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
 
 	en |= (incl_mask << PR_MTE_TAG_SHIFT);
 	/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
-	if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+	if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
 		ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
 		return -EINVAL;
 	}
 	return 0;
 }
 
-#define ID_AA64PFR1_MTE_SHIFT		8
-#define ID_AA64PFR1_MTE			2
-
 int mte_default_setup(void)
 {
-	unsigned long hwcaps = getauxval(AT_HWCAP);
+	unsigned long hwcaps2 = getauxval(AT_HWCAP2);
 	unsigned long en = 0;
 	int ret;
 
-	if (!(hwcaps & HWCAP_CPUID)) {
-		ksft_print_msg("FAIL: CPUID registers unavailable\n");
-		return KSFT_FAIL;
-	}
-	/* Read ID_AA64PFR1_EL1 register */
-	asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
-	if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+	if (!(hwcaps2 & HWCAP2_MTE)) {
 		ksft_print_msg("FAIL: MTE features unavailable\n");
 		return KSFT_SKIP;
 	}
@@ -333,6 +337,7 @@ int create_temp_file(void)
 	/* Create a file in the tmpfs filesystem */
 	fd = mkstemp(&filename[0]);
 	if (fd == -1) {
+		perror(filename);
 		ksft_print_msg("FAIL: Unable to open temporary file\n");
 		return 0;
 	}