author		Will Deacon <will@kernel.org>	2022-07-25 10:59:15 +0100
committer	Will Deacon <will@kernel.org>	2022-07-25 10:59:15 +0100
commit		f96d67a8af7a39f7ffaac464d8bccc4c720e52c2 (patch)
tree		daee5c1458a5d8ed1bdb58b54de19b232ebd44aa
parent		92867739e3439ecc9bfa0a106be515d93f14c735 (diff)
parent		1191b6256e50a07e7d8ce36eb970708e42a4be1a (diff)
Merge branch 'for-next/boot' into for-next/core
* for-next/boot: (34 commits)
  arm64: fix KASAN_INLINE
  arm64: Add an override for ID_AA64SMFR0_EL1.FA64
  arm64: Add the arm64.nosve command line option
  arm64: Add the arm64.nosme command line option
  arm64: Expose a __check_override primitive for oddball features
  arm64: Allow the idreg override to deal with variable field width
  arm64: Factor out checking of a feature against the override into a macro
  arm64: Allow sticky E2H when entering EL1
  arm64: Save state of HCR_EL2.E2H before switch to EL1
  arm64: Rename the VHE switch to "finalise_el2"
  arm64: mm: fix booting with 52-bit address space
  arm64: head: remove __PHYS_OFFSET
  arm64: lds: use PROVIDE instead of conditional definitions
  arm64: setup: drop early FDT pointer helpers
  arm64: head: avoid relocating the kernel twice for KASLR
  arm64: kaslr: defer initialization to initcall where permitted
  arm64: head: record CPU boot mode after enabling the MMU
  arm64: head: populate kernel page tables with MMU and caches on
  arm64: head: factor out TTBR1 assignment into a macro
  arm64: idreg-override: use early FDT mapping in ID map
  ...
-rw-r--r--	Documentation/admin-guide/kernel-parameters.txt	6
-rw-r--r--	Documentation/virt/kvm/arm/hyp-abi.rst	11
-rw-r--r--	arch/arm64/include/asm/assembler.h	31
-rw-r--r--	arch/arm64/include/asm/cpufeature.h	3
-rw-r--r--	arch/arm64/include/asm/el2_setup.h	60
-rw-r--r--	arch/arm64/include/asm/kernel-pgtable.h	18
-rw-r--r--	arch/arm64/include/asm/memory.h	4
-rw-r--r--	arch/arm64/include/asm/mmu_context.h	16
-rw-r--r--	arch/arm64/include/asm/virt.h	11
-rw-r--r--	arch/arm64/kernel/Makefile	2
-rw-r--r--	arch/arm64/kernel/cpufeature.c	14
-rw-r--r--	arch/arm64/kernel/head.S	525
-rw-r--r--	arch/arm64/kernel/hyp-stub.S	117
-rw-r--r--	arch/arm64/kernel/idreg-override.c	93
-rw-r--r--	arch/arm64/kernel/image-vars.h	59
-rw-r--r--	arch/arm64/kernel/kaslr.c	149
-rw-r--r--	arch/arm64/kernel/pi/Makefile	33
-rw-r--r--	arch/arm64/kernel/pi/kaslr_early.c	112
-rw-r--r--	arch/arm64/kernel/sleep.S	3
-rw-r--r--	arch/arm64/kernel/suspend.c	2
-rw-r--r--	arch/arm64/kernel/vmlinux.lds.S	19
-rw-r--r--	arch/arm64/mm/kasan_init.c	4
-rw-r--r--	arch/arm64/mm/mmu.c	55
-rw-r--r--	arch/arm64/mm/proc.S	15
24 files changed, 750 insertions(+), 612 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 1f5f9c1c96df..3c78af07b313 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -400,6 +400,12 @@
 	arm64.nomte	[ARM64] Unconditionally disable Memory Tagging Extension
 			support
 
+	arm64.nosve	[ARM64] Unconditionally disable Scalable Vector
+			Extension support
+
+	arm64.nosme	[ARM64] Unconditionally disable Scalable Matrix
+			Extension support
+
 	ataflop=	[HW,M68k]
 
 	atarimouse=	[HW,MOUSE] Atari Mouse
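
Both new switches are plain booleans consumed by the idreg override code (see
idreg-override.c further down): they take no argument, and arm64.nosve implies
arm64.nosme, since the alias table in this series expands it to
"id_aa64pfr0.sve=0 id_aa64pfr1.sme=0". An illustrative command line (the
non-arm64.* parameters are placeholders):

	console=ttyAMA0 root=/dev/vda arm64.nosve arm64.nosme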
diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst
index 4d43fbc25195..412b276449d3 100644
--- a/Documentation/virt/kvm/arm/hyp-abi.rst
+++ b/Documentation/virt/kvm/arm/hyp-abi.rst
@@ -60,12 +60,13 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
 
 * ::
 
-    x0 = HVC_VHE_RESTART (arm64 only)
+    x0 = HVC_FINALISE_EL2 (arm64 only)
 
-  Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
-  the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
-  being off, and VHE not being disabled by any other means (command line
-  option, for example).
+  Finish configuring EL2 depending on the command-line options,
+  including an attempt to upgrade the kernel's exception level from
+  EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU
+  supporting VHE, the EL2 MMU being off, and VHE not being disabled by
+  any other means (command line option, for example).
 
 Any other value of r0/x0 triggers a hypervisor-specific handling,
 which is not documented here.
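
The stub ABI remains a bare HVC with the function ID in x0 and the result
returned in x0 (HVC_STUB_ERR on failure). A minimal C sketch of issuing such a
call from EL1, assuming GCC-style inline assembly; hyp_stub_call() is a
hypothetical helper, not part of this patch:

	/* Sketch: issue a hyp-stub hypercall such as HVC_FINALISE_EL2. */
	static inline unsigned long hyp_stub_call(unsigned long id)
	{
		register unsigned long x0 asm("x0") = id;

		asm volatile("hvc #0" : "+r" (x0) : : "memory");
		return x0;			/* HVC_STUB_ERR on failure */
	}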
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index dc422fa437c2..5846145be523 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -360,6 +360,20 @@ alternative_cb_end
 	.endm
 
 /*
+ * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
+ *
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of _end.
+ */
+	.macro	idmap_get_t0sz, reg
+	adrp	\reg, _end
+	orr	\reg, \reg, #(1 << VA_BITS_MIN) - 1
+	clz	\reg, \reg
+	.endm
+
+/*
  * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
  * ID_AA64MMFR0_EL1.PARange value
  *
@@ -466,6 +480,18 @@ alternative_endif
 	.endm
 
 /*
+ * load_ttbr1 - install @pgtbl as a TTBR1 page table
+ * pgtbl preserved
+ * tmp1/tmp2 clobbered, either may overlap with pgtbl
+ */
+	.macro		load_ttbr1, pgtbl, tmp1, tmp2
+	phys_to_ttbr	\tmp1, \pgtbl
+	offset_ttbr1 	\tmp1, \tmp2
+	msr		ttbr1_el1, \tmp1
+	isb
+	.endm
+
+/*
  * To prevent the possibility of old and new partial table walks being visible
  * in the tlb, switch the ttbr to a zero page when we invalidate the old
  * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i
@@ -478,10 +504,7 @@ alternative_endif
 	isb
 	tlbi	vmalle1
 	dsb	nsh
-	phys_to_ttbr \tmp, \page_table
-	offset_ttbr1 \tmp, \tmp2
-	msr	ttbr1_el1, \tmp
-	isb
+	load_ttbr1 \page_table, \tmp, \tmp2
 	.endm
 
 /*
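
The new idmap_get_t0sz macro is small enough to mirror in C. A sketch of the
same computation, assuming an LP64 target (idmap_t0sz_for() is a hypothetical
name):

	/*
	 * Sketch: T0SZ == 64 - #address bits used, i.e. the number of
	 * leading zeroes in the PA of _end. OR-ing in the low VA_BITS_MIN
	 * bits guarantees at least VA_BITS_MIN bits count as "used",
	 * capping the result at the default T0SZ.
	 */
	static inline unsigned int idmap_t0sz_for(unsigned long pa_of_end)
	{
		return __builtin_clzl(pa_of_end | ((1UL << VA_BITS_MIN) - 1));
	}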
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index f7f2227db56f..fd7d75a275f6 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 }
 
 extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr0_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64zfr0_override;
+extern struct arm64_ftr_override id_aa64smfr0_override;
 extern struct arm64_ftr_override id_aa64isar1_override;
 extern struct arm64_ftr_override id_aa64isar2_override;
 
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index bfd0ad64b598..2630faa5bc08 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -129,64 +129,6 @@
 	msr	cptr_el2, x0			// Disable copro. traps to EL2
 .endm
 
-/* SVE register access */
-.macro __init_el2_nvhe_sve
-	mrs	x1, id_aa64pfr0_el1
-	ubfx	x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
-	cbz	x1, .Lskip_sve_\@
-
-	bic	x0, x0, #CPTR_EL2_TZ		// Also disable SVE traps
-	msr	cptr_el2, x0			// Disable copro. traps to EL2
-	isb
-	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
-	msr_s	SYS_ZCR_EL2, x1			// length for EL1.
-.Lskip_sve_\@:
-.endm
-
-/* SME register access and priority mapping */
-.macro __init_el2_nvhe_sme
-	mrs	x1, id_aa64pfr1_el1
-	ubfx	x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
-	cbz	x1, .Lskip_sme_\@
-
-	bic	x0, x0, #CPTR_EL2_TSM		// Also disable SME traps
-	msr	cptr_el2, x0			// Disable copro. traps to EL2
-	isb
-
-	mrs	x1, sctlr_el2
-	orr	x1, x1, #SCTLR_ELx_ENTP2	// Disable TPIDR2 traps
-	msr	sctlr_el2, x1
-	isb
-
-	mov	x1, #0				// SMCR controls
-
-	mrs_s	x2, SYS_ID_AA64SMFR0_EL1
-	ubfx	x2, x2, #ID_AA64SMFR0_EL1_FA64_SHIFT, #1 // Full FP in SM?
-	cbz	x2, .Lskip_sme_fa64_\@
-
-	orr	x1, x1, SMCR_ELx_FA64_MASK
-.Lskip_sme_fa64_\@:
-
-	orr	x1, x1, #SMCR_ELx_LEN_MASK	// Enable full SME vector
-	msr_s	SYS_SMCR_EL2, x1		// length for EL1.
-
-	mrs_s	x1, SYS_SMIDR_EL1		// Priority mapping supported?
-	ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
-	cbz     x1, .Lskip_sme_\@
-
-	msr_s	SYS_SMPRIMAP_EL2, xzr		// Make all priorities equal
-
-	mrs	x1, id_aa64mmfr1_el1		// HCRX_EL2 present?
-	ubfx	x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
-	cbz	x1, .Lskip_sme_\@
-
-	mrs_s	x1, SYS_HCRX_EL2
-	orr	x1, x1, #HCRX_EL2_SMPME_MASK	// Enable priority mapping
-	msr_s	SYS_HCRX_EL2, x1
-
-.Lskip_sme_\@:
-.endm
-
 /* Disable any fine grained traps */
 .macro __init_el2_fgt
 	mrs	x1, id_aa64mmfr0_el1
@@ -250,8 +192,6 @@
 	__init_el2_hstr
 	__init_el2_nvhe_idregs
 	__init_el2_nvhe_cptr
-	__init_el2_nvhe_sve
-	__init_el2_nvhe_sme
 	__init_el2_fgt
 	__init_el2_nvhe_prepare_eret
 .endm
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 96dc0f7da258..02e59fa8f293 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+#include <asm/boot.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sparsemem.h>
 
@@ -35,10 +36,8 @@
  */
 #if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
-#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
 #else
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
-#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
 #endif
 
 
@@ -87,7 +86,14 @@
 			+ EARLY_PUDS((vstart), (vend))	/* each PUD needs a next level page table */	\
 			+ EARLY_PMDS((vstart), (vend)))	/* each PMD needs a next level page table */
 #define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
-#define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+
+/* the initial ID map may need two extra pages if it needs to be extended */
+#if VA_BITS < 48
+#define INIT_IDMAP_DIR_SIZE	((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
+#else
+#define INIT_IDMAP_DIR_SIZE	(INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
+#endif
+#define INIT_IDMAP_DIR_PAGES	EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE)
 
 /* Initial memory map size */
 #if ARM64_KERNEL_USES_PMD_MAPS
@@ -107,9 +113,11 @@
 #define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
 #if ARM64_KERNEL_USES_PMD_MAPS
-#define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RW_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
 #else
-#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RW_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
 #endif
 
 /*
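
The sizing rule above is compact enough to restate in C; a sketch
(init_idmap_bytes() is a hypothetical helper; the macros are the ones defined
in this hunk):

	/*
	 * Sketch of INIT_IDMAP_DIR_SIZE: enough pages to cover the image
	 * plus the relocated FDT, with two spare pages budgeted on
	 * VA_BITS < 48 configurations so create_idmap() in head.S can
	 * extend the ID map by an extra translation level at boot.
	 */
	static inline unsigned long init_idmap_bytes(void)
	{
		unsigned long pages = INIT_IDMAP_DIR_PAGES;

		if (VA_BITS < 48)
			pages += 2;
		return pages * PAGE_SIZE;
	}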
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b09b300360cf..227d256cd4b9 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -174,7 +174,11 @@
 #include <linux/types.h>
 #include <asm/bug.h>
 
+#if VA_BITS > 48
 extern u64			vabits_actual;
+#else
+#define vabits_actual		((u64)VA_BITS)
+#endif
 
 extern s64			memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 6770667b34a3..c7ccd82db1d2 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
  * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
  * physical memory, in which case it will be smaller.
  */
-extern u64 idmap_t0sz;
-extern u64 idmap_ptrs_per_pgd;
+extern int idmap_t0sz;
 
 /*
  * Ensure TCR.T0SZ is set to the provided value.
@@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void)
 		cpu_switch_mm(mm->pgd, mm);
 }
 
-static inline void cpu_install_idmap(void)
+static inline void __cpu_install_idmap(pgd_t *idmap)
 {
 	cpu_set_reserved_ttbr0();
 	local_flush_tlb_all();
 	cpu_set_idmap_tcr_t0sz();
 
-	cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
+	cpu_switch_mm(lm_alias(idmap), &init_mm);
+}
+
+static inline void cpu_install_idmap(void)
+{
+	__cpu_install_idmap(idmap_pg_dir);
 }
 
 /*
@@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
+static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
 {
 	typedef void (ttbr_replace_func)(phys_addr_t);
 	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
 
 	replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1));
 
-	cpu_install_idmap();
+	__cpu_install_idmap(idmap);
 	replace_phys(ttbr1);
 	cpu_uninstall_idmap();
 }
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 0e80db4327b6..4eb601e7de50 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -36,9 +36,9 @@
 #define HVC_RESET_VECTORS 2
 
 /*
- * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
+ * HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
  */
-#define HVC_VHE_RESTART	3
+#define HVC_FINALISE_EL2	3
 
 /* Max number of HYP stub hypercalls */
 #define HVC_STUB_HCALL_NR 4
@@ -49,6 +49,13 @@
 #define BOOT_CPU_MODE_EL1	(0xe11)
 #define BOOT_CPU_MODE_EL2	(0xe12)
 
+/*
+ * Flags returned together with the boot mode, but not preserved in
+ * __boot_cpu_mode. Used by the idreg override code to work out the
+ * boot state.
+ */
+#define BOOT_CPU_FLAG_E2H	BIT_ULL(32)
+
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace.h>
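
init_kernel_el() now returns the boot mode in the low 32 bits of x0 with
transient context flags above them; the flags are not preserved in
__boot_cpu_mode. A sketch of splitting the two on the C side (both helpers are
hypothetical):

	static inline u32 boot_mode(u64 boot_status)
	{
		return (u32)boot_status;	/* BOOT_CPU_MODE_EL1/EL2 */
	}

	static inline bool booted_with_e2h(u64 boot_status)
	{
		return boot_status & BOOT_CPU_FLAG_E2H;	/* sticky E2H */
	}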
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7075a9c6a4a6..1add7b01efa7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -64,7 +64,7 @@ obj-$(CONFIG_ACPI)			+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)			+= acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o pi/
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index cb0ae19d23bb..0f6d3b213c25 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -633,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 	__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
 
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
+struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
 struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
 
@@ -670,11 +673,14 @@ static const struct __ftr_reg_entry {
 	ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
 
 	/* Op1 = 0, CRn = 0, CRm = 4 */
-	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0,
+			       &id_aa64pfr0_override),
 	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
 			       &id_aa64pfr1_override),
-	ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
-	ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
+			       &id_aa64zfr0_override),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
+			       &id_aa64smfr0_override),
 
 	/* Op1 = 0, CRn = 0, CRm = 5 */
 	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -3295,7 +3301,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
 
 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
 {
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }
 
 /*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 6a98f1a38c29..cefe6a73ee54 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -37,8 +37,6 @@
 
 #include "efi-header.S"
 
-#define __PHYS_OFFSET	KERNEL_START
-
 #if (PAGE_OFFSET & 0x1fffff) != 0
 #error PAGE_OFFSET must be at least 2MB aligned
 #endif
@@ -51,9 +49,6 @@
  *   MMU = off, D-cache = off, I-cache = on or off,
  *   x0 = physical address to the FDT blob.
  *
- * This code is mostly position independent so you call this at
- * __pa(PAGE_OFFSET).
- *
  * Note that the callee-saved registers are used for storing variables
  * that are useful before the MMU is enabled. The allocations are described
  * in the entry routines.
@@ -82,25 +77,34 @@
 	 * primary lowlevel boot path:
 	 *
 	 *  Register   Scope                      Purpose
+	 *  x20        primary_entry() .. __primary_switch()    CPU boot mode
 	 *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
+	 *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
 	 *  x23        primary_entry() .. start_kernel()        physical misalignment/KASLR offset
-	 *  x28        __create_page_tables()                   callee preserved temp register
-	 *  x19/x20    __primary_switch()                       callee preserved temp registers
-	 *  x24        __primary_switch() .. relocate_kernel()  current RELR displacement
+	 *  x24        __primary_switch()                       linear map KASLR seed
+	 *  x25        primary_entry() .. start_kernel()        supported VA size
+	 *  x28        create_idmap()                           callee preserved temp register
 	 */
 SYM_CODE_START(primary_entry)
 	bl	preserve_boot_args
 	bl	init_kernel_el			// w0=cpu_boot_mode
-	adrp	x23, __PHYS_OFFSET
-	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
-	bl	set_cpu_boot_mode_flag
-	bl	__create_page_tables
+	mov	x20, x0
+	bl	create_idmap
+
 	/*
 	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
 	 * details.
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
+#if VA_BITS > 48
+	mrs_s	x0, SYS_ID_AA64MMFR2_EL1
+	tst	x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
+	mov	x0, #VA_BITS
+	mov	x25, #VA_BITS_MIN
+	csel	x25, x25, x0, eq
+	mov	x0, x25
+#endif
 	bl	__cpu_setup			// initialise processor
 	b	__primary_switch
 SYM_CODE_END(primary_entry)
@@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
 	b	dcache_inval_poc		// tail call
 SYM_CODE_END(preserve_boot_args)
 
-/*
- * Macro to create a table entry to the next page.
- *
- *	tbl:	page table address
- *	virt:	virtual address
- *	shift:	#imm page table shift
- *	ptrs:	#imm pointers per table page
- *
- * Preserves:	virt
- * Corrupts:	ptrs, tmp1, tmp2
- * Returns:	tbl -> next level table page address
- */
-	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
-	add	\tmp1, \tbl, #PAGE_SIZE
-	phys_to_pte \tmp2, \tmp1
-	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
-	lsr	\tmp1, \virt, #\shift
-	sub	\ptrs, \ptrs, #1
-	and	\tmp1, \tmp1, \ptrs		// table index
-	str	\tmp2, [\tbl, \tmp1, lsl #3]
-	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
-	.endm
+SYM_FUNC_START_LOCAL(clear_page_tables)
+	/*
+	 * Clear the init page tables.
+	 */
+	adrp	x0, init_pg_dir
+	adrp	x1, init_pg_end
+	sub	x2, x1, x0
+	mov	x1, xzr
+	b	__pi_memset			// tail call
+SYM_FUNC_END(clear_page_tables)
 
 /*
  * Macro to populate page table entries, these entries can be pointers to the next level
@@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args)
  *	vstart:	virtual address of start of range
  *	vend:	virtual address of end of range - we map [vstart, vend]
  *	shift:	shift used to transform virtual address into index
- *	ptrs:	number of entries in page table
+ *	order:  #imm 2log(number of entries in page table)
  *	istart:	index in table corresponding to vstart
  *	iend:	index in table corresponding to vend
  *	count:	On entry: how many extra entries were required in previous level, scales
  *			  our end index.
  *		On exit: returns how many extra entries required for next page table level
  *
- * Preserves:	vstart, vend, shift, ptrs
+ * Preserves:	vstart, vend
  * Returns:	istart, iend, count
  */
-	.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
-	lsr	\iend, \vend, \shift
-	mov	\istart, \ptrs
-	sub	\istart, \istart, #1
-	and	\iend, \iend, \istart	// iend = (vend >> shift) & (ptrs - 1)
-	mov	\istart, \ptrs
-	mul	\istart, \istart, \count
-	add	\iend, \iend, \istart	// iend += count * ptrs
-					// our entries span multiple tables
-
-	lsr	\istart, \vstart, \shift
-	mov	\count, \ptrs
-	sub	\count, \count, #1
-	and	\istart, \istart, \count
-
+	.macro compute_indices, vstart, vend, shift, order, istart, iend, count
+	ubfx	\istart, \vstart, \shift, \order
+	ubfx	\iend, \vend, \shift, \order
+	add	\iend, \iend, \count, lsl \order
 	sub	\count, \iend, \istart
 	.endm
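
The rewrite collapses the old mask-and-multiply sequence into two bitfield
extracts and a shifted add. The same arithmetic in C, as a sketch (a
hypothetical mirror of the macro, with 'order' = log2 of the entries per
table):

	static void compute_indices(u64 vstart, u64 vend, unsigned int shift,
				    unsigned int order, u64 *istart,
				    u64 *iend, u64 *count)
	{
		/* 'order'-bit table index at 'shift' for both range ends */
		*istart = (vstart >> shift) & ((1UL << order) - 1);
		*iend = ((vend >> shift) & ((1UL << order) - 1)) +
			(*count << order);	/* carry from previous level */
		*count = *iend - *istart;	/* extra tables for next level */
	}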
 
@@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args)
  *	vend:	virtual address of end of range - we map [vstart, vend - 1]
  *	flags:	flags to use to map last level entries
  *	phys:	physical address corresponding to vstart - physical memory is contiguous
- *	pgds:	the number of pgd entries
+ *	order:  #imm 2log(number of entries in PGD table)
+ *
+ * If extra_shift is set, an extra level will be populated if the end address does
+ * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
  *
  * Temporaries:	istart, iend, tmp, count, sv - these need to be different registers
  * Preserves:	vstart, flags
  * Corrupts:	tbl, rtbl, vend, istart, iend, tmp, count, sv
  */
-	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
 	sub \vend, \vend, #1
 	add \rtbl, \tbl, #PAGE_SIZE
-	mov \sv, \rtbl
 	mov \count, #0
-	compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
+
+	.ifnb	\extra_shift
+	tst	\vend, #~((1 << (\extra_shift)) - 1)
+	b.eq	.L_\@
+	compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	mov \sv, \rtbl
 	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
 	mov \tbl, \sv
+	.endif
+.L_\@:
+	compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
 	mov \sv, \rtbl
+	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+	mov \tbl, \sv
 
 #if SWAPPER_PGTABLE_LEVELS > 3
-	compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
+	compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	mov \sv, \rtbl
 	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
 	mov \tbl, \sv
-	mov \sv, \rtbl
 #endif
 
 #if SWAPPER_PGTABLE_LEVELS > 2
-	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
+	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	mov \sv, \rtbl
 	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
 	mov \tbl, \sv
 #endif
 
-	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
-	bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
-	populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
+	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
+	populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
 	.endm
 
 /*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- *   - identity mapping to enable the MMU (low address, TTBR0)
- *   - first few MB of the kernel linear mapping to jump to once the MMU has
- *     been enabled
+ * Remap a subregion created with the map_memory macro with modified attributes
+ * or output address. The entire remapped region must have been covered in the
+ * invocation of map_memory.
+ *
+ * x0: last level table address (returned in first argument to map_memory)
+ * x1: start VA of the existing mapping
+ * x2: start VA of the region to update
+ * x3: end VA of the region to update (exclusive)
+ * x4: start PA associated with the region to update
+ * x5: attributes to set on the updated region
+ * x6: order of the last level mappings
  */
-SYM_FUNC_START_LOCAL(__create_page_tables)
-	mov	x28, lr
+SYM_FUNC_START_LOCAL(remap_region)
+	sub	x3, x3, #1		// make end inclusive
 
-	/*
-	 * Invalidate the init page tables to avoid potential dirty cache lines
-	 * being evicted. Other page tables are allocated in rodata as part of
-	 * the kernel image, and thus are clean to the PoC per the boot
-	 * protocol.
-	 */
-	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	bl	dcache_inval_poc
+	// Get the index offset for the start of the last level table
+	lsr	x1, x1, x6
+	bfi	x1, xzr, #0, #PAGE_SHIFT - 3
 
-	/*
-	 * Clear the init page tables.
-	 */
-	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	sub	x1, x1, x0
-1:	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	subs	x1, x1, #64
-	b.ne	1b
+	// Derive the start and end indexes into the last level table
+	// associated with the provided region
+	lsr	x2, x2, x6
+	lsr	x3, x3, x6
+	sub	x2, x2, x1
+	sub	x3, x3, x1
 
-	mov	x7, SWAPPER_MM_MMUFLAGS
+	mov	x1, #1
+	lsl	x6, x1, x6		// block size at this level
 
-	/*
-	 * Create the identity mapping.
-	 */
-	adrp	x0, idmap_pg_dir
-	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)
-
-#ifdef CONFIG_ARM64_VA_BITS_52
-	mrs_s	x6, SYS_ID_AA64MMFR2_EL1
-	and	x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
-	mov	x5, #52
-	cbnz	x6, 1f
-#endif
-	mov	x5, #VA_BITS_MIN
-1:
-	adr_l	x6, vabits_actual
-	str	x5, [x6]
-	dmb	sy
-	dc	ivac, x6		// Invalidate potentially stale cache line
+	populate_entries x0, x4, x2, x3, x5, x6, x7
+	ret
+SYM_FUNC_END(remap_region)
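
In C terms, remap_region is a straight rewrite of the last-level descriptors.
A sketch under the register convention documented above, assuming PAGE_SHIFT
from the usual headers and a block-aligned pa (remap_region_c() is a
hypothetical name):

	static void remap_region_c(u64 *table, u64 va_mapped, u64 va_start,
				   u64 va_end, u64 pa, u64 attrs,
				   unsigned int shift)
	{
		/* index that the first entry of this table corresponds to */
		u64 base = (va_mapped >> shift) &
			   ~((1UL << (PAGE_SHIFT - 3)) - 1);
		u64 istart = (va_start >> shift) - base;
		u64 iend = ((va_end - 1) >> shift) - base; /* end exclusive */
		u64 step = 1UL << shift;	/* block size at this level */

		for (u64 i = istart; i <= iend; i++, pa += step)
			table[i] = pa | attrs;
	}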
 
+SYM_FUNC_START_LOCAL(create_idmap)
+	mov	x28, lr
 	/*
-	 * VA_BITS may be too small to allow for an ID mapping to be created
-	 * that covers system RAM if that is located sufficiently high in the
-	 * physical address space. So for the ID map, use an extended virtual
-	 * range in that case, and configure an additional translation level
-	 * if needed.
+	 * The ID map carries a 1:1 mapping of the physical address range
+	 * covered by the loaded image, which could be anywhere in DRAM. This
+	 * means that the required size of the VA (== PA) space is decided at
+	 * boot time, and could be more than the configured size of the VA
+	 * space for ordinary kernel and user space mappings.
+	 *
+	 * There are three cases to consider here:
+	 * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
+	 *   the placement of the image. In this case, we configure one extra
+	 *   level of translation on the fly for the ID map only. (This case
+	 *   also covers 42-bit VA/52-bit PA on 64k pages).
 	 *
-	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
-	 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
-	 * this number conveniently equals the number of leading zeroes in
-	 * the physical address of __idmap_text_end.
+	 * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
+	 *   only happen when using 64k pages, in which case we need to extend
+	 *   the root level table rather than add a level. Note that we can
+	 *   treat this case as 'always extended' as long as we take care not
+	 *   to program an unsupported T0SZ value into the TCR register.
+	 *
+	 * - Combinations that would require two additional levels of
+	 *   translation are not supported, e.g., VA_BITS==36 on 16k pages, or
+	 *   VA_BITS==39/4k pages with 5-level paging, where the input address
+	 *   requires more than 47 or 48 bits, respectively.
 	 */
-	adrp	x5, __idmap_text_end
-	clz	x5, x5
-	cmp	x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
-	b.ge	1f			// .. then skip VA range extension
-
-	adr_l	x6, idmap_t0sz
-	str	x5, [x6]
-	dmb	sy
-	dc	ivac, x6		// Invalidate potentially stale cache line
-
 #if (VA_BITS < 48)
+#define IDMAP_PGD_ORDER	(VA_BITS - PGDIR_SHIFT)
 #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
 
 	/*
 	 * If VA_BITS < 48, we have to configure an additional table level.
@@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 #if VA_BITS != EXTRA_SHIFT
 #error "Mismatch between VA_BITS and page size/number of translation levels"
 #endif
-
-	mov	x4, EXTRA_PTRS
-	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
 #else
+#define IDMAP_PGD_ORDER	(PHYS_MASK_SHIFT - PGDIR_SHIFT)
+#define EXTRA_SHIFT
 	/*
 	 * If VA_BITS == 48, we don't have to configure an additional
 	 * translation level, but the top-level table has more entries.
 	 */
-	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
-	str_l	x4, idmap_ptrs_per_pgd, x5
 #endif
-1:
-	ldr_l	x4, idmap_ptrs_per_pgd
-	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
-
-	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
-
-	/*
-	 * Map the kernel image (starting with PHYS_OFFSET).
-	 */
-	adrp	x0, init_pg_dir
-	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
-	add	x5, x5, x23			// add KASLR displacement
-	mov	x4, PTRS_PER_PGD
-	adrp	x6, _end			// runtime __pa(_end)
-	adrp	x3, _text			// runtime __pa(_text)
-	sub	x6, x6, x3			// _end - _text
-	add	x6, x6, x5			// runtime __va(_end)
-
-	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
+	adrp	x0, init_idmap_pg_dir
+	adrp	x3, _text
+	adrp	x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+	mov	x7, SWAPPER_RX_MMUFLAGS
+
+	map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
+
+	/* Remap the kernel page tables r/w in the ID map */
+	adrp	x1, _text
+	adrp	x2, init_pg_dir
+	adrp	x3, init_pg_end
+	bic	x4, x2, #SWAPPER_BLOCK_SIZE - 1
+	mov	x5, SWAPPER_RW_MMUFLAGS
+	mov	x6, #SWAPPER_BLOCK_SHIFT
+	bl	remap_region
+
+	/* Remap the FDT after the kernel image */
+	adrp	x1, _text
+	adrp	x22, _end + SWAPPER_BLOCK_SIZE
+	bic	x2, x22, #SWAPPER_BLOCK_SIZE - 1
+	bfi	x22, x21, #0, #SWAPPER_BLOCK_SHIFT		// remapped FDT address
+	add	x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
+	bic	x4, x21, #SWAPPER_BLOCK_SIZE - 1
+	mov	x5, SWAPPER_RW_MMUFLAGS
+	mov	x6, #SWAPPER_BLOCK_SHIFT
+	bl	remap_region
 
 	/*
 	 * Since the page tables have been populated with non-cacheable
@@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	 */
 	dmb	sy
 
-	adrp	x0, idmap_pg_dir
-	adrp	x1, idmap_pg_end
+	adrp	x0, init_idmap_pg_dir
+	adrp	x1, init_idmap_pg_end
 	bl	dcache_inval_poc
+	ret	x28
+SYM_FUNC_END(create_idmap)
 
+SYM_FUNC_START_LOCAL(create_kernel_mapping)
 	adrp	x0, init_pg_dir
-	adrp	x1, init_pg_end
-	bl	dcache_inval_poc
+	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
+	add	x5, x5, x23			// add KASLR displacement
+	adrp	x6, _end			// runtime __pa(_end)
+	adrp	x3, _text			// runtime __pa(_text)
+	sub	x6, x6, x3			// _end - _text
+	add	x6, x6, x5			// runtime __va(_end)
+	mov	x7, SWAPPER_RW_MMUFLAGS
 
-	ret	x28
-SYM_FUNC_END(__create_page_tables)
+	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
+
+	dsb	ishst				// sync with page table walker
+	ret
+SYM_FUNC_END(create_kernel_mapping)
 
 	/*
 	 * Initialize CPU registers with task-specific and cpu-specific context.
@@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables)
 /*
  * The following fragment of code is executed with the MMU enabled.
  *
- *   x0 = __PHYS_OFFSET
+ *   x0 = __pa(KERNEL_START)
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
 	adr_l	x4, init_task
@@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	sub	x4, x4, x0			// the kernel virtual and
 	str_l	x4, kimage_voffset, x5		// physical mappings
 
+	mov	x0, x20
+	bl	set_cpu_boot_mode_flag
+
 	// Clear BSS
 	adr_l	x0, __bss_start
 	mov	x1, xzr
@@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
+#if VA_BITS > 48
+	adr_l	x8, vabits_actual		// Set this early so KASAN early init
+	str	x25, [x8]			// ... observes the correct value
+	dc	civac, x8			// Make visible to booting secondaries
+#endif
+
+#ifdef CONFIG_RANDOMIZE_BASE
+	adrp	x5, memstart_offset_seed	// Save KASLR linear map seed
+	strh	w24, [x5, :lo12:memstart_offset_seed]
+#endif
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 	bl	kasan_early_init
 #endif
 	mov	x0, x21				// pass FDT address in x0
 	bl	early_fdt_map			// Try mapping the FDT early
+	mov	x0, x20				// pass the full boot status
 	bl	init_feature_override		// Parse cpu feature overrides
-#ifdef CONFIG_RANDOMIZE_BASE
-	tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
-	b.ne	0f
-	bl	kaslr_early_init		// parse FDT for KASLR options
-	cbz	x0, 0f				// KASLR disabled? just proceed
-	orr	x23, x23, x0			// record KASLR offset
-	ldp	x29, x30, [sp], #16		// we must enable KASLR, return
-	ret					// to __primary_switch()
-0:
-#endif
-	bl	switch_to_vhe			// Prefer VHE if possible
+	mov	x0, x20
+	bl	finalise_el2			// Prefer VHE if possible
 	ldp	x29, x30, [sp], #16
 	bl	start_kernel
 	ASM_BUG()
 SYM_FUNC_END(__primary_switched)
 
-	.pushsection ".rodata", "a"
-SYM_DATA_START(kimage_vaddr)
-	.quad		_text
-SYM_DATA_END(kimage_vaddr)
-EXPORT_SYMBOL(kimage_vaddr)
-	.popsection
-
 /*
  * end early head section, begin head code that is also used for
  * hotplug and needs to have the same protections as the text region
@@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr)
  * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
  * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
  *
- * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
- * booted in EL1 or EL2 respectively.
+ * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
+ * booted in EL1 or EL2 respectively, with the top 32 bits containing
+ * potential context flags. These flags are *not* stored in __boot_cpu_mode.
  */
 SYM_FUNC_START(init_kernel_el)
 	mrs	x0, CurrentEL
@@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	msr	vbar_el2, x0
 	isb
 
+	mov_q	x1, INIT_SCTLR_EL1_MMU_OFF
+
 	/*
 	 * Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
 	 * making it impossible to start in nVHE mode. Is that
@@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 	and	x0, x0, #HCR_E2H
 	cbz	x0, 1f
 
-	/* Switching to VHE requires a sane SCTLR_EL1 as a start */
-	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
-	msr_s	SYS_SCTLR_EL12, x0
-
-	/*
-	 * Force an eret into a helper "function", and let it return
-	 * to our original caller... This makes sure that we have
-	 * initialised the basic PSTATE state.
-	 */
-	mov	x0, #INIT_PSTATE_EL2
-	msr	spsr_el1, x0
-	adr	x0, __cpu_stick_to_vhe
-	msr	elr_el1, x0
-	eret
+	/* Set a sane SCTLR_EL1, the VHE way */
+	msr_s	SYS_SCTLR_EL12, x1
+	mov	x2, #BOOT_CPU_FLAG_E2H
+	b	2f
 
 1:
-	mov_q	x0, INIT_SCTLR_EL1_MMU_OFF
-	msr	sctlr_el1, x0
-
+	msr	sctlr_el1, x1
+	mov	x2, xzr
+2:
 	msr	elr_el2, lr
 	mov	w0, #BOOT_CPU_MODE_EL2
+	orr	x0, x0, x2
 	eret
-
-__cpu_stick_to_vhe:
-	mov	x0, #HVC_VHE_RESTART
-	hvc	#0
-	mov	x0, #BOOT_CPU_MODE_EL2
-	ret
 SYM_FUNC_END(init_kernel_el)
 
 /*
@@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
 	b.ne	1f
 	add	x1, x1, #4
 1:	str	w0, [x1]			// Save CPU boot mode
-	dmb	sy
-	dc	ivac, x1			// Invalidate potentially stale cache line
 	ret
 SYM_FUNC_END(set_cpu_boot_mode_flag)
 
-/*
- * These values are written with the MMU off, but read with the MMU on.
- * Writers will invalidate the corresponding address, discarding up to a
- * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
- * sufficient alignment that the CWG doesn't overlap another section.
- */
-	.pushsection ".mmuoff.data.write", "aw"
-/*
- * We need to find out the CPU boot mode long after boot, so we need to
- * store it in a writable variable.
- *
- * This is not in .bss, because we set it sufficiently early that the boot-time
- * zeroing of .bss would clobber it.
- */
-SYM_DATA_START(__boot_cpu_mode)
-	.long	BOOT_CPU_MODE_EL2
-	.long	BOOT_CPU_MODE_EL1
-SYM_DATA_END(__boot_cpu_mode)
-/*
- * The booting CPU updates the failed status @__early_cpu_boot_status,
- * with MMU turned off.
- */
-SYM_DATA_START(__early_cpu_boot_status)
-	.quad 	0
-SYM_DATA_END(__early_cpu_boot_status)
-
-	.popsection
-
 	/*
 	 * This provides a "holding pen" for platforms to hold all secondary
 	 * cores until we're ready for them to initialise.
 	 */
 SYM_FUNC_START(secondary_holding_pen)
 	bl	init_kernel_el			// w0=cpu_boot_mode
-	bl	set_cpu_boot_mode_flag
-	mrs	x0, mpidr_el1
+	mrs	x2, mpidr_el1
 	mov_q	x1, MPIDR_HWID_BITMASK
-	and	x0, x0, x1
+	and	x2, x2, x1
 	adr_l	x3, secondary_holding_pen_release
 pen:	ldr	x4, [x3]
-	cmp	x4, x0
+	cmp	x4, x2
 	b.eq	secondary_startup
 	wfe
 	b	pen
@@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen)
 	 */
 SYM_FUNC_START(secondary_entry)
 	bl	init_kernel_el			// w0=cpu_boot_mode
-	bl	set_cpu_boot_mode_flag
 	b	secondary_startup
 SYM_FUNC_END(secondary_entry)
 
@@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
 	 */
-	bl	switch_to_vhe
+	mov	x20, x0				// preserve boot mode
+	bl	finalise_el2
 	bl	__cpu_secondary_check52bitva
+#if VA_BITS > 48
+	ldr_l	x0, vabits_actual
+#endif
 	bl	__cpu_setup			// initialise processor
 	adrp	x1, swapper_pg_dir
+	adrp	x2, idmap_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =__secondary_switched
 	br	x8
 SYM_FUNC_END(secondary_startup)
 
 SYM_FUNC_START_LOCAL(__secondary_switched)
+	mov	x0, x20
+	bl	set_cpu_boot_mode_flag
+	str_l	xzr, __early_cpu_boot_status, x3
 	adr_l	x5, vectors
 	msr	vbar_el1, x5
 	isb
@@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow)
  *
  *  x0  = SCTLR_EL1 value for turning on the MMU.
  *  x1  = TTBR1_EL1 value
+ *  x2  = ID map root table address
  *
  * Returns to the caller via x30/lr. This requires the caller to be covered
  * by the .idmap.text section.
@@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow)
  * If it isn't, park the CPU
  */
 SYM_FUNC_START(__enable_mmu)
-	mrs	x2, ID_AA64MMFR0_EL1
-	ubfx	x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
-	cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+	mrs	x3, ID_AA64MMFR0_EL1
+	ubfx	x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+	cmp     x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
 	b.lt    __no_granule_support
-	cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+	cmp     x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
 	b.gt    __no_granule_support
-	update_early_cpu_boot_status 0, x2, x3
-	adrp	x2, idmap_pg_dir
-	phys_to_ttbr x1, x1
 	phys_to_ttbr x2, x2
 	msr	ttbr0_el1, x2			// load TTBR0
-	offset_ttbr1 x1, x3
-	msr	ttbr1_el1, x1			// load TTBR1
-	isb
+	load_ttbr1 x1, x1, x3
 
 	set_sctlr_el1	x0
 
@@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu)
 SYM_FUNC_END(__enable_mmu)
 
 SYM_FUNC_START(__cpu_secondary_check52bitva)
-#ifdef CONFIG_ARM64_VA_BITS_52
+#if VA_BITS > 48
 	ldr_l	x0, vabits_actual
 	cmp	x0, #52
 	b.ne	2f
@@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
 	 * Iterate over each entry in the relocation table, and apply the
 	 * relocations in place.
 	 */
-	ldr	w9, =__rela_offset		// offset to reloc table
-	ldr	w10, =__rela_size		// size of reloc table
-
+	adr_l	x9, __rela_start
+	adr_l	x10, __rela_end
 	mov_q	x11, KIMAGE_VADDR		// default virtual offset
 	add	x11, x11, x23			// actual virtual offset
-	add	x9, x9, x11			// __va(.rela)
-	add	x10, x9, x10			// __va(.rela) + sizeof(.rela)
 
 0:	cmp	x9, x10
 	b.hs	1f
@@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
 	 * entry in x9, the address being relocated by the current address or
 	 * bitmap entry in x13 and the address being relocated by the current
 	 * bit in x14.
-	 *
-	 * Because addends are stored in place in the binary, RELR relocations
-	 * cannot be applied idempotently. We use x24 to keep track of the
-	 * currently applied displacement so that we can correctly relocate if
-	 * __relocate_kernel is called twice with non-zero displacements (i.e.
-	 * if there is both a physical misalignment and a KASLR displacement).
 	 */
-	ldr	w9, =__relr_offset		// offset to reloc table
-	ldr	w10, =__relr_size		// size of reloc table
-	add	x9, x9, x11			// __va(.relr)
-	add	x10, x9, x10			// __va(.relr) + sizeof(.relr)
-
-	sub	x15, x23, x24			// delta from previous offset
-	cbz	x15, 7f				// nothing to do if unchanged
-	mov	x24, x23			// save new offset
+	adr_l	x9, __relr_start
+	adr_l	x10, __relr_end
 
 2:	cmp	x9, x10
 	b.hs	7f
@@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
 	tbnz	x11, #0, 3f			// branch to handle bitmaps
 	add	x13, x11, x23
 	ldr	x12, [x13]			// relocate address entry
-	add	x12, x12, x15
+	add	x12, x12, x23
 	str	x12, [x13], #8			// adjust to start of bitmap
 	b	2b
 
@@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
 	cbz	x11, 6f
 	tbz	x11, #0, 5f			// skip bit if not set
 	ldr	x12, [x14]			// relocate bit
-	add	x12, x12, x15
+	add	x12, x12, x23
 	str	x12, [x14]
 
 5:	add	x14, x14, #8			// move to next bit's address
@@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel)
 #endif
 
 SYM_FUNC_START_LOCAL(__primary_switch)
+	adrp	x1, reserved_pg_dir
+	adrp	x2, init_idmap_pg_dir
+	bl	__enable_mmu
+#ifdef CONFIG_RELOCATABLE
+	adrp	x23, KERNEL_START
+	and	x23, x23, MIN_KIMG_ALIGN - 1
 #ifdef CONFIG_RANDOMIZE_BASE
-	mov	x19, x0				// preserve new SCTLR_EL1 value
-	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
+	mov	x0, x22
+	adrp	x1, init_pg_end
+	mov	sp, x1
+	mov	x29, xzr
+	bl	__pi_kaslr_early_init
+	and	x24, x0, #SZ_2M - 1		// capture memstart offset seed
+	bic	x0, x0, #SZ_2M - 1
+	orr	x23, x23, x0			// record kernel offset
+#endif
 #endif
+	bl	clear_page_tables
+	bl	create_kernel_mapping
 
 	adrp	x1, init_pg_dir
-	bl	__enable_mmu
+	load_ttbr1 x1, x1, x2
 #ifdef CONFIG_RELOCATABLE
-#ifdef CONFIG_RELR
-	mov	x24, #0				// no RELR displacement yet
-#endif
 	bl	__relocate_kernel
-#ifdef CONFIG_RANDOMIZE_BASE
-	ldr	x8, =__primary_switched
-	adrp	x0, __PHYS_OFFSET
-	blr	x8
-
-	/*
-	 * If we return here, we have a KASLR displacement in x23 which we need
-	 * to take into account by discarding the current kernel mapping and
-	 * creating a new one.
-	 */
-	pre_disable_mmu_workaround
-	msr	sctlr_el1, x20			// disable the MMU
-	isb
-	bl	__create_page_tables		// recreate kernel mapping
-
-	tlbi	vmalle1				// Remove any stale TLB entries
-	dsb	nsh
-	isb
-
-	set_sctlr_el1	x19			// re-enable the MMU
-
-	bl	__relocate_kernel
-#endif
 #endif
 	ldr	x8, =__primary_switched
-	adrp	x0, __PHYS_OFFSET
+	adrp	x0, KERNEL_START		// __pa(KERNEL_START)
 	br	x8
 SYM_FUNC_END(__primary_switch)
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 43d212618834..12c7fad02ae5 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -16,6 +16,30 @@
 #include <asm/ptrace.h>
 #include <asm/virt.h>
 
+// Warning: hardcoded register allocation.
+// This will clobber x1 and x2, and expects x1 to contain
+// the id register value as read from the HW
+.macro __check_override idreg, fld, width, pass, fail
+	ubfx	x1, x1, #\fld, #\width
+	cbz	x1, \fail
+
+	adr_l	x1, \idreg\()_override
+	ldr	x2, [x1, FTR_OVR_VAL_OFFSET]
+	ldr	x1, [x1, FTR_OVR_MASK_OFFSET]
+	ubfx	x2, x2, #\fld, #\width
+	ubfx	x1, x1, #\fld, #\width
+	cmp	x1, xzr
+	and	x2, x2, x1
+	csinv	x2, x2, xzr, ne
+	cbnz	x2, \pass
+	b	\fail
+.endm
+
+.macro check_override idreg, fld, pass, fail
+	mrs	x1, \idreg\()_el1
+	__check_override \idreg \fld 4 \pass \fail
+.endm
+
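+
+// The decision these macros encode is easier to read in C. A sketch
+// (feature_usable() is a hypothetical mirror; struct arm64_ftr_override
+// and GENMASK_ULL() are the kernel's own):
+//
+//	/*
+//	 * A feature is usable if the HW advertises it AND the override
+//	 * either leaves the field alone (mask bits clear) or overrides
+//	 * it to a non-zero value.
+//	 */
+//	static bool feature_usable(u64 hw_val,
+//				   const struct arm64_ftr_override *ovr,
+//				   unsigned int shift, unsigned int width)
+//	{
+//		u64 fmask = GENMASK_ULL(shift + width - 1, shift);
+//
+//		if (!(hw_val & fmask))		/* not implemented */
+//			return false;
+//		if (!(ovr->mask & fmask))	/* not overridden */
+//			return true;
+//		return !!(ovr->val & ovr->mask & fmask);
+//	}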
 	.text
 	.pushsection	.hyp.text, "ax"
 
@@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync)
 	msr	vbar_el2, x1
 	b	9f
 
-1:	cmp	x0, #HVC_VHE_RESTART
-	b.eq	mutate_to_vhe
+1:	cmp	x0, #HVC_FINALISE_EL2
+	b.eq	__finalise_el2
 
 2:	cmp	x0, #HVC_SOFT_RESTART
 	b.ne	3f
@@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync)
 	eret
 SYM_CODE_END(elx_sync)
 
-// nVHE? No way! Give me the real thing!
-SYM_CODE_START_LOCAL(mutate_to_vhe)
+SYM_CODE_START_LOCAL(__finalise_el2)
+	check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
+
+.Linit_sve:	/* SVE register access */
+	mrs	x0, cptr_el2			// Disable SVE traps
+	bic	x0, x0, #CPTR_EL2_TZ
+	msr	cptr_el2, x0
+	isb
+	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
+	msr_s	SYS_ZCR_EL2, x1			// length for EL1.
+
+.Lskip_sve:
+	check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
+
+.Linit_sme:	/* SME register access and priority mapping */
+	mrs	x0, cptr_el2			// Disable SME traps
+	bic	x0, x0, #CPTR_EL2_TSM
+	msr	cptr_el2, x0
+	isb
+
+	mrs	x1, sctlr_el2
+	orr	x1, x1, #SCTLR_ELx_ENTP2	// Disable TPIDR2 traps
+	msr	sctlr_el2, x1
+	isb
+
+	mov	x0, #0				// SMCR controls
+
+	// Full FP in SM?
+	mrs_s	x1, SYS_ID_AA64SMFR0_EL1
+	__check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64
+
+.Linit_sme_fa64:
+	orr	x0, x0, SMCR_ELx_FA64_MASK
+.Lskip_sme_fa64:
+
+	orr	x0, x0, #SMCR_ELx_LEN_MASK	// Enable full SME vector
+	msr_s	SYS_SMCR_EL2, x0		// length for EL1.
+
+	mrs_s	x1, SYS_SMIDR_EL1		// Priority mapping supported?
+	ubfx    x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
+	cbz     x1, .Lskip_sme
+
+	msr_s	SYS_SMPRIMAP_EL2, xzr		// Make all priorities equal
+
+	mrs	x1, id_aa64mmfr1_el1		// HCRX_EL2 present?
+	ubfx	x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
+	cbz	x1, .Lskip_sme
+
+	mrs_s	x1, SYS_HCRX_EL2
+	orr	x1, x1, #HCRX_EL2_SMPME_MASK	// Enable priority mapping
+	msr_s	SYS_HCRX_EL2, x1
+
+.Lskip_sme:
+
+	// nVHE? No way! Give me the real thing!
 	// Sanity check: MMU *must* be off
 	mrs	x1, sctlr_el2
 	tbnz	x1, #0, 1f
 
 	// Needs to be VHE capable, obviously
-	mrs	x1, id_aa64mmfr1_el1
-	ubfx	x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
-	cbz	x1, 1f
-
-	// Check whether VHE is disabled from the command line
-	adr_l	x1, id_aa64mmfr1_override
-	ldr	x2, [x1, FTR_OVR_VAL_OFFSET]
-	ldr	x1, [x1, FTR_OVR_MASK_OFFSET]
-	ubfx	x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
-	ubfx	x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
-	cmp	x1, xzr
-	and	x2, x2, x1
-	csinv	x2, x2, xzr, ne
-	cbnz	x2, 2f
+	check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
 
 1:	mov_q	x0, HVC_STUB_ERR
 	eret
@@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
 	msr	spsr_el1, x0
 
 	b	enter_vhe
-SYM_CODE_END(mutate_to_vhe)
+SYM_CODE_END(__finalise_el2)
 
 	// At the point where we reach enter_vhe(), we run with
-	// the MMU off (which is enforced by mutate_to_vhe()).
+	// the MMU off (which is enforced by __finalise_el2()).
 	// We thus need to be in the idmap, or everything will
 	// explode when enabling the MMU.
 
@@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors)
 SYM_FUNC_END(__hyp_reset_vectors)
 
 /*
- * Entry point to switch to VHE if deemed capable
+ * Entry point to finalise EL2 and switch to VHE if deemed capable
+ *
+ * w0: boot mode, as returned by init_kernel_el()
  */
-SYM_FUNC_START(switch_to_vhe)
+SYM_FUNC_START(finalise_el2)
 	// Need to have booted at EL2
-	adr_l	x1, __boot_cpu_mode
-	ldr	w0, [x1]
 	cmp	w0, #BOOT_CPU_MODE_EL2
 	b.ne	1f
 
@@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe)
 	cmp	x0, #CurrentEL_EL1
 	b.ne	1f
 
-	// Turn the world upside down
-	mov	x0, #HVC_VHE_RESTART
+	mov	x0, #HVC_FINALISE_EL2
 	hvc	#0
 1:
 	ret
-SYM_FUNC_END(switch_to_vhe)
+SYM_FUNC_END(finalise_el2)
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 21b3d03089ca..1b0542c69738 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -19,16 +19,21 @@
 #define FTR_ALIAS_NAME_LEN	30
 #define FTR_ALIAS_OPTION_LEN	116
 
+static u64 __boot_status __initdata;
+
 struct ftr_set_desc {
 	char 				name[FTR_DESC_NAME_LEN];
 	struct arm64_ftr_override	*override;
 	struct {
 		char			name[FTR_DESC_FIELD_LEN];
 		u8			shift;
+		u8			width;
 		bool			(*filter)(u64 val);
 	} 				fields[];
 };
 
+#define FIELD(n, s, f)	{ .name = n, .shift = s, .width = 4, .filter = f }
+
 static bool __init mmfr1_vh_filter(u64 val)
 {
 	/*
@@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val)
 	 * the user was trying to force nVHE on us, proceed with
 	 * attitude adjustment.
 	 */
-	return !(is_kernel_in_hyp_mode() && val == 0);
+	return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
+		 val == 0);
 }
 
 static const struct ftr_set_desc mmfr1 __initconst = {
 	.name		= "id_aa64mmfr1",
 	.override	= &id_aa64mmfr1_override,
 	.fields		= {
-		{ "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter },
+		FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
+		{}
+	},
+};
+
+static bool __init pfr0_sve_filter(u64 val)
+{
+	/*
+	 * Disabling SVE also means disabling all the features that
+	 * are associated with it. The easiest way to do it is just to
+	 * override id_aa64zfr0_el1 to be 0.
+	 */
+	if (!val) {
+		id_aa64zfr0_override.val = 0;
+		id_aa64zfr0_override.mask = GENMASK(63, 0);
+	}
+
+	return true;
+}
+
+static const struct ftr_set_desc pfr0 __initconst = {
+	.name		= "id_aa64pfr0",
+	.override	= &id_aa64pfr0_override,
+	.fields		= {
+	        FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
 		{}
 	},
 };
 
+static bool __init pfr1_sme_filter(u64 val)
+{
+	/*
+	 * Similarly to SVE, disabling SME also means disabling all
+	 * the features that are associated with it. Just set
+	 * id_aa64smfr0_el1 to 0 and don't look back.
+	 */
+	if (!val) {
+		id_aa64smfr0_override.val = 0;
+		id_aa64smfr0_override.mask = GENMASK(63, 0);
+	}
+
+	return true;
+}
+
 static const struct ftr_set_desc pfr1 __initconst = {
 	.name		= "id_aa64pfr1",
 	.override	= &id_aa64pfr1_override,
 	.fields		= {
-		{ "bt", ID_AA64PFR1_BT_SHIFT },
-		{ "mte", ID_AA64PFR1_MTE_SHIFT},
+		FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
+		FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
+		FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
 		{}
 	},
 };
@@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = {
 	.name		= "id_aa64isar1",
 	.override	= &id_aa64isar1_override,
 	.fields		= {
-		{ "gpi", ID_AA64ISAR1_EL1_GPI_SHIFT },
-		{ "gpa", ID_AA64ISAR1_EL1_GPA_SHIFT },
-		{ "api", ID_AA64ISAR1_EL1_API_SHIFT },
-		{ "apa", ID_AA64ISAR1_EL1_APA_SHIFT },
+		FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
+		FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
+		FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
+		FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
 		{}
 	},
 };
@@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = {
 	.name		= "id_aa64isar2",
 	.override	= &id_aa64isar2_override,
 	.fields		= {
-		{ "gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT },
-		{ "apa3", ID_AA64ISAR2_EL1_APA3_SHIFT },
+		FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
+		FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
+		{}
+	},
+};
+
+static const struct ftr_set_desc smfr0 __initconst = {
+	.name		= "id_aa64smfr0",
+	.override	= &id_aa64smfr0_override,
+	.fields		= {
+		/* FA64 is a one bit field... :-/ */
+		{ "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
 		{}
 	},
 };
@@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = {
 	.override	= &kaslr_feature_override,
 #endif
 	.fields		= {
-		{ "disabled", 0 },
+		FIELD("disabled", 0, NULL),
 		{}
 	},
 };
 
 static const struct ftr_set_desc * const regs[] __initconst = {
 	&mmfr1,
+	&pfr0,
 	&pfr1,
 	&isar1,
 	&isar2,
+	&smfr0,
 	&kaslr,
 };
 
@@ -108,6 +166,8 @@ static const struct {
 } aliases[] __initconst = {
 	{ "kvm-arm.mode=nvhe",		"id_aa64mmfr1.vh=0" },
 	{ "kvm-arm.mode=protected",	"id_aa64mmfr1.vh=0" },
+	{ "arm64.nosve",		"id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" },
+	{ "arm64.nosme",		"id_aa64pfr1.sme=0" },
 	{ "arm64.nobti",		"id_aa64pfr1.bt=0" },
 	{ "arm64.nopauth",
 	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
@@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline)
 
 		for (f = 0; strlen(regs[i]->fields[f].name); f++) {
 			u64 shift = regs[i]->fields[f].shift;
-			u64 mask = 0xfUL << shift;
+			u64 width = regs[i]->fields[f].width ?: 4;
+			u64 mask = GENMASK_ULL(shift + width - 1, shift);
 			u64 v;
 
 			if (find_field(cmdline, regs[i], f, &v))
@@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline)
 
 			/*
 			 * If an override gets filtered out, advertise
-			 * it by setting the value to 0xf, but
+			 * it by setting the value to all ones while
 			 * clearing the mask... Yes, this is fragile.
 			 */
 			if (regs[i]->fields[f].filter &&
@@ -234,9 +295,9 @@ static __init void parse_cmdline(void)
 }
 
 /* Keep checkers quiet */
-void init_feature_override(void);
+void init_feature_override(u64 boot_status);
 
-asmlinkage void __init init_feature_override(void)
+asmlinkage void __init init_feature_override(u64 boot_status)
 {
 	int i;
 
@@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void)
 		}
 	}
 
+	__boot_status = boot_status;
+
 	parse_cmdline();
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 241c86b67d01..afa69e04e75e 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -10,11 +10,8 @@
 #error This file should only be included in vmlinux.lds.S
 #endif
 
-#ifdef CONFIG_EFI
-
-__efistub_kernel_size		= _edata - _text;
-__efistub_primary_entry_offset	= primary_entry - _text;
-
+PROVIDE(__efistub_kernel_size		= _edata - _text);
+PROVIDE(__efistub_primary_entry_offset	= primary_entry - _text);
 
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
@@ -25,31 +22,37 @@ __efistub_primary_entry_offset	= primary_entry - _text;
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp		= __pi_memcmp;
-__efistub_memchr		= __pi_memchr;
-__efistub_memcpy		= __pi_memcpy;
-__efistub_memmove		= __pi_memmove;
-__efistub_memset		= __pi_memset;
-__efistub_strlen		= __pi_strlen;
-__efistub_strnlen		= __pi_strnlen;
-__efistub_strcmp		= __pi_strcmp;
-__efistub_strncmp		= __pi_strncmp;
-__efistub_strrchr		= __pi_strrchr;
-__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
-
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-__efistub___memcpy		= __pi_memcpy;
-__efistub___memmove		= __pi_memmove;
-__efistub___memset		= __pi_memset;
-#endif
+PROVIDE(__efistub_memcmp		= __pi_memcmp);
+PROVIDE(__efistub_memchr		= __pi_memchr);
+PROVIDE(__efistub_memcpy		= __pi_memcpy);
+PROVIDE(__efistub_memmove		= __pi_memmove);
+PROVIDE(__efistub_memset		= __pi_memset);
+PROVIDE(__efistub_strlen		= __pi_strlen);
+PROVIDE(__efistub_strnlen		= __pi_strnlen);
+PROVIDE(__efistub_strcmp		= __pi_strcmp);
+PROVIDE(__efistub_strncmp		= __pi_strncmp);
+PROVIDE(__efistub_strrchr		= __pi_strrchr);
+PROVIDE(__efistub_dcache_clean_poc	= __pi_dcache_clean_poc);
+
+PROVIDE(__efistub__text			= _text);
+PROVIDE(__efistub__end			= _end);
+PROVIDE(__efistub__edata		= _edata);
+PROVIDE(__efistub_screen_info		= screen_info);
+PROVIDE(__efistub__ctype		= _ctype);
 
-__efistub__text			= _text;
-__efistub__end			= _end;
-__efistub__edata		= _edata;
-__efistub_screen_info		= screen_info;
-__efistub__ctype		= _ctype;
+/*
+ * The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which
+ * instruments the conventional ones. Therefore, any references from the EFI
+ * stub or other position independent, low level C code should be redirected to
+ * the non-instrumented versions as well.
+ */
+PROVIDE(__efistub___memcpy		= __pi_memcpy);
+PROVIDE(__efistub___memmove		= __pi_memmove);
+PROVIDE(__efistub___memset		= __pi_memset);
 
-#endif
+PROVIDE(__pi___memcpy			= __pi_memcpy);
+PROVIDE(__pi___memmove			= __pi_memmove);
+PROVIDE(__pi___memset			= __pi_memset);
 
 #ifdef CONFIG_KVM
 
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 418b2bba1521..325455d16dbc 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -13,7 +13,6 @@
 #include <linux/pgtable.h>
 #include <linux/random.h>
 
-#include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
@@ -21,128 +20,45 @@
 #include <asm/sections.h>
 #include <asm/setup.h>
 
-enum kaslr_status {
-	KASLR_ENABLED,
-	KASLR_DISABLED_CMDLINE,
-	KASLR_DISABLED_NO_SEED,
-	KASLR_DISABLED_FDT_REMAP,
-};
-
-static enum kaslr_status __initdata kaslr_status;
 u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;
 
-static __init u64 get_kaslr_seed(void *fdt)
-{
-	int node, len;
-	fdt64_t *prop;
-	u64 ret;
-
-	node = fdt_path_offset(fdt, "/chosen");
-	if (node < 0)
-		return 0;
-
-	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
-	if (!prop || len != sizeof(u64))
-		return 0;
-
-	ret = fdt64_to_cpu(*prop);
-	*prop = 0;
-	return ret;
-}
-
 struct arm64_ftr_override kaslr_feature_override __initdata;
 
-/*
- * This routine will be executed with the kernel mapped at its default virtual
- * address, and if it returns successfully, the kernel will be remapped, and
- * start_kernel() will be executed from a randomized virtual offset. The
- * relocation will result in all absolute references (e.g., static variables
- * containing function pointers) to be reinitialized, and zero-initialized
- * .bss variables will be reset to 0.
- */
-u64 __init kaslr_early_init(void)
+static int __init kaslr_init(void)
 {
-	void *fdt;
-	u64 seed, offset, mask, module_range;
-	unsigned long raw;
+	u64 module_range;
+	u32 seed;
 
 	/*
 	 * Set a reasonable default for module_alloc_base in case
 	 * we end up running with module randomization disabled.
 	 */
 	module_alloc_base = (u64)_etext - MODULES_VSIZE;
-	dcache_clean_inval_poc((unsigned long)&module_alloc_base,
-			    (unsigned long)&module_alloc_base +
-				    sizeof(module_alloc_base));
-
-	/*
-	 * Try to map the FDT early. If this fails, we simply bail,
-	 * and proceed with KASLR disabled. We will make another
-	 * attempt at mapping the FDT in setup_machine()
-	 */
-	fdt = get_early_fdt_ptr();
-	if (!fdt) {
-		kaslr_status = KASLR_DISABLED_FDT_REMAP;
-		return 0;
-	}
 
-	/*
-	 * Retrieve (and wipe) the seed from the FDT
-	 */
-	seed = get_kaslr_seed(fdt);
-
-	/*
-	 * Check if 'nokaslr' appears on the command line, and
-	 * return 0 if that is the case.
-	 */
 	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
-		kaslr_status = KASLR_DISABLED_CMDLINE;
+		pr_info("KASLR disabled on command line\n");
 		return 0;
 	}
 
-	/*
-	 * Mix in any entropy obtainable architecturally if enabled
-	 * and supported.
-	 */
-
-	if (arch_get_random_seed_long_early(&raw))
-		seed ^= raw;
-
-	if (!seed) {
-		kaslr_status = KASLR_DISABLED_NO_SEED;
+	if (!kaslr_offset()) {
+		pr_warn("KASLR disabled due to lack of seed\n");
 		return 0;
 	}
 
+	pr_info("KASLR enabled\n");
+
 	/*
-	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
-	 * kernel image offset from the seed. Let's place the kernel in the
-	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
-	 * the lower and upper quarters to avoid colliding with other
-	 * allocations.
-	 * Even if we could randomize at page granularity for 16k and 64k pages,
-	 * let's always round to 2 MB so we don't interfere with the ability to
-	 * map using contiguous PTEs
+	 * KASAN without KASAN_VMALLOC does not expect the module region to
+	 * intersect the vmalloc region, since shadow memory is allocated for
+	 * each module at load time, whereas the vmalloc region will already be
+	 * shadowed by KASAN zero pages.
 	 */
-	mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1);
-	offset = BIT(VA_BITS_MIN - 3) + (seed & mask);
+	BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
+	              IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
+		     !IS_ENABLED(CONFIG_KASAN_VMALLOC));
 
-	/* use the top 16 bits to randomize the linear region */
-	memstart_offset_seed = seed >> 48;
-
-	if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
-	    (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-	     IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
-		/*
-		 * KASAN without KASAN_VMALLOC does not expect the module region
-		 * to intersect the vmalloc region, since shadow memory is
-		 * allocated for each module at load time, whereas the vmalloc
-		 * region is shadowed by KASAN zero pages. So keep modules
-		 * out of the vmalloc region if KASAN is enabled without
-		 * KASAN_VMALLOC, and put the kernel well within 4 GB of the
-		 * module region.
-		 */
-		return offset % SZ_2G;
+	seed = get_random_u32();
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
 		/*
@@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void)
 		 * resolved normally.)
 		 */
 		module_range = SZ_2G - (u64)(_end - _stext);
-		module_alloc_base = max((u64)_end + offset - SZ_2G,
-					(u64)MODULES_VADDR);
+		module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
 	} else {
 		/*
 		 * Randomize the module region by setting module_alloc_base to
@@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void)
 		 * when ARM64_MODULE_PLTS is enabled.
 		 */
 		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
-		module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
 	}
 
 	/* use the lower 21 bits to randomize the base of the module region */
 	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
 	module_alloc_base &= PAGE_MASK;
 
-	dcache_clean_inval_poc((unsigned long)&module_alloc_base,
-			    (unsigned long)&module_alloc_base +
-				    sizeof(module_alloc_base));
-	dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
-			    (unsigned long)&memstart_offset_seed +
-				    sizeof(memstart_offset_seed));
-
-	return offset;
-}
-
-static int __init kaslr_init(void)
-{
-	switch (kaslr_status) {
-	case KASLR_ENABLED:
-		pr_info("KASLR enabled\n");
-		break;
-	case KASLR_DISABLED_CMDLINE:
-		pr_info("KASLR disabled on command line\n");
-		break;
-	case KASLR_DISABLED_NO_SEED:
-		pr_warn("KASLR disabled due to lack of seed\n");
-		break;
-	case KASLR_DISABLED_FDT_REMAP:
-		pr_warn("KASLR disabled due to FDT remapping failure\n");
-		break;
-	}
-
 	return 0;
 }
-core_initcall(kaslr_init)
+subsys_initcall(kaslr_init)
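kaslr_init() is now a subsys_initcall that only randomizes the module region: the image offset itself has already been chosen by the early PI code, so the initcall merely checks kaslr_offset() for a seed and mixes the low 21 bits of a fresh get_random_u32() into module_alloc_base. A standalone sketch of that arithmetic, with assumed example values rather than the kernel's real layout:

#include <stdint.h>
#include <stdio.h>

#define PAGE_MASK	(~((uint64_t)4096 - 1))	/* assumed 4K pages */

int main(void)
{
	/* assumed stand-ins for _etext - MODULES_VSIZE and friends */
	uint64_t module_alloc_base = 0xffff800008000000ULL;
	uint64_t module_range = 0x6000000;	/* MODULES_VSIZE - (_etext - _stext) */
	uint32_t seed = 0x1234abcd;		/* get_random_u32() */

	/* the lower 21 bits of the seed scale linearly across the range */
	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
	module_alloc_base &= PAGE_MASK;
	printf("module_alloc_base = %#llx\n",
	       (unsigned long long)module_alloc_base);
	return 0;
}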
diff --git a/arch/arm64/kernel/pi/Makefile b/arch/arm64/kernel/pi/Makefile
new file mode 100644
index 000000000000..839291430cb3
--- /dev/null
+++ b/arch/arm64/kernel/pi/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 Google LLC
+
+KBUILD_CFLAGS	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+		   -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+		   $(call cc-option,-mbranch-protection=none) \
+		   -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+		   -include $(srctree)/include/linux/hidden.h \
+		   -D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
+		   $(call cc-option,-fno-addrsig)
+
+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS	:= $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+# disable LTO
+KBUILD_CFLAGS	:= $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
+
+GCOV_PROFILE	:= n
+KASAN_SANITIZE	:= n
+KCSAN_SANITIZE	:= n
+UBSAN_SANITIZE	:= n
+KCOV_INSTRUMENT	:= n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+			       --remove-section=.note.gnu.property \
+			       --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+obj-y		:= kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y		:= $(patsubst %.pi.o,%.o,$(obj-y))
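The two rules above compile ordinary objects and then rewrite them with objcopy: --prefix-symbols=__pi_ renames every symbol and --prefix-alloc-sections=.init moves the code into the init image. Callers must therefore use the prefixed names; a hypothetical C-level view of the calling side (head.S performs the equivalent branch in assembly):

#include <linux/types.h>

/* after objcopy, the entry point is only visible under its __pi_ name */
extern u64 __pi_kaslr_early_init(void *fdt);

static u64 demo_call(void *fdt)
{
	return __pi_kaslr_early_init(fdt);	/* runs from the .init copy */
}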
diff --git a/arch/arm64/kernel/pi/kaslr_early.c b/arch/arm64/kernel/pi/kaslr_early.c
new file mode 100644
index 000000000000..6c3855e69395
--- /dev/null
+++ b/arch/arm64/kernel/pi/kaslr_early.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright 2022 Google LLC
+// Author: Ard Biesheuvel <ardb@google.com>
+
+// NOTE: code in this file runs *very* early, and is not permitted to use
+// global variables or anything that relies on absolute addressing.
+
+#include <linux/libfdt.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
+
+#include <asm/archrandom.h>
+#include <asm/memory.h>
+
+/* taken from lib/string.c */
+static char *__strstr(const char *s1, const char *s2)
+{
+	size_t l1, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	l1 = strlen(s1);
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+
+static bool cmdline_contains_nokaslr(const u8 *cmdline)
+{
+	const u8 *str;
+
+	str = __strstr(cmdline, "nokaslr");
+	return str == cmdline || (str > cmdline && *(str - 1) == ' ');
+}
+
+static bool is_kaslr_disabled_cmdline(void *fdt)
+{
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		int node;
+		const u8 *prop;
+
+		node = fdt_path_offset(fdt, "/chosen");
+		if (node < 0)
+			goto out;
+
+		prop = fdt_getprop(fdt, node, "bootargs", NULL);
+		if (!prop)
+			goto out;
+
+		if (cmdline_contains_nokaslr(prop))
+			return true;
+
+		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
+			goto out;
+
+		return false;
+	}
+out:
+	return cmdline_contains_nokaslr(CONFIG_CMDLINE);
+}
+
+static u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	fdt64_t *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+asmlinkage u64 kaslr_early_init(void *fdt)
+{
+	u64 seed;
+
+	if (is_kaslr_disabled_cmdline(fdt))
+		return 0;
+
+	seed = get_kaslr_seed(fdt);
+	if (!seed) {
+#ifdef CONFIG_ARCH_RANDOM
+		if (!__early_cpu_has_rndr() ||
+		    !__arm64_rndr((unsigned long *)&seed))
+#endif
+			return 0;
+	}
+
+	/*
+	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+	 * kernel image offset from the seed. Let's place the kernel in the
+	 * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
+	 * the lower and upper quarters to avoid colliding with other
+	 * allocations.
+	 */
+	return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
+}
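For VA_BITS_MIN == 48, the returned offset is BIT(45) plus 46 random bits, so it falls in [2^45, 2^45 + 2^46): the middle half of a 2^47-byte window, clear of the bottom and top quarters. A userspace sketch of the arithmetic, with local macro stand-ins for the kernel's BIT()/GENMASK():

#include <stdint.h>
#include <stdio.h>

#define VA_BITS_MIN	48	/* assumed configuration */
#define BIT(n)		(1ULL << (n))
#define GENMASK(h, l)	((~0ULL >> (63 - (h))) & ~(BIT(l) - 1))

int main(void)
{
	uint64_t seed = 0xdeadbeefcafef00dULL;	/* stand-in for the FDT/RNDR seed */
	uint64_t off = BIT(VA_BITS_MIN - 3) +
		       (seed & GENMASK(VA_BITS_MIN - 3, 0));

	printf("kaslr offset = %#llx\n", (unsigned long long)off);
	return 0;
}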
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 4ea9392f86e0..617f78ad43a1 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter)
 	.pushsection ".idmap.text", "awx"
 SYM_CODE_START(cpu_resume)
 	bl	init_kernel_el
-	bl	switch_to_vhe
+	bl	finalise_el2
 	bl	__cpu_setup
 	/* enable the MMU early - so we can access sleep_save_stash by va */
 	adrp	x1, swapper_pg_dir
+	adrp	x2, idmap_pg_dir
 	bl	__enable_mmu
 	ldr	x8, =_cpu_resume
 	br	x8
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 2b0887e58a7c..9135fe0f3df5 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void)
 
 	/* Restore CnP bit in TTBR1_EL1 */
 	if (system_supports_cnp())
-		cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+		cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 
 	/*
 	 * PSTATE was not saved over suspend/resume, re-enable any detected
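cpu_replace_ttbr1() grows a second argument because the swap has to execute from the ID map: while TTBR1_EL1 is being replaced, kernel virtual addresses are not reliable. The same idea shows up in cpu_resume above, which now passes idmap_pg_dir in x2 to __enable_mmu. A kernel-context sketch mirroring the call sites in this patch, not the helper's implementation:

#include <asm/mmu_context.h>	/* cpu_replace_ttbr1(), lm_alias() */
#include <asm/pgtable.h>	/* swapper_pg_dir, idmap_pg_dir */

static void demo_restore_swapper(void)
{
	/* the second argument names the ID map to run the switch from */
	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
}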
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 8a078c0ee140..45131e354e27 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -199,8 +199,7 @@ SECTIONS
 	}
 
 	idmap_pg_dir = .;
-	. += IDMAP_DIR_SIZE;
-	idmap_pg_end = .;
+	. += PAGE_SIZE;
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	tramp_pg_dir = .;
@@ -236,6 +235,10 @@ SECTIONS
 	__inittext_end = .;
 	__initdata_begin = .;
 
+	init_idmap_pg_dir = .;
+	. += INIT_IDMAP_DIR_SIZE;
+	init_idmap_pg_end = .;
+
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)
@@ -254,21 +257,17 @@ SECTIONS
 	HYPERVISOR_RELOC_SECTION
 
 	.rela.dyn : ALIGN(8) {
+		__rela_start = .;
 		*(.rela .rela*)
+		__rela_end = .;
 	}
 
-	__rela_offset	= ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
-	__rela_size	= SIZEOF(.rela.dyn);
-
-#ifdef CONFIG_RELR
 	.relr.dyn : ALIGN(8) {
+		__relr_start = .;
 		*(.relr.dyn)
+		__relr_end = .;
 	}
 
-	__relr_offset	= ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
-	__relr_size	= SIZEOF(.relr.dyn);
-#endif
-
 	. = ALIGN(SEGMENT_ALIGN);
 	__initdata_end = .;
 	__init_end = .;
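Exporting __rela_start/__rela_end (and, unconditionally, __relr_start/__relr_end) instead of link-time offset/size constants lets the relocation code reach the tables through position-independent references. A C-level sketch of a consumer, under the assumption that it mirrors what the assembly relocation loop does with R_AARCH64_RELATIVE entries:

#include <linux/elf.h>
#include <linux/types.h>
#include <asm/elf.h>	/* R_AARCH64_RELATIVE */

extern const Elf64_Rela __rela_start[], __rela_end[];

/* illustration only: apply @offset to every RELATIVE relocation */
static void demo_apply_rela(u64 offset)
{
	const Elf64_Rela *rela;

	for (rela = __rela_start; rela < __rela_end; rela++) {
		u64 *place = (u64 *)(rela->r_offset + offset);

		if (ELF64_R_TYPE(rela->r_info) == R_AARCH64_RELATIVE)
			*place = rela->r_addend + offset;
	}
}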
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index c12cd700598f..e969e68de005 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void)
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
 	dsb(ishst);
-	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
+	cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void)
 				PAGE_KERNEL_RO));
 
 	memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }
 
 static void __init kasan_init_depth(void)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 19feb6fe05b2..db7c4e6ae57b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -43,15 +43,27 @@
 #define NO_CONT_MAPPINGS	BIT(1)
 #define NO_EXEC_MAPPINGS	BIT(2)	/* assumes FEAT_HPDS is not used */
 
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
-u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+int idmap_t0sz __ro_after_init;
 
-u64 __section(".mmuoff.data.write") vabits_actual;
+#if VA_BITS > 48
+u64 vabits_actual __ro_after_init = VA_BITS_MIN;
 EXPORT_SYMBOL(vabits_actual);
+#endif
+
+u64 kimage_vaddr __ro_after_init = (u64)&_text;
+EXPORT_SYMBOL(kimage_vaddr);
 
 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
 
+u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
+
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status
+ * with the MMU turned off.
+ */
+long __section(".mmuoff.data.write") __early_cpu_boot_status;
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -763,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp)
 	kasan_copy_shadow(pgdp);
 }
 
+static void __init create_idmap(void)
+{
+	u64 start = __pa_symbol(__idmap_text_start);
+	u64 size = __pa_symbol(__idmap_text_end) - start;
+	pgd_t *pgd = idmap_pg_dir;
+	u64 pgd_phys;
+
+	/* check if we need an additional level of translation */
+	if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
+		pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
+		set_pgd(&idmap_pg_dir[start >> VA_BITS],
+			__pgd(pgd_phys | P4D_TYPE_TABLE));
+		pgd = __va(pgd_phys);
+	}
+	__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
+			     early_pgtable_alloc, 0);
+
+	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
+		extern u32 __idmap_kpti_flag;
+		u64 pa = __pa_symbol(&__idmap_kpti_flag);
+
+		/*
+		 * The KPTI G-to-nG conversion code needs a read-write mapping
+		 * of its synchronization flag in the ID map.
+		 */
+		__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
+				     early_pgtable_alloc, 0);
+	}
+}
+
 void __init paging_init(void)
 {
 	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+	extern pgd_t init_idmap_pg_dir[];
+
+	idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
 
 	map_kernel(pgdp);
 	map_mem(pgdp);
 
 	pgd_clear_fixmap();
 
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
 	init_mm.pgd = swapper_pg_dir;
 
 	memblock_phys_free(__pa_symbol(init_pg_dir),
 			   __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
 
 	memblock_allow_resize();
+
+	create_idmap();
 }
 
 /*
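idmap_t0sz is now computed at runtime: 63 - __fls(x) is 64 minus the number of bits needed to cover x, and ORing in GENMASK(VA_BITS_MIN - 1, 0) keeps the ID map at least VA_BITS_MIN bits wide. A worked userspace example with an assumed physical end address:

#include <stdint.h>
#include <stdio.h>

#define VA_BITS_MIN	48	/* assumed configuration */

static int fls64(uint64_t x)	/* index of the highest set bit */
{
	return 63 - __builtin_clzll(x);
}

int main(void)
{
	uint64_t pa_end = 0x40400000ULL;	/* assumed __pa_symbol(_end) */
	uint64_t mask = (1ULL << VA_BITS_MIN) - 1;	/* GENMASK(47, 0) */
	int t0sz = 63 - fls64(pa_end | mask);

	/* the mask forces bit 47, so t0sz = 16, i.e. a 48-bit ID map,
	 * even though pa_end itself would fit in far fewer bits */
	printf("idmap_t0sz = %d\n", t0sz);
	return 0;
}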
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 972ce8d7f2c5..7837a69524c5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -249,8 +249,10 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
  *
  * Called exactly once from stop_machine context by each CPU found during boot.
  */
-__idmap_kpti_flag:
-	.long	1
+	.pushsection	".data", "aw", %progbits
+SYM_DATA(__idmap_kpti_flag, .long 1)
+	.popsection
+
 SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	cpu		.req	w0
 	temp_pte	.req	x0
@@ -273,7 +275,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 
 	mov	x5, x3				// preserve temp_pte arg
 	mrs	swapper_ttb, ttbr1_el1
-	adr	flag_ptr, __idmap_kpti_flag
+	adr_l	flag_ptr, __idmap_kpti_flag
 
 	cbnz	cpu, __idmap_kpti_secondary
 
@@ -396,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
  *
  *	Initialise the processor for turning the MMU on.
  *
+ * Input:
+ *	x0 - actual number of VA bits (ignored unless VA_BITS > 48)
  * Output:
  *	Return in x0 the value of the SCTLR_EL1 register.
  */
@@ -465,12 +469,11 @@ SYM_FUNC_START(__cpu_setup)
 	tcr_clear_errata_bits tcr, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
-	ldr_l		x9, vabits_actual
-	sub		x9, xzr, x9
+	sub		x9, xzr, x0
 	add		x9, x9, #64
 	tcr_set_t1sz	tcr, x9
 #else
-	ldr_l		x9, idmap_t0sz
+	idmap_get_t0sz	x9
 #endif
 	tcr_set_t0sz	tcr, x9
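With the 52-bit VA configuration, __cpu_setup now receives the VA width in x0 instead of loading vabits_actual from memory, which would require a mapping this early; the instruction pair computes T1SZ = 64 - vabits. A trivial check of the arithmetic:

#include <stdio.h>

int main(void)
{
	/* sub x9, xzr, x0 ; add x9, x9, #64  computes  x9 = 64 - x0 */
	int vabits[] = { 52, 48 };

	for (int i = 0; i < 2; i++)
		printf("vabits=%d -> T1SZ=%d\n", vabits[i], 64 - vabits[i]);
	return 0;
}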