summary refs log tree commit diff
path: root/arch/arm64/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-08-01 10:37:00 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-08-01 10:37:00 -0700
commit0cec3f24a7cedc726f8790d693aaff2c535dc4ce (patch)
tree964e115cda27fd3f93b662a760fbc7d9a8e845e7 /arch/arm64/mm
parenta82c58cf1a5560988d8b9b8abcd982e90a5fdaa4 (diff)
parent892f7237b3ffb090f1b1f1e55fe7c50664405aed (diff)
downloadlinux-0cec3f24a7cedc726f8790d693aaff2c535dc4ce.tar.gz
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Will Deacon:
 "Highlights include a major rework of our kPTI page-table rewriting
  code (which makes it both more maintainable and considerably faster in
  the cases where it is required) as well as significant changes to our
  early boot code to reduce the need for data cache maintenance and
  greatly simplify the KASLR relocation dance.

  Summary:

   - Remove unused generic cpuidle support (replaced by PSCI version)

   - Fix documentation describing the kernel virtual address space

   - Handling of some new CPU errata in Arm implementations

   - Rework of our exception table code in preparation for handling
     machine checks (i.e. RAS errors) more gracefully

   - Switch over to the generic implementation of ioremap()

   - Fix lockdep tracking in NMI context

   - Instrument our memory barrier macros for KCSAN

   - Rework of the kPTI G->nG page-table repainting so that the MMU
     remains enabled and the boot time is no longer slowed to a crawl
     for systems which require the late remapping

   - Enable support for direct swapping of 2MiB transparent huge-pages
     on systems without MTE

   - Fix handling of MTE tags when allocating new pages with HW KASAN

   - Expose the SMIDR register to userspace via sysfs

   - Continued rework of the stack unwinder, particularly improving the
     behaviour under KASAN

   - More repainting of our system register definitions to match the
     architectural terminology

   - Improvements to the layout of the vDSO objects

   - Support for allocating additional bits of HWCAP2 and exposing
     FEAT_EBF16 to userspace on CPUs that support it

   - Considerable rework and optimisation of our early boot code to
     reduce the need for cache maintenance and avoid jumping in and out
     of the kernel when handling relocation under KASLR

   - Support for disabling SVE and SME via the kernel
     command line

   - Support for the Hisilicon HNS3 PMU

   - Miscellaneous cleanups, trivial updates and minor fixes"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (136 commits)
  arm64: Delay initialisation of cpuinfo_arm64::reg_{zcr,smcr}
  arm64: fix KASAN_INLINE
  arm64/hwcap: Support FEAT_EBF16
  arm64/cpufeature: Store elf_hwcaps as a bitmap rather than unsigned long
  arm64/hwcap: Document allocation of upper bits of AT_HWCAP
  arm64: enable THP_SWAP for arm64
  arm64/mm: use GENMASK_ULL for TTBR_BADDR_MASK_52
  arm64: errata: Remove AES hwcap for COMPAT tasks
  arm64: numa: Don't check node against MAX_NUMNODES
  drivers/perf: arm_spe: Fix consistency of SYS_PMSCR_EL1.CX
  perf: RISC-V: Add of_node_put() when breaking out of for_each_of_cpu_node()
  docs: perf: Include hns3-pmu.rst in toctree to fix 'htmldocs' WARNING
  arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"
  mm: kasan: Skip page unpoisoning only if __GFP_SKIP_KASAN_UNPOISON
  mm: kasan: Skip unpoisoning of user pages
  mm: kasan: Ensure the tags are visible before the tag in page->flags
  drivers/perf: hisi: add driver for HNS3 PMU
  drivers/perf: hisi: Add description for HNS3 PMU driver
  drivers/perf: riscv_pmu_sbi: perf format
  perf/arm-cci: Use the bitmap API to allocate bitmaps
  ...
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r--arch/arm64/mm/cache.S41
-rw-r--r--arch/arm64/mm/copypage.c9
-rw-r--r--arch/arm64/mm/dma-mapping.c19
-rw-r--r--arch/arm64/mm/extable.c10
-rw-r--r--arch/arm64/mm/fault.c1
-rw-r--r--arch/arm64/mm/hugetlbpage.c10
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/ioremap.c90
-rw-r--r--arch/arm64/mm/kasan_init.c4
-rw-r--r--arch/arm64/mm/mmu.c78
-rw-r--r--arch/arm64/mm/mteswap.c9
-rw-r--r--arch/arm64/mm/proc.S188
12 files changed, 186 insertions, 277 deletions
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 21c907987080..081058d4e436 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -194,44 +194,3 @@ SYM_FUNC_START(__pi_dcache_clean_pop)
 	ret
 SYM_FUNC_END(__pi_dcache_clean_pop)
 SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop)
-
-/*
- *	__dma_flush_area(start, size)
- *
- *	clean & invalidate D / U line
- *
- *	- start   - virtual start address of region
- *	- size    - size in question
- */
-SYM_FUNC_START(__pi___dma_flush_area)
-	add	x1, x0, x1
-	dcache_by_line_op civac, sy, x0, x1, x2, x3
-	ret
-SYM_FUNC_END(__pi___dma_flush_area)
-SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area)
-
-/*
- *	__dma_map_area(start, size, dir)
- *	- start	- kernel virtual start address
- *	- size	- size of region
- *	- dir	- DMA direction
- */
-SYM_FUNC_START(__pi___dma_map_area)
-	add	x1, x0, x1
-	b	__pi_dcache_clean_poc
-SYM_FUNC_END(__pi___dma_map_area)
-SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
-
-/*
- *	__dma_unmap_area(start, size, dir)
- *	- start	- kernel virtual start address
- *	- size	- size of region
- *	- dir	- DMA direction
- */
-SYM_FUNC_START(__pi___dma_unmap_area)
-	add	x1, x0, x1
-	cmp	w2, #DMA_TO_DEVICE
-	b.ne	__pi_dcache_inval_poc
-	ret
-SYM_FUNC_END(__pi___dma_unmap_area)
-SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area)
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 0dea80bf6de4..24913271e898 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from)
 
 	if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
 		set_bit(PG_mte_tagged, &to->flags);
-		page_kasan_tag_reset(to);
-		/*
-		 * We need smp_wmb() in between setting the flags and clearing the
-		 * tags because if another thread reads page->flags and builds a
-		 * tagged address out of it, there is an actual dependency to the
-		 * memory access, but on the current thread we do not guarantee that
-		 * the new page->flags are visible before the tags were updated.
-		 */
-		smp_wmb();
 		mte_copy_page_tags(kto, kfrom);
 	}
 }
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 6099c81b9322..599cf81f5685 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -14,20 +14,29 @@
 #include <asm/xen/xen-ops.h>
 
 void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+			      enum dma_data_direction dir)
 {
-	__dma_map_area(phys_to_virt(paddr), size, dir);
+	unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+	dcache_clean_poc(start, start + size);
 }
 
 void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
-		enum dma_data_direction dir)
+			   enum dma_data_direction dir)
 {
-	__dma_unmap_area(phys_to_virt(paddr), size, dir);
+	unsigned long start = (unsigned long)phys_to_virt(paddr);
+
+	if (dir == DMA_TO_DEVICE)
+		return;
+
+	dcache_inval_poc(start, start + size);
 }
 
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
-	__dma_flush_area(page_address(page), size);
+	unsigned long start = (unsigned long)page_address(page);
+
+	dcache_clean_inval_poc(start, start + size);
 }
 
 #ifdef CONFIG_IOMMU_DMA
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 489455309695..228d681a8715 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -16,13 +16,6 @@ get_ex_fixup(const struct exception_table_entry *ex)
 	return ((unsigned long)&ex->fixup + ex->fixup);
 }
 
-static bool ex_handler_fixup(const struct exception_table_entry *ex,
-			     struct pt_regs *regs)
-{
-	regs->pc = get_ex_fixup(ex);
-	return true;
-}
-
 static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
 					struct pt_regs *regs)
 {
@@ -72,11 +65,10 @@ bool fixup_exception(struct pt_regs *regs)
 		return false;
 
 	switch (ex->type) {
-	case EX_TYPE_FIXUP:
-		return ex_handler_fixup(ex, regs);
 	case EX_TYPE_BPF:
 		return ex_handler_bpf(ex, regs);
 	case EX_TYPE_UACCESS_ERR_ZERO:
+	case EX_TYPE_KACCESS_ERR_ZERO:
 		return ex_handler_uaccess_err_zero(ex, regs);
 	case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
 		return ex_handler_load_unaligned_zeropad(ex, regs);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c5e11768e5c1..cdf3ffa0c223 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -927,6 +927,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 void tag_clear_highpage(struct page *page)
 {
 	mte_zero_clear_page_tags(page_address(page));
-	page_kasan_tag_reset(page);
 	set_bit(PG_mte_tagged, &page->flags);
 }
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 3618ef3f6d81..5307ffdefb8d 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -100,16 +100,6 @@ int pud_huge(pud_t pud)
 #endif
 }
 
-/*
- * Select all bits except the pfn
- */
-static inline pgprot_t pte_pgprot(pte_t pte)
-{
-	unsigned long pfn = pte_pfn(pte);
-
-	return __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
-}
-
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, size_t *pgsize)
 {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 339ee84e5a61..b6ef26fc8ebe 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -389,7 +389,7 @@ void __init arm64_memblock_init(void)
 
 	early_init_fdt_scan_reserved_mem();
 
-	if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32))
+	if (!defer_reserve_crashkernel())
 		reserve_crashkernel();
 
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
@@ -438,7 +438,7 @@ void __init bootmem_init(void)
 	 * request_standard_resources() depends on crashkernel's memory being
 	 * reserved, so do it here.
 	 */
-	if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32))
+	if (defer_reserve_crashkernel())
 		reserve_crashkernel();
 
 	memblock_dump_all();
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b21f91cd830d..c5af103d4ad4 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -1,96 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * Based on arch/arm/mm/ioremap.c
- *
- * (C) Copyright 1995 1996 Linus Torvalds
- * Hacked for ARM by Phil Blundell <philb@gnu.org>
- * Hacked to allow all architectures to build, and various cleanups
- * by Russell King
- * Copyright (C) 2012 ARM Ltd.
- */
 
-#include <linux/export.h>
 #include <linux/mm.h>
-#include <linux/vmalloc.h>
 #include <linux/io.h>
 
-#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-
-static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size,
-				      pgprot_t prot, void *caller)
+bool ioremap_allowed(phys_addr_t phys_addr, size_t size, unsigned long prot)
 {
-	unsigned long last_addr;
-	unsigned long offset = phys_addr & ~PAGE_MASK;
-	int err;
-	unsigned long addr;
-	struct vm_struct *area;
+	unsigned long last_addr = phys_addr + size - 1;
 
-	/*
-	 * Page align the mapping address and size, taking account of any
-	 * offset.
-	 */
-	phys_addr &= PAGE_MASK;
-	size = PAGE_ALIGN(size + offset);
+	/* Don't allow outside PHYS_MASK */
+	if (last_addr & ~PHYS_MASK)
+		return false;
 
-	/*
-	 * Don't allow wraparound, zero size or outside PHYS_MASK.
-	 */
-	last_addr = phys_addr + size - 1;
-	if (!size || last_addr < phys_addr || (last_addr & ~PHYS_MASK))
-		return NULL;
-
-	/*
-	 * Don't allow RAM to be mapped.
-	 */
+	/* Don't allow RAM to be mapped. */
 	if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr))))
-		return NULL;
-
-	area = get_vm_area_caller(size, VM_IOREMAP, caller);
-	if (!area)
-		return NULL;
-	addr = (unsigned long)area->addr;
-	area->phys_addr = phys_addr;
-
-	err = ioremap_page_range(addr, addr + size, phys_addr, prot);
-	if (err) {
-		vunmap((void *)addr);
-		return NULL;
-	}
-
-	return (void __iomem *)(offset + addr);
-}
-
-void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot)
-{
-	return __ioremap_caller(phys_addr, size, prot,
-				__builtin_return_address(0));
-}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *io_addr)
-{
-	unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
-
-	/*
-	 * We could get an address outside vmalloc range in case
-	 * of ioremap_cache() reusing a RAM mapping.
-	 */
-	if (is_vmalloc_addr((void *)addr))
-		vunmap((void *)addr);
-}
-EXPORT_SYMBOL(iounmap);
-
-void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
-{
-	/* For normal memory we already have a cacheable mapping. */
-	if (pfn_is_map_memory(__phys_to_pfn(phys_addr)))
-		return (void __iomem *)__phys_to_virt(phys_addr);
+		return false;
 
-	return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),
-				__builtin_return_address(0));
+	return true;
 }
-EXPORT_SYMBOL(ioremap_cache);
 
 /*
  * Must be called after early_fixmap_init
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index c12cd700598f..e969e68de005 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void)
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
 	dsb(ishst);
-	cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
+	cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void)
 				PAGE_KERNEL_RO));
 
 	memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
 }
 
 static void __init kasan_init_depth(void)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..db7c4e6ae57b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -43,15 +43,27 @@
 #define NO_CONT_MAPPINGS	BIT(1)
 #define NO_EXEC_MAPPINGS	BIT(2)	/* assumes FEAT_HPDS is not used */
 
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
-u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+int idmap_t0sz __ro_after_init;
 
-u64 __section(".mmuoff.data.write") vabits_actual;
+#if VA_BITS > 48
+u64 vabits_actual __ro_after_init = VA_BITS_MIN;
 EXPORT_SYMBOL(vabits_actual);
+#endif
+
+u64 kimage_vaddr __ro_after_init = (u64)&_text;
+EXPORT_SYMBOL(kimage_vaddr);
 
 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
 
+u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
+
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+long __section(".mmuoff.data.write") __early_cpu_boot_status;
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -388,6 +400,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 	} while (pgdp++, addr = next, addr != end);
 }
 
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+extern __alias(__create_pgd_mapping)
+void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
+			     phys_addr_t size, pgprot_t prot,
+			     phys_addr_t (*pgtable_alloc)(int), int flags);
+#endif
+
 static phys_addr_t __pgd_pgtable_alloc(int shift)
 {
 	void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
@@ -529,8 +548,7 @@ static void __init map_mem(pgd_t *pgdp)
 
 #ifdef CONFIG_KEXEC_CORE
 	if (crash_mem_map) {
-		if (IS_ENABLED(CONFIG_ZONE_DMA) ||
-		    IS_ENABLED(CONFIG_ZONE_DMA32))
+		if (defer_reserve_crashkernel())
 			flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 		else if (crashk_res.end)
 			memblock_mark_nomap(crashk_res.start,
@@ -571,8 +589,7 @@ static void __init map_mem(pgd_t *pgdp)
 	 * through /sys/kernel/kexec_crash_size interface.
 	 */
 #ifdef CONFIG_KEXEC_CORE
-	if (crash_mem_map &&
-	    !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) {
+	if (crash_mem_map && !defer_reserve_crashkernel()) {
 		if (crashk_res.end) {
 			__map_memblock(pgdp, crashk_res.start,
 				       crashk_res.end + 1,
@@ -665,13 +682,9 @@ static int __init map_entry_trampoline(void)
 		__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
 			     pa_start + i * PAGE_SIZE, prot);
 
-	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
-		extern char __entry_tramp_data_start[];
-
-		__set_fixmap(FIX_ENTRY_TRAMP_DATA,
-			     __pa_symbol(__entry_tramp_data_start),
-			     PAGE_KERNEL_RO);
-	}
+	if (IS_ENABLED(CONFIG_RELOCATABLE))
+		__set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i,
+			     pa_start + i * PAGE_SIZE, PAGE_KERNEL_RO);
 
 	return 0;
 }
@@ -762,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp)
 	kasan_copy_shadow(pgdp);
 }
 
+static void __init create_idmap(void)
+{
+	u64 start = __pa_symbol(__idmap_text_start);
+	u64 size = __pa_symbol(__idmap_text_end) - start;
+	pgd_t *pgd = idmap_pg_dir;
+	u64 pgd_phys;
+
+	/* check if we need an additional level of translation */
+	if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
+		pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
+		set_pgd(&idmap_pg_dir[start >> VA_BITS],
+			__pgd(pgd_phys | P4D_TYPE_TABLE));
+		pgd = __va(pgd_phys);
+	}
+	__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
+			     early_pgtable_alloc, 0);
+
+	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
+		extern u32 __idmap_kpti_flag;
+		u64 pa = __pa_symbol(&__idmap_kpti_flag);
+
+		/*
+		 * The KPTI G-to-nG conversion code needs a read-write mapping
+		 * of its synchronization flag in the ID map.
+		 */
+		__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
+				     early_pgtable_alloc, 0);
+	}
+}
+
 void __init paging_init(void)
 {
 	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
+	extern pgd_t init_idmap_pg_dir[];
+
+	idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
 
 	map_kernel(pgdp);
 	map_mem(pgdp);
 
 	pgd_clear_fixmap();
 
-	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
+	cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
 	init_mm.pgd = swapper_pg_dir;
 
 	memblock_phys_free(__pa_symbol(init_pg_dir),
 			   __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
 
 	memblock_allow_resize();
+
+	create_idmap();
 }
 
 /*
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index a9e50e930484..4334dec93bd4 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page)
 	if (!tags)
 		return false;
 
-	page_kasan_tag_reset(page);
-	/*
-	 * We need smp_wmb() in between setting the flags and clearing the
-	 * tags because if another thread reads page->flags and builds a
-	 * tagged address out of it, there is an actual dependency to the
-	 * memory access, but on the current thread we do not guarantee that
-	 * the new page->flags are visible before the tags were updated.
-	 */
-	smp_wmb();
 	mte_restore_page_tags(page_address(page), tags);
 
 	return true;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 50bbed947bec..7837a69524c5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -14,6 +14,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/asm_pointer_auth.h>
 #include <asm/hwcap.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
@@ -200,34 +201,64 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
 	.popsection
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+
+#define KPTI_NG_PTE_FLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+
 	.pushsection ".idmap.text", "awx"
 
-	.macro	__idmap_kpti_get_pgtable_ent, type
-	dc	cvac, cur_\()\type\()p		// Ensure any existing dirty
-	dmb	sy				// lines are written back before
-	ldr	\type, [cur_\()\type\()p]	// loading the entry
-	tbz	\type, #0, skip_\()\type	// Skip invalid and
-	tbnz	\type, #11, skip_\()\type	// non-global entries
+	.macro	kpti_mk_tbl_ng, type, num_entries
+	add	end_\type\()p, cur_\type\()p, #\num_entries * 8
+.Ldo_\type:
+	ldr	\type, [cur_\type\()p]		// Load the entry
+	tbz	\type, #0, .Lnext_\type		// Skip invalid and
+	tbnz	\type, #11, .Lnext_\type	// non-global entries
+	orr	\type, \type, #PTE_NG		// Same bit for blocks and pages
+	str	\type, [cur_\type\()p]		// Update the entry
+	.ifnc	\type, pte
+	tbnz	\type, #1, .Lderef_\type
+	.endif
+.Lnext_\type:
+	add	cur_\type\()p, cur_\type\()p, #8
+	cmp	cur_\type\()p, end_\type\()p
+	b.ne	.Ldo_\type
 	.endm
 
-	.macro __idmap_kpti_put_pgtable_ent_ng, type
-	orr	\type, \type, #PTE_NG		// Same bit for blocks and pages
-	str	\type, [cur_\()\type\()p]	// Update the entry and ensure
-	dmb	sy				// that it is visible to all
-	dc	civac, cur_\()\type\()p		// CPUs.
+	/*
+	 * Dereference the current table entry and map it into the temporary
+	 * fixmap slot associated with the current level.
+	 */
+	.macro	kpti_map_pgtbl, type, level
+	str	xzr, [temp_pte, #8 * (\level + 1)]	// break before make
+	dsb	nshst
+	add	pte, temp_pte, #PAGE_SIZE * (\level + 1)
+	lsr	pte, pte, #12
+	tlbi	vaae1, pte
+	dsb	nsh
+	isb
+
+	phys_to_pte pte, cur_\type\()p
+	add	cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1)
+	orr	pte, pte, pte_flags
+	str	pte, [temp_pte, #8 * (\level + 1)]
+	dsb	nshst
 	.endm
 
 /*
- * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper)
+ * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd,
+ *				   unsigned long temp_pte_va)
  *
  * Called exactly once from stop_machine context by each CPU found during boot.
  */
-__idmap_kpti_flag:
-	.long	1
+	.pushsection	".data", "aw", %progbits
+SYM_DATA(__idmap_kpti_flag, .long 1)
+	.popsection
+
 SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	cpu		.req	w0
+	temp_pte	.req	x0
 	num_cpus	.req	w1
-	swapper_pa	.req	x2
+	pte_flags	.req	x1
+	temp_pgd_phys	.req	x2
 	swapper_ttb	.req	x3
 	flag_ptr	.req	x4
 	cur_pgdp	.req	x5
@@ -235,17 +266,16 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	pgd		.req	x7
 	cur_pudp	.req	x8
 	end_pudp	.req	x9
-	pud		.req	x10
 	cur_pmdp	.req	x11
 	end_pmdp	.req	x12
-	pmd		.req	x13
 	cur_ptep	.req	x14
 	end_ptep	.req	x15
 	pte		.req	x16
+	valid		.req	x17
 
+	mov	x5, x3				// preserve temp_pte arg
 	mrs	swapper_ttb, ttbr1_el1
-	restore_ttbr1	swapper_ttb
-	adr	flag_ptr, __idmap_kpti_flag
+	adr_l	flag_ptr, __idmap_kpti_flag
 
 	cbnz	cpu, __idmap_kpti_secondary
 
@@ -256,98 +286,71 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
 	eor	w17, w17, num_cpus
 	cbnz	w17, 1b
 
-	/* We need to walk swapper, so turn off the MMU. */
-	pre_disable_mmu_workaround
-	mrs	x17, sctlr_el1
-	bic	x17, x17, #SCTLR_ELx_M
-	msr	sctlr_el1, x17
+	/* Switch to the temporary page tables on this CPU only */
+	__idmap_cpu_set_reserved_ttbr1 x8, x9
+	offset_ttbr1 temp_pgd_phys, x8
+	msr	ttbr1_el1, temp_pgd_phys
 	isb
 
+	mov	temp_pte, x5
+	mov	pte_flags, #KPTI_NG_PTE_FLAGS
+
 	/* Everybody is enjoying the idmap, so we can rewrite swapper. */
 	/* PGD */
-	mov	cur_pgdp, swapper_pa
-	add	end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
-do_pgd:	__idmap_kpti_get_pgtable_ent	pgd
-	tbnz	pgd, #1, walk_puds
-next_pgd:
-	__idmap_kpti_put_pgtable_ent_ng	pgd
-skip_pgd:
-	add	cur_pgdp, cur_pgdp, #8
-	cmp	cur_pgdp, end_pgdp
-	b.ne	do_pgd
-
-	/* Publish the updated tables and nuke all the TLBs */
-	dsb	sy
-	tlbi	vmalle1is
-	dsb	ish
-	isb
+	adrp		cur_pgdp, swapper_pg_dir
+	kpti_map_pgtbl	pgd, 0
+	kpti_mk_tbl_ng	pgd, PTRS_PER_PGD
 
-	/* We're done: fire up the MMU again */
-	mrs	x17, sctlr_el1
-	orr	x17, x17, #SCTLR_ELx_M
-	set_sctlr_el1	x17
+	/* Ensure all the updated entries are visible to secondary CPUs */
+	dsb	ishst
+
+	/* We're done: fire up swapper_pg_dir again */
+	__idmap_cpu_set_reserved_ttbr1 x8, x9
+	msr	ttbr1_el1, swapper_ttb
+	isb
 
 	/* Set the flag to zero to indicate that we're all done */
 	str	wzr, [flag_ptr]
 	ret
 
+.Lderef_pgd:
 	/* PUD */
-walk_puds:
-	.if CONFIG_PGTABLE_LEVELS > 3
+	.if		CONFIG_PGTABLE_LEVELS > 3
+	pud		.req	x10
 	pte_to_phys	cur_pudp, pgd
-	add	end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
-do_pud:	__idmap_kpti_get_pgtable_ent	pud
-	tbnz	pud, #1, walk_pmds
-next_pud:
-	__idmap_kpti_put_pgtable_ent_ng	pud
-skip_pud:
-	add	cur_pudp, cur_pudp, 8
-	cmp	cur_pudp, end_pudp
-	b.ne	do_pud
-	b	next_pgd
-	.else /* CONFIG_PGTABLE_LEVELS <= 3 */
-	mov	pud, pgd
-	b	walk_pmds
-next_pud:
-	b	next_pgd
+	kpti_map_pgtbl	pud, 1
+	kpti_mk_tbl_ng	pud, PTRS_PER_PUD
+	b		.Lnext_pgd
+	.else		/* CONFIG_PGTABLE_LEVELS <= 3 */
+	pud		.req	pgd
+	.set		.Lnext_pud, .Lnext_pgd
 	.endif
 
+.Lderef_pud:
 	/* PMD */
-walk_pmds:
-	.if CONFIG_PGTABLE_LEVELS > 2
+	.if		CONFIG_PGTABLE_LEVELS > 2
+	pmd		.req	x13
 	pte_to_phys	cur_pmdp, pud
-	add	end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
-do_pmd:	__idmap_kpti_get_pgtable_ent	pmd
-	tbnz	pmd, #1, walk_ptes
-next_pmd:
-	__idmap_kpti_put_pgtable_ent_ng	pmd
-skip_pmd:
-	add	cur_pmdp, cur_pmdp, #8
-	cmp	cur_pmdp, end_pmdp
-	b.ne	do_pmd
-	b	next_pud
-	.else /* CONFIG_PGTABLE_LEVELS <= 2 */
-	mov	pmd, pud
-	b	walk_ptes
-next_pmd:
-	b	next_pud
+	kpti_map_pgtbl	pmd, 2
+	kpti_mk_tbl_ng	pmd, PTRS_PER_PMD
+	b		.Lnext_pud
+	.else		/* CONFIG_PGTABLE_LEVELS <= 2 */
+	pmd		.req	pgd
+	.set		.Lnext_pmd, .Lnext_pgd
 	.endif
 
+.Lderef_pmd:
 	/* PTE */
-walk_ptes:
 	pte_to_phys	cur_ptep, pmd
-	add	end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
-do_pte:	__idmap_kpti_get_pgtable_ent	pte
-	__idmap_kpti_put_pgtable_ent_ng	pte
-skip_pte:
-	add	cur_ptep, cur_ptep, #8
-	cmp	cur_ptep, end_ptep
-	b.ne	do_pte
-	b	next_pmd
+	kpti_map_pgtbl	pte, 3
+	kpti_mk_tbl_ng	pte, PTRS_PER_PTE
+	b		.Lnext_pmd
 
 	.unreq	cpu
+	.unreq	temp_pte
 	.unreq	num_cpus
-	.unreq	swapper_pa
+	.unreq	pte_flags
+	.unreq	temp_pgd_phys
 	.unreq	cur_pgdp
 	.unreq	end_pgdp
 	.unreq	pgd
@@ -360,6 +363,7 @@ skip_pte:
 	.unreq	cur_ptep
 	.unreq	end_ptep
 	.unreq	pte
+	.unreq	valid
 
 	/* Secondary CPUs end up here */
 __idmap_kpti_secondary:
@@ -379,7 +383,6 @@ __idmap_kpti_secondary:
 	cbnz	w16, 1b
 
 	/* All done, act like nothing happened */
-	offset_ttbr1 swapper_ttb, x16
 	msr	ttbr1_el1, swapper_ttb
 	isb
 	ret
@@ -395,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
  *
  *	Initialise the processor for turning the MMU on.
  *
+ * Input:
+ *	x0 - actual number of VA bits (ignored unless VA_BITS > 48)
  * Output:
  *	Return in x0 the value of the SCTLR_EL1 register.
  */
@@ -464,12 +469,11 @@ SYM_FUNC_START(__cpu_setup)
 	tcr_clear_errata_bits tcr, x9, x5
 
 #ifdef CONFIG_ARM64_VA_BITS_52
-	ldr_l		x9, vabits_actual
-	sub		x9, xzr, x9
+	sub		x9, xzr, x0
 	add		x9, x9, #64
 	tcr_set_t1sz	tcr, x9
 #else
-	ldr_l		x9, idmap_t0sz
+	idmap_get_t0sz	x9
 #endif
 	tcr_set_t0sz	tcr, x9