author     Russell King <rmk+kernel@armlinux.org.uk>  2017-04-26 10:59:49 +0100
committer  Russell King <rmk+kernel@armlinux.org.uk>  2017-04-26 10:59:49 +0100
commit     c92a90a5060a84e4a180af6b60aa0fb3a5e46c64 (patch)
tree       80a6446dbc8da07ce2bf168794d9ac8344a0d90e
parent     6d80594936914e798b1b54b3bfe4bd68d8418966 (diff)
parent     11ce4b33aedc65198d7bc9669344ebca5ee36a41 (diff)
Merge branches 'fixes' and 'misc' into for-next
-rw-r--r--  Documentation/devicetree/bindings/arm/l2c2x0.txt    3
-rw-r--r--  arch/arm/Kconfig                                     1
-rw-r--r--  arch/arm/boot/dts/r7s72100.dtsi                     11
-rw-r--r--  arch/arm/include/asm/cpufeature.h                   38
-rw-r--r--  arch/arm/include/asm/fixmap.h                        2
-rw-r--r--  arch/arm/include/asm/module.h                        9
-rw-r--r--  arch/arm/kernel/ftrace.c                            11
-rw-r--r--  arch/arm/kernel/module-plts.c                       87
-rw-r--r--  arch/arm/kernel/module.lds                           1
-rw-r--r--  arch/arm/kernel/setup.c                              4
-rw-r--r--  arch/arm/mach-shmobile/setup-r7s72100.c              2
-rw-r--r--  arch/arm/mm/cache-l2x0.c                            13
-rw-r--r--  arch/arm/mm/dump.c                                  54
-rw-r--r--  arch/arm/mm/init.c                                  13
-rw-r--r--  arch/arm/mm/mmu.c                                   16
-rw-r--r--  arch/arm/mm/proc-v7m.S                               2
16 files changed, 210 insertions, 57 deletions
diff --git a/Documentation/devicetree/bindings/arm/l2c2x0.txt b/Documentation/devicetree/bindings/arm/l2c2x0.txt
index 917199f17965..d9650c1788f4 100644
--- a/Documentation/devicetree/bindings/arm/l2c2x0.txt
+++ b/Documentation/devicetree/bindings/arm/l2c2x0.txt
@@ -90,6 +90,9 @@ Optional properties:
 - arm,standby-mode: L2 standby mode enable. Value <0> (forcibly disable),
   <1> (forcibly enable), property absent (OS specific behavior,
   preferably retain firmware settings)
+- arm,early-bresp-disable : Disable the Cortex-A9 Early BRESP optimization
+  (PL310)
+- arm,full-line-zero-disable : Disable the Cortex-A9 Full Line of Zero write
+  optimization (PL310)
 
 Example:
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d4e71b42c77..64b43a84a1eb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -27,6 +27,7 @@ config ARM
 	select GENERIC_ALLOCATOR
 	select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_PROBE
diff --git a/arch/arm/boot/dts/r7s72100.dtsi b/arch/arm/boot/dts/r7s72100.dtsi
index b8aa256bd515..1cf2bd038090 100644
--- a/arch/arm/boot/dts/r7s72100.dtsi
+++ b/arch/arm/boot/dts/r7s72100.dtsi
@@ -177,6 +177,7 @@
 			compatible = "arm,cortex-a9";
 			reg = <0>;
 			clock-frequency = <400000000>;
+			next-level-cache = <&L2>;
 		};
 	};
 
@@ -368,6 +369,16 @@
 			<0xe8202000 0x1000>;
 	};
 
+	L2: cache-controller@3ffff000 {
+		compatible = "arm,pl310-cache";
+		reg = <0x3ffff000 0x1000>;
+		interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
+		arm,early-bresp-disable;
+		arm,full-line-zero-disable;
+		cache-unified;
+		cache-level = <2>;
+	};
+
 	i2c0: i2c@fcfee000 {
 		#address-cells = <1>;
 		#size-cells = <0>;
diff --git a/arch/arm/include/asm/cpufeature.h b/arch/arm/include/asm/cpufeature.h
new file mode 100644
index 000000000000..6d425191d01d
--- /dev/null
+++ b/arch/arm/include/asm/cpufeature.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <linux/log2.h>
+#include <asm/hwcap.h>
+
+/*
+ * Due to the fact that ELF_HWCAP is a 32-bit type on ARM, and given the number
+ * of optional CPU features it defines, ARM's CPU hardware capability bits have
+ * been distributed over separate elf_hwcap and elf_hwcap2 variables, each of
+ * which covers a subset of the available CPU features.
+ *
+ * Currently, only a few of those are suitable for automatic module loading
+ * (which is the primary use case of this facility) and those happen to be all
+ * covered by HWCAP2. So let's only cover those via the cpu_feature()
+ * convenience macro for now (which is used by module_cpu_feature_match()).
+ * However, all capabilities are exposed via the modalias, and can be matched
+ * using an explicit MODULE_DEVICE_TABLE() that uses __hwcap_feature() directly.
+ */
+#define MAX_CPU_FEATURES	64
+#define __hwcap_feature(x)	ilog2(HWCAP_ ## x)
+#define __hwcap2_feature(x)	(32 + ilog2(HWCAP2_ ## x))
+#define cpu_feature(x)		__hwcap2_feature(x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return num < 32 ? elf_hwcap & BIT(num) : elf_hwcap2 & BIT(num - 32);
+}
+
+#endif
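
A minimal usage sketch (not part of this patch): with GENERIC_CPU_AUTOPROBE selected in the Kconfig hunk above, a module can use module_cpu_feature_match() from <linux/cpufeature.h> to autoload on a HWCAP2 feature, as the header comment describes. The CRC32 feature and the init function name are assumptions for illustration.

    #include <linux/cpufeature.h>
    #include <linux/module.h>

    /* Hypothetical init; a real user would register a driver here. */
    static int __init crc32_example_init(void)
    {
            return 0;
    }

    /*
     * Emits the modalias device table plus the module_init()
     * boilerplate, so udev loads this module only on CPUs whose
     * hwcaps advertise HWCAP2_CRC32.
     */
    module_cpu_feature_match(CRC32, crc32_example_init);
    MODULE_LICENSE("GPL");
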
diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index 5c17d2dec777..8f967d1373f6 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -41,7 +41,7 @@ static const enum fixed_addresses __end_of_fixed_addresses =
 
 #define FIXMAP_PAGE_COMMON	(L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY)
 
-#define FIXMAP_PAGE_NORMAL	(FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK)
+#define FIXMAP_PAGE_NORMAL	(pgprot_kernel | L_PTE_XN)
 #define FIXMAP_PAGE_RO		(FIXMAP_PAGE_NORMAL | L_PTE_RDONLY)
 
 /* Used by set_fixmap_(io|nocache), both meant for mapping a device */
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 464748b9fd7d..ed2319663a1e 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -18,13 +18,18 @@ enum {
 };
 #endif
 
+struct mod_plt_sec {
+	struct elf32_shdr	*plt;
+	int			plt_count;
+};
+
 struct mod_arch_specific {
 #ifdef CONFIG_ARM_UNWIND
 	struct unwind_table *unwind[ARM_SEC_MAX];
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
-	struct elf32_shdr   *plt;
-	int		    plt_count;
+	struct mod_plt_sec	core;
+	struct mod_plt_sec	init;
 #endif
 };
 
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 3f1759411d51..414e60ed0257 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -29,11 +29,6 @@
 #endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-#ifdef CONFIG_OLD_MCOUNT
-#define OLD_MCOUNT_ADDR	((unsigned long) mcount)
-#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
-
-#define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
 
 static int __ftrace_modify_code(void *data)
 {
@@ -51,6 +46,12 @@ void arch_ftrace_update_code(int command)
 	stop_machine(__ftrace_modify_code, &command, NULL);
 }
 
+#ifdef CONFIG_OLD_MCOUNT
+#define OLD_MCOUNT_ADDR	((unsigned long) mcount)
+#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
+
+#define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
+
 static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
 {
 	return rec->arch.old_mcount ? OLD_NOP : NOP;
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 3a5cba90c971..3d0c2e4dda1d 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -31,9 +31,17 @@ struct plt_entries {
 	u32	lit[PLT_ENT_COUNT];
 };
 
+static bool in_init(const struct module *mod, unsigned long loc)
+{
+	return loc - (u32)mod->init_layout.base < mod->init_layout.size;
+}
+
 u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 {
-	struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
+	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
+							  &mod->arch.init;
+
+	struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr;
 	int idx = 0;
 
 	/*
@@ -41,9 +49,9 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (mod->arch.plt_count > 0) {
-		plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
-		idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
+	if (pltsec->plt_count > 0) {
+		plt += (pltsec->plt_count - 1) / PLT_ENT_COUNT;
+		idx = (pltsec->plt_count - 1) % PLT_ENT_COUNT;
 
 		if (plt->lit[idx] == val)
 			return (u32)&plt->ldr[idx];
@@ -53,8 +61,8 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 			plt++;
 	}
 
-	mod->arch.plt_count++;
-	BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
+	pltsec->plt_count++;
+	BUG_ON(pltsec->plt_count * PLT_ENT_SIZE > pltsec->plt->sh_size);
 
 	if (!idx)
 		/* Populate a new set of entries */
@@ -129,7 +137,7 @@ static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
 
 /* Count how many PLT entries we may need */
 static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
-			       const Elf32_Rel *rel, int num)
+			       const Elf32_Rel *rel, int num, Elf32_Word dstidx)
 {
 	unsigned int ret = 0;
 	const Elf32_Sym *s;
@@ -144,13 +152,17 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 		case R_ARM_THM_JUMP24:
 			/*
 			 * We only have to consider branch targets that resolve
-			 * to undefined symbols. This is not simply a heuristic,
-			 * it is a fundamental limitation, since the PLT itself
-			 * is part of the module, and needs to be within range
-			 * as well, so modules can never grow beyond that limit.
+			 * to symbols that are defined in a different section.
+			 * This is not simply a heuristic, it is a fundamental
+			 * limitation, since there is no guaranteed way to emit
+			 * PLT entries sufficiently close to the branch if the
+			 * section size exceeds the range of a branch
+			 * instruction. So ignore relocations against defined
+			 * symbols if they live in the same section as the
+			 * relocation target.
 			 */
 			s = syms + ELF32_R_SYM(rel[i].r_info);
-			if (s->st_shndx != SHN_UNDEF)
+			if (s->st_shndx == dstidx)
 				break;
 
 			/*
@@ -161,7 +173,12 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 			 * So we need to support them, but there is no need to
 			 * take them into consideration when trying to optimize
 			 * this code. So let's only check for duplicates when
-			 * the addend is zero.
+			 * the addend is zero. (Note that calls into the core
+			 * module via init PLT entries could involve section
+			 * relative symbol references with non-zero addends, for
+			 * which we may end up emitting duplicates, but the init
+			 * PLT is released along with the rest of the .init
+			 * region as soon as module loading completes.)
 			 */
 			if (!is_zero_addend_relocation(base, rel + i) ||
 			    !duplicate_rel(base, rel, i))
@@ -174,7 +191,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned long plts = 0;
+	unsigned long core_plts = 0;
+	unsigned long init_plts = 0;
 	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
 	Elf32_Sym *syms = NULL;
 
@@ -184,13 +202,15 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	 */
 	for (s = sechdrs; s < sechdrs_end; ++s) {
 		if (strcmp(".plt", secstrings + s->sh_name) == 0)
-			mod->arch.plt = s;
+			mod->arch.core.plt = s;
+		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+			mod->arch.init.plt = s;
 		else if (s->sh_type == SHT_SYMTAB)
 			syms = (Elf32_Sym *)s->sh_addr;
 	}
 
-	if (!mod->arch.plt) {
-		pr_err("%s: module PLT section missing\n", mod->name);
+	if (!mod->arch.core.plt || !mod->arch.init.plt) {
+		pr_err("%s: module PLT section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
 	if (!syms) {
@@ -213,16 +233,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		/* sort by type and symbol index */
 		sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
 
-		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
+		if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
+			core_plts += count_plts(syms, dstsec->sh_addr, rels,
+						numrels, s->sh_info);
+		else
+			init_plts += count_plts(syms, dstsec->sh_addr, rels,
+						numrels, s->sh_info);
 	}
 
-	mod->arch.plt->sh_type = SHT_NOBITS;
-	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
-					  sizeof(struct plt_entries));
-	mod->arch.plt_count = 0;
-
-	pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
+	mod->arch.core.plt->sh_type = SHT_NOBITS;
+	mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
+					       sizeof(struct plt_entries));
+	mod->arch.core.plt_count = 0;
+
+	mod->arch.init.plt->sh_type = SHT_NOBITS;
+	mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
+					       sizeof(struct plt_entries));
+	mod->arch.init.plt_count = 0;
+
+	pr_debug("%s: plt=%x, init.plt=%x\n", __func__,
+		 mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size);
 	return 0;
 }
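
For context on why module PLTs exist at all: ARM B/BL encodes a signed 24-bit word offset, so a branch can reach only +/-32 MB of the call site, and a module loaded beyond that distance from its target must go through a veneer. A hedged sketch of the underlying range check (names illustrative, not the kernel's):

    #include <linux/sizes.h>

    /* Sketch only: a relocation needs a PLT veneer when the target
     * lies beyond the reach of a single B/BL instruction. */
    static bool bl_in_range(unsigned long loc, unsigned long dst)
    {
            /* On ARM, pc reads as the instruction address + 8. */
            long offset = (long)dst - (long)(loc + 8);

            return offset >= -(long)SZ_32M && offset < (long)SZ_32M;
    }

Because a module's .init region may be placed far from its core region, each needs a PLT of its own within branch range; the init PLT is then freed together with the rest of .init once loading completes.
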
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
index 05881e2b414c..eacb5c67f61e 100644
--- a/arch/arm/kernel/module.lds
+++ b/arch/arm/kernel/module.lds
@@ -1,3 +1,4 @@
 SECTIONS {
 	.plt : { BYTE(0) }
+	.init.plt : { BYTE(0) }
 }
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f4e54503afa9..32e1a9513dc7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -80,7 +80,7 @@ __setup("fpe=", fpe_setup);
 
 extern void init_default_cache_policy(unsigned long);
 extern void paging_init(const struct machine_desc *desc);
-extern void early_paging_init(const struct machine_desc *);
+extern void early_mm_init(const struct machine_desc *);
 extern void adjust_lowmem_bounds(void);
 extern enum reboot_mode reboot_mode;
 extern void setup_dma_zone(const struct machine_desc *desc);
@@ -1088,7 +1088,7 @@ void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 #ifdef CONFIG_MMU
-	early_paging_init(mdesc);
+	early_mm_init(mdesc);
 #endif
 	setup_dma_zone(mdesc);
 	xen_early_init();
diff --git a/arch/arm/mach-shmobile/setup-r7s72100.c b/arch/arm/mach-shmobile/setup-r7s72100.c
index d46639fc6849..319ca9508ec6 100644
--- a/arch/arm/mach-shmobile/setup-r7s72100.c
+++ b/arch/arm/mach-shmobile/setup-r7s72100.c
@@ -26,6 +26,8 @@ static const char *const r7s72100_boards_compat_dt[] __initconst = {
 };
 
 DT_MACHINE_START(R7S72100_DT, "Generic R7S72100 (Flattened Device Tree)")
+	.l2c_aux_val    = 0,
+	.l2c_aux_mask   = ~0,
 	.init_early	= shmobile_init_delay,
 	.init_late	= shmobile_init_late,
 	.dt_compat	= r7s72100_boards_compat_dt,
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2290be390f87..808efbb89b88 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -57,6 +57,9 @@ static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
 
 struct l2x0_regs l2x0_saved_regs;
 
+static bool l2x0_bresp_disable;
+static bool l2x0_flz_disable;
+
 /*
  * Common code for all cache controllers.
  */
@@ -620,7 +623,7 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
 	u32 aux = l2x0_saved_regs.aux_ctrl;
 
 	if (rev >= L310_CACHE_ID_RTL_R2P0) {
-		if (cortex_a9) {
+		if (cortex_a9 && !l2x0_bresp_disable) {
 			aux |= L310_AUX_CTRL_EARLY_BRESP;
 			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
 		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
@@ -629,7 +632,7 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
 		}
 	}
 
-	if (cortex_a9) {
+	if (cortex_a9 && !l2x0_flz_disable) {
 		u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
 		u32 acr = get_auxcr();
 
@@ -1200,6 +1203,12 @@ static void __init l2c310_of_parse(const struct device_node *np,
 		*aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
 	}
 
+	if (of_property_read_bool(np, "arm,early-bresp-disable"))
+		l2x0_bresp_disable = true;
+
+	if (of_property_read_bool(np, "arm,full-line-zero-disable"))
+		l2x0_flz_disable = true;
+
 	prefetch = l2x0_saved_regs.prefetch_ctrl;
 
 	ret = of_property_read_u32(np, "arm,double-linefill", &val);
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 21192d6eda40..35ff45470dbf 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 
+#include <asm/domain.h>
 #include <asm/fixmap.h>
 #include <asm/memory.h>
 #include <asm/pgtable.h>
@@ -43,6 +44,7 @@ struct pg_state {
 	unsigned long start_address;
 	unsigned level;
 	u64 current_prot;
+	const char *current_domain;
 };
 
 struct prot_bits {
@@ -216,7 +218,8 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t
 	}
 }
 
-static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val)
+static void note_page(struct pg_state *st, unsigned long addr,
+		      unsigned int level, u64 val, const char *domain)
 {
 	static const char units[] = "KMGTPE";
 	u64 prot = val & pg_level[level].mask;
@@ -224,8 +227,10 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 	if (!st->level) {
 		st->level = level;
 		st->current_prot = prot;
+		st->current_domain = domain;
 		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 	} else if (prot != st->current_prot || level != st->level ||
+		   domain != st->current_domain ||
 		   addr >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
@@ -240,6 +245,8 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 				unit++;
 			}
 			seq_printf(st->seq, "%9lu%c", delta, *unit);
+			if (st->current_domain)
+				seq_printf(st->seq, " %s", st->current_domain);
 			if (pg_level[st->level].bits)
 				dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
 			seq_printf(st->seq, "\n");
@@ -251,11 +258,13 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 		}
 		st->start_address = addr;
 		st->current_prot = prot;
+		st->current_domain = domain;
 		st->level = level;
 	}
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start,
+		     const char *domain)
 {
 	pte_t *pte = pte_offset_kernel(pmd, 0);
 	unsigned long addr;
@@ -263,25 +272,50 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
 		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte));
+		note_page(st, addr, 4, pte_val(*pte), domain);
 	}
 }
 
+static const char *get_domain_name(pmd_t *pmd)
+{
+#ifndef CONFIG_ARM_LPAE
+	switch (pmd_val(*pmd) & PMD_DOMAIN_MASK) {
+	case PMD_DOMAIN(DOMAIN_KERNEL):
+		return "KERNEL ";
+	case PMD_DOMAIN(DOMAIN_USER):
+		return "USER   ";
+	case PMD_DOMAIN(DOMAIN_IO):
+		return "IO     ";
+	case PMD_DOMAIN(DOMAIN_VECTORS):
+		return "VECTORS";
+	default:
+		return "unknown";
+	}
+#endif
+	return NULL;
+}
+
 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 	unsigned long addr;
 	unsigned i;
+	const char *domain;
 
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 		addr = start + i * PMD_SIZE;
+		domain = get_domain_name(pmd);
 		if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd))
-			note_page(st, addr, 3, pmd_val(*pmd));
+			note_page(st, addr, 3, pmd_val(*pmd), domain);
 		else
-			walk_pte(st, pmd, addr);
+			walk_pte(st, pmd, addr, domain);
 
-		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1]))
-			note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1]));
+		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) {
+			addr += SECTION_SIZE;
+			pmd++;
+			domain = get_domain_name(pmd);
+			note_page(st, addr, 3, pmd_val(*pmd), domain);
+		}
 	}
 }
 
@@ -296,7 +330,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 		if (!pud_none(*pud)) {
 			walk_pmd(st, pud, addr);
 		} else {
-			note_page(st, addr, 2, pud_val(*pud));
+			note_page(st, addr, 2, pud_val(*pud), NULL);
 		}
 	}
 }
@@ -317,11 +351,11 @@ static void walk_pgd(struct seq_file *m)
 		if (!pgd_none(*pgd)) {
 			walk_pud(&st, pgd, addr);
 		} else {
-			note_page(&st, addr, 1, pgd_val(*pgd));
+			note_page(&st, addr, 1, pgd_val(*pgd), NULL);
 		}
 	}
 
-	note_page(&st, 0, 0, 0);
+	note_page(&st, 0, 0, 0, NULL);
 }
 
 static int ptdump_show(struct seq_file *m, void *v)
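
Taken together, the note_page() changes make each contiguous range print its size, then the new domain column, then the protection bits. An illustrative line (addresses and attributes entirely invented) from the page-table dump would read:

    0x80000000-0xbf000000        1008M KERNEL  RW NX SHD
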
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1d8558ff9827..ad80548325fe 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -709,34 +709,37 @@ void set_section_perms(struct section_perm *perms, int n, bool set,
 
 }
 
+/*
+ * update_sections_early() is intended to be called only through the
+ * stop_machine() framework, and is executed by a single CPU while all
+ * other CPUs spin and wait, so no locking is required in this
+ * function.
+ */
 static void update_sections_early(struct section_perm perms[], int n)
 {
 	struct task_struct *t, *s;
 
-	read_lock(&tasklist_lock);
 	for_each_process(t) {
 		if (t->flags & PF_KTHREAD)
 			continue;
 		for_each_thread(t, s)
 			set_section_perms(perms, n, true, s->mm);
 	}
-	read_unlock(&tasklist_lock);
 	set_section_perms(perms, n, true, current->active_mm);
 	set_section_perms(perms, n, true, &init_mm);
 }
 
-int __fix_kernmem_perms(void *unused)
+static int __fix_kernmem_perms(void *unused)
 {
 	update_sections_early(nx_perms, ARRAY_SIZE(nx_perms));
 	return 0;
 }
 
-void fix_kernmem_perms(void)
+static void fix_kernmem_perms(void)
 {
 	stop_machine(__fix_kernmem_perms, NULL, NULL);
 }
 
-int __mark_rodata_ro(void *unused)
+static int __mark_rodata_ro(void *unused)
 {
 	update_sections_early(ro_perms, ARRAY_SIZE(ro_perms));
 	return 0;
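
The tasklist_lock removal above leans entirely on the stop_machine() contract. A minimal sketch of that pattern (function names hypothetical):

    #include <linux/stop_machine.h>

    /* Runs on exactly one CPU while every other CPU spins with
     * interrupts disabled, so global lists can be walked without
     * further locking. */
    static int quiesced_update(void *unused)
    {
            return 0;
    }

    static void run_quiesced_update(void)
    {
            /* NULL cpumask: any one CPU may execute the callback. */
            stop_machine(quiesced_update, NULL, NULL);
    }
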
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e016d7f37b3..347cca965783 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -414,6 +414,11 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
 		     FIXADDR_END);
 	BUG_ON(idx >= __end_of_fixed_addresses);
 
+	/* we only support device mappings until pgprot_kernel has been set */
+	if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
+		    pgprot_val(pgprot_kernel) == 0))
+		return;
+
 	if (pgprot_val(prot))
 		set_pte_at(NULL, vaddr, pte,
 			pfn_pte(phys >> PAGE_SHIFT, prot));
@@ -1492,7 +1497,7 @@ pgtables_remap lpae_pgtables_remap_asm;
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
  */
-void __init early_paging_init(const struct machine_desc *mdesc)
+static void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	pgtables_remap *lpae_pgtables_remap;
 	unsigned long pa_pgd;
@@ -1560,7 +1565,7 @@ void __init early_paging_init(const struct machine_desc *mdesc)
 
 #else
 
-void __init early_paging_init(const struct machine_desc *mdesc)
+static void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	long long offset;
 
@@ -1616,7 +1621,6 @@ void __init paging_init(const struct machine_desc *mdesc)
 {
 	void *zero_page;
 
-	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
 	memblock_set_current_limit(arm_lowmem_limit);
@@ -1636,3 +1640,9 @@ void __init paging_init(const struct machine_desc *mdesc)
 	empty_zero_page = virt_to_page(zero_page);
 	__flush_dcache_page(NULL, empty_zero_page);
 }
+
+void __init early_mm_init(const struct machine_desc *mdesc)
+{
+	build_mem_type_table();
+	early_paging_init(mdesc);
+}
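
The WARN_ON added to __set_fixmap() above pairs with the fixmap.h hunk: FIXMAP_PAGE_NORMAL is now derived from pgprot_kernel, which stays zero until build_mem_type_table() runs from early_mm_init(). A hedged sketch of the ordering this enforces (the call site is hypothetical; set_fixmap_io() and the fixmap indices come from the existing fixmap API):

    void __init early_fixmap_example(phys_addr_t phys)
    {
            /* Fine before build_mem_type_table(): FIXMAP_PAGE_IO does
             * not depend on pgprot_kernel. */
            set_fixmap_io(FIX_EARLYCON_MEM_BASE, phys);

            /* Would trip the new WARN_ON if called this early, since
             * FIXMAP_PAGE_NORMAL (pgprot_kernel | L_PTE_XN) would
             * yield an all-zero pte protection value:
             *
             *      set_fixmap(FIX_TEXT_POKE0, phys);
             */
    }
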
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 50497778c2e5..47a5acc64433 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -135,9 +135,11 @@ __v7m_setup_cont:
 	dsb
 	mov	r6, lr			@ save LR
 	ldr	sp, =init_thread_union + THREAD_START_SP
+	stmia	sp, {r0-r3, r12}
 	cpsie	i
 	svc	#0
 1:	cpsid	i
+	ldmia	sp, {r0-r3, r12}
 	str	r5, [r12, #11 * 4]	@ restore the original SVC vector entry
 	mov	lr, r6			@ restore LR