summary refs log tree commit diff
path: root/arch/arm/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- arch/arm/kernel/entry-armv.S 97
-rw-r--r-- arch/arm/kernel/entry-header.S 37
-rw-r--r-- arch/arm/kernel/irq.c 9
-rw-r--r-- arch/arm/kernel/setup.c 8
-rw-r--r-- arch/arm/kernel/sleep.S 8
-rw-r--r-- arch/arm/kernel/traps.c 80
-rw-r--r-- arch/arm/kernel/unwind.c 3
-rw-r--r-- arch/arm/kernel/vmlinux.lds.S 4
8 files changed, 231 insertions, 15 deletions
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 207875ac62ff..5fb7465d14d9 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -57,6 +57,10 @@ UNWIND(	.setfp	fpreg, sp		)
 	@
 	subs	r2, sp, r0		@ SP above bottom of IRQ stack?
 	rsbscs	r2, r2, #THREAD_SIZE	@ ... and below the top?
+#ifdef CONFIG_VMAP_STACK
+	ldr_l	r2, high_memory, cc	@ End of the linear region
+	cmpcc	r2, r0			@ Stack pointer was below it?
+#endif
 	movcs	sp, r0			@ If so, revert to incoming SP
 
 #ifndef CONFIG_UNWINDER_ARM
@@ -188,13 +192,18 @@ ENDPROC(__und_invalid)
 #define SPFIX(code...)
 #endif
 
-	.macro	svc_entry, stack_hole=0, trace=1, uaccess=1
+	.macro	svc_entry, stack_hole=0, trace=1, uaccess=1, overflow_check=1
  UNWIND(.fnstart		)
- UNWIND(.save {r0 - pc}		)
 	sub	sp, sp, #(SVC_REGS_SIZE + \stack_hole)
+ THUMB(	add	sp, r1		)	@ get SP in a GPR without
+ THUMB(	sub	r1, sp, r1	)	@ using a temp register
+
+	.if	\overflow_check
+ UNWIND(.save	{r0 - pc}	)
+	do_overflow_check (SVC_REGS_SIZE + \stack_hole)
+	.endif
+
 #ifdef CONFIG_THUMB2_KERNEL
-	add	sp, r1			@ get SP in a GPR without
-	sub	r1, sp, r1		@ using a temp register
 	tst	r1, #4			@ test stack pointer alignment
 	sub	r1, sp, r1		@ restore original R1
 	sub	sp, r1			@ restore original SP
@@ -827,12 +836,20 @@ ENTRY(__switch_to)
 	str	r7, [r8]
 #endif
 	mov	r0, r5
-#if !defined(CONFIG_THUMB2_KERNEL)
+#if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK)
 	set_current r7
 	ldmia	r4, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
 #else
 	mov	r1, r7
 	ldmia	r4, {r4 - sl, fp, ip, lr}	@ Load all regs saved previously
+#ifdef CONFIG_VMAP_STACK
+	@
+	@ Do a dummy read from the new stack while running from the old one so
+	@ that we can rely on do_translation_fault() to fix up any stale PMD
+	@ entries covering the vmalloc region.
+	@
+	ldr	r2, [ip]
+#endif
 
 	@ When CONFIG_THREAD_INFO_IN_TASK=n, the update of SP itself is what
 	@ effectuates the task switch, as that is what causes the observable
@@ -849,6 +866,76 @@ ENTRY(__switch_to)
  UNWIND(.fnend		)
 ENDPROC(__switch_to)
 
+#ifdef CONFIG_VMAP_STACK
+	.text
+	.align	2
+__bad_stack:
+	@
+	@ We've just detected an overflow. We need to load the address of this
+	@ CPU's overflow stack into the stack pointer register. We have only one
+	@ scratch register so let's use a sequence of ADDs including one
+	@ involving the PC, and decorate them with PC-relative group
+	@ relocations. As these are ARM only, switch to ARM mode first.
+	@
+	@ We enter here with IP clobbered and its value stashed on the mode
+	@ stack.
+	@
+THUMB(	bx	pc		)	@ Thumb-2 kernels: switch to ARM mode
+THUMB(	nop			)
+THUMB(	.arm			)	@ assemble what follows as ARM code
+	mrc	p15, 0, ip, c13, c0, 4		@ Get per-CPU offset
+
+	.globl	overflow_stack_ptr
+	.reloc	0f, R_ARM_ALU_PC_G0_NC, overflow_stack_ptr
+	.reloc	1f, R_ARM_ALU_PC_G1_NC, overflow_stack_ptr
+	.reloc	2f, R_ARM_LDR_PC_G2, overflow_stack_ptr
+	add	ip, ip, pc			@ IP = per-CPU offset + PC
+0:	add	ip, ip, #-4			@ target of the G0_NC reloc above
+1:	add	ip, ip, #0			@ target of the G1_NC reloc above
+2:	ldr	ip, [ip, #4]			@ target of the LDR G2 reloc above
+
+	str	sp, [ip, #-4]!			@ Preserve original SP value
+	mov	sp, ip				@ Switch to overflow stack
+	pop	{ip}				@ Original SP in IP
+
+#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
+	mov	ip, ip				@ mov expected by unwinder
+	push	{fp, ip, lr, pc}		@ GCC flavor frame record
+#else
+	str	ip, [sp, #-8]!			@ store original SP
+	push	{fpreg, lr}			@ Clang flavor frame record
+#endif
+UNWIND( ldr	ip, [r0, #4]	)		@ load exception LR
+UNWIND( str	ip, [sp, #12]	)		@ store in the frame record
+	ldr	ip, [r0, #12]			@ reload IP stashed by do_overflow_check
+
+	@ Store the original GPRs to the new stack (no second overflow check).
+	svc_entry uaccess=0, overflow_check=0
+
+UNWIND( .save   {sp, pc}	)
+UNWIND( .save   {fpreg, lr}	)
+UNWIND( .setfp  fpreg, sp	)
+
+	ldr	fpreg, [sp, #S_SP]		@ Add our frame record
+						@ to the linked list
+#if defined(CONFIG_UNWINDER_FRAME_POINTER) && defined(CONFIG_CC_IS_GCC)
+	ldr	r1, [fp, #4]			@ reload SP at entry
+	add	fp, fp, #12			@ FP -> PC slot of the GCC record
+#else
+	ldr	r1, [fpreg, #8]			@ reload SP at entry (Clang record)
+#endif
+	str	r1, [sp, #S_SP]			@ store in pt_regs
+
+	@ Stash the regs for handle_bad_stack
+	mov	r0, sp				@ r0 = pt_regs pointer argument
+
+	@ Time to die
+	bl	handle_bad_stack
+	nop					@ NOTE(review): presumably keeps the return address inside this function for the unwinder - confirm
+UNWIND( .fnend			)
+ENDPROC(__bad_stack)
+#endif
+
 	__INIT
 
 /*
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index ae24dd54e9ef..81df2a3561ca 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -423,3 +423,40 @@ scno	.req	r7		@ syscall number
 tbl	.req	r8		@ syscall table pointer
 why	.req	r8		@ Linux syscall (!= 0)
 tsk	.req	r9		@ current thread_info
+
+	.macro	do_overflow_check, frame_size:req
+#ifdef CONFIG_VMAP_STACK
+	@
+	@ Test whether the SP has overflowed. Task and IRQ stacks are aligned
+	@ so that SP & BIT(THREAD_SIZE_ORDER + PAGE_SHIFT) should always be
+	@ zero. The Thumb-2 variant tests R1, which svc_entry loads with SP.
+	@
+ARM(	tst	sp, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT)	)
+THUMB(	tst	r1, #1 << (THREAD_SIZE_ORDER + PAGE_SHIFT)	)
+THUMB(	it	ne						)
+	bne	.Lstack_overflow_check\@	@ Bit set: take the slow path below
+
+	.pushsection	.text			@ Slow path lives outside the inline sequence
+.Lstack_overflow_check\@:
+	@
+	@ The stack pointer is not pointing to a valid vmap'ed stack, but it
+	@ may be pointing into the linear map instead, which may happen if we
+	@ are already running from the overflow stack. We cannot detect overflow
+	@ in such cases so just carry on.
+	@
+	str	ip, [r0, #12]			@ Stash IP on the mode stack
+	ldr_l	ip, high_memory			@ Start of VMALLOC space
+ARM(	cmp	sp, ip			)	@ SP in vmalloc space?
+THUMB(	cmp	r1, ip			)	@ (Thumb-2: SP value is in R1)
+THUMB(	itt	lo			)
+	ldrlo	ip, [r0, #12]			@ Restore IP
+	blo	.Lout\@				@ Carry on
+
+THUMB(	sub	r1, sp, r1		)	@ Restore original R1
+THUMB(	sub	sp, r1			)	@ Restore original SP
+	add	sp, sp, #\frame_size		@ Undo svc_entry's SP change
+	b	__bad_stack			@ Handle VMAP stack overflow
+	.popsection
+.Lout\@:
+#endif
+	.endm
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index e05219bca218..5deb40f39999 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -56,7 +56,14 @@ static void __init init_irq_stacks(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		stack = (u8 *)__get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
+		if (!IS_ENABLED(CONFIG_VMAP_STACK))
+			stack = (u8 *)__get_free_pages(GFP_KERNEL,
+						       THREAD_SIZE_ORDER);
+		else
+			stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
+					       THREADINFO_GFP, NUMA_NO_NODE,
+					       __builtin_return_address(0));
+
 		if (WARN_ON(!stack))
 			break;
 		per_cpu(irq_stack_ptr, cpu) = &stack[THREAD_SIZE];
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 284a80c0b6e1..039feb7cd590 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -141,10 +141,10 @@ EXPORT_SYMBOL(outer_cache);
 int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN;
 
 struct stack {
-	u32 irq[3];
-	u32 abt[3];
-	u32 und[3];
-	u32 fiq[3];
+	u32 irq[4];	/* NOTE(review): word 3 presumably backs the IP stash at [r0, #12] - confirm */
+	u32 abt[4];
+	u32 und[4];
+	u32 fiq[4];
 } ____cacheline_aligned;
 
 #ifndef CONFIG_CPU_V7M
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 43077e11dafd..803b51e5cba0 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -67,6 +67,14 @@ ENTRY(__cpu_suspend)
 	ldr	r4, =cpu_suspend_size
 #endif
 	mov	r5, sp			@ current virtual SP
+#ifdef CONFIG_VMAP_STACK
+	@ Run the suspend code from the overflow stack so we don't have to rely
+	@ on vmalloc-to-phys conversions anywhere in the arch suspend code.
+	@ The original SP value captured in R5 will be restored on the way out.
+	mov_l	r6, overflow_stack_ptr	@ Base pointer
+	mrc	p15, 0, r7, c13, c0, 4	@ Get per-CPU offset
+	ldr	sp, [r6, r7]		@ Address of this CPU's overflow stack
+#endif
 	add	r4, r4, #12		@ Space for pgd, virt sp, phys resume fn
 	sub	sp, sp, r4		@ allocate CPU state on stack
 	ldr	r3, =sleep_save_sp
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index b42c446cec9a..b28a705c49cb 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -121,7 +121,8 @@ void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl)
 static int verify_stack(unsigned long sp)
 {
 	if (sp < PAGE_OFFSET ||
-	    (sp > (unsigned long)high_memory && high_memory != NULL))
+	    (!IS_ENABLED(CONFIG_VMAP_STACK) &&	/* vmap'ed stacks may sit above high_memory */
+	     sp > (unsigned long)high_memory && high_memory != NULL))
 		return -EFAULT;

 	return 0;
 }
@@ -291,7 +292,8 @@ static int __die(const char *str, int err, struct pt_regs *regs)
 
 	if (!user_mode(regs) || in_interrupt()) {
 		dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
-			 ALIGN(regs->ARM_sp, THREAD_SIZE));
+			 ALIGN(regs->ARM_sp - THREAD_SIZE, THREAD_ALIGN)
+			 + THREAD_SIZE);
 		dump_backtrace(regs, tsk, KERN_EMERG);
 		dump_instr(KERN_EMERG, regs);
 	}
@@ -838,3 +840,77 @@ void __init early_trap_init(void *vectors_base)
 	 */
 #endif
 }
+
+#ifdef CONFIG_VMAP_STACK
+
+DECLARE_PER_CPU(u8 *, irq_stack_ptr);
+
+asmlinkage DEFINE_PER_CPU(u8 *, overflow_stack_ptr);
+
+static int __init allocate_overflow_stacks(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {	/* one free page per possible CPU */
+		u8 *base = (u8 *)__get_free_page(GFP_KERNEL);
+
+		if (WARN_ON(!base))
+			return -ENOMEM;
+		per_cpu(overflow_stack_ptr, cpu) = base + OVERFLOW_STACK_SIZE;
+	}
+	return 0;
+}
+early_initcall(allocate_overflow_stacks);
+
+/* Report the kernel stack overflow detected by __bad_stack, then die(). */
+asmlinkage void handle_bad_stack(struct pt_regs *regs)
+{
+	unsigned long tsk_stk = (unsigned long)current->stack;
+	unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
+	unsigned long ovf_stk = (unsigned long)this_cpu_read(overflow_stack_ptr);
+
+	console_verbose();
+	pr_emerg("Insufficient stack space to handle exception!\n");
+	/* tsk_stk is printed as a base; irq_stk and ovf_stk as end addresses. */
+	pr_emerg("Task stack:     [0x%08lx..0x%08lx]\n",
+		 tsk_stk, tsk_stk + THREAD_SIZE);
+	pr_emerg("IRQ stack:      [0x%08lx..0x%08lx]\n",
+		 irq_stk - THREAD_SIZE, irq_stk);
+	pr_emerg("Overflow stack: [0x%08lx..0x%08lx]\n",
+		 ovf_stk - OVERFLOW_STACK_SIZE, ovf_stk);
+	die("kernel stack overflow", regs, 0);
+}
+
+/*
+ * Normally, we rely on the logic in do_translation_fault() to update stale PMD
+ * entries covering the vmalloc space in a task's page tables when it first
+ * accesses the region in question. Unfortunately, this is not sufficient when
+ * the task stack resides in the vmalloc region, as do_translation_fault() is a
+ * C function that needs a stack to run.
+ *
+ * So we need to ensure that these PMD entries are up to date *before* the MM
+ * switch. As we already have some logic in the MM switch path that takes care
+ * of this, let's trigger it by bumping the counter every time the core vmalloc
+ * code modifies a PMD entry in the vmalloc region.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+	/*
+	 * Both [start, end) and [VMALLOC_START, VMALLOC_END) are half-open,
+	 * so a range that merely abuts the vmalloc region does not touch it.
+	 */
+	if (start >= VMALLOC_END || end <= VMALLOC_START)
+		return;
+
+	/*
+	 * The core vmalloc code calls this hook when it has changed a PMD
+	 * level entry in the kernel page tables. As we don't support huge
+	 * vmalloc/vmap on ARM, and kernel PMD level entries are rarely (if
+	 * ever) updated, expect one call per MiB worth of vmalloc space:
+	 * ~250 for the typical case. Reaching UINT_MAX means something
+	 * entirely unexpected has occurred, so throw a warning then.
+	 */
+	WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX);
+}
+
+#endif
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index e8d729975f12..c5ea328c428d 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -389,7 +389,8 @@ int unwind_frame(struct stackframe *frame)
 
 	/* store the highest address on the stack to avoid crossing it*/
 	ctrl.sp_low = frame->sp;
-	ctrl.sp_high = ALIGN(ctrl.sp_low, THREAD_SIZE);
+	ctrl.sp_high = ALIGN(ctrl.sp_low - THREAD_SIZE, THREAD_ALIGN)
+		       + THREAD_SIZE;
 
 	pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__,
 		 frame->pc, frame->lr, frame->sp);
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index f02d617e3359..aa12b65a7fd6 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -138,12 +138,12 @@ SECTIONS
 #ifdef CONFIG_STRICT_KERNEL_RWX
 	. = ALIGN(1<<SECTION_SHIFT);
 #else
-	. = ALIGN(THREAD_SIZE);
+	. = ALIGN(THREAD_ALIGN);
 #endif
 	__init_end = .;
 
 	_sdata = .;
-	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
 	_edata = .;
 
 	BSS_SECTION(0, 0, 0)