summary refs log tree commit diff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-10 12:02:25 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-10 12:02:25 +0900
commit42859eea96ba6beabfb0369a1eeffa3c7d2bd9cb (patch)
treefa38aeda0d6e7a4c48a882b166b8643594a1ad50 /arch/x86/kernel
parentf59b51fe3d3092c08d7d554ecb40db24011b2ebc (diff)
parentf322220d6159455da2b5a8a596d802c8695fed30 (diff)
downloadlinux-42859eea96ba6beabfb0369a1eeffa3c7d2bd9cb.tar.gz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
Pull generic execve() changes from Al Viro:
 "This introduces the generic kernel_thread() and kernel_execve()
  functions, and switches x86, arm, alpha, um and s390 over to them."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal: (26 commits)
  s390: convert to generic kernel_execve()
  s390: switch to generic kernel_thread()
  s390: fold kernel_thread_helper() into ret_from_fork()
  s390: fold execve_tail() into start_thread(), convert to generic sys_execve()
  um: switch to generic kernel_thread()
  x86, um/x86: switch to generic sys_execve and kernel_execve
  x86: split ret_from_fork
  alpha: introduce ret_from_kernel_execve(), switch to generic kernel_execve()
  alpha: switch to generic kernel_thread()
  alpha: switch to generic sys_execve()
  arm: get rid of execve wrapper, switch to generic execve() implementation
  arm: optimized current_pt_regs()
  arm: introduce ret_from_kernel_execve(), switch to generic kernel_execve()
  arm: split ret_from_fork, simplify kernel_thread() [based on patch by rmk]
  generic sys_execve()
  generic kernel_execve()
  new helper: current_pt_regs()
  preparation for generic kernel_thread()
  um: kill thread->forking
  um: let signal_delivered() do SIGTRAP on singlestepping into handler
  ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/asm-offsets.c3
-rw-r--r--arch/x86/kernel/entry_32.S26
-rw-r--r--arch/x86/kernel/entry_64.S74
-rw-r--r--arch/x86/kernel/process.c65
-rw-r--r--arch/x86/kernel/process_32.c37
-rw-r--r--arch/x86/kernel/process_64.c35
-rw-r--r--arch/x86/kernel/signal.c4
-rw-r--r--arch/x86/kernel/sys_i386_32.c40
-rw-r--r--arch/x86/kernel/vm86_32.c6
10 files changed, 101 insertions, 191 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a48ea05157d3..91ce48f05f9f 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y			+= probe_roms.o
-obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc962ec..28610822fb3c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@ void common(void) {
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
 	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+	BLANK();
+	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0750e3ba87c0..8f9ed1afde8f 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -299,6 +299,13 @@ ENTRY(ret_from_fork)
 	CFI_ENDPROC
 END(ret_from_fork)
 
+ENTRY(ret_from_kernel_execve)
+	movl %eax, %esp
+	movl $0,PT_EAX(%esp)
+	GET_THREAD_INFO(%ebp)
+	jmp syscall_exit
+END(ret_from_kernel_execve)
+
 /*
  * Interrupt exit functions should be protected against kprobes
  */
@@ -323,8 +330,7 @@ ret_from_intr:
 	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 #else
 	/*
-	 * We can be coming here from a syscall done in the kernel space,
-	 * e.g. a failed kernel_execve().
+	 * We can be coming here from child spawned by kernel_thread().
 	 */
 	movl PT_CS(%esp), %eax
 	andl $SEGMENT_RPL_MASK, %eax
@@ -732,7 +738,6 @@ ENDPROC(ptregs_##name)
 PTREGSCALL1(iopl)
 PTREGSCALL0(fork)
 PTREGSCALL0(vfork)
-PTREGSCALL3(execve)
 PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
@@ -1015,15 +1020,20 @@ END(spurious_interrupt_bug)
  */
 	.popsection
 
-ENTRY(kernel_thread_helper)
-	pushl $0		# fake return address for unwinder
+ENTRY(ret_from_kernel_thread)
 	CFI_STARTPROC
-	movl %edi,%eax
-	call *%esi
+	pushl_cfi %eax
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
+	movl PT_EBP(%esp),%eax
+	call *PT_EBX(%esp)
 	call do_exit
 	ud2			# padding for call trace
 	CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
+ENDPROC(ret_from_kernel_thread)
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 44531acd9a81..cdc790c78f32 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -554,7 +554,7 @@ ENTRY(ret_from_fork)
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	jz   retint_restore_args
+	jz   1f
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -562,6 +562,16 @@ ENTRY(ret_from_fork)
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
+1:
+	subq $REST_SKIP, %rsp	# move the stack pointer back
+	CFI_ADJUST_CFA_OFFSET	REST_SKIP
+	movq %rbp, %rdi
+	call *%rbx
+	# exit
+	mov %eax, %edi
+	call do_exit
+	ud2			# padding for call trace
+
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -862,7 +872,6 @@ ENTRY(stub_execve)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
 	call sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
@@ -912,8 +921,7 @@ ENTRY(stub_x32_execve)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
-	call sys32_execve
+	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
@@ -1318,51 +1326,19 @@ bad_gs:
 	jmp  2b
 	.previous
 
-ENTRY(kernel_thread_helper)
-	pushq $0		# fake return address
-	CFI_STARTPROC
-	/*
-	 * Here we are in the child and the registers are set as they were
-	 * at kernel_thread() invocation in the parent.
-	 */
-	call *%rsi
-	# exit
-	mov %eax, %edi
-	call do_exit
-	ud2			# padding for call trace
-	CFI_ENDPROC
-END(kernel_thread_helper)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- *	 extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- *	rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- *	extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
-	CFI_STARTPROC
-	FAKE_STACK_FRAME $0
-	SAVE_ALL
-	movq %rsp,%rcx
-	call sys_execve
-	movq %rax, RAX(%rsp)
-	RESTORE_REST
-	testq %rax,%rax
-	je int_ret_from_sys_call
-	RESTORE_ARGS
-	UNFAKE_STACK_FRAME
-	ret
-	CFI_ENDPROC
-END(kernel_execve)
+ENTRY(ret_from_kernel_execve)
+	movq %rdi, %rsp
+	movl $0, RAX(%rsp)
+	// RESTORE_REST
+	movq 0*8(%rsp), %r15
+	movq 1*8(%rsp), %r14
+	movq 2*8(%rsp), %r13
+	movq 3*8(%rsp), %r12
+	movq 4*8(%rsp), %rbp
+	movq 5*8(%rsp), %rbx
+	addq $(6*8), %rsp
+	jmp int_ret_from_sys_call
+END(ret_from_kernel_execve)
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dc3567e083f9..b644e1c765dc 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,71 +293,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 }
 
 /*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.si = (unsigned long) fn;
-	regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
-	regs.ds = __USER_DS;
-	regs.es = __USER_DS;
-	regs.fs = __KERNEL_PERCPU;
-	regs.gs = __KERNEL_STACK_CANARY;
-#else
-	regs.ss = __KERNEL_DS;
-#endif
-
-	regs.orig_ax = -1;
-	regs.ip = (unsigned long) kernel_thread_helper;
-	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
- * sys_execve() executes a new program.
- */
-long sys_execve(const char __user *name,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename, argv, envp, regs);
-
-#ifdef CONFIG_X86_32
-	if (error == 0) {
-		/* Make sure we don't return using sysenter.. */
-                set_thread_flag(TIF_IRET);
-        }
-#endif
-
-	putname(filename);
-	return error;
-}
-
-/*
  * Idle related variables and functions
  */
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b9ff83c7135b..44e0bff38e72 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
 #include <asm/switch_to.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
 
 /*
  * Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-	unsigned long unused,
+	unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p);
 	struct task_struct *tsk;
 	int err;
 
-	childregs = task_pt_regs(p);
+	p->thread.sp = (unsigned long) childregs;
+	p->thread.sp0 = (unsigned long) (childregs+1);
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.ip = (unsigned long) ret_from_kernel_thread;
+		task_user_gs(p) = __KERNEL_STACK_CANARY;
+		childregs->ds = __USER_DS;
+		childregs->es = __USER_DS;
+		childregs->fs = __KERNEL_PERCPU;
+		childregs->bx = sp;	/* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		p->fpu_counter = 0;
+		p->thread.io_bitmap_ptr = NULL;
+		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+		return 0;
+	}
 	*childregs = *regs;
 	childregs->ax = 0;
 	childregs->sp = sp;
 
-	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
-
 	p->thread.ip = (unsigned long) ret_from_fork;
-
 	task_user_gs(p) = get_user_gs(regs);
 
 	p->fpu_counter = 0;
@@ -190,6 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+	regs->flags		= X86_EFLAGS_IF;
+	/*
+	 * force it to the iret return path by making it look as if there was
+	 * some work pending.
+	 */
+	set_thread_flag(TIF_NOTIFY_RESUME);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8a6d20ce1978..16c6365e2b86 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
 	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
-	childregs = ((struct pt_regs *)
-			(THREAD_SIZE + task_stack_page(p))) - 1;
-	*childregs = *regs;
-
-	childregs->ax = 0;
-	if (user_mode(regs))
-		childregs->sp = sp;
-	else
-		childregs->sp = (unsigned long)childregs;
-
+	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
 	p->thread.usersp = me->thread.usersp;
-
 	set_tsk_thread_flag(p, TIF_FORK);
-
 	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->sp = (unsigned long)childregs;
+		childregs->ss = __KERNEL_DS;
+		childregs->bx = sp; /* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		return 0;
+	}
+	*childregs = *regs;
+
+	childregs->ax = 0;
+	childregs->sp = sp;
 
 	err = -ENOMEM;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b33144c8b309..29ad351804e9 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -840,10 +840,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
 
-#ifdef CONFIG_X86_32
-	clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-
 	rcu_user_enter();
 }
 
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
deleted file mode 100644
index 0b0cb5fede19..000000000000
--- a/arch/x86/kernel/sys_i386_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/ipc.h>
-
-#include <linux/uaccess.h>
-#include <linux/unistd.h>
-
-#include <asm/syscalls.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	asm volatile ("int $0x80"
-	: "=a" (__res)
-	: "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
-	return __res;
-}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 54abcc0baf23..5c9687b1bde6 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
 		if ((trapno == 3) || (trapno == 1)) {
 			KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
 			/* setting this flag forces the code in entry_32.S to
-			   call save_v86_state() and change the stack pointer
-			   to KVM86->regs32 */
-			set_thread_flag(TIF_IRET);
+			   the path where we call save_v86_state() and change
+			   the stack pointer to KVM86->regs32 */
+			set_thread_flag(TIF_NOTIFY_RESUME);
 			return 0;
 		}
 		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));