author     Linus Torvalds <torvalds@linux-foundation.org>  2012-12-16 15:40:50 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-12-16 15:40:50 -0800
commit     2a74dbb9a86e8102dcd07d284135b4530a84826e (patch)
tree       a54403e312b6062dfb57bd904ba8b8ce3b11e720 /arch
parent     770b6cb4d21fb3e3df2a7a51e186a3c14db1ec30 (diff)
parent     e93072374112db9dc86635934ee761249be28370 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security
Pull security subsystem updates from James Morris:
 "A quiet cycle for the security subsystem with just a few maintenance
  updates."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security:
  Smack: create a sysfs mount point for smackfs
  Smack: use select not depends in Kconfig
  Yama: remove locking from delete path
  Yama: add RCU to drop read locking
  drivers/char/tpm: remove tasklet and cleanup
  KEYS: Use keyring_alloc() to create special keyrings
  KEYS: Reduce initial permissions on keys
  KEYS: Make the session and process keyrings per-thread
  seccomp: Make syscall skipping and nr changes more consistent
  key: Fix resource leak
  keys: Fix unreachable code
  KEYS: Add payload preparsing opportunity prior to key instantiate or update
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/kernel/vsyscall_64.c  110
1 file changed, 59 insertions(+), 51 deletions(-)
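
The vsyscall_64.c hunks below rework emulate_vsyscall() so that pointer validation and syscall-number selection happen once, up front, and the seccomp hook (secure_computing()) runs with orig_ax/ax already set before any emulation work, replacing the per-case vsyscall_seccomp() helper that is being removed. A condensed sketch of the resulting control flow, with names taken from the patch (abbreviated for readability, not a standalone compilable excerpt):

	/* 1. Validate user pointers and pick the real syscall nr. */
	switch (vsyscall_nr) {
	case 0:	/* gettimeofday */
		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
			ret = -EFAULT;
			goto check_fault;
		}
		syscall_nr = __NR_gettimeofday;
		break;
	/* cases 1 (time) and 2 (getcpu) follow the same pattern */
	}

	/*
	 * 2. Let seccomp see the call before any work is done.  A filter
	 *    may ask to skip the call (goto do_ret), but it may not rewrite
	 *    the syscall nr or the instruction pointer.
	 */
	regs->orig_ax = syscall_nr;
	regs->ax = -ENOSYS;
	tmp = secure_computing(syscall_nr);
	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address)
		do_exit(SIGSYS);
	if (tmp)
		goto do_ret;	/* skip requested */

	/* 3. Only now emulate the call with SIGSEGV-on-fault semantics. */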
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3a3e8c9e280d..9a907a67be8f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
 	return nr;
 }
 
-#ifdef CONFIG_SECCOMP
-static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
-{
-	if (!seccomp_mode(&tsk->seccomp))
-		return 0;
-	task_pt_regs(tsk)->orig_ax = syscall_nr;
-	task_pt_regs(tsk)->ax = syscall_nr;
-	return __secure_computing(syscall_nr);
-}
-#else
-#define vsyscall_seccomp(_tsk, _nr) 0
-#endif
-
 static bool write_ok_or_segv(unsigned long ptr, size_t size)
 {
 	/*
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
 	struct task_struct *tsk;
 	unsigned long caller;
-	int vsyscall_nr;
+	int vsyscall_nr, syscall_nr, tmp;
 	int prev_sig_on_uaccess_error;
 	long ret;
-	int skip;
 
 	/*
 	 * No point in checking CS -- the only way to get here is a user mode
@@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	}
 
 	tsk = current;
-	/*
-	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
-	 * preserve that behavior to make writing exploits harder.
-	 */
-	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
-	current_thread_info()->sig_on_uaccess_error = 1;
 
 	/*
+	 * Check for access_ok violations and find the syscall nr.
+	 *
 	 * NULL is a valid user pointer (in the access_ok sense) on 32-bit and
 	 * 64-bit, so we don't need to special-case it here.  For all the
 	 * vsyscalls, NULL means "don't write anything" not "write it at
 	 * address 0".
 	 */
-	ret = -EFAULT;
-	skip = 0;
 	switch (vsyscall_nr) {
 	case 0:
-		skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
-		if (skip)
-			break;
-
 		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
-		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
-			break;
+		    !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
+			ret = -EFAULT;
+			goto check_fault;
+		}
+
+		syscall_nr = __NR_gettimeofday;
+		break;
+
+	case 1:
+		if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
+			ret = -EFAULT;
+			goto check_fault;
+		}
+
+		syscall_nr = __NR_time;
+		break;
+
+	case 2:
+		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+		    !write_ok_or_segv(regs->si, sizeof(unsigned))) {
+			ret = -EFAULT;
+			goto check_fault;
+		}
+
+		syscall_nr = __NR_getcpu;
+		break;
+	}
+
+	/*
+	 * Handle seccomp.  regs->ip must be the original value.
+	 * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
+	 *
+	 * We could optimize the seccomp disabled case, but performance
+	 * here doesn't matter.
+	 */
+	regs->orig_ax = syscall_nr;
+	regs->ax = -ENOSYS;
+	tmp = secure_computing(syscall_nr);
+	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
+		warn_bad_vsyscall(KERN_DEBUG, regs,
+				  "seccomp tried to change syscall nr or ip");
+		do_exit(SIGSYS);
+	}
+	if (tmp)
+		goto do_ret;  /* skip requested */
 
+	/*
+	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
+	 * preserve that behavior to make writing exploits harder.
+	 */
+	prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+	current_thread_info()->sig_on_uaccess_error = 1;
+
+	ret = -EFAULT;
+	switch (vsyscall_nr) {
+	case 0:
 		ret = sys_gettimeofday(
 			(struct timeval __user *)regs->di,
 			(struct timezone __user *)regs->si);
 		break;
 
 	case 1:
-		skip = vsyscall_seccomp(tsk, __NR_time);
-		if (skip)
-			break;
-
-		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
-			break;
-
 		ret = sys_time((time_t __user *)regs->di);
 		break;
 
 	case 2:
-		skip = vsyscall_seccomp(tsk, __NR_getcpu);
-		if (skip)
-			break;
-
-		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
-		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
-			break;
-
 		ret = sys_getcpu((unsigned __user *)regs->di,
 				 (unsigned __user *)regs->si,
 				 NULL);
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
 	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
 
-	if (skip) {
-		if ((long)regs->ax <= 0L) /* seccomp errno emulation */
-			goto do_ret;
-		goto done; /* seccomp trace/trap */
-	}
-
+check_fault:
 	if (ret == -EFAULT) {
 		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
 		warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@ do_ret:
 	/* Emulate a ret instruction. */
 	regs->ip = caller;
 	regs->sp += 8;
-done:
 	return true;
 
 sigsegv: