From 3aa551c9b4c40018f0e261a178e3d25478dc04a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 23 Mar 2009 18:28:15 +0100 Subject: genirq: add threaded interrupt handler support Add support for threaded interrupt handlers: A device driver can request that its main interrupt handler runs in a thread. To achive this the device driver requests the interrupt with request_threaded_irq() and provides additionally to the handler a thread function. The handler function is called in hard interrupt context and needs to check whether the interrupt originated from the device. If the interrupt originated from the device then the handler can either return IRQ_HANDLED or IRQ_WAKE_THREAD. IRQ_HANDLED is returned when no further action is required. IRQ_WAKE_THREAD causes the genirq code to invoke the threaded (main) handler. When IRQ_WAKE_THREAD is returned handler must have disabled the interrupt on the device level. This is mandatory for shared interrupt handlers, but we need to do it as well for obscure x86 hardware where disabling an interrupt on the IO_APIC level redirects the interrupt to the legacy PIC interrupt lines. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar --- kernel/exit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 167e1e3ad7c6..ca0b3488c4a9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1037,6 +1037,8 @@ NORET_TYPE void do_exit(long code) schedule(); } + exit_irq_thread(); + exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against -- cgit 1.4.1 From 3e93cd671813e204c258f1e6c797959920cf7772 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:00:13 -0400 Subject: Take fs_struct handling to new file (fs/fs_struct.c) Pure code move; two new helper functions for nfsd and daemonize (unshare_fs_struct() and daemonize_fs_struct() resp.; for now - the same code as used to be in callers). unshare_fs_struct() exported (for nfsd, as copy_fs_struct()/exit_fs() used to be), copy_fs_struct() and exit_fs() don't need exports anymore. Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/fs_struct.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 6 ++ fs/namei.c | 7 --- fs/namespace.c | 68 ---------------------- fs/nfsd/nfssvc.c | 7 +-- include/linux/fs_struct.h | 2 + kernel/exit.c | 31 +--------- kernel/fork.c | 29 +--------- 9 files changed, 155 insertions(+), 138 deletions(-) create mode 100644 fs/fs_struct.c (limited to 'kernel/exit.c') diff --git a/fs/Makefile b/fs/Makefile index 6e82a307bcd4..b5cd8e18dd9f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o + stack.o fs_struct.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/fs_struct.c b/fs/fs_struct.c new file mode 100644 index 000000000000..36e0a123bbf3 --- /dev/null +++ b/fs/fs_struct.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +/* + * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_root(struct fs_struct *fs, struct path *path) +{ + struct path old_root; + + write_lock(&fs->lock); + old_root = fs->root; + fs->root = *path; + path_get(path); + write_unlock(&fs->lock); + if (old_root.dentry) + path_put(&old_root); +} + +/* + * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_pwd(struct fs_struct *fs, struct path *path) +{ + struct path old_pwd; + + write_lock(&fs->lock); + old_pwd = fs->pwd; + fs->pwd = *path; + path_get(path); + write_unlock(&fs->lock); + + if (old_pwd.dentry) + path_put(&old_pwd); +} + +void chroot_fs_refs(struct path *old_root, struct path *new_root) +{ + struct task_struct *g, *p; + struct fs_struct *fs; + int count = 0; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { + write_lock(&fs->lock); + if (fs->root.dentry == old_root->dentry + && fs->root.mnt == old_root->mnt) { + path_get(new_root); + fs->root = *new_root; + count++; + } + if (fs->pwd.dentry == old_root->dentry + && fs->pwd.mnt == old_root->mnt) { + path_get(new_root); + fs->pwd = *new_root; + count++; + } + write_unlock(&fs->lock); + } + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + while (count--) + path_put(old_root); +} + +void put_fs_struct(struct fs_struct *fs) +{ + /* No need to hold fs->lock if we are killing it */ + if (atomic_dec_and_test(&fs->count)) { + path_put(&fs->root); + path_put(&fs->pwd); + kmem_cache_free(fs_cachep, fs); + } +} + +void exit_fs(struct task_struct *tsk) +{ + struct fs_struct * fs = tsk->fs; + + if (fs) { + task_lock(tsk); + tsk->fs = NULL; + task_unlock(tsk); + put_fs_struct(fs); + } +} + +struct fs_struct *copy_fs_struct(struct fs_struct *old) +{ + struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); + /* We don't need to lock fs - think why ;-) */ + if (fs) { + atomic_set(&fs->count, 1); + rwlock_init(&fs->lock); + fs->umask = old->umask; + read_lock(&old->lock); + fs->root = old->root; + path_get(&old->root); + fs->pwd = old->pwd; + path_get(&old->pwd); + read_unlock(&old->lock); + } + return fs; +} + +int unshare_fs_struct(void) +{ + struct fs_struct *fsp = copy_fs_struct(current->fs); + if (!fsp) + return -ENOMEM; + exit_fs(current); + current->fs = fsp; + return 0; +} +EXPORT_SYMBOL_GPL(unshare_fs_struct); + +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .count = ATOMIC_INIT(1), + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; + +void daemonize_fs_struct(void) +{ + struct fs_struct *fs; + + exit_fs(current); /* current->fs->count--; */ + fs = &init_fs; + current->fs = fs; + atomic_inc(&fs->count); +} diff --git a/fs/internal.h b/fs/internal.h index 53af885f1732..477a105f8df3 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -11,6 +11,7 @@ struct super_block; struct linux_binprm; +struct path; /* * block_dev.c @@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); + +/* + * fs_struct.c + */ +extern void chroot_fs_refs(struct path *, struct path *); diff --git a/fs/namei.c b/fs/namei.c index d040ce11785d..4c65a6460138 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2897,10 +2897,3 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); - -/* to be mentioned only in INIT_TASK */ -struct fs_struct init_fs = { - .count = ATOMIC_INIT(1), - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), - .umask = 0022, -}; diff --git a/fs/namespace.c b/fs/namespace.c index f7ec283ccfbb..1e56303c718e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2092,74 +2092,6 @@ out1: return retval; } -/* - * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_root(struct fs_struct *fs, struct path *path) -{ - struct path old_root; - - write_lock(&fs->lock); - old_root = fs->root; - fs->root = *path; - path_get(path); - write_unlock(&fs->lock); - if (old_root.dentry) - path_put(&old_root); -} - -/* - * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_pwd(struct fs_struct *fs, struct path *path) -{ - struct path old_pwd; - - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -static void chroot_fs_refs(struct path *old_root, struct path *new_root) -{ - struct task_struct *g, *p; - struct fs_struct *fs; - int count = 0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - task_lock(p); - fs = p->fs; - if (fs) { - write_lock(&fs->lock); - if (fs->root.dentry == old_root->dentry - && fs->root.mnt == old_root->mnt) { - path_get(new_root); - fs->root = *new_root; - count++; - } - if (fs->pwd.dentry == old_root->dentry - && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); - fs->pwd = *new_root; - count++; - } - write_unlock(&fs->lock); - } - task_unlock(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - while (count--) - path_put(old_root); -} - /* * pivot_root Semantics: * Moves the root file system of the current process to the directory put_old, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa8..144d69918614 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -404,7 +404,6 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; - struct fs_struct *fsp; int err, preverr = 0; /* Lock module and set up kernel thread */ @@ -413,13 +412,11 @@ nfsd(void *vrqstp) /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. */ - fsp = copy_fs_struct(current->fs); - if (!fsp) { + if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; } - exit_fs(current); - current->fs = fsp; + current->fs->umask = 0; /* diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 18b467dbe278..298cef1c0793 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -20,5 +20,7 @@ extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void put_fs_struct(struct fs_struct *); +extern void daemonize_fs_struct(void); +extern int unshare_fs_struct(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 167e1e3ad7c6..ad8375758a79 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -429,7 +429,6 @@ EXPORT_SYMBOL(disallow_signal); void daemonize(const char *name, ...) { va_list args; - struct fs_struct *fs; sigset_t blocked; va_start(args, name); @@ -462,11 +461,7 @@ void daemonize(const char *name, ...) /* Become as one with the init task */ - exit_fs(current); /* current->fs->count--; */ - fs = init_task.fs; - current->fs = fs; - atomic_inc(&fs->count); - + daemonize_fs_struct(); exit_files(current); current->files = init_task.files; atomic_inc(¤t->files->count); @@ -565,30 +560,6 @@ void exit_files(struct task_struct *tsk) } } -void put_fs_struct(struct fs_struct *fs) -{ - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { - path_put(&fs->root); - path_put(&fs->pwd); - kmem_cache_free(fs_cachep, fs); - } -} - -void exit_fs(struct task_struct *tsk) -{ - struct fs_struct * fs = tsk->fs; - - if (fs) { - task_lock(tsk); - tsk->fs = NULL; - task_unlock(tsk); - put_fs_struct(fs); - } -} - -EXPORT_SYMBOL_GPL(exit_fs); - #ifdef CONFIG_MM_OWNER /* * Task p is exiting and it owned mm, lets find a new owner for it diff --git a/kernel/fork.c b/kernel/fork.c index 47c15840a381..05c02dc586b1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -681,38 +681,13 @@ fail_nomem: return retval; } -static struct fs_struct *__copy_fs_struct(struct fs_struct *old) -{ - struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); - /* We don't need to lock fs - think why ;-) */ - if (fs) { - atomic_set(&fs->count, 1); - rwlock_init(&fs->lock); - fs->umask = old->umask; - read_lock(&old->lock); - fs->root = old->root; - path_get(&old->root); - fs->pwd = old->pwd; - path_get(&old->pwd); - read_unlock(&old->lock); - } - return fs; -} - -struct fs_struct *copy_fs_struct(struct fs_struct *old) -{ - return __copy_fs_struct(old); -} - -EXPORT_SYMBOL_GPL(copy_fs_struct); - static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { if (clone_flags & CLONE_FS) { atomic_inc(¤t->fs->count); return 0; } - tsk->fs = __copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(current->fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -1545,7 +1520,7 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) if ((unshare_flags & CLONE_FS) && (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = __copy_fs_struct(current->fs); + *new_fsp = copy_fs_struct(current->fs); if (!*new_fsp) return -ENOMEM; } -- cgit 1.4.1 From 5ad4e53bd5406ee214ddc5a41f03f779b8b2d526 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:50:06 -0400 Subject: Get rid of indirect include of fs_struct.h Don't pull it in sched.h; very few files actually need it and those can include directly. sched.h itself only needs forward declaration of struct fs_struct; Signed-off-by: Al Viro --- arch/cris/kernel/process.c | 1 - fs/dcache.c | 1 + fs/exec.c | 1 + fs/fs_struct.c | 1 + fs/namei.c | 1 + fs/namespace.c | 1 + fs/open.c | 1 + fs/proc/base.c | 1 + fs/proc/task_nommu.c | 1 + include/linux/mnt_namespace.h | 2 ++ include/linux/nsproxy.h | 1 + include/linux/sched.h | 3 ++- init/do_mounts.c | 1 + kernel/auditsc.c | 1 + kernel/exec_domain.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sys.c | 1 + security/tomoyo/realpath.c | 1 + 19 files changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 60816e876455..4df0b320d524 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index 90bbd7e1b116..0dc4de21f088 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/fs/exec.c b/fs/exec.c index 614991bf0c87..052a961e41aa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 6ac219338670..eee059052db5 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. diff --git a/fs/namei.c b/fs/namei.c index 964c0249444b..b8433ebfae05 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) diff --git a/fs/namespace.c b/fs/namespace.c index 1e56303c718e..c6f54e4c4290 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "pnode.h" diff --git a/fs/open.c b/fs/open.c index 75b61677daaf..377eb25b6abf 100644 --- a/fs/open.c +++ b/fs/open.c @@ -29,6 +29,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/proc/base.c b/fs/proc/base.c index e0afd326b688..f71559784bfb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -80,6 +80,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 6ca01052c5bc..253afc04484c 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 830bbcd449d6..3a059298cc19 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,6 +22,8 @@ struct proc_mounts { int event; }; +struct fs_struct; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index afad7dec1b36..7b370c7cfeff 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct fs_struct; /* * A structure to contain pointers to all per-process diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2de..b4e065ea0de1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -68,7 +68,7 @@ struct sched_param { #include #include #include -#include +#include #include #include #include @@ -97,6 +97,7 @@ struct futex_pi_state; struct robust_list_head; struct bio; struct bts_tracer; +struct fs_struct; /* * List of flags we want to share for kernel threads, diff --git a/init/do_mounts.c b/init/do_mounts.c index 8d4ff5afc1d8..dd7ee5f203f3 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8cbddff6c283..2bfc64786765 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index cb8e9626c215..c35452cadded 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -18,6 +18,7 @@ #include #include #include +#include static void default_handler(int, struct pt_regs *); diff --git a/kernel/exit.c b/kernel/exit.c index ad8375758a79..b5d656845c90 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 51f138a131de..e82a14577a98 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882a..ce182aaed204 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index d47f16b844b2..3bbe01a7a4b5 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "common.h" #include "realpath.h" -- cgit 1.4.1 From 90bc8d8b1a38f1ab131a2399a202e1889db95de8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:57:58 -0700 Subject: do_wait: fix waiting for the group stop with the dead leader do_wait(WSTOPPED) assumes that p->state must be == TASK_STOPPED, this is not true if the leader is already dead. Check SIGNAL_STOP_STOPPED instead and use signal->group_exit_code. Trivial test-case: void *tfunc(void *arg) { pause(); return NULL; } int main(void) { pthread_t thr; pthread_create(&thr, NULL, tfunc, NULL); pthread_exit(NULL); return 0; } It doesn't react to ^Z (and then to ^C or ^\). The task is stopped, but bash can't see this. The bug is very old, and it was reported multiple times. This patch was sent more than a year ago (http://marc.info/?t=119713920000003) but it was ignored. This change also fixes other oddities (but not all) in this area. For example, before this patch: $ sleep 100 ^Z [1]+ Stopped sleep 100 $ strace -p `pidof sleep` Process 11442 attached - interrupt to quit strace hangs in do_wait(), because ->exit_code was already consumed by bash. After this patch, strace happily proceeds: --- SIGTSTP (Stopped) @ 0 (0) --- restart_syscall(<... resuming interrupted call ...> To me, this looks much more "natural" and correct. Another example. Let's suppose we have the main thread M and sub-thread T, the process is stopped, and its parent did wait(WSTOPPED). Now we can ptrace T but not M. This looks at least strange to me. Imho, do_wait() should not confuse the per-thread ptrace stops with the per-process job control stops. Signed-off-by: Oleg Nesterov Cc: Denys Vlasenko Cc: "Eric W. Biederman" Cc: Jan Kratochvil Cc: Kaz Kylheku Cc: Michael Kerrisk Cc: Roland McGrath Cc: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 167e1e3ad7c6..0c06b9efae3b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1417,6 +1417,18 @@ static int wait_task_zombie(struct task_struct *p, int options, return retval; } +static int *task_stopped_code(struct task_struct *p, bool ptrace) +{ + if (ptrace) { + if (task_is_stopped_or_traced(p)) + return &p->exit_code; + } else { + if (p->signal->flags & SIGNAL_STOP_STOPPED) + return &p->signal->group_exit_code; + } + return NULL; +} + /* * Handle sys_wait4 work for one task in state TASK_STOPPED. We hold * read_lock(&tasklist_lock) on entry. If we return zero, we still hold @@ -1427,7 +1439,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, int options, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { - int retval, exit_code, why; + int retval, exit_code, *p_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; @@ -1437,22 +1449,16 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, exit_code = 0; spin_lock_irq(&p->sighand->siglock); - if (unlikely(!task_is_stopped_or_traced(p))) - goto unlock_sig; - - if (!ptrace && p->signal->group_stop_count > 0) - /* - * A group stop is in progress and this is the group leader. - * We won't report until all threads have stopped. - */ + p_code = task_stopped_code(p, ptrace); + if (unlikely(!p_code)) goto unlock_sig; - exit_code = p->exit_code; + exit_code = *p_code; if (!exit_code) goto unlock_sig; if (!unlikely(options & WNOWAIT)) - p->exit_code = 0; + *p_code = 0; /* don't need the RCU readlock here as we're holding a spinlock */ uid = __task_cred(p)->uid; @@ -1608,7 +1614,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, */ *notask_error = 0; - if (task_is_stopped_or_traced(p)) + if (task_stopped_code(p, ptrace)) return wait_task_stopped(ptrace, p, options, infop, stat_addr, ru); -- cgit 1.4.1 From 6d69cb87f05eef3b02370b2f7bae608ad2301a00 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:12 -0700 Subject: ptrace: simplify ptrace_exit()->ignoring_children() path ignoring_children() takes parent->sighand->siglock and checks k_sigaction[SIGCHLD] atomically. But this buys nothing, we can't get the "really" wrong result even if we race with sigaction(SIGCHLD). If we read the "stale" sa_handler/sa_flags we can pretend it was changed right after the check. Remove spin_lock(->siglock), and kill "int ign" which caches the result of ignoring_children() which becomes rather trivial. Perhaps it makes sense to export this helper, do_notify_parent() can use it too. Signed-off-by: Oleg Nesterov Cc: Jerome Marchand Cc: Roland McGrath Cc: Denys Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 0c06b9efae3b..7a8311422930 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -732,19 +732,15 @@ static void exit_mm(struct task_struct * tsk) } /* - * Return nonzero if @parent's children should reap themselves. - * - * Called with write_lock_irq(&tasklist_lock) held. + * Called with irqs disabled, returns true if childs should reap themselves. */ -static int ignoring_children(struct task_struct *parent) +static int ignoring_children(struct sighand_struct *sigh) { int ret; - struct sighand_struct *psig = parent->sighand; - unsigned long flags; - spin_lock_irqsave(&psig->siglock, flags); - ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN || - (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT)); - spin_unlock_irqrestore(&psig->siglock, flags); + spin_lock(&sigh->siglock); + ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) || + (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT); + spin_unlock(&sigh->siglock); return ret; } @@ -757,7 +753,6 @@ static int ignoring_children(struct task_struct *parent) static void ptrace_exit(struct task_struct *parent, struct list_head *dead) { struct task_struct *p, *n; - int ign = -1; list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { __ptrace_unlink(p); @@ -779,12 +774,8 @@ static void ptrace_exit(struct task_struct *parent, struct list_head *dead) if (!task_detached(p) && thread_group_empty(p)) { if (!same_thread_group(p->real_parent, parent)) do_notify_parent(p, p->exit_signal); - else { - if (ign < 0) - ign = ignoring_children(parent); - if (ign) - p->exit_signal = -1; - } + else if (ignoring_children(parent->sighand)) + p->exit_signal = -1; } if (task_detached(p)) { -- cgit 1.4.1 From b1b4c6799fb59e710454bfe0ab477cb8523a8667 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:13 -0700 Subject: ptrace: reintroduce __ptrace_detach() as a callee of ptrace_exit() No functional changes, preparation for the next patch. Move the "should we release this child" logic into the separate handler, __ptrace_detach(). Signed-off-by: Oleg Nesterov Cc: Jerome Marchand Cc: Roland McGrath Cc: Denys Vlasenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 62 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 7a8311422930..576eae233b53 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -744,6 +744,38 @@ static int ignoring_children(struct sighand_struct *sigh) return ret; } +/* Returns nonzero if the tracee should be released. */ +int __ptrace_detach(struct task_struct *tracer, struct task_struct *p) +{ + __ptrace_unlink(p); + + if (p->exit_state != EXIT_ZOMBIE) + return 0; + /* + * If it's a zombie, our attachedness prevented normal + * parent notification or self-reaping. Do notification + * now if it would have happened earlier. If it should + * reap itself we return true. + * + * If it's our own child, there is no notification to do. + * But if our normal children self-reap, then this child + * was prevented by ptrace and we must reap it now. + */ + if (!task_detached(p) && thread_group_empty(p)) { + if (!same_thread_group(p->real_parent, tracer)) + do_notify_parent(p, p->exit_signal); + else if (ignoring_children(tracer->sighand)) + p->exit_signal = -1; + } + + if (!task_detached(p)) + return 0; + + /* Mark it as in the process of being reaped. */ + p->exit_state = EXIT_DEAD; + return 1; +} + /* * Detach all tasks we were using ptrace on. * Any that need to be release_task'd are put on the @dead list. @@ -755,36 +787,8 @@ static void ptrace_exit(struct task_struct *parent, struct list_head *dead) struct task_struct *p, *n; list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { - __ptrace_unlink(p); - - if (p->exit_state != EXIT_ZOMBIE) - continue; - - /* - * If it's a zombie, our attachedness prevented normal - * parent notification or self-reaping. Do notification - * now if it would have happened earlier. If it should - * reap itself, add it to the @dead list. We can't call - * release_task() here because we already hold tasklist_lock. - * - * If it's our own child, there is no notification to do. - * But if our normal children self-reap, then this child - * was prevented by ptrace and we must reap it now. - */ - if (!task_detached(p) && thread_group_empty(p)) { - if (!same_thread_group(p->real_parent, parent)) - do_notify_parent(p, p->exit_signal); - else if (ignoring_children(parent->sighand)) - p->exit_signal = -1; - } - - if (task_detached(p)) { - /* - * Mark it as in the process of being reaped. - */ - p->exit_state = EXIT_DEAD; + if (__ptrace_detach(parent, p)) list_add(&p->ptrace_entry, dead); - } } } -- cgit 1.4.1 From 0a967a044a777e8b9c739120927114ddc0094298 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:15 -0700 Subject: reparent_thread: don't call kill_orphaned_pgrp() if task_detached() If task_detached(p) == T, then either a) p is not the main thread, we will find the group leader on the ->children list. or b) p is the group leader but its ->exit_state = EXIT_DEAD. This can only happen when the last sub-thread has died, but in that case that thread has already called kill_orphaned_pgrp() from exit_notify(). In both cases kill_orphaned_pgrp() looks bogus. Move the task_detached() check up and simplify the code, this is also right from the "common sense" pov: we should do nothing with the detached childs, except move them to the new parent's ->children list. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 576eae233b53..405e6877168b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -818,6 +818,8 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father) list_move_tail(&p->sibling, &p->real_parent->children); + if (task_detached(p)) + return; /* If this is a threaded reparent there is no need to * notify anyone anything has happened. */ @@ -825,15 +827,13 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father) return; /* We don't want people slaying init. */ - if (!task_detached(p)) - p->exit_signal = SIGCHLD; + p->exit_signal = SIGCHLD; /* If we'd notified the old parent about this child's death, * also notify the new parent. */ if (!ptrace_reparented(p) && - p->exit_state == EXIT_ZOMBIE && - !task_detached(p) && thread_group_empty(p)) + p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); kill_orphaned_pgrp(p, father); -- cgit 1.4.1 From b1442b055c154699a6a2c436f3352f71b6beede3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:16 -0700 Subject: reparent_thread: fix the "is it traced" check reparent_thread() uses ptrace_reparented() to check whether this thread is ptraced, in that case we should not notify the new parent. But ptrace_reparented() is not exactly correct when the reparented thread is traced by /sbin/init, because forget_original_parent() has already changed ->real_parent. Currently, the only problem is the false notification. But with the next patch the kernel crash in this (yes, pathological) case. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 405e6877168b..5be0a406faeb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -832,7 +832,7 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father) /* If we'd notified the old parent about this child's death, * also notify the new parent. */ - if (!ptrace_reparented(p) && + if (!p->ptrace && p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) do_notify_parent(p, p->exit_signal); -- cgit 1.4.1 From 7f5d3652d469cdf9eb2365dfea7ce3fb9e1409cc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:17 -0700 Subject: reparent_thread: fix a zombie leak if /sbin/init ignores SIGCHLD If /sbin/init ignores SIGCHLD and we re-parent a zombie, it is leaked. reparent_thread() does do_notify_parent() which sets ->exit_signal = -1 in this case. This means that nobody except us can reap it, the detached task is not visible to do_wait(). Change reparent_thread() to return a boolean (like __pthread_detach) to indicate that the thread is dead and must be released. Also change forget_original_parent() to add the child to ptrace_dead list in this case. The naming becomes insane, the next patch does the cleanup. Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 5be0a406faeb..3e09b7cb3b20 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -810,8 +810,11 @@ static void ptrace_exit_finish(struct task_struct *parent, } } -static void reparent_thread(struct task_struct *p, struct task_struct *father) +/* Returns nonzero if the child should be released. */ +static int reparent_thread(struct task_struct *p, struct task_struct *father) { + int dead; + if (p->pdeath_signal) /* We already hold the tasklist_lock here. */ group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); @@ -819,12 +822,12 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father) list_move_tail(&p->sibling, &p->real_parent->children); if (task_detached(p)) - return; + return 0; /* If this is a threaded reparent there is no need to * notify anyone anything has happened. */ if (same_thread_group(p->real_parent, father)) - return; + return 0; /* We don't want people slaying init. */ p->exit_signal = SIGCHLD; @@ -832,11 +835,19 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father) /* If we'd notified the old parent about this child's death, * also notify the new parent. */ + dead = 0; if (!p->ptrace && - p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) + p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { do_notify_parent(p, p->exit_signal); + if (task_detached(p)) { + p->exit_state = EXIT_DEAD; + dead = 1; + } + } kill_orphaned_pgrp(p, father); + + return dead; } /* @@ -896,7 +907,8 @@ static void forget_original_parent(struct task_struct *father) BUG_ON(p->ptrace); p->parent = p->real_parent; } - reparent_thread(p, father); + if (reparent_thread(p, father)) + list_add(&p->ptrace_entry, &ptrace_dead);; } write_unlock_irq(&tasklist_lock); -- cgit 1.4.1 From 39c626ae47c469abdfd30c6e42eff884931380d6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:18 -0700 Subject: forget_original_parent: split out the un-ptrace part By discussion with Roland. - Rename ptrace_exit() to exit_ptrace(), and change it to do all the necessary work with ->ptraced list by its own. - Move this code from exit.c to ptrace.c - Update the comment in ptrace_detach() to explain the rechecking of the child->ptrace. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: "Metzger, Markus T" Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 2 +- include/linux/sched.h | 5 +++ kernel/exit.c | 95 ++++---------------------------------------------- kernel/ptrace.c | 78 +++++++++++++++++++++++++++++++++++++++-- 4 files changed, 88 insertions(+), 92 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 1a2b0cb55535..67c15653fc23 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -94,7 +94,7 @@ extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); -extern int __ptrace_detach(struct task_struct *tracer, struct task_struct *p); +extern void exit_ptrace(struct task_struct *tracer); extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 diff --git a/include/linux/sched.h b/include/linux/sched.h index 9186f8c5d5f2..b47c94e7560b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2061,6 +2061,11 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) +static inline int task_detached(struct task_struct *p) +{ + return p->exit_signal == -1; +} + /* * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also diff --git a/kernel/exit.c b/kernel/exit.c index 3e09b7cb3b20..506693dfdd4e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -61,11 +61,6 @@ DEFINE_TRACE(sched_process_wait); static void exit_mm(struct task_struct * tsk); -static inline int task_detached(struct task_struct *p) -{ - return p->exit_signal == -1; -} - static void __unhash_process(struct task_struct *p) { nr_threads--; @@ -731,85 +726,6 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -/* - * Called with irqs disabled, returns true if childs should reap themselves. - */ -static int ignoring_children(struct sighand_struct *sigh) -{ - int ret; - spin_lock(&sigh->siglock); - ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) || - (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT); - spin_unlock(&sigh->siglock); - return ret; -} - -/* Returns nonzero if the tracee should be released. */ -int __ptrace_detach(struct task_struct *tracer, struct task_struct *p) -{ - __ptrace_unlink(p); - - if (p->exit_state != EXIT_ZOMBIE) - return 0; - /* - * If it's a zombie, our attachedness prevented normal - * parent notification or self-reaping. Do notification - * now if it would have happened earlier. If it should - * reap itself we return true. - * - * If it's our own child, there is no notification to do. - * But if our normal children self-reap, then this child - * was prevented by ptrace and we must reap it now. - */ - if (!task_detached(p) && thread_group_empty(p)) { - if (!same_thread_group(p->real_parent, tracer)) - do_notify_parent(p, p->exit_signal); - else if (ignoring_children(tracer->sighand)) - p->exit_signal = -1; - } - - if (!task_detached(p)) - return 0; - - /* Mark it as in the process of being reaped. */ - p->exit_state = EXIT_DEAD; - return 1; -} - -/* - * Detach all tasks we were using ptrace on. - * Any that need to be release_task'd are put on the @dead list. - * - * Called with write_lock(&tasklist_lock) held. - */ -static void ptrace_exit(struct task_struct *parent, struct list_head *dead) -{ - struct task_struct *p, *n; - - list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) { - if (__ptrace_detach(parent, p)) - list_add(&p->ptrace_entry, dead); - } -} - -/* - * Finish up exit-time ptrace cleanup. - * - * Called without locks. - */ -static void ptrace_exit_finish(struct task_struct *parent, - struct list_head *dead) -{ - struct task_struct *p, *n; - - BUG_ON(!list_empty(&parent->ptraced)); - - list_for_each_entry_safe(p, n, dead, ptrace_entry) { - list_del_init(&p->ptrace_entry); - release_task(p); - } -} - /* Returns nonzero if the child should be released. */ static int reparent_thread(struct task_struct *p, struct task_struct *father) { @@ -894,12 +810,10 @@ static void forget_original_parent(struct task_struct *father) struct task_struct *p, *n, *reaper; LIST_HEAD(ptrace_dead); + exit_ptrace(father); + write_lock_irq(&tasklist_lock); reaper = find_new_reaper(father); - /* - * First clean up ptrace if we were using it. - */ - ptrace_exit(father, &ptrace_dead); list_for_each_entry_safe(p, n, &father->children, sibling) { p->real_parent = reaper; @@ -914,7 +828,10 @@ static void forget_original_parent(struct task_struct *father) write_unlock_irq(&tasklist_lock); BUG_ON(!list_empty(&father->children)); - ptrace_exit_finish(father, &ptrace_dead); + list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } } /* diff --git a/kernel/ptrace.c b/kernel/ptrace.c index ee553b6ad125..f5a9fa5aafa1 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -235,9 +235,57 @@ out: return retval; } +/* + * Called with irqs disabled, returns true if childs should reap themselves. + */ +static int ignoring_children(struct sighand_struct *sigh) +{ + int ret; + spin_lock(&sigh->siglock); + ret = (sigh->action[SIGCHLD-1].sa.sa_handler == SIG_IGN) || + (sigh->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT); + spin_unlock(&sigh->siglock); + return ret; +} + +/* + * Called with tasklist_lock held for writing. + * Unlink a traced task, and clean it up if it was a traced zombie. + * Return true if it needs to be reaped with release_task(). + * (We can't call release_task() here because we already hold tasklist_lock.) + * + * If it's a zombie, our attachedness prevented normal parent notification + * or self-reaping. Do notification now if it would have happened earlier. + * If it should reap itself, return true. + * + * If it's our own child, there is no notification to do. + * But if our normal children self-reap, then this child + * was prevented by ptrace and we must reap it now. + */ +static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p) +{ + __ptrace_unlink(p); + + if (p->exit_state == EXIT_ZOMBIE) { + if (!task_detached(p) && thread_group_empty(p)) { + if (!same_thread_group(p->real_parent, tracer)) + do_notify_parent(p, p->exit_signal); + else if (ignoring_children(tracer->sighand)) + p->exit_signal = -1; + } + if (task_detached(p)) { + /* Mark it as in the process of being reaped. */ + p->exit_state = EXIT_DEAD; + return true; + } + } + + return false; +} + int ptrace_detach(struct task_struct *child, unsigned int data) { - int dead = 0; + bool dead = false; if (!valid_signal(data)) return -EIO; @@ -247,7 +295,10 @@ int ptrace_detach(struct task_struct *child, unsigned int data) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); write_lock_irq(&tasklist_lock); - /* protect against de_thread()->release_task() */ + /* + * This child can be already killed. Make sure de_thread() or + * our sub-thread doing do_wait() didn't do release_task() yet. + */ if (child->ptrace) { child->exit_code = data; @@ -264,6 +315,29 @@ int ptrace_detach(struct task_struct *child, unsigned int data) return 0; } +/* + * Detach all tasks we were using ptrace on. + */ +void exit_ptrace(struct task_struct *tracer) +{ + struct task_struct *p, *n; + LIST_HEAD(ptrace_dead); + + write_lock_irq(&tasklist_lock); + list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { + if (__ptrace_detach(tracer, p)) + list_add(&p->ptrace_entry, &ptrace_dead); + } + write_unlock_irq(&tasklist_lock); + + BUG_ON(!list_empty(&tracer->ptraced)); + + list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) { + list_del_init(&p->ptrace_entry); + release_task(p); + } +} + int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) { int copied = 0; -- cgit 1.4.1 From 5dfc80be73dd0c212d2e6dd8dbf5afa07e680bbe Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:19 -0700 Subject: forget_original_parent: do not abuse child->ptrace_entry By discussion with Roland. - Use ->sibling instead of ->ptrace_entry to chain the need to be release_task'd childs. Nobody else can use ->sibling, this task is EXIT_DEAD and nobody can find it on its own list. - rename ptrace_dead to dead_childs. - Now that we don't have the "parallel" untrace code, change back reparent_thread() to return void, pass dead_childs as an argument. Actually, I don't understand why do we notify /sbin/init when we reparent a zombie, probably it is better to reap it unconditionally. [akpm@linux-foundation.org: s/childs/children/] Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Cc: "Metzger, Markus T" Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 87 ++++++++++++++++++++++++++++------------------------------- 1 file changed, 41 insertions(+), 46 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 506693dfdd4e..029415d9f82e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -726,46 +726,6 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -/* Returns nonzero if the child should be released. */ -static int reparent_thread(struct task_struct *p, struct task_struct *father) -{ - int dead; - - if (p->pdeath_signal) - /* We already hold the tasklist_lock here. */ - group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); - - list_move_tail(&p->sibling, &p->real_parent->children); - - if (task_detached(p)) - return 0; - /* If this is a threaded reparent there is no need to - * notify anyone anything has happened. - */ - if (same_thread_group(p->real_parent, father)) - return 0; - - /* We don't want people slaying init. */ - p->exit_signal = SIGCHLD; - - /* If we'd notified the old parent about this child's death, - * also notify the new parent. - */ - dead = 0; - if (!p->ptrace && - p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { - do_notify_parent(p, p->exit_signal); - if (task_detached(p)) { - p->exit_state = EXIT_DEAD; - dead = 1; - } - } - - kill_orphaned_pgrp(p, father); - - return dead; -} - /* * When we die, we re-parent all our children. * Try to give them to another thread in our thread @@ -805,10 +765,46 @@ static struct task_struct *find_new_reaper(struct task_struct *father) return pid_ns->child_reaper; } +/* +* Any that need to be release_task'd are put on the @dead list. + */ +static void reparent_thread(struct task_struct *father, struct task_struct *p, + struct list_head *dead) +{ + if (p->pdeath_signal) + group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); + + list_move_tail(&p->sibling, &p->real_parent->children); + + if (task_detached(p)) + return; + /* + * If this is a threaded reparent there is no need to + * notify anyone anything has happened. + */ + if (same_thread_group(p->real_parent, father)) + return; + + /* We don't want people slaying init. */ + p->exit_signal = SIGCHLD; + + /* If it has exited notify the new parent about this child's death. */ + if (!p->ptrace && + p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { + do_notify_parent(p, p->exit_signal); + if (task_detached(p)) { + p->exit_state = EXIT_DEAD; + list_move_tail(&p->sibling, dead); + } + } + + kill_orphaned_pgrp(p, father); +} + static void forget_original_parent(struct task_struct *father) { struct task_struct *p, *n, *reaper; - LIST_HEAD(ptrace_dead); + LIST_HEAD(dead_children); exit_ptrace(father); @@ -821,15 +817,14 @@ static void forget_original_parent(struct task_struct *father) BUG_ON(p->ptrace); p->parent = p->real_parent; } - if (reparent_thread(p, father)) - list_add(&p->ptrace_entry, &ptrace_dead);; + reparent_thread(father, p, &dead_children); } - write_unlock_irq(&tasklist_lock); + BUG_ON(!list_empty(&father->children)); - list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) { - list_del_init(&p->ptrace_entry); + list_for_each_entry_safe(p, n, &dead_children, sibling) { + list_del_init(&p->sibling); release_task(p); } } -- cgit 1.4.1 From 2ae448efc87df6d328f5835969076c7f9fce59c3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:36 -0700 Subject: pids: improve get_task_pid() to fix the unsafe sys_wait4()->task_pgrp() sys_wait4() does get_pid(task_pgrp(current)), this is not safe. We can add rcu lock/unlock around, but we already have get_task_pid() which can be improved to handle the special pids in more reliable manner. Signed-off-by: Oleg Nesterov Cc: Louis Rilling Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Sukadev Bhattiprolu Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 +- kernel/pid.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 029415d9f82e..384f09caf2ef 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1737,7 +1737,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, pid = find_get_pid(-upid); } else if (upid == 0) { type = PIDTYPE_PGID; - pid = get_pid(task_pgrp(current)); + pid = get_task_pid(current, PIDTYPE_PGID); } else /* upid > 0 */ { type = PIDTYPE_PID; pid = find_get_pid(upid); diff --git a/kernel/pid.c b/kernel/pid.c index 1b3586fe753a..6628abcc520e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -403,6 +403,8 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; rcu_read_lock(); + if (type != PIDTYPE_PID) + task = task->group_leader; pid = get_pid(task->pids[type].pid); rcu_read_unlock(); return pid; -- cgit 1.4.1 From 1b0f7ffd0ea27cd3a0b9ca04e3df9522048c32a3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:39 -0700 Subject: pids: kill signal_struct-> __pgrp/__session and friends We are wasting 2 words in signal_struct without any reason to implement task_pgrp_nr() and task_session_nr(). task_session_nr() has no callers since 2e2ba22ea4fd4bb85f0fa37c521066db6775cbef, we can remove it. task_pgrp_nr() is still (I believe wrongly) used in fs/autofsX and fs/coda. This patch reimplements task_pgrp_nr() via task_pgrp_nr_ns(), and kills __pgrp/__session and the related helpers. The change in drivers/char/tty_io.c is cosmetic, but hopefully makes sense anyway. Signed-off-by: Oleg Nesterov Acked-by: Alan Cox [tty parts] Cc: Cedric Le Goater Cc: Dave Hansen Cc: Eric Biederman Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: Sukadev Bhattiprolu Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 4 ++-- include/linux/sched.h | 43 ++++++------------------------------------- kernel/exit.c | 10 +++------- kernel/fork.c | 2 -- kernel/sys.c | 4 +--- 5 files changed, 12 insertions(+), 51 deletions(-) (limited to 'kernel/exit.c') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index a44b701c5bba..66b99a2049e3 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2681,7 +2681,7 @@ void __do_SAK(struct tty_struct *tty) /* Kill the entire session */ do_each_pid_task(session, PIDTYPE_SID, p) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): task_session_nr(p)==tty->session\n", + " (%s): task_session(p)==tty->session\n", task_pid_nr(p), p->comm); send_sig(SIGKILL, p, 1); } while_each_pid_task(session, PIDTYPE_SID, p); @@ -2691,7 +2691,7 @@ void __do_SAK(struct tty_struct *tty) do_each_thread(g, p) { if (p->signal->tty == tty) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): task_session_nr(p)==tty->session\n", + " (%s): task_session(p)==tty->session\n", task_pid_nr(p), p->comm); send_sig(SIGKILL, p, 1); continue; diff --git a/include/linux/sched.h b/include/linux/sched.h index 49df878a0cad..206ac003e8c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -547,25 +547,8 @@ struct signal_struct { struct list_head cpu_timers[3]; - /* job control IDs */ - - /* - * pgrp and session fields are deprecated. - * use the task_session_Xnr and task_pgrp_Xnr routines below - */ - - union { - pid_t pgrp __deprecated; - pid_t __pgrp; - }; - struct pid *tty_old_pgrp; - union { - pid_t session __deprecated; - pid_t __session; - }; - /* boolean value for session group leader */ int leader; @@ -1469,16 +1452,6 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } -static inline void set_task_session(struct task_struct *tsk, pid_t session) -{ - tsk->signal->__session = session; -} - -static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) -{ - tsk->signal->__pgrp = pgrp; -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1552,11 +1525,6 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return tsk->signal->__pgrp; -} - static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1569,11 +1537,6 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr(struct task_struct *tsk) -{ - return tsk->signal->__session; -} - static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1585,6 +1548,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +/* obsolete, do not use */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} + /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. diff --git a/kernel/exit.c b/kernel/exit.c index 384f09caf2ef..3bec141c82f6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -357,16 +357,12 @@ static void reparent_to_kthreadd(void) void __set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; - pid_t nr = pid_nr(pid); - if (task_session(curr) != pid) { + if (task_session(curr) != pid) change_pid(curr, PIDTYPE_SID, pid); - set_task_session(curr, nr); - } - if (task_pgrp(curr) != pid) { + + if (task_pgrp(curr) != pid) change_pid(curr, PIDTYPE_PGID, pid); - set_task_pgrp(curr, nr); - } } static void set_special_pids(struct pid *pid) diff --git a/kernel/fork.c b/kernel/fork.c index adbea16ec649..f74458231449 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1265,8 +1265,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); - set_task_pgrp(p, task_pgrp_nr(current)); - set_task_session(p, task_session_nr(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882a..742cefa527e6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1013,10 +1013,8 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) if (err) goto out; - if (task_pgrp(p) != pgrp) { + if (task_pgrp(p) != pgrp) change_pid(p, PIDTYPE_PGID, pgrp); - set_task_pgrp(p, pid_nr(pgrp)); - } err = 0; out: -- cgit 1.4.1 From 432870dab85a2f69dc417022646cb9a70acf7f94 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 6 Apr 2009 16:16:02 +0200 Subject: exit_notify: kill the wrong capable(CAP_KILL) check The CAP_KILL check in exit_notify() looks just wrong, kill it. Whatever logic we have to reset ->exit_signal, the malicious user can bypass it if it execs the setuid application before exiting. Signed-off-by: Oleg Nesterov Acked-by: Serge Hallyn Acked-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/exit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 6686ed1e4aa3..32cbf2607cb0 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -837,8 +837,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead) */ if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && (tsk->parent_exec_id != tsk->real_parent->self_exec_id || - tsk->self_exec_id != tsk->parent_exec_id) && - !capable(CAP_KILL)) + tsk->self_exec_id != tsk->parent_exec_id)) tsk->exit_signal = SIGCHLD; signal = tracehook_notify_death(tsk, &cookie, group_dead); -- cgit 1.4.1