From 13f59c5e45be59665c11ddde19799b6295543b7d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 28 Apr 2014 20:15:43 +0200 Subject: uprobes: Refuse to insert a probe into MAP_SHARED vma valid_vma() rejects the VM_SHARED vmas, but this still allows to insert a probe into the MAP_SHARED but not VM_MAYWRITE vma. Currently this is fine, such a mapping doesn't really differ from the private read-only mmap except mprotect(PROT_WRITE) won't work. However, get_user_pages(FOLL_WRITE | FOLL_FORCE) doesn't allow to COW in this case, and it would be safer to follow the same conventions as mm even if currently this happens to work. After the recent cda540ace6a1 "mm: get_user_pages(write,force) refuse to COW in shared areas" only uprobes can insert an anon page into the shared file-backed area, lets stop this and change valid_vma() to check VM_MAYSHARE instead. Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d1edc5e6fd03..7716c40f2c50 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -127,7 +127,7 @@ struct xol_area { */ static bool valid_vma(struct vm_area_struct *vma, bool is_register) { - vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; + vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; if (is_register) flags |= VM_WRITE; -- cgit 1.4.1 From 29dedee0e693aa113164c820395ce51446a71ace Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 5 May 2014 16:38:18 +0200 Subject: uprobes: Add mem_cgroup_charge_anon() into uprobe_write_opcode() Hugh says: The one I noticed was that it forgets all about memcg (because it was copied from KSM, and there the replacement page has already been charged to a memcg). See how mm/memory.c do_anonymous_page() does a mem_cgroup_charge_anon(). Hopefully not a big problem, uprobes is a system-wide thing and only root can insert the probes. But I agree, should be fixed anyway. Add mem_cgroup_{un,}charge_anon() into uprobe_write_opcode(). To simplify the error handling (and avoid the new "uncharge" label) the patch also moves anon_vma_prepare() up before we alloc/charge the new page. While at it fix the comment about ->mmap_sem, it is held for write. Suggested-by: Hugh Dickins Signed-off-by: Oleg Nesterov --- kernel/events/uprobes.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7716c40f2c50..a13251e8bfa4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -279,18 +279,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t * supported by that architecture then we need to modify is_trap_at_addr and * uprobe_write_opcode accordingly. This would never be a problem for archs * that have fixed length instructions. - */ - -/* + * * uprobe_write_opcode - write the opcode at a given virtual address. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. * - * Called with mm->mmap_sem held (for read and with a reference to - * mm). - * - * For mm @mm, write the opcode at @vaddr. + * Called with mm->mmap_sem held for write. * Return 0 (success) or a negative errno. */ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, @@ -310,21 +305,25 @@ retry: if (ret <= 0) goto put_old; + ret = anon_vma_prepare(vma); + if (ret) + goto put_old; + ret = -ENOMEM; new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); if (!new_page) goto put_old; - __SetPageUptodate(new_page); + if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) + goto put_new; + __SetPageUptodate(new_page); copy_highpage(new_page, old_page); copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); - ret = anon_vma_prepare(vma); - if (ret) - goto put_new; - ret = __replace_page(vma, vaddr, old_page, new_page); + if (ret) + mem_cgroup_uncharge_page(new_page); put_new: page_cache_release(new_page); -- cgit 1.4.1 From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 12 May 2014 18:24:45 +0200 Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap If the probed insn triggers a trap, ->si_addr = regs->ip is technically correct, but this is not what the signal handler wants; we need to pass the address of the probed insn, not the address of xol slot. Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in uprobes.h uses a macro to avoid include hell and ensure that it can be compiled even if an architecture doesn't define instruction_pointer(). Test-case: #include #include #include extern void probe_div(void); void sigh(int sig, siginfo_t *info, void *c) { int passed = (info->si_addr == probe_div); printf(passed ? "PASS\n" : "FAIL\n"); _exit(!passed); } int main(void) { struct sigaction sa = { .sa_sigaction = sigh, .sa_flags = SA_SIGINFO, }; sigaction(SIGFPE, &sa, NULL); asm ( "xor %ecx,%ecx\n" ".globl probe_div; probe_div:\n" "idiv %ecx\n" ); return 0; } it fails if probe_div() is probed. Note: show_unhandled_signals users should probably use this helper too, but we need to cleanup them first. Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu --- arch/x86/kernel/traps.c | 7 ++++--- include/linux/uprobes.h | 4 ++++ kernel/events/uprobes.c | 10 ++++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'kernel/events') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 73b3ea32245a..3fdb20548c4b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -148,11 +149,11 @@ static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, case X86_TRAP_DE: sicode = FPE_INTDIV; - siaddr = regs->ip; + siaddr = uprobe_get_trap_addr(regs); break; case X86_TRAP_UD: sicode = ILL_ILLOPN; - siaddr = regs->ip; + siaddr = uprobe_get_trap_addr(regs); break; case X86_TRAP_AC: sicode = BUS_ADRALN; @@ -531,7 +532,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = (void __user *)regs->ip; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); if (trapnr == X86_TRAP_MF) { unsigned short cwd, swd; /* diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index edff2b97b864..88c3b7e8b384 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern bool __weak is_trap_insn(uprobe_opcode_t *insn); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); +extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); @@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r #else /* !CONFIG_UPROBES */ struct uprobes_state { }; + +#define uprobe_get_trap_addr(regs) instruction_pointer(regs) + static inline int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index a13251e8bfa4..3b02c72938a8 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1351,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; } +unsigned long uprobe_get_trap_addr(struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + if (unlikely(utask && utask->active_uprobe)) + return utask->vaddr; + + return instruction_pointer(regs); +} + /* * Called with no locks held. * Called in context of a exiting or a exec-ing thread. -- cgit 1.4.1 From ebf905fc7a6e7c99c53b5afc888d8f950da90aff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 29 May 2014 19:00:24 +0200 Subject: perf: Fix use after free in perf_remove_from_context() While that mutex should guard the elements, it doesn't guard against the use-after-free that's from list_for_each_entry_rcu(). __perf_event_exit_task() can actually free the event. And because list addition/deletion is guarded by both ctx->mutex and ctx->lock, holding ctx->mutex is sufficient for reading the list, so we don't actually need the rcu list iteration. Fixes: 3a497f48637e ("perf: Simplify perf_event_exit_task_context()") Reported-by: Sasha Levin Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: Dave Jones Cc: acme@ghostprotocols.net Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140529170024.GA2315@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/core.c b/kernel/events/core.c index ed50b0943213..a62d142ad498 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7431,7 +7431,7 @@ __perf_event_exit_task(struct perf_event *child_event, static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { - struct perf_event *child_event; + struct perf_event *child_event, *next; struct perf_event_context *child_ctx; unsigned long flags; @@ -7485,7 +7485,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) */ mutex_lock(&child_ctx->mutex); - list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry) + list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) __perf_event_exit_task(child_event, child_ctx, child); mutex_unlock(&child_ctx->mutex); -- cgit 1.4.1 From 53b25335dd60981ad608da7890420898a34469a6 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 16 May 2014 17:12:12 -0400 Subject: perf: Disable sampled events if no PMU interrupt Add common code to generate -ENOTSUPP at event creation time if an architecture attempts to create a sampled event and PERF_PMU_NO_INTERRUPT is set. This adds a new pmu->capabilities flag. Initially we only support PERF_PMU_NO_INTERRUPT (to indicate a PMU has no support for generating hardware interrupts) but there are other capabilities that can be added later. Signed-off-by: Vince Weaver Acked-by: Will Deacon [peterz: rename to PERF_PMU_CAP_* and moved the pmu::capabilities word into a hole] Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1405161708060.11099@vincent-weaver-1.umelst.maine.edu Signed-off-by: Ingo Molnar Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 10 ++++++++++ kernel/events/core.c | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'kernel/events') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index af6dcf1d9e47..267c8f37012c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -166,6 +166,11 @@ struct perf_event; */ #define PERF_EVENT_TXN 0x1 +/** + * pmu::capabilities flags + */ +#define PERF_PMU_CAP_NO_INTERRUPT 0x01 + /** * struct pmu - generic performance monitoring unit */ @@ -178,6 +183,11 @@ struct pmu { const char *name; int type; + /* + * various common per-pmu feature flags + */ + int capabilities; + int * __percpu pmu_disable_count; struct perf_cpu_context * __percpu pmu_cpu_context; int task_ctx_nr; diff --git a/kernel/events/core.c b/kernel/events/core.c index a62d142ad498..e9ef0c6646af 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7120,6 +7120,13 @@ SYSCALL_DEFINE5(perf_event_open, } } + if (is_sampling_event(event)) { + if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { + err = -ENOTSUPP; + goto err_alloc; + } + } + account_event(event); /* -- cgit 1.4.1 From 41ccba029e9492dd0fc1bf5c23b72c6322a6dfe9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 19 May 2014 20:40:54 +0200 Subject: uprobes: Shift ->readpage check from __copy_insn() to uprobe_register() copy_insn() fails with -EIO if ->readpage == NULL, but this error is not propagated unless uprobe_register() path finds ->mm which already mmaps this file. In this case (say) "perf record" does not actually install the probe, but the user can't know about this. Move this check into uprobe_register() so that this problem can be detected earlier and reported to user. Note: this is still not perfect, - copy_insn() and arch_uprobe_analyze_insn() should be called by uprobe_register() but this is not simple, we need vm_file for read_mapping_page() (although perhaps we can pass NULL), and we need ->mm for is_64bit_mm() (although this logic is broken anyway). - uprobe_register() should be called by create_trace_uprobe(), not by probe_event_enable(), so that an error can be detected at "perf probe -x" time. This also needs more changes in the core uprobe code, uprobe register/unregister interface was poorly designed from the very beginning. Reported-by: Denys Vlasenko Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra Cc: Hugh Dickins Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140519184054.GA6750@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3b02c72938a8..c56b13e3b5e1 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -536,9 +536,6 @@ static int __copy_insn(struct address_space *mapping, struct file *filp, void *insn, int nbytes, loff_t offset) { struct page *page; - - if (!mapping->a_ops->readpage) - return -EIO; /* * Ensure that the page that has the original instruction is * populated and in page-cache. @@ -879,6 +876,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * if (!uc->handler && !uc->ret_handler) return -EINVAL; + /* copy_insn()->read_mapping_page() needs ->readpage() */ + if (!inode->i_mapping->a_ops->readpage) + return -EIO; /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; -- cgit 1.4.1 From 40814f6805a524130a5ec313871147f7aa9b5318 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 19 May 2014 20:41:36 +0200 Subject: uprobes: Teach copy_insn() to support tmpfs tmpfs is widely used but as Denys reports shmem_aops doesn't have ->readpage() and thus you can't probe a binary on this filesystem. As Hugh suggested we can use shmem_read_mapping_page() in this case, just we need to check shmem_mapping() if ->readpage == NULL. Reported-by: Denys Vlasenko Suggested-by: Hugh Dickins Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140519184136.GB6750@redhat.com Signed-off-by: Ingo Molnar --- kernel/events/uprobes.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'kernel/events') diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c56b13e3b5e1..6bfb6717f878 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -36,6 +36,7 @@ #include "../../mm/internal.h" /* munlock_vma_page */ #include #include +#include #include @@ -537,10 +538,14 @@ static int __copy_insn(struct address_space *mapping, struct file *filp, { struct page *page; /* - * Ensure that the page that has the original instruction is - * populated and in page-cache. + * Ensure that the page that has the original instruction is populated + * and in page-cache. If ->readpage == NULL it must be shmem_mapping(), + * see uprobe_register(). */ - page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); + if (mapping->a_ops->readpage) + page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); + else + page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT); if (IS_ERR(page)) return PTR_ERR(page); @@ -876,8 +881,8 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * if (!uc->handler && !uc->ret_handler) return -EINVAL; - /* copy_insn()->read_mapping_page() needs ->readpage() */ - if (!inode->i_mapping->a_ops->readpage) + /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ + if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping)) return -EIO; /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) -- cgit 1.4.1 From e041e328c4b41e1db79bfe5ba9992c2ed771ad19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 21 May 2014 17:32:19 +0200 Subject: perf: Fix perf_event_comm() vs. exec() assumption perf_event_comm() assumes that set_task_comm() is only called on exec(), and in particular that its only called on current. Neither are true, as Dave reported a WARN triggered by set_task_comm() being called on !current. Separate the exec() hook from the comm hook. Reported-by: Dave Jones Signed-off-by: Peter Zijlstra Cc: Alexander Viro Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20140521153219.GH5226@laptop.programming.kicks-ass.net [ Build fix. ] Signed-off-by: Ingo Molnar --- fs/exec.c | 1 + include/linux/perf_event.h | 4 +++- kernel/events/core.c | 28 ++++++++++++++++------------ 3 files changed, 20 insertions(+), 13 deletions(-) (limited to 'kernel/events') diff --git a/fs/exec.c b/fs/exec.c index 238b7aa26f68..a038a41a3677 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1110,6 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); + perf_event_exec(); set_task_comm(current, kbasename(bprm->filename)); /* Set the new mm task size. We have to do that late because it may diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3ef6ea12806a..9b5cd1992a88 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -695,6 +695,7 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs; extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); +extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); @@ -772,7 +773,7 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern int __perf_event_disable(void *info); extern void perf_event_task_tick(void); -#else +#else /* !CONFIG_PERF_EVENTS: */ static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { } @@ -802,6 +803,7 @@ static inline int perf_unregister_guest_info_callbacks (struct perf_guest_info_callbacks *callbacks) { return 0; } static inline void perf_event_mmap(struct vm_area_struct *vma) { } +static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } diff --git a/kernel/events/core.c b/kernel/events/core.c index 440eefc67397..647698f91988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2970,6 +2970,22 @@ out: local_irq_restore(flags); } +void perf_event_exec(void) +{ + struct perf_event_context *ctx; + int ctxn; + + rcu_read_lock(); + for_each_task_context_nr(ctxn) { + ctx = current->perf_event_ctxp[ctxn]; + if (!ctx) + continue; + + perf_event_enable_on_exec(ctx); + } + rcu_read_unlock(); +} + /* * Cross CPU call to read the hardware event */ @@ -5057,18 +5073,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; - struct perf_event_context *ctx; - int ctxn; - - rcu_read_lock(); - for_each_task_context_nr(ctxn) { - ctx = task->perf_event_ctxp[ctxn]; - if (!ctx) - continue; - - perf_event_enable_on_exec(ctx); - } - rcu_read_unlock(); if (!atomic_read(&nr_comm_events)) return; -- cgit 1.4.1 From 82b897782d10fcc4930c9d4a15b175348fdd2871 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 28 May 2014 11:45:04 +0300 Subject: perf: Differentiate exec() and non-exec() comm events perf tools like 'perf report' can aggregate samples by comm strings, which generally works. However, there are other potential use-cases. For example, to pair up 'calls' with 'returns' accurately (from branch events like Intel BTS) it is necessary to identify whether the process has exec'd. Although a comm event is generated when an 'exec' happens it is also generated whenever the comm string is changed on a whim (e.g. by prctl PR_SET_NAME). This patch adds a flag to the comm event to differentiate one case from the other. In order to determine whether the kernel supports the new flag, a selection bit named 'exec' is added to struct perf_event_attr. The bit does nothing but will cause perf_event_open() to fail if the bit is set on kernels that do not have it defined. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/537D9EBE.7030806@intel.com Cc: Paul Mackerras Cc: Dave Jones Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Alexander Viro Cc: Linus Torvalds Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- fs/exec.c | 6 +++--- include/linux/perf_event.h | 4 ++-- include/linux/sched.h | 6 +++++- include/uapi/linux/perf_event.h | 9 +++++++-- kernel/events/core.c | 4 ++-- 5 files changed, 19 insertions(+), 10 deletions(-) (limited to 'kernel/events') diff --git a/fs/exec.c b/fs/exec.c index a038a41a3677..a3d33fe592d6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, const char *buf) +void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_event_comm(tsk); + perf_event_comm(tsk, exec); } int flush_old_exec(struct linux_binprm * bprm) @@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); perf_event_exec(); - set_task_comm(current, kbasename(bprm->filename)); + __set_task_comm(current, kbasename(bprm->filename), true); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b4c1d4685bf0..707617a8c0f6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -707,7 +707,7 @@ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks * extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern void perf_event_exec(void); -extern void perf_event_comm(struct task_struct *tsk); +extern void perf_event_comm(struct task_struct *tsk, bool exec); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -815,7 +815,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } -static inline void perf_event_comm(struct task_struct *tsk) { } +static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 221b2bde3723..ad86e1d7dbc2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,7 +2379,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, const char *from); +extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); +static inline void set_task_comm(struct task_struct *tsk, const char *from) +{ + __set_task_comm(tsk, from, false); +} extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index d9cd853818ad..5312fae47218 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -302,8 +302,8 @@ struct perf_event_attr { exclude_callchain_kernel : 1, /* exclude kernel callchains */ exclude_callchain_user : 1, /* exclude user callchains */ mmap2 : 1, /* include mmap with inode data */ - - __reserved_1 : 40; + comm_exec : 1, /* flag comm events that are due to an exec */ + __reserved_1 : 39; union { __u32 wakeup_events; /* wakeup every n events */ @@ -502,7 +502,12 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) #define PERF_RECORD_MISC_GUEST_USER (5 << 0) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. + */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) /* * Indicates that the content of PERF_SAMPLE_IP points to * the actual instruction that triggered the event. See also diff --git a/kernel/events/core.c b/kernel/events/core.c index 8fac2056d51e..7da5e561e89a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5090,7 +5090,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) NULL); } -void perf_event_comm(struct task_struct *task) +void perf_event_comm(struct task_struct *task, bool exec) { struct perf_comm_event comm_event; @@ -5104,7 +5104,7 @@ void perf_event_comm(struct task_struct *task) .event_id = { .header = { .type = PERF_RECORD_COMM, - .misc = 0, + .misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0, /* .size */ }, /* .pid */ -- cgit 1.4.1