summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-01-11 11:47:45 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-01-11 11:47:45 -0800
commitddb321a8dd158520d97ed1cbade1d4ac36b6af31 (patch)
tree842f5550c5947d4aebff56dcd1091950b0cc0f82 /arch
parent1e6c3e8f8fb94a8914a380e02a7e8ad81d47273e (diff)
parent5306c31c5733cb4a79cc002e0c3ad256fd439614 (diff)
downloadlinux-ddb321a8dd158520d97ed1cbade1d4ac36b6af31.tar.gz
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
 "Mostly tooling fixes, but also some kernel side fixes: uncore PMU
  driver fix, user regs sampling fix and an instruction decoder fix that
  unbreaks PEBS precise sampling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/uncore/hsw-ep: Handle systems with only two SBOXes
  perf/x86_64: Improve user regs sampling
  perf: Move task_pt_regs sampling into arch code
  x86: Fix off-by-one in instruction decoder
  perf hists browser: Fix segfault when showing callchain
  perf callchain: Free callchains when hist entries are deleted
  perf hists: Fix children sort key behavior
  perf diff: Fix to sort by baseline field by default
  perf list: Fix --raw-dump option
  perf probe: Fix crash in dwarf_getcfi_elf
  perf probe: Fix to fall back to find probe point in symbols
  perf callchain: Append callchains only when requested
  perf ui/tui: Print backtrace symbols when segfault occurs
  perf report: Show progress bar for output resorting
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/kernel/perf_regs.c8
-rw-r--r--arch/arm64/kernel/perf_regs.c8
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c17
-rw-r--r--arch/x86/kernel/perf_regs.c90
-rw-r--r--arch/x86/lib/insn.c2
6 files changed, 125 insertions, 2 deletions
diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c
index 6e4379c67cbc..592dda3f21ff 100644
--- a/arch/arm/kernel/perf_regs.c
+++ b/arch/arm/kernel/perf_regs.c
@@ -28,3 +28,11 @@ u64 perf_reg_abi(struct task_struct *task)
 {
 	return PERF_SAMPLE_REGS_ABI_32;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 6762ad705587..3f62b35fb6f1 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -50,3 +50,11 @@ u64 perf_reg_abi(struct task_struct *task)
 	else
 		return PERF_SAMPLE_REGS_ABI_64;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 18eb78bbdd10..863d9b02563e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -17,7 +17,7 @@
 #define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
 #define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)
 #define UNCORE_EXTRA_PCI_DEV		0xff
-#define UNCORE_EXTRA_PCI_DEV_MAX	2
+#define UNCORE_EXTRA_PCI_DEV_MAX	3
 
 /* support up to 8 sockets */
 #define UNCORE_SOCKET_MAX		8
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
index 745b158e9a65..21af6149edf2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c
@@ -891,6 +891,7 @@ void snbep_uncore_cpu_init(void)
 enum {
 	SNBEP_PCI_QPI_PORT0_FILTER,
 	SNBEP_PCI_QPI_PORT1_FILTER,
+	HSWEP_PCI_PCU_3,
 };
 
 static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
@@ -2026,6 +2027,17 @@ void hswep_uncore_cpu_init(void)
 {
 	if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
 		hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+
+	/* Detect 6-8 core systems with only two SBOXes */
+	if (uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3]) {
+		u32 capid4;
+
+		pci_read_config_dword(uncore_extra_pci_dev[0][HSWEP_PCI_PCU_3],
+				      0x94, &capid4);
+		if (((capid4 >> 6) & 0x3) == 0)
+			hswep_uncore_sbox.num_boxes = 2;
+	}
+
 	uncore_msr_uncores = hswep_msr_uncores;
 }
 
@@ -2287,6 +2299,11 @@ static DEFINE_PCI_DEVICE_TABLE(hswep_uncore_pci_ids) = {
 		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
 						   SNBEP_PCI_QPI_PORT1_FILTER),
 	},
+	{ /* PCU.3 (for Capability registers) */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2fc0),
+		.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+						   HSWEP_PCI_PCU_3),
+	},
 	{ /* end: all zeroes */ }
 };
 
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index e309cc5c276e..781861cc5ee8 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -78,6 +78,14 @@ u64 perf_reg_abi(struct task_struct *task)
 {
 	return PERF_SAMPLE_REGS_ABI_32;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = perf_reg_abi(current);
+}
 #else /* CONFIG_X86_64 */
 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
 		       (1ULL << PERF_REG_X86_ES) | \
@@ -102,4 +110,86 @@ u64 perf_reg_abi(struct task_struct *task)
 	else
 		return PERF_SAMPLE_REGS_ABI_64;
 }
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs,
+			struct pt_regs *regs_user_copy)
+{
+	struct pt_regs *user_regs = task_pt_regs(current);
+
+	/*
+	 * If we're in an NMI that interrupted task_pt_regs setup, then
+	 * we can't sample user regs at all.  This check isn't really
+	 * sufficient, though, as we could be in an NMI inside an interrupt
+	 * that happened during task_pt_regs setup.
+	 */
+	if (regs->sp > (unsigned long)&user_regs->r11 &&
+	    regs->sp <= (unsigned long)(user_regs + 1)) {
+		regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+		regs_user->regs = NULL;
+		return;
+	}
+
+	/*
+	 * RIP, flags, and the argument registers are usually saved.
+	 * orig_ax is probably okay, too.
+	 */
+	regs_user_copy->ip = user_regs->ip;
+	regs_user_copy->cx = user_regs->cx;
+	regs_user_copy->dx = user_regs->dx;
+	regs_user_copy->si = user_regs->si;
+	regs_user_copy->di = user_regs->di;
+	regs_user_copy->r8 = user_regs->r8;
+	regs_user_copy->r9 = user_regs->r9;
+	regs_user_copy->r10 = user_regs->r10;
+	regs_user_copy->r11 = user_regs->r11;
+	regs_user_copy->orig_ax = user_regs->orig_ax;
+	regs_user_copy->flags = user_regs->flags;
+
+	/*
+	 * Don't even try to report the "rest" regs.
+	 */
+	regs_user_copy->bx = -1;
+	regs_user_copy->bp = -1;
+	regs_user_copy->r12 = -1;
+	regs_user_copy->r13 = -1;
+	regs_user_copy->r14 = -1;
+	regs_user_copy->r15 = -1;
+
+	/*
+	 * For this to be at all useful, we need a reasonable guess for
+	 * sp and the ABI.  Be careful: we're in NMI context, and we're
+	 * considering current to be the current task, so we should
+	 * be careful not to look at any other percpu variables that might
+	 * change during context switches.
+	 */
+	if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
+	    task_thread_info(current)->status & TS_COMPAT) {
+		/* Easy case: we're in a compat syscall. */
+		regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
+		regs_user_copy->sp = user_regs->sp;
+		regs_user_copy->cs = user_regs->cs;
+		regs_user_copy->ss = user_regs->ss;
+	} else if (user_regs->orig_ax != -1) {
+		/*
+		 * We're probably in a 64-bit syscall.
+		 * Warning: this code is severely racy.  At least it's better
+		 * than just blindly copying user_regs.
+		 */
+		regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
+		regs_user_copy->sp = this_cpu_read(old_rsp);
+		regs_user_copy->cs = __USER_CS;
+		regs_user_copy->ss = __USER_DS;
+		regs_user_copy->cx = -1;  /* usually contains garbage */
+	} else {
+		/* We're probably in an interrupt or exception. */
+		regs_user->abi = user_64bit_mode(user_regs) ?
+			PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
+		regs_user_copy->sp = user_regs->sp;
+		regs_user_copy->cs = user_regs->cs;
+		regs_user_copy->ss = user_regs->ss;
+	}
+
+	regs_user->regs = regs_user_copy;
+}
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 2480978b31cc..1313ae6b478b 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -28,7 +28,7 @@
 
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)	\
-	((insn)->next_byte + sizeof(t) + n < (insn)->end_kaddr)
+	((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
 
 #define __get_next(t, insn)	\
 	({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })