summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-07 08:50:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-07 08:50:34 -0700
commitf536b3cae84eb7c9f3495285ad048d13a397ed0b (patch)
treeb53eee1c45eb080168786e2f103e76d6706cbbb0 /tools
parente669830526a0abaf301bf408df69cde33901ac63 (diff)
parent537e5400a0a05c4efe70e7b372c19cfcd0179362 (diff)
downloadlinux-f536b3cae84eb7c9f3495285ad048d13a397ed0b.tar.gz
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Ben Herrenschmidt:
 "This is the powerpc new goodies for 3.17.  The short story:

  The biggest bit is Michael removing all of pre-POWER4 processor
  support from the 64-bit kernel.  POWER3 and rs64.  This gets rid of a
  ton of old cruft that has been bitrotting in a long while.  It was
  broken for quite a few versions already and nobody noticed.  Nobody
  uses those machines anymore.  While at it, he cleaned up a bunch of
  old dusty cabinets, getting rid of a skeletton or two.

  Then, we have some base VFIO support for KVM, which allows assigning
  of PCI devices to KVM guests, support for large 64-bit BARs on
  "powernv" platforms, support for HMI (Hardware Management Interrupts)
  on those same platforms, some sparse-vmemmap improvements (for memory
  hotplug),

  There is the usual batch of Freescale embedded updates (summary in the
  merge commit) and fixes here or there, I think that's it for the
  highlights"

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (102 commits)
  powerpc/eeh: Export eeh_iommu_group_to_pe()
  powerpc/eeh: Add missing #ifdef CONFIG_IOMMU_API
  powerpc: Reduce scariness of interrupt frames in stack traces
  powerpc: start loop at section start of start in vmemmap_populated()
  powerpc: implement vmemmap_free()
  powerpc: implement vmemmap_remove_mapping() for BOOK3S
  powerpc: implement vmemmap_list_free()
  powerpc: Fail remap_4k_pfn() if PFN doesn't fit inside PTE
  powerpc/book3s: Fix endianess issue for HMI handling on napping cpus.
  powerpc/book3s: handle HMIs for cpus in nap mode.
  powerpc/powernv: Invoke opal call to handle hmi.
  powerpc/book3s: Add basic infrastructure to handle HMI in Linux.
  powerpc/iommu: Fix comments with it_page_shift
  powerpc/powernv: Handle compound PE in config accessors
  powerpc/powernv: Handle compound PE for EEH
  powerpc/powernv: Handle compound PE
  powerpc/powernv: Split ioda_eeh_get_state()
  powerpc/powernv: Allow to freeze PE
  powerpc/powernv: Enable M64 aperatus for PHB3
  powerpc/eeh: Aux PE data for error log
  ...
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/powerpc/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile19
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_instructions.c30
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S271
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c261
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h1
-rw-r--r--tools/testing/selftests/powerpc/pmu/l3_bank_test.c48
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.c50
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h1
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c114
12 files changed, 617 insertions, 284 deletions
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 54833a791a44..74a78cedce37 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,10 +17,10 @@ TARGETS = pmu copyloops mm tm
 
 endif
 
-all:
-	@for TARGET in $(TARGETS); do \
-		$(MAKE) -C $$TARGET all; \
-	done;
+all: $(TARGETS)
+
+$(TARGETS):
+	$(MAKE) -k -C $@ all
 
 run_tests: all
 	@for TARGET in $(TARGETS); do \
@@ -36,4 +36,4 @@ clean:
 tags:
 	find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: all run_tests clean tags
+.PHONY: all run_tests clean tags $(TARGETS)
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index b9ff0db42c79..c9f4263906a5 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -1,10 +1,12 @@
 noarg:
 	$(MAKE) -C ../
 
-PROGS := count_instructions
-EXTRA_SOURCES := ../harness.c event.c
+PROGS := count_instructions l3_bank_test per_event_excludes
+EXTRA_SOURCES := ../harness.c event.c lib.c
 
-all: $(PROGS) sub_all
+SUB_TARGETS = ebb
+
+all: $(PROGS) $(SUB_TARGETS)
 
 $(PROGS): $(EXTRA_SOURCES)
 
@@ -20,13 +22,8 @@ run_tests: all sub_run_tests
 clean: sub_clean
 	rm -f $(PROGS) loop.o
 
-
-SUB_TARGETS = ebb
-
-sub_all:
-	@for TARGET in $(SUB_TARGETS); do \
-		$(MAKE) -C $$TARGET all; \
-	done;
+$(SUB_TARGETS):
+	$(MAKE) -k -C $@ all
 
 sub_run_tests: all
 	@for TARGET in $(SUB_TARGETS); do \
@@ -38,4 +35,4 @@ sub_clean:
 		$(MAKE) -C $$TARGET clean; \
 	done;
 
-.PHONY: all run_tests clean sub_all sub_run_tests sub_clean
+.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
diff --git a/tools/testing/selftests/powerpc/pmu/count_instructions.c b/tools/testing/selftests/powerpc/pmu/count_instructions.c
index 312b4f0fd27c..4622117b24c0 100644
--- a/tools/testing/selftests/powerpc/pmu/count_instructions.c
+++ b/tools/testing/selftests/powerpc/pmu/count_instructions.c
@@ -12,6 +12,7 @@
 
 #include "event.h"
 #include "utils.h"
+#include "lib.h"
 
 extern void thirty_two_instruction_loop(u64 loops);
 
@@ -90,7 +91,7 @@ static u64 determine_overhead(struct event *events)
 	return overhead;
 }
 
-static int count_instructions(void)
+static int test_body(void)
 {
 	struct event events[2];
 	u64 overhead;
@@ -111,17 +112,23 @@ static int count_instructions(void)
 	overhead = determine_overhead(events);
 	printf("Overhead of null loop: %llu instructions\n", overhead);
 
-	/* Run for 1M instructions */
-	FAIL_IF(do_count_loop(events, 0x100000, overhead, true));
+	/* Run for 1Mi instructions */
+	FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+	/* Run for 10Mi instructions */
+	FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+	/* Run for 100Mi instructions */
+	FAIL_IF(do_count_loop(events, 100000000, overhead, true));
 
-	/* Run for 10M instructions */
-	FAIL_IF(do_count_loop(events, 0xa00000, overhead, true));
+	/* Run for 1Bi instructions */
+	FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
 
-	/* Run for 100M instructions */
-	FAIL_IF(do_count_loop(events, 0x6400000, overhead, true));
+	/* Run for 16Bi instructions */
+	FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
 
-	/* Run for 1G instructions */
-	FAIL_IF(do_count_loop(events, 0x40000000, overhead, true));
+	/* Run for 64Bi instructions */
+	FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
 
 	event_close(&events[0]);
 	event_close(&events[1]);
@@ -129,6 +136,11 @@ static int count_instructions(void)
 	return 0;
 }
 
+static int count_instructions(void)
+{
+	return eat_cpu(test_body);
+}
+
 int main(void)
 {
 	return test_harness(count_instructions, "count_instructions");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index edbba2affc2c..3dc4332698cb 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -13,11 +13,12 @@ PROGS := reg_access_test event_attributes_test cycles_test	\
 	 close_clears_pmcc_test instruction_count_test		\
 	 fork_cleanup_test ebb_on_child_test			\
 	 ebb_on_willing_child_test back_to_back_ebbs_test	\
-	 lost_exception_test no_handler_test
+	 lost_exception_test no_handler_test			\
+	 cycles_with_mmcr2_test
 
 all: $(PROGS)
 
-$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c
+$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
 
 instruction_count_test: ../loop.S
 
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
new file mode 100644
index 000000000000..c7e4093f1cd3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(core_busy_loop)
+	stdu	%r1, -168(%r1)
+	std	r14, 160(%r1)
+	std	r15, 152(%r1)
+	std	r16, 144(%r1)
+	std	r17, 136(%r1)
+	std	r18, 128(%r1)
+	std	r19, 120(%r1)
+	std	r20, 112(%r1)
+	std	r21, 104(%r1)
+	std	r22, 96(%r1)
+	std	r23, 88(%r1)
+	std	r24, 80(%r1)
+	std	r25, 72(%r1)
+	std	r26, 64(%r1)
+	std	r27, 56(%r1)
+	std	r28, 48(%r1)
+	std	r29, 40(%r1)
+	std	r30, 32(%r1)
+	std	r31, 24(%r1)
+
+	li	 r3, 0x3030
+	std	 r3, -96(%r1)
+	li	 r4, 0x4040
+	std	 r4, -104(%r1)
+	li	 r5, 0x5050
+	std	 r5, -112(%r1)
+	li	 r6, 0x6060
+	std	 r6, -120(%r1)
+	li	 r7, 0x7070
+	std	 r7, -128(%r1)
+	li	 r8, 0x0808
+	std	 r8, -136(%r1)
+	li	 r9, 0x0909
+	std	 r9, -144(%r1)
+	li	r10, 0x1010
+	std	r10, -152(%r1)
+	li	r11, 0x1111
+	std	r11, -160(%r1)
+	li	r14, 0x1414
+	std	r14, -168(%r1)
+	li	r15, 0x1515
+	std	r15, -176(%r1)
+	li	r16, 0x1616
+	std	r16, -184(%r1)
+	li	r17, 0x1717
+	std	r17, -192(%r1)
+	li	r18, 0x1818
+	std	r18, -200(%r1)
+	li	r19, 0x1919
+	std	r19, -208(%r1)
+	li	r20, 0x2020
+	std	r20, -216(%r1)
+	li	r21, 0x2121
+	std	r21, -224(%r1)
+	li	r22, 0x2222
+	std	r22, -232(%r1)
+	li	r23, 0x2323
+	std	r23, -240(%r1)
+	li	r24, 0x2424
+	std	r24, -248(%r1)
+	li	r25, 0x2525
+	std	r25, -256(%r1)
+	li	r26, 0x2626
+	std	r26, -264(%r1)
+	li	r27, 0x2727
+	std	r27, -272(%r1)
+	li	r28, 0x2828
+	std	r28, -280(%r1)
+	li	r29, 0x2929
+	std	r29, -288(%r1)
+	li	r30, 0x3030
+	li	r31, 0x3131
+
+	li	r3, 0
+0:	addi	r3, r3, 1
+	cmpwi	r3, 100
+	blt	0b
+
+	/* Return 1 (fail) unless we get through all the checks */
+	li	r3, 1
+
+	/* Check none of our registers have been corrupted */
+	cmpwi	r4,  0x4040
+	bne	1f
+	cmpwi	r5,  0x5050
+	bne	1f
+	cmpwi	r6,  0x6060
+	bne	1f
+	cmpwi	r7,  0x7070
+	bne	1f
+	cmpwi	r8,  0x0808
+	bne	1f
+	cmpwi	r9,  0x0909
+	bne	1f
+	cmpwi	r10, 0x1010
+	bne	1f
+	cmpwi	r11, 0x1111
+	bne	1f
+	cmpwi	r14, 0x1414
+	bne	1f
+	cmpwi	r15, 0x1515
+	bne	1f
+	cmpwi	r16, 0x1616
+	bne	1f
+	cmpwi	r17, 0x1717
+	bne	1f
+	cmpwi	r18, 0x1818
+	bne	1f
+	cmpwi	r19, 0x1919
+	bne	1f
+	cmpwi	r20, 0x2020
+	bne	1f
+	cmpwi	r21, 0x2121
+	bne	1f
+	cmpwi	r22, 0x2222
+	bne	1f
+	cmpwi	r23, 0x2323
+	bne	1f
+	cmpwi	r24, 0x2424
+	bne	1f
+	cmpwi	r25, 0x2525
+	bne	1f
+	cmpwi	r26, 0x2626
+	bne	1f
+	cmpwi	r27, 0x2727
+	bne	1f
+	cmpwi	r28, 0x2828
+	bne	1f
+	cmpwi	r29, 0x2929
+	bne	1f
+	cmpwi	r30, 0x3030
+	bne	1f
+	cmpwi	r31, 0x3131
+	bne	1f
+
+	/* Load junk into all our registers before we reload them from the stack. */
+	li	r3,  0xde
+	li	r4,  0xad
+	li	r5,  0xbe
+	li	r6,  0xef
+	li	r7,  0xde
+	li	r8,  0xad
+	li	r9,  0xbe
+	li	r10, 0xef
+	li	r11, 0xde
+	li	r14, 0xad
+	li	r15, 0xbe
+	li	r16, 0xef
+	li	r17, 0xde
+	li	r18, 0xad
+	li	r19, 0xbe
+	li	r20, 0xef
+	li	r21, 0xde
+	li	r22, 0xad
+	li	r23, 0xbe
+	li	r24, 0xef
+	li	r25, 0xde
+	li	r26, 0xad
+	li	r27, 0xbe
+	li	r28, 0xef
+	li	r29, 0xdd
+
+	ld	r3,	-96(%r1)
+	cmpwi	r3,  0x3030
+	bne	1f
+	ld	r4,	-104(%r1)
+	cmpwi	r4,  0x4040
+	bne	1f
+	ld	r5,	-112(%r1)
+	cmpwi	r5,  0x5050
+	bne	1f
+	ld	r6,	-120(%r1)
+	cmpwi	r6,  0x6060
+	bne	1f
+	ld	r7,	-128(%r1)
+	cmpwi	r7,  0x7070
+	bne	1f
+	ld	r8,	-136(%r1)
+	cmpwi	r8,  0x0808
+	bne	1f
+	ld	r9,	-144(%r1)
+	cmpwi	r9,  0x0909
+	bne	1f
+	ld	r10, -152(%r1)
+	cmpwi	r10, 0x1010
+	bne	1f
+	ld	r11, -160(%r1)
+	cmpwi	r11, 0x1111
+	bne	1f
+	ld	r14, -168(%r1)
+	cmpwi	r14, 0x1414
+	bne	1f
+	ld	r15, -176(%r1)
+	cmpwi	r15, 0x1515
+	bne	1f
+	ld	r16, -184(%r1)
+	cmpwi	r16, 0x1616
+	bne	1f
+	ld	r17, -192(%r1)
+	cmpwi	r17, 0x1717
+	bne	1f
+	ld	r18, -200(%r1)
+	cmpwi	r18, 0x1818
+	bne	1f
+	ld	r19, -208(%r1)
+	cmpwi	r19, 0x1919
+	bne	1f
+	ld	r20, -216(%r1)
+	cmpwi	r20, 0x2020
+	bne	1f
+	ld	r21, -224(%r1)
+	cmpwi	r21, 0x2121
+	bne	1f
+	ld	r22, -232(%r1)
+	cmpwi	r22, 0x2222
+	bne	1f
+	ld	r23, -240(%r1)
+	cmpwi	r23, 0x2323
+	bne	1f
+	ld	r24, -248(%r1)
+	cmpwi	r24, 0x2424
+	bne	1f
+	ld	r25, -256(%r1)
+	cmpwi	r25, 0x2525
+	bne	1f
+	ld	r26, -264(%r1)
+	cmpwi	r26, 0x2626
+	bne	1f
+	ld	r27, -272(%r1)
+	cmpwi	r27, 0x2727
+	bne	1f
+	ld	r28, -280(%r1)
+	cmpwi	r28, 0x2828
+	bne	1f
+	ld	r29, -288(%r1)
+	cmpwi	r29, 0x2929
+	bne	1f
+
+	/* Load 0 (success) to return */
+	li	r3, 0
+
+1:	ld	r14, 160(%r1)
+	ld	r15, 152(%r1)
+	ld	r16, 144(%r1)
+	ld	r17, 136(%r1)
+	ld	r18, 128(%r1)
+	ld	r19, 120(%r1)
+	ld	r20, 112(%r1)
+	ld	r21, 104(%r1)
+	ld	r22, 96(%r1)
+	ld	r23, 88(%r1)
+	ld	r24, 80(%r1)
+	ld	r25, 72(%r1)
+	ld	r26, 64(%r1)
+	ld	r27, 56(%r1)
+	ld	r28, 48(%r1)
+	ld	r29, 40(%r1)
+	ld	r30, 32(%r1)
+	ld	r31, 24(%r1)
+	addi	%r1, %r1, 168
+	blr
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
new file mode 100644
index 000000000000..d43029b0800c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while manipulating the user accessible bits in MMCR2.
+ */
+
+/* We use two values because the first freezes PMC1 and so we would get no EBBs */
+#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define MMCR2_EXPECTED_2 0x0020100804020000UL /* (     FC2P|FC3P|FC4P|FC5P|FC6P) */
+
+
+int cycles_with_mmcr2(void)
+{
+	struct event event;
+	uint64_t val, expected[2], actual;
+	int i;
+	bool bad_mmcr2;
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* XXX Set of MMCR2 must be after enable */
+	expected[0] = MMCR2_EXPECTED_1;
+	expected[1] = MMCR2_EXPECTED_2;
+	i = 0;
+	bad_mmcr2 = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		mtspr(SPRN_MMCR2, expected[i % 2]);
+
+		FAIL_IF(core_busy_loop());
+
+		val = mfspr(SPRN_MMCR2);
+		if (val != expected[i % 2]) {
+			bad_mmcr2 = true;
+			actual = val;
+		}
+
+		i++;
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	count_pmc(1, sample_period);
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	if (bad_mmcr2)
+		printf("Bad MMCR2 value seen is 0x%lx\n", actual);
+
+	FAIL_IF(bad_mmcr2);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index 1b46be94b64c..d7a72ce696b5 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -224,6 +224,7 @@ void dump_ebb_hw_state(void)
 
 	printf("HW state:\n"		\
 	       "MMCR0 0x%016x %s\n"	\
+	       "MMCR2 0x%016lx\n"	\
 	       "EBBHR 0x%016lx\n"	\
 	       "BESCR 0x%016llx %s\n"	\
 	       "PMC1  0x%016lx\n"	\
@@ -233,10 +234,11 @@ void dump_ebb_hw_state(void)
 	       "PMC5  0x%016lx\n"	\
 	       "PMC6  0x%016lx\n"	\
 	       "SIAR  0x%016lx\n",
-	       mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_EBBHR), bescr,
-	       decode_bescr(bescr), mfspr(SPRN_PMC1), mfspr(SPRN_PMC2),
-	       mfspr(SPRN_PMC3), mfspr(SPRN_PMC4), mfspr(SPRN_PMC5),
-	       mfspr(SPRN_PMC6), mfspr(SPRN_SIAR));
+	       mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_MMCR2),
+	       mfspr(SPRN_EBBHR), bescr, decode_bescr(bescr),
+	       mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
+	       mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
+	       mfspr(SPRN_SIAR));
 }
 
 void dump_ebb_state(void)
@@ -335,257 +337,6 @@ void event_leader_ebb_init(struct event *e)
 	e->attr.pinned = 1;
 }
 
-int core_busy_loop(void)
-{
-	int rc;
-
-	asm volatile (
-		"li  3,  0x3030\n"
-		"std 3,  -96(1)\n"
-		"li  4,  0x4040\n"
-		"std 4,  -104(1)\n"
-		"li  5,  0x5050\n"
-		"std 5,  -112(1)\n"
-		"li  6,  0x6060\n"
-		"std 6,  -120(1)\n"
-		"li  7,  0x7070\n"
-		"std 7,  -128(1)\n"
-		"li  8,  0x0808\n"
-		"std 8,  -136(1)\n"
-		"li  9,  0x0909\n"
-		"std 9,  -144(1)\n"
-		"li  10, 0x1010\n"
-		"std 10, -152(1)\n"
-		"li  11, 0x1111\n"
-		"std 11, -160(1)\n"
-		"li  14, 0x1414\n"
-		"std 14, -168(1)\n"
-		"li  15, 0x1515\n"
-		"std 15, -176(1)\n"
-		"li  16, 0x1616\n"
-		"std 16, -184(1)\n"
-		"li  17, 0x1717\n"
-		"std 17, -192(1)\n"
-		"li  18, 0x1818\n"
-		"std 18, -200(1)\n"
-		"li  19, 0x1919\n"
-		"std 19, -208(1)\n"
-		"li  20, 0x2020\n"
-		"std 20, -216(1)\n"
-		"li  21, 0x2121\n"
-		"std 21, -224(1)\n"
-		"li  22, 0x2222\n"
-		"std 22, -232(1)\n"
-		"li  23, 0x2323\n"
-		"std 23, -240(1)\n"
-		"li  24, 0x2424\n"
-		"std 24, -248(1)\n"
-		"li  25, 0x2525\n"
-		"std 25, -256(1)\n"
-		"li  26, 0x2626\n"
-		"std 26, -264(1)\n"
-		"li  27, 0x2727\n"
-		"std 27, -272(1)\n"
-		"li  28, 0x2828\n"
-		"std 28, -280(1)\n"
-		"li  29, 0x2929\n"
-		"std 29, -288(1)\n"
-		"li  30, 0x3030\n"
-		"li  31, 0x3131\n"
-
-		"li    3,  0\n"
-		"0: "
-		"addi  3, 3, 1\n"
-		"cmpwi 3, 100\n"
-		"blt   0b\n"
-
-		/* Return 1 (fail) unless we get through all the checks */
-		"li	0, 1\n"
-
-		/* Check none of our registers have been corrupted */
-		"cmpwi  4,  0x4040\n"
-		"bne	1f\n"
-		"cmpwi  5,  0x5050\n"
-		"bne	1f\n"
-		"cmpwi  6,  0x6060\n"
-		"bne	1f\n"
-		"cmpwi  7,  0x7070\n"
-		"bne	1f\n"
-		"cmpwi  8,  0x0808\n"
-		"bne	1f\n"
-		"cmpwi  9,  0x0909\n"
-		"bne	1f\n"
-		"cmpwi  10, 0x1010\n"
-		"bne	1f\n"
-		"cmpwi  11, 0x1111\n"
-		"bne	1f\n"
-		"cmpwi  14, 0x1414\n"
-		"bne	1f\n"
-		"cmpwi  15, 0x1515\n"
-		"bne	1f\n"
-		"cmpwi  16, 0x1616\n"
-		"bne	1f\n"
-		"cmpwi  17, 0x1717\n"
-		"bne	1f\n"
-		"cmpwi  18, 0x1818\n"
-		"bne	1f\n"
-		"cmpwi  19, 0x1919\n"
-		"bne	1f\n"
-		"cmpwi  20, 0x2020\n"
-		"bne	1f\n"
-		"cmpwi  21, 0x2121\n"
-		"bne	1f\n"
-		"cmpwi  22, 0x2222\n"
-		"bne	1f\n"
-		"cmpwi  23, 0x2323\n"
-		"bne	1f\n"
-		"cmpwi  24, 0x2424\n"
-		"bne	1f\n"
-		"cmpwi  25, 0x2525\n"
-		"bne	1f\n"
-		"cmpwi  26, 0x2626\n"
-		"bne	1f\n"
-		"cmpwi  27, 0x2727\n"
-		"bne	1f\n"
-		"cmpwi  28, 0x2828\n"
-		"bne	1f\n"
-		"cmpwi  29, 0x2929\n"
-		"bne	1f\n"
-		"cmpwi  30, 0x3030\n"
-		"bne	1f\n"
-		"cmpwi  31, 0x3131\n"
-		"bne	1f\n"
-
-		/* Load junk into all our registers before we reload them from the stack. */
-		"li  3,  0xde\n"
-		"li  4,  0xad\n"
-		"li  5,  0xbe\n"
-		"li  6,  0xef\n"
-		"li  7,  0xde\n"
-		"li  8,  0xad\n"
-		"li  9,  0xbe\n"
-		"li  10, 0xef\n"
-		"li  11, 0xde\n"
-		"li  14, 0xad\n"
-		"li  15, 0xbe\n"
-		"li  16, 0xef\n"
-		"li  17, 0xde\n"
-		"li  18, 0xad\n"
-		"li  19, 0xbe\n"
-		"li  20, 0xef\n"
-		"li  21, 0xde\n"
-		"li  22, 0xad\n"
-		"li  23, 0xbe\n"
-		"li  24, 0xef\n"
-		"li  25, 0xde\n"
-		"li  26, 0xad\n"
-		"li  27, 0xbe\n"
-		"li  28, 0xef\n"
-		"li  29, 0xdd\n"
-
-		"ld     3,  -96(1)\n"
-		"cmpwi  3,  0x3030\n"
-		"bne	1f\n"
-		"ld     4,  -104(1)\n"
-		"cmpwi  4,  0x4040\n"
-		"bne	1f\n"
-		"ld     5,  -112(1)\n"
-		"cmpwi  5,  0x5050\n"
-		"bne	1f\n"
-		"ld     6,  -120(1)\n"
-		"cmpwi  6,  0x6060\n"
-		"bne	1f\n"
-		"ld     7,  -128(1)\n"
-		"cmpwi  7,  0x7070\n"
-		"bne	1f\n"
-		"ld     8,  -136(1)\n"
-		"cmpwi  8,  0x0808\n"
-		"bne	1f\n"
-		"ld     9,  -144(1)\n"
-		"cmpwi  9,  0x0909\n"
-		"bne	1f\n"
-		"ld     10, -152(1)\n"
-		"cmpwi  10, 0x1010\n"
-		"bne	1f\n"
-		"ld     11, -160(1)\n"
-		"cmpwi  11, 0x1111\n"
-		"bne	1f\n"
-		"ld     14, -168(1)\n"
-		"cmpwi  14, 0x1414\n"
-		"bne	1f\n"
-		"ld     15, -176(1)\n"
-		"cmpwi  15, 0x1515\n"
-		"bne	1f\n"
-		"ld     16, -184(1)\n"
-		"cmpwi  16, 0x1616\n"
-		"bne	1f\n"
-		"ld     17, -192(1)\n"
-		"cmpwi  17, 0x1717\n"
-		"bne	1f\n"
-		"ld     18, -200(1)\n"
-		"cmpwi  18, 0x1818\n"
-		"bne	1f\n"
-		"ld     19, -208(1)\n"
-		"cmpwi  19, 0x1919\n"
-		"bne	1f\n"
-		"ld     20, -216(1)\n"
-		"cmpwi  20, 0x2020\n"
-		"bne	1f\n"
-		"ld     21, -224(1)\n"
-		"cmpwi  21, 0x2121\n"
-		"bne	1f\n"
-		"ld     22, -232(1)\n"
-		"cmpwi  22, 0x2222\n"
-		"bne	1f\n"
-		"ld     23, -240(1)\n"
-		"cmpwi  23, 0x2323\n"
-		"bne	1f\n"
-		"ld     24, -248(1)\n"
-		"cmpwi  24, 0x2424\n"
-		"bne	1f\n"
-		"ld     25, -256(1)\n"
-		"cmpwi  25, 0x2525\n"
-		"bne	1f\n"
-		"ld     26, -264(1)\n"
-		"cmpwi  26, 0x2626\n"
-		"bne	1f\n"
-		"ld     27, -272(1)\n"
-		"cmpwi  27, 0x2727\n"
-		"bne	1f\n"
-		"ld     28, -280(1)\n"
-		"cmpwi  28, 0x2828\n"
-		"bne	1f\n"
-		"ld     29, -288(1)\n"
-		"cmpwi  29, 0x2929\n"
-		"bne	1f\n"
-
-		/* Load 0 (success) to return */
-		"li	0, 0\n"
-
-		"1: 	mr %0, 0\n"
-
-		: "=r" (rc)
-		: /* no inputs */
-		: "3", "4", "5", "6", "7", "8", "9", "10", "11", "14",
-		  "15", "16", "17", "18", "19", "20", "21", "22", "23",
-		   "24", "25", "26", "27", "28", "29", "30", "31",
-		   "memory"
-	);
-
-	return rc;
-}
-
-int core_busy_loop_with_freeze(void)
-{
-	int rc;
-
-	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
-	rc = core_busy_loop();
-	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
-
-	return rc;
-}
-
 int ebb_child(union pipe read_pipe, union pipe write_pipe)
 {
 	struct event event;
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
index e62bde05bf78..e44eee5d97ca 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -70,7 +70,6 @@ int ebb_check_mmcr0(void);
 extern u64 sample_period;
 
 int core_busy_loop(void);
-int core_busy_loop_with_freeze(void);
 int ebb_child(union pipe read_pipe, union pipe write_pipe);
 int catch_sigill(void (*func)(void));
 void write_pmc1(void);
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
new file mode 100644
index 000000000000..77472f31441e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "event.h"
+#include "utils.h"
+
+#define MALLOC_SIZE     (0x10000 * 10)  /* Ought to be enough .. */
+
+/*
+ * Tests that the L3 bank handling is correct. We fixed it in commit e9aaac1.
+ */
+static int l3_bank_test(void)
+{
+	struct event event;
+	char *p;
+	int i;
+
+	p = malloc(MALLOC_SIZE);
+	FAIL_IF(!p);
+
+	event_init(&event, 0x84918F);
+
+	FAIL_IF(event_open(&event));
+
+	for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+		p[i] = i;
+
+	event_read(&event);
+	event_report(&event);
+
+	FAIL_IF(event.result.running == 0);
+	FAIL_IF(event.result.enabled == 0);
+
+	event_close(&event);
+	free(p);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(l3_bank_test, "l3_bank_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
index 0f6a4731d546..9768dea37bf3 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -5,10 +5,15 @@
 
 #define _GNU_SOURCE	/* For CPU_ZERO etc. */
 
+#include <elf.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <link.h>
 #include <sched.h>
 #include <setjmp.h>
 #include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 #include <sys/wait.h>
 
 #include "utils.h"
@@ -177,8 +182,8 @@ struct addr_range libc, vdso;
 
 int parse_proc_maps(void)
 {
+	unsigned long start, end;
 	char execute, name[128];
-	uint64_t start, end;
 	FILE *f;
 	int rc;
 
@@ -250,3 +255,46 @@ out_close:
 out:
 	return rc;
 }
+
+static char auxv[4096];
+
+void *get_auxv_entry(int type)
+{
+	ElfW(auxv_t) *p;
+	void *result;
+	ssize_t num;
+	int fd;
+
+	fd = open("/proc/self/auxv", O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		return NULL;
+	}
+
+	result = NULL;
+
+	num = read(fd, auxv, sizeof(auxv));
+	if (num < 0) {
+		perror("read");
+		goto out;
+	}
+
+	if (num > sizeof(auxv)) {
+		printf("Overflowed auxv buffer\n");
+		goto out;
+	}
+
+	p = (ElfW(auxv_t) *)auxv;
+
+	while (p->a_type != AT_NULL) {
+		if (p->a_type == type) {
+			result = (void *)p->a_un.a_val;
+			break;
+		}
+
+		p++;
+	}
+out:
+	close(fd);
+	return result;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index ca5d72ae3be6..0f0339c8a6f6 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -29,6 +29,7 @@ extern int notify_parent(union pipe write_pipe);
 extern int notify_parent_of_error(union pipe write_pipe);
 extern pid_t eat_cpu(int (test_function)(void));
 extern bool require_paranoia_below(int level);
+extern void *get_auxv_entry(int type);
 
 struct addr_range {
 	uint64_t first, last;
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
new file mode 100644
index 000000000000..fddbbc9cae2f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "lib.h"
+#include "utils.h"
+
+/*
+ * Test that per-event excludes work.
+ */
+
+static int per_event_excludes(void)
+{
+	struct event *e, events[4];
+	char *platform;
+	int i;
+
+	platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
+	FAIL_IF(!platform);
+	SKIP_IF(strcmp(platform, "power8") != 0);
+
+	/*
+	 * We need to create the events disabled, otherwise the running/enabled
+	 * counts don't match up.
+	 */
+	e = &events[0];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions");
+	e->attr.disabled = 1;
+
+	e = &events[1];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(k)");
+	e->attr.disabled = 1;
+	e->attr.exclude_user = 1;
+	e->attr.exclude_hv = 1;
+
+	e = &events[2];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(h)");
+	e->attr.disabled = 1;
+	e->attr.exclude_user = 1;
+	e->attr.exclude_kernel = 1;
+
+	e = &events[3];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(u)");
+	e->attr.disabled = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_kernel = 1;
+
+	FAIL_IF(event_open(&events[0]));
+
+	/*
+	 * The open here will fail if we don't have per event exclude support,
+	 * because the second event has an incompatible set of exclude settings
+	 * and we're asking for the events to be in a group.
+	 */
+	for (i = 1; i < 4; i++)
+		FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+	/*
+	 * Even though the above will fail without per-event excludes we keep
+	 * testing in order to be thorough.
+	 */
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* Spin for a while */
+	for (i = 0; i < INT_MAX; i++)
+		asm volatile("" : : : "memory");
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	for (i = 0; i < 4; i++) {
+		FAIL_IF(event_read(&events[i]));
+		event_report(&events[i]);
+	}
+
+	/*
+	 * We should see that all events have enabled == running. That
+	 * shows that they were all on the PMU at once.
+	 */
+	for (i = 0; i < 4; i++)
+		FAIL_IF(events[i].result.running != events[i].result.enabled);
+
+	/*
+	 * We can also check that the result for instructions is >= all the
+	 * other counts. That's because it is counting all instructions while
+	 * the others are counting a subset.
+	 */
+	for (i = 1; i < 4; i++)
+		FAIL_IF(events[0].result.value < events[i].result.value);
+
+	for (i = 0; i < 4; i++)
+		event_close(&events[i]);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(per_event_excludes, "per_event_excludes");
+}