Diffstat (limited to 'tools')
-rw-r--r--  tools/arch/x86/include/asm/msr-index.h | 3
-rw-r--r--  tools/build/Makefile.feature | 2
-rw-r--r--  tools/build/feature/Makefile | 2
-rw-r--r--  tools/build/feature/test-all.c | 5
-rw-r--r--  tools/include/linux/compiler-gcc.h | 12
-rw-r--r--  tools/include/linux/compiler.h | 3
-rw-r--r--  tools/perf/Documentation/itrace.txt | 6
-rw-r--r--  tools/perf/Documentation/perf-c2c.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-config.txt | 5
-rw-r--r--  tools/perf/Documentation/perf-intel-pt.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 15
-rw-r--r--  tools/perf/Documentation/perf-stat.txt | 33
-rw-r--r--  tools/perf/Documentation/perf-top.txt | 11
-rw-r--r--  tools/perf/Documentation/security.txt | 237
-rw-r--r--  tools/perf/Makefile.config | 43
-rw-r--r--  tools/perf/Makefile.perf | 14
-rw-r--r--  tools/perf/arch/arm/util/cs-etm.c | 7
-rw-r--r--  tools/perf/arch/arm64/util/unwind-libdw.c | 6
-rw-r--r--  tools/perf/arch/powerpc/util/Build | 1
-rw-r--r--  tools/perf/arch/powerpc/util/unwind-libdw.c | 6
-rw-r--r--  tools/perf/arch/x86/tests/dwarf-unwind.c | 8
-rw-r--r--  tools/perf/arch/x86/util/intel-pt.c | 41
-rw-r--r--  tools/perf/arch/x86/util/unwind-libdw.c | 6
-rw-r--r--  tools/perf/bench/epoll-ctl.c | 4
-rw-r--r--  tools/perf/bench/epoll-wait.c | 4
-rw-r--r--  tools/perf/bench/sched-messaging.c | 2
-rw-r--r--  tools/perf/builtin-annotate.c | 2
-rw-r--r--  tools/perf/builtin-bench.c | 6
-rw-r--r--  tools/perf/builtin-c2c.c | 9
-rw-r--r--  tools/perf/builtin-evlist.c | 2
-rw-r--r--  tools/perf/builtin-ftrace.c | 15
-rw-r--r--  tools/perf/builtin-inject.c | 2
-rw-r--r--  tools/perf/builtin-list.c | 2
-rw-r--r--  tools/perf/builtin-mem.c | 24
-rw-r--r--  tools/perf/builtin-probe.c | 3
-rw-r--r--  tools/perf/builtin-record.c | 69
-rw-r--r--  tools/perf/builtin-report.c | 37
-rw-r--r--  tools/perf/builtin-script.c | 41
-rw-r--r--  tools/perf/builtin-stat.c | 181
-rw-r--r--  tools/perf/builtin-timechart.c | 2
-rw-r--r--  tools/perf/builtin-top.c | 12
-rw-r--r--  tools/perf/builtin-trace.c | 92
-rwxr-xr-x  tools/perf/check-headers.sh | 4
-rw-r--r--  tools/perf/jvmti/libjvmti.c | 92
-rw-r--r--  tools/perf/pmu-events/arch/powerpc/power8/metrics.json | 2
-rw-r--r--  tools/perf/pmu-events/arch/powerpc/power9/metrics.json | 149
-rw-r--r--  tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | 10
-rw-r--r--  tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json | 4
-rw-r--r--  tools/perf/pmu-events/jsmn.h | 2
-rw-r--r--  tools/perf/tests/Build | 2
-rw-r--r--  tools/perf/tests/attr/system-wide-dummy | 50
-rw-r--r--  tools/perf/tests/attr/test-record-C0 | 12
-rw-r--r--  tools/perf/tests/builtin-test.c | 65
-rw-r--r--  tools/perf/tests/demangle-java-test.c | 42
-rw-r--r--  tools/perf/tests/dwarf-unwind.c | 11
-rw-r--r--  tools/perf/tests/evsel-roundtrip-name.c | 5
-rw-r--r--  tools/perf/tests/evsel-tp-sched.c | 8
-rw-r--r--  tools/perf/tests/expr.c | 46
-rw-r--r--  tools/perf/tests/hists_cumulate.c | 2
-rw-r--r--  tools/perf/tests/hists_filter.c | 2
-rw-r--r--  tools/perf/tests/hists_output.c | 10
-rw-r--r--  tools/perf/tests/make | 10
-rw-r--r--  tools/perf/tests/mmap-basic.c | 4
-rw-r--r--  tools/perf/tests/openat-syscall-all-cpus.c | 8
-rw-r--r--  tools/perf/tests/openat-syscall-tp-fields.c | 4
-rw-r--r--  tools/perf/tests/openat-syscall.c | 2
-rw-r--r--  tools/perf/tests/pfm.c | 203
-rw-r--r--  tools/perf/tests/pmu-events.c | 173
-rw-r--r--  tools/perf/tests/pmu.c | 4
-rw-r--r--  tools/perf/tests/sw-clock.c | 2
-rw-r--r--  tools/perf/tests/tests.h | 8
-rwxr-xr-x  tools/perf/trace/beauty/arch_errno_names.sh | 4
-rw-r--r--  tools/perf/util/Build | 8
-rw-r--r--  tools/perf/util/annotate.c | 1
-rw-r--r--  tools/perf/util/annotate.h | 4
-rw-r--r--  tools/perf/util/arm-spe-decoder/Build | 1
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-decoder.c | 219
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-decoder.h | 82
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c (renamed from tools/perf/util/arm-spe-pkt-decoder.c) | 0
-rw-r--r--  tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h (renamed from tools/perf/util/arm-spe-pkt-decoder.h) | 16
-rw-r--r--  tools/perf/util/arm-spe.c | 823
-rw-r--r--  tools/perf/util/auxtrace.c | 22
-rw-r--r--  tools/perf/util/auxtrace.h | 15
-rw-r--r--  tools/perf/util/bpf-loader.c | 2
-rw-r--r--  tools/perf/util/branch.h | 2
-rw-r--r--  tools/perf/util/callchain.c | 14
-rw-r--r--  tools/perf/util/callchain.h | 1
-rw-r--r--  tools/perf/util/cloexec.c | 4
-rw-r--r--  tools/perf/util/config.c | 14
-rw-r--r--  tools/perf/util/counts.c | 10
-rw-r--r--  tools/perf/util/counts.h | 7
-rw-r--r--  tools/perf/util/cputopo.h | 2
-rw-r--r--  tools/perf/util/demangle-java.c | 13
-rw-r--r--  tools/perf/util/dso.c | 16
-rw-r--r--  tools/perf/util/dso.h | 5
-rw-r--r--  tools/perf/util/event.h | 2
-rw-r--r--  tools/perf/util/evlist.c | 39
-rw-r--r--  tools/perf/util/evsel.c | 156
-rw-r--r--  tools/perf/util/evsel.h | 34
-rw-r--r--  tools/perf/util/evsel_config.h | 43
-rw-r--r--  tools/perf/util/evsel_fprintf.c | 3
-rw-r--r--  tools/perf/util/evsel_fprintf.h | 3
-rw-r--r--  tools/perf/util/expr.c | 130
-rw-r--r--  tools/perf/util/expr.h | 29
-rw-r--r--  tools/perf/util/expr.l | 16
-rw-r--r--  tools/perf/util/expr.y | 41
-rw-r--r--  tools/perf/util/genelf_debug.c | 4
-rw-r--r--  tools/perf/util/hashmap.c | 238
-rw-r--r--  tools/perf/util/hashmap.h | 176
-rw-r--r--  tools/perf/util/header.c | 34
-rw-r--r--  tools/perf/util/hist.c | 13
-rw-r--r--  tools/perf/util/hist.h | 6
-rw-r--r--  tools/perf/util/intel-pt.c | 31
-rw-r--r--  tools/perf/util/jitdump.c | 2
-rw-r--r--  tools/perf/util/jitdump.h | 6
-rw-r--r--  tools/perf/util/machine.c | 4
-rw-r--r--  tools/perf/util/mem-events.c | 15
-rw-r--r--  tools/perf/util/mem-events.h | 2
-rw-r--r--  tools/perf/util/metricgroup.c | 316
-rw-r--r--  tools/perf/util/metricgroup.h | 6
-rw-r--r--  tools/perf/util/ordered-events.h | 2
-rw-r--r--  tools/perf/util/parse-events.c | 111
-rw-r--r--  tools/perf/util/parse-events.h | 5
-rw-r--r--  tools/perf/util/parse-events.l | 12
-rw-r--r--  tools/perf/util/pfm.c | 281
-rw-r--r--  tools/perf/util/pfm.h | 37
-rw-r--r--  tools/perf/util/pmu.c | 33
-rw-r--r--  tools/perf/util/pmu.h | 4
-rw-r--r--  tools/perf/util/probe-event.c | 49
-rw-r--r--  tools/perf/util/probe-finder.c | 1
-rw-r--r--  tools/perf/util/pstack.c | 2
-rw-r--r--  tools/perf/util/record.h | 6
-rw-r--r--  tools/perf/util/session.c | 12
-rw-r--r--  tools/perf/util/sideband_evlist.c | 2
-rw-r--r--  tools/perf/util/sort.c | 2
-rw-r--r--  tools/perf/util/stat-shadow.c | 53
-rw-r--r--  tools/perf/util/stat.c | 90
-rw-r--r--  tools/perf/util/stat.h | 7
-rw-r--r--  tools/perf/util/symbol-elf.c | 7
-rw-r--r--  tools/perf/util/symbol.c | 4
-rw-r--r--  tools/perf/util/symbol.h | 2
-rw-r--r--  tools/perf/util/syscalltbl.c | 4
-rw-r--r--  tools/perf/util/syscalltbl.h | 14
-rw-r--r--  tools/perf/util/trace-event-info.c | 2
-rw-r--r--  tools/perf/util/unwind-libunwind-local.c | 2
-rw-r--r--  tools/testing/selftests/exec/Makefile | 1
-rwxr-xr-x  tools/testing/selftests/exec/binfmt_script | 171
-rw-r--r--  tools/testing/selftests/proc/.gitignore | 2
-rw-r--r--  tools/testing/selftests/proc/Makefile | 2
-rw-r--r--  tools/testing/selftests/proc/proc-fsconfig-hidepid.c | 50
-rw-r--r--  tools/testing/selftests/proc/proc-multiple-procfs.c | 48
151 files changed, 4585 insertions(+), 982 deletions(-)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 12c9684d59ba..ef452b817f44 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -301,6 +301,9 @@
 #define MSR_PP1_ENERGY_STATUS		0x00000641
 #define MSR_PP1_POLICY			0x00000642
 
+#define MSR_AMD_PKG_ENERGY_STATUS	0xc001029b
+#define MSR_AMD_RAPL_POWER_UNIT		0xc0010299
+
 /* Config TDP MSRs */
 #define MSR_CONFIG_TDP_NOMINAL		0x00000648
 #define MSR_CONFIG_TDP_LEVEL_1		0x00000649
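
The two MSRs added here expose the package-level RAPL energy counters on recent AMD CPUs. As a rough illustration only, the sketch below reads them through the msr driver (CONFIG_X86_MSR, root required); the placement of the energy status unit in bits 12:8 of MSR_AMD_RAPL_POWER_UNIT, mirroring the Intel RAPL layout, is an assumption of this sketch and not something the header above states.

/* Sketch: read the new AMD RAPL MSRs via /dev/cpu/0/msr (msr driver, root).
 * The energy unit field location (bits 12:8) is assumed, not taken from
 * the header above. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_AMD_RAPL_POWER_UNIT		0xc0010299
#define MSR_AMD_PKG_ENERGY_STATUS	0xc001029b

static int rdmsr(int fd, uint32_t msr, uint64_t *val)
{
	return pread(fd, val, sizeof(*val), msr) == sizeof(*val) ? 0 : -1;
}

int main(void)
{
	uint64_t unit, energy;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0 || rdmsr(fd, MSR_AMD_RAPL_POWER_UNIT, &unit) ||
	    rdmsr(fd, MSR_AMD_PKG_ENERGY_STATUS, &energy)) {
		perror("msr");
		return 1;
	}
	/* counter ticks are 1/2^ESU joules, ESU assumed in bits 12:8 */
	printf("package energy: %.3f J\n",
	       (double)(uint32_t)energy / (double)(1ULL << ((unit >> 8) & 0x1f)));
	close(fd);
	return 0;
}
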
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3abd4316cd4f..cb152370fdef 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -40,7 +40,6 @@ FEATURE_TESTS_BASIC :=                  \
         glibc                           \
         gtk2                            \
         gtk2-infobar                    \
-        libaudit                        \
         libbfd                          \
         libcap                          \
         libelf                          \
@@ -112,7 +111,6 @@ FEATURE_DISPLAY ?=              \
          dwarf_getlocations     \
          glibc                  \
          gtk2                   \
-         libaudit               \
          libbfd                 \
          libcap                 \
          libelf                 \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 84f845b9627d..b1f0321180f5 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -91,7 +91,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
 ###############################
 
 $(OUTPUT)test-all.bin:
-	$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
+	$(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
 
 $(OUTPUT)test-hello.bin:
 	$(BUILD)
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 88145e8cde1a..5479e543b194 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -74,10 +74,6 @@
 # include "test-libunwind.c"
 #undef main
 
-#define main main_test_libaudit
-# include "test-libaudit.c"
-#undef main
-
 #define main main_test_libslang
 # include "test-libslang.c"
 #undef main
@@ -208,7 +204,6 @@ int main(int argc, char *argv[])
 	main_test_libelf_gelf_getnote();
 	main_test_libelf_getshdrstrndx();
 	main_test_libunwind();
-	main_test_libaudit();
 	main_test_libslang();
 	main_test_gtk2(argc, argv);
 	main_test_gtk2_infobar(argc, argv);
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 95c072b70d0e..b9d4322e1e65 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -27,6 +27,18 @@
 #define  __pure		__attribute__((pure))
 #endif
 #define  noinline	__attribute__((noinline))
+#ifdef __has_attribute
+#if __has_attribute(disable_tail_calls)
+#define __no_tail_call	__attribute__((disable_tail_calls))
+#endif
+#endif
+#ifndef __no_tail_call
+#if GCC_VERSION > 40201
+#define __no_tail_call	__attribute__((optimize("no-optimize-sibling-calls")))
+#else
+#define __no_tail_call
+#endif
+#endif
 #ifndef __packed
 #define __packed	__attribute__((packed))
 #endif
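
The new __no_tail_call annotation stops the compiler from turning a trailing call into a sibling-call jump, so the annotated function keeps its own stack frame and stays visible to unwinding (perf's dwarf-unwind test is the intended user). A minimal sketch of its use follows; the fallback definition only mirrors the GCC branch added above so the sketch builds outside this tree.

#include <stdio.h>

#ifndef __no_tail_call
#define __no_tail_call	__attribute__((optimize("no-optimize-sibling-calls")))
#endif

static int leaf(int x)
{
	return x * 2;
}

/* Without the attribute, "return leaf(x)" may compile into a jump and
 * middle() would disappear from call chains captured inside leaf(). */
static __no_tail_call int middle(int x)
{
	return leaf(x);
}

int main(void)
{
	printf("%d\n", middle(21));
	return 0;
}
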
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 180f7714a5f1..9f9002734e19 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -47,6 +47,9 @@
 #ifndef noinline
 #define noinline
 #endif
+#ifndef __no_tail_call
+#define __no_tail_call
+#endif
 
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #ifndef __same_type
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 271484754fee..e817179c5027 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -1,5 +1,5 @@
 		i	synthesize instructions events
-		b	synthesize branches events
+		b	synthesize branches events (branch misses for Arm SPE)
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
 		x	synthesize transactions events
@@ -9,6 +9,10 @@
 			of aux-output (refer to perf record)
 		e	synthesize error events
 		d	create a debug log
+		f	synthesize first level cache events
+		m	synthesize last level cache events
+		t	synthesize TLB events
+		a	synthesize remote access events
 		g	synthesize a call chain (use with i or x)
 		G	synthesize a call chain on existing event records
 		l	synthesize last branch entries (use with i or x)
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 2133eb320cb0..98efdab5fbd4 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -40,7 +40,7 @@ RECORD OPTIONS
 --------------
 -e::
 --event=::
-	Select the PMU event. Use 'perf mem record -e list'
+	Select the PMU event. Use 'perf c2c record -e list'
 	to list available events.
 
 -v::
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index f16d8a71d3f5..c7d3df5798e2 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -667,6 +667,11 @@ convert.*::
 		Limit the size of ordered_events queue, so we could control
 		allocation size of perf data files without proper finished
 		round events.
+stat.*::
+
+	stat.big-num::
+		(boolean) Change the default for "--big-num". To make
+		"--no-big-num" the default, set "stat.big-num=false".
 
 intel-pt.*::
 
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index eb8b7d42591a..f4cd49a7fcdb 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -687,7 +687,7 @@ The v4.2 kernel introduced support for a context switch metadata event,
 PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
 are scheduled out and in, just not by whom, which is left for the
 PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
-which in turn requires CAP_SYS_ADMIN.
+which in turn requires CAP_PERFMON or CAP_SYS_ADMIN.
 
 Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
 switches") commit, that introduces these metadata events for further info.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 561ef55743e2..fa8a5fcd27ab 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -458,7 +458,9 @@ This option sets the time out limit. The default value is 500 ms.
 
 --switch-events::
 Record context switch events i.e. events of type PERF_RECORD_SWITCH or
-PERF_RECORD_SWITCH_CPU_WIDE.
+PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
+switch events will be enabled automatically, which can be suppressed by
+the option --no-switch-events.
 
 --clang-path=PATH::
 Path to clang binary to use for compiling BPF scriptlets.
@@ -613,6 +615,17 @@ appended unit character - B/K/M/G
 	The number of threads to run when synthesizing events for existing processes.
 	By default, the number of threads equals 1.
 
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched.  Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
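
The --pfm-events option documented above (and for perf stat and perf top below) hands the event string to libpfm4, which resolves it into a struct perf_event_attr. The sketch below shows that resolution step in isolation; it assumes the libpfm4 development headers are installed (link with -lpfm) and is illustrative only, not the code this series adds in tools/perf/util/pfm.c.

#include <linux/perf_event.h>
#include <perfmon/pfmlib.h>
#include <perfmon/pfmlib_perf_event.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct perf_event_attr attr;
	pfm_perf_encode_arg_t arg;
	int ret;

	if (pfm_initialize() != PFM_SUCCESS)
		return 1;

	memset(&attr, 0, sizeof(attr));
	memset(&arg, 0, sizeof(arg));
	arg.attr = &attr;
	arg.size = sizeof(arg);

	/* same event string as: perf record --pfm-events inst_retired:any_p:u */
	ret = pfm_get_os_event_encoding("inst_retired:any_p:u", PFM_PLM3,
					PFM_OS_PERF_EVENT, &arg);
	if (ret != PFM_SUCCESS) {
		fprintf(stderr, "cannot encode event: %s\n", pfm_strerror(ret));
		return 1;
	}
	printf("type=%u config=%#llx\n", attr.type,
	       (unsigned long long)attr.config);
	return 0;
}
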
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 3fb5028aef08..b029ee728a0b 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -71,6 +71,16 @@ report::
 --tid=<tid>::
         stat events on existing thread id (comma separated list)
 
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched.  Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
 
 -a::
 --all-cpus::
@@ -93,7 +103,9 @@ report::
 
 -B::
 --big-num::
-        print large numbers with thousands' separators according to locale
+        print large numbers with thousands' separators according to locale.
+	Enabled by default. Use "--no-big-num" to disable.
+	Default setting can be changed with "perf config stat.big-num=false".
 
 -C::
 --cpu=::
@@ -234,6 +246,25 @@ filter out the startup phase of the program, which is often very different.
 
 Print statistics of transactional execution if supported.
 
+--metric-no-group::
+By default, events to compute a metric are placed in weak groups. The
+group tries to enforce scheduling all or none of the events. The
+--metric-no-group option places events outside of groups and may
+increase the chance of the event being scheduled - leading to more
+accuracy. However, as the events may not be scheduled together, accuracy
+for metrics like instructions per cycle can be lower, since the events
+making up the metric may no longer be measured at the same time.
+
+--metric-no-merge::
+By default metric events in different weak groups can be shared if one
+group contains all the events needed by another. In such cases one
+group will be eliminated reducing event multiplexing and making it so
+that certain groups of metrics sum to 100%. A downside of sharing a
+group is that the shared group may require multiplexing, lowering the
+accuracy of a small group that would not otherwise need it. This option
+forbids the event merging logic from sharing events between groups and
+may be used to increase accuracy in this case.
+
 STAT RECORD
 -----------
 Stores stat data into perf data file.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 20227dabc208..ee2024691d46 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -329,6 +329,17 @@ Default is to monitor all CPUS.
 	The known limitations include exception handing such as
 	setjmp/longjmp will have calls/returns not match.
 
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched.  Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
+
 INTERACTIVE PROMPTING KEYS
 --------------------------
 
diff --git a/tools/perf/Documentation/security.txt b/tools/perf/Documentation/security.txt
new file mode 100644
index 000000000000..4fe3b8b1958f
--- /dev/null
+++ b/tools/perf/Documentation/security.txt
@@ -0,0 +1,237 @@
+Overview
+========
+
+For general security-related questions about perf_event_open() syscall usage
+and performance monitoring and observability operations with Perf, see:
+https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html
+
+Enabling LSM based mandatory access control (MAC) to perf_event_open() syscall
+==============================================================================
+
+LSM hooks for mandatory access control for perf_event_open() syscall can be
+used starting from Linux v5.3. Below are the steps to extend Fedora (v31) with
+Targeted policy with perf_event_open() access control capabilities:
+
+1. Download selinux-policy SRPM package (e.g. selinux-policy-3.14.4-48.fc31.src.rpm on FC31)
+   and install it so rpmbuild directory would exist in the current working directory:
+
+   # rpm -Uhv selinux-policy-3.14.4-48.fc31.src.rpm
+
+2. Get into rpmbuild/SPECS directory and unpack the source code:
+
+   # rpmbuild -bp selinux-policy.spec
+
+3. Place patch below at rpmbuild/BUILD/selinux-policy-b86eaaf4dbcf2d51dd4432df7185c0eaf3cbcc02
+   directory and apply it:
+
+   # patch -p1 < selinux-policy-perf-events-perfmon.patch
+   patching file policy/flask/access_vectors
+   patching file policy/flask/security_classes
+   # cat selinux-policy-perf-events-perfmon.patch
+diff -Nura a/policy/flask/access_vectors b/policy/flask/access_vectors
+--- a/policy/flask/access_vectors	2020-02-04 18:19:53.000000000 +0300
++++ b/policy/flask/access_vectors	2020-02-28 23:37:25.000000000 +0300
+@@ -174,6 +174,7 @@
+ 	wake_alarm
+ 	block_suspend
+ 	audit_read
++	perfmon
+ }
+ 
+ #
+@@ -1099,3 +1100,15 @@
+ 
+ class xdp_socket
+ inherits socket
++
++class perf_event
++{
++	open
++	cpu
++	kernel
++	tracepoint
++	read
++	write
++}
++
++
+diff -Nura a/policy/flask/security_classes b/policy/flask/security_classes
+--- a/policy/flask/security_classes	2020-02-04 18:19:53.000000000 +0300
++++ b/policy/flask/security_classes	2020-02-28 21:35:17.000000000 +0300
+@@ -200,4 +200,6 @@
+ 
+ class xdp_socket
+ 
++class perf_event
++
+ # FLASK
+
+4. Get into rpmbuild/SPECS directory and build policy packages from patched sources:
+
+   # rpmbuild --noclean --noprep -ba selinux-policy.spec
+
+   so you have this:
+
+   # ls -alh rpmbuild/RPMS/noarch/
+   total 33M
+   drwxr-xr-x. 2 root root 4.0K Mar 20 12:16 .
+   drwxr-xr-x. 3 root root 4.0K Mar 20 12:16 ..
+   -rw-r--r--. 1 root root 112K Mar 20 12:16 selinux-policy-3.14.4-48.fc31.noarch.rpm
+   -rw-r--r--. 1 root root 1.2M Mar 20 12:17 selinux-policy-devel-3.14.4-48.fc31.noarch.rpm
+   -rw-r--r--. 1 root root 2.3M Mar 20 12:17 selinux-policy-doc-3.14.4-48.fc31.noarch.rpm
+   -rw-r--r--. 1 root root  12M Mar 20 12:17 selinux-policy-minimum-3.14.4-48.fc31.noarch.rpm
+   -rw-r--r--. 1 root root 4.5M Mar 20 12:16 selinux-policy-mls-3.14.4-48.fc31.noarch.rpm
+   -rw-r--r--. 1 root root 111K Mar 20 12:16 selinux-policy-sandbox-3.14.4-48.fc31.noarch.rpm
+   -rw-r--r--. 1 root root  14M Mar 20 12:17 selinux-policy-targeted-3.14.4-48.fc31.noarch.rpm
+
+5. Install SELinux packages from Fedora repo, if not already done so, and
+   update with the patched rpms above:
+
+   # rpm -Uhv rpmbuild/RPMS/noarch/selinux-policy-*
+
+6. Enable SELinux Permissive mode for Targeted policy, if not already done so:
+
+   # cat /etc/selinux/config
+
+   # This file controls the state of SELinux on the system.
+   # SELINUX= can take one of these three values:
+   #     enforcing - SELinux security policy is enforced.
+   #     permissive - SELinux prints warnings instead of enforcing.
+   #     disabled - No SELinux policy is loaded.
+   SELINUX=permissive
+   # SELINUXTYPE= can take one of these three values:
+   #     targeted - Targeted processes are protected,
+   #     minimum - Modification of targeted policy. Only selected processes are protected.
+   #     mls - Multi Level Security protection.
+   SELINUXTYPE=targeted
+
+7. Enable filesystem SELinux labeling at the next reboot:
+
+   # touch /.autorelabel
+
+8. Reboot machine and it will label filesystems and load Targeted policy into the kernel;
+
+9. Login and check that dmesg output doesn't mention that perf_event class is unknown to SELinux subsystem;
+
+10. Check that SELinux is enabled and in Permissive mode
+
+    # getenforce
+    Permissive
+
+11. Turn SELinux into Enforcing mode:
+
+    # setenforce 1
+    # getenforce
+    Enforcing
+
+Opening access to perf_event_open() syscall on Fedora with SELinux
+==================================================================
+
+Access to performance monitoring and observability operations by Perf
+can be limited for superuser or CAP_PERFMON or CAP_SYS_ADMIN privileged
+processes. MAC policy settings (e.g. SELinux) can be loaded into the kernel
+and prevent unauthorized access to perf_event_open() syscall. In such case
+Perf tool provides a message similar to the one below:
+
+   # perf stat
+   Error:
+   Access to performance monitoring and observability operations is limited.
+   Enforced MAC policy settings (SELinux) can limit access to performance
+   monitoring and observability operations. Inspect system audit records for
+   more perf_event access control information and adjusting the policy.
+   Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open
+   access to performance monitoring and observability operations for users
+   without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.
+   perf_event_paranoid setting is -1:
+     -1: Allow use of (almost) all events by all users
+         Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+   >= 0: Disallow raw and ftrace function tracepoint access
+   >= 1: Disallow CPU event access
+   >= 2: Disallow kernel profiling
+   To make the adjusted perf_event_paranoid setting permanent preserve it
+   in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)
+
+To confirm that access is limited by MAC policy settings, inspect the system
+audit records using the journalctl command or /var/log/audit/audit.log; the
+output should contain AVC denied records related to perf_event:
+
+   # journalctl --reverse --no-pager | grep perf_event
+
+   python3[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t.
+                                         If you believe that perf should be allowed open access on perf_event labeled unconfined_t by default.
+   setroubleshoot[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. For complete SELinux messages run: sealert -l 4595ce5b-e58f-462c-9d86-3bc2074935de
+   audit[1318098]: AVC avc:  denied  { open } for  pid=1318098 comm="perf" scontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=perf_event permissive=0
+
+In order to open access to the perf_event_open() syscall, MAC policy settings
+may need to be extended. On an SELinux system this can be done by loading a
+special policy module extending the base policy. A Perf related policy module
+can be generated from the system audit records about blocked perf_event access.
+Run the command below to generate my-perf.te policy extension file with
+perf_event related rules:
+
+   # ausearch -c 'perf' --raw | audit2allow -M my-perf && cat my-perf.te
+
+   module my-perf 1.0;
+
+   require {
+        type unconfined_t;
+        class perf_event { cpu kernel open read tracepoint write };
+   }
+
+   #============= unconfined_t ==============
+   allow unconfined_t self:perf_event { cpu kernel open read tracepoint write };
+
+Now compile, pack and load my-perf.pp extension module into the kernel:
+
+   # checkmodule -M -m -o my-perf.mod my-perf.te
+   # semodule_package -o my-perf.pp -m my-perf.mod
+   # semodule -X 300 -i my-perf.pp
+
+After taking all the steps above, access to the perf_event_open() syscall
+should now be allowed by the policy settings. Check access by running Perf:
+
+   # perf stat
+   ^C
+   Performance counter stats for 'system wide':
+
+         36,387.41 msec cpu-clock                 #    7.999 CPUs utilized
+             2,629      context-switches          #    0.072 K/sec
+                57      cpu-migrations            #    0.002 K/sec
+                 1      page-faults               #    0.000 K/sec
+       263,721,559      cycles                    #    0.007 GHz
+       175,746,713      instructions              #    0.67  insn per cycle
+        19,628,798      branches                  #    0.539 M/sec
+         1,259,201      branch-misses             #    6.42% of all branches
+
+       4.549061439 seconds time elapsed
+
+The generated my-perf.pp policy extension module can be removed
+from the kernel using this command:
+
+   # semodule -X 300 -r my-perf
+
+Alternatively the module can be temporarily disabled and enabled back using
+these two commands:
+
+   # semodule -d my-perf
+   # semodule -e my-perf
+
+If something went wrong
+=======================
+
+To turn SELinux into Permissive mode:
+   # setenforce 0
+
+To fully disable SELinux during kernel boot [3] set kernel command line parameter selinux=0
+
+To remove SELinux labeling from local filesystems:
+   # find / -mount -print0 | xargs -0 setfattr -h -x security.selinux
+
+To fully turn SELinux off on a machine, set SELINUX=disabled in the /etc/selinux/config file and reboot;
+
+Links
+=====
+
+[1] https://download-ib01.fedoraproject.org/pub/fedora/linux/updates/31/Everything/SRPMS/Packages/s/selinux-policy-3.14.4-49.fc31.src.rpm
+[2] https://docs.fedoraproject.org/en-US/Fedora/11/html/Security-Enhanced_Linux/sect-Security-Enhanced_Linux-Working_with_SELinux-Enabling_and_Disabling_SELinux.html
+[3] https://danwalsh.livejournal.com/10972.html
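
As a way to verify the resulting policy from code rather than through the perf tool, a minimal perf_event_open() call such as the sketch below either succeeds or fails with EACCES/EPERM, and a denial shows up as a perf_event AVC record in the audit log. The sketch is only an illustration and is not part of the document above.

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	/* monitor the current process on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	puts("perf_event_open allowed");
	close(fd);
	return 0;
}
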
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 12a8204d63c6..877ca6be0ed0 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -23,12 +23,28 @@ include $(srctree)/tools/scripts/Makefile.arch
 $(call detected_var,SRCARCH)
 
 NO_PERF_REGS := 1
-NO_SYSCALL_TABLE := 1
+
+ifneq ($(NO_SYSCALL_TABLE),1)
+  NO_SYSCALL_TABLE := 1
+
+  ifeq ($(SRCARCH),x86)
+    ifeq (${IS_64_BIT}, 1)
+      NO_SYSCALL_TABLE := 0
+    endif
+  else
+    ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390))
+      NO_SYSCALL_TABLE := 0
+    endif
+  endif
+
+  ifneq ($(NO_SYSCALL_TABLE),1)
+    CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
+  endif
+endif
 
 # Additional ARCH settings for ppc
 ifeq ($(SRCARCH),powerpc)
   NO_PERF_REGS := 0
-  NO_SYSCALL_TABLE := 0
   CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
   LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
 endif
@@ -37,7 +53,6 @@ endif
 ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
   ifeq (${IS_64_BIT}, 1)
-    NO_SYSCALL_TABLE := 0
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
     LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
@@ -55,7 +70,6 @@ endif
 
 ifeq ($(SRCARCH),arm64)
   NO_PERF_REGS := 0
-  NO_SYSCALL_TABLE := 0
   CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
   LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
 endif
@@ -70,7 +84,6 @@ endif
 
 ifeq ($(ARCH),s390)
   NO_PERF_REGS := 0
-  NO_SYSCALL_TABLE := 0
   CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
 endif
 
@@ -78,10 +91,6 @@ ifeq ($(NO_PERF_REGS),0)
   $(call detected,CONFIG_PERF_REGS)
 endif
 
-ifneq ($(NO_SYSCALL_TABLE),1)
-  CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
-endif
-
 # So far there's only x86 and arm libdw unwind support merged in perf.
 # Disable it on all other architectures in case libdw unwind
 # support is detected in system. Add supported architectures
@@ -346,7 +355,7 @@ ifndef NO_BIONIC
 endif
 
 ifeq ($(feature-eventfd), 1)
-  CFLAGS += -DHAVE_EVENTFD
+  CFLAGS += -DHAVE_EVENTFD_SUPPORT
 endif
 
 ifeq ($(feature-get_current_dir_name), 1)
@@ -651,6 +660,7 @@ ifeq ($(NO_SYSCALL_TABLE),0)
   $(call detected,CONFIG_TRACE)
 else
   ifndef NO_LIBAUDIT
+    $(call feature_check,libaudit)
     ifneq ($(feature-libaudit), 1)
       msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
       NO_LIBAUDIT := 1
@@ -1012,6 +1022,19 @@ ifdef LIBCLANGLLVM
   endif
 endif
 
+ifdef LIBPFM4
+  $(call feature_check,libpfm4)
+  ifeq ($(feature-libpfm4), 1)
+    CFLAGS += -DHAVE_LIBPFM
+    EXTLIBS += -lpfm
+    ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1
+    $(call detected,CONFIG_LIBPFM4)
+  else
+    msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev);
+    NO_LIBPFM4 := 1
+  endif
+endif
+
 # Among the variables below, these:
 #   perfexecdir
 #   perf_include_dir
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 94a495594e99..86dbb51bb272 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -118,6 +118,12 @@ include ../scripts/utilities.mak
 #
 # Define LIBBPF_DYNAMIC to enable libbpf dynamic linking.
 #
+# Define NO_SYSCALL_TABLE=1 to disable the use of syscall id to/from name tables
+# generated from the kernel .tbl or unistd.h files and use, if available, libaudit
+# for doing the conversions to/from strings/id.
+#
+# Define LIBPFM4 to enable libpfm4 events extension.
+#
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
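
When NO_SYSCALL_TABLE=1 is set as described in the comment above, perf falls back to libaudit for the syscall id/name conversion. A small sketch of that fallback in isolation, assuming audit-libs-devel/libaudit-dev is installed (build with "cc -o sysname sysname.c -laudit"):

#include <libaudit.h>
#include <stdio.h>

int main(void)
{
	int machine = audit_detect_machine();
	const char *name = audit_syscall_to_name(1, machine);

	printf("syscall 1 on this machine is %s\n", name ? name : "unknown");
	return 0;
}
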
@@ -278,6 +284,7 @@ strip-libs = $(filter-out -l%,$(1))
 
 ifneq ($(OUTPUT),)
   TE_PATH=$(OUTPUT)
+  PLUGINS_PATH=$(OUTPUT)
   BPF_PATH=$(OUTPUT)
   SUBCMD_PATH=$(OUTPUT)
   LIBPERF_PATH=$(OUTPUT)
@@ -288,6 +295,7 @@ else
 endif
 else
   TE_PATH=$(TRACE_EVENT_DIR)
+  PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
   API_PATH=$(LIB_DIR)
   BPF_PATH=$(BPF_DIR)
   SUBCMD_PATH=$(SUBCMD_DIR)
@@ -297,7 +305,7 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
-LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list
 
 #
 # The static build has no dynsym table, so this does not work for
@@ -756,10 +764,10 @@ $(LIBTRACEEVENT): FORCE
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
 
 libtraceevent_plugins: FORCE
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
 
 $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
 
 $(LIBTRACEEVENT)-clean:
 	$(call QUIET_CLEAN, libtraceevent)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 97aa02c4491d..cea5e33d61d2 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -216,7 +216,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
 				struct evsel *evsel)
 {
 	char msg[BUFSIZ], path[PATH_MAX], *sink;
-	struct perf_evsel_config_term *term;
+	struct evsel_config_term *term;
 	int ret = -EINVAL;
 	u32 hash;
 
@@ -224,7 +224,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
 		return 0;
 
 	list_for_each_entry(term, &evsel->config_terms, list) {
-		if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+		if (term->type != EVSEL__CONFIG_TERM_DRV_CFG)
 			continue;
 
 		sink = term->val.str;
@@ -265,7 +265,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
 	ptr->evlist = evlist;
 	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
 
-	if (perf_can_record_switch_events())
+	if (!record_opts__no_switch_events(opts) &&
+	    perf_can_record_switch_events())
 		opts->record_switch_events = true;
 
 	evlist__for_each_entry(evlist, evsel) {
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
index 7623d85e77f3..a50941629649 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index e5c9504f8586..e86e210bf514 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -2,6 +2,7 @@ perf-y += header.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
+perf-y += sym-handling.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
index abf2dbc7f829..7b2d96ec28e3 100644
--- a/tools/perf/arch/powerpc/util/unwind-libdw.c
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -1,9 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
 #include <linux/kernel.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
 static const int special_regs[3][2] = {
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index ef43be9b6ec2..4e40402a4f81 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -55,6 +55,14 @@ int test__arch_unwind_sample(struct perf_sample *sample,
 		return -1;
 	}
 
+#ifdef MEMORY_SANITIZER
+	/*
+	 * Assignments to buf in the assembly function perf_regs_load aren't
+	 * seen by memory sanitizer. Zero the memory to convince memory
+	 * sanitizer the memory is initialized.
+	 */
+	memset(buf, 0, sizeof(u64) * PERF_REGS_MAX);
+#endif
 	perf_regs_load(buf);
 	regs->abi  = PERF_SAMPLE_REGS_ABI;
 	regs->regs = buf;
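
The memset works around MemorySanitizer not tracking stores made by the perf_regs_load assembly routine. An alternative, shown here only as a sketch, is to mark the buffer initialized through MSan's interface; it assumes a clang -fsanitize=memory build that defines MEMORY_SANITIZER the same way this test does.

#include <stdint.h>
#ifdef MEMORY_SANITIZER
#include <sanitizer/msan_interface.h>
#endif

/* Tell MSan that a buffer filled by assembly is initialized, instead of
 * zeroing it first. */
static void mark_regs_initialized(uint64_t *buf, unsigned long nregs)
{
#ifdef MEMORY_SANITIZER
	__msan_unpoison(buf, nregs * sizeof(*buf));
#else
	(void)buf;
	(void)nregs;
#endif
}

int main(void)
{
	uint64_t regs[64];

	mark_regs_initialized(regs, 64);
	return 0;
}
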
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 3f7c20cc7b79..839ef52c1ac2 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -59,7 +59,8 @@ struct intel_pt_recording {
 	size_t				priv_size;
 };
 
-static int intel_pt_parse_terms_with_default(struct list_head *formats,
+static int intel_pt_parse_terms_with_default(const char *pmu_name,
+					     struct list_head *formats,
 					     const char *str,
 					     u64 *config)
 {
@@ -78,7 +79,8 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats,
 		goto out_free;
 
 	attr.config = *config;
-	err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
+	err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true,
+				     NULL);
 	if (err)
 		goto out_free;
 
@@ -88,11 +90,12 @@ out_free:
 	return err;
 }
 
-static int intel_pt_parse_terms(struct list_head *formats, const char *str,
-				u64 *config)
+static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats,
+				const char *str, u64 *config)
 {
 	*config = 0;
-	return intel_pt_parse_terms_with_default(formats, str, config);
+	return intel_pt_parse_terms_with_default(pmu_name, formats, str,
+						 config);
 }
 
 static u64 intel_pt_masked_bits(u64 mask, u64 bits)
@@ -229,7 +232,8 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
 
 	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
 
-	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf,
+			     &config);
 
 	return config;
 }
@@ -337,13 +341,16 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
 	if (priv_size != ptr->priv_size)
 		return -EINVAL;
 
-	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
-	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
-			     &noretcomp_bit);
-	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+			     "tsc", &tsc_bit);
+	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+			     "noretcomp", &noretcomp_bit);
+	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+			     "mtc", &mtc_bit);
 	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
 					      "mtc_period");
-	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+			     "cyc", &cyc_bit);
 
 	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
 
@@ -556,10 +563,9 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
 static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
 					struct evsel *evsel)
 {
-	struct perf_evsel_config_term *term;
 	u64 user_bits = 0, bits;
+	struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
 
-	term = perf_evsel__get_config_term(evsel, CFG_CHG);
 	if (term)
 		user_bits = term->val.cfg_chg;
 
@@ -769,7 +775,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 		}
 	}
 
-	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+	intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+			     "tsc", &tsc_bit);
 
 	if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
 		have_timing_info = true;
@@ -780,7 +787,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	 * Per-cpu recording needs sched_switch events to distinguish different
 	 * threads.
 	 */
-	if (have_timing_info && !perf_cpu_map__empty(cpus)) {
+	if (have_timing_info && !perf_cpu_map__empty(cpus) &&
+	    !record_opts__no_switch_events(opts)) {
 		if (perf_can_record_switch_events()) {
 			bool cpu_wide = !target__none(&opts->target) &&
 					!target__has_task(&opts->target);
@@ -875,7 +883,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 	 * per-cpu with no sched_switch (except workload-only).
 	 */
 	if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
-	    !target__none(&opts->target))
+	    !target__none(&opts->target) &&
+	    !intel_pt_evsel->core.attr.exclude_user)
 		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
 
 	return 0;
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index fda8f4206ee4..eea2bf87232b 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index cadc18d42aa4..ca2d591aad8a 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -5,7 +5,7 @@
  * Benchmark the various operations allowed for epoll_ctl(2).
  * The idea is to concurrently stress a single epoll instance
  */
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
 /* For the CLR_() macros */
 #include <string.h>
 #include <pthread.h>
@@ -412,4 +412,4 @@ int bench_epoll_ctl(int argc, const char **argv)
 errmem:
 	err(EXIT_FAILURE, "calloc");
 }
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index cf797362675b..75dca9773186 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
 /*
  * Copyright (C) 2018 Davidlohr Bueso.
  *
@@ -540,4 +540,4 @@ int bench_epoll_wait(int argc, const char **argv)
 errmem:
 	err(EXIT_FAILURE, "calloc");
 }
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 97e4a4fb3362..71d830d7b923 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -40,7 +40,7 @@ struct sender_context {
 	unsigned int num_fds;
 	int ready_out;
 	int wakefd;
-	int out_fds[0];
+	int out_fds[];
 };
 
 struct receiver_context {
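
Replacing out_fds[0] with out_fds[] switches the struct to a C99 flexible array member, the form current compilers and bounds checking expect for trailing variable-length storage. A small sketch of how such a struct is allocated, using a hypothetical stand-in type:

#include <stdio.h>
#include <stdlib.h>

struct fd_list {			/* stand-in for struct sender_context */
	unsigned int num_fds;
	int out_fds[];			/* C99 flexible array member */
};

static struct fd_list *fd_list__new(unsigned int n)
{
	/* one allocation covering the header plus n trailing elements */
	struct fd_list *l = malloc(sizeof(*l) + n * sizeof(l->out_fds[0]));

	if (l)
		l->num_fds = n;
	return l;
}

int main(void)
{
	struct fd_list *l = fd_list__new(4);

	printf("allocated %u fds\n", l ? l->num_fds : 0);
	free(l);
	return 0;
}
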
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index d3e5a84f87a2..4940d10074c3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -432,7 +432,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
 			hists__collapse_resort(hists, NULL);
 			/* Don't sort callchain */
 			evsel__reset_sample_bit(pos, CALLCHAIN);
-			perf_evsel__output_resort(pos, NULL);
+			evsel__output_resort(pos, NULL);
 
 			if (symbol_conf.event_group && !evsel__is_group_leader(pos))
 				continue;
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 083273209c88..cad31b1d3438 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -67,14 +67,14 @@ static struct bench futex_benchmarks[] = {
 	{ NULL,		NULL,						NULL			}
 };
 
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
 static struct bench epoll_benchmarks[] = {
 	{ "wait",	"Benchmark epoll concurrent epoll_waits",       bench_epoll_wait	},
 	{ "ctl",	"Benchmark epoll concurrent epoll_ctls",        bench_epoll_ctl		},
 	{ "all",	"Run all futex benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}
 };
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
 
 static struct bench internals_benchmarks[] = {
 	{ "synthesize", "Benchmark perf event synthesis",	bench_synthesize	},
@@ -95,7 +95,7 @@ static struct collection collections[] = {
 	{ "numa",	"NUMA scheduling and MM benchmarks",		numa_benchmarks		},
 #endif
 	{"futex",       "Futex stressing benchmarks",                   futex_benchmarks        },
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
 	{"epoll",       "Epoll stressing benchmarks",                   epoll_benchmarks        },
 #endif
 	{ "internals",	"Perf-internals benchmarks",			internals_benchmarks	},
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 1baf4cae086f..d617d5682c68 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2887,8 +2887,15 @@ static int parse_record_events(const struct option *opt,
 {
 	bool *event_set = (bool *) opt->value;
 
+	if (!strcmp(str, "list")) {
+		perf_mem_events__list();
+		exit(0);
+	}
+	if (perf_mem_events__parse(str))
+		exit(-1);
+
 	*event_set = true;
-	return perf_mem_events__parse(str);
+	return 0;
 }
 
 
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 440501994931..98e992801251 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -34,7 +34,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
 		return PTR_ERR(session);
 
 	evlist__for_each_entry(session->evlist, pos) {
-		perf_evsel__fprintf(pos, details, stdout);
+		evsel__fprintf(pos, details, stdout);
 
 		if (pos->core.attr.type == PERF_TYPE_TRACEPOINT)
 			has_tracepoint = true;
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 55eda54240fb..2bfc1b0db536 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -45,6 +45,7 @@ struct filter_entry {
 	char			name[];
 };
 
+static volatile int workload_exec_errno;
 static bool done;
 
 static void sig_handler(int sig __maybe_unused)
@@ -63,7 +64,7 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
 						siginfo_t *info __maybe_unused,
 						void *ucontext __maybe_unused)
 {
-	/* workload_exec_errno = info->si_value.sival_int; */
+	workload_exec_errno = info->si_value.sival_int;
 	done = true;
 }
 
@@ -383,6 +384,14 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
 
 	write_tracing_file("tracing_on", "0");
 
+	if (workload_exec_errno) {
+		const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+		/* flush stdout first so below error msg appears at the end. */
+		fflush(stdout);
+		pr_err("workload failed: %s\n", emsg);
+		goto out_close_fd;
+	}
+
 	/* read remaining buffer contents */
 	while (true) {
 		int n = read(trace_fd, buf, sizeof(buf));
@@ -397,7 +406,7 @@ out_close_fd:
 out_reset:
 	reset_tracing_files(ftrace);
 out:
-	return done ? 0 : -1;
+	return (done && !workload_exec_errno) ? 0 : -1;
 }
 
 static int perf_ftrace_config(const char *var, const char *value, void *cb)
@@ -494,7 +503,7 @@ int cmd_ftrace(int argc, const char **argv)
 	argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
 			    PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc && target__none(&ftrace.target))
-		usage_with_options(ftrace_usage, ftrace_options);
+		ftrace.target.system_wide = true;
 
 	ret = target__validate(&ftrace.target);
 	if (ret) {
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 53932db97a79..4a6de4b03ac0 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -51,7 +51,7 @@ struct perf_inject {
 struct event_entry {
 	struct list_head node;
 	u32		 tid;
-	union perf_event event[0];
+	union perf_event event[];
 };
 
 static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 965ef017496f..0a7fe4cb5555 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -42,7 +42,7 @@ int cmd_list(int argc, const char **argv)
 		OPT_END()
 	};
 	const char * const list_usage[] = {
-		"perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+		"perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]",
 		NULL
 	};
 
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 68a7eb84561a..3523279af6af 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -38,26 +38,16 @@ static int parse_record_events(const struct option *opt,
 			       const char *str, int unset __maybe_unused)
 {
 	struct perf_mem *mem = *(struct perf_mem **)opt->value;
-	int j;
 
-	if (strcmp(str, "list")) {
-		if (!perf_mem_events__parse(str)) {
-			mem->operation = 0;
-			return 0;
-		}
-		exit(-1);
+	if (!strcmp(str, "list")) {
+		perf_mem_events__list();
+		exit(0);
 	}
+	if (perf_mem_events__parse(str))
+		exit(-1);
 
-	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
-		struct perf_mem_event *e = &perf_mem_events[j];
-
-		fprintf(stderr, "%-13s%-*s%s\n",
-			e->tag,
-			verbose > 0 ? 25 : 0,
-			verbose > 0 ? perf_mem_events__name(j) : "",
-			e->supported ? ": available" : "");
-	}
-	exit(0);
+	mem->operation = 0;
+	return 0;
 }
 
 static const char * const __usage[] = {
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 70548df2abb9..6b1507566770 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -364,6 +364,9 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
 
 		for (k = 0; k < pev->ntevs; k++) {
 			struct probe_trace_event *tev = &pev->tevs[k];
+			/* Skipped events have no event name */
+			if (!tev->event)
+				continue;
 
 			/* We use tev's name for showing new events */
 			show_perf_probe_event(tev->group, tev->event, pev,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e4efdbf1a81e..e108d90ae2ed 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -45,6 +45,7 @@
 #include "util/units.h"
 #include "util/bpf-event.h"
 #include "util/util.h"
+#include "util/pfm.h"
 #include "asm/bug.h"
 #include "perf.h"
 
@@ -56,6 +57,9 @@
 #include <unistd.h>
 #include <sched.h>
 #include <signal.h>
+#ifdef HAVE_EVENTFD_SUPPORT
+#include <sys/eventfd.h>
+#endif
 #include <sys/mman.h>
 #include <sys/wait.h>
 #include <sys/types.h>
@@ -538,6 +542,9 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 
 static volatile int signr = -1;
 static volatile int child_finished;
+#ifdef HAVE_EVENTFD_SUPPORT
+static int done_fd = -1;
+#endif
 
 static void sig_handler(int sig)
 {
@@ -547,6 +554,21 @@ static void sig_handler(int sig)
 		signr = sig;
 
 	done = 1;
+#ifdef HAVE_EVENTFD_SUPPORT
+{
+	u64 tmp = 1;
+	/*
+	 * It is possible for this signal handler to run after done is checked
+	 * in the main loop, but before the perf counter fds are polled. If this
+	 * happens, the poll() will continue to wait even though done is set,
+	 * and will only break out if either another signal is received, or the
+	 * counters are ready for read. To ensure the poll() doesn't sleep when
+	 * done is set, use an eventfd (done_fd) to wake up the poll().
+	 */
+	if (write(done_fd, &tmp, sizeof(tmp)) < 0)
+		pr_err("failed to signal wakeup fd, error: %m\n");
+}
+#endif // HAVE_EVENTFD_SUPPORT
 }
 
 static void sigsegv_handler(int sig)
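
The comment added above explains the race this eventfd closes: the signal handler writes to done_fd so that a poll() which checked 'done' just before the signal arrived wakes up instead of sleeping. The later hunks in this file create done_fd and add it to the evlist's pollfd set; the sketch below shows the same self-wakeup pattern in isolation, independent of the record code.

#include <poll.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

static int done_fd = -1;
static volatile sig_atomic_t done;

static void on_sigint(int sig)
{
	uint64_t one = 1;

	(void)sig;
	done = 1;
	/* wake up any poll() currently sleeping on done_fd */
	if (write(done_fd, &one, sizeof(one)) < 0)
		; /* nothing useful to do inside a signal handler */
}

int main(void)
{
	struct pollfd pfd;

	done_fd = eventfd(0, EFD_NONBLOCK);
	signal(SIGINT, on_sigint);
	pfd.fd = done_fd;
	pfd.events = POLLIN;
	while (!done) {
		/* without done_fd in the set, a signal landing between the
		 * !done check and poll() would leave us sleeping forever */
		poll(&pfd, 1, -1);
	}
	puts("done");
	close(done_fd);
	return 0;
}
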
@@ -825,19 +847,28 @@ static int record__open(struct record *rec)
 	int rc = 0;
 
 	/*
-	 * For initial_delay we need to add a dummy event so that we can track
-	 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
-	 * real events, the ones asked by the user.
+	 * For initial_delay or system wide, we need to add a dummy event so
+	 * that we can track PERF_RECORD_MMAP to cover the delay of waiting or
+	 * event synthesis.
 	 */
-	if (opts->initial_delay) {
+	if (opts->initial_delay || target__has_cpu(&opts->target)) {
 		if (perf_evlist__add_dummy(evlist))
 			return -ENOMEM;
 
+		/* Disable tracking of mmaps on lead event. */
 		pos = evlist__first(evlist);
 		pos->tracking = 0;
+		/* Set up dummy event. */
 		pos = evlist__last(evlist);
 		pos->tracking = 1;
-		pos->core.attr.enable_on_exec = 1;
+		/*
+		 * Enable the dummy event when the process is forked for
+		 * initial_delay, immediately for system wide.
+		 */
+		if (opts->initial_delay)
+			pos->core.attr.enable_on_exec = 1;
+		else
+			pos->immediate = 1;
 	}
 
 	perf_evlist__config(evlist, opts, &callchain_param);
@@ -1538,6 +1569,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		pr_err("Compression initialization failed.\n");
 		return -1;
 	}
+#ifdef HAVE_EVENTFD_SUPPORT
+	done_fd = eventfd(0, EFD_NONBLOCK);
+	if (done_fd < 0) {
+		pr_err("Failed to create wakeup eventfd, error: %m\n");
+		status = -1;
+		goto out_delete_session;
+	}
+	err = evlist__add_pollfd(rec->evlist, done_fd);
+	if (err < 0) {
+		pr_err("Failed to add wakeup eventfd to poll list\n");
+		status = err;
+		goto out_delete_session;
+	}
+#endif // HAVE_EVENTFD_SUPPORT
 
 	session->header.env.comp_type  = PERF_COMP_ZSTD;
 	session->header.env.comp_level = rec->opts.comp_level;
@@ -1896,6 +1941,10 @@ out_child:
 	}
 
 out_delete_session:
+#ifdef HAVE_EVENTFD_SUPPORT
+	if (done_fd >= 0)
+		close(done_fd);
+#endif
 	zstd_fini(&session->zstd_data);
 	perf_session__delete(session);
 
@@ -2453,8 +2502,9 @@ static struct option __record_options[] = {
 		    "Record namespaces events"),
 	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
 		    "Record cgroup events"),
-	OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
-		    "Record context switch events"),
+	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
+			&record.opts.record_switch_events_set,
+			"Record context switch events"),
 	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
 			 "Configure all used events to run in kernel space.",
 			 PARSE_OPT_EXCLUSIVE),
@@ -2506,6 +2556,11 @@ static struct option __record_options[] = {
 	OPT_UINTEGER(0, "num-thread-synthesize",
 		     &record.opts.nr_threads_synthesize,
 		     "number of threads to run for event synthesis"),
+#ifdef HAVE_LIBPFM
+	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
+		"libpfm4 event selector. use 'perf list' to list available events",
+		parse_libpfm_events_option),
+#endif
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ba63390246c2..b63b3fb2de70 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,6 @@
 #include "util/time-utils.h"
 #include "util/auxtrace.h"
 #include "util/units.h"
-#include "util/branch.h"
 #include "util/util.h" // perf_tip()
 #include "ui/ui.h"
 #include "ui/progress.h"
@@ -402,16 +401,7 @@ static int report__setup_sample_type(struct report *rep)
 		}
 	}
 
-	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
-		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
-		    (sample_type & PERF_SAMPLE_STACK_USER)) {
-			callchain_param.record_mode = CALLCHAIN_DWARF;
-			dwarf_callchain_users = true;
-		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
-			callchain_param.record_mode = CALLCHAIN_LBR;
-		else
-			callchain_param.record_mode = CALLCHAIN_FP;
-	}
+	callchain_param_setup(sample_type);
 
 	if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
 		ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@@ -716,8 +706,7 @@ static void report__output_resort(struct report *rep)
 	ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
 
 	evlist__for_each_entry(rep->session->evlist, pos) {
-		perf_evsel__output_resort_cb(pos, &prog,
-					     hists__resort_cb, rep);
+		evsel__output_resort_cb(pos, &prog, hists__resort_cb, rep);
 	}
 
 	ui_progress__finish();
@@ -1090,6 +1079,26 @@ parse_percent_limit(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int process_attr(struct perf_tool *tool __maybe_unused,
+			union perf_event *event,
+			struct evlist **pevlist)
+{
+	u64 sample_type;
+	int err;
+
+	err = perf_event__process_attr(tool, event, pevlist);
+	if (err)
+		return err;
+
+	/*
+	 * Check if we need to enable callchains based
+	 * on events sample_type.
+	 */
+	sample_type = perf_evlist__combined_sample_type(*pevlist);
+	callchain_param_setup(sample_type);
+	return 0;
+}
+
 int cmd_report(int argc, const char **argv)
 {
 	struct perf_session *session;
@@ -1120,7 +1129,7 @@ int cmd_report(int argc, const char **argv)
 			.fork		 = perf_event__process_fork,
 			.lost		 = perf_event__process_lost,
 			.read		 = process_read_event,
-			.attr		 = perf_event__process_attr,
+			.attr		 = process_attr,
 			.tracing_data	 = perf_event__process_tracing_data,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
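
The open-coded record-mode selection removed above now lives in callchain_param_setup(), and the new process_attr() hook re-runs it when attribute events arrive (e.g. in pipe mode, perf record -o - | perf report), so the right unwinder is picked from the combined sample_type. A rough sketch of the decision being centralized, assuming the helper keeps the same DWARF/LBR/FP precedence as the code deleted here; the enum names are illustrative:

#include <linux/perf_event.h>

enum chain_mode { CHAIN_FP, CHAIN_LBR, CHAIN_DWARF };

static enum chain_mode pick_callchain_mode(__u64 sample_type)
{
	if ((sample_type & PERF_SAMPLE_REGS_USER) &&
	    (sample_type & PERF_SAMPLE_STACK_USER))
		return CHAIN_DWARF;	/* user regs + stack dump => DWARF unwind */
	if (sample_type & PERF_SAMPLE_BRANCH_STACK)
		return CHAIN_LBR;	/* branch stack => LBR call stacks */
	return CHAIN_FP;		/* otherwise fall back to frame pointers */
}
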
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 56d7bcd12671..5da243676f12 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -167,6 +167,7 @@ static struct {
 	u64 fields;
 	u64 invalid_fields;
 	u64 user_set_fields;
+	u64 user_unset_fields;
 } output[OUTPUT_TYPE_MAX] = {
 
 	[PERF_TYPE_HARDWARE] = {
@@ -2085,6 +2086,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 	struct perf_script *scr = container_of(tool, struct perf_script, tool);
 	struct evlist *evlist;
 	struct evsel *evsel, *pos;
+	u64 sample_type;
 	int err;
 	static struct evsel_script *es;
 
@@ -2117,12 +2119,34 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
 			return 0;
 	}
 
-	set_print_ip_opts(&evsel->core.attr);
-
-	if (evsel->core.attr.sample_type)
+	if (evsel->core.attr.sample_type) {
 		err = perf_evsel__check_attr(evsel, scr->session);
+		if (err)
+			return err;
+	}
 
-	return err;
+	/*
+	 * Check if we need to enable callchains based
+	 * on events sample_type.
+	 */
+	sample_type = perf_evlist__combined_sample_type(evlist);
+	callchain_param_setup(sample_type);
+
+	/* Enable fields for callchain entries */
+	if (symbol_conf.use_callchain &&
+	    (sample_type & PERF_SAMPLE_CALLCHAIN ||
+	     sample_type & PERF_SAMPLE_BRANCH_STACK ||
+	     (sample_type & PERF_SAMPLE_REGS_USER &&
+	      sample_type & PERF_SAMPLE_STACK_USER))) {
+		int type = output_type(evsel->core.attr.type);
+
+		if (!(output[type].user_unset_fields & PERF_OUTPUT_IP))
+			output[type].fields |= PERF_OUTPUT_IP;
+		if (!(output[type].user_unset_fields & PERF_OUTPUT_SYM))
+			output[type].fields |= PERF_OUTPUT_SYM;
+	}
+	set_print_ip_opts(&evsel->core.attr);
+	return 0;
 }
 
 static int print_event_with_time(struct perf_tool *tool,
@@ -2434,7 +2458,7 @@ static int __cmd_script(struct perf_script *script)
 struct script_spec {
 	struct list_head	node;
 	struct scripting_ops	*ops;
-	char			spec[0];
+	char			spec[];
 };
 
 static LIST_HEAD(script_specs);
@@ -2672,9 +2696,11 @@ parse:
 					if (change == REMOVE) {
 						output[j].fields &= ~all_output_options[i].field;
 						output[j].user_set_fields &= ~all_output_options[i].field;
+						output[j].user_unset_fields |= all_output_options[i].field;
 					} else {
 						output[j].fields |= all_output_options[i].field;
 						output[j].user_set_fields |= all_output_options[i].field;
+						output[j].user_unset_fields &= ~all_output_options[i].field;
 					}
 					output[j].user_set = true;
 					output[j].wildcard_set = true;
@@ -3286,7 +3312,10 @@ static int parse_xed(const struct option *opt __maybe_unused,
 		     const char *str __maybe_unused,
 		     int unset __maybe_unused)
 {
-	force_pager("xed -F insn: -A -64 | less");
+	if (isatty(1))
+		force_pager("xed -F insn: -A -64 | less");
+	else
+		force_pager("xed -F insn: -A -64");
 	return 0;
 }
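
The new user_unset_fields mask remembers fields the user explicitly removed with a '-' prefix in -F, so the callchain auto-enable in process_attr() never re-adds ip/sym behind the user's back. The bookkeeping reduces to three bitmasks per output type; a toy model of it (the FIELD_* names are illustrative, not the full PERF_OUTPUT_* list):

#include <stdint.h>
#include <stdio.h>

#define FIELD_IP  (1u << 0)
#define FIELD_SYM (1u << 1)

struct output_cfg {
	uint32_t fields, user_set, user_unset;
};

static void user_add(struct output_cfg *o, uint32_t f)
{
	o->fields |= f;  o->user_set |= f;  o->user_unset &= ~f;
}

static void user_remove(struct output_cfg *o, uint32_t f)
{
	o->fields &= ~f; o->user_set &= ~f; o->user_unset |= f;
}

/* Later, auto-enable callchain fields only if the user never opted out. */
static void auto_enable_callchain_fields(struct output_cfg *o)
{
	if (!(o->user_unset & FIELD_IP))
		o->fields |= FIELD_IP;
	if (!(o->user_unset & FIELD_SYM))
		o->fields |= FIELD_SYM;
}

int main(void)
{
	struct output_cfg o = { 0 };

	user_add(&o, FIELD_IP);			/* e.g. perf script -F ip,-sym */
	user_remove(&o, FIELD_SYM);
	auto_enable_callchain_fields(&o);	/* callchains detected later   */
	printf("ip=%d sym=%d\n", !!(o.fields & FIELD_IP), !!(o.fields & FIELD_SYM));
	return 0;				/* prints: ip=1 sym=0          */
}
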
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e0c1ad23c768..9be020e0098a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -66,6 +66,7 @@
 #include "util/time-utils.h"
 #include "util/top.h"
 #include "util/affinity.h"
+#include "util/pfm.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
@@ -189,6 +190,59 @@ static struct perf_stat_config stat_config = {
 	.big_num		= true,
 };
 
+static bool cpus_map_matched(struct evsel *a, struct evsel *b)
+{
+	if (!a->core.cpus && !b->core.cpus)
+		return true;
+
+	if (!a->core.cpus || !b->core.cpus)
+		return false;
+
+	if (a->core.cpus->nr != b->core.cpus->nr)
+		return false;
+
+	for (int i = 0; i < a->core.cpus->nr; i++) {
+		if (a->core.cpus->map[i] != b->core.cpus->map[i])
+			return false;
+	}
+
+	return true;
+}
+
+static void evlist__check_cpu_maps(struct evlist *evlist)
+{
+	struct evsel *evsel, *pos, *leader;
+	char buf[1024];
+
+	evlist__for_each_entry(evlist, evsel) {
+		leader = evsel->leader;
+
+		/* Check that leader matches cpus with each member. */
+		if (leader == evsel)
+			continue;
+		if (cpus_map_matched(leader, evsel))
+			continue;
+
+		/* If there's mismatch disable the group and warn user. */
+		WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
+		evsel__group_desc(leader, buf, sizeof(buf));
+		pr_warning("  %s\n", buf);
+
+		if (verbose) {
+			cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
+			pr_warning("     %s: %s\n", leader->name, buf);
+			cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
+			pr_warning("     %s: %s\n", evsel->name, buf);
+		}
+
+		for_each_group_evsel(pos, leader) {
+			pos->leader = pos;
+			pos->core.nr_members = 0;
+		}
+		evsel->leader->core.nr_members = 0;
+	}
+}
+
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
 				 struct timespec *b)
 {
@@ -314,14 +368,14 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 	return 0;
 }
 
-static void read_counters(struct timespec *rs)
+static int read_affinity_counters(struct timespec *rs)
 {
 	struct evsel *counter;
 	struct affinity affinity;
 	int i, ncpus, cpu;
 
 	if (affinity__setup(&affinity) < 0)
-		return;
+		return -1;
 
 	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
 	if (!target__has_cpu(&target) || target__has_per_thread(&target))
@@ -341,6 +395,15 @@ static void read_counters(struct timespec *rs)
 		}
 	}
 	affinity__cleanup(&affinity);
+	return 0;
+}
+
+static void read_counters(struct timespec *rs)
+{
+	struct evsel *counter;
+
+	if (!stat_config.summary && (read_affinity_counters(rs) < 0))
+		return;
 
 	evlist__for_each_entry(evsel_list, counter) {
 		if (counter->err)
@@ -351,6 +414,46 @@ static void read_counters(struct timespec *rs)
 	}
 }
 
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+	int i;
+
+	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+	if (!config->stats)
+		return -1;
+
+	config->stats_num = nthreads;
+
+	for (i = 0; i < nthreads; i++)
+		runtime_stat__init(&config->stats[i]);
+
+	return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+	int i;
+
+	if (!config->stats)
+		return;
+
+	for (i = 0; i < config->stats_num; i++)
+		runtime_stat__exit(&config->stats[i]);
+
+	zfree(&config->stats);
+}
+
+static void runtime_stat_reset(struct perf_stat_config *config)
+{
+	int i;
+
+	if (!config->stats)
+		return;
+
+	for (i = 0; i < config->stats_num; i++)
+		perf_stat__reset_shadow_per_stat(&config->stats[i]);
+}
+
 static void process_interval(void)
 {
 	struct timespec ts, rs;
@@ -359,6 +462,7 @@ static void process_interval(void)
 	diff_timespec(&rs, &ts, &ref_time);
 
 	perf_stat__reset_shadow_per_stat(&rt_stat);
+	runtime_stat_reset(&stat_config);
 	read_counters(&rs);
 
 	if (STAT_RECORD) {
@@ -367,7 +471,7 @@ static void process_interval(void)
 	}
 
 	init_stats(&walltime_nsecs_stats);
-	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
+	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
 	print_counters(&rs, 0, NULL);
 }
 
@@ -722,7 +826,21 @@ try_again_reset:
 	if (stat_config.walltime_run_table)
 		stat_config.walltime_run[run_idx] = t1 - t0;
 
-	update_stats(&walltime_nsecs_stats, t1 - t0);
+	if (interval) {
+		stat_config.interval = 0;
+		stat_config.summary = true;
+		init_stats(&walltime_nsecs_stats);
+		update_stats(&walltime_nsecs_stats, t1 - t0);
+
+		if (stat_config.aggr_mode == AGGR_GLOBAL)
+			perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+
+		perf_evlist__copy_prev_raw_counts(evsel_list);
+		perf_evlist__reset_prev_raw_counts(evsel_list);
+		runtime_stat_reset(&stat_config);
+		perf_stat__reset_shadow_per_stat(&rt_stat);
+	} else
+		update_stats(&walltime_nsecs_stats, t1 - t0);
 
 	/*
 	 * Closing a group leader splits the group, and as we only disable
@@ -821,10 +939,16 @@ static void sig_atexit(void)
 	kill(getpid(), signr);
 }
 
+void perf_stat__set_big_num(int set)
+{
+	stat_config.big_num = (set != 0);
+}
+
 static int stat__set_big_num(const struct option *opt __maybe_unused,
 			     const char *s __maybe_unused, int unset)
 {
 	big_num_opt = unset ? 0 : 1;
+	perf_stat__set_big_num(!unset);
 	return 0;
 }
 
@@ -840,7 +964,10 @@ static int parse_metric_groups(const struct option *opt,
 			       const char *str,
 			       int unset __maybe_unused)
 {
-	return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
+	return metricgroup__parse_groups(opt, str,
+					 stat_config.metric_no_group,
+					 stat_config.metric_no_merge,
+					 &stat_config.metric_events);
 }
 
 static struct option stat_options[] = {
@@ -918,6 +1045,10 @@ static struct option stat_options[] = {
 		     "ms to wait before starting measurement after program start"),
 	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
 			"Only print computed metrics. No raw values", enable_metric_only),
+	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
+		       "don't group metric events, impacts multiplexing"),
+	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
+		       "don't try to share events between metrics in a group"),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
 	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
@@ -935,6 +1066,11 @@ static struct option stat_options[] = {
 		    "Use with 'percore' event qualifier to show the event "
 		    "counts of one hardware thread by sum up total hardware "
 		    "threads of same physical core"),
+#ifdef HAVE_LIBPFM
+	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
+		"libpfm4 event selector. use 'perf list' to list available events",
+		parse_libpfm_events_option),
+#endif
 	OPT_END()
 };
 
@@ -1442,6 +1578,8 @@ static int add_default_attributes(void)
 			struct option opt = { .value = &evsel_list };
 
 			return metricgroup__parse_groups(&opt, "transaction",
+							 stat_config.metric_no_group,
+							 stat_config.metric_no_merge,
Note: anchor unused.
 							 &stat_config.metric_events);
 		}
 
@@ -1737,35 +1875,6 @@ int process_cpu_map_event(struct perf_session *session,
 	return set_maps(st);
 }
 
-static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
-{
-	int i;
-
-	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
-	if (!config->stats)
-		return -1;
-
-	config->stats_num = nthreads;
-
-	for (i = 0; i < nthreads; i++)
-		runtime_stat__init(&config->stats[i]);
-
-	return 0;
-}
-
-static void runtime_stat_delete(struct perf_stat_config *config)
-{
-	int i;
-
-	if (!config->stats)
-		return;
-
-	for (i = 0; i < config->stats_num; i++)
-		runtime_stat__exit(&config->stats[i]);
-
-	zfree(&config->stats);
-}
-
 static const char * const stat_report_usage[] = {
 	"perf stat report [<options>]",
 	NULL,
@@ -2057,6 +2166,8 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
+	evlist__check_cpu_maps(evsel_list);
+
 	/*
 	 * Initialize thread_map with comm names,
 	 * so we could print it out on output.
@@ -2147,7 +2258,7 @@ int cmd_stat(int argc, const char **argv)
 		}
 	}
 
-	if (!forever && status != -1 && !interval)
+	if (!forever && status != -1 && (!interval || stat_config.summary))
 		print_counters(NULL, argc, argv);
 
 	if (STAT_RECORD) {
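
Two details in the stat changes above are easy to miss: the interval path now produces a final summary (hence the prev_raw_counts copy/reset dance and the relaxed print condition), and the walltime update multiplies by 1000000ULL so the ms-to-ns conversion happens in 64 bits. With the old 32-bit multiply, any -I interval above 4294 ms wrapped around. A small demonstration of the arithmetic with a hypothetical interval value, not perf code:

#include <stdio.h>

int main(void)
{
	unsigned int interval = 5000;			/* perf stat -I 5000 (ms) */
	unsigned int wrapped  = interval * 1000000;	/* 32-bit multiply wraps  */
	unsigned long long ok = interval * 1000000ULL;	/* promoted to 64 bits    */

	printf("wrapped: %u ns\n", wrapped);		/* 705032704  (wrong)     */
	printf("correct: %llu ns\n", ok);		/* 5000000000 (right)     */
	return 0;
}
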
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index c76f84b174c4..4e380e7b5230 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -128,7 +128,7 @@ struct sample_wrapper {
 	struct sample_wrapper *next;
 
 	u64		timestamp;
-	unsigned char	data[0];
+	unsigned char	data[];
 };
 
 #define TYPE_NONE	0
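
The data[0] to data[] change is the treewide move from the old GNU zero-length-array idiom to a C99 flexible array member; the allocation pattern is unchanged. A minimal example of how such a struct is sized and filled (the struct below mirrors sample_wrapper but drops the next pointer for brevity):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct sample_wrapper_like {
	uint64_t	timestamp;
	unsigned char	data[];		/* flexible array member, adds no size */
};

static struct sample_wrapper_like *wrap(uint64_t ts, const void *buf, size_t len)
{
	struct sample_wrapper_like *w = malloc(sizeof(*w) + len);

	if (!w)
		return NULL;
	w->timestamp = ts;
	memcpy(w->data, buf, len);	/* payload lives right after the header */
	return w;
}
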
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 372c38254654..13889d73f8dd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -53,6 +53,7 @@
 
 #include "util/debug.h"
 #include "util/ordered-events.h"
+#include "util/pfm.h"
 
 #include <assert.h>
 #include <elf.h>
@@ -307,7 +308,7 @@ static void perf_top__resort_hists(struct perf_top *t)
 	}
 
 	evlist__for_each_entry(evlist, pos) {
-		perf_evsel__output_resort(pos, NULL);
+		evsel__output_resort(pos, NULL);
 	}
 }
 
@@ -949,7 +950,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
 {
 	struct record_opts *opts = &top->record_opts;
 	struct evlist *evlist = top->evlist;
-	struct perf_evsel_config_term *term;
+	struct evsel_config_term *term;
 	struct list_head *config_terms;
 	struct evsel *evsel;
 	int set, overwrite = -1;
@@ -958,7 +959,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
 		set = -1;
 		config_terms = &evsel->config_terms;
 		list_for_each_entry(term, config_terms, list) {
-			if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+			if (term->type == EVSEL__CONFIG_TERM_OVERWRITE)
 				set = term->val.overwrite ? 1 : 0;
 		}
 
@@ -1575,6 +1576,11 @@ int cmd_top(int argc, const char **argv)
 		    "WARNING: should be used on grouped events."),
 	OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
 		    "Enable LBR callgraph stitching approach"),
+#ifdef HAVE_LIBPFM
+	OPT_CALLBACK(0, "pfm-events", &top.evlist, "event",
+		"libpfm4 event selector. use 'perf list' to list available events",
+		parse_libpfm_events_option),
+#endif
 	OPTS_EVSWITCH(&top.evswitch),
 	OPT_END()
 	};
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a46efb907bd4..4cbb64edc998 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -461,11 +461,11 @@ static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
 
 static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
 {
-	struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+	struct evsel *evsel = evsel__newtp("raw_syscalls", direction);
 
 	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
 	if (IS_ERR(evsel))
-		evsel = perf_evsel__newtp("syscalls", direction);
+		evsel = evsel__newtp("syscalls", direction);
 
 	if (IS_ERR(evsel))
 		return NULL;
@@ -1748,12 +1748,26 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	struct syscall *sc;
 	const char *name = syscalltbl__name(trace->sctbl, id);
 
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
 	if (trace->syscalls.table == NULL) {
 		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
 		if (trace->syscalls.table == NULL)
 			return -ENOMEM;
 	}
+#else
+	if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
+		// When using libaudit we don't know beforehand what the max syscall id is
+		struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+		if (table == NULL)
+			return -ENOMEM;
+
+		memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
 
+		trace->syscalls.table	      = table;
+		trace->sctbl->syscalls.max_id = id;
+	}
+#endif
 	sc = trace->syscalls.table + id;
 	if (sc->nonexistent)
 		return 0;
@@ -2077,8 +2091,20 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 
 	err = -EINVAL;
 
-	if (id > trace->sctbl->syscalls.max_id)
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+	if (id > trace->sctbl->syscalls.max_id) {
+#else
+	if (id >= trace->sctbl->syscalls.max_id) {
+		/*
+		 * With libaudit we don't know beforehand what the max_id is,
+		 * so we let trace__read_syscall_info() figure that out as we
+		 * go on reading syscalls.
+		 */
+		err = trace__read_syscall_info(trace, id);
+		if (err)
+#endif
 		goto out_cant_read;
+	}
 
 	if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
 	    (err = trace__read_syscall_info(trace, id)) != 0)
@@ -3045,7 +3071,7 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
 	return found;
 }
 
-static struct evsel *perf_evsel__new_pgfault(u64 config)
+static struct evsel *evsel__new_pgfault(u64 config)
 {
 	struct evsel *evsel;
 	struct perf_event_attr attr = {
@@ -3174,6 +3200,26 @@ out_enomem:
 }
 
 #ifdef HAVE_LIBBPF_SUPPORT
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
+{
+	if (trace->bpf_obj == NULL)
+		return NULL;
+
+	return bpf_object__find_map_by_name(trace->bpf_obj, name);
+}
+
+static void trace__set_bpf_map_filtered_pids(struct trace *trace)
+{
+	trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
+}
+
+static void trace__set_bpf_map_syscalls(struct trace *trace)
+{
+	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
+	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
+	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
+}
+
 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
 {
 	if (trace->bpf_obj == NULL)
@@ -3512,6 +3558,20 @@ static void trace__delete_augmented_syscalls(struct trace *trace)
 	trace->bpf_obj = NULL;
 }
 #else // HAVE_LIBBPF_SUPPORT
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
+						   const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
+{
+}
+
+static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
+{
+}
+
 static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
 {
 	return 0;
@@ -3841,7 +3901,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 	if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
-		pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+		pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
 		if (pgfault_maj == NULL)
 			goto out_error_mem;
 		evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
@@ -3849,7 +3909,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 	if ((trace->trace_pgfaults & TRACE_PFMIN)) {
-		pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+		pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
 		if (pgfault_min == NULL)
 			goto out_error_mem;
 		evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
@@ -4600,26 +4660,6 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
 	return 0;
 }
 
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
-{
-	if (trace->bpf_obj == NULL)
-		return NULL;
-
-	return bpf_object__find_map_by_name(trace->bpf_obj, name);
-}
-
-static void trace__set_bpf_map_filtered_pids(struct trace *trace)
-{
-	trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
-}
-
-static void trace__set_bpf_map_syscalls(struct trace *trace)
-{
-	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
-	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
-	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
-}
-
 static int trace__config(const char *var, const char *value, void *arg)
 {
 	struct trace *trace = arg;
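
In the libaudit fallback above the syscall table is grown lazily: realloc() to cover the new id, then zero only the freshly added tail so earlier entries keep their state. The same grow-and-zero pattern in isolation; this sketch tracks an entry count rather than a max id to keep the bounds simple, and the names are illustrative:

#include <stdlib.h>
#include <string.h>

struct entry { const char *name; int nonexistent; };

static int table_grow(struct entry **tab, int *nr, int id)
{
	struct entry *t;

	if (id < *nr)
		return 0;				/* already covers this id       */

	t = realloc(*tab, (id + 1) * sizeof(*t));
	if (!t)
		return -1;				/* old block still valid here   */

	memset(t + *nr, 0, (id + 1 - *nr) * sizeof(*t));/* zero only the new tail       */
	*tab = t;
	*nr  = id + 1;
	return 0;
}
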
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index cf147db4e5ca..94c2bc22c2bb 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -128,4 +128,8 @@ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in
 # diff non-symmetric files
 check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
 
+# check duplicated library files
+check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
+check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
+
 cd tools/perf
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index c441a34cb1c0..fcca275e5bf9 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -32,34 +32,41 @@ static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
 
 #ifdef HAVE_JVMTI_CMLR
 static jvmtiError
-do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
-		    jvmti_line_info_t *tab, jint *nr)
+do_get_line_number(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+		   jvmti_line_info_t *tab)
 {
-	jint i, lines = 0;
-	jint nr_lines = 0;
+	jint i, nr_lines = 0;
 	jvmtiLineNumberEntry *loc_tab = NULL;
 	jvmtiError ret;
+	jint src_line = -1;
 
 	ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
-	if (ret != JVMTI_ERROR_NONE) {
+	if (ret == JVMTI_ERROR_ABSENT_INFORMATION || ret == JVMTI_ERROR_NATIVE_METHOD) {
+		/* No debug information for this method */
+		return ret;
+	} else if (ret != JVMTI_ERROR_NONE) {
 		print_error(jvmti, "GetLineNumberTable", ret);
 		return ret;
 	}
 
-	for (i = 0; i < nr_lines; i++) {
-		if (loc_tab[i].start_location < bci) {
-			tab[lines].pc = (unsigned long)pc;
-			tab[lines].line_number = loc_tab[i].line_number;
-			tab[lines].discrim = 0; /* not yet used */
-			tab[lines].methodID = m;
-			lines++;
-		} else {
-			break;
-		}
+	for (i = 0; i < nr_lines && loc_tab[i].start_location <= bci; i++) {
+		src_line = i;
+	}
+
+	if (src_line != -1) {
+		tab->pc = (unsigned long)pc;
+		tab->line_number = loc_tab[src_line].line_number;
+		tab->discrim = 0; /* not yet used */
+		tab->methodID = m;
+
+		ret = JVMTI_ERROR_NONE;
+	} else {
+		ret = JVMTI_ERROR_ABSENT_INFORMATION;
 	}
+
 	(*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
-	*nr = lines;
-	return JVMTI_ERROR_NONE;
+
+	return ret;
 }
 
 static jvmtiError
@@ -67,9 +74,8 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
 {
 	const jvmtiCompiledMethodLoadRecordHeader *hdr;
 	jvmtiCompiledMethodLoadInlineRecord *rec;
-	jvmtiLineNumberEntry *lne = NULL;
 	PCStackInfo *c;
-	jint nr, ret;
+	jint ret;
 	int nr_total = 0;
 	int i, lines_total = 0;
 
@@ -82,21 +88,7 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
 	for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
 		if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
 			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
-			for (i = 0; i < rec->numpcs; i++) {
-				c = rec->pcinfo + i;
-				nr = 0;
-				/*
-				 * unfortunately, need a tab to get the number of lines!
-				 */
-				ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
-				if (ret == JVMTI_ERROR_NONE) {
-					/* free what was allocated for nothing */
-					(*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
-					nr_total += (int)nr;
-				} else {
-					print_error(jvmti, "GetLineNumberTable", ret);
-				}
-			}
+			nr_total += rec->numpcs;
 		}
 	}
 
@@ -115,14 +107,17 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
 			rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
 			for (i = 0; i < rec->numpcs; i++) {
 				c = rec->pcinfo + i;
-				nr = 0;
-				ret = do_get_line_numbers(jvmti, c->pc,
-							  c->methods[0],
-							  c->bcis[0],
-							  *tab + lines_total,
-							  &nr);
+				/*
+				 * c->methods is the stack of inlined method calls
+				 * at c->pc. [0] is the leaf method. Caller frames
+				 * are ignored at the moment.
+				 */
+				ret = do_get_line_number(jvmti, c->pc,
+							 c->methods[0],
+							 c->bcis[0],
+							 *tab + lines_total);
 				if (ret == JVMTI_ERROR_NONE)
-					lines_total += nr;
+					lines_total++;
 			}
 		}
 	}
@@ -246,8 +241,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 	char *class_sign = NULL;
 	char *func_name = NULL;
 	char *func_sign = NULL;
-	char *file_name = NULL;
-	char fn[PATH_MAX];
 	uint64_t addr = (uint64_t)(uintptr_t)code_addr;
 	jvmtiError ret;
 	int nr_lines = 0; /* in line_tab[] */
@@ -264,7 +257,9 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 	if (has_line_numbers && map && map_length) {
 		ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
 		if (ret != JVMTI_ERROR_NONE) {
-			warnx("jvmti: cannot get line table for method");
+			if (ret != JVMTI_ERROR_NOT_FOUND) {
+				warnx("jvmti: cannot get line table for method");
+			}
 			nr_lines = 0;
 		} else if (nr_lines > 0) {
 			line_file_names = malloc(sizeof(char*) * nr_lines);
@@ -282,12 +277,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 		}
 	}
 
-	ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
-	if (ret != JVMTI_ERROR_NONE) {
-		print_error(jvmti, "GetSourceFileName", ret);
-		goto error;
-	}
-
 	ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
 					  &class_sign, NULL);
 	if (ret != JVMTI_ERROR_NONE) {
@@ -302,8 +291,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
 		goto error;
 	}
 
-	copy_class_filename(class_sign, file_name, fn, PATH_MAX);
-
 	/*
 	 * write source line info record if we have it
 	 */
@@ -323,7 +310,6 @@ error:
 	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
 	(*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
 	(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
-	(*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
 	free(line_tab);
 	while (line_file_names && (nr_lines > 0)) {
 	    if (line_file_names[nr_lines - 1]) {
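
do_get_line_number() above walks the JVMTI line-number table and keeps the last entry whose start_location is still <= the requested bytecode index, i.e. the source line actually covering that bci; the old code instead copied every preceding entry. The lookup itself reduces to the loop below, with the JVMTI types replaced by plain ints for illustration:

/* Return the line number of the last table entry with start <= bci, or -1. */
struct line_entry { long start; int line; };

static int line_for_bci(const struct line_entry *tab, int nr, long bci)
{
	int i, best = -1;

	for (i = 0; i < nr && tab[i].start <= bci; i++)
		best = i;		/* table is sorted by start_location */

	return best >= 0 ? tab[best].line : -1;
}
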
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
index bffb2d4a6420..fc4aa6c2ddc9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
@@ -169,7 +169,7 @@
     },
     {
         "BriefDescription": "Cycles GCT empty where dispatch was held",
-        "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)",
+        "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "gct_empty_disp_held_cpi"
     },
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
index 811c2a8c1c9e..80816d6402e9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
@@ -208,6 +208,84 @@
         "MetricName": "fxu_stall_cpi"
     },
     {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to branch mispred",
+        "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_br_mpred_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss and branch mispred",
+        "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table other stalls",
+        "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_cyc_other_cpi"
+    },
+    {
+        "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason",
+        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_disp_held_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
+        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_disp_held_hb_full_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
+        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_disp_held_issq_cpi"
+    },
+    {
+        "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
+        "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_disp_held_other_cpi"
+    },
+    {
+        "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
+        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_disp_held_sync_cpi"
+    },
+    {
+        "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
+        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_disp_held_tbegin_cpi"
+    },
+    {
+        "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
+        "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_ic_l2_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from the local L3",
+        "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_ic_l3_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
+        "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_ic_l3miss_cpi"
+    },
+    {
+        "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss",
+        "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
+        "MetricGroup": "cpi_breakdown",
+        "MetricName": "ict_noslot_ic_miss_cpi"
+    },
+    {
         "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "issue_hold_cpi"
@@ -313,7 +391,7 @@
         "MetricName": "nested_tend_stall_cpi"
     },
     {
-        "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread",
+        "BriefDescription": "Number of cycles the Instruction Completion Table has no itags assigned to this thread",
         "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "nothing_dispatched_cpi"
@@ -362,7 +440,7 @@
     },
     {
         "BriefDescription": "Completion stall for other reasons",
-        "MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
+        "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "other_stall_cpi"
     },
@@ -425,7 +503,7 @@
         "MetricName": "st_fwd_stall_cpi"
     },
     {
-        "BriefDescription": "Nothing completed and ICT not empty",
+        "BriefDescription": "Nothing completed and Instruction Completion Table not empty",
         "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL",
         "MetricGroup": "cpi_breakdown",
         "MetricName": "stall_cpi"
@@ -1820,71 +1898,6 @@
         "MetricName": "fxu_all_idle"
     },
     {
-        "BriefDescription": "Ict empty for this thread due to branch mispred",
-        "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_br_mpred_cpi"
-    },
-    {
-        "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred",
-        "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
-    },
-    {
-        "BriefDescription": "ICT other stalls",
-        "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_cyc_other_cpi"
-    },
-    {
-        "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
-        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_disp_held_cpi"
-    },
-    {
-        "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
-        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_disp_held_hb_full_cpi"
-    },
-    {
-        "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
-        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_disp_held_issq_cpi"
-    },
-    {
-        "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
-        "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_disp_held_other_cpi"
-    },
-    {
-        "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
-        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_disp_held_sync_cpi"
-    },
-    {
-        "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
-        "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_disp_held_tbegin_cpi"
-    },
-    {
-        "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
-        "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_ic_l2_cpi"
-    },
-    {
-        "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3",
-        "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_ic_l3_cpi"
-    },
-    {
-        "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
-        "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_ic_l3miss_cpi"
-    },
-    {
-        "BriefDescription": "Ict empty for this thread due to Icache Miss",
-        "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
-        "MetricName": "ict_noslot_ic_miss_cpi"
-    },
-    {
         "BriefDescription": "Rate of IERAT reloads from L2",
         "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
         "MetricName": "ipteg_from_l2_rate_percent"
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 7fde0d2943cd..d25eebce34c9 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -328,31 +328,31 @@
     },
     {
         "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
-        "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+        "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
         "MetricGroup": "Memory_Lat",
         "MetricName": "DRAM_Read_Latency"
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
-        "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@",
+        "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@",
         "MetricGroup": "Memory_BW",
         "MetricName": "DRAM_Parallel_Reads"
     },
     {
         "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
-        "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0",
+        "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )",
         "MetricGroup": "Memory_Lat",
         "MetricName": "MEM_PMM_Read_Latency"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
-        "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
+        "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
         "MetricGroup": "Memory_BW",
         "MetricName": "PMM_Read_BW"
     },
     {
         "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
-        "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
+        "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
         "MetricGroup": "Memory_BW",
         "MetricName": "PMM_Write_BW"
     },
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index b4f91137f40c..390bdab1be9d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -328,13 +328,13 @@
     },
     {
         "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
-        "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+        "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
         "MetricGroup": "Memory_Lat",
         "MetricName": "DRAM_Read_Latency"
     },
     {
         "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
-        "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@",
+        "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@",
         "MetricGroup": "Memory_BW",
         "MetricName": "DRAM_Parallel_Reads"
     },
diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h
index c7b0f6ea2a31..1bdfd55fff30 100644
--- a/tools/perf/pmu-events/jsmn.h
+++ b/tools/perf/pmu-events/jsmn.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: MIT */
 #ifndef __JSMN_H_
 #define __JSMN_H_
 
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index c75557aeef0e..cd00498a5dce 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -57,6 +57,8 @@ perf-y += maps.o
 perf-y += time-utils-test.o
 perf-y += genelf.o
 perf-y += api-io.o
+perf-y += demangle-java-test.o
+perf-y += pfm.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy
new file mode 100644
index 000000000000..eba723cc0d38
--- /dev/null
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -0,0 +1,50 @@
+# Event added by system-wide or CPU perf-record to handle the race of
+# processes starting while /proc is processed.
+[event]
+fd=1
+group_fd=-1
+cpu=*
+pid=-1
+flags=8
+type=1
+size=120
+config=9
+sample_period=4000
+sample_type=455
+read_format=4
+# Event will be enabled right away.
+disabled=0
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0
+exclude_hv=0
+exclude_idle=0
+mmap=1
+comm=1
+freq=1
+inherit_stat=0
+enable_on_exec=0
+task=1
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=1
+exclude_host=0
+exclude_guest=0
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+mmap2=1
+comm_exec=1
+context_switch=0
+write_backward=0
+namespaces=0
+use_clockid=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0
index 93818054ae20..317730b906dd 100644
--- a/tools/perf/tests/attr/test-record-C0
+++ b/tools/perf/tests/attr/test-record-C0
@@ -9,6 +9,14 @@ cpu=0
 # no enable on exec for CPU attached
 enable_on_exec=0
 
-# PERF_SAMPLE_IP | PERF_SAMPLE_TID PERF_SAMPLE_TIME | # PERF_SAMPLE_PERIOD
+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
+# PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD
 # + PERF_SAMPLE_CPU added by -C 0
-sample_type=391
+sample_type=455
+
+# Dummy event handles mmaps, comm and task.
+mmap=0
+comm=0
+task=0
+
+[event:system-wide-dummy]
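
sample_type=455 in the updated test (and in the system-wide dummy event above) is just the decimal value of the expected bit mask; the jump from 391 is the addition of PERF_SAMPLE_ID. Spelled out with the bit values from the UAPI perf_event.h header:

#include <linux/perf_event.h>

/* PERF_SAMPLE_IP(1) + TID(2) + TIME(4) + ID(64) + CPU(128) + PERIOD(256) = 455.
 * Without PERF_SAMPLE_ID the sum is 391, the value the test expected before. */
unsigned long long expected_sample_type =
	PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD;
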
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 3471ec52ea11..da5b6cc23f25 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -75,6 +75,13 @@ static struct test generic_tests[] = {
 	{
 		.desc = "PMU events",
 		.func = test__pmu_events,
+		.subtest = {
+			.skip_if_fail	= false,
+			.get_nr		= test__pmu_events_subtest_get_nr,
+			.get_desc	= test__pmu_events_subtest_get_desc,
+			.skip_reason	= test__pmu_events_subtest_skip_reason,
+		},
+
 	},
 	{
 		.desc = "DSO data read",
@@ -310,6 +317,15 @@ static struct test generic_tests[] = {
 		.func = test__jit_write_elf,
 	},
 	{
+		.desc = "Test libpfm4 support",
+		.func = test__pfm,
+		.subtest = {
+			.skip_if_fail	= true,
+			.get_nr		= test__pfm_subtest_get_nr,
+			.get_desc	= test__pfm_subtest_get_desc,
+		}
+	},
+	{
 		.desc = "Test api io",
 		.func = test__api_io,
 	},
@@ -318,6 +334,10 @@ static struct test generic_tests[] = {
 		.func = test__maps__merge_in,
 	},
 	{
+		.desc = "Demangle Java",
+		.func = test__demangle_java,
+	},
+	{
 		.func = NULL,
 	},
 };
@@ -327,7 +347,7 @@ static struct test *tests[] = {
 	arch_tests,
 };
 
-static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[])
+static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[])
 {
 	int i;
 
@@ -344,7 +364,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
 			continue;
 		}
 
-		if (strcasestr(test->desc, argv[i]))
+		if (strcasestr(desc, argv[i]))
 			return true;
 	}
 
@@ -429,8 +449,15 @@ static int test_and_print(struct test *t, bool force_skip, int subtest)
 	case TEST_OK:
 		pr_info(" Ok\n");
 		break;
-	case TEST_SKIP:
-		color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+	case TEST_SKIP: {
+		const char *skip_reason = NULL;
+		if (t->subtest.skip_reason)
+			skip_reason = t->subtest.skip_reason(subtest);
+		if (skip_reason)
+			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason);
+		else
+			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+	}
 		break;
 	case TEST_FAIL:
 	default:
@@ -566,7 +593,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
 			.priv = &st,
 		};
 
-		if (!perf_test__matches(&test, curr, argc, argv))
+		if (!perf_test__matches(test.desc, curr, argc, argv))
 			continue;
 
 		st.file = ent->d_name;
@@ -594,9 +621,25 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 
 	for_each_test(j, t) {
 		int curr = i++, err;
+		int subi;
 
-		if (!perf_test__matches(t, curr, argc, argv))
-			continue;
+		if (!perf_test__matches(t->desc, curr, argc, argv)) {
+			bool skip = true;
+			int subn;
+
+			if (!t->subtest.get_nr)
+				continue;
+
+			subn = t->subtest.get_nr();
+
+			for (subi = 0; subi < subn; subi++) {
+				if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+					skip = false;
+			}
+
+			if (skip)
+				continue;
+		}
 
 		if (t->is_supported && !t->is_supported()) {
 			pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
@@ -624,7 +667,6 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 			 */
 			int subw = width > 2 ? width - 2 : width;
 			bool skip = false;
-			int subi;
 
 			if (subn <= 0) {
 				color_fprintf(stderr, PERF_COLOR_YELLOW,
@@ -641,6 +683,9 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 			}
 
 			for (subi = 0; subi < subn; subi++) {
+				if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+					continue;
+
 				pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
 					t->subtest.get_desc(subi));
 				err = test_and_print(t, skip, subi);
@@ -674,7 +719,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
 			.desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
 		};
 
-		if (!perf_test__matches(&t, curr, argc, argv))
+		if (!perf_test__matches(t.desc, curr, argc, argv))
 			continue;
 
 		pr_info("%2d: %s\n", i, t.desc);
@@ -693,7 +738,7 @@ static int perf_test__list(int argc, const char **argv)
 	for_each_test(j, t) {
 		int curr = i++;
 
-		if (!perf_test__matches(t, curr, argc, argv) ||
+		if (!perf_test__matches(t->desc, curr, argc, argv) ||
 		    (t->is_supported && !t->is_supported()))
 			continue;
 
diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c
new file mode 100644
index 000000000000..8f3b90832fb0
--- /dev/null
+++ b/tools/perf/tests/demangle-java-test.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "tests.h"
+#include "session.h"
+#include "debug.h"
+#include "demangle-java.h"
+
+int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	int ret = TEST_OK;
+	char *buf = NULL;
+	size_t i;
+
+	struct {
+		const char *mangled, *demangled;
+	} test_cases[] = {
+		{ "Ljava/lang/StringLatin1;equals([B[B)Z",
+		  "boolean java.lang.StringLatin1.equals(byte[], byte[])" },
+		{ "Ljava/util/zip/ZipUtils;CENSIZ([BI)J",
+		  "long java.util.zip.ZipUtils.CENSIZ(byte[], int)" },
+		{ "Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z",
+		  "boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)" },
+		{ "Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V",
+		  "void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)" },
+		{ "Ljava/lang/Object;<init>()V",
+		  "void java.lang.Object<init>()" },
+	};
+
+	for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
+		buf = java_demangle_sym(test_cases[i].mangled, 0);
+		if (strcmp(buf, test_cases[i].demangled)) {
+			pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled,
+				 buf, test_cases[i].demangled);
+			ret = TEST_FAIL;
+		}
+		free(buf);
+	}
+
+	return ret;
+}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 779ce280a0e9..2491d167bf76 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -37,6 +37,7 @@ static int init_live_machine(struct machine *machine)
 	union perf_event event;
 	pid_t pid = getpid();
 
+	memset(&event, 0, sizeof(event));
 	return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
 						  mmap_handler, machine, true);
 }
@@ -94,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
 	return strcmp((const char *) symbol, funcs[idx]);
 }
 
-noinline int test_dwarf_unwind__thread(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
 {
 	struct perf_sample sample;
 	unsigned long cnt = 0;
@@ -125,7 +126,7 @@ noinline int test_dwarf_unwind__thread(struct thread *thread)
 
 static int global_unwind_retval = -INT_MAX;
 
-noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
 {
 	/* Any possible value should be 'thread' */
 	struct thread *thread = *(struct thread **)p1;
@@ -144,7 +145,7 @@ noinline int test_dwarf_unwind__compare(void *p1, void *p2)
 	return p1 - p2;
 }
 
-noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
 {
 	struct thread *array[2] = {thread, thread};
 	void *fp = &bsearch;
@@ -163,12 +164,12 @@ noinline int test_dwarf_unwind__krava_3(struct thread *thread)
 	return global_unwind_retval;
 }
 
-noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread)
 {
 	return test_dwarf_unwind__krava_3(thread);
 }
 
-noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread)
 {
 	return test_dwarf_unwind__krava_2(thread);
 }
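
The __no_tail_call annotations keep the compiler from turning the krava_1 -> krava_2 -> krava_3 chain into sibling calls: the DWARF unwind test asserts that every one of these frames shows up in the unwound stack, and a tail call would splice its frame out. A hedged sketch of what such a macro can expand to; the real definition lives in tools/include/linux/compiler-gcc.h and may differ:

/* Illustrative only: one way to ask GCC not to emit sibling/tail calls. */
#if defined(__GNUC__) && !defined(__clang__)
# define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls")))
#else
# define __no_tail_call
#endif

__no_tail_call static int leaf(int x)
{
	return x + 1;
}

__no_tail_call static int caller(int x)
{
	return leaf(x);	/* would otherwise be a candidate for a tail call */
}
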
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 61ecd8e33a01..f7f3e5b4c180 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -100,12 +100,11 @@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int
 {
 	int err = 0, ret = 0;
 
-	err = perf_evsel__name_array_test(perf_evsel__hw_names);
+	err = perf_evsel__name_array_test(evsel__hw_names);
 	if (err)
 		ret = err;
 
-	err = __perf_evsel__name_array_test(perf_evsel__sw_names,
-					    PERF_COUNT_SW_DUMMY + 1);
+	err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1);
 	if (err)
 		ret = err;
 
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index ce8aa32bc3ee..0e224a0a55d9 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -35,11 +35,11 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name,
 
 int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
 {
-	struct evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
+	struct evsel *evsel = evsel__newtp("sched", "sched_switch");
 	int ret = 0;
 
 	if (IS_ERR(evsel)) {
-		pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+		pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel));
 		return -1;
 	}
 
@@ -66,10 +66,10 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
 
 	evsel__delete(evsel);
 
-	evsel = perf_evsel__newtp("sched", "sched_wakeup");
+	evsel = evsel__newtp("sched", "sched_wakeup");
 
 	if (IS_ERR(evsel)) {
-		pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+		pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel));
 		return -1;
 	}
 
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index f9e8e5628836..1cb02ca2b15f 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -19,15 +19,13 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
 int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 {
 	const char *p;
-	const char **other;
-	double val;
-	int i, ret;
+	double val, *val_ptr;
+	int ret;
 	struct expr_parse_ctx ctx;
-	int num_other;
 
 	expr__ctx_init(&ctx);
-	expr__add_id(&ctx, "FOO", 1);
-	expr__add_id(&ctx, "BAR", 2);
+	expr__add_id(&ctx, strdup("FOO"), 1);
+	expr__add_id(&ctx, strdup("BAR"), 2);
 
 	ret = test(&ctx, "1+1", 2);
 	ret |= test(&ctx, "FOO+BAR", 3);
@@ -39,6 +37,8 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 	ret |= test(&ctx, "min(1,2) + 1", 2);
 	ret |= test(&ctx, "max(1,2) + 1", 3);
 	ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
+	ret |= test(&ctx, "1.1 + 2.1", 3.2);
+	ret |= test(&ctx, ".1 + 2.", 2.1);
 
 	if (ret)
 		return ret;
@@ -51,25 +51,29 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
 	ret = expr__parse(&val, &ctx, p, 1);
 	TEST_ASSERT_VAL("missing operand", ret == -1);
 
+	expr__ctx_clear(&ctx);
 	TEST_ASSERT_VAL("find other",
-			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other, 1) == 0);
-	TEST_ASSERT_VAL("find other", num_other == 3);
-	TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
-	TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
-	TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
-	TEST_ASSERT_VAL("find other", other[3] == NULL);
+			expr__find_other("FOO + BAR + BAZ + BOZO", "FOO",
+					 &ctx, 1) == 0);
+	TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3);
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO",
+						    (void **)&val_ptr));
 
+	expr__ctx_clear(&ctx);
 	TEST_ASSERT_VAL("find other",
-			expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL,
-				   &other, &num_other, 3) == 0);
-	TEST_ASSERT_VAL("find other", num_other == 2);
-	TEST_ASSERT_VAL("find other", !strcmp(other[0], "EVENT1,param=3/"));
-	TEST_ASSERT_VAL("find other", !strcmp(other[1], "EVENT2,param=3/"));
-	TEST_ASSERT_VAL("find other", other[2] == NULL);
+			expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@",
+					 NULL, &ctx, 3) == 0);
+	TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2);
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/",
+						    (void **)&val_ptr));
+	TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/",
+						    (void **)&val_ptr));
 
-	for (i = 0; i < num_other; i++)
-		zfree(&other[i]);
-	free((void *)other);
+	expr__ctx_clear(&ctx);
 
 	return 0;
 }
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 7a542f1c1c78..3f2e1a581247 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -190,7 +190,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec
 	 * function since TEST_ASSERT_VAL() returns in case of failure.
 	 */
 	hists__collapse_resort(hists, NULL);
-	perf_evsel__output_resort(hists_to_evsel(hists), NULL);
+	evsel__output_resort(hists_to_evsel(hists), NULL);
 
 	if (verbose > 2) {
 		pr_info("use callchain: %d, cumulate callchain: %d\n",
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 618b51ffcc01..123e07d35b55 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -142,7 +142,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu
 		struct hists *hists = evsel__hists(evsel);
 
 		hists__collapse_resort(hists, NULL);
-		perf_evsel__output_resort(evsel, NULL);
+		evsel__output_resort(evsel, NULL);
 
 		if (verbose > 2) {
 			pr_info("Normal histogram\n");
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 38f804ff6452..8973f35df604 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -155,7 +155,7 @@ static int test1(struct evsel *evsel, struct machine *machine)
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	perf_evsel__output_resort(evsel, NULL);
+	evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -255,7 +255,7 @@ static int test2(struct evsel *evsel, struct machine *machine)
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	perf_evsel__output_resort(evsel, NULL);
+	evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -309,7 +309,7 @@ static int test3(struct evsel *evsel, struct machine *machine)
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	perf_evsel__output_resort(evsel, NULL);
+	evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -387,7 +387,7 @@ static int test4(struct evsel *evsel, struct machine *machine)
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	perf_evsel__output_resort(evsel, NULL);
+	evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -490,7 +490,7 @@ static int test5(struct evsel *evsel, struct machine *machine)
 		goto out;
 
 	hists__collapse_resort(hists, NULL);
-	perf_evsel__output_resort(evsel, NULL);
+	evsel__output_resort(evsel, NULL);
 
 	if (verbose > 2) {
 		pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 5d0c3a9c47a1..9b651dfe0a6b 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -84,10 +84,13 @@ make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
 make_no_auxtrace    := NO_AUXTRACE=1
 make_no_libbpf	    := NO_LIBBPF=1
+make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1
 make_no_libcrypto   := NO_LIBCRYPTO=1
 make_with_babeltrace:= LIBBABELTRACE=1
 make_no_sdt	    := NO_SDT=1
+make_no_syscall_tbl := NO_SYSCALL_TABLE=1
 make_with_clangllvm := LIBCLANGLLVM=1
+make_with_libpfm4   := LIBPFM4=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -112,7 +115,7 @@ make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
 make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
-make_minimal        += NO_LIBCAP=1
+make_minimal        += NO_LIBCAP=1 NO_SYSCALL_TABLE=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -144,8 +147,13 @@ run += make_no_libaudit
 run += make_no_libbionic
 run += make_no_auxtrace
 run += make_no_libbpf
+run += make_no_libbpf_DEBUG
+run += make_no_libcrypto
+run += make_no_sdt
+run += make_no_syscall_tbl
 run += make_with_babeltrace
 run += make_with_clangllvm
+run += make_with_libpfm4
 run += make_help
 run += make_doc
 run += make_perf_o
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index d4b8eb6e337a..7b0dbfc0e17d 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -79,9 +79,9 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
 		char name[64];
 
 		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
-		evsels[i] = perf_evsel__newtp("syscalls", name);
+		evsels[i] = evsel__newtp("syscalls", name);
 		if (IS_ERR(evsels[i])) {
-			pr_debug("perf_evsel__new(%s)\n", name);
+			pr_debug("evsel__new(%s)\n", name);
 			goto out_delete_evlist;
 		}
 
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 900934be22d2..71f85e2cc127 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -44,7 +44,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
 
 	CPU_ZERO(&cpu_set);
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+	evsel = evsel__newtp("syscalls", "sys_enter_openat");
 	if (IS_ERR(evsel)) {
 		tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
 		pr_debug("%s\n", errbuf);
@@ -90,8 +90,8 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
 	 * we use the auto allocation it will allocate just for 1 cpu,
 	 * as we start by cpu 0.
 	 */
-	if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
-		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+	if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
+		pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
 		goto out_close_fd;
 	}
 
@@ -117,7 +117,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
 		}
 	}
 
-	perf_evsel__free_counts(evsel);
+	evsel__free_counts(evsel);
 out_close_fd:
 	perf_evsel__close_fd(&evsel->core);
 out_evsel_delete:
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 1dc2897d2df9..1f5f5e79ae25 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -46,9 +46,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
 		goto out;
 	}
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+	evsel = evsel__newtp("syscalls", "sys_enter_openat");
 	if (IS_ERR(evsel)) {
-		pr_debug("%s: perf_evsel__newtp\n", __func__);
+		pr_debug("%s: evsel__newtp\n", __func__);
 		goto out_delete_evlist;
 	}
 
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index db5d8bb8cd06..85a8f0fe7aea 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -27,7 +27,7 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
 		return -1;
 	}
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+	evsel = evsel__newtp("syscalls", "sys_enter_openat");
 	if (IS_ERR(evsel)) {
 		tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
 		pr_debug("%s\n", errbuf);
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
new file mode 100644
index 000000000000..76a53126efdf
--- /dev/null
+++ b/tools/perf/tests/pfm.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test support for libpfm4 event encodings.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "tests.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/pfm.h"
+
+#include <linux/kernel.h>
+
+#ifdef HAVE_LIBPFM
+static int test__pfm_events(void);
+static int test__pfm_group(void);
+#endif
+
+static const struct {
+	int (*func)(void);
+	const char *desc;
+} pfm_testcase_table[] = {
+#ifdef HAVE_LIBPFM
+	{
+		.func = test__pfm_events,
+		.desc = "test of individual --pfm-events",
+	},
+	{
+		.func = test__pfm_group,
+		.desc = "test groups of --pfm-events",
+	},
+#endif
+};
+
+#ifdef HAVE_LIBPFM
+static int count_pfm_events(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	int count = 0;
+
+	perf_evlist__for_each_entry(evlist, evsel) {
+		count++;
+	}
+	return count;
+}
+
+static int test__pfm_events(void)
+{
+	struct evlist *evlist;
+	struct option opt;
+	size_t i;
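+	/*
+	 * Each case pairs a --pfm-events string with the number of events
+	 * expected to end up in the evlist; "stereolab" is a deliberately
+	 * invalid event name.
+	 */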
+	const struct {
+		const char *events;
+		int nr_events;
+	} table[] = {
+		{
+			.events = "",
+			.nr_events = 0,
+		},
+		{
+			.events = "instructions",
+			.nr_events = 1,
+		},
+		{
+			.events = "instructions,cycles",
+			.nr_events = 2,
+		},
+		{
+			.events = "stereolab",
+			.nr_events = 0,
+		},
+		{
+			.events = "instructions,instructions",
+			.nr_events = 2,
+		},
+		{
+			.events = "stereolab,instructions",
+			.nr_events = 0,
+		},
+		{
+			.events = "instructions,stereolab",
+			.nr_events = 1,
+		},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(table); i++) {
+		evlist = evlist__new();
+		if (evlist == NULL)
+			return -ENOMEM;
+
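+		/*
+		 * parse_libpfm_events_option() reads the target evlist
+		 * through opt->value.
+		 */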
+		opt.value = evlist;
+		parse_libpfm_events_option(&opt,
+					table[i].events,
+					0);
+		TEST_ASSERT_EQUAL(table[i].events,
+				count_pfm_events(&evlist->core),
+				table[i].nr_events);
+		TEST_ASSERT_EQUAL(table[i].events,
+				evlist->nr_groups,
+				0);
+
+		evlist__delete(evlist);
+	}
+	return 0;
+}
+
+static int test__pfm_group(void)
+{
+	struct evlist *evlist;
+	struct option opt;
+	size_t i;
+	const struct {
+		const char *events;
+		int nr_events;
+		int nr_groups;
+	} table[] = {
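+		/*
+		 * "{}" is an empty group and "stereolab" is a deliberately
+		 * invalid event name; the expected counts document how
+		 * parse_libpfm_events_option() handles each case.
+		 */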
+		{
+			.events = "{},",
+			.nr_events = 0,
+			.nr_groups = 0,
+		},
+		{
+			.events = "{instructions}",
+			.nr_events = 1,
+			.nr_groups = 1,
+		},
+		{
+			.events = "{instructions},{}",
+			.nr_events = 1,
+			.nr_groups = 1,
+		},
+		{
+			.events = "{},{instructions}",
+			.nr_events = 0,
+			.nr_groups = 0,
+		},
+		{
+			.events = "{instructions},{instructions}",
+			.nr_events = 2,
+			.nr_groups = 2,
+		},
+		{
+			.events = "{instructions,cycles},{instructions,cycles}",
+			.nr_events = 4,
+			.nr_groups = 2,
+		},
+		{
+			.events = "{stereolab}",
+			.nr_events = 0,
+			.nr_groups = 0,
+		},
+		{
+			.events =
+			"{instructions,cycles},{instructions,stereolab}",
+			.nr_events = 3,
+			.nr_groups = 1,
+		},
+	};
+
+	for (i = 0; i < ARRAY_SIZE(table); i++) {
+		evlist = evlist__new();
+		if (evlist == NULL)
+			return -ENOMEM;
+
+		opt.value = evlist;
+		parse_libpfm_events_option(&opt,
+					table[i].events,
+					0);
+		TEST_ASSERT_EQUAL(table[i].events,
+				count_pfm_events(&evlist->core),
+				table[i].nr_events);
+		TEST_ASSERT_EQUAL(table[i].events,
+				evlist->nr_groups,
+				table[i].nr_groups);
+
+		evlist__delete(evlist);
+	}
+	return 0;
+}
+#endif
+
+const char *test__pfm_subtest_get_desc(int i)
+{
+	if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
+		return NULL;
+	return pfm_testcase_table[i].desc;
+}
+
+int test__pfm_subtest_get_nr(void)
+{
+	return (int)ARRAY_SIZE(pfm_testcase_table);
+}
+
+int test__pfm(struct test *test __maybe_unused, int i __maybe_unused)
+{
+#ifdef HAVE_LIBPFM
+	if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
+		return TEST_FAIL;
+	return pfm_testcase_table[i].func();
+#else
+	return TEST_SKIP;
+#endif
+}
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index d64261da8bf7..ab64b4a4e284 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "math.h"
 #include "parse-events.h"
 #include "pmu.h"
 #include "tests.h"
@@ -8,6 +9,9 @@
 #include <linux/zalloc.h>
 #include "debug.h"
 #include "../pmu-events/pmu-events.h"
+#include "util/evlist.h"
+#include "util/expr.h"
+#include "util/parse-events.h"
 
 struct perf_pmu_test_event {
 	struct pmu_event event;
@@ -144,7 +148,7 @@ static struct pmu_events_map *__test_pmu_get_events_map(void)
 }
 
 /* Verify generated events from pmu-events.c is as expected */
-static int __test_pmu_event_table(void)
+static int test_pmu_event_table(void)
 {
 	struct pmu_events_map *map = __test_pmu_get_events_map();
 	struct pmu_event *table;
@@ -347,14 +351,11 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
 	return res;
 }
 
-int test__pmu_events(struct test *test __maybe_unused,
-		     int subtest __maybe_unused)
+
+static int test_aliases(void)
 {
 	struct perf_pmu *pmu = NULL;
 
-	if (__test_pmu_event_table())
-		return -1;
-
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
 		int count = 0;
 
@@ -377,3 +378,163 @@ int test__pmu_events(struct test *test __maybe_unused,
 
 	return 0;
 }
+
+static bool is_number(const char *str)
+{
+	char *end_ptr;
+	double v;
+
+	errno = 0;
+	v = strtod(str, &end_ptr);
+	(void)v; // We're not interested in this value, only if it is valid
+	return errno == 0 && end_ptr != str;
+}
+
+static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
+{
+	struct parse_events_error error;
+	struct evlist *evlist;
+	int ret;
+
+	/* Numbers are always valid. */
+	if (is_number(id))
+		return 0;
+
+	evlist = evlist__new();
+	memset(&error, 0, sizeof(error));
+	ret = parse_events(evlist, id, &error);
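+	/*
+	 * Only report a failure when the metric belongs to the map for the
+	 * CPU perf is running on; metrics from other CPUs' maps may use
+	 * events that are not available here.
+	 */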
+	if (ret && same_cpu) {
+		pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
+			pe->metric_name, id, pe->metric_expr);
+		pr_warning("Error string '%s' help '%s'\n", error.str,
+			error.help);
+	} else if (ret) {
+		pr_debug3("Parse event failed, but for an event that may not be supported by this CPU.\nid '%s' metric '%s' expr '%s'\n",
+			  id, pe->metric_name, pe->metric_expr);
+		ret = 0;
+	}
+	evlist__delete(evlist);
+	free(error.str);
+	free(error.help);
+	free(error.first_str);
+	free(error.first_help);
+	return ret;
+}
+
+static void expr_failure(const char *msg,
+			 const struct pmu_events_map *map,
+			 const struct pmu_event *pe)
+{
+	pr_debug("%s for map %s %s %s\n",
+		msg, map->cpuid, map->version, map->type);
+	pr_debug("On metric %s\n", pe->metric_name);
+	pr_debug("On expression %s\n", pe->metric_expr);
+}
+
+static int test_parsing(void)
+{
+	struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL);
+	struct pmu_events_map *map;
+	struct pmu_event *pe;
+	int i, j, k;
+	int ret = 0;
+	struct expr_parse_ctx ctx;
+	double result;
+
+	i = 0;
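+	/*
+	 * pmu_events_map[] is terminated by an entry with a NULL table, and
+	 * each table by an event whose name, metric_group and metric_name
+	 * are all NULL.
+	 */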
+	for (;;) {
+		map = &pmu_events_map[i++];
+		if (!map->table)
+			break;
+		j = 0;
+		for (;;) {
+			struct hashmap_entry *cur;
+			size_t bkt;
+
+			pe = &map->table[j++];
+			if (!pe->name && !pe->metric_group && !pe->metric_name)
+				break;
+			if (!pe->metric_expr)
+				continue;
+			expr__ctx_init(&ctx);
+			if (expr__find_other(pe->metric_expr, NULL, &ctx, 0)
+				  < 0) {
+				expr_failure("Parse other failed", map, pe);
+				ret++;
+				continue;
+			}
+
+			/*
+			 * Add all ids with a made-up value. Using the same
+			 * value for every id could trigger a divide by zero
+			 * when two ids are subtracted, so make them unique.
+			 */
+			k = 1;
+			hashmap__for_each_entry((&ctx.ids), cur, bkt)
+				expr__add_id(&ctx, strdup(cur->key), k++);
+
+			hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+				if (check_parse_id(cur->key, map == cpus_map,
+						   pe))
+					ret++;
+			}
+
+			if (expr__parse(&result, &ctx, pe->metric_expr, 0)) {
+				expr_failure("Parse failed", map, pe);
+				ret++;
+			}
+			expr__ctx_clear(&ctx);
+		}
+	}
+	/* TODO: fail when not ok */
+	return ret == 0 ? TEST_OK : TEST_SKIP;
+}
+
+static const struct {
+	int (*func)(void);
+	const char *desc;
+} pmu_events_testcase_table[] = {
+	{
+		.func = test_pmu_event_table,
+		.desc = "PMU event table sanity",
+	},
+	{
+		.func = test_aliases,
+		.desc = "PMU event map aliases",
+	},
+	{
+		.func = test_parsing,
+		.desc = "Parsing of PMU event table metrics",
+	},
+};
+
+const char *test__pmu_events_subtest_get_desc(int subtest)
+{
+	if (subtest < 0 ||
+	    subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+		return NULL;
+	return pmu_events_testcase_table[subtest].desc;
+}
+
+const char *test__pmu_events_subtest_skip_reason(int subtest)
+{
+	if (subtest < 0 ||
+	    subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+		return NULL;
+	if (pmu_events_testcase_table[subtest].func != test_parsing)
+		return NULL;
+	return "some metrics failed";
+}
+
+int test__pmu_events_subtest_get_nr(void)
+{
+	return (int)ARRAY_SIZE(pmu_events_testcase_table);
+}
+
+int test__pmu_events(struct test *test __maybe_unused, int subtest)
+{
+	if (subtest < 0 ||
+	    subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+		return TEST_FAIL;
+	return pmu_events_testcase_table[subtest].func();
+}
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 74379ff1f7fa..5c11fe2b3040 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -156,8 +156,8 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
 		if (ret)
 			break;
 
-		ret = perf_pmu__config_terms(&formats, &attr, terms,
-					     false, NULL);
+		ret = perf_pmu__config_terms("perf-pmu-test", &formats, &attr,
+					     terms, false, NULL);
 		if (ret)
 			break;
 
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index bfb9986093d8..4b9b731977c8 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -56,7 +56,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
 
 	evsel = evsel__new(&attr);
 	if (evsel == NULL) {
-		pr_debug("perf_evsel__new\n");
+		pr_debug("evsel__new\n");
 		goto out_delete_evlist;
 	}
 	evlist__add(evlist, evsel);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d6d4ac34eeb7..76a4e352eaaf 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -34,6 +34,7 @@ struct test {
 		bool skip_if_fail;
 		int (*get_nr)(void);
 		const char *(*get_desc)(int subtest);
+		const char *(*skip_reason)(int subtest);
 	} subtest;
 	bool (*is_supported)(void);
 	void *priv;
@@ -50,6 +51,9 @@ int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
 int test__syscall_openat_tp_fields(struct test *test, int subtest);
 int test__pmu(struct test *test, int subtest);
 int test__pmu_events(struct test *test, int subtest);
+const char *test__pmu_events_subtest_get_desc(int subtest);
+const char *test__pmu_events_subtest_skip_reason(int subtest);
+int test__pmu_events_subtest_get_nr(void);
 int test__attr(struct test *test, int subtest);
 int test__dso_data(struct test *test, int subtest);
 int test__dso_data_cache(struct test *test, int subtest);
@@ -113,6 +117,10 @@ int test__maps__merge_in(struct test *t, int subtest);
 int test__time_utils(struct test *t, int subtest);
 int test__jit_write_elf(struct test *test, int subtest);
 int test__api_io(struct test *test, int subtest);
+int test__demangle_java(struct test *test, int subtest);
+int test__pfm(struct test *test, int subtest);
+const char *test__pfm_subtest_get_desc(int subtest);
+int test__pfm_subtest_get_nr(void);
 
 bool test__bp_signal_is_supported(void);
 bool test__bp_account_is_supported(void);
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index 22c9fc900c84..9f9ea45cddc4 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -57,7 +57,7 @@ process_arch()
 	local arch="$1"
 	local asm_errno=$(asm_errno_file "$arch")
 
-	$gcc $include_path -E -dM -x c $asm_errno \
+	$gcc $CFLAGS $include_path -E -dM -x c $asm_errno \
 		|grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
 		|awk '{ print $2","$3; }' \
 		|sort -t, -k2 -nu \
@@ -91,7 +91,7 @@ EoHEADER
 # in tools/perf/arch
 archlist=""
 for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do
-	test -d arch/$arch && archlist="$archlist $arch"
+	test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch"
 done
 
 for arch in x86 $archlist generic; do
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index ca07a162d602..8d18380ecd10 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -106,7 +106,7 @@ perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
 perf-$(CONFIG_AUXTRACE) += intel-pt.o
 perf-$(CONFIG_AUXTRACE) += intel-bts.o
 perf-$(CONFIG_AUXTRACE) += arm-spe.o
-perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
 perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
 
 ifdef CONFIG_LIBOPENCSD
@@ -136,6 +136,10 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
 perf-$(CONFIG_LIBELF) += probe-event.o
 
+ifndef CONFIG_LIBBPF
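+# libbpf normally provides the hashmap implementation; build perf's local
+# copy when libbpf is disabled.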
+perf-y += hashmap.o
+endif
+
 ifndef CONFIG_LIBELF
 perf-y += symbol-minimal.o
 endif
@@ -179,6 +183,8 @@ perf-$(CONFIG_LIBBPF) += bpf-event.o
 
 perf-$(CONFIG_CXX) += c++/
 
+perf-$(CONFIG_LIBPFM4) += pfm.o
+
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index d828c2d2edee..76bfb4a9d94e 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -41,7 +41,6 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <bpf/libbpf.h>
 #include <subcmd/parse-options.h>
 #include <subcmd/run-command.h>
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 2d88069d6428..0a0cd4f32175 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -144,7 +144,7 @@ struct annotation_line {
 	u32			 idx;
 	int			 idx_asm;
 	int			 data_nr;
-	struct annotation_data	 data[0];
+	struct annotation_data	 data[];
 };
 
 struct disasm_line {
@@ -227,7 +227,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
 struct sym_hist {
 	u64		      nr_samples;
 	u64		      period;
-	struct sym_hist_entry addr[0];
+	struct sym_hist_entry addr[];
 };
 
 struct cyc_hist {
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
new file mode 100644
index 000000000000..f8dae13fc876
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
new file mode 100644
index 000000000000..302a14d0aca9
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm_spe_decoder.c: ARM SPE support
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+#include "../debug.h"
+#include "../util.h"
+
+#include "arm-spe-decoder.h"
+
+#ifndef BIT
+#define BIT(n)		(1UL << (n))
+#endif
+
+static u64 arm_spe_calc_ip(int index, u64 payload)
+{
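+	/*
+	 * The top byte of an address payload carries metadata (NS/EL bits)
+	 * or tag bits rather than address bits, so rebuild it based on the
+	 * packet index.
+	 */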
+	u8 *addr = (u8 *)&payload;
+	int ns, el;
+
+	/* Instruction virtual address or Branch target address */
+	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
+	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+		ns = addr[7] & SPE_ADDR_PKT_NS;
+		el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
+
+		/* Fill highest byte for EL1 or EL2 (VHE) mode */
+		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
+			addr[7] = 0xff;
+		/* Clean highest byte for other cases */
+		else
+			addr[7] = 0x0;
+
+	/* Data access virtual address */
+	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
+
+		/* Fill highest byte if bits [48..55] are 0xff */
+		if (addr[6] == 0xff)
+			addr[7] = 0xff;
+		/* Otherwise, cleanup tags */
+		else
+			addr[7] = 0x0;
+
+	/* Data access physical address */
+	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
+		/* Cleanup byte 7 */
+		addr[7] = 0x0;
+	} else {
+		pr_err("unsupported address packet index: 0x%x\n", index);
+	}
+
+	return payload;
+}
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
+{
+	struct arm_spe_decoder *decoder;
+
+	if (!params->get_trace)
+		return NULL;
+
+	decoder = zalloc(sizeof(struct arm_spe_decoder));
+	if (!decoder)
+		return NULL;
+
+	decoder->get_trace = params->get_trace;
+	decoder->data = params->data;
+
+	return decoder;
+}
+
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
+{
+	free(decoder);
+}
+
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
+{
+	struct arm_spe_buffer buffer = { .buf = 0, };
+	int ret;
+
+	pr_debug("Getting more data\n");
+	ret = decoder->get_trace(&buffer, decoder->data);
+	if (ret < 0)
+		return ret;
+
+	decoder->buf = buffer.buf;
+	decoder->len = buffer.len;
+
+	if (!decoder->len)
+		pr_debug("No more data\n");
+
+	return decoder->len;
+}
+
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
+{
+	int ret;
+
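+	/* Keep decoding until something other than a padding packet is found. */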
+	do {
+		if (!decoder->len) {
+			ret = arm_spe_get_data(decoder);
+
+			/* Failed to read out trace data */
+			if (ret <= 0)
+				return ret;
+		}
+
+		ret = arm_spe_get_packet(decoder->buf, decoder->len,
+					 &decoder->packet);
+		if (ret <= 0) {
+			/* Move forward by 1 byte */
+			decoder->buf += 1;
+			decoder->len -= 1;
+			return -EBADMSG;
+		}
+
+		decoder->buf += ret;
+		decoder->len -= ret;
+	} while (decoder->packet.type == ARM_SPE_PAD);
+
+	return 1;
+}
+
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
+{
+	int err;
+	int idx;
+	u64 payload, ip;
+
+	memset(&decoder->record, 0x0, sizeof(decoder->record));
+
+	while (1) {
+		err = arm_spe_get_next_packet(decoder);
+		if (err <= 0)
+			return err;
+
+		idx = decoder->packet.index;
+		payload = decoder->packet.payload;
+
+		switch (decoder->packet.type) {
+		case ARM_SPE_TIMESTAMP:
+			decoder->record.timestamp = payload;
+			return 1;
+		case ARM_SPE_END:
+			return 1;
+		case ARM_SPE_ADDRESS:
+			ip = arm_spe_calc_ip(idx, payload);
+			if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
+				decoder->record.from_ip = ip;
+			else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
+				decoder->record.to_ip = ip;
+			break;
+		case ARM_SPE_COUNTER:
+			break;
+		case ARM_SPE_CONTEXT:
+			break;
+		case ARM_SPE_OP_TYPE:
+			break;
+		case ARM_SPE_EVENTS:
+			if (payload & BIT(EV_L1D_REFILL))
+				decoder->record.type |= ARM_SPE_L1D_MISS;
+
+			if (payload & BIT(EV_L1D_ACCESS))
+				decoder->record.type |= ARM_SPE_L1D_ACCESS;
+
+			if (payload & BIT(EV_TLB_WALK))
+				decoder->record.type |= ARM_SPE_TLB_MISS;
+
+			if (payload & BIT(EV_TLB_ACCESS))
+				decoder->record.type |= ARM_SPE_TLB_ACCESS;
+
+			if ((idx == 1 || idx == 2 || idx == 3) &&
+			    (payload & BIT(EV_LLC_MISS)))
+				decoder->record.type |= ARM_SPE_LLC_MISS;
+
+			if ((idx == 1 || idx == 2 || idx == 3) &&
+			    (payload & BIT(EV_LLC_ACCESS)))
+				decoder->record.type |= ARM_SPE_LLC_ACCESS;
+
+			if ((idx == 1 || idx == 2 || idx == 3) &&
+			    (payload & BIT(EV_REMOTE_ACCESS)))
+				decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
+
+			if (payload & BIT(EV_MISPRED))
+				decoder->record.type |= ARM_SPE_BRANCH_MISS;
+
+			break;
+		case ARM_SPE_DATA_SOURCE:
+			break;
+		case ARM_SPE_BAD:
+			break;
+		case ARM_SPE_PAD:
+			break;
+		default:
+			pr_err("Get packet error!\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int arm_spe_decode(struct arm_spe_decoder *decoder)
+{
+	return arm_spe_read_record(decoder);
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
new file mode 100644
index 000000000000..a5111a8d4360
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
+ * Copyright (c) 2019-2020, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
+#define INCLUDE__ARM_SPE_DECODER_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+enum arm_spe_events {
+	EV_EXCEPTION_GEN	= 0,
+	EV_RETIRED		= 1,
+	EV_L1D_ACCESS		= 2,
+	EV_L1D_REFILL		= 3,
+	EV_TLB_ACCESS		= 4,
+	EV_TLB_WALK		= 5,
+	EV_NOT_TAKEN		= 6,
+	EV_MISPRED		= 7,
+	EV_LLC_ACCESS		= 8,
+	EV_LLC_MISS		= 9,
+	EV_REMOTE_ACCESS	= 10,
+	EV_ALIGNMENT		= 11,
+	EV_PARTIAL_PREDICATE	= 17,
+	EV_EMPTY_PREDICATE	= 18,
+};
+
+enum arm_spe_sample_type {
+	ARM_SPE_L1D_ACCESS	= 1 << 0,
+	ARM_SPE_L1D_MISS	= 1 << 1,
+	ARM_SPE_LLC_ACCESS	= 1 << 2,
+	ARM_SPE_LLC_MISS	= 1 << 3,
+	ARM_SPE_TLB_ACCESS	= 1 << 4,
+	ARM_SPE_TLB_MISS	= 1 << 5,
+	ARM_SPE_BRANCH_MISS	= 1 << 6,
+	ARM_SPE_REMOTE_ACCESS	= 1 << 7,
+};
+
+struct arm_spe_record {
+	enum arm_spe_sample_type type;
+	int err;
+	u64 from_ip;
+	u64 to_ip;
+	u64 timestamp;
+};
+
+struct arm_spe_insn;
+
+struct arm_spe_buffer {
+	const unsigned char *buf;
+	size_t len;
+	u64 offset;
+	u64 trace_nr;
+};
+
+struct arm_spe_params {
+	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+	void *data;
+};
+
+struct arm_spe_decoder {
+	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+	void *data;
+	struct arm_spe_record record;
+
+	const unsigned char *buf;
+	size_t len;
+
+	struct arm_spe_pkt packet;
+};
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
+
+int arm_spe_decode(struct arm_spe_decoder *decoder);
+
+#endif
diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index b94001b756c7..b94001b756c7 100644
--- a/tools/perf/util/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index d786ef65113f..4c870521b8eb 100644
--- a/tools/perf/util/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -15,6 +15,8 @@
 #define ARM_SPE_NEED_MORE_BYTES		-1
 #define ARM_SPE_BAD_PACKET		-2
 
+#define ARM_SPE_PKT_MAX_SZ		16
+
 enum arm_spe_pkt_type {
 	ARM_SPE_BAD,
 	ARM_SPE_PAD,
@@ -34,6 +36,20 @@ struct arm_spe_pkt {
 	uint64_t		payload;
 };
 
+#define SPE_ADDR_PKT_HDR_INDEX_INS		(0x0)
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH		(0x1)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT	(0x2)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS	(0x3)
+
+#define SPE_ADDR_PKT_NS				BIT(7)
+#define SPE_ADDR_PKT_CH				BIT(6)
+#define SPE_ADDR_PKT_EL_OFFSET			(5)
+#define SPE_ADDR_PKT_EL_MASK			(0x3 << SPE_ADDR_PKT_EL_OFFSET)
+#define SPE_ADDR_PKT_EL0			(0)
+#define SPE_ADDR_PKT_EL1			(1)
+#define SPE_ADDR_PKT_EL2			(2)
+#define SPE_ADDR_PKT_EL3			(3)
+
 const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
 
 int arm_spe_get_packet(const unsigned char *buf, size_t len,
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 875a0dd540e5..3882a5360ada 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -4,46 +4,85 @@
  * Copyright (c) 2017-2018, Arm Ltd.
  */
 
+#include <byteswap.h>
 #include <endian.h>
 #include <errno.h>
-#include <byteswap.h>
 #include <inttypes.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 #include <linux/log2.h>
+#include <linux/types.h>
 #include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
 
+#include "auxtrace.h"
 #include "color.h"
+#include "debug.h"
+#include "evlist.h"
 #include "evsel.h"
 #include "machine.h"
 #include "session.h"
-#include "debug.h"
-#include "auxtrace.h"
+#include "symbol.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tool.h"
+#include "util/synthetic-events.h"
+
 #include "arm-spe.h"
-#include "arm-spe-pkt-decoder.h"
+#include "arm-spe-decoder/arm-spe-decoder.h"
+#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
 
 struct arm_spe {
 	struct auxtrace			auxtrace;
 	struct auxtrace_queues		queues;
 	struct auxtrace_heap		heap;
+	struct itrace_synth_opts        synth_opts;
 	u32				auxtrace_type;
 	struct perf_session		*session;
 	struct machine			*machine;
 	u32				pmu_type;
+
+	u8				timeless_decoding;
+	u8				data_queued;
+
+	u8				sample_flc;
+	u8				sample_llc;
+	u8				sample_tlb;
+	u8				sample_branch;
+	u8				sample_remote_access;
+
+	u64				l1d_miss_id;
+	u64				l1d_access_id;
+	u64				llc_miss_id;
+	u64				llc_access_id;
+	u64				tlb_miss_id;
+	u64				tlb_access_id;
+	u64				branch_miss_id;
+	u64				remote_access_id;
+
+	u64				kernel_start;
+
+	unsigned long			num_events;
 };
 
 struct arm_spe_queue {
-	struct arm_spe		*spe;
-	unsigned int		queue_nr;
-	struct auxtrace_buffer	*buffer;
-	bool			on_heap;
-	bool			done;
-	pid_t			pid;
-	pid_t			tid;
-	int			cpu;
+	struct arm_spe			*spe;
+	unsigned int			queue_nr;
+	struct auxtrace_buffer		*buffer;
+	struct auxtrace_buffer		*old_buffer;
+	union perf_event		*event_buf;
+	bool				on_heap;
+	bool				done;
+	pid_t				pid;
+	pid_t				tid;
+	int				cpu;
+	struct arm_spe_decoder		*decoder;
+	u64				time;
+	u64				timestamp;
+	struct thread			*thread;
 };
 
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -92,44 +131,520 @@ static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
 	arm_spe_dump(spe, buf, len);
 }
 
-static int arm_spe_process_event(struct perf_session *session __maybe_unused,
-				 union perf_event *event __maybe_unused,
-				 struct perf_sample *sample __maybe_unused,
-				 struct perf_tool *tool __maybe_unused)
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
+{
+	struct arm_spe_queue *speq = data;
+	struct auxtrace_buffer *buffer = speq->buffer;
+	struct auxtrace_buffer *old_buffer = speq->old_buffer;
+	struct auxtrace_queue *queue;
+
+	queue = &speq->spe->queues.queue_array[speq->queue_nr];
+
+	buffer = auxtrace_buffer__next(queue, buffer);
+	/* If no more data, drop the previous auxtrace_buffer and return */
+	if (!buffer) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		b->len = 0;
+		return 0;
+	}
+
+	speq->buffer = buffer;
+
+	/* If the aux_buffer doesn't have data associated, try to load it */
+	if (!buffer->data) {
+		/* get the file desc associated with the perf data file */
+		int fd = perf_data__fd(speq->spe->session->data);
+
+		buffer->data = auxtrace_buffer__get_data(buffer, fd);
+		if (!buffer->data)
+			return -ENOMEM;
+	}
+
+	b->len = buffer->size;
+	b->buf = buffer->data;
+
+	if (b->len) {
+		if (old_buffer)
+			auxtrace_buffer__drop_data(old_buffer);
+		speq->old_buffer = buffer;
+	} else {
+		auxtrace_buffer__drop_data(buffer);
+		return arm_spe_get_trace(b, data);
+	}
+
+	return 0;
+}
+
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
+		unsigned int queue_nr)
+{
+	struct arm_spe_params params = { .get_trace = 0, };
+	struct arm_spe_queue *speq;
+
+	speq = zalloc(sizeof(*speq));
+	if (!speq)
+		return NULL;
+
+	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+	if (!speq->event_buf)
+		goto out_free;
+
+	speq->spe = spe;
+	speq->queue_nr = queue_nr;
+	speq->pid = -1;
+	speq->tid = -1;
+	speq->cpu = -1;
+
+	/* params set */
+	params.get_trace = arm_spe_get_trace;
+	params.data = speq;
+
+	/* create new decoder */
+	speq->decoder = arm_spe_decoder_new(&params);
+	if (!speq->decoder)
+		goto out_free;
+
+	return speq;
+
+out_free:
+	zfree(&speq->event_buf);
+	free(speq);
+
+	return NULL;
+}
+
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
+{
+	return ip >= spe->kernel_start ?
+		PERF_RECORD_MISC_KERNEL :
+		PERF_RECORD_MISC_USER;
+}
+
+static void arm_spe_prep_sample(struct arm_spe *spe,
+				struct arm_spe_queue *speq,
+				union perf_event *event,
+				struct perf_sample *sample)
+{
+	struct arm_spe_record *record = &speq->decoder->record;
+
+	if (!spe->timeless_decoding)
+		sample->time = speq->timestamp;
+
+	sample->ip = record->from_ip;
+	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
+	sample->pid = speq->pid;
+	sample->tid = speq->tid;
+	sample->addr = record->to_ip;
+	sample->period = 1;
+	sample->cpu = speq->cpu;
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static inline int
+arm_spe_deliver_synth_event(struct arm_spe *spe,
+			    struct arm_spe_queue *speq __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample)
+{
+	int ret;
+
+	ret = perf_session__deliver_synth_event(spe->session, event, sample);
+	if (ret)
+		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+
+	return ret;
+}
+
+static int
+arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
+				u64 spe_events_id)
+{
+	struct arm_spe *spe = speq->spe;
+	union perf_event *event = speq->event_buf;
+	struct perf_sample sample = { .ip = 0, };
+
+	arm_spe_prep_sample(spe, speq, event, &sample);
+
+	sample.id = spe_events_id;
+	sample.stream_id = spe_events_id;
+
+	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe_sample(struct arm_spe_queue *speq)
+{
+	const struct arm_spe_record *record = &speq->decoder->record;
+	struct arm_spe *spe = speq->spe;
+	int err;
+
+	if (spe->sample_flc) {
+		if (record->type & ARM_SPE_L1D_MISS) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->l1d_miss_id);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_L1D_ACCESS) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->l1d_access_id);
+			if (err)
+				return err;
+		}
+	}
+
+	if (spe->sample_llc) {
+		if (record->type & ARM_SPE_LLC_MISS) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->llc_miss_id);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_LLC_ACCESS) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->llc_access_id);
+			if (err)
+				return err;
+		}
+	}
+
+	if (spe->sample_tlb) {
+		if (record->type & ARM_SPE_TLB_MISS) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->tlb_miss_id);
+			if (err)
+				return err;
+		}
+
+		if (record->type & ARM_SPE_TLB_ACCESS) {
+			err = arm_spe_synth_spe_events_sample(
+					speq, spe->tlb_access_id);
+			if (err)
+				return err;
+		}
+	}
+
+	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
+		err = arm_spe_synth_spe_events_sample(speq,
+						      spe->branch_miss_id);
+		if (err)
+			return err;
+	}
+
+	if (spe->sample_remote_access &&
+	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
+		err = arm_spe_synth_spe_events_sample(speq,
+						      spe->remote_access_id);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
+{
+	struct arm_spe *spe = speq->spe;
+	int ret;
+
+	if (!spe->kernel_start)
+		spe->kernel_start = machine__kernel_start(spe->machine);
+
+	while (1) {
+		ret = arm_spe_decode(speq->decoder);
+		if (!ret) {
+			pr_debug("No data or all data has been processed.\n");
+			return 1;
+		}
+
+		/*
+		 * An error was detected while decoding the SPE trace data;
+		 * continue with the next trace data and look for more records.
+		 */
+		if (ret < 0)
+			continue;
+
+		ret = arm_spe_sample(speq);
+		if (ret)
+			return ret;
+
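+		/*
+		 * For time-ordered decoding, stop once this queue has
+		 * reached the requested timestamp so queues can be
+		 * interleaved.
+		 */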
+		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
+			*timestamp = speq->timestamp;
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_spe__setup_queue(struct arm_spe *spe,
+			       struct auxtrace_queue *queue,
+			       unsigned int queue_nr)
+{
+	struct arm_spe_queue *speq = queue->priv;
+	struct arm_spe_record *record;
+
+	if (list_empty(&queue->head) || speq)
+		return 0;
+
+	speq = arm_spe__alloc_queue(spe, queue_nr);
+
+	if (!speq)
+		return -ENOMEM;
+
+	queue->priv = speq;
+
+	if (queue->cpu != -1)
+		speq->cpu = queue->cpu;
+
+	if (!speq->on_heap) {
+		int ret;
+
+		if (spe->timeless_decoding)
+			return 0;
+
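+		/*
+		 * Decode until a valid record is found so the queue can be
+		 * placed on the timestamp-ordered heap.
+		 */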
+retry:
+		ret = arm_spe_decode(speq->decoder);
+
+		if (!ret)
+			return 0;
+
+		if (ret < 0)
+			goto retry;
+
+		record = &speq->decoder->record;
+
+		speq->timestamp = record->timestamp;
+		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
+		if (ret)
+			return ret;
+		speq->on_heap = true;
+	}
+
+	return 0;
+}
+
+static int arm_spe__setup_queues(struct arm_spe *spe)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < spe->queues.nr_queues; i++) {
+		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int arm_spe__update_queues(struct arm_spe *spe)
 {
+	if (spe->queues.new_data) {
+		spe->queues.new_data = false;
+		return arm_spe__setup_queues(spe);
+	}
+
 	return 0;
 }
 
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
+{
+	struct evsel *evsel;
+	struct evlist *evlist = spe->session->evlist;
+	bool timeless_decoding = true;
+
+	/*
+	 * Cycle through the list of events and check whether any of them
+	 * has the time bit set; if one does, timeless decoding cannot be used.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+			timeless_decoding = false;
+	}
+
+	return timeless_decoding;
+}
+
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+				    struct auxtrace_queue *queue)
+{
+	struct arm_spe_queue *speq = queue->priv;
+	pid_t tid;
+
+	tid = machine__get_current_tid(spe->machine, speq->cpu);
+	if (tid != -1) {
+		speq->tid = tid;
+		thread__zput(speq->thread);
+	} else
+		speq->tid = queue->tid;
+
+	if ((!speq->thread) && (speq->tid != -1)) {
+		speq->thread = machine__find_thread(spe->machine, -1,
+						    speq->tid);
+	}
+
+	if (speq->thread) {
+		speq->pid = speq->thread->pid_;
+		if (queue->cpu == -1)
+			speq->cpu = speq->thread->cpu;
+	}
+}
+
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct arm_spe_queue *speq;
+
+		if (!spe->heap.heap_cnt)
+			return 0;
+
+		if (spe->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = spe->heap.heap_array[0].queue_nr;
+		queue = &spe->queues.queue_array[queue_nr];
+		speq = queue->priv;
+
+		auxtrace_heap__pop(&spe->heap);
+
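+		/*
+		 * Decode this queue only up to the next queue's timestamp
+		 * (or the overall limit) to keep samples in time order.
+		 */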
+		if (spe->heap.heap_cnt) {
+			ts = spe->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		arm_spe_set_pid_tid_cpu(spe, queue);
+
+		ret = arm_spe_run_decoder(speq, &ts);
+		if (ret < 0) {
+			auxtrace_heap__add(&spe->heap, queue_nr, ts);
+			return ret;
+		}
+
+		if (!ret) {
+			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		} else {
+			speq->on_heap = false;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
+					    u64 time_)
+{
+	struct auxtrace_queues *queues = &spe->queues;
+	unsigned int i;
+	u64 ts = 0;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
+		struct arm_spe_queue *speq = queue->priv;
+
+		if (speq && (tid == -1 || speq->tid == tid)) {
+			speq->time = time_;
+			arm_spe_set_pid_tid_cpu(spe, queue);
+			arm_spe_run_decoder(speq, &ts);
+		}
+	}
+	return 0;
+}
+
+static int arm_spe_process_event(struct perf_session *session,
+				 union perf_event *event,
+				 struct perf_sample *sample,
+				 struct perf_tool *tool)
+{
+	int err = 0;
+	u64 timestamp;
+	struct arm_spe *spe = container_of(session->auxtrace,
+			struct arm_spe, auxtrace);
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events) {
+		pr_err("SPE trace requires ordered events\n");
+		return -EINVAL;
+	}
+
+	if (sample->time && (sample->time != (u64) -1))
+		timestamp = sample->time;
+	else
+		timestamp = 0;
+
+	if (timestamp || spe->timeless_decoding) {
+		err = arm_spe__update_queues(spe);
+		if (err)
+			return err;
+	}
+
+	if (spe->timeless_decoding) {
+		if (event->header.type == PERF_RECORD_EXIT) {
+			err = arm_spe_process_timeless_queues(spe,
+					event->fork.tid,
+					sample->time);
+		}
+	} else if (timestamp) {
+		if (event->header.type == PERF_RECORD_EXIT) {
+			err = arm_spe_process_queues(spe, timestamp);
+			if (err)
+				return err;
+		}
+	}
+
+	return err;
+}
+
 static int arm_spe_process_auxtrace_event(struct perf_session *session,
 					  union perf_event *event,
 					  struct perf_tool *tool __maybe_unused)
 {
 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
 					     auxtrace);
-	struct auxtrace_buffer *buffer;
-	off_t data_offset;
-	int fd = perf_data__fd(session->data);
-	int err;
 
-	if (perf_data__is_pipe(session->data)) {
-		data_offset = 0;
-	} else {
-		data_offset = lseek(fd, 0, SEEK_CUR);
-		if (data_offset == -1)
-			return -errno;
-	}
+	if (!spe->data_queued) {
+		struct auxtrace_buffer *buffer;
+		off_t data_offset;
+		int fd = perf_data__fd(session->data);
+		int err;
 
-	err = auxtrace_queues__add_event(&spe->queues, session, event,
-					 data_offset, &buffer);
-	if (err)
-		return err;
+		if (perf_data__is_pipe(session->data)) {
+			data_offset = 0;
+		} else {
+			data_offset = lseek(fd, 0, SEEK_CUR);
+			if (data_offset == -1)
+				return -errno;
+		}
 
-	/* Dump here now we have copied a piped trace out of the pipe */
-	if (dump_trace) {
-		if (auxtrace_buffer__get_data(buffer, fd)) {
-			arm_spe_dump_event(spe, buffer->data,
-					     buffer->size);
-			auxtrace_buffer__put_data(buffer);
+		err = auxtrace_queues__add_event(&spe->queues, session, event,
+				data_offset, &buffer);
+		if (err)
+			return err;
+
+		/* Dump here now we have copied a piped trace out of the pipe */
+		if (dump_trace) {
+			if (auxtrace_buffer__get_data(buffer, fd)) {
+				arm_spe_dump_event(spe, buffer->data,
+						buffer->size);
+				auxtrace_buffer__put_data(buffer);
+			}
 		}
 	}
 
@@ -139,7 +654,25 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
 static int arm_spe_flush(struct perf_session *session __maybe_unused,
 			 struct perf_tool *tool __maybe_unused)
 {
-	return 0;
+	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+			auxtrace);
+	int ret;
+
+	if (dump_trace)
+		return 0;
+
+	if (!tool->ordered_events)
+		return -EINVAL;
+
+	ret = arm_spe__update_queues(spe);
+	if (ret < 0)
+		return ret;
+
+	if (spe->timeless_decoding)
+		return arm_spe_process_timeless_queues(spe, -1,
+				MAX_TIMESTAMP - 1);
+
+	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
 }
 
 static void arm_spe_free_queue(void *priv)
@@ -148,6 +681,9 @@ static void arm_spe_free_queue(void *priv)
 
 	if (!speq)
 		return;
+	thread__zput(speq->thread);
+	arm_spe_decoder_free(speq->decoder);
+	zfree(&speq->event_buf);
 	free(speq);
 }
 
@@ -196,11 +732,189 @@ static void arm_spe_print_info(__u64 *arr)
 	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
 }
 
+struct arm_spe_synth {
+	struct perf_tool dummy_tool;
+	struct perf_session *session;
+};
+
+static int arm_spe_event_synth(struct perf_tool *tool,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused,
+			       struct machine *machine __maybe_unused)
+{
+	struct arm_spe_synth *arm_spe_synth =
+		      container_of(tool, struct arm_spe_synth, dummy_tool);
+
+	return perf_session__deliver_synth_event(arm_spe_synth->session,
+						 event, NULL);
+}
+
+static int arm_spe_synth_event(struct perf_session *session,
+			       struct perf_event_attr *attr, u64 id)
+{
+	struct arm_spe_synth arm_spe_synth;
+
+	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
+	arm_spe_synth.session = session;
+
+	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
+					   &id, arm_spe_event_synth);
+}
+
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+				    const char *name)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.id && evsel->core.id[0] == id) {
+			if (evsel->name)
+				zfree(&evsel->name);
+			evsel->name = strdup(name);
+			break;
+		}
+	}
+}
+
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+	struct evlist *evlist = session->evlist;
+	struct evsel *evsel;
+	struct perf_event_attr attr;
+	bool found = false;
+	u64 id;
+	int err;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->core.attr.type == spe->pmu_type) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_debug("No selected events with SPE trace data\n");
+		return 0;
+	}
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.size = sizeof(struct perf_event_attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+		PERF_SAMPLE_PERIOD;
+	if (spe->timeless_decoding)
+		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+	else
+		attr.sample_type |= PERF_SAMPLE_TIME;
+
+	attr.exclude_user = evsel->core.attr.exclude_user;
+	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+	attr.exclude_hv = evsel->core.attr.exclude_hv;
+	attr.exclude_host = evsel->core.attr.exclude_host;
+	attr.exclude_guest = evsel->core.attr.exclude_guest;
+	attr.sample_id_all = evsel->core.attr.sample_id_all;
+	attr.read_format = evsel->core.attr.read_format;
+
+	/* create new id val to be a fixed offset from evsel id */
+	id = evsel->core.id[0] + 1000000000;
+
+	if (!id)
+		id = 1;
+
+	if (spe->synth_opts.flc) {
+		spe->sample_flc = true;
+
+		/* Level 1 data cache miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->l1d_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "l1d-miss");
+		id += 1;
+
+		/* Level 1 data cache access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->l1d_access_id = id;
+		arm_spe_set_event_name(evlist, id, "l1d-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.llc) {
+		spe->sample_llc = true;
+
+		/* Last level cache miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->llc_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "llc-miss");
+		id += 1;
+
+		/* Last level cache access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->llc_access_id = id;
+		arm_spe_set_event_name(evlist, id, "llc-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.tlb) {
+		spe->sample_tlb = true;
+
+		/* TLB miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->tlb_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "tlb-miss");
+		id += 1;
+
+		/* TLB access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->tlb_access_id = id;
+		arm_spe_set_event_name(evlist, id, "tlb-access");
+		id += 1;
+	}
+
+	if (spe->synth_opts.branches) {
+		spe->sample_branch = true;
+
+		/* Branch miss */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->branch_miss_id = id;
+		arm_spe_set_event_name(evlist, id, "branch-miss");
+		id += 1;
+	}
+
+	if (spe->synth_opts.remote_access) {
+		spe->sample_remote_access = true;
+
+		/* Remote access */
+		err = arm_spe_synth_event(session, &attr, id);
+		if (err)
+			return err;
+		spe->remote_access_id = id;
+		arm_spe_set_event_name(evlist, id, "remote-access");
+		id += 1;
+	}
+
+	return 0;
+}
+
 int arm_spe_process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session)
 {
 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
-	size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
+	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
 	struct arm_spe *spe;
 	int err;
 
@@ -221,6 +935,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 	spe->auxtrace_type = auxtrace_info->type;
 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
 
+	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
 	spe->auxtrace.process_event = arm_spe_process_event;
 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
 	spe->auxtrace.flush_events = arm_spe_flush;
@@ -231,8 +946,30 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
 
 	arm_spe_print_info(&auxtrace_info->priv[0]);
 
+	if (dump_trace)
+		return 0;
+
+	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+		spe->synth_opts = *session->itrace_synth_opts;
+	else
+		itrace_synth_opts__set_default(&spe->synth_opts, false);
+
+	err = arm_spe_synth_events(spe, session);
+	if (err)
+		goto err_free_queues;
+
+	err = auxtrace_queues__process_index(&spe->queues, session);
+	if (err)
+		goto err_free_queues;
+
+	if (spe->queues.populated)
+		spe->data_queued = true;
+
 	return 0;
 
+err_free_queues:
+	auxtrace_queues__free(&spe->queues);
+	session->auxtrace = NULL;
 err_free:
 	free(spe);
 	return err;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 749487a41cc7..25c639ac4ad4 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -55,7 +55,6 @@
 #include "util/mmap.h"
 
 #include <linux/ctype.h>
-#include <linux/kernel.h>
 #include "symbol/kallsyms.h"
 #include <internal/lib.h>
 
@@ -729,7 +728,7 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
 				  struct evlist *evlist,
 				  struct record_opts *opts, const char *str)
 {
-	struct perf_evsel_config_term *term;
+	struct evsel_config_term *term;
 	struct evsel *aux_evsel;
 	bool has_aux_sample_size = false;
 	bool has_aux_leader = false;
@@ -771,7 +770,7 @@ no_opt:
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel__is_aux_event(evsel))
 			aux_evsel = evsel;
-		term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
+		term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
 		if (term) {
 			has_aux_sample_size = true;
 			evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
@@ -1331,6 +1330,11 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
 	synth_opts->pwr_events = true;
 	synth_opts->other_events = true;
 	synth_opts->errors = true;
+	synth_opts->flc = true;
+	synth_opts->llc = true;
+	synth_opts->tlb = true;
+	synth_opts->remote_access = true;
+
 	if (no_sample) {
 		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
 		synth_opts->period = 1;
@@ -1491,6 +1495,18 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 				goto out_err;
 			p = endptr;
 			break;
+		case 'f':
+			synth_opts->flc = true;
+			break;
+		case 'm':
+			synth_opts->llc = true;
+			break;
+		case 't':
+			synth_opts->tlb = true;
+			break;
+		case 'a':
+			synth_opts->remote_access = true;
+			break;
 		case ' ':
 		case ',':
 			break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 0220a2e86c16..142ccf7d34df 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -63,6 +63,7 @@ enum itrace_period_type {
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
  * @branches: whether to synthesize 'branches' events
+ *            (branch misses only for Arm SPE)
  * @transactions: whether to synthesize events for transactions
  * @ptwrites: whether to synthesize events for ptwrites
  * @pwr_events: whether to synthesize power events
@@ -78,6 +79,10 @@ enum itrace_period_type {
  * @thread_stack: feed branches to the thread_stack
  * @last_branch: add branch context to 'instruction' events
  * @add_last_branch: add branch context to existing event records
+ * @flc: whether to synthesize first level cache events
+ * @llc: whether to synthesize last level cache events
+ * @tlb: whether to synthesize TLB events
+ * @remote_access: whether to synthesize remote access events
  * @callchain_sz: maximum callchain size
  * @last_branch_sz: branch context size
  * @period: 'instructions' events period
@@ -107,6 +112,10 @@ struct itrace_synth_opts {
 	bool			thread_stack;
 	bool			last_branch;
 	bool			add_last_branch;
+	bool			flc;
+	bool			llc;
+	bool			tlb;
+	bool			remote_access;
 	unsigned int		callchain_sz;
 	unsigned int		last_branch_sz;
 	unsigned long long	period;
@@ -596,7 +605,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
 
 #define ITRACE_HELP \
 "				i:	    		synthesize instructions events\n"		\
-"				b:	    		synthesize branches events\n"		\
+"				b:	    		synthesize branches events (branch misses for Arm SPE)\n" \
 "				c:	    		synthesize branches events (calls only)\n"	\
 "				r:	    		synthesize branches events (returns only)\n" \
 "				x:	    		synthesize transactions events\n"		\
@@ -604,6 +613,10 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
 "				p:	    		synthesize power events\n"			\
 "				e:	    		synthesize error events\n"			\
 "				d:	    		create a debug log\n"			\
+"				f:	    		synthesize first level cache events\n" \
+"				m:	    		synthesize last level cache events\n" \
+"				t:	    		synthesize TLB events\n" \
+"				a:	    		synthesize remote access events\n" \
 "				g[len]:     		synthesize a call chain (use with i or x)\n" \
 "				l[len]:     		synthesize last branch entries (use with i or x)\n" \
 "				sNUMBER:    		skip initial number of events\n"		\
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 83bfb8768235..2feb751516ab 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1225,7 +1225,7 @@ bpf__obj_config_map(struct bpf_object *obj,
 out:
 	free(map_name);
 	if (!err)
-		key_scan_pos += strlen(map_opt);
+		*key_scan_pos += strlen(map_opt);
 	return err;
 }
 
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 4d3f02fa223d..17b2ccc61094 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -46,7 +46,7 @@ struct branch_entry {
 struct branch_stack {
 	u64			nr;
 	u64			hw_idx;
-	struct branch_entry	entries[0];
+	struct branch_entry	entries[];
 };
 
 /*
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 818aa4efd386..2775b752f2fa 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1599,3 +1599,17 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
 	for (node = cursor->first; node != NULL; node = node->next)
 		map__zput(node->ms.map);
 }
+
+void callchain_param_setup(u64 sample_type)
+{
+	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+		    (sample_type & PERF_SAMPLE_STACK_USER)) {
+			callchain_param.record_mode = CALLCHAIN_DWARF;
+			dwarf_callchain_users = true;
+		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+			callchain_param.record_mode = CALLCHAIN_LBR;
+		else
+			callchain_param.record_mode = CALLCHAIN_FP;
+	}
+}
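
The new callchain_param_setup() helper centralizes the record-mode selection that report-style tools previously open-coded: DWARF unwinding when both user registers and user stack were sampled, LBR when a branch stack is present, and frame pointers otherwise. A minimal caller sketch in C, assuming the combined sample type is taken from the session's evlist (the evlist helper named below is an assumption for illustration, not part of this change):

	/* e.g. in a tool's setup path, before resolving callchains */
	u64 sample_type = perf_evlist__combined_sample_type(session->evlist);

	callchain_param_setup(sample_type);
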
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 8f668ee29f25..fe36a9e5ccd1 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -297,4 +297,5 @@ int callchain_branch_counts(struct callchain_root *root,
 			    u64 *branch_count, u64 *predicted_count,
 			    u64 *abort_count, u64 *cycles_count);
 
+void callchain_param_setup(u64 sample_type);
 #endif	/* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index 6b3988a7aba8..fa8248aadb59 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -65,7 +65,7 @@ static int perf_flag_probe(void)
 		return 1;
 	}
 
-	WARN_ONCE(err != EINVAL && err != EBUSY,
+	WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES,
 		  "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
 		  err, str_error_r(err, sbuf, sizeof(sbuf)));
 
@@ -83,7 +83,7 @@ static int perf_flag_probe(void)
 	if (fd >= 0)
 		close(fd);
 
-	if (WARN_ONCE(fd < 0 && err != EBUSY,
+	if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES,
 		      "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
 		      err, str_error_r(err, sbuf, sizeof(sbuf))))
 		return -1;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index ef38eba56ed0..20be0504fb95 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -17,10 +17,10 @@
 #include "util/event.h"  /* proc_map_timeout */
 #include "util/hist.h"  /* perf_hist_config */
 #include "util/llvm-utils.h"   /* perf_llvm_config */
+#include "util/stat.h"  /* perf_stat__set_big_num */
 #include "build-id.h"
 #include "debug.h"
 #include "config.h"
-#include "debug.h"
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <stdlib.h>
@@ -452,6 +452,15 @@ static int perf_ui_config(const char *var, const char *value)
 	return 0;
 }
 
+static int perf_stat_config(const char *var, const char *value)
+{
+	if (!strcmp(var, "stat.big-num"))
+		perf_stat__set_big_num(perf_config_bool(var, value));
+
+	/* Add other config variables here. */
+	return 0;
+}
+
 int perf_default_config(const char *var, const char *value,
 			void *dummy __maybe_unused)
 {
@@ -473,6 +482,9 @@ int perf_default_config(const char *var, const char *value,
 	if (strstarts(var, "buildid."))
 		return perf_buildid_config(var, value);
 
+	if (strstarts(var, "stat."))
+		return perf_stat_config(var, value);
+
 	/* Add other config variables here. */
 	return 0;
 }
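
The new stat.big-num variable goes through the regular perfconfig machinery, so a user-level default is just a config-file entry; an illustrative sketch (file location and INI-style layout follow the existing perf-config conventions):

	# ~/.perfconfig
	[stat]
		big-num = false

With this set, perf stat would default to printing counts without thousands separators; an explicit --big-num / --no-big-num on the command line would still take precedence.
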
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index f94e1a23dad6..582f3aeaf5e4 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
 #include <stdlib.h>
+#include <string.h>
 #include "evsel.h"
 #include "counts.h"
 #include <linux/zalloc.h>
@@ -42,24 +43,25 @@ void perf_counts__delete(struct perf_counts *counts)
 	}
 }
 
-static void perf_counts__reset(struct perf_counts *counts)
+void perf_counts__reset(struct perf_counts *counts)
 {
 	xyarray__reset(counts->loaded);
 	xyarray__reset(counts->values);
+	memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
 }
 
-void perf_evsel__reset_counts(struct evsel *evsel)
+void evsel__reset_counts(struct evsel *evsel)
 {
 	perf_counts__reset(evsel->counts);
 }
 
-int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
+int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
 {
 	evsel->counts = perf_counts__new(ncpus, nthreads);
 	return evsel->counts != NULL ? 0 : -ENOMEM;
 }
 
-void perf_evsel__free_counts(struct evsel *evsel)
+void evsel__free_counts(struct evsel *evsel)
 {
 	perf_counts__delete(evsel->counts);
 	evsel->counts = NULL;
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 92196df4945f..7ff36bf6d644 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -37,9 +37,10 @@ perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool lo
 
 struct perf_counts *perf_counts__new(int ncpus, int nthreads);
 void perf_counts__delete(struct perf_counts *counts);
+void perf_counts__reset(struct perf_counts *counts);
 
-void perf_evsel__reset_counts(struct evsel *evsel);
-int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
-void perf_evsel__free_counts(struct evsel *evsel);
+void evsel__reset_counts(struct evsel *evsel);
+int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
+void evsel__free_counts(struct evsel *evsel);
 
 #endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 7bf6b811f715..6201c3790d86 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -22,7 +22,7 @@ struct numa_topology_node {
 
 struct numa_topology {
 	u32				nr;
-	struct numa_topology_node	nodes[0];
+	struct numa_topology_node	nodes[];
 };
 
 struct cpu_topology *cpu_topology__new(void);
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
index 6fb7f34c0814..39c05200ed65 100644
--- a/tools/perf/util/demangle-java.c
+++ b/tools/perf/util/demangle-java.c
@@ -15,7 +15,7 @@ enum {
 	MODE_CLASS  = 1,
 	MODE_FUNC   = 2,
 	MODE_TYPE   = 3,
-	MODE_CTYPE  = 3, /* class arg */
+	MODE_CTYPE  = 4, /* class arg */
 };
 
 #define BASE_ENT(c, n)	[c - 'A']=n
@@ -27,7 +27,7 @@ static const char *base_types['Z' - 'A' + 1] = {
 	BASE_ENT('I', "int" ),
 	BASE_ENT('J', "long" ),
 	BASE_ENT('S', "short" ),
-	BASE_ENT('Z', "bool" ),
+	BASE_ENT('Z', "boolean" ),
 };
 
 /*
@@ -59,15 +59,16 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int
 
 		switch (*q) {
 		case 'L':
-			if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
-				if (mode == MODE_CTYPE) {
+			if (mode == MODE_PREFIX || mode == MODE_TYPE) {
+				if (mode == MODE_TYPE) {
 					if (narg)
 						rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
 					narg++;
 				}
-				rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
 				if (mode == MODE_PREFIX)
 					mode = MODE_CLASS;
+				else
+					mode = MODE_CTYPE;
 			} else
 				buf[rlen++] = *q;
 			break;
@@ -120,7 +121,7 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int
 			if (mode != MODE_CLASS && mode != MODE_CTYPE)
 				goto error;
 			/* safe because at least one other char to process */
-			if (isalpha(*(q + 1)))
+			if (isalpha(*(q + 1)) && mode == MODE_CLASS)
 				rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
 			if (mode == MODE_CLASS)
 				mode = MODE_FUNC;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index f338990e0fe6..99f0a39c3c59 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -47,6 +47,7 @@ char dso__symtab_origin(const struct dso *dso)
 		[DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO]	= 'D',
 		[DSO_BINARY_TYPE__FEDORA_DEBUGINFO]		= 'f',
 		[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO]		= 'u',
+		[DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO]	= 'x',
 		[DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO]	= 'o',
 		[DSO_BINARY_TYPE__BUILDID_DEBUGINFO]		= 'b',
 		[DSO_BINARY_TYPE__SYSTEM_PATH_DSO]		= 'd',
@@ -129,6 +130,21 @@ int dso__read_binary_type_filename(const struct dso *dso,
 		snprintf(filename + len, size - len, "%s", dso->long_name);
 		break;
 
+	case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+		/*
+		 * Ubuntu can mix up /usr/lib with /lib, putting debuginfo in
+		 * /usr/lib/debug/lib when it is expected to be in
+		 * /usr/lib/debug/usr/lib
+		 */
+		if (strlen(dso->long_name) < 9 ||
+		    strncmp(dso->long_name, "/usr/lib/", 9)) {
+			ret = -1;
+			break;
+		}
+		len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+		snprintf(filename + len, size - len, "%s", dso->long_name + 4);
+		break;
+
 	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
 	{
 		const char *last_slash;
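
The effect of the new DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO lookup can be read straight off the code above: the leading "/usr" of the DSO path is dropped before it is appended to the debug directory. A worked example with an illustrative path:

	/* dso->long_name: /usr/lib/x86_64-linux-gnu/libfoo.so           */
	/* joined prefix:  /usr/lib/debug                                */
	/* probed file:    /usr/lib/debug/lib/x86_64-linux-gnu/libfoo.so */
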
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 9553a1fd9e8a..d3d03274b0d1 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -30,6 +30,7 @@ enum dso_binary_type {
 	DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
 	DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
 	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
 	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
 	DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
 	DSO_BINARY_TYPE__GUEST_KMODULE,
@@ -137,7 +138,7 @@ struct dso_cache {
 	struct rb_node	rb_node;
 	u64 offset;
 	u64 size;
-	char data[0];
+	char data[];
 };
 
 struct auxtrace_cache;
@@ -209,7 +210,7 @@ struct dso {
 	struct nsinfo	*nsinfo;
 	struct dso_id	 id;
 	refcount_t	 refcnt;
-	char		 name[0];
+	char		 name[];
 };
 
 /* dso__for_each_symbol - iterate over the symbols of given type
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b8289f160f07..6ae01c3c2ffa 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -79,7 +79,7 @@ struct sample_read {
 
 struct ip_callchain {
 	u64 nr;
-	u64 ips[0];
+	u64 ips[];
 };
 
 struct branch_stack;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 0a0b760d6948..173b4f0e0e6e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -233,7 +233,7 @@ void perf_evlist__set_leader(struct evlist *evlist)
 
 int __perf_evlist__add_default(struct evlist *evlist, bool precise)
 {
-	struct evsel *evsel = perf_evsel__new_cycles(precise);
+	struct evsel *evsel = evsel__new_cycles(precise);
 
 	if (evsel == NULL)
 		return -ENOMEM;
@@ -249,7 +249,7 @@ int perf_evlist__add_dummy(struct evlist *evlist)
 		.config = PERF_COUNT_SW_DUMMY,
 		.size	= sizeof(attr), /* to capture ABI version */
 	};
-	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);
+	struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
 
 	if (evsel == NULL)
 		return -ENOMEM;
@@ -266,7 +266,7 @@ static int evlist__add_attrs(struct evlist *evlist,
 	size_t i;
 
 	for (i = 0; i < nr_attrs; i++) {
-		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
+		evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
 		if (evsel == NULL)
 			goto out_delete_partial_list;
 		list_add_tail(&evsel->core.node, &head);
@@ -325,7 +325,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
 int perf_evlist__add_newtp(struct evlist *evlist,
 			   const char *sys, const char *name, void *handler)
 {
-	struct evsel *evsel = perf_evsel__newtp(sys, name);
+	struct evsel *evsel = evsel__newtp(sys, name);
 
 	if (IS_ERR(evsel))
 		return -1;
@@ -380,22 +380,33 @@ void evlist__disable(struct evlist *evlist)
 {
 	struct evsel *pos;
 	struct affinity affinity;
-	int cpu, i;
+	int cpu, i, imm = 0;
+	bool has_imm = false;
 
 	if (affinity__setup(&affinity) < 0)
 		return;
 
-	evlist__for_each_cpu(evlist, i, cpu) {
-		affinity__set(&affinity, cpu);
-
-		evlist__for_each_entry(evlist, pos) {
-			if (evsel__cpu_iter_skip(pos, cpu))
-				continue;
-			if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
-				continue;
-			evsel__disable_cpu(pos, pos->cpu_iter - 1);
+	/* Disable 'immediate' events last */
+	for (imm = 0; imm <= 1; imm++) {
+		evlist__for_each_cpu(evlist, i, cpu) {
+			affinity__set(&affinity, cpu);
+
+			evlist__for_each_entry(evlist, pos) {
+				if (evsel__cpu_iter_skip(pos, cpu))
+					continue;
+				if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+					continue;
+				if (pos->immediate)
+					has_imm = true;
+				if (pos->immediate != imm)
+					continue;
+				evsel__disable_cpu(pos, pos->cpu_iter - 1);
+			}
 		}
+		if (!has_imm)
+			break;
 	}
+
 	affinity__cleanup(&affinity);
 	evlist__for_each_entry(evlist, pos) {
 		if (!evsel__is_group_leader(pos) || !pos->core.fd)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f3e60c45d59a..96e5171dce41 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -56,14 +56,14 @@ struct perf_missing_features perf_missing_features;
 
 static clockid_t clockid;
 
-static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused)
+static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
 {
 	return 0;
 }
 
 void __weak test_attr__ready(void) { }
 
-static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
+static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
 {
 }
 
@@ -73,13 +73,12 @@ static struct {
 	void	(*fini)(struct evsel *evsel);
 } perf_evsel__object = {
 	.size = sizeof(struct evsel),
-	.init = perf_evsel__no_extra_init,
-	.fini = perf_evsel__no_extra_fini,
+	.init = evsel__no_extra_init,
+	.fini = evsel__no_extra_fini,
 };
 
-int perf_evsel__object_config(size_t object_size,
-			      int (*init)(struct evsel *evsel),
-			      void (*fini)(struct evsel *evsel))
+int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
+			 void (*fini)(struct evsel *evsel))
 {
 
 	if (object_size == 0)
@@ -255,11 +254,12 @@ void evsel__init(struct evsel *evsel,
 	evsel->metric_expr   = NULL;
 	evsel->metric_name   = NULL;
 	evsel->metric_events = NULL;
+	evsel->per_pkg_mask  = NULL;
 	evsel->collect_stat  = false;
 	evsel->pmu_name      = NULL;
 }
 
-struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
 {
 	struct evsel *evsel = zalloc(perf_evsel__object.size);
 
@@ -292,7 +292,7 @@ static bool perf_event_can_profile_kernel(void)
 	return perf_event_paranoid_check(1);
 }
 
-struct evsel *perf_evsel__new_cycles(bool precise)
+struct evsel *evsel__new_cycles(bool precise)
 {
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
@@ -334,7 +334,7 @@ error_free:
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
 {
 	struct evsel *evsel = zalloc(perf_evsel__object.size);
 	int err = -ENOMEM;
@@ -372,7 +372,7 @@ out_err:
 	return ERR_PTR(err);
 }
 
-const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
+const char *evsel__hw_names[PERF_COUNT_HW_MAX] = {
 	"cycles",
 	"instructions",
 	"cache-references",
@@ -387,8 +387,8 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
 
 static const char *__evsel__hw_name(u64 config)
 {
-	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
-		return perf_evsel__hw_names[config];
+	if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
+		return evsel__hw_names[config];
 
 	return "unknown-hardware";
 }
@@ -435,7 +435,7 @@ static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
 	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
-const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
 	"cpu-clock",
 	"task-clock",
 	"page-faults",
@@ -450,8 +450,8 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
 
 static const char *__evsel__sw_name(u64 config)
 {
-	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
-		return perf_evsel__sw_names[config];
+	if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
+		return evsel__sw_names[config];
 	return "unknown-software";
 }
 
@@ -486,8 +486,7 @@ static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
 	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
 }
 
-const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
-				[PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
  { "L1-dcache",	"l1-d",		"l1d",		"L1-data",		},
  { "L1-icache",	"l1-i",		"l1i",		"L1-instruction",	},
  { "LLC",	"L2",							},
@@ -497,15 +496,13 @@ const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
  { "node",								},
 };
 
-const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
-				   [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
  { "load",	"loads",	"read",					},
  { "store",	"stores",	"write",				},
  { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
 };
 
-const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
-				       [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
  { "refs",	"Reference",	"ops",		"access",		},
  { "misses",	"miss",							},
 };
@@ -521,7 +518,7 @@ const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
  * L1I : Read and prefetch only
  * ITLB and BPU : Read-only
  */
-static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+static unsigned long evsel__hw_cache_stat[C(MAX)] = {
  [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
  [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
  [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
@@ -533,7 +530,7 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
 
 bool evsel__is_cache_op_valid(u8 type, u8 op)
 {
-	if (perf_evsel__hw_cache_stat[type] & COP(op))
+	if (evsel__hw_cache_stat[type] & COP(op))
 		return true;	/* valid */
 	else
 		return false;	/* invalid */
@@ -542,13 +539,13 @@ bool evsel__is_cache_op_valid(u8 type, u8 op)
 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
 {
 	if (result) {
-		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
-				 perf_evsel__hw_cache_op[op][0],
-				 perf_evsel__hw_cache_result[result][0]);
+		return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
+				 evsel__hw_cache_op[op][0],
+				 evsel__hw_cache_result[result][0]);
 	}
 
-	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
-			 perf_evsel__hw_cache_op[op][1]);
+	return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
+			 evsel__hw_cache_op[op][1]);
 }
 
 static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
@@ -768,10 +765,10 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
 	}
 }
 
-static void apply_config_terms(struct evsel *evsel,
-			       struct record_opts *opts, bool track)
+static void evsel__apply_config_terms(struct evsel *evsel,
+				      struct record_opts *opts, bool track)
 {
-	struct perf_evsel_config_term *term;
+	struct evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
 	struct perf_event_attr *attr = &evsel->core.attr;
 	/* callgraph default */
@@ -784,30 +781,30 @@ static void apply_config_terms(struct evsel *evsel,
 
 	list_for_each_entry(term, config_terms, list) {
 		switch (term->type) {
-		case PERF_EVSEL__CONFIG_TERM_PERIOD:
+		case EVSEL__CONFIG_TERM_PERIOD:
 			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
 				attr->sample_period = term->val.period;
 				attr->freq = 0;
 				evsel__reset_sample_bit(evsel, PERIOD);
 			}
 			break;
-		case PERF_EVSEL__CONFIG_TERM_FREQ:
+		case EVSEL__CONFIG_TERM_FREQ:
 			if (!(term->weak && opts->user_freq != UINT_MAX)) {
 				attr->sample_freq = term->val.freq;
 				attr->freq = 1;
 				evsel__set_sample_bit(evsel, PERIOD);
 			}
 			break;
-		case PERF_EVSEL__CONFIG_TERM_TIME:
+		case EVSEL__CONFIG_TERM_TIME:
 			if (term->val.time)
 				evsel__set_sample_bit(evsel, TIME);
 			else
 				evsel__reset_sample_bit(evsel, TIME);
 			break;
-		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+		case EVSEL__CONFIG_TERM_CALLGRAPH:
 			callgraph_buf = term->val.str;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_BRANCH:
+		case EVSEL__CONFIG_TERM_BRANCH:
 			if (term->val.str && strcmp(term->val.str, "no")) {
 				evsel__set_sample_bit(evsel, BRANCH_STACK);
 				parse_branch_str(term->val.str,
@@ -815,16 +812,16 @@ static void apply_config_terms(struct evsel *evsel,
 			} else
 				evsel__reset_sample_bit(evsel, BRANCH_STACK);
 			break;
-		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+		case EVSEL__CONFIG_TERM_STACK_USER:
 			dump_size = term->val.stack_user;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+		case EVSEL__CONFIG_TERM_MAX_STACK:
 			max_stack = term->val.max_stack;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+		case EVSEL__CONFIG_TERM_MAX_EVENTS:
 			evsel->max_events = term->val.max_events;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_INHERIT:
+		case EVSEL__CONFIG_TERM_INHERIT:
 			/*
 			 * attr->inherit should has already been set by
 			 * evsel__config. If user explicitly set
@@ -833,20 +830,20 @@ static void apply_config_terms(struct evsel *evsel,
 			 */
 			attr->inherit = term->val.inherit ? 1 : 0;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
+		case EVSEL__CONFIG_TERM_OVERWRITE:
 			attr->write_backward = term->val.overwrite ? 1 : 0;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
+		case EVSEL__CONFIG_TERM_DRV_CFG:
 			break;
-		case PERF_EVSEL__CONFIG_TERM_PERCORE:
+		case EVSEL__CONFIG_TERM_PERCORE:
 			break;
-		case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT:
+		case EVSEL__CONFIG_TERM_AUX_OUTPUT:
 			attr->aux_output = term->val.aux_output ? 1 : 0;
 			break;
-		case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
+		case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
 			/* Already applied by auxtrace */
 			break;
-		case PERF_EVSEL__CONFIG_TERM_CFG_CHG:
+		case EVSEL__CONFIG_TERM_CFG_CHG:
 			break;
 		default:
 			break;
@@ -907,10 +904,9 @@ static bool is_dummy_event(struct evsel *evsel)
 	       (evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
 }
 
-struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
-							     enum evsel_term_type type)
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
 {
-	struct perf_evsel_config_term *term, *found_term = NULL;
+	struct evsel_config_term *term, *found_term = NULL;
 
 	list_for_each_entry(term, &evsel->config_terms, list) {
 		if (term->type == type)
@@ -1145,7 +1141,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 	 * Apply event specific term settings,
 	 * it overloads any global configuration.
 	 */
-	apply_config_terms(evsel, opts, track);
+	evsel__apply_config_terms(evsel, opts, track);
 
 	evsel->ignore_missing_thread = opts->ignore_missing_thread;
 
@@ -1158,11 +1154,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 	}
 
 	/*
+	 * A dummy event never triggers any actual counter and therefore
+	 * cannot be used with branch_stack.
+	 *
 	 * For initial_delay, a dummy event is added implicitly.
 	 * The software event will trigger -EOPNOTSUPP error out,
 	 * if BRANCH_STACK bit is set.
 	 */
-	if (opts->initial_delay && is_dummy_event(evsel))
+	if (is_dummy_event(evsel))
 		evsel__reset_sample_bit(evsel, BRANCH_STACK);
 }
 
@@ -1241,9 +1240,9 @@ int evsel__disable(struct evsel *evsel)
 	return err;
 }
 
-static void perf_evsel__free_config_terms(struct evsel *evsel)
+static void evsel__free_config_terms(struct evsel *evsel)
 {
-	struct perf_evsel_config_term *term, *h;
+	struct evsel_config_term *term, *h;
 
 	list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
 		list_del_init(&term->list);
@@ -1257,10 +1256,10 @@ void evsel__exit(struct evsel *evsel)
 {
 	assert(list_empty(&evsel->core.node));
 	assert(evsel->evlist == NULL);
-	perf_evsel__free_counts(evsel);
+	evsel__free_counts(evsel);
 	perf_evsel__free_fd(&evsel->core);
 	perf_evsel__free_id(&evsel->core);
-	perf_evsel__free_config_terms(evsel);
+	evsel__free_config_terms(evsel);
 	cgroup__put(evsel->cgrp);
 	perf_cpu_map__put(evsel->core.cpus);
 	perf_cpu_map__put(evsel->core.own_cpus);
@@ -1268,6 +1267,8 @@ void evsel__exit(struct evsel *evsel)
 	zfree(&evsel->group_name);
 	zfree(&evsel->name);
 	zfree(&evsel->pmu_name);
+	zfree(&evsel->per_pkg_mask);
+	zfree(&evsel->metric_events);
 	perf_evsel__object.fini(evsel);
 }
 
@@ -1425,7 +1426,7 @@ int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
 	if (FD(evsel, cpu, thread) < 0)
 		return -EINVAL;
 
-	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+	if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
 		return -ENOMEM;
 
 	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
@@ -2416,7 +2417,7 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
 
 		/* Is there already the separator in the name. */
 		if (strchr(name, '/') ||
-		    strchr(name, ':'))
+		    (strchr(name, ':') && !evsel->is_libpfm_event))
 			sep = "";
 
 		if (asprintf(&new_name, "%s%su", name, sep) < 0)
@@ -2477,31 +2478,40 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			 int err, char *msg, size_t size)
 {
 	char sbuf[STRERR_BUFSIZE];
-	int printed = 0;
+	int printed = 0, enforced = 0;
 
 	switch (err) {
 	case EPERM:
 	case EACCES:
+		printed += scnprintf(msg + printed, size - printed,
+			"Access to performance monitoring and observability operations is limited.\n");
+
+		if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
+			if (enforced) {
+				printed += scnprintf(msg + printed, size - printed,
+					"Enforced MAC policy settings (SELinux) can limit access to performance\n"
+					"monitoring and observability operations. Inspect system audit records for\n"
+					"more perf_event access control information and adjust the policy.\n");
+			}
+		}
+
 		if (err == EPERM)
-			printed = scnprintf(msg, size,
+			printed += scnprintf(msg, size,
 				"No permission to enable %s event.\n\n", evsel__name(evsel));
 
 		return scnprintf(msg + printed, size - printed,
-		 "You may not have permission to collect %sstats.\n\n"
-		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
-		 "which controls use of the performance events system by\n"
-		 "unprivileged users (without CAP_PERFMON or CAP_SYS_ADMIN).\n\n"
-		 "The current value is %d:\n\n"
+		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
+		 "access to performance monitoring and observability operations for users\n"
+		 "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n"
+		 "perf_event_paranoid setting is %d:\n"
 		 "  -1: Allow use of (almost) all events by all users\n"
 		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
-		 ">= 0: Disallow ftrace function tracepoint by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
-		 "      Disallow raw tracepoint access by users without CAP_SYS_PERFMON or CAP_SYS_ADMIN\n"
-		 ">= 1: Disallow CPU event access by users without CAP_PERFMON or CAP_SYS_ADMIN\n"
-		 ">= 2: Disallow kernel profiling by users without CAP_PERFMON or CAP_SYS_ADMIN\n\n"
-		 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
-		 "	kernel.perf_event_paranoid = -1\n" ,
-				 target->system_wide ? "system-wide " : "",
-				 perf_event_paranoid());
+		 ">= 0: Disallow raw and ftrace function tracepoint access\n"
+		 ">= 1: Disallow CPU event access\n"
+		 ">= 2: Disallow kernel profiling\n"
+		 "To make the adjusted perf_event_paranoid setting permanent, preserve it\n"
+		 "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
+		 perf_event_paranoid());
 	case ENOENT:
 		return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
 	case EMFILE:
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 351c0aaf2a11..0f963c2a88a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -76,6 +76,7 @@ struct evsel {
 	bool			ignore_missing_thread;
 	bool			forced_leader;
 	bool			use_uncore_alias;
+	bool			is_libpfm_event;
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			sample_read;
@@ -154,31 +155,31 @@ void perf_counts_values__scale(struct perf_counts_values *count,
 void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
 			   struct perf_counts_values *count);
 
-int perf_evsel__object_config(size_t object_size,
-			      int (*init)(struct evsel *evsel),
-			      void (*fini)(struct evsel *evsel));
+int evsel__object_config(size_t object_size,
+			 int (*init)(struct evsel *evsel),
+			 void (*fini)(struct evsel *evsel));
 
 struct perf_pmu *evsel__find_pmu(struct evsel *evsel);
 bool evsel__is_aux_event(struct evsel *evsel);
 
-struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
 
 static inline struct evsel *evsel__new(struct perf_event_attr *attr)
 {
-	return perf_evsel__new_idx(attr, 0);
+	return evsel__new_idx(attr, 0);
 }
 
-struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx);
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
 
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-static inline struct evsel *perf_evsel__newtp(const char *sys, const char *name)
+static inline struct evsel *evsel__newtp(const char *sys, const char *name)
 {
-	return perf_evsel__newtp_idx(sys, name, 0);
+	return evsel__newtp_idx(sys, name, 0);
 }
 
-struct evsel *perf_evsel__new_cycles(bool precise);
+struct evsel *evsel__new_cycles(bool precise);
 
 struct tep_event *event_format__new(const char *sys, const char *name);
 
@@ -198,16 +199,13 @@ void evsel__calc_id_pos(struct evsel *evsel);
 
 bool evsel__is_cache_op_valid(u8 type, u8 op);
 
-#define PERF_EVSEL__MAX_ALIASES 8
+#define EVSEL__MAX_ALIASES 8
 
-extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
-				       [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
-					  [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
-					      [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
-extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
+extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
 int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
 const char *evsel__name(struct evsel *evsel);
 
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index f8938916577c..aee6f808b512 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -6,30 +6,30 @@
 #include <stdbool.h>
 
 /*
- * The 'struct perf_evsel_config_term' is used to pass event
+ * The 'struct evsel_config_term' is used to pass event
  * specific configuration data to evsel__config routine.
  * It is allocated within event parsing and attached to
- * perf_evsel::config_terms list head.
+ * evsel::config_terms list head.
 */
 enum evsel_term_type {
-	PERF_EVSEL__CONFIG_TERM_PERIOD,
-	PERF_EVSEL__CONFIG_TERM_FREQ,
-	PERF_EVSEL__CONFIG_TERM_TIME,
-	PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
-	PERF_EVSEL__CONFIG_TERM_STACK_USER,
-	PERF_EVSEL__CONFIG_TERM_INHERIT,
-	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
-	PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
-	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
-	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
-	PERF_EVSEL__CONFIG_TERM_BRANCH,
-	PERF_EVSEL__CONFIG_TERM_PERCORE,
-	PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
-	PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
-	PERF_EVSEL__CONFIG_TERM_CFG_CHG,
+	EVSEL__CONFIG_TERM_PERIOD,
+	EVSEL__CONFIG_TERM_FREQ,
+	EVSEL__CONFIG_TERM_TIME,
+	EVSEL__CONFIG_TERM_CALLGRAPH,
+	EVSEL__CONFIG_TERM_STACK_USER,
+	EVSEL__CONFIG_TERM_INHERIT,
+	EVSEL__CONFIG_TERM_MAX_STACK,
+	EVSEL__CONFIG_TERM_MAX_EVENTS,
+	EVSEL__CONFIG_TERM_OVERWRITE,
+	EVSEL__CONFIG_TERM_DRV_CFG,
+	EVSEL__CONFIG_TERM_BRANCH,
+	EVSEL__CONFIG_TERM_PERCORE,
+	EVSEL__CONFIG_TERM_AUX_OUTPUT,
+	EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
+	EVSEL__CONFIG_TERM_CFG_CHG,
 };
 
-struct perf_evsel_config_term {
+struct evsel_config_term {
 	struct list_head      list;
 	enum evsel_term_type  type;
 	bool		      free_str;
@@ -53,10 +53,9 @@ struct perf_evsel_config_term {
 
 struct evsel;
 
-struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
-							     enum evsel_term_type type);
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type);
 
-#define perf_evsel__get_config_term(evsel, type) \
-	__perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type)
+#define evsel__get_config_term(evsel, type) \
+	__evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type)
 
 #endif // __PERF_EVSEL_CONFIG_H
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 99aed708bd5a..fb498a723a00 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -35,8 +35,7 @@ static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, vo
 	return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
 }
 
-int perf_evsel__fprintf(struct evsel *evsel,
-			struct perf_attr_details *details, FILE *fp)
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp)
 {
 	bool first = true;
 	int printed = 0;
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
index 47e6c8456bb1..3093d096c29f 100644
--- a/tools/perf/util/evsel_fprintf.h
+++ b/tools/perf/util/evsel_fprintf.h
@@ -15,8 +15,7 @@ struct perf_attr_details {
 	bool trace_fields;
 };
 
-int perf_evsel__fprintf(struct evsel *evsel,
-			struct perf_attr_details *details, FILE *fp);
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp);
 
 #define EVSEL__PRINT_IP			(1<<0)
 #define EVSEL__PRINT_SYM		(1<<1)
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index aa631e37ad1e..f64ab91c432b 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -4,25 +4,76 @@
 #include "expr.h"
 #include "expr-bison.h"
 #include "expr-flex.h"
+#include <linux/kernel.h>
 
 #ifdef PARSER_DEBUG
 extern int expr_debug;
 #endif
 
+static size_t key_hash(const void *key, void *ctx __maybe_unused)
+{
+	const char *str = (const char *)key;
+	size_t hash = 0;
+
+	while (*str != '\0') {
+		hash *= 31;
+		hash += *str;
+		str++;
+	}
+	return hash;
+}
+
+static bool key_equal(const void *key1, const void *key2,
+		    void *ctx __maybe_unused)
+{
+	return !strcmp((const char *)key1, (const char *)key2);
+}
+
 /* Caller must make sure id is allocated */
-void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
+int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
 {
-	int idx;
+	double *val_ptr = NULL, *old_val = NULL;
+	char *old_key = NULL;
+	int ret;
+
+	if (val != 0.0) {
+		val_ptr = malloc(sizeof(double));
+		if (!val_ptr)
+			return -ENOMEM;
+		*val_ptr = val;
+	}
+	ret = hashmap__set(&ctx->ids, name, val_ptr,
+			   (const void **)&old_key, (void **)&old_val);
+	free(old_key);
+	free(old_val);
+	return ret;
+}
+
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr)
+{
+	double *data;
 
-	assert(ctx->num_ids < MAX_PARSE_ID);
-	idx = ctx->num_ids++;
-	ctx->ids[idx].name = name;
-	ctx->ids[idx].val = val;
+	if (!hashmap__find(&ctx->ids, id, (void **)&data))
+		return -1;
+	*val_ptr = (data == NULL) ?  0.0 : *data;
+	return 0;
 }
 
 void expr__ctx_init(struct expr_parse_ctx *ctx)
 {
-	ctx->num_ids = 0;
+	hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
+}
+
+void expr__ctx_clear(struct expr_parse_ctx *ctx)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+		free((char *)cur->key);
+		free(cur->value);
+	}
+	hashmap__clear(&ctx->ids);
 }
 
 static int
@@ -45,6 +96,7 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
 
 #ifdef PARSER_DEBUG
 	expr_debug = 1;
+	expr_set_debug(1, scanner);
 #endif
 
 	ret = expr_parse(val, ctx, scanner);
@@ -55,61 +107,25 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
 	return ret;
 }
 
-int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime)
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+		const char *expr, int runtime)
 {
 	return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
 }
 
-static bool
-already_seen(const char *val, const char *one, const char **other,
-	     int num_other)
-{
-	int i;
-
-	if (one && !strcasecmp(one, val))
-		return true;
-	for (i = 0; i < num_other; i++)
-		if (!strcasecmp(other[i], val))
-			return true;
-	return false;
-}
-
-int expr__find_other(const char *expr, const char *one, const char ***other,
-		     int *num_other, int runtime)
+int expr__find_other(const char *expr, const char *one,
+		     struct expr_parse_ctx *ctx, int runtime)
 {
-	int err, i = 0, j = 0;
-	struct expr_parse_ctx ctx;
-
-	expr__ctx_init(&ctx);
-	err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime);
-	if (err)
-		return -1;
-
-	*other = malloc((ctx.num_ids + 1) * sizeof(char *));
-	if (!*other)
-		return -ENOMEM;
-
-	for (i = 0, j = 0; i < ctx.num_ids; i++) {
-		const char *str = ctx.ids[i].name;
-
-		if (already_seen(str, one, *other, j))
-			continue;
-
-		str = strdup(str);
-		if (!str)
-			goto out;
-		(*other)[j++] = str;
-	}
-	(*other)[j] = NULL;
-
-out:
-	if (i != ctx.num_ids) {
-		while (--j)
-			free((char *) (*other)[i]);
-		free(*other);
-		err = -1;
+	double *old_val = NULL;
+	char *old_key = NULL;
+	int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
+
+	if (one) {
+		hashmap__delete(&ctx->ids, one,
+				(const void **)&old_key, (void **)&old_val);
+		free(old_key);
+		free(old_val);
 	}
 
-	*num_other = j;
-	return err;
+	return ret;
 }
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 87d627bb699b..8a2c1074f90f 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,17 +2,17 @@
 #ifndef PARSE_CTX_H
 #define PARSE_CTX_H 1
 
-#define EXPR_MAX_OTHER 20
-#define MAX_PARSE_ID EXPR_MAX_OTHER
-
-struct expr_parse_id {
-	const char *name;
-	double val;
-};
+// There are fixes that need to land upstream before we can use libbpf's headers;
+// for now use our copy unconditionally, since the data structures at this point
+// are exactly the same, so this is not a problem.
+//#ifdef HAVE_LIBBPF_SUPPORT
+//#include <bpf/hashmap.h>
+//#else
+#include "util/hashmap.h"
+//#endif
 
 struct expr_parse_ctx {
-	int num_ids;
-	struct expr_parse_id ids[MAX_PARSE_ID];
+	struct hashmap ids;
 };
 
 struct expr_scanner_ctx {
@@ -21,9 +21,12 @@ struct expr_scanner_ctx {
 };
 
 void expr__ctx_init(struct expr_parse_ctx *ctx);
-void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
-int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime);
-int expr__find_other(const char *expr, const char *one, const char ***other,
-		int *num_other, int runtime);
+void expr__ctx_clear(struct expr_parse_ctx *ctx);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+		const char *expr, int runtime);
+int expr__find_other(const char *expr, const char *one,
+		struct expr_parse_ctx *ids, int runtime);
 
 #endif
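
With the hashmap-backed context the expression API becomes a simple init/add/parse/clear sequence; a minimal sketch in C using only the functions declared above (the event names and values are made up, and note that expr__add_id() takes ownership of the allocated id string, which expr__ctx_clear() later frees):

	#include <stdlib.h>
	#include <string.h>
	#include "util/expr.h"

	static int expr_example(void)
	{
		struct expr_parse_ctx ctx;
		double result = 0.0;
		int err;

		expr__ctx_init(&ctx);
		err = expr__add_id(&ctx, strdup("EVENT1"), 2.0);
		if (!err)
			err = expr__add_id(&ctx, strdup("EVENT2"), 3.0);
		if (!err)
			/* last argument is the 'runtime' value used by parameterized metrics */
			err = expr__parse(&result, &ctx, "EVENT1 + EVENT2 / 2", 0);

		/* on success result == 3.5, since '/' binds tighter than '+' */
		expr__ctx_clear(&ctx);
		return err;
	}
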
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index 74b9b59b1aa5..f397bf8b1a48 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -10,12 +10,12 @@
 char *expr_get_text(yyscan_t yyscanner);
 YYSTYPE *expr_get_lval(yyscan_t yyscanner);
 
-static int __value(YYSTYPE *yylval, char *str, int base, int token)
+static double __value(YYSTYPE *yylval, char *str, int token)
 {
-	u64 num;
+	double num;
 
 	errno = 0;
-	num = strtoull(str, NULL, base);
+	num = strtod(str, NULL);
 	if (errno)
 		return EXPR_ERROR;
 
@@ -23,12 +23,12 @@ static int __value(YYSTYPE *yylval, char *str, int base, int token)
 	return token;
 }
 
-static int value(yyscan_t scanner, int base)
+static int value(yyscan_t scanner)
 {
 	YYSTYPE *yylval = expr_get_lval(scanner);
 	char *text = expr_get_text(scanner);
 
-	return __value(yylval, text, base, NUMBER);
+	return __value(yylval, text, NUMBER);
 }
 
 /*
@@ -81,12 +81,12 @@ static int str(yyscan_t scanner, int token, int runtime)
 }
 %}
 
-number		[0-9]+
+number		([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
 
 sch		[-,=]
 spec		\\{sch}
 sym		[0-9a-zA-Z_\.:@?]+
-symbol		{spec}*{sym}*{spec}*{sym}*{spec}*{sym}
+symbol		({spec}|{sym})+
 
 %%
 	struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
@@ -105,7 +105,7 @@ min		{ return MIN; }
 if		{ return IF; }
 else		{ return ELSE; }
 #smt_on		{ return SMT_ON; }
-{number}	{ return value(yyscanner, 10); }
+{number}	{ return value(yyscanner); }
 {symbol}	{ return str(yyscanner, ID, sctx->runtime); }
 "|"		{ return '|'; }
 "^"		{ return '^'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index cd17486c1c5d..bf3e898e3055 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -27,6 +27,7 @@
 %token EXPR_PARSE EXPR_OTHER EXPR_ERROR
 %token <num> NUMBER
 %token <str> ID
+%destructor { free ($$); } <str>
 %token MIN MAX IF ELSE SMT_ON
 %left MIN MAX IF
 %left '|'
@@ -46,19 +47,6 @@ static void expr_error(double *final_val __maybe_unused,
 	pr_debug("%s\n", s);
 }
 
-static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val)
-{
-	int i;
-
-	for (i = 0; i < ctx->num_ids; i++) {
-		if (!strcasecmp(ctx->ids[i].name, id)) {
-			*val = ctx->ids[i].val;
-			return 0;
-		}
-	}
-	return -1;
-}
-
 %}
 %%
 
@@ -72,15 +60,10 @@ all_other: all_other other
 
 other: ID
 {
-	if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
-		pr_err("failed: way too many variables");
-		YYABORT;
-	}
-
-	ctx->ids[ctx->num_ids++].name = $1;
+	expr__add_id(ctx, $1, 0.0);
 }
 |
-MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')'
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
 
 
 all_expr: if_expr			{ *final_val = $1; }
@@ -92,10 +75,12 @@ if_expr:
 	;
 
 expr:	  NUMBER
-	| ID			{ if (lookup_id(ctx, $1, &$$) < 0) {
+	| ID			{ if (expr__get_id(ctx, $1, &$$)) {
 					pr_debug("%s not found\n", $1);
+					free($1);
 					YYABORT;
 				  }
+				  free($1);
 				}
 	| expr '|' expr		{ $$ = (long)$1 | (long)$3; }
 	| expr '&' expr		{ $$ = (long)$1 & (long)$3; }
@@ -103,8 +88,18 @@ expr:	  NUMBER
 	| expr '+' expr		{ $$ = $1 + $3; }
 	| expr '-' expr		{ $$ = $1 - $3; }
 	| expr '*' expr		{ $$ = $1 * $3; }
-	| expr '/' expr		{ if ($3 == 0) YYABORT; $$ = $1 / $3; }
-	| expr '%' expr		{ if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; }
+	| expr '/' expr		{ if ($3 == 0) {
+					pr_debug("division by zero\n");
+					YYABORT;
+				  }
+				  $$ = $1 / $3;
+	                        }
+	| expr '%' expr		{ if ((long)$3 == 0) {
+					pr_debug("division by zero\n");
+					YYABORT;
+				  }
+				  $$ = (long)$1 % (long)$3;
+	                        }
 	| '-' expr %prec NEG	{ $$ = -$2; }
 	| '(' if_expr ')'	{ $$ = $2; }
 	| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index 30e9f618f6cd..dd40683bd4c0 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -342,7 +342,7 @@ static void emit_lineno_info(struct buffer_ext *be,
 	 */
 
 	/* start state of the state machine we take care of */
-	unsigned long last_vma = code_addr;
+	unsigned long last_vma = 0;
 	char const  *cur_filename = NULL;
 	unsigned long cur_file_idx = 0;
 	int last_line = 1;
@@ -473,7 +473,7 @@ jit_process_debug_info(uint64_t code_addr,
 		ent = debug_entry_next(ent);
 	}
 	add_compilation_unit(di, buffer_ext_size(dl));
-	add_debug_line(dl, debug, nr_debug_entries, 0);
+	add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET);
 	add_debug_abbrev(da);
 	if (0) buffer_ext_dump(da, "abbrev");
 
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
new file mode 100644
index 000000000000..a405dad068f5
--- /dev/null
+++ b/tools/perf/util/hashmap.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	entry->next = *pprev;
+	*pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	*pprev = entry->next;
+	entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx)
+{
+	map->hash_fn = hash_fn;
+	map->equal_fn = equal_fn;
+	map->ctx = ctx;
+
+	map->buckets = NULL;
+	map->cap = 0;
+	map->cap_bits = 0;
+	map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx)
+{
+	struct hashmap *map = malloc(sizeof(struct hashmap));
+
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+	hashmap__init(map, hash_fn, equal_fn, ctx);
+	return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+	struct hashmap_entry *cur, *tmp;
+	size_t bkt;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		free(cur);
+	}
+	free(map->buckets);
+	map->buckets = NULL;
+	map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+	if (!map)
+		return;
+
+	hashmap__clear(map);
+	free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+	return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+	return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+	/* grow if empty or more than 75% filled */
+	return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+	struct hashmap_entry **new_buckets;
+	struct hashmap_entry *cur, *tmp;
+	size_t new_cap_bits, new_cap;
+	size_t h, bkt;
+
+	new_cap_bits = map->cap_bits + 1;
+	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+		new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+	new_cap = 1UL << new_cap_bits;
+	new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+	if (!new_buckets)
+		return -ENOMEM;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+		hashmap_add_entry(&new_buckets[h], cur);
+	}
+
+	map->cap = new_cap;
+	map->cap_bits = new_cap_bits;
+	free(map->buckets);
+	map->buckets = new_buckets;
+
+	return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+			       const void *key, size_t hash,
+			       struct hashmap_entry ***pprev,
+			       struct hashmap_entry **entry)
+{
+	struct hashmap_entry *cur, **prev_ptr;
+
+	if (!map->buckets)
+		return false;
+
+	for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+	     cur;
+	     prev_ptr = &cur->next, cur = cur->next) {
+		if (map->equal_fn(cur->key, key, map->ctx)) {
+			if (pprev)
+				*pprev = prev_ptr;
+			*entry = cur;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+	int err;
+
+	if (old_key)
+		*old_key = NULL;
+	if (old_value)
+		*old_value = NULL;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (strategy != HASHMAP_APPEND &&
+	    hashmap_find_entry(map, key, h, NULL, &entry)) {
+		if (old_key)
+			*old_key = entry->key;
+		if (old_value)
+			*old_value = entry->value;
+
+		if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+			entry->key = key;
+			entry->value = value;
+			return 0;
+		} else if (strategy == HASHMAP_ADD) {
+			return -EEXIST;
+		}
+	}
+
+	if (strategy == HASHMAP_UPDATE)
+		return -ENOENT;
+
+	if (hashmap_needs_to_grow(map)) {
+		err = hashmap_grow(map);
+		if (err)
+			return err;
+		h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	}
+
+	entry = malloc(sizeof(struct hashmap_entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->key = key;
+	entry->value = value;
+	hashmap_add_entry(&map->buckets[h], entry);
+	map->sz++;
+
+	return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+	struct hashmap_entry *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, NULL, &entry))
+		return false;
+
+	if (value)
+		*value = entry->value;
+	return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value)
+{
+	struct hashmap_entry **pprev, *entry;
+	size_t h;
+
+	h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+		return false;
+
+	if (old_key)
+		*old_key = entry->key;
+	if (old_value)
+		*old_value = entry->value;
+
+	hashmap_del_entry(pprev, entry);
+	free(entry);
+	map->sz--;
+
+	return true;
+}
+
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
new file mode 100644
index 000000000000..df59fd4fc95b
--- /dev/null
+++ b/tools/perf/util/hashmap.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <limits.h>
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+	/* shuffle bits and return requested number of upper bits */
+	return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+	const void *key;
+	void *value;
+	struct hashmap_entry *next;
+};
+
+struct hashmap {
+	hashmap_hash_fn hash_fn;
+	hashmap_equal_fn equal_fn;
+	void *ctx;
+
+	struct hashmap_entry **buckets;
+	size_t cap;
+	size_t cap_bits;
+	size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) {	\
+	.hash_fn = (hash_fn),			\
+	.equal_fn = (equal_fn),			\
+	.ctx = (ctx),				\
+	.buckets = NULL,			\
+	.cap = 0,				\
+	.cap_bits = 0,				\
+	.sz = 0,				\
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ *   update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ *   nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ *   This turns hashmap into a multimap by allowing multiple values to be
+ *   associated with the same key. Most useful read API for such hashmap is
+ *   hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ *   used, it will return last inserted key/value entry (first in a bucket
+ *   chain).
+ */
+enum hashmap_insert_strategy {
+	HASHMAP_ADD,
+	HASHMAP_SET,
+	HASHMAP_UPDATE,
+	HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow the calling code to do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+			       const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+			       const void *key, void *value,
+			       const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_SET,
+			       old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+				  const void *key, void *value,
+				  const void **old_key, void **old_value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+			       old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+				  const void *key, void *value)
+{
+	return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt)				    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		    \
+	for (bkt = 0; bkt < map->cap; bkt++)				    \
+		for (cur = map->buckets[bkt];				    \
+		     cur && ({tmp = cur->next; true; });		    \
+		     cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key)			    \
+	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+					     map->cap_bits);		    \
+		     map->buckets ? map->buckets[bkt] : NULL; });	    \
+	     cur;							    \
+	     cur = cur->next)						    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		    \
+	for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+					     map->cap_bits);		    \
+		     cur = map->buckets ? map->buckets[bkt] : NULL; });	    \
+	     cur && ({ tmp = cur->next; true; });			    \
+	     cur = tmp)							    \
+		if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
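
To make the borrowed hashmap API concrete, here is a small self-contained usage sketch against the declarations above, following the string hash/equality pattern that util/expr.c adopts (str_hash, str_equal and the sample key/value are local to this example):

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include "hashmap.h"

	static size_t str_hash(const void *key, void *ctx)
	{
		const char *s = key;
		size_t h = 0;

		while (*s)
			h = h * 31 + *s++;
		return h;
	}

	static bool str_equal(const void *a, const void *b, void *ctx)
	{
		return strcmp(a, b) == 0;
	}

	int main(void)
	{
		struct hashmap map;
		void *val;

		hashmap__init(&map, str_hash, str_equal, NULL);
		hashmap__add(&map, "cycles", (void *)(uintptr_t)42);

		if (hashmap__find(&map, "cycles", &val))
			printf("cycles -> %zu\n", (size_t)(uintptr_t)val);

		/* frees the entries and buckets, not the keys/values themselves */
		hashmap__clear(&map);
		return 0;
	}
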
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 0ce47283a8a1..7a67d017d72c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3574,7 +3574,7 @@ static int perf_header__read_pipe(struct perf_session *session)
 		return -EINVAL;
 	}
 
-	return 0;
+	return f_header.size == sizeof(f_header) ? 0 : -1;
 }
 
 static int read_attr(int fd, struct perf_header *ph,
@@ -3676,7 +3676,7 @@ int perf_session__read_header(struct perf_session *session)
 	struct perf_file_header	f_header;
 	struct perf_file_attr	f_attr;
 	u64			f_id;
-	int nr_attrs, nr_ids, i, j;
+	int nr_attrs, nr_ids, i, j, err;
 	int fd = perf_data__fd(data);
 
 	session->evlist = evlist__new();
@@ -3685,8 +3685,16 @@ int perf_session__read_header(struct perf_session *session)
 
 	session->evlist->env = &header->env;
 	session->machines.host.env = &header->env;
-	if (perf_data__is_pipe(data))
-		return perf_header__read_pipe(session);
+
+	/*
+	 * We can read 'pipe' data events from a regular file,
+	 * so check for the pipe header regardless of the source.
+	 */
+	err = perf_header__read_pipe(session);
+	if (!err || (err && perf_data__is_pipe(data))) {
+		data->is_pipe = true;
+		return err;
+	}
 
 	if (perf_file_header__read(&f_header, header, fd) < 0)
 		return -EINVAL;
@@ -3947,12 +3955,22 @@ int perf_event__process_tracing_data(struct perf_session *session,
 {
 	ssize_t size_read, padding, size = event->tracing_data.size;
 	int fd = perf_data__fd(session->data);
-	off_t offset = lseek(fd, 0, SEEK_CUR);
 	char buf[BUFSIZ];
 
-	/* setup for reading amidst mmap */
-	lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
-	      SEEK_SET);
+	/*
+	 * The pipe fd is already in the proper place and in any case
+	 * we can't move it; seeking would also break the case where we
+	 * read 'pipe' data from a regular file. trace_report() reads
+	 * data from 'fd', so we need to set it directly behind the
+	 * event, where the tracing data starts.
+	 */
+	if (!perf_data__is_pipe(session->data)) {
+		off_t offset = lseek(fd, 0, SEEK_CUR);
+
+		/* setup for reading amidst mmap */
+		lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
+		      SEEK_SET);
+	}
 
 	size_read = trace_report(fd, &session->tevent,
 				 session->repipe);
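With this change perf_session__read_header() probes for the pipe-format header first and only then falls back to the on-disk file format, so pipe-mode output that was redirected into a regular file can be read back later. Roughly, the intended flow is something like:

	perf record -o - -e cycles -- sleep 1 > pipe.data
	perf report -i pipe.data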
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 12b65d00cf65..8a793e4c9400 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1930,8 +1930,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
 	}
 }
 
-void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
-				  hists__resort_cb_t cb, void *cb_arg)
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+			     hists__resort_cb_t cb, void *cb_arg)
 {
 	bool use_callchain;
 
@@ -1945,9 +1945,9 @@ void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
 	output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
 }
 
-void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
 {
-	return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL);
+	return evsel__output_resort_cb(evsel, prog, NULL, NULL);
 }
 
 void hists__output_resort(struct hists *hists, struct ui_progress *prog)
@@ -2845,9 +2845,8 @@ static int hists_evsel__init(struct evsel *evsel)
 
 int hists__init(void)
 {
-	int err = perf_evsel__object_config(sizeof(struct hists_evsel),
-					    hists_evsel__init,
-					    hists_evsel__exit);
+	int err = evsel__object_config(sizeof(struct hists_evsel),
+				       hists_evsel__init, hists_evsel__exit);
 	if (err)
 		fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
 
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 4141295a66fa..96b1c13bbccc 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -173,9 +173,9 @@ void hist_entry__delete(struct hist_entry *he);
 
 typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
 
-void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
-				  hists__resort_cb_t cb, void *cb_arg);
-void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+			     hists__resort_cb_t cb, void *cb_arg);
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
 void hists__output_resort(struct hists *hists, struct ui_progress *prog);
 void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
 			     hists__resort_cb_t cb);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index f17b1e769ae4..e4dd8bf610ce 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -913,11 +913,11 @@ static void intel_pt_add_callchain(struct intel_pt *pt,
 	sample->callchain = pt->chain;
 }
 
-static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
 {
 	size_t sz = sizeof(struct branch_stack);
 
-	sz += pt->br_stack_sz * sizeof(struct branch_entry);
+	sz += entry_cnt * sizeof(struct branch_entry);
 	return zalloc(sz);
 }
 
@@ -930,7 +930,7 @@ static int intel_pt_br_stack_init(struct intel_pt *pt)
 			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
 	}
 
-	pt->br_stack = intel_pt_alloc_br_stack(pt);
+	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
 	if (!pt->br_stack)
 		return -ENOMEM;
 
@@ -951,6 +951,9 @@ static void intel_pt_add_br_stack(struct intel_pt *pt,
 	sample->branch_stack = pt->br_stack;
 }
 
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
+
 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 						   unsigned int queue_nr)
 {
@@ -968,8 +971,10 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 			goto out_free;
 	}
 
-	if (pt->synth_opts.last_branch) {
-		ptq->last_branch = intel_pt_alloc_br_stack(pt);
+	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
+		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
+
+		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
 		if (!ptq->last_branch)
 			goto out_free;
 	}
@@ -1720,9 +1725,6 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
 	}
 }
 
-/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
-#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
-
 static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
 {
 	const struct intel_pt_blk_items *items = &ptq->state->items;
@@ -1798,25 +1800,18 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
 	}
 
 	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
-		struct {
-			struct branch_stack br_stack;
-			struct branch_entry entries[LBRS_MAX];
-		} br;
-
 		if (items->mask[INTEL_PT_LBR_0_POS] ||
 		    items->mask[INTEL_PT_LBR_1_POS] ||
 		    items->mask[INTEL_PT_LBR_2_POS]) {
-			intel_pt_add_lbrs(&br.br_stack, items);
-			sample.branch_stack = &br.br_stack;
+			intel_pt_add_lbrs(ptq->last_branch, items);
 		} else if (pt->synth_opts.last_branch) {
 			thread_stack__br_sample(ptq->thread, ptq->cpu,
 						ptq->last_branch,
 						pt->br_stack_sz);
-			sample.branch_stack = ptq->last_branch;
 		} else {
-			br.br_stack.nr = 0;
-			sample.branch_stack = &br.br_stack;
+			ptq->last_branch->nr = 0;
 		}
+		sample.branch_stack = ptq->last_branch;
 	}
 
 	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
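intel_pt_alloc_br_stack() now takes the entry count explicitly, so a queue's last_branch buffer can be sized for whichever is larger, the configured branch-stack depth or the LBRS_MAX entries a PEBS-via-PT sample may carry, and the PEBS path reuses that buffer instead of a stack-local one. The sizing is the usual flexible-array pattern; a minimal sketch (the count is illustrative):

	struct branch_stack *bs;
	unsigned int entry_cnt = 32;	/* e.g. max(LBRS_MAX, pt->br_stack_sz) */

	bs = zalloc(sizeof(*bs) + entry_cnt * sizeof(struct branch_entry));
	if (!bs)
		return -ENOMEM;
	bs->nr = 0;	/* no valid entries recorded yet */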
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index e3ccb0ce1938..32bb05e03fb2 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -57,7 +57,7 @@ struct debug_line_info {
 	unsigned long vma;
 	unsigned int lineno;
 	/* The filename format is unspecified, absolute path, relative etc. */
-	char const filename[0];
+	char const filename[];
 };
 
 struct jit_tool {
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
index f2c3823cc81a..ab2842def83d 100644
--- a/tools/perf/util/jitdump.h
+++ b/tools/perf/util/jitdump.h
@@ -93,7 +93,7 @@ struct debug_entry {
 	uint64_t addr;
 	int lineno;	    /* source line number starting at 1 */
 	int discrim;	    /* column discriminator, 0 is default */
-	const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+	const char name[]; /* null terminated filename, \xff\0 if same as previous entry */
 };
 
 struct jr_code_debug_info {
@@ -101,7 +101,7 @@ struct jr_code_debug_info {
 
 	uint64_t code_addr;
 	uint64_t nr_entry;
-	struct debug_entry entries[0];
+	struct debug_entry entries[];
 };
 
 struct jr_code_unwinding_info {
@@ -110,7 +110,7 @@ struct jr_code_unwinding_info {
 	uint64_t unwinding_size;
 	uint64_t eh_frame_hdr_size;
 	uint64_t mapped_size;
-	const char unwinding_data[0];
+	const char unwinding_data[];
 };
 
 union jr_entry {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8ed2135893bb..d5384807372b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -738,8 +738,8 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
 
 static int is_bpf_image(const char *name)
 {
-	return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) ||
-	       strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1);
+	return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+	       strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
 }
 
 static int machine__process_ksymbol_register(struct machine *machine,
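The old is_bpf_image() returned the raw strncmp() result, which is non-zero exactly when the prefix does not match, so the predicate was inverted; comparing the result against zero restores the intended prefix test. The idiom in isolation (the helper name is illustrative):

	static bool has_prefix(const char *name, const char *prefix)
	{
		/* strncmp() returns 0 on a match, so compare against 0 */
		return strncmp(name, prefix, strlen(prefix)) == 0;
	}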
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index aa29589f6904..ea0af0bc4314 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -103,6 +103,21 @@ int perf_mem_events__init(void)
 	return found ? 0 : -ENOENT;
 }
 
+void perf_mem_events__list(void)
+{
+	int j;
+
+	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+		struct perf_mem_event *e = &perf_mem_events[j];
+
+		fprintf(stderr, "%-13s%-*s%s\n",
+			e->tag,
+			verbose > 0 ? 25 : 0,
+			verbose > 0 ? perf_mem_events__name(j) : "",
+			e->supported ? ": available" : "");
+	}
+}
+
 static const char * const tlb_access[] = {
 	"N/A",
 	"HIT",
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index f1389bdae7bf..904dad34f7f7 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -39,6 +39,8 @@ int perf_mem_events__init(void);
 
 char *perf_mem_events__name(int i);
 
+void perf_mem_events__list(void);
+
 struct mem_info;
 int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index b071df373f8b..9e21aa767e41 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -85,50 +85,103 @@ static void metricgroup__rblist_init(struct rblist *metric_events)
 
 struct egroup {
 	struct list_head nd;
-	int idnum;
-	const char **ids;
+	struct expr_parse_ctx pctx;
 	const char *metric_name;
 	const char *metric_expr;
 	const char *metric_unit;
 	int runtime;
+	bool has_constraint;
 };
 
+/**
+ * Find a group of events in perf_evlist that correspond to those from a parsed
+ * metric expression. Note, as find_evsel_group is called in the same order as
+ * perf_evlist was constructed, metric_no_merge doesn't need to test for
+ * underfilling a group.
+ * @perf_evlist: a list of events something like: {metric1 leader, metric1
+ * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling,
+ * metric2 sibling}:W,duration_time
+ * @pctx: the parse context for the metric expression.
+ * @metric_no_merge: don't attempt to share events for the metric with other
+ * metrics.
+ * @has_constraint: is there a constraint on the group of events? If so, the
+ * events won't be grouped.
+ * @metric_events: out argument, null-terminated array of evsels associated
+ * with the metric.
+ * @evlist_used: in/out argument, bitmap tracking which evlist events are used.
+ * @return the first metric event or NULL on failure.
+ */
 static struct evsel *find_evsel_group(struct evlist *perf_evlist,
-				      const char **ids,
-				      int idnum,
+				      struct expr_parse_ctx *pctx,
+				      bool metric_no_merge,
+				      bool has_constraint,
 				      struct evsel **metric_events,
-				      bool *evlist_used)
+				      unsigned long *evlist_used)
 {
-	struct evsel *ev;
-	int i = 0, j = 0;
-	bool leader_found;
+	struct evsel *ev, *current_leader = NULL;
+	double *val_ptr;
+	int i = 0, matched_events = 0, events_to_match;
+	const int idnum = (int)hashmap__size(&pctx->ids);
+
+	/* duration_time is grouped separately. */
+	if (!has_constraint &&
+	    hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr))
+		events_to_match = idnum - 1;
+	else
+		events_to_match = idnum;
 
 	evlist__for_each_entry (perf_evlist, ev) {
-		if (evlist_used[j++])
+		/*
+		 * Events with a constraint aren't grouped and match the first
+		 * events available.
+		 */
+		if (has_constraint && ev->weak_group)
 			continue;
-		if (!strcmp(ev->name, ids[i])) {
-			if (!metric_events[i])
-				metric_events[i] = ev;
-			i++;
-			if (i == idnum)
-				break;
-		} else {
-			/* Discard the whole match and start again */
-			i = 0;
+		/* Ignore event if already used and merging is disabled. */
+		if (metric_no_merge && test_bit(ev->idx, evlist_used))
+			continue;
+		if (!has_constraint && ev->leader != current_leader) {
+			/*
+			 * Start of a new group, discard the whole match and
+			 * start again.
+			 */
+			matched_events = 0;
 			memset(metric_events, 0,
 				sizeof(struct evsel *) * idnum);
+			current_leader = ev->leader;
+		}
+		if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) {
+			if (has_constraint) {
+				/*
+				 * Events aren't grouped, ensure the same event
+				 * isn't matched from two groups.
+				 */
+				for (i = 0; i < matched_events; i++) {
+					if (!strcmp(ev->name,
+						    metric_events[i]->name)) {
+						break;
+					}
+				}
+				if (i != matched_events)
+					continue;
+			}
+			metric_events[matched_events++] = ev;
+		}
+		if (matched_events == events_to_match)
+			break;
+	}
 
-			if (!strcmp(ev->name, ids[i])) {
-				if (!metric_events[i])
-					metric_events[i] = ev;
-				i++;
-				if (i == idnum)
-					break;
+	if (events_to_match != idnum) {
+		/* Add the first duration_time. */
+		evlist__for_each_entry(perf_evlist, ev) {
+			if (!strcmp(ev->name, "duration_time")) {
+				metric_events[matched_events++] = ev;
+				break;
 			}
 		}
 	}
 
-	if (i != idnum) {
+	if (matched_events != idnum) {
 		/* Not whole match */
 		return NULL;
 	}
@@ -136,25 +189,16 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
 	metric_events[idnum] = NULL;
 
 	for (i = 0; i < idnum; i++) {
-		leader_found = false;
-		evlist__for_each_entry(perf_evlist, ev) {
-			if (!leader_found && (ev == metric_events[i]))
-				leader_found = true;
-
-			if (leader_found &&
-			    !strcmp(ev->name, metric_events[i]->name)) {
-				ev->metric_leader = metric_events[i];
-			}
-			j++;
-		}
 		ev = metric_events[i];
-		evlist_used[ev->idx] = true;
+		ev->metric_leader = ev;
+		set_bit(ev->idx, evlist_used);
 	}
 
 	return metric_events[0];
 }
 
 static int metricgroup__setup_events(struct list_head *groups,
+				     bool metric_no_merge,
 				     struct evlist *perf_evlist,
 				     struct rblist *metric_events_list)
 {
@@ -163,40 +207,44 @@ static int metricgroup__setup_events(struct list_head *groups,
 	int i = 0;
 	int ret = 0;
 	struct egroup *eg;
-	struct evsel *evsel;
-	bool *evlist_used;
+	struct evsel *evsel, *tmp;
+	unsigned long *evlist_used;
 
-	evlist_used = calloc(perf_evlist->core.nr_entries, sizeof(bool));
-	if (!evlist_used) {
-		ret = -ENOMEM;
-		return ret;
-	}
+	evlist_used = bitmap_alloc(perf_evlist->core.nr_entries);
+	if (!evlist_used)
+		return -ENOMEM;
 
 	list_for_each_entry (eg, groups, nd) {
 		struct evsel **metric_events;
 
-		metric_events = calloc(sizeof(void *), eg->idnum + 1);
+		metric_events = calloc(sizeof(void *),
+				hashmap__size(&eg->pctx.ids) + 1);
 		if (!metric_events) {
 			ret = -ENOMEM;
 			break;
 		}
-		evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
-					 metric_events, evlist_used);
+		evsel = find_evsel_group(perf_evlist, &eg->pctx,
+					 metric_no_merge,
+					 eg->has_constraint, metric_events,
+					 evlist_used);
 		if (!evsel) {
 			pr_debug("Cannot resolve %s: %s\n",
 					eg->metric_name, eg->metric_expr);
+			free(metric_events);
 			continue;
 		}
-		for (i = 0; i < eg->idnum; i++)
+		for (i = 0; metric_events[i]; i++)
 			metric_events[i]->collect_stat = true;
 		me = metricgroup__lookup(metric_events_list, evsel, true);
 		if (!me) {
 			ret = -ENOMEM;
+			free(metric_events);
 			break;
 		}
 		expr = malloc(sizeof(struct metric_expr));
 		if (!expr) {
 			ret = -ENOMEM;
+			free(metric_events);
 			break;
 		}
 		expr->metric_expr = eg->metric_expr;
@@ -207,7 +255,13 @@ static int metricgroup__setup_events(struct list_head *groups,
 		list_add(&expr->nd, &me->head);
 	}
 
-	free(evlist_used);
+	evlist__for_each_entry_safe(perf_evlist, tmp, evsel) {
+		if (!test_bit(evsel->idx, evlist_used)) {
+			evlist__remove(perf_evlist, evsel);
+			evsel__delete(evsel);
+		}
+	}
+	bitmap_free(evlist_used);
 
 	return ret;
 }
@@ -415,43 +469,49 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 }
 
 static void metricgroup__add_metric_weak_group(struct strbuf *events,
-					       const char **ids,
-					       int idnum)
+					       struct expr_parse_ctx *ctx)
 {
-	bool no_group = false;
-	int i;
+	struct hashmap_entry *cur;
+	size_t bkt;
+	bool no_group = true, has_duration = false;
 
-	for (i = 0; i < idnum; i++) {
-		pr_debug("found event %s\n", ids[i]);
+	hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+		pr_debug("found event %s\n", (const char *)cur->key);
 		/*
 		 * Duration time maps to a software event and can make
 		 * groups not count. Always use it outside a
 		 * group.
 		 */
-		if (!strcmp(ids[i], "duration_time")) {
-			if (i > 0)
-				strbuf_addf(events, "}:W,");
-			strbuf_addf(events, "duration_time");
-			no_group = true;
+		if (!strcmp(cur->key, "duration_time")) {
+			has_duration = true;
 			continue;
 		}
 		strbuf_addf(events, "%s%s",
-			i == 0 || no_group ? "{" : ",",
-			ids[i]);
+			no_group ? "{" : ",",
+			(const char *)cur->key);
 		no_group = false;
 	}
-	if (!no_group)
+	if (!no_group) {
 		strbuf_addf(events, "}:W");
+		if (has_duration)
+			strbuf_addf(events, ",duration_time");
+	} else if (has_duration)
+		strbuf_addf(events, "duration_time");
 }
 
 static void metricgroup__add_metric_non_group(struct strbuf *events,
-					      const char **ids,
-					      int idnum)
+					      struct expr_parse_ctx *ctx)
 {
-	int i;
-
-	for (i = 0; i < idnum; i++)
-		strbuf_addf(events, ",%s", ids[i]);
+	struct hashmap_entry *cur;
+	size_t bkt;
+	bool first = true;
+
+	hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+		if (!first)
+			strbuf_addf(events, ",");
+		strbuf_addf(events, "%s", (const char *)cur->key);
+		first = false;
+	}
 }
 
 static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
@@ -492,46 +552,58 @@ int __weak arch_get_runtimeparam(void)
 	return 1;
 }
 
-static int __metricgroup__add_metric(struct strbuf *events,
-		struct list_head *group_list, struct pmu_event *pe, int runtime)
+static int __metricgroup__add_metric(struct list_head *group_list,
+				     struct pmu_event *pe,
+				     bool metric_no_group,
+				     int runtime)
 {
-
-	const char **ids;
-	int idnum;
 	struct egroup *eg;
 
-	if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0)
-		return -EINVAL;
-
-	if (events->len > 0)
-		strbuf_addf(events, ",");
-
-	if (metricgroup__has_constraint(pe))
-		metricgroup__add_metric_non_group(events, ids, idnum);
-	else
-		metricgroup__add_metric_weak_group(events, ids, idnum);
-
 	eg = malloc(sizeof(*eg));
 	if (!eg)
 		return -ENOMEM;
 
-	eg->ids = ids;
-	eg->idnum = idnum;
+	expr__ctx_init(&eg->pctx);
 	eg->metric_name = pe->metric_name;
 	eg->metric_expr = pe->metric_expr;
 	eg->metric_unit = pe->unit;
 	eg->runtime = runtime;
-	list_add_tail(&eg->nd, group_list);
+	eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+
+	if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) {
+		expr__ctx_clear(&eg->pctx);
+		free(eg);
+		return -EINVAL;
+	}
+
+	if (list_empty(group_list))
+		list_add(&eg->nd, group_list);
+	else {
+		struct list_head *pos;
+
+		/* Place the largest groups at the front. */
+		list_for_each_prev(pos, group_list) {
+			struct egroup *old = list_entry(pos, struct egroup, nd);
+
+			if (hashmap__size(&eg->pctx.ids) <=
+			    hashmap__size(&old->pctx.ids))
+				break;
+		}
+		list_add(&eg->nd, pos);
+	}
 
 	return 0;
 }
 
-static int metricgroup__add_metric(const char *metric, struct strbuf *events,
+static int metricgroup__add_metric(const char *metric, bool metric_no_group,
+				   struct strbuf *events,
 				   struct list_head *group_list)
 {
 	struct pmu_events_map *map = perf_pmu__find_map(NULL);
 	struct pmu_event *pe;
-	int i, ret = -EINVAL;
+	struct egroup *eg;
+	int i, ret;
+	bool has_match = false;
 
 	if (!map)
 		return 0;
@@ -539,17 +611,26 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
 	for (i = 0; ; i++) {
 		pe = &map->table[i];
 
-		if (!pe->name && !pe->metric_group && !pe->metric_name)
+		if (!pe->name && !pe->metric_group && !pe->metric_name) {
+			/* End of pmu events. */
+			if (!has_match)
+				return -EINVAL;
 			break;
+		}
 		if (!pe->metric_expr)
 			continue;
 		if (match_metric(pe->metric_group, metric) ||
 		    match_metric(pe->metric_name, metric)) {
-
+			has_match = true;
 			pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
 
 			if (!strstr(pe->metric_expr, "?")) {
-				ret = __metricgroup__add_metric(events, group_list, pe, 1);
+				ret = __metricgroup__add_metric(group_list,
+								pe,
+								metric_no_group,
+								1);
+				if (ret)
+					return ret;
 			} else {
 				int j, count;
 
@@ -560,17 +641,33 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
 				 * those events to group_list.
 				 */
 
-				for (j = 0; j < count; j++)
-					ret = __metricgroup__add_metric(events, group_list, pe, j);
+				for (j = 0; j < count; j++) {
+					ret = __metricgroup__add_metric(
+						group_list, pe,
+						metric_no_group, j);
+					if (ret)
+						return ret;
+				}
 			}
-			if (ret == -ENOMEM)
-				break;
 		}
 	}
-	return ret;
+	list_for_each_entry(eg, group_list, nd) {
+		if (events->len > 0)
+			strbuf_addf(events, ",");
+
+		if (eg->has_constraint) {
+			metricgroup__add_metric_non_group(events,
+							  &eg->pctx);
+		} else {
+			metricgroup__add_metric_weak_group(events,
+							   &eg->pctx);
+		}
+	}
+	return 0;
 }
 
-static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
+static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
+					struct strbuf *events,
 				        struct list_head *group_list)
 {
 	char *llist, *nlist, *p;
@@ -585,7 +682,8 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
 	strbuf_addf(events, "%s", "");
 
 	while ((p = strsep(&llist, ",")) != NULL) {
-		ret = metricgroup__add_metric(p, events, group_list);
+		ret = metricgroup__add_metric(p, metric_no_group, events,
+					      group_list);
 		if (ret == -EINVAL) {
 			fprintf(stderr, "Cannot find metric or group `%s'\n",
 					p);
@@ -603,20 +701,19 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
 static void metricgroup__free_egroups(struct list_head *group_list)
 {
 	struct egroup *eg, *egtmp;
-	int i;
 
 	list_for_each_entry_safe (eg, egtmp, group_list, nd) {
-		for (i = 0; i < eg->idnum; i++)
-			zfree(&eg->ids[i]);
-		zfree(&eg->ids);
+		expr__ctx_clear(&eg->pctx);
 		list_del_init(&eg->nd);
 		free(eg);
 	}
 }
 
 int metricgroup__parse_groups(const struct option *opt,
-			   const char *str,
-			   struct rblist *metric_events)
+			      const char *str,
+			      bool metric_no_group,
+			      bool metric_no_merge,
+			      struct rblist *metric_events)
 {
 	struct parse_events_error parse_error;
 	struct evlist *perf_evlist = *(struct evlist **)opt->value;
@@ -626,7 +723,8 @@ int metricgroup__parse_groups(const struct option *opt,
 
 	if (metric_events->nr_entries == 0)
 		metricgroup__rblist_init(metric_events);
-	ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
+	ret = metricgroup__add_metric_list(str, metric_no_group,
+					   &extra_events, &group_list);
 	if (ret)
 		return ret;
 	pr_debug("adding %s\n", extra_events.buf);
@@ -637,8 +735,8 @@ int metricgroup__parse_groups(const struct option *opt,
 		goto out;
 	}
 	strbuf_release(&extra_events);
-	ret = metricgroup__setup_events(&group_list, perf_evlist,
-					metric_events);
+	ret = metricgroup__setup_events(&group_list, metric_no_merge,
+					perf_evlist, metric_events);
 out:
 	metricgroup__free_egroups(&group_list);
 	return ret;
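metricgroup__setup_events() now records which evlist entries a metric claimed in a bitmap instead of a bool array, and any entry left unclaimed is removed from the evlist afterwards. A minimal sketch of that bookkeeping, assuming the tools/ bitmap helpers; the indices are illustrative:

	unsigned long *used;

	used = bitmap_alloc(evlist->core.nr_entries);
	if (!used)
		return -ENOMEM;

	set_bit(3, used);		/* evsel at index 3 was matched */
	if (!test_bit(5, used))
		pr_debug("index 5 unclaimed, will be removed\n");
	bitmap_free(used);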
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 6b09eb30b4ec..287850bcdeca 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -29,8 +29,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
 					 struct evsel *evsel,
 					 bool create);
 int metricgroup__parse_groups(const struct option *opt,
-			const char *str,
-			struct rblist *metric_events);
+			      const char *str,
+			      bool metric_no_group,
+			      bool metric_no_merge,
+			      struct rblist *metric_events);
 
 void metricgroup__print(bool metrics, bool groups, char *filter,
 			bool raw, bool details);
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 0920fb0ec6cc..75345946c4b9 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -29,7 +29,7 @@ typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
 
 struct ordered_events_buffer {
 	struct list_head	list;
-	struct ordered_event	event[0];
+	struct ordered_event	event[];
 };
 
 struct ordered_events {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b7a0518d607d..3decbb203846 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -26,7 +26,7 @@
 #include <api/fs/tracing_path.h>
 #include <perf/cpumap.h>
 #include "parse-events-bison.h"
-#define YY_EXTRA_TYPE int
+#define YY_EXTRA_TYPE void*
 #include "parse-events-flex.h"
 #include "pmu.h"
 #include "thread_map.h"
@@ -36,6 +36,7 @@
 #include "metricgroup.h"
 #include "util/evsel_config.h"
 #include "util/event.h"
+#include "util/pfm.h"
 
 #define MAX_NAME_LEN 100
 
@@ -204,7 +205,8 @@ void parse_events__handle_error(struct parse_events_error *err, int idx,
 		err->help = help;
 		break;
 	default:
-		WARN_ONCE(1, "WARNING: multiple event parsing errors\n");
+		pr_debug("Multiple errors dropping message: %s (%s)\n",
+			err->str, err->help);
 		free(err->str);
 		err->str = str;
 		free(err->help);
@@ -344,6 +346,7 @@ static char *get_config_name(struct list_head *head_terms)
 static struct evsel *
 __add_event(struct list_head *list, int *idx,
 	    struct perf_event_attr *attr,
+	    bool init_attr,
 	    char *name, struct perf_pmu *pmu,
 	    struct list_head *config_terms, bool auto_merge_stats,
 	    const char *cpu_list)
@@ -352,9 +355,10 @@ __add_event(struct list_head *list, int *idx,
 	struct perf_cpu_map *cpus = pmu ? pmu->cpus :
 			       cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
 
-	event_attr_init(attr);
+	if (init_attr)
+		event_attr_init(attr);
 
-	evsel = perf_evsel__new_idx(attr, *idx);
+	evsel = evsel__new_idx(attr, *idx);
 	if (!evsel)
 		return NULL;
 
@@ -370,15 +374,25 @@ __add_event(struct list_head *list, int *idx,
 	if (config_terms)
 		list_splice(config_terms, &evsel->config_terms);
 
-	list_add_tail(&evsel->core.node, list);
+	if (list)
+		list_add_tail(&evsel->core.node, list);
+
 	return evsel;
 }
 
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+					char *name, struct perf_pmu *pmu)
+{
+	return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false,
+			   NULL);
+}
+
 static int add_event(struct list_head *list, int *idx,
 		     struct perf_event_attr *attr, char *name,
 		     struct list_head *config_terms)
 {
-	return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 0 : -ENOMEM;
+	return __add_event(list, idx, attr, true, name, NULL, config_terms,
+			   false, NULL) ? 0 : -ENOMEM;
 }
 
 static int add_event_tool(struct list_head *list, int *idx,
@@ -390,7 +404,8 @@ static int add_event_tool(struct list_head *list, int *idx,
 		.config = PERF_COUNT_SW_DUMMY,
 	};
 
-	evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0");
+	evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false,
+			    "0");
 	if (!evsel)
 		return -ENOMEM;
 	evsel->tool_event = tool_event;
@@ -399,13 +414,13 @@ static int add_event_tool(struct list_head *list, int *idx,
 	return 0;
 }
 
-static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
+static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size)
 {
 	int i, j;
 	int n, longest = -1;
 
 	for (i = 0; i < size; i++) {
-		for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {
+		for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
 			n = strlen(names[i][j]);
 			if (n > longest && !strncasecmp(str, names[i][j], n))
 				longest = n;
@@ -444,8 +459,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
 	 * No fallback - if we cannot get a clear cache type
 	 * then bail out:
 	 */
-	cache_type = parse_aliases(type, perf_evsel__hw_cache,
-				   PERF_COUNT_HW_CACHE_MAX);
+	cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX);
 	if (cache_type == -1)
 		return -EINVAL;
 
@@ -458,7 +472,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
 		n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
 
 		if (cache_op == -1) {
-			cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
+			cache_op = parse_aliases(str, evsel__hw_cache_op,
 						 PERF_COUNT_HW_CACHE_OP_MAX);
 			if (cache_op >= 0) {
 				if (!evsel__is_cache_op_valid(cache_type, cache_op))
@@ -468,7 +482,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
 		}
 
 		if (cache_result == -1) {
-			cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+			cache_result = parse_aliases(str, evsel__hw_cache_result,
 						     PERF_COUNT_HW_CACHE_RESULT_MAX);
 			if (cache_result >= 0)
 				continue;
@@ -538,9 +552,8 @@ static int add_tracepoint(struct list_head *list, int *idx,
 			  struct parse_events_error *err,
 			  struct list_head *head_config)
 {
-	struct evsel *evsel;
+	struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
 
-	evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++);
 	if (IS_ERR(evsel)) {
 		tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
 		return PTR_ERR(evsel);
@@ -1214,14 +1227,14 @@ static int get_config_terms(struct list_head *head_config,
 			    struct list_head *head_terms __maybe_unused)
 {
 #define ADD_CONFIG_TERM(__type, __weak)				\
-	struct perf_evsel_config_term *__t;			\
+	struct evsel_config_term *__t;			\
 								\
 	__t = zalloc(sizeof(*__t));				\
 	if (!__t)						\
 		return -ENOMEM;					\
 								\
 	INIT_LIST_HEAD(&__t->list);				\
-	__t->type       = PERF_EVSEL__CONFIG_TERM_ ## __type;	\
+	__t->type       = EVSEL__CONFIG_TERM_ ## __type;	\
 	__t->weak	= __weak;				\
 	list_add_tail(&__t->list, head_terms)
 
@@ -1312,7 +1325,7 @@ do {								\
 }
 
 /*
- * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
+ * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
  * each bit of attr->config that the user has changed.
  */
 static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
@@ -1400,10 +1413,10 @@ int parse_events_add_tool(struct parse_events_state *parse_state,
 
 static bool config_term_percore(struct list_head *config_terms)
 {
-	struct perf_evsel_config_term *term;
+	struct evsel_config_term *term;
 
 	list_for_each_entry(term, config_terms, list) {
-		if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE)
+		if (term->type == EVSEL__CONFIG_TERM_PERCORE)
 			return term->val.percore;
 	}
 
@@ -1424,6 +1437,19 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	bool use_uncore_alias;
 	LIST_HEAD(config_terms);
 
+	if (verbose > 1) {
+		fprintf(stderr, "Attempting to add event pmu '%s' with '",
+			name);
+		if (head_config) {
+			struct parse_events_term *term;
+
+			list_for_each_entry(term, head_config, list) {
+				fprintf(stderr, "%s,", term->config);
+			}
+		}
+		fprintf(stderr, "' that may result in non-fatal errors\n");
+	}
+
 	pmu = perf_pmu__find(name);
 	if (!pmu) {
 		char *err_str;
@@ -1446,8 +1472,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 	if (!head_config) {
 		attr.type = pmu->type;
-		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL,
-				    auto_merge_stats, NULL);
+		evsel = __add_event(list, &parse_state->idx, &attr, true, NULL,
+				    pmu, NULL, auto_merge_stats, NULL);
 		if (evsel) {
 			evsel->pmu_name = name ? strdup(name) : NULL;
 			evsel->use_uncore_alias = use_uncore_alias;
@@ -1460,6 +1486,19 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	if (perf_pmu__check_alias(pmu, head_config, &info))
 		return -EINVAL;
 
+	if (verbose > 1) {
+		fprintf(stderr, "After aliases, add event pmu '%s' with '",
+			name);
+		if (head_config) {
+			struct parse_events_term *term;
+
+			list_for_each_entry(term, head_config, list) {
+				fprintf(stderr, "%s,", term->config);
+			}
+		}
+		fprintf(stderr, "' that may result in non-fatal errors\n");
+	}
+
 	/*
 	 * Configure hardcoded terms first, no need to check
 	 * return value when called with fail == 0 ;)
@@ -1478,17 +1517,18 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		return -ENOMEM;
 
 	if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
-		struct perf_evsel_config_term *pos, *tmp;
+		struct evsel_config_term *pos, *tmp;
 
 		list_for_each_entry_safe(pos, tmp, &config_terms, list) {
 			list_del_init(&pos->list);
-			zfree(&pos->val.str);
+			if (pos->free_str)
+				zfree(&pos->val.str);
 			free(pos);
 		}
 		return -EINVAL;
 	}
 
-	evsel = __add_event(list, &parse_state->idx, &attr,
+	evsel = __add_event(list, &parse_state->idx, &attr, true,
 			    get_config_name(head_config), pmu,
 			    &config_terms, auto_merge_stats, NULL);
 	if (evsel) {
@@ -1630,12 +1670,11 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
 		 * event. That can be used to distinguish the leader from
 		 * other members, even they have the same event name.
 		 */
-		if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
+		if ((leader != evsel) &&
+		    !strcmp(leader->pmu_name, evsel->pmu_name)) {
 			is_leader = false;
 			continue;
 		}
-		/* The name is always alias name */
-		WARN_ON(strcmp(leader->name, evsel->name));
 
 		/* Store the leader event for each PMU */
 		leaders[nr_pmu++] = (uintptr_t) evsel;
@@ -2002,13 +2041,14 @@ perf_pmu__parse_check(const char *name)
 	return r ? r->type : PMU_EVENT_SYMBOL_ERR;
 }
 
-static int parse_events__scanner(const char *str, void *parse_state, int start_token)
+static int parse_events__scanner(const char *str,
+				 struct parse_events_state *parse_state)
 {
 	YY_BUFFER_STATE buffer;
 	void *scanner;
 	int ret;
 
-	ret = parse_events_lex_init_extra(start_token, &scanner);
+	ret = parse_events_lex_init_extra(parse_state, &scanner);
 	if (ret)
 		return ret;
 
@@ -2016,6 +2056,7 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t
 
 #ifdef PARSER_DEBUG
 	parse_events_debug = 1;
+	parse_events_set_debug(1, scanner);
 #endif
 	ret = parse_events_parse(parse_state, scanner);
 
@@ -2031,11 +2072,12 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t
 int parse_events_terms(struct list_head *terms, const char *str)
 {
 	struct parse_events_state parse_state = {
-		.terms = NULL,
+		.terms  = NULL,
+		.stoken = PE_START_TERMS,
 	};
 	int ret;
 
-	ret = parse_events__scanner(str, &parse_state, PE_START_TERMS);
+	ret = parse_events__scanner(str, &parse_state);
 	if (!ret) {
 		list_splice(parse_state.terms, terms);
 		zfree(&parse_state.terms);
@@ -2054,10 +2096,11 @@ int parse_events(struct evlist *evlist, const char *str,
 		.idx    = evlist->core.nr_entries,
 		.error  = err,
 		.evlist = evlist,
+		.stoken = PE_START_EVENTS,
 	};
 	int ret;
 
-	ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS);
+	ret = parse_events__scanner(str, &parse_state);
 	perf_pmu__parse_cleanup();
 
 	if (!ret && list_empty(&parse_state.list)) {
@@ -2817,6 +2860,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
 	print_sdt_events(NULL, NULL, name_only);
 
 	metricgroup__print(true, true, NULL, name_only, details_flag);
+
+	print_libpfm_events(name_only, long_desc);
 }
 
 int parse_events__is_hardcoded_term(struct parse_events_term *term)
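The lexer's extra data is now the whole parse_events_state, which carries the start token in ->stoken, so both the event and the term entry points share one scanner setup. Callers are unaffected; a minimal sketch of driving the parser (the event string and error handling are illustrative):

	struct evlist *evlist = evlist__new();
	struct parse_events_error err = { .idx = 0 };

	if (!evlist)
		return -ENOMEM;
	if (parse_events(evlist, "cycles,instructions", &err))
		parse_events_print_error(&err, "cycles,instructions");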
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 6ead9661238c..1fe23a2f9b36 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -17,6 +17,7 @@ struct evlist;
 struct parse_events_error;
 
 struct option;
+struct perf_pmu;
 
 struct tracepoint_path {
 	char *system;
@@ -128,6 +129,7 @@ struct parse_events_state {
 	struct parse_events_error *error;
 	struct evlist	  *evlist;
 	struct list_head	  *terms;
+	int			   stoken;
 };
 
 void parse_events__handle_error(struct parse_events_error *err, int idx,
@@ -187,6 +189,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 bool auto_merge_stats,
 			 bool use_alias);
 
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+					char *name, struct perf_pmu *pmu);
+
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str,
 			       struct list_head **listp);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index c589fc42f058..002802e17059 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -209,10 +209,10 @@ modifier_bp	[rwx]{1,3}
 %%
 
 %{
-	{
-		int start_token;
+	struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner);
 
-		start_token = parse_events_get_extra(yyscanner);
+	{
+		int start_token = _parse_state->stoken;
 
 		if (start_token == PE_START_TERMS)
 			BEGIN(config);
@@ -220,7 +220,7 @@ modifier_bp	[rwx]{1,3}
 			BEGIN(event);
 
 		if (start_token) {
-			parse_events_set_extra(NULL, yyscanner);
+			_parse_state->stoken = 0;
 			/*
 			 * The flex parser does not init locations variable
 			 * via the scan_string interface, so we need do the
@@ -252,7 +252,9 @@ modifier_bp	[rwx]{1,3}
 			BEGIN(INITIAL);
 			REWIND(0);
 		}
-
+,		{
+			return ',';
+		}
 }
 
 <array>{
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
new file mode 100644
index 000000000000..d735acb6c29c
--- /dev/null
+++ b/tools/perf/util/pfm.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "util/cpumap.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/pmu.h"
+#include "util/pfm.h"
+
+#include <string.h>
+#include <linux/kernel.h>
+#include <perfmon/pfmlib_perf_event.h>
+
+static void libpfm_initialize(void)
+{
+	int ret;
+
+	ret = pfm_initialize();
+	if (ret != PFM_SUCCESS) {
+		ui__warning("libpfm failed to initialize: %s\n",
+			pfm_strerror(ret));
+	}
+}
+
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+			int unset __maybe_unused)
+{
+	struct evlist *evlist = *(struct evlist **)opt->value;
+	struct perf_event_attr attr;
+	struct perf_pmu *pmu;
+	struct evsel *evsel, *grp_leader = NULL;
+	char *p, *q, *p_orig;
+	const char *sep;
+	int grp_evt = -1;
+	int ret;
+
+	libpfm_initialize();
+
+	p_orig = p = strdup(str);
+	if (!p)
+		return -1;
+	/*
+	 * force loading of the PMU list
+	 */
+	perf_pmu__scan(NULL);
+
+	for (q = p; strsep(&p, ",{}"); q = p) {
+		sep = p ? str + (p - p_orig - 1) : "";
+		if (*sep == '{') {
+			if (grp_evt > -1) {
+				ui__error(
+					"nested event groups not supported\n");
+				goto error;
+			}
+			grp_evt++;
+		}
+
+		/* no event */
+		if (*q == '\0')
+			continue;
+
+		memset(&attr, 0, sizeof(attr));
+		event_attr_init(&attr);
+
+		ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3,
+						&attr, NULL, NULL);
+
+		if (ret != PFM_SUCCESS) {
+			ui__error("failed to parse event %s : %s\n", str,
+				  pfm_strerror(ret));
+			goto error;
+		}
+
+		pmu = perf_pmu__find_by_type((unsigned int)attr.type);
+		evsel = parse_events__add_event(evlist->core.nr_entries,
+						&attr, q, pmu);
+		if (evsel == NULL)
+			goto error;
+
+		evsel->is_libpfm_event = true;
+
+		evlist__add(evlist, evsel);
+
+		if (grp_evt == 0)
+			grp_leader = evsel;
+
+		if (grp_evt > -1) {
+			evsel->leader = grp_leader;
+			grp_leader->core.nr_members++;
+			grp_evt++;
+		}
+
+		if (*sep == '}') {
+			if (grp_evt < 0) {
+				ui__error(
+				   "cannot close a non-existing event group\n");
+				goto error;
+			}
+			evlist->nr_groups++;
+			grp_leader = NULL;
+			grp_evt = -1;
+		}
+	}
+	return 0;
+error:
+	free(p_orig);
+	return -1;
+}
+
+static const char *srcs[PFM_ATTR_CTRL_MAX] = {
+	[PFM_ATTR_CTRL_UNKNOWN] = "???",
+	[PFM_ATTR_CTRL_PMU] = "PMU",
+	[PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+};
+
+static void
+print_attr_flags(pfm_event_attr_info_t *info)
+{
+	int n = 0;
+
+	if (info->is_dfl) {
+		printf("[default] ");
+		n++;
+	}
+
+	if (info->is_precise) {
+		printf("[precise] ");
+		n++;
+	}
+
+	if (!n)
+		printf("- ");
+}
+
+static void
+print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
+{
+	pfm_event_attr_info_t ainfo;
+	const char *src;
+	int j, ret;
+
+	ainfo.size = sizeof(ainfo);
+
+	printf("  %s\n", info->name);
+	printf("    [%s]\n", info->desc);
+	if (long_desc) {
+		if (info->equiv)
+			printf("      Equiv: %s\n", info->equiv);
+
+		printf("      Code  : 0x%"PRIx64"\n", info->code);
+	}
+	pfm_for_each_event_attr(j, info) {
+		ret = pfm_get_event_attr_info(info->idx, j,
+					      PFM_OS_PERF_EVENT_EXT, &ainfo);
+		if (ret != PFM_SUCCESS)
+			continue;
+
+		if (ainfo.type == PFM_ATTR_UMASK) {
+			printf("      %s:%s\n", info->name, ainfo.name);
+			printf("        [%s]\n", ainfo.desc);
+		}
+
+		if (!long_desc)
+			continue;
+
+		if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+			ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+
+		src = srcs[ainfo.ctrl];
+		switch (ainfo.type) {
+		case PFM_ATTR_UMASK:
+			printf("        Umask : 0x%02"PRIx64" : %s: ",
+				ainfo.code, src);
+			print_attr_flags(&ainfo);
+			putchar('\n');
+			break;
+		case PFM_ATTR_MOD_BOOL:
+			printf("      Modif : %s: [%s] : %s (boolean)\n", src,
+				ainfo.name, ainfo.desc);
+			break;
+		case PFM_ATTR_MOD_INTEGER:
+			printf("      Modif : %s: [%s] : %s (integer)\n", src,
+				ainfo.name, ainfo.desc);
+			break;
+		case PFM_ATTR_NONE:
+		case PFM_ATTR_RAW_UMASK:
+		case PFM_ATTR_MAX:
+		default:
+			printf("      Attr  : %s: [%s] : %s\n", src,
+				ainfo.name, ainfo.desc);
+		}
+	}
+}
+
+/*
+ * List all pmu::event:umask and pmu::event entries.
+ * The printed events may not all be valid umask combinations for an event.
+ */
+static void
+print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info)
+{
+	pfm_event_attr_info_t ainfo;
+	int j, ret;
+	bool has_umask = false;
+
+	ainfo.size = sizeof(ainfo);
+
+	pfm_for_each_event_attr(j, info) {
+		ret = pfm_get_event_attr_info(info->idx, j,
+					      PFM_OS_PERF_EVENT_EXT, &ainfo);
+		if (ret != PFM_SUCCESS)
+			continue;
+
+		if (ainfo.type != PFM_ATTR_UMASK)
+			continue;
+
+		printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name);
+		has_umask = true;
+	}
+	if (!has_umask)
+		printf("%s::%s\n", pinfo->name, info->name);
+}
+
+void print_libpfm_events(bool name_only, bool long_desc)
+{
+	pfm_event_info_t info;
+	pfm_pmu_info_t pinfo;
+	int i, p, ret;
+
+	libpfm_initialize();
+
+	/* initialize to zero to indicate ABI version */
+	info.size  = sizeof(info);
+	pinfo.size = sizeof(pinfo);
+
+	if (!name_only)
+		puts("\nList of pre-defined events (to be used in --pfm-events):\n");
+
+	pfm_for_all_pmus(p) {
+		bool printed_pmu = false;
+
+		ret = pfm_get_pmu_info(p, &pinfo);
+		if (ret != PFM_SUCCESS)
+			continue;
+
+		/* only print events that are supported by host HW */
+		if (!pinfo.is_present)
+			continue;
+
+		/* handled by perf directly */
+		if (pinfo.pmu == PFM_PMU_PERF_EVENT)
+			continue;
+
+		for (i = pinfo.first_event; i != -1;
+		     i = pfm_get_event_next(i)) {
+
+			ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
+						&info);
+			if (ret != PFM_SUCCESS)
+				continue;
+
+			if (!name_only && !printed_pmu) {
+				printf("%s:\n", pinfo.name);
+				printed_pmu = true;
+			}
+
+			if (!name_only)
+				print_libpfm_events_detailed(&info, long_desc);
+			else
+				print_libpfm_events_raw(&pinfo, &info);
+		}
+		if (!name_only && printed_pmu)
+			putchar('\n');
+	}
+}
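parse_libpfm_events_option() splits the --pfm-events string on ',' and on '{' '}' for groups, then asks libpfm4 to encode each name into a perf_event_attr. The core encoding call, in isolation (the event name is illustrative):

	struct perf_event_attr attr;
	int ret;

	memset(&attr, 0, sizeof(attr));
	event_attr_init(&attr);
	ret = pfm_get_perf_event_encoding("cycles", PFM_PLM0 | PFM_PLM3,
					  &attr, NULL, NULL);
	if (ret != PFM_SUCCESS)
		ui__error("cannot encode event: %s\n", pfm_strerror(ret));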
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
new file mode 100644
index 000000000000..7d70dda87012
--- /dev/null
+++ b/tools/perf/util/pfm.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#ifndef __PERF_PFM_H
+#define __PERF_PFM_H
+
+#include <subcmd/parse-options.h>
+
+#ifdef HAVE_LIBPFM
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+			int unset);
+
+void print_libpfm_events(bool name_only, bool long_desc);
+
+#else
+#include <linux/compiler.h>
+
+static inline int parse_libpfm_events_option(
+	const struct option *opt __maybe_unused,
+	const char *str __maybe_unused,
+	int unset __maybe_unused)
+{
+	return 0;
+}
+
+static inline void print_libpfm_events(bool name_only __maybe_unused,
+				       bool long_desc __maybe_unused)
+{
+}
+
+#endif
+
+
+#endif /* __PERF_PFM_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 92bd7fafcce6..93fe72a9dc0b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1056,7 +1056,8 @@ error:
  * Setup one of config[12] attr members based on the
  * user input data - term parameter.
  */
-static int pmu_config_term(struct list_head *formats,
+static int pmu_config_term(const char *pmu_name,
+			   struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct parse_events_term *term,
 			   struct list_head *head_terms,
@@ -1082,16 +1083,24 @@ static int pmu_config_term(struct list_head *formats,
 
 	format = pmu_find_format(formats, term->config);
 	if (!format) {
-		if (verbose > 0)
-			printf("Invalid event/parameter '%s'\n", term->config);
+		char *pmu_term = pmu_formats_string(formats);
+		char *unknown_term;
+		char *help_msg;
+
+		if (asprintf(&unknown_term,
+				"unknown term '%s' for pmu '%s'",
+				term->config, pmu_name) < 0)
+			unknown_term = NULL;
+		help_msg = parse_events_formats_error_string(pmu_term);
 		if (err) {
-			char *pmu_term = pmu_formats_string(formats);
-
 			parse_events__handle_error(err, term->err_term,
-				strdup("unknown term"),
-				parse_events_formats_error_string(pmu_term));
-			free(pmu_term);
+						   unknown_term,
+						   help_msg);
+		} else {
+			pr_debug("%s (%s)\n", unknown_term, help_msg);
+			free(unknown_term);
 		}
+		free(pmu_term);
 		return -EINVAL;
 	}
 
@@ -1168,7 +1177,7 @@ static int pmu_config_term(struct list_head *formats,
 	return 0;
 }
 
-int perf_pmu__config_terms(struct list_head *formats,
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct list_head *head_terms,
 			   bool zero, struct parse_events_error *err)
@@ -1176,7 +1185,7 @@ int perf_pmu__config_terms(struct list_head *formats,
 	struct parse_events_term *term;
 
 	list_for_each_entry(term, head_terms, list) {
-		if (pmu_config_term(formats, attr, term, head_terms,
+		if (pmu_config_term(pmu_name, formats, attr, term, head_terms,
 				    zero, err))
 			return -EINVAL;
 	}
@@ -1196,8 +1205,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 	bool zero = !!pmu->default_config;
 
 	attr->type = pmu->type;
-	return perf_pmu__config_terms(&pmu->format, attr, head_terms,
-				      zero, err);
+	return perf_pmu__config_terms(pmu->name, &pmu->format, attr,
+				      head_terms, zero, err);
 }
 
 static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index cb6fbec50313..85e0c7f2515c 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -9,7 +9,7 @@
 #include "parse-events.h"
 #include "pmu-events/pmu-events.h"
 
-struct perf_evsel_config_term;
+struct evsel_config_term;
 
 enum {
 	PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -76,7 +76,7 @@ struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
 		     struct list_head *head_terms,
 		     struct parse_events_error *error);
-int perf_pmu__config_terms(struct list_head *formats,
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct list_head *head_terms,
 			   bool zero, struct parse_events_error *error);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eea132f512b0..a08f373d3305 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -102,7 +102,7 @@ void exit_probe_symbol_maps(void)
 	symbol__exit();
 }
 
-static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
 {
 	/* kmap->ref_reloc_sym should be set if host_machine is initialized */
 	struct kmap *kmap;
@@ -114,6 +114,10 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
 	kmap = map__kmap(map);
 	if (!kmap)
 		return NULL;
+
+	if (pmap)
+		*pmap = map;
+
 	return kmap->ref_reloc_sym;
 }
 
@@ -125,7 +129,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
 	struct map *map;
 
 	/* ref_reloc_sym is just a label. Need a special fix*/
-	reloc_sym = kernel_get_ref_reloc_sym();
+	reloc_sym = kernel_get_ref_reloc_sym(NULL);
 	if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
 		*addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
 	else {
@@ -232,21 +236,22 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
 static bool kprobe_blacklist__listed(unsigned long address);
 static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
 {
-	u64 etext_addr = 0;
-	int ret;
-
-	/* Get the address of _etext for checking non-probable text symbol */
-	ret = kernel_get_symbol_address_by_name("_etext", &etext_addr,
-						false, false);
+	struct map *map;
+	bool ret = false;
 
-	if (ret == 0 && etext_addr < address)
-		pr_warning("%s is out of .text, skip it.\n", symbol);
-	else if (kprobe_blacklist__listed(address))
+	map = kernel_get_module_map(NULL);
+	if (map) {
+		ret = address <= map->start || map->end < address;
+		if (ret)
+			pr_warning("%s is out of .text, skip it.\n", symbol);
+		map__put(map);
+	}
+	if (!ret && kprobe_blacklist__listed(address)) {
 		pr_warning("%s is blacklisted function, skip it.\n", symbol);
-	else
-		return false;
+		ret = true;
+	}
 
-	return true;
+	return ret;
 }
 
 /*
@@ -745,6 +750,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 				       int ntevs)
 {
 	struct ref_reloc_sym *reloc_sym;
+	struct map *map;
 	char *tmp;
 	int i, skipped = 0;
 
@@ -753,7 +759,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 		return post_process_offline_probe_trace_events(tevs, ntevs,
 						symbol_conf.vmlinux_name);
 
-	reloc_sym = kernel_get_ref_reloc_sym();
+	reloc_sym = kernel_get_ref_reloc_sym(&map);
 	if (!reloc_sym) {
 		pr_warning("Relocated base symbol is not found!\n");
 		return -EINVAL;
@@ -764,9 +770,13 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
 			continue;
 		if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
 			continue;
-		/* If we found a wrong one, mark it by NULL symbol */
+		/*
+		 * If we found a wrong one, mark it by NULL symbol.
+		 * Since addresses in debuginfo are the same as in objdump,
+		 * we need to convert them to addresses in memory.
+		 */
 		if (kprobe_warn_out_range(tevs[i].point.symbol,
-					  tevs[i].point.address)) {
+			map__objdump_2mem(map, tevs[i].point.address))) {
 			tmp = NULL;
 			skipped++;
 		} else {
@@ -1765,8 +1775,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
 	fmt1_str = strtok_r(argv0_str, ":", &fmt);
 	fmt2_str = strtok_r(NULL, "/", &fmt);
 	fmt3_str = strtok_r(NULL, " \t", &fmt);
-	if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL
-	    || fmt3_str == NULL) {
+	if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) {
 		semantic_error("Failed to parse event name: %s\n", argv[0]);
 		ret = -EINVAL;
 		goto out;
@@ -2936,7 +2945,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	/* Note that the symbols in the kmodule are not relocated */
 	if (!pev->uprobes && !pev->target &&
 			(!pp->retprobe || kretprobe_offset_is_supported())) {
-		reloc_sym = kernel_get_ref_reloc_sym();
+		reloc_sym = kernel_get_ref_reloc_sym(NULL);
 		if (!reloc_sym) {
 			pr_warning("Relocated base symbol is not found!\n");
 			ret = -EINVAL;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index e4cff49384f4..55924255c535 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -101,6 +101,7 @@ enum dso_binary_type distro_dwarf_types[] = {
 	DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
 	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
 	DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index 80ff41fc45be..a1d1e4ef6257 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -15,7 +15,7 @@
 struct pstack {
 	unsigned short	top;
 	unsigned short	max_nr_entries;
-	void		*entries[0];
+	void		*entries[];
 };
 
 struct pstack *pstack__new(unsigned short max_nr_entries)
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 923565c3b155..39d1de4b2a36 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -36,6 +36,7 @@ struct record_opts {
 	bool	      record_namespaces;
 	bool	      record_cgroup;
 	bool	      record_switch_events;
+	bool	      record_switch_events_set;
 	bool	      all_kernel;
 	bool	      all_user;
 	bool	      kernel_callchains;
@@ -76,4 +77,9 @@ extern struct option *record_options;
 
 int record__parse_freq(const struct option *opt, const char *str, int unset);
 
+static inline bool record_opts__no_switch_events(const struct record_opts *opts)
+{
+	return opts->record_switch_events_set && !opts->record_switch_events;
+}
+
 #endif // _PERF_RECORD_H
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c11d89e0ee55..1a157e84a04a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -33,7 +33,6 @@
 #include "../perf.h"
 #include "arch/common.h"
 #include <internal/lib.h>
-#include <linux/err.h>
 
 #ifdef HAVE_ZSTD_SUPPORT
 static int perf_session__process_compressed_event(struct perf_session *session,
@@ -1104,7 +1103,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
 	for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
 		u64 val = regs[i++];
 
-		printf(".... %-5s 0x%" PRIx64 "\n",
+		printf(".... %-5s 0x%016" PRIx64 "\n",
 		       perf_reg_name(rid), val);
 	}
 }
@@ -1542,8 +1541,13 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 		 */
 		return 0;
 	case PERF_RECORD_HEADER_TRACING_DATA:
-		/* setup for reading amidst mmap */
-		lseek(fd, file_offset, SEEK_SET);
+		/*
+		 * Setup for reading amidst mmap, but only when we
+		 * are in 'file' mode. The 'pipe' fd is in proper
+		 * place already.
+		 */
+		if (!perf_data__is_pipe(session->data))
+			lseek(fd, file_offset, SEEK_SET);
 		return tool->tracing_data(session, event);
 	case PERF_RECORD_HEADER_BUILD_ID:
 		return tool->build_id(session, event);
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
index 1580a3cbec2d..ded9ced02797 100644
--- a/tools/perf/util/sideband_evlist.c
+++ b/tools/perf/util/sideband_evlist.c
@@ -22,7 +22,7 @@ int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *att
 		attr->sample_id_all = 1;
 	}
 
-	evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries);
+	evsel = evsel__new_idx(attr, evlist->core.nr_entries);
 	if (!evsel)
 		return -1;
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index c1f8879f92cc..d42339df20f8 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2817,7 +2817,7 @@ static char *prefix_if_not_in(const char *pre, char *str)
 		return str;
 
 	if (asprintf(&n, "%s,%s", pre, str) < 0)
-		return NULL;
+		n = NULL;
 
 	free(str);
 	return n;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 129b8c5f2538..a7c13a88ecb9 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -323,35 +323,46 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 {
 	struct evsel *counter, *leader, **metric_events, *oc;
 	bool found;
-	const char **metric_names;
+	struct expr_parse_ctx ctx;
+	struct hashmap_entry *cur;
+	size_t bkt;
 	int i;
-	int num_metric_names;
 
+	expr__ctx_init(&ctx);
 	evlist__for_each_entry(evsel_list, counter) {
 		bool invalid = false;
 
 		leader = counter->leader;
 		if (!counter->metric_expr)
 			continue;
+
+		expr__ctx_clear(&ctx);
 		metric_events = counter->metric_events;
 		if (!metric_events) {
-			if (expr__find_other(counter->metric_expr, counter->name,
-						&metric_names, &num_metric_names, 1) < 0)
+			if (expr__find_other(counter->metric_expr,
+					     counter->name,
+					     &ctx, 1) < 0)
 				continue;
 
 			metric_events = calloc(sizeof(struct evsel *),
-					       num_metric_names + 1);
-			if (!metric_events)
+					       hashmap__size(&ctx.ids) + 1);
+			if (!metric_events) {
+				expr__ctx_clear(&ctx);
 				return;
+			}
 			counter->metric_events = metric_events;
 		}
 
-		for (i = 0; i < num_metric_names; i++) {
+		i = 0;
+		hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+			const char *metric_name = (const char *)cur->key;
+
 			found = false;
 			if (leader) {
 				/* Search in group */
 				for_each_group_member (oc, leader) {
-					if (!strcasecmp(oc->name, metric_names[i]) &&
+					if (!strcasecmp(oc->name,
+							metric_name) &&
 						!oc->collect_stat) {
 						found = true;
 						break;
@@ -360,7 +371,8 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 			}
 			if (!found) {
 				/* Search ignoring groups */
-				oc = perf_stat__find_event(evsel_list, metric_names[i]);
+				oc = perf_stat__find_event(evsel_list,
+							   metric_name);
 			}
 			if (!oc) {
 				/* Deduping one is good enough to handle duplicated PMUs. */
@@ -373,27 +385,28 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
 				 * of events. So we ask the user instead to add the missing
 				 * events.
 				 */
-				if (!printed || strcasecmp(printed, metric_names[i])) {
+				if (!printed ||
+				    strcasecmp(printed, metric_name)) {
 					fprintf(stderr,
 						"Add %s event to groups to get metric expression for %s\n",
-						metric_names[i],
+						metric_name,
 						counter->name);
-					printed = strdup(metric_names[i]);
+					printed = strdup(metric_name);
 				}
 				invalid = true;
 				continue;
 			}
-			metric_events[i] = oc;
+			metric_events[i++] = oc;
 			oc->collect_stat = true;
 		}
 		metric_events[i] = NULL;
-		free(metric_names);
 		if (invalid) {
 			free(metric_events);
 			counter->metric_events = NULL;
 			counter->metric_expr = NULL;
 		}
 	}
+	expr__ctx_clear(&ctx);
 }
 
 static double runtime_stat_avg(struct runtime_stat *st,
@@ -724,7 +737,6 @@ static void generic_metric(struct perf_stat_config *config,
 			   const char *metric_name,
 			   const char *metric_unit,
 			   int runtime,
-			   double avg,
 			   int cpu,
 			   struct perf_stat_output_ctx *out,
 			   struct runtime_stat *st)
@@ -737,8 +749,6 @@ static void generic_metric(struct perf_stat_config *config,
 	char *n, *pn;
 
 	expr__ctx_init(&pctx);
-	/* Must be first id entry */
-	expr__add_id(&pctx, name, avg);
 	for (i = 0; metric_events[i]; i++) {
 		struct saved_value *v;
 		struct stats *stats;
@@ -797,7 +807,7 @@ static void generic_metric(struct perf_stat_config *config,
 				print_metric(config, ctxp, NULL, "%8.1f",
 					     metric_bf, ratio);
 			} else {
-				print_metric(config, ctxp, NULL, "%8.1f",
+				print_metric(config, ctxp, NULL, "%8.2f",
 					metric_name ?
 					metric_name :
 					out->force_header ?  name : "",
@@ -814,8 +824,7 @@ static void generic_metric(struct perf_stat_config *config,
 			     (metric_name ? metric_name : name) : "", 0);
 	}
 
-	for (i = 1; i < pctx.num_ids; i++)
-		zfree(&pctx.ids[i].name);
+	expr__ctx_clear(&pctx);
 }
 
 void perf_stat__print_shadow_stats(struct perf_stat_config *config,
@@ -1027,7 +1036,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 			print_metric(config, ctxp, NULL, NULL, name, 0);
 	} else if (evsel->metric_expr) {
 		generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
-				evsel->metric_name, NULL, 1, avg, cpu, out, st);
+				evsel->metric_name, NULL, 1, cpu, out, st);
 	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
 		char unit = 'M';
 		char unit_buf[10];
@@ -1056,7 +1065,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				out->new_line(config, ctxp);
 			generic_metric(config, mexp->metric_expr, mexp->metric_events,
 					evsel->name, mexp->metric_name,
-					mexp->metric_unit, mexp->runtime, avg, cpu, out, st);
+					mexp->metric_unit, mexp->runtime, cpu, out, st);
 		}
 	}
 	if (num == 0)
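
perf_stat__collect_metric_expr() now walks the id hashmap filled in by expr__find_other() instead of a flat name array. A much-simplified standalone illustration of the matching step, using hypothetical arrays rather than perf's evlist and hashmap types:

#include <stddef.h>
#include <stdio.h>
#include <strings.h>

/* find a counter whose name matches a metric id, case-insensitively */
static const char *find_event(const char * const *events, size_t n,
			      const char *id)
{
	for (size_t i = 0; i < n; i++)
		if (!strcasecmp(events[i], id))
			return events[i];
	return NULL;
}

int main(void)
{
	const char *events[] = { "instructions", "cycles" };
	const char *ids[]    = { "CYCLES", "cache-misses" };

	for (size_t i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
		if (!find_event(events, 2, ids[i]))
			fprintf(stderr,
				"Add %s event to get the metric\n", ids[i]);
	return 0;
}

In the real code, ids that cannot be resolved produce the same "Add ... event to groups" hint and invalidate the metric for that counter.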
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 774468341851..cdb154381a87 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -115,7 +115,7 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
 	}
 }
 
-static void perf_evsel__reset_stat_priv(struct evsel *evsel)
+static void evsel__reset_stat_priv(struct evsel *evsel)
 {
 	int i;
 	struct perf_stat_evsel *ps = evsel->stats;
@@ -126,16 +126,16 @@ static void perf_evsel__reset_stat_priv(struct evsel *evsel)
 	perf_stat_evsel_id_init(evsel);
 }
 
-static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
+static int evsel__alloc_stat_priv(struct evsel *evsel)
 {
 	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
 	if (evsel->stats == NULL)
 		return -ENOMEM;
-	perf_evsel__reset_stat_priv(evsel);
+	evsel__reset_stat_priv(evsel);
 	return 0;
 }
 
-static void perf_evsel__free_stat_priv(struct evsel *evsel)
+static void evsel__free_stat_priv(struct evsel *evsel)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
 
@@ -144,8 +144,7 @@ static void perf_evsel__free_stat_priv(struct evsel *evsel)
 	zfree(&evsel->stats);
 }
 
-static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
-					     int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
 {
 	struct perf_counts *counts;
 
@@ -156,29 +155,26 @@ static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
 	return counts ? 0 : -ENOMEM;
 }
 
-static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
+static void evsel__free_prev_raw_counts(struct evsel *evsel)
 {
 	perf_counts__delete(evsel->prev_raw_counts);
 	evsel->prev_raw_counts = NULL;
 }
 
-static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
+static void evsel__reset_prev_raw_counts(struct evsel *evsel)
 {
-	if (evsel->prev_raw_counts) {
-		evsel->prev_raw_counts->aggr.val = 0;
-		evsel->prev_raw_counts->aggr.ena = 0;
-		evsel->prev_raw_counts->aggr.run = 0;
-       }
+	if (evsel->prev_raw_counts)
+		perf_counts__reset(evsel->prev_raw_counts);
 }
 
-static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
+static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
 {
 	int ncpus = evsel__nr_cpus(evsel);
 	int nthreads = perf_thread_map__nr(evsel->core.threads);
 
-	if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
-	    perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
-	    (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+	if (evsel__alloc_stat_priv(evsel) < 0 ||
+	    evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
 		return -ENOMEM;
 
 	return 0;
@@ -189,7 +185,7 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (perf_evsel__alloc_stats(evsel, alloc_raw))
+		if (evsel__alloc_stats(evsel, alloc_raw))
 			goto out_free;
 	}
 
@@ -205,9 +201,9 @@ void perf_evlist__free_stats(struct evlist *evlist)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		perf_evsel__free_stat_priv(evsel);
-		perf_evsel__free_counts(evsel);
-		perf_evsel__free_prev_raw_counts(evsel);
+		evsel__free_stat_priv(evsel);
+		evsel__free_counts(evsel);
+		evsel__free_prev_raw_counts(evsel);
 	}
 }
 
@@ -216,8 +212,8 @@ void perf_evlist__reset_stats(struct evlist *evlist)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		perf_evsel__reset_stat_priv(evsel);
-		perf_evsel__reset_counts(evsel);
+		evsel__reset_stat_priv(evsel);
+		evsel__reset_counts(evsel);
 	}
 }
 
@@ -226,7 +222,51 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel)
-		perf_evsel__reset_prev_raw_counts(evsel);
+		evsel__reset_prev_raw_counts(evsel);
+}
+
+static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+	int ncpus = evsel__nr_cpus(evsel);
+	int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+	for (int thread = 0; thread < nthreads; thread++) {
+		for (int cpu = 0; cpu < ncpus; cpu++) {
+			*perf_counts(evsel->counts, cpu, thread) =
+				*perf_counts(evsel->prev_raw_counts, cpu,
+					     thread);
+		}
+	}
+
+	evsel->counts->aggr = evsel->prev_raw_counts->aggr;
+}
+
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		perf_evsel__copy_prev_raw_counts(evsel);
+}
+
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	/*
+	 * To collect the overall statistics for interval mode,
+	 * we copy the counts from evsel->prev_raw_counts to
+	 * evsel->counts. perf_stat_process_counter() creates the
+	 * aggr values from the per-cpu values, but the per-cpu
+	 * values are 0 for AGGR_GLOBAL, so we use a trick: save
+	 * the previous aggr value to the first member of
+	 * perf_counts, so that the aggr calculation in
+	 * process_counter_values() works correctly.
+	 */
+	evlist__for_each_entry(evlist, evsel) {
+		*perf_counts(evsel->prev_raw_counts, 0, 0) =
+			evsel->prev_raw_counts->aggr;
+	}
 }
 
 static void zero_per_pkg(struct evsel *counter)
@@ -368,7 +408,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	 * interval mode, otherwise overall avg running
 	 * averages will be shown for each interval.
 	 */
-	if (config->interval) {
+	if (config->interval || config->summary) {
 		for (i = 0; i < 3; i++)
 			init_stats(&ps->res_stats[i]);
 	}
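
A toy model of the trick documented in perf_evlist__save_aggr_prev_raw_counts(): under AGGR_GLOBAL the per-cpu slots stay zero, so parking the saved aggregate in slot 0 lets the normal summing pass reproduce it. This is illustrative only, not perf's data structures:

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

int main(void)
{
	uint64_t percpu[NCPUS] = { 0 };	/* zero under global aggregation  */
	uint64_t saved_aggr = 12345;	/* aggregate from prior intervals */
	uint64_t sum = 0;

	percpu[0] = saved_aggr;		/* the "first member" trick       */

	for (int cpu = 0; cpu < NCPUS; cpu++)
		sum += percpu[cpu];

	printf("aggregate seen by the summary pass: %llu\n",
	       (unsigned long long)sum);
	return 0;
}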
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b4fdfaa7f2c0..f75ae679eb28 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -110,6 +110,9 @@ struct perf_stat_config {
 	bool			 all_kernel;
 	bool			 all_user;
 	bool			 percore_show_thread;
+	bool			 summary;
+	bool			 metric_no_group;
+	bool			 metric_no_merge;
 	FILE			*output;
 	unsigned int		 interval;
 	unsigned int		 timeout;
@@ -132,6 +135,8 @@ struct perf_stat_config {
 	struct rblist		 metric_events;
 };
 
+void perf_stat__set_big_num(int set);
+
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
@@ -198,6 +203,8 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
 void perf_evlist__free_stats(struct evlist *evlist);
 void perf_evlist__reset_stats(struct evlist *evlist);
 void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index be5b493f8284..5e43054bffea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1458,6 +1458,7 @@ struct kcore_copy_info {
 	u64 first_symbol;
 	u64 last_symbol;
 	u64 first_module;
+	u64 first_module_symbol;
 	u64 last_module_symbol;
 	size_t phnum;
 	struct list_head phdrs;
@@ -1534,6 +1535,8 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
 		return 0;
 
 	if (strchr(name, '[')) {
+		if (!kci->first_module_symbol || start < kci->first_module_symbol)
+			kci->first_module_symbol = start;
 		if (start > kci->last_module_symbol)
 			kci->last_module_symbol = start;
 		return 0;
@@ -1731,6 +1734,10 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
 		kci->etext += page_size;
 	}
 
+	if (kci->first_module_symbol &&
+	    (!kci->first_module || kci->first_module_symbol < kci->first_module))
+		kci->first_module = kci->first_module_symbol;
+
 	kci->first_module = round_down(kci->first_module, page_size);
 
 	if (kci->last_module_symbol) {
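
kcore_copy now records the lowest module symbol as well as the highest, so the copied region can start at whichever is lower: the first module's base or its first symbol. A standalone sketch of the same min/max scan over /proc/kallsyms (module symbols carry a "[module]" suffix; addresses read as 0 without sufficient privileges):

#include <inttypes.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/kallsyms", "r");
	uint64_t first = 0, last = 0;
	char line[512];

	if (!f)
		return 1;

	while (fgets(line, sizeof(line), f)) {
		uint64_t addr;
		char type, name[256];

		if (sscanf(line, "%" SCNx64 " %c %255s", &addr, &type, name) != 3)
			continue;
		if (!strchr(line, '['))		/* not a module symbol */
			continue;
		if (!first || addr < first)	/* track the minimum   */
			first = addr;
		if (addr > last)		/* track the maximum   */
			last = addr;
	}
	fclose(f);
	printf("module symbols span %#" PRIx64 " - %#" PRIx64 "\n", first, last);
	return 0;
}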
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 381da6b39f89..5ddf84dcbae7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -79,6 +79,7 @@ static enum dso_binary_type binary_type_symtab[] = {
 	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
 	DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
 	DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+	DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
 	DSO_BINARY_TYPE__NOT_FOUND,
 };
 
@@ -1223,6 +1224,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map)
 
 				m->end = old_map->start;
 				list_add_tail(&m->node, &merged);
+				new_map->pgoff += old_map->end - new_map->start;
 				new_map->start = old_map->end;
 			}
 		} else {
@@ -1243,6 +1245,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map)
 				 *      |new......| ->         |new...|
 				 * |old....|        -> |old....|
 				 */
+				new_map->pgoff += old_map->end - new_map->start;
 				new_map->start = old_map->end;
 			}
 		}
@@ -1529,6 +1532,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
 	case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
 	case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
 	case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+	case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
 	case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
 	case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
 		return !kmod && dso->kernel == DSO_TYPE_USER;
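
The maps__merge_in() hunks above advance new_map->pgoff whenever new_map->start is pushed forward, keeping the address-to-file-offset translation intact. A toy model (not perf's struct map) showing why the two must move together:

#include <stdint.h>
#include <stdio.h>

struct map { uint64_t start, end, pgoff; };

static uint64_t map_off(const struct map *m, uint64_t addr)
{
	/* the translation the fix preserves: (addr - start) + pgoff */
	return addr - m->start + m->pgoff;
}

int main(void)
{
	struct map m = { .start = 0x1000, .end = 0x3000, .pgoff = 0 };
	uint64_t addr = 0x2800;
	uint64_t before = map_off(&m, addr);

	/* an older map claims [0x1000, 0x2000): trim the front of 'm' */
	m.pgoff += 0x2000 - m.start;	/* the adjustment the hunk adds */
	m.start  = 0x2000;

	printf("offset before: %#llx, after trim: %#llx\n",
	       (unsigned long long)before,
	       (unsigned long long)map_off(&m, addr));
	return 0;
}

Both prints give the same offset; without the pgoff adjustment every symbol in the trimmed map would resolve too early by the trimmed amount.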
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 93fc43db1be3..ff4f4c47e148 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -55,7 +55,7 @@ struct symbol {
 	u8		inlined:1;
 	u8		arch_sym;
 	bool		annotate2;
-	char		name[0];
+	char		name[];
 };
 
 void symbol__delete(struct symbol *sym);
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 820fceeb19a9..03bd99d3be16 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -8,9 +8,9 @@
 #include "syscalltbl.h"
 #include <stdlib.h>
 #include <linux/compiler.h>
+#include <linux/zalloc.h>
 
 #ifdef HAVE_SYSCALL_TABLE_SUPPORT
-#include <linux/zalloc.h>
 #include <string.h>
 #include "string2.h"
 
@@ -142,7 +142,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
 
 struct syscalltbl *syscalltbl__new(void)
 {
-	struct syscalltbl *tbl = malloc(sizeof(*tbl));
+	struct syscalltbl *tbl = zalloc(sizeof(*tbl));
 	if (tbl)
 		tbl->audit_machine = audit_detect_machine();
 	return tbl;
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 9172613028d0..a41d2ca9e4ae 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -3,14 +3,12 @@
 #define __PERF_SYSCALLTBL_H
 
 struct syscalltbl {
-	union {
-		int audit_machine;
-		struct {
-			int max_id;
-			int nr_entries;
-			void *entries;
-		} syscalls;
-	};
+	int audit_machine;
+	struct {
+		int max_id;
+		int nr_entries;
+		void *entries;
+	} syscalls;
 };
 
 struct syscalltbl *syscalltbl__new(void);
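
With the union gone, audit_machine and the syscalls sub-struct coexist, so syscalltbl__new() must hand back zeroed memory on both build paths; that is what the malloc()-to-zalloc() switch provides. The libc-only equivalent, sketched with a hypothetical copy of the struct:

#include <stdlib.h>

struct tbl {
	int audit_machine;
	struct {
		int max_id;
		int nr_entries;
		void *entries;
	} syscalls;
};

static struct tbl *tbl_new(void)
{
	/* calloc plays the role of zalloc: every member starts at zero */
	return calloc(1, sizeof(struct tbl));
}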
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 086e98ff42a3..0e5c4786f296 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -428,7 +428,7 @@ try_id:
 		if (!ppath->next) {
 error:
 			pr_debug("No memory to alloc tracepoints list\n");
-			put_tracepoints_path(&path);
+			put_tracepoints_path(path.next);
 			return NULL;
 		}
 next:
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index b4649f5a0c2f..9aededc0bc06 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -243,7 +243,7 @@ struct eh_frame_hdr {
 	 *    encoded_t fde_addr;
 	 * } binary_search_table[fde_count];
 	 */
-	char data[0];
+	char data[];
 } __packed;
 
 static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 33339e31e365..7f4527f897c4 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,6 +3,7 @@ CFLAGS = -Wall
 CFLAGS += -Wno-nonnull
 CFLAGS += -D_GNU_SOURCE
 
+TEST_PROGS := binfmt_script
 TEST_GEN_PROGS := execveat
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 # Makefile is a run-time dependency, since it's accessed by the execveat test
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script
new file mode 100755
index 000000000000..05f94a741c7a
--- /dev/null
+++ b/tools/testing/selftests/exec/binfmt_script
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that truncation of bprm->buf doesn't cause unexpected exec paths, along
+# with various other pathological cases.
+import os, subprocess
+
+# Relevant commits
+#
+# b5372fe5dc84 ("exec: load_script: Do not exec truncated interpreter path")
+# 6eb3c3d0a52d ("exec: increase BINPRM_BUF_SIZE to 256")
+
+# BINPRM_BUF_SIZE
+SIZE=256
+
+NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
+
+test_num=0
+
+code='''#!/usr/bin/perl
+print "Executed interpreter! Args:\n";
+print "0 : '$0'\n";
+$counter = 1;
+foreach my $a (@ARGV) {
+    print "$counter : '$a'\n";
+    $counter++;
+}
+'''
+
+##
+# test - produce a binfmt_script hashbang line for testing
+#
+# @size:     bytes for bprm->buf line, including hashbang but not newline
+# @good:     whether this script is expected to execute correctly
+# @hashbang: the special 2 bytes for running binfmt_script
+# @leading:  any leading whitespace before the executable path
+# @root:     start of executable pathname
+# @target:   end of executable pathname
+# @arg:      bytes following the executable pathname
+# @fill:     character to fill between @root and @target to reach @size bytes
+# @newline:  character to use as newline, not counted towards @size
+# ...
+def test(name, size, good=True, leading="", root="./", target="/perl",
+                     fill="A", arg="", newline="\n", hashbang="#!"):
+    global test_num, tests, NAME_MAX
+    test_num += 1
+    if test_num > tests:
+        raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
+                         % (test_num, tests))
+
+    middle = ""
+    remaining = size - len(hashbang) - len(leading) - len(root) - len(target) - len(arg)
+    # The middle of the pathname must not exceed NAME_MAX
+    while remaining >= NAME_MAX:
+        middle += fill * (NAME_MAX - 1)
+        middle += '/'
+        remaining -= NAME_MAX
+    middle += fill * remaining
+
+    dirpath = root + middle
+    binary = dirpath + target
+    if len(target):
+        os.makedirs(dirpath, mode=0o755, exist_ok=True)
+        open(binary, "w").write(code)
+        os.chmod(binary, 0o755)
+
+    buf=hashbang + leading + root + middle + target + arg + newline
+    if len(newline) > 0:
+        buf += 'echo this is not really perl\n'
+
+    script = "binfmt_script-%s" % (name)
+    open(script, "w").write(buf)
+    os.chmod(script, 0o755)
+
+    proc = subprocess.Popen(["./%s" % (script)], shell=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    stdout = proc.communicate()[0]
+
+    if proc.returncode == 0 and b'Executed interpreter' in stdout:
+        if good:
+            print("ok %d - binfmt_script %s (successful good exec)"
+                  % (test_num, name))
+        else:
+            print("not ok %d - binfmt_script %s succeeded when it should have failed"
+                  % (test_num, name))
+    else:
+        if good:
+            print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
+                  % (test_num, name, proc.returncode))
+        else:
+            print("ok %d - binfmt_script %s (correctly failed bad exec)"
+                  % (test_num, name))
+
+    # Clean up crazy binaries
+    os.unlink(script)
+    if len(target):
+        elements = binary.split('/')
+        os.unlink(binary)
+        elements.pop()
+        while len(elements) > 1:
+            os.rmdir("/".join(elements))
+            elements.pop()
+
+tests=27
+print("TAP version 1.3")
+print("1..%d" % (tests))
+
+### FAIL (8 tests)
+
+# Entire path is well past BINPRM_BUF_SIZE.
+test(name="too-big",        size=SIZE+80, good=False)
+# Path is right at max size, making it impossible to tell if it was truncated.
+test(name="exact",          size=SIZE,    good=False)
+# Same as above, but with leading whitespace.
+test(name="exact-space",    size=SIZE,    good=False, leading=" ")
+# Huge buffer of only whitespace.
+test(name="whitespace-too-big", size=SIZE+71, good=False, root="",
+                                              fill=" ", target="")
+# A good path, but it gets truncated due to leading whitespace.
+test(name="truncated",      size=SIZE+17, good=False, leading=" " * 19)
+# Entirely empty except for #!
+test(name="empty",          size=2,       good=False, root="",
+                                          fill="", target="", newline="")
+# Within size, but entirely spaces
+test(name="spaces",         size=SIZE-1,  good=False, root="", fill=" ",
+                                          target="", newline="")
+# Newline before binary.
+test(name="newline-prefix", size=SIZE-1,  good=False, leading="\n",
+                                          root="", fill=" ", target="")
+
+### ok (19 tests)
+
+# The original test case that was broken by commit:
+# 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string")
+test(name="test.pl",        size=439, leading=" ",
+     root="./nix/store/bwav8kz8b3y471wjsybgzw84mrh4js9-perl-5.28.1/bin",
+     arg=" -I/nix/store/x6yyav38jgr924nkna62q3pkp0dgmzlx-perl5.28.1-File-Slurp-9999.25/lib/perl5/site_perl -I/nix/store/ha8v67sl8dac92r9z07vzr4gv1y9nwqz-perl5.28.1-Net-DBus-1.1.0/lib/perl5/site_perl -I/nix/store/dcrkvnjmwh69ljsvpbdjjdnqgwx90a9d-perl5.28.1-XML-Parser-2.44/lib/perl5/site_perl -I/nix/store/rmji88k2zz7h4zg97385bygcydrf2q8h-perl5.28.1-XML-Twig-3.52/lib/perl5/site_perl")
+# One byte under size, leaving newline visible.
+test(name="one-under",           size=SIZE-1)
+# Two bytes under size, leaving newline visible.
+test(name="two-under",           size=SIZE-2)
+# Exact size, but trailing whitespace visible instead of newline
+test(name="exact-trunc-whitespace", size=SIZE, arg=" ")
+# Exact size, but trailing space and first arg char visible instead of newline.
+test(name="exact-trunc-arg",     size=SIZE, arg=" f")
+# One byte under, with confirmed non-truncated arg since newline is now visible.
+test(name="one-under-full-arg",  size=SIZE-1, arg=" f")
+# Short read buffer by one byte.
+test(name="one-under-no-nl",     size=SIZE-1, newline="")
+# Short read buffer by half buffer size.
+test(name="half-under-no-nl",    size=int(SIZE/2), newline="")
+# One byte under with whitespace arg, leaving newline visible.
+test(name="one-under-trunc-arg", size=SIZE-1, arg=" ")
+# One byte under with leading whitespace, leaving newline visible.
+test(name="one-under-leading",   size=SIZE-1, leading=" ")
+# One byte under with both leading and arg whitespace, leaving newline visible.
+test(name="one-under-leading-trunc-arg",  size=SIZE-1, leading=" ", arg=" ")
+# Same as above, but with 2 bytes under
+test(name="two-under-no-nl",     size=SIZE-2, newline="")
+test(name="two-under-trunc-arg", size=SIZE-2, arg=" ")
+test(name="two-under-leading",   size=SIZE-2, leading=" ")
+test(name="two-under-leading-trunc-arg",   size=SIZE-2, leading=" ", arg=" ")
+# Same as above, but with buffer half filled
+test(name="two-under-no-nl",     size=int(SIZE/2), newline="")
+test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
+test(name="two-under-leading",   size=int(SIZE/2), leading=" ")
+test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+
+if test_num != tests:
+    raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d)"
+                     % (test_num, tests))
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 4bca5a9327a4..bed4b5318a86 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -2,7 +2,9 @@
 /fd-001-lookup
 /fd-002-posix-eq
 /fd-003-kthread
+/proc-fsconfig-hidepid
 /proc-loadavg-001
+/proc-multiple-procfs
 /proc-pid-vm
 /proc-self-map-files-001
 /proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index a8ed0f684829..8be8a03d2973 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -19,5 +19,7 @@ TEST_GEN_PROGS += self
 TEST_GEN_PROGS += setns-dcache
 TEST_GEN_PROGS += setns-sysvipc
 TEST_GEN_PROGS += thread-self
+TEST_GEN_PROGS += proc-multiple-procfs
+TEST_GEN_PROGS += proc-fsconfig-hidepid
 
 include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-fsconfig-hidepid.c b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
new file mode 100644
index 000000000000..b9af8f537185
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <linux/mount.h>
+#include <linux/unistd.h>
+
+static inline int fsopen(const char *fsname, unsigned int flags)
+{
+	return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int fsconfig(int fd, unsigned int cmd, const char *key, const void *val, int aux)
+{
+	return syscall(__NR_fsconfig, fd, cmd, key, val, aux);
+}
+
+int main(void)
+{
+	int fsfd, ret;
+	int hidepid = 2;
+
+	assert((fsfd = fsopen("proc", 0)) != -1);
+
+	ret = fsconfig(fsfd, FSCONFIG_SET_BINARY, "hidepid", &hidepid, 0);
+	assert(ret == -1);
+	assert(errno == EINVAL);
+
+	assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "2", 0));
+	assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "invisible", 0));
+
+	assert(!close(fsfd));
+
+	return 0;
+}
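
The test only probes fsconfig() parameter validation and then closes the context. For reference, on kernels with the new mount API a context like this would normally be finalized and attached as sketched below; fsmount() and move_mount() are the standard follow-up syscalls, shown here purely as an assumed continuation of the flow, not something the selftest performs:

#include <unistd.h>
#include <linux/mount.h>
#include <linux/unistd.h>

static inline int fsmount(int fd, unsigned int flags, unsigned int mount_attrs)
{
	return syscall(__NR_fsmount, fd, flags, mount_attrs);
}

/*
 * After the FSCONFIG_SET_STRING calls succeed, the context would be
 * created and turned into a mountable object roughly like this:
 *
 *	fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
 *	int mfd = fsmount(fsfd, 0, 0);
 *	move_mount(mfd, "", AT_FDCWD, "/some/mountpoint",
 *		   MOVE_MOUNT_F_EMPTY_PATH);
 */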
diff --git a/tools/testing/selftests/proc/proc-multiple-procfs.c b/tools/testing/selftests/proc/proc-multiple-procfs.c
new file mode 100644
index 000000000000..ab912ad95dab
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-multiple-procfs.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(void)
+{
+	struct stat proc_st1, proc_st2;
+	char procbuff[] = "/tmp/proc.XXXXXX/meminfo";
+	char procdir1[] = "/tmp/proc.XXXXXX";
+	char procdir2[] = "/tmp/proc.XXXXXX";
+
+	assert(mkdtemp(procdir1) != NULL);
+	assert(mkdtemp(procdir2) != NULL);
+
+	assert(!mount("proc", procdir1, "proc", 0, "hidepid=1"));
+	assert(!mount("proc", procdir2, "proc", 0, "hidepid=2"));
+
+	snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir1);
+	assert(!stat(procbuff, &proc_st1));
+
+	snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir2);
+	assert(!stat(procbuff, &proc_st2));
+
+	umount(procdir1);
+	umount(procdir2);
+
+	assert(proc_st1.st_dev != proc_st2.st_dev);
+
+	return 0;
+}