summary refs log tree commit diff
path: root/tools/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-22 13:59:43 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-22 13:59:43 -0800
commit3a36281a17199737b468befb826d4a23eb774445 (patch)
tree18bbdfcb0c30215883809f248d0334b7ea84d5ba /tools/include
parent7c70f3a7488d2fa62d32849d138bf2b8420fe788 (diff)
parent3027ce36ccbae74f2e7c1afbfc3f69fee0c2a996 (diff)
downloadlinux-3a36281a17199737b468befb826d4a23eb774445.tar.gz
Merge tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo:
 "New features:

   - Support instruction latency in 'perf report', with both memory
     latency (weight) and instruction latency information, users can
     locate expensive load instructions and understand time spent in
     different stages.

   - Extend 'perf c2c' to display the number of loads which were blocked
     by data or address conflict.

   - Add 'perf stat' support for L2 topdown events in systems such as
     Intel's Sapphire rapids server.

   - Add support for PERF_SAMPLE_CODE_PAGE_SIZE in various tools, as a
     sort key, for instance:

        perf report --stdio --sort=comm,symbol,code_page_size

   - New 'perf daemon' command to run long running sessions while
     providing a way to control the enablement of events without
     restarting a traditional 'perf record' session.

   - Enable counting events for BPF programs in 'perf stat' just like
     for other targets (tid, cgroup, cpu, etc), e.g.:

        # perf stat -e ref-cycles,cycles -b 254 -I 1000
           1.487903822            115,200      ref-cycles
           1.487903822             86,012      cycles
           2.489147029             80,560      ref-cycles
           2.489147029             73,784      cycles
        ^C

     The example above counts 'cycles' and 'ref-cycles' of BPF program
     of id 254. It is similar to bpftool-prog-profile command, but more
     flexible.

   - Support the new layout for PERF_RECORD_MMAP2 to carry the DSO
     build-id using infrastructure generalised from the eBPF subsystem,
     removing the need for traversing the perf.data file to collect
     build-ids at the end of 'perf record' sessions and helping with
     long running sessions where binaries can get replaced in updates,
     leading to possible mis-resolution of symbols.

   - Support filtering by hex address in 'perf script'.

   - Support DSO filter in 'perf script', like in other perf tools.

   - Add namespaces support to 'perf inject'

   - Add support for SDT (Dtrace Style Markers) events on ARM64.

  perf record:

   - Fix handling of eventfd() when draining a buffer in 'perf record'.

   - Improvements to the generation of metadata events for pre-existing
     threads (mmaps, comm, etc), speeding up the work done at the start
     of system wide or per CPU 'perf record' sessions.

  Hardware tracing:

   - Initial support for tracing KVM with Intel PT.

   - Intel PT fixes for IPC

   - Support Intel PT PSB (synchronization packets) events.

   - Automatically group aux-output events to overcome --filter syntax.

   - Enable PERF_SAMPLE_DATA_SRC on ARMs SPE.

   - Update ARM's CoreSight hardware tracing OpenCSD library to v1.0.0.

  perf annotate TUI:

   - Fix handling of 'k' ("show line number") hotkey

   - Fix jump parsing for C++ code.

  perf probe:

   - Add protection to avoid endless loop.

  cgroups:

   - Avoid reading cgroup mountpoint multiple times, caching it.

   - Fix handling of cgroup v1/v2 in mixed hierarchy.

  Symbol resolving:

   - Add OCaml symbol demangling.

   - Further fixes for handling PE executables when using perf with Wine
     and .exe/.dll files.

   - Fix 'perf unwind' DSO handling.

   - Resolve symbols against debug file first, to deal with artifacts
     related to LTO.

   - Fix gap between kernel end and module start on powerpc.

  Reporting tools:

   - The DSO filter shouldn't show samples in unresolved maps.

   - Improve debuginfod support in various tools.

  build ids:

   - Fix 16-byte build ids in 'perf buildid-cache', add a 'perf test'
     entry for that case.

  perf test:

   - Support for PERF_SAMPLE_WEIGHT_STRUCT.

   - Add test case for PERF_SAMPLE_CODE_PAGE_SIZE.

   - Shell based tests for 'perf daemon's commands ('start', 'stop,
     'reconfig', 'list', etc).

   - ARM cs-etm 'perf test' fixes.

   - Add parse-metric memory bandwidth testcase.

  Compiler related:

   - Fix 'perf probe' kretprobe issue caused by gcc 11 bug when used
     with -fpatchable-function-entry.

   - Fix ARM64 build with gcc 11's -Wformat-overflow.

   - Fix unaligned access in sample parsing test.

   - Fix printf conversion specifier for IP addresses on arm64, s390 and
     powerpc.

  Arch specific:

   - Support exposing Performance Monitor Counter SPRs as part of
     extended regs on powerpc.

   - Add JSON 'perf stat' metrics for ARM64's imx8mp, imx8mq and imx8mn
     DDR, fix imx8mm ones.

   - Fix common and uarch events for ARM64's A76 and Ampere eMag"

* tag 'perf-tools-for-v5.12-2020-02-19' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (148 commits)
  perf buildid-cache: Don't skip 16-byte build-ids
  perf buildid-cache: Add test for 16-byte build-id
  perf symbol: Remove redundant libbfd checks
  perf test: Output the sub testing result in cs-etm
  perf test: Suppress logs in cs-etm testing
  perf tools: Fix arm64 build error with gcc-11
  perf intel-pt: Add documentation for tracing virtual machines
  perf intel-pt: Split VM-Entry and VM-Exit branches
  perf intel-pt: Adjust sample flags for VM-Exit
  perf intel-pt: Allow for a guest kernel address filter
  perf intel-pt: Support decoding of guest kernel
  perf machine: Factor out machine__idle_thread()
  perf machine: Factor out machines__find_guest()
  perf intel-pt: Amend decoder to track the NR flag
  perf intel-pt: Retain the last PIP packet payload as is
  perf intel_pt: Add vmlaunch and vmresume as branches
  perf script: Add branch types for VM-Entry and VM-Exit
  perf auxtrace: Automatically group aux-output events
  perf test: Fix unaligned access in sample parsing test
  perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing
  ...
Diffstat (limited to 'tools/include')
-rw-r--r--tools/include/uapi/linux/perf_event.h96
-rw-r--r--tools/include/uapi/linux/prctl.h3
2 files changed, 90 insertions, 9 deletions
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b15e3447cd9f..ad15e40d7f5d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -145,12 +145,14 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_CGROUP			= 1U << 21,
 	PERF_SAMPLE_DATA_PAGE_SIZE		= 1U << 22,
 	PERF_SAMPLE_CODE_PAGE_SIZE		= 1U << 23,
+	PERF_SAMPLE_WEIGHT_STRUCT		= 1U << 24,
 
-	PERF_SAMPLE_MAX = 1U << 24,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 25,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
 
+#define PERF_SAMPLE_WEIGHT_TYPE	(PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
 /*
  * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
  *
@@ -386,7 +388,8 @@ struct perf_event_attr {
 				aux_output     :  1, /* generate AUX records instead of events */
 				cgroup         :  1, /* include cgroup events */
 				text_poke      :  1, /* include text poke events */
-				__reserved_1   : 30;
+				build_id       :  1, /* use build id in mmap2 events */
+				__reserved_1   : 29;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -659,6 +662,22 @@ struct perf_event_mmap_page {
 	__u64	aux_size;
 };
 
+/*
+ * The current state of perf_event_header::misc bits usage:
+ * ('|' used bit, '-' unused bit)
+ *
+ *  012         CDEF
+ *  |||---------||||
+ *
+ *  Where:
+ *    0-2     CPUMODE_MASK
+ *
+ *    C       PROC_MAP_PARSE_TIMEOUT
+ *    D       MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
+ *    E       MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT
+ *    F       (reserved)
+ */
+
 #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_RECORD_MISC_KERNEL			(1 << 0)
@@ -690,6 +709,7 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_EXACT_IP           - PERF_RECORD_SAMPLE of precise events
  *   PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *   PERF_RECORD_MISC_MMAP_BUILD_ID      - PERF_RECORD_MMAP2 event
  *
  *
  * PERF_RECORD_MISC_EXACT_IP:
@@ -699,9 +719,13 @@ struct perf_event_mmap_page {
  *
  * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
  *   Indicates that thread was preempted in TASK_RUNNING state.
+ *
+ * PERF_RECORD_MISC_MMAP_BUILD_ID:
+ *   Indicates that mmap2 event carries build id data.
  */
 #define PERF_RECORD_MISC_EXACT_IP		(1 << 14)
 #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT	(1 << 14)
+#define PERF_RECORD_MISC_MMAP_BUILD_ID		(1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
@@ -890,7 +914,24 @@ enum perf_event_type {
 	 * 	  char			data[size];
 	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
 	 *
-	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
+	 *	{ union perf_sample_weight
+	 *	 {
+	 *		u64		full; && PERF_SAMPLE_WEIGHT
+	 *	#if defined(__LITTLE_ENDIAN_BITFIELD)
+	 *		struct {
+	 *			u32	var1_dw;
+	 *			u16	var2_w;
+	 *			u16	var3_w;
+	 *		} && PERF_SAMPLE_WEIGHT_STRUCT
+	 *	#elif defined(__BIG_ENDIAN_BITFIELD)
+	 *		struct {
+	 *			u16	var3_w;
+	 *			u16	var2_w;
+	 *			u32	var1_dw;
+	 *		} && PERF_SAMPLE_WEIGHT_STRUCT
+	 *	#endif
+	 *	 }
+	 *	}
 	 *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
@@ -915,10 +956,20 @@ enum perf_event_type {
 	 *	u64				addr;
 	 *	u64				len;
 	 *	u64				pgoff;
-	 *	u32				maj;
-	 *	u32				min;
-	 *	u64				ino;
-	 *	u64				ino_generation;
+	 *	union {
+	 *		struct {
+	 *			u32		maj;
+	 *			u32		min;
+	 *			u64		ino;
+	 *			u64		ino_generation;
+	 *		};
+	 *		struct {
+	 *			u8		build_id_size;
+	 *			u8		__reserved_1;
+	 *			u16		__reserved_2;
+	 *			u8		build_id[20];
+	 *		};
+	 *	};
 	 *	u32				prot, flags;
 	 *	char				filename[];
 	 * 	struct sample_id		sample_id;
@@ -1127,14 +1178,16 @@ union perf_mem_data_src {
 			mem_lvl_num:4,	/* memory hierarchy level number */
 			mem_remote:1,   /* remote */
 			mem_snoopx:2,	/* snoop mode, ext */
-			mem_rsvd:24;
+			mem_blk:3,	/* access blocked */
+			mem_rsvd:21;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:24,
+		__u64	mem_rsvd:21,
+			mem_blk:3,	/* access blocked */
 			mem_snoopx:2,	/* snoop mode, ext */
 			mem_remote:1,   /* remote */
 			mem_lvl_num:4,	/* memory hierarchy level number */
@@ -1217,6 +1270,12 @@ union perf_mem_data_src {
 #define PERF_MEM_TLB_OS		0x40 /* OS fault handler */
 #define PERF_MEM_TLB_SHIFT	26
 
+/* Access blocked */
+#define PERF_MEM_BLK_NA		0x01 /* not available */
+#define PERF_MEM_BLK_DATA	0x02 /* data could not be forwarded */
+#define PERF_MEM_BLK_ADDR	0x04 /* address conflict */
+#define PERF_MEM_BLK_SHIFT	40
+
 #define PERF_MEM_S(a, s) \
 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
 
@@ -1248,4 +1307,23 @@ struct perf_branch_entry {
 		reserved:40;
 };
 
+union perf_sample_weight {
+	__u64		full;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	struct {
+		__u32	var1_dw;
+		__u16	var2_w;
+		__u16	var3_w;
+	};
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	struct {
+		__u16	var3_w;
+		__u16	var2_w;
+		__u32	var1_dw;
+	};
+#else
+#error "Unknown endianness"
+#endif
+};
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 90deb41c8a34..667f1aed091c 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -251,5 +251,8 @@ struct prctl_mm_map {
 #define PR_SET_SYSCALL_USER_DISPATCH	59
 # define PR_SYS_DISPATCH_OFF		0
 # define PR_SYS_DISPATCH_ON		1
+/* The control values for the user space selector when dispatch is enabled */
+# define SYSCALL_DISPATCH_FILTER_ALLOW	0
+# define SYSCALL_DISPATCH_FILTER_BLOCK	1
 
 #endif /* _LINUX_PRCTL_H */