summary refs log tree commit diff
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-kmem.txt13
-rw-r--r--tools/perf/Documentation/perf-probe.txt21
-rw-r--r--tools/perf/Makefile9
-rw-r--r--tools/perf/bench/sched-messaging.c8
-rw-r--r--tools/perf/bench/sched-pipe.c11
-rw-r--r--tools/perf/builtin-annotate.c15
-rw-r--r--tools/perf/builtin-bench.c57
-rw-r--r--tools/perf/builtin-buildid-list.c57
-rw-r--r--tools/perf/builtin-kmem.c138
-rw-r--r--tools/perf/builtin-probe.c80
-rw-r--r--tools/perf/builtin-record.c25
-rw-r--r--tools/perf/builtin-report.c59
-rw-r--r--tools/perf/builtin-sched.c225
-rw-r--r--tools/perf/builtin-timechart.c69
-rw-r--r--tools/perf/builtin-trace.c68
-rw-r--r--tools/perf/perf.h12
-rw-r--r--tools/perf/util/data_map.c75
-rw-r--r--tools/perf/util/data_map.h11
-rw-r--r--tools/perf/util/event.c78
-rw-r--r--tools/perf/util/event.h24
-rw-r--r--tools/perf/util/header.c37
-rw-r--r--tools/perf/util/header.h4
-rw-r--r--tools/perf/util/map.c87
-rw-r--r--tools/perf/util/parse-events.c17
-rw-r--r--tools/perf/util/parse-options.c3
-rw-r--r--tools/perf/util/probe-event.c133
-rw-r--r--tools/perf/util/probe-event.h1
-rw-r--r--tools/perf/util/probe-finder.c2
-rw-r--r--tools/perf/util/session.c80
-rw-r--r--tools/perf/util/session.h16
-rw-r--r--tools/perf/util/symbol.c273
-rw-r--r--tools/perf/util/symbol.h17
-rw-r--r--tools/perf/util/thread.c63
-rw-r--r--tools/perf/util/thread.h42
-rw-r--r--tools/perf/util/trace-event-parse.c4
-rw-r--r--tools/perf/util/trace-event-perl.c67
-rw-r--r--tools/perf/util/trace-event-perl.h4
-rw-r--r--tools/perf/util/trace-event-read.c3
38 files changed, 1216 insertions, 692 deletions
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 44b0ce35c28a..eac4d852e7cd 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -8,16 +8,16 @@ perf-kmem - Tool to trace/measure kernel memory(slab) properties
 SYNOPSIS
 --------
 [verse]
-'perf kmem' {record} [<options>]
+'perf kmem' {record|stat} [<options>]
 
 DESCRIPTION
 -----------
-There's two variants of perf kmem:
+There are two variants of perf kmem:
 
   'perf kmem record <command>' to record the kmem events
   of an arbitrary workload.
 
-  'perf kmem' to report kernel memory statistics.
+  'perf kmem stat' to report kernel memory statistics.
 
 OPTIONS
 -------
@@ -25,8 +25,11 @@ OPTIONS
 --input=<file>::
 	Select the input file (default: perf.data)
 
---stat=<caller|alloc>::
-	Select per callsite or per allocation statistics
+--caller::
+	Show per-callsite statistics
+
+--alloc::
+	Show per-allocation statistics
 
 -s <key[,key2...]>::
 --sort=<key[,key2...]>::
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 9270594e6dfd..8fa6bf99fcb5 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -8,10 +8,13 @@ perf-probe - Define new dynamic tracepoints
 SYNOPSIS
 --------
 [verse]
-'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...]
+'perf probe' [options] --add='PROBE' [...]
 or
-'perf probe' [options] 'PROBE' ['PROBE' ...]
-
+'perf probe' [options] PROBE
+or
+'perf probe' [options] --del='[GROUP:]EVENT' [...]
+or
+'perf probe' --list
 
 DESCRIPTION
 -----------
@@ -31,8 +34,16 @@ OPTIONS
         Be more verbose (show parsed arguments, etc).
 
 -a::
---add::
-	Define a probe point (see PROBE SYNTAX for detail)
+--add=::
+	Define a probe event (see PROBE SYNTAX for detail).
+
+-d::
+--del=::
+	Delete a probe event.
+
+-l::
+--list::
+	List up current probe events.
 
 PROBE SYNTAX
 ------------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 23ec66098bdc..406999668cab 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -237,8 +237,8 @@ lib = lib
 
 export prefix bindir sharedir sysconfdir
 
-CC = gcc
-AR = ar
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
 RM = rm -f
 TAR = tar
 FIND = find
@@ -356,7 +356,9 @@ LIB_H += util/parse-options.h
 LIB_H += util/parse-events.h
 LIB_H += util/quote.h
 LIB_H += util/util.h
+LIB_H += util/header.h
 LIB_H += util/help.h
+LIB_H += util/session.h
 LIB_H += util/strbuf.h
 LIB_H += util/string.h
 LIB_H += util/strlist.h
@@ -405,6 +407,7 @@ LIB_OBJS += util/callchain.o
 LIB_OBJS += util/values.o
 LIB_OBJS += util/debug.o
 LIB_OBJS += util/map.o
+LIB_OBJS += util/session.o
 LIB_OBJS += util/thread.o
 LIB_OBJS += util/trace-event-parse.o
 LIB_OBJS += util/trace-event-read.o
@@ -492,8 +495,10 @@ else
 	LIB_OBJS += util/probe-finder.o
 endif
 
+ifndef NO_LIBPERL
 PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null`
 PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+endif
 
 ifneq ($(shell sh -c "(echo '\#include <EXTERN.h>'; echo '\#include <perl.h>'; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y)
 	BASIC_CFLAGS += -DNO_LIBPERL
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 605a2a959aa8..81cee78181fa 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -1,6 +1,6 @@
 /*
  *
- * builtin-bench-messaging.c
+ * sched-messaging.c
  *
  * messaging: Benchmark for scheduler and IPC mechanisms
  *
@@ -320,10 +320,12 @@ int bench_sched_messaging(int argc, const char **argv,
 		       num_groups, num_groups * 2 * num_fds,
 		       thread_mode ? "threads" : "processes");
 		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
-		       diff.tv_sec, diff.tv_usec/1000);
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
 		break;
 	case BENCH_FORMAT_SIMPLE:
-		printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000);
+		printf("%lu.%03lu\n", diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
 		break;
 	default:
 		/* reaching here is something disaster */
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 238185f97977..4f77c7c27640 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -1,6 +1,6 @@
 /*
  *
- * builtin-bench-pipe.c
+ * sched-pipe.c
  *
  * pipe: Benchmark for pipe()
  *
@@ -87,7 +87,8 @@ int bench_sched_pipe(int argc, const char **argv,
 	if (pid) {
 		retpid = waitpid(pid, &wait_stat, 0);
 		assert((retpid == pid) && WIFEXITED(wait_stat));
-		return 0;
+	} else {
+		exit(0);
 	}
 
 	switch (bench_format) {
@@ -99,7 +100,8 @@ int bench_sched_pipe(int argc, const char **argv,
 		result_usec += diff.tv_usec;
 
 		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
-		       diff.tv_sec, diff.tv_usec/1000);
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec/1000));
 
 		printf(" %14lf usecs/op\n",
 		       (double)result_usec / (double)loops);
@@ -110,7 +112,8 @@ int bench_sched_pipe(int argc, const char **argv,
 
 	case BENCH_FORMAT_SIMPLE:
 		printf("%lu.%03lu\n",
-		       diff.tv_sec, diff.tv_usec / 1000);
+		       diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / 1000));
 		break;
 
 	default:
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 0bf2e8f9af57..21a78d30d53d 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -25,6 +25,7 @@
 #include "util/thread.h"
 #include "util/sort.h"
 #include "util/hist.h"
+#include "util/session.h"
 #include "util/data_map.h"
 
 static char		const *input_name = "perf.data";
@@ -462,21 +463,23 @@ static struct perf_file_handler file_handler = {
 
 static int __cmd_annotate(void)
 {
-	struct perf_header *header;
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY, force);
 	struct thread *idle;
 	int ret;
 
+	if (session == NULL)
+		return -ENOMEM;
+
 	idle = register_idle_thread();
 	register_perf_file_handler(&file_handler);
 
-	ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
-				      &event__cwdlen, &event__cwd);
+	ret = perf_session__process_events(session, 0, &event__cwdlen, &event__cwd);
 	if (ret)
-		return ret;
+		goto out_delete;
 
 	if (dump_trace) {
 		event__print_totals();
-		return 0;
+		goto out_delete;
 	}
 
 	if (verbose > 3)
@@ -489,6 +492,8 @@ static int __cmd_annotate(void)
 	output__resort(event__total[0]);
 
 	find_annotations();
+out_delete:
+	perf_session__delete(session);
 
 	return ret;
 }
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index e043eb83092a..46996774e559 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -31,6 +31,9 @@ struct bench_suite {
 	const char *summary;
 	int (*fn)(int, const char **, const char *);
 };
+						\
+/* sentinel: easy for help */
+#define suite_all { "all", "test all suite (pseudo suite)", NULL }
 
 static struct bench_suite sched_suites[] = {
 	{ "messaging",
@@ -39,6 +42,7 @@ static struct bench_suite sched_suites[] = {
 	{ "pipe",
 	  "Flood of communication over pipe() between two processes",
 	  bench_sched_pipe      },
+	suite_all,
 	{ NULL,
 	  NULL,
 	  NULL                  }
@@ -48,6 +52,7 @@ static struct bench_suite mem_suites[] = {
 	{ "memcpy",
 	  "Simple memory copy in various ways",
 	  bench_mem_memcpy },
+	suite_all,
 	{ NULL,
 	  NULL,
 	  NULL             }
@@ -66,6 +71,9 @@ static struct bench_subsys subsystems[] = {
 	{ "mem",
 	  "memory access performance",
 	  mem_suites },
+	{ "all",		/* sentinel: easy for help */
+	  "test all subsystem (pseudo subsystem)",
+	  NULL },
 	{ NULL,
 	  NULL,
 	  NULL       }
@@ -75,11 +83,11 @@ static void dump_suites(int subsys_index)
 {
 	int i;
 
-	printf("List of available suites for %s...\n\n",
+	printf("# List of available suites for %s...\n\n",
 	       subsystems[subsys_index].name);
 
 	for (i = 0; subsystems[subsys_index].suites[i].name; i++)
-		printf("\t%s: %s\n",
+		printf("%14s: %s\n",
 		       subsystems[subsys_index].suites[i].name,
 		       subsystems[subsys_index].suites[i].summary);
 
@@ -110,10 +118,10 @@ static void print_usage(void)
 		printf("\t%s\n", bench_usage[i]);
 	printf("\n");
 
-	printf("List of available subsystems...\n\n");
+	printf("# List of available subsystems...\n\n");
 
 	for (i = 0; subsystems[i].name; i++)
-		printf("\t%s: %s\n",
+		printf("%14s: %s\n",
 		       subsystems[i].name, subsystems[i].summary);
 	printf("\n");
 }
@@ -131,6 +139,37 @@ static int bench_str2int(char *str)
 	return BENCH_FORMAT_UNKNOWN;
 }
 
+static void all_suite(struct bench_subsys *subsys)	  /* FROM HERE */
+{
+	int i;
+	const char *argv[2];
+	struct bench_suite *suites = subsys->suites;
+
+	argv[1] = NULL;
+	/*
+	 * TODO:
+	 * preparing preset parameters for
+	 * embedded, ordinary PC, HPC, etc...
+	 * will be helpful
+	 */
+	for (i = 0; suites[i].fn; i++) {
+		printf("# Running %s/%s benchmark...\n",
+		       subsys->name,
+		       suites[i].name);
+
+		argv[1] = suites[i].name;
+		suites[i].fn(1, argv, NULL);
+		printf("\n");
+	}
+}
+
+static void all_subsystem(void)
+{
+	int i;
+	for (i = 0; subsystems[i].suites; i++)
+		all_suite(&subsystems[i]);
+}
+
 int cmd_bench(int argc, const char **argv, const char *prefix __used)
 {
 	int i, j, status = 0;
@@ -155,6 +194,11 @@ int cmd_bench(int argc, const char **argv, const char *prefix __used)
 		goto end;
 	}
 
+	if (!strcmp(argv[0], "all")) {
+		all_subsystem();
+		goto end;
+	}
+
 	for (i = 0; subsystems[i].name; i++) {
 		if (strcmp(subsystems[i].name, argv[0]))
 			continue;
@@ -165,6 +209,11 @@ int cmd_bench(int argc, const char **argv, const char *prefix __used)
 			goto end;
 		}
 
+		if (!strcmp(argv[1], "all")) {
+			all_suite(&subsystems[i]);
+			goto end;
+		}
+
 		for (j = 0; subsystems[i].suites[j].name; j++) {
 			if (strcmp(subsystems[i].suites[j].name, argv[1]))
 				continue;
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 7dee9d19ab7a..bfd16a1594e4 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -11,15 +11,15 @@
 #include "util/cache.h"
 #include "util/data_map.h"
 #include "util/debug.h"
-#include "util/header.h"
 #include "util/parse-options.h"
+#include "util/session.h"
 #include "util/symbol.h"
 
 static char const *input_name = "perf.data";
 static int force;
 
 static const char *const buildid_list_usage[] = {
-	"perf report [<options>]",
+	"perf buildid-list [<options>]",
 	NULL
 };
 
@@ -55,56 +55,17 @@ static int perf_file_section__process_buildids(struct perf_file_section *self,
 static int __cmd_buildid_list(void)
 {
 	int err = -1;
-	struct perf_header *header;
-	struct perf_file_header f_header;
-	struct stat input_stat;
-	int input = open(input_name, O_RDONLY);
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY, force);
 
-	if (input < 0) {
-		pr_err("failed to open file: %s", input_name);
-		if (!strcmp(input_name, "perf.data"))
-			pr_err("  (try 'perf record' first)");
-		pr_err("\n");
-		goto out;
-	}
-
-	err = fstat(input, &input_stat);
-	if (err < 0) {
-		perror("failed to stat file");
-		goto out_close;
-	}
-
-	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-		pr_err("file %s not owned by current user or root\n",
-		       input_name);
-		goto out_close;
-	}
-
-	if (!input_stat.st_size) {
-		pr_info("zero-sized file, nothing to do!\n");
-		goto out_close;
-	}
-
-	err = -1;
-	header = perf_header__new();
-	if (header == NULL)
-		goto out_close;
-
-	if (perf_file_header__read(&f_header, header, input) < 0) {
-		pr_warning("incompatible file format");
-		goto out_close;
-	}
+	if (session == NULL)
+		return -1;
 
-	err = perf_header__process_sections(header, input,
+	err = perf_header__process_sections(&session->header, session->fd,
 				         perf_file_section__process_buildids);
+	if (err >= 0)
+		dsos__fprintf_buildid(stdout);
 
-	if (err < 0)
-		goto out_close;
-
-	dsos__fprintf_buildid(stdout);
-out_close:
-	close(input);
-out:
+	perf_session__delete(session);
 	return err;
 }
 
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 047fef74bd52..2071d2485913 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -6,6 +6,7 @@
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/header.h"
+#include "util/session.h"
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
@@ -20,7 +21,6 @@ typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
 
 static char const		*input_name = "perf.data";
 
-static struct perf_header	*header;
 static u64			sample_type;
 
 static int			alloc_flag;
@@ -57,11 +57,6 @@ static struct rb_root root_caller_sorted;
 static unsigned long total_requested, total_allocated;
 static unsigned long nr_allocs, nr_cross_allocs;
 
-struct raw_event_sample {
-	u32 size;
-	char data[0];
-};
-
 #define PATH_SYS_NODE	"/sys/devices/system/node"
 
 static void init_cpunode_map(void)
@@ -201,7 +196,7 @@ static void insert_caller_stat(unsigned long call_site,
 	}
 }
 
-static void process_alloc_event(struct raw_event_sample *raw,
+static void process_alloc_event(void *data,
 				struct event *event,
 				int cpu,
 				u64 timestamp __used,
@@ -214,10 +209,10 @@ static void process_alloc_event(struct raw_event_sample *raw,
 	int bytes_alloc;
 	int node1, node2;
 
-	ptr = raw_field_value(event, "ptr", raw->data);
-	call_site = raw_field_value(event, "call_site", raw->data);
-	bytes_req = raw_field_value(event, "bytes_req", raw->data);
-	bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
+	ptr = raw_field_value(event, "ptr", data);
+	call_site = raw_field_value(event, "call_site", data);
+	bytes_req = raw_field_value(event, "bytes_req", data);
+	bytes_alloc = raw_field_value(event, "bytes_alloc", data);
 
 	insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
 	insert_caller_stat(call_site, bytes_req, bytes_alloc);
@@ -227,7 +222,7 @@ static void process_alloc_event(struct raw_event_sample *raw,
 
 	if (node) {
 		node1 = cpunode_map[cpu];
-		node2 = raw_field_value(event, "node", raw->data);
+		node2 = raw_field_value(event, "node", data);
 		if (node1 != node2)
 			nr_cross_allocs++;
 	}
@@ -262,7 +257,7 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 	return NULL;
 }
 
-static void process_free_event(struct raw_event_sample *raw,
+static void process_free_event(void *data,
 			       struct event *event,
 			       int cpu,
 			       u64 timestamp __used,
@@ -271,7 +266,7 @@ static void process_free_event(struct raw_event_sample *raw,
 	unsigned long ptr;
 	struct alloc_stat *s_alloc, *s_caller;
 
-	ptr = raw_field_value(event, "ptr", raw->data);
+	ptr = raw_field_value(event, "ptr", data);
 
 	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 	if (!s_alloc)
@@ -289,66 +284,53 @@ static void process_free_event(struct raw_event_sample *raw,
 }
 
 static void
-process_raw_event(event_t *raw_event __used, void *more_data,
+process_raw_event(event_t *raw_event __used, void *data,
 		  int cpu, u64 timestamp, struct thread *thread)
 {
-	struct raw_event_sample *raw = more_data;
 	struct event *event;
 	int type;
 
-	type = trace_parse_common_type(raw->data);
+	type = trace_parse_common_type(data);
 	event = trace_find_event(type);
 
 	if (!strcmp(event->name, "kmalloc") ||
 	    !strcmp(event->name, "kmem_cache_alloc")) {
-		process_alloc_event(raw, event, cpu, timestamp, thread, 0);
+		process_alloc_event(data, event, cpu, timestamp, thread, 0);
 		return;
 	}
 
 	if (!strcmp(event->name, "kmalloc_node") ||
 	    !strcmp(event->name, "kmem_cache_alloc_node")) {
-		process_alloc_event(raw, event, cpu, timestamp, thread, 1);
+		process_alloc_event(data, event, cpu, timestamp, thread, 1);
 		return;
 	}
 
 	if (!strcmp(event->name, "kfree") ||
 	    !strcmp(event->name, "kmem_cache_free")) {
-		process_free_event(raw, event, cpu, timestamp, thread);
+		process_free_event(data, event, cpu, timestamp, thread);
 		return;
 	}
 }
 
 static int process_sample_event(event_t *event)
 {
-	u64 ip = event->ip.ip;
-	u64 timestamp = -1;
-	u32 cpu = -1;
-	u64 period = 1;
-	void *more_data = event->ip.__more_data;
-	struct thread *thread = threads__findnew(event->ip.pid);
-
-	if (sample_type & PERF_SAMPLE_TIME) {
-		timestamp = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	struct sample_data data;
+	struct thread *thread;
 
-	if (sample_type & PERF_SAMPLE_CPU) {
-		cpu = *(u32 *)more_data;
-		more_data += sizeof(u32);
-		more_data += sizeof(u32); /* reserved */
-	}
+	memset(&data, 0, sizeof(data));
+	data.time = -1;
+	data.cpu = -1;
+	data.period = 1;
 
-	if (sample_type & PERF_SAMPLE_PERIOD) {
-		period = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	event__parse_sample(event, sample_type, &data);
 
 	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
-		event->ip.pid, event->ip.tid,
-		(void *)(long)ip,
-		(long long)period);
+		data.pid, data.tid,
+		(void *)(long)data.ip,
+		(long long)data.period);
 
+	thread = threads__findnew(event->ip.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -357,7 +339,8 @@ static int process_sample_event(event_t *event)
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	process_raw_event(event, more_data, cpu, timestamp, thread);
+	process_raw_event(event, data.raw_data, data.cpu,
+			  data.time, thread);
 
 	return 0;
 }
@@ -384,11 +367,18 @@ static struct perf_file_handler file_handler = {
 
 static int read_events(void)
 {
+	int err;
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
+
+	if (session == NULL)
+		return -ENOMEM;
+
 	register_idle_thread();
 	register_perf_file_handler(&file_handler);
 
-	return mmap_dispatch_perf_file(&header, input_name, 0, 0,
-				       &event__cwdlen, &event__cwd);
+	err = perf_session__process_events(session, 0, &event__cwdlen, &event__cwd);
+	perf_session__delete(session);
+	return err;
 }
 
 static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -420,7 +410,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
 		if (is_caller) {
 			addr = data->call_site;
 			if (!raw_ip)
-				sym = thread__find_function(kthread, addr, NULL);
+				sym = map_groups__find_function(kmaps, addr, NULL);
 		} else
 			addr = data->ptr;
 
@@ -543,7 +533,7 @@ static int __cmd_kmem(void)
 }
 
 static const char * const kmem_usage[] = {
-	"perf kmem [<options>] {record}",
+	"perf kmem [<options>] {record|stat}",
 	NULL
 };
 
@@ -703,18 +693,17 @@ static int parse_sort_opt(const struct option *opt __used,
 	return 0;
 }
 
-static int parse_stat_opt(const struct option *opt __used,
-			  const char *arg, int unset __used)
+static int parse_caller_opt(const struct option *opt __used,
+			  const char *arg __used, int unset __used)
 {
-	if (!arg)
-		return -1;
+	caller_flag = (alloc_flag + 1);
+	return 0;
+}
 
-	if (strcmp(arg, "alloc") == 0)
-		alloc_flag = (caller_flag + 1);
-	else if (strcmp(arg, "caller") == 0)
-		caller_flag = (alloc_flag + 1);
-	else
-		return -1;
+static int parse_alloc_opt(const struct option *opt __used,
+			  const char *arg __used, int unset __used)
+{
+	alloc_flag = (caller_flag + 1);
 	return 0;
 }
 
@@ -739,14 +728,17 @@ static int parse_line_opt(const struct option *opt __used,
 static const struct option kmem_options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		   "input file name"),
-	OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
-		     "stat selector, Pass 'alloc' or 'caller'.",
-		     parse_stat_opt),
+	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
+			   "show per-callsite statistics",
+			   parse_caller_opt),
+	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
+			   "show per-allocation statistics",
+			   parse_alloc_opt),
 	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
 		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
 		     parse_sort_opt),
 	OPT_CALLBACK('l', "line", NULL, "num",
-		     "show n lins",
+		     "show n lines",
 		     parse_line_opt),
 	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
 	OPT_END()
@@ -790,18 +782,22 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used)
 
 	argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
 
-	if (argc && !strncmp(argv[0], "rec", 3))
-		return __cmd_record(argc, argv);
-	else if (argc)
+	if (!argc)
 		usage_with_options(kmem_usage, kmem_options);
 
-	if (list_empty(&caller_sort))
-		setup_sorting(&caller_sort, default_sort_order);
-	if (list_empty(&alloc_sort))
-		setup_sorting(&alloc_sort, default_sort_order);
+	if (!strncmp(argv[0], "rec", 3)) {
+		return __cmd_record(argc, argv);
+	} else if (!strcmp(argv[0], "stat")) {
+		setup_cpunode_map();
 
-	setup_cpunode_map();
+		if (list_empty(&caller_sort))
+			setup_sorting(&caller_sort, default_sort_order);
+		if (list_empty(&alloc_sort))
+			setup_sorting(&alloc_sort, default_sort_order);
 
-	return __cmd_kmem();
+		return __cmd_kmem();
+	}
+
+	return 0;
 }
 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index a58e11b7ea80..5a47c1e11f77 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -35,6 +35,7 @@
 #include "perf.h"
 #include "builtin.h"
 #include "util/util.h"
+#include "util/strlist.h"
 #include "util/event.h"
 #include "util/debug.h"
 #include "util/parse-options.h"
@@ -43,11 +44,12 @@
 #include "util/probe-event.h"
 
 /* Default vmlinux search paths */
-#define NR_SEARCH_PATH 3
+#define NR_SEARCH_PATH 4
 const char *default_search_path[NR_SEARCH_PATH] = {
 "/lib/modules/%s/build/vmlinux",		/* Custom build kernel */
 "/usr/lib/debug/lib/modules/%s/vmlinux",	/* Red Hat debuginfo */
 "/boot/vmlinux-debug-%s",			/* Ubuntu */
+"./vmlinux",					/* CWD */
 };
 
 #define MAX_PATH_LEN 256
@@ -60,6 +62,7 @@ static struct {
 	int need_dwarf;
 	int nr_probe;
 	struct probe_point probes[MAX_PROBES];
+	struct strlist *dellist;
 } session;
 
 static bool listing;
@@ -79,6 +82,25 @@ static void parse_probe_event(const char *str)
 	pr_debug("%d arguments\n", pp->nr_args);
 }
 
+static void parse_probe_event_argv(int argc, const char **argv)
+{
+	int i, len;
+	char *buf;
+
+	/* Bind up rest arguments */
+	len = 0;
+	for (i = 0; i < argc; i++)
+		len += strlen(argv[i]) + 1;
+	buf = zalloc(len + 1);
+	if (!buf)
+		die("Failed to allocate memory for binding arguments.");
+	len = 0;
+	for (i = 0; i < argc; i++)
+		len += sprintf(&buf[len], "%s ", argv[i]);
+	parse_probe_event(buf);
+	free(buf);
+}
+
 static int opt_add_probe_event(const struct option *opt __used,
 			      const char *str, int unset __used)
 {
@@ -87,6 +109,17 @@ static int opt_add_probe_event(const struct option *opt __used,
 	return 0;
 }
 
+static int opt_del_probe_event(const struct option *opt __used,
+			       const char *str, int unset __used)
+{
+	if (str) {
+		if (!session.dellist)
+			session.dellist = strlist__new(true, NULL);
+		strlist__add(session.dellist, str);
+	}
+	return 0;
+}
+
 #ifndef NO_LIBDWARF
 static int open_default_vmlinux(void)
 {
@@ -121,6 +154,7 @@ static int open_default_vmlinux(void)
 static const char * const probe_usage[] = {
 	"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
 	"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
+	"perf probe [<options>] --del '[GROUP:]EVENT' ...",
 	"perf probe --list",
 	NULL
 };
@@ -132,7 +166,9 @@ static const struct option options[] = {
 	OPT_STRING('k', "vmlinux", &session.vmlinux, "file",
 		"vmlinux/module pathname"),
 #endif
-	OPT_BOOLEAN('l', "list", &listing, "list up current probes"),
+	OPT_BOOLEAN('l', "list", &listing, "list up current probe events"),
+	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
+		opt_del_probe_event),
 	OPT_CALLBACK('a', "add", NULL,
 #ifdef NO_LIBDWARF
 		"FUNC[+OFFS|%return] [ARG ...]",
@@ -160,7 +196,7 @@ static const struct option options[] = {
 
 int cmd_probe(int argc, const char **argv, const char *prefix __used)
 {
-	int i, j, ret;
+	int i, ret;
 #ifndef NO_LIBDWARF
 	int fd;
 #endif
@@ -168,40 +204,52 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
 
 	argc = parse_options(argc, argv, options, probe_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
-	for (i = 0; i < argc; i++)
-		parse_probe_event(argv[i]);
+	if (argc > 0)
+		parse_probe_event_argv(argc, argv);
 
-	if ((session.nr_probe == 0 && !listing) ||
-	    (session.nr_probe != 0 && listing))
+	if ((session.nr_probe == 0 && !session.dellist && !listing))
 		usage_with_options(probe_usage, options);
 
 	if (listing) {
+		if (session.nr_probe != 0 || session.dellist) {
+			pr_warning("  Error: Don't use --list with"
+				   " --add/--del.\n");
+			usage_with_options(probe_usage, options);
+		}
 		show_perf_probe_events();
 		return 0;
 	}
 
+	if (session.dellist) {
+		del_trace_kprobe_events(session.dellist);
+		strlist__delete(session.dellist);
+		if (session.nr_probe == 0)
+			return 0;
+	}
+
 	if (session.need_dwarf)
 #ifdef NO_LIBDWARF
 		die("Debuginfo-analysis is not supported");
 #else	/* !NO_LIBDWARF */
 		pr_debug("Some probes require debuginfo.\n");
 
-	if (session.vmlinux)
+	if (session.vmlinux) {
+		pr_debug("Try to open %s.", session.vmlinux);
 		fd = open(session.vmlinux, O_RDONLY);
-	else
+	} else
 		fd = open_default_vmlinux();
 	if (fd < 0) {
 		if (session.need_dwarf)
-			die("Could not open vmlinux/module file.");
+			die("Could not open debuginfo file.");
 
-		pr_warning("Could not open vmlinux/module file."
-			   " Try to use symbols.\n");
+		pr_debug("Could not open vmlinux/module file."
+			 " Try to use symbols.\n");
 		goto end_dwarf;
 	}
 
 	/* Searching probe points */
-	for (j = 0; j < session.nr_probe; j++) {
-		pp = &session.probes[j];
+	for (i = 0; i < session.nr_probe; i++) {
+		pp = &session.probes[i];
 		if (pp->found)
 			continue;
 
@@ -223,8 +271,8 @@ end_dwarf:
 #endif /* !NO_LIBDWARF */
 
 	/* Synthesize probes without dwarf */
-	for (j = 0; j < session.nr_probe; j++) {
-		pp = &session.probes[j];
+	for (i = 0; i < session.nr_probe; i++) {
+		pp = &session.probes[i];
 		if (pp->found)	/* This probe is already found. */
 			continue;
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0e519c667e3a..4decbd14eaed 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -17,6 +17,7 @@
 #include "util/header.h"
 #include "util/event.h"
 #include "util/debug.h"
+#include "util/session.h"
 #include "util/symbol.h"
 
 #include <unistd.h>
@@ -62,7 +63,7 @@ static int			nr_cpu				=      0;
 
 static int			file_new			=      1;
 
-struct perf_header		*header				=   NULL;
+static struct perf_session	*session;
 
 struct mmap_data {
 	int			counter;
@@ -216,12 +217,12 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
 {
 	struct perf_header_attr *h_attr;
 
-	if (nr < header->attrs) {
-		h_attr = header->attr[nr];
+	if (nr < session->header.attrs) {
+		h_attr = session->header.attr[nr];
 	} else {
 		h_attr = perf_header_attr__new(a);
 		if (h_attr != NULL)
-			if (perf_header__add_attr(header, h_attr) < 0) {
+			if (perf_header__add_attr(&session->header, h_attr) < 0) {
 				perf_header_attr__delete(h_attr);
 				h_attr = NULL;
 			}
@@ -395,9 +396,9 @@ static void open_counters(int cpu, pid_t pid)
 
 static void atexit_header(void)
 {
-	header->data_size += bytes_written;
+	session->header.data_size += bytes_written;
 
-	perf_header__write(header, output, true);
+	perf_header__write(&session->header, output, true);
 }
 
 static int __cmd_record(int argc, const char **argv)
@@ -440,24 +441,24 @@ static int __cmd_record(int argc, const char **argv)
 		exit(-1);
 	}
 
-	header = perf_header__new();
-	if (header == NULL) {
+	session = perf_session__new(output_name, O_WRONLY, force);
+	if (session == NULL) {
 		pr_err("Not enough memory for reading perf file header\n");
 		return -1;
 	}
 
 	if (!file_new) {
-		err = perf_header__read(header, output);
+		err = perf_header__read(&session->header, output);
 		if (err < 0)
 			return err;
 	}
 
 	if (raw_samples) {
-		perf_header__set_feat(header, HEADER_TRACE_INFO);
+		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
 	} else {
 		for (i = 0; i < nr_counters; i++) {
 			if (attrs[i].sample_type & PERF_SAMPLE_RAW) {
-				perf_header__set_feat(header, HEADER_TRACE_INFO);
+				perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
 				break;
 			}
 		}
@@ -481,7 +482,7 @@ static int __cmd_record(int argc, const char **argv)
 	}
 
 	if (file_new) {
-		err = perf_header__write(header, output, false);
+		err = perf_header__write(&session->header, output, false);
 		if (err < 0)
 			return err;
 	}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 383c4ab4f9af..e2ec49a9b731 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -22,6 +22,7 @@
 #include "perf.h"
 #include "util/debug.h"
 #include "util/header.h"
+#include "util/session.h"
 
 #include "util/parse-options.h"
 #include "util/parse-events.h"
@@ -52,7 +53,7 @@ static int		exclude_other = 1;
 
 static char		callchain_default_opt[] = "fractal,0.5";
 
-static struct perf_header *header;
+static struct perf_session *session;
 
 static u64		sample_type;
 
@@ -605,44 +606,41 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
 
 static int process_sample_event(event_t *event)
 {
-	u64 ip = event->ip.ip;
-	u64 period = 1;
-	void *more_data = event->ip.__more_data;
-	struct ip_callchain *chain = NULL;
+	struct sample_data data;
 	int cpumode;
 	struct addr_location al;
-	struct thread *thread = threads__findnew(event->ip.pid);
+	struct thread *thread;
 
-	if (sample_type & PERF_SAMPLE_PERIOD) {
-		period = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	memset(&data, 0, sizeof(data));
+	data.period = 1;
+
+	event__parse_sample(event, sample_type, &data);
 
 	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
-		event->ip.pid, event->ip.tid,
-		(void *)(long)ip,
-		(long long)period);
+		data.pid, data.tid,
+		(void *)(long)data.ip,
+		(long long)data.period);
 
 	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		unsigned int i;
 
-		chain = (void *)more_data;
+		dump_printf("... chain: nr:%Lu\n", data.callchain->nr);
 
-		dump_printf("... chain: nr:%Lu\n", chain->nr);
-
-		if (validate_chain(chain, event) < 0) {
+		if (validate_chain(data.callchain, event) < 0) {
 			pr_debug("call-chain problem with event, "
 				 "skipping it.\n");
 			return 0;
 		}
 
 		if (dump_trace) {
-			for (i = 0; i < chain->nr; i++)
-				dump_printf("..... %2d: %016Lx\n", i, chain->ips[i]);
+			for (i = 0; i < data.callchain->nr; i++)
+				dump_printf("..... %2d: %016Lx\n",
+					    i, data.callchain->ips[i]);
 		}
 	}
 
+	thread = threads__findnew(data.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			event->header.type);
@@ -657,7 +655,7 @@ static int process_sample_event(event_t *event)
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
 	thread__find_addr_location(thread, cpumode,
-				   MAP__FUNCTION, ip, &al, NULL);
+				   MAP__FUNCTION, data.ip, &al, NULL);
 	/*
 	 * We have to do this here as we may have a dso with no symbol hit that
 	 * has a name longer than the ones with symbols sampled.
@@ -675,12 +673,12 @@ static int process_sample_event(event_t *event)
 	if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name))
 		return 0;
 
-	if (hist_entry__add(&al, chain, period)) {
+	if (hist_entry__add(&al, data.callchain, data.period)) {
 		pr_debug("problem incrementing symbol count, skipping event\n");
 		return -1;
 	}
 
-	event__stats.total += period;
+	event__stats.total += data.period;
 
 	return 0;
 }
@@ -704,7 +702,7 @@ static int process_read_event(event_t *event)
 {
 	struct perf_event_attr *attr;
 
-	attr = perf_header__find_attr(event->read.id, header);
+	attr = perf_header__find_attr(event->read.id, &session->header);
 
 	if (show_threads) {
 		const char *name = attr ? __event_name(attr->type, attr->config)
@@ -769,6 +767,10 @@ static int __cmd_report(void)
 	struct thread *idle;
 	int ret;
 
+	session = perf_session__new(input_name, O_RDONLY, force);
+	if (session == NULL)
+		return -ENOMEM;
+
 	idle = register_idle_thread();
 	thread__comm_adjust(idle);
 
@@ -777,14 +779,14 @@ static int __cmd_report(void)
 
 	register_perf_file_handler(&file_handler);
 
-	ret = mmap_dispatch_perf_file(&header, input_name, force,
-				      full_paths, &event__cwdlen, &event__cwd);
+	ret = perf_session__process_events(session, full_paths,
+					   &event__cwdlen, &event__cwd);
 	if (ret)
-		return ret;
+		goto out_delete;
 
 	if (dump_trace) {
 		event__print_totals();
-		return 0;
+		goto out_delete;
 	}
 
 	if (verbose > 3)
@@ -799,7 +801,8 @@ static int __cmd_report(void)
 
 	if (show_threads)
 		perf_read_values_destroy(&show_threads_values);
-
+out_delete:
+	perf_session__delete(session);
 	return ret;
 }
 
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 26b782f26ee1..65021fe1361e 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -6,6 +6,7 @@
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/header.h"
+#include "util/session.h"
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
@@ -13,7 +14,6 @@
 #include "util/debug.h"
 #include "util/data_map.h"
 
-#include <sys/types.h>
 #include <sys/prctl.h>
 
 #include <semaphore.h>
@@ -22,7 +22,6 @@
 
 static char			const *input_name = "perf.data";
 
-static struct perf_header	*header;
 static u64			sample_type;
 
 static char			default_sort_order[] = "avg, max, switch, runtime";
@@ -141,6 +140,7 @@ struct work_atoms {
 	struct thread		*thread;
 	struct rb_node		node;
 	u64			max_lat;
+	u64			max_lat_at;
 	u64			total_lat;
 	u64			nb_atoms;
 	u64			total_runtime;
@@ -414,34 +414,33 @@ static u64 get_cpu_usage_nsec_parent(void)
 	return sum;
 }
 
-static u64 get_cpu_usage_nsec_self(void)
+static int self_open_counters(void)
 {
-	char filename [] = "/proc/1234567890/sched";
-	unsigned long msecs, nsecs;
-	char *line = NULL;
-	u64 total = 0;
-	size_t len = 0;
-	ssize_t chars;
-	FILE *file;
-	int ret;
+	struct perf_event_attr attr;
+	int fd;
 
-	sprintf(filename, "/proc/%d/sched", getpid());
-	file = fopen(filename, "r");
-	BUG_ON(!file);
+	memset(&attr, 0, sizeof(attr));
 
-	while ((chars = getline(&line, &len, file)) != -1) {
-		ret = sscanf(line, "se.sum_exec_runtime : %ld.%06ld\n",
-			&msecs, &nsecs);
-		if (ret == 2) {
-			total = msecs*1e6 + nsecs;
-			break;
-		}
-	}
-	if (line)
-		free(line);
-	fclose(file);
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_TASK_CLOCK;
+
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 
-	return total;
+	if (fd < 0)
+		die("Error: sys_perf_event_open() syscall returned"
+		    "with %d (%s)\n", fd, strerror(errno));
+	return fd;
+}
+
+static u64 get_cpu_usage_nsec_self(int fd)
+{
+	u64 runtime;
+	int ret;
+
+	ret = read(fd, &runtime, sizeof(runtime));
+	BUG_ON(ret != sizeof(runtime));
+
+	return runtime;
 }
 
 static void *thread_func(void *ctx)
@@ -450,9 +449,11 @@ static void *thread_func(void *ctx)
 	u64 cpu_usage_0, cpu_usage_1;
 	unsigned long i, ret;
 	char comm2[22];
+	int fd;
 
 	sprintf(comm2, ":%s", this_task->comm);
 	prctl(PR_SET_NAME, comm2);
+	fd = self_open_counters();
 
 again:
 	ret = sem_post(&this_task->ready_for_work);
@@ -462,16 +463,15 @@ again:
 	ret = pthread_mutex_unlock(&start_work_mutex);
 	BUG_ON(ret);
 
-	cpu_usage_0 = get_cpu_usage_nsec_self();
+	cpu_usage_0 = get_cpu_usage_nsec_self(fd);
 
 	for (i = 0; i < this_task->nr_events; i++) {
 		this_task->curr_event = i;
 		process_sched_event(this_task, this_task->atoms[i]);
 	}
 
-	cpu_usage_1 = get_cpu_usage_nsec_self();
+	cpu_usage_1 = get_cpu_usage_nsec_self(fd);
 	this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
-
 	ret = sem_post(&this_task->work_done_sem);
 	BUG_ON(ret);
 
@@ -628,11 +628,6 @@ static void test_calibrations(void)
 	printf("the sleep test took %Ld nsecs\n", T1-T0);
 }
 
-struct raw_event_sample {
-	u32 size;
-	char data[0];
-};
-
 #define FILL_FIELD(ptr, field, event, data)	\
 	ptr.field = (typeof(ptr.field)) raw_field_value(event, #field, data)
 
@@ -1019,8 +1014,10 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
 
 	delta = atom->sched_in_time - atom->wake_up_time;
 	atoms->total_lat += delta;
-	if (delta > atoms->max_lat)
+	if (delta > atoms->max_lat) {
 		atoms->max_lat = delta;
+		atoms->max_lat_at = timestamp;
+	}
 	atoms->nb_atoms++;
 }
 
@@ -1216,10 +1213,11 @@ static void output_lat_thread(struct work_atoms *work_list)
 
 	avg = work_list->total_lat / work_list->nb_atoms;
 
-	printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
+	printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
 	      (double)work_list->total_runtime / 1e6,
 		 work_list->nb_atoms, (double)avg / 1e6,
-		 (double)work_list->max_lat / 1e6);
+		 (double)work_list->max_lat / 1e6,
+		 (double)work_list->max_lat_at / 1e9);
 }
 
 static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1356,7 +1354,7 @@ static void sort_lat(void)
 static struct trace_sched_handler *trace_handler;
 
 static void
-process_sched_wakeup_event(struct raw_event_sample *raw,
+process_sched_wakeup_event(void *data,
 			   struct event *event,
 			   int cpu __used,
 			   u64 timestamp __used,
@@ -1364,13 +1362,13 @@ process_sched_wakeup_event(struct raw_event_sample *raw,
 {
 	struct trace_wakeup_event wakeup_event;
 
-	FILL_COMMON_FIELDS(wakeup_event, event, raw->data);
+	FILL_COMMON_FIELDS(wakeup_event, event, data);
 
-	FILL_ARRAY(wakeup_event, comm, event, raw->data);
-	FILL_FIELD(wakeup_event, pid, event, raw->data);
-	FILL_FIELD(wakeup_event, prio, event, raw->data);
-	FILL_FIELD(wakeup_event, success, event, raw->data);
-	FILL_FIELD(wakeup_event, cpu, event, raw->data);
+	FILL_ARRAY(wakeup_event, comm, event, data);
+	FILL_FIELD(wakeup_event, pid, event, data);
+	FILL_FIELD(wakeup_event, prio, event, data);
+	FILL_FIELD(wakeup_event, success, event, data);
+	FILL_FIELD(wakeup_event, cpu, event, data);
 
 	if (trace_handler->wakeup_event)
 		trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
@@ -1469,7 +1467,7 @@ map_switch_event(struct trace_switch_event *switch_event,
 
 
 static void
-process_sched_switch_event(struct raw_event_sample *raw,
+process_sched_switch_event(void *data,
 			   struct event *event,
 			   int this_cpu,
 			   u64 timestamp __used,
@@ -1477,15 +1475,15 @@ process_sched_switch_event(struct raw_event_sample *raw,
 {
 	struct trace_switch_event switch_event;
 
-	FILL_COMMON_FIELDS(switch_event, event, raw->data);
+	FILL_COMMON_FIELDS(switch_event, event, data);
 
-	FILL_ARRAY(switch_event, prev_comm, event, raw->data);
-	FILL_FIELD(switch_event, prev_pid, event, raw->data);
-	FILL_FIELD(switch_event, prev_prio, event, raw->data);
-	FILL_FIELD(switch_event, prev_state, event, raw->data);
-	FILL_ARRAY(switch_event, next_comm, event, raw->data);
-	FILL_FIELD(switch_event, next_pid, event, raw->data);
-	FILL_FIELD(switch_event, next_prio, event, raw->data);
+	FILL_ARRAY(switch_event, prev_comm, event, data);
+	FILL_FIELD(switch_event, prev_pid, event, data);
+	FILL_FIELD(switch_event, prev_prio, event, data);
+	FILL_FIELD(switch_event, prev_state, event, data);
+	FILL_ARRAY(switch_event, next_comm, event, data);
+	FILL_FIELD(switch_event, next_pid, event, data);
+	FILL_FIELD(switch_event, next_prio, event, data);
 
 	if (curr_pid[this_cpu] != (u32)-1) {
 		/*
@@ -1502,7 +1500,7 @@ process_sched_switch_event(struct raw_event_sample *raw,
 }
 
 static void
-process_sched_runtime_event(struct raw_event_sample *raw,
+process_sched_runtime_event(void *data,
 			   struct event *event,
 			   int cpu __used,
 			   u64 timestamp __used,
@@ -1510,17 +1508,17 @@ process_sched_runtime_event(struct raw_event_sample *raw,
 {
 	struct trace_runtime_event runtime_event;
 
-	FILL_ARRAY(runtime_event, comm, event, raw->data);
-	FILL_FIELD(runtime_event, pid, event, raw->data);
-	FILL_FIELD(runtime_event, runtime, event, raw->data);
-	FILL_FIELD(runtime_event, vruntime, event, raw->data);
+	FILL_ARRAY(runtime_event, comm, event, data);
+	FILL_FIELD(runtime_event, pid, event, data);
+	FILL_FIELD(runtime_event, runtime, event, data);
+	FILL_FIELD(runtime_event, vruntime, event, data);
 
 	if (trace_handler->runtime_event)
 		trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
 }
 
 static void
-process_sched_fork_event(struct raw_event_sample *raw,
+process_sched_fork_event(void *data,
 			 struct event *event,
 			 int cpu __used,
 			 u64 timestamp __used,
@@ -1528,12 +1526,12 @@ process_sched_fork_event(struct raw_event_sample *raw,
 {
 	struct trace_fork_event fork_event;
 
-	FILL_COMMON_FIELDS(fork_event, event, raw->data);
+	FILL_COMMON_FIELDS(fork_event, event, data);
 
-	FILL_ARRAY(fork_event, parent_comm, event, raw->data);
-	FILL_FIELD(fork_event, parent_pid, event, raw->data);
-	FILL_ARRAY(fork_event, child_comm, event, raw->data);
-	FILL_FIELD(fork_event, child_pid, event, raw->data);
+	FILL_ARRAY(fork_event, parent_comm, event, data);
+	FILL_FIELD(fork_event, parent_pid, event, data);
+	FILL_ARRAY(fork_event, child_comm, event, data);
+	FILL_FIELD(fork_event, child_pid, event, data);
 
 	if (trace_handler->fork_event)
 		trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
@@ -1550,7 +1548,7 @@ process_sched_exit_event(struct event *event,
 }
 
 static void
-process_sched_migrate_task_event(struct raw_event_sample *raw,
+process_sched_migrate_task_event(void *data,
 			   struct event *event,
 			   int cpu __used,
 			   u64 timestamp __used,
@@ -1558,80 +1556,66 @@ process_sched_migrate_task_event(struct raw_event_sample *raw,
 {
 	struct trace_migrate_task_event migrate_task_event;
 
-	FILL_COMMON_FIELDS(migrate_task_event, event, raw->data);
+	FILL_COMMON_FIELDS(migrate_task_event, event, data);
 
-	FILL_ARRAY(migrate_task_event, comm, event, raw->data);
-	FILL_FIELD(migrate_task_event, pid, event, raw->data);
-	FILL_FIELD(migrate_task_event, prio, event, raw->data);
-	FILL_FIELD(migrate_task_event, cpu, event, raw->data);
+	FILL_ARRAY(migrate_task_event, comm, event, data);
+	FILL_FIELD(migrate_task_event, pid, event, data);
+	FILL_FIELD(migrate_task_event, prio, event, data);
+	FILL_FIELD(migrate_task_event, cpu, event, data);
 
 	if (trace_handler->migrate_task_event)
 		trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread);
 }
 
 static void
-process_raw_event(event_t *raw_event __used, void *more_data,
+process_raw_event(event_t *raw_event __used, void *data,
 		  int cpu, u64 timestamp, struct thread *thread)
 {
-	struct raw_event_sample *raw = more_data;
 	struct event *event;
 	int type;
 
-	type = trace_parse_common_type(raw->data);
+
+	type = trace_parse_common_type(data);
 	event = trace_find_event(type);
 
 	if (!strcmp(event->name, "sched_switch"))
-		process_sched_switch_event(raw, event, cpu, timestamp, thread);
+		process_sched_switch_event(data, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_stat_runtime"))
-		process_sched_runtime_event(raw, event, cpu, timestamp, thread);
+		process_sched_runtime_event(data, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_wakeup"))
-		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
+		process_sched_wakeup_event(data, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_wakeup_new"))
-		process_sched_wakeup_event(raw, event, cpu, timestamp, thread);
+		process_sched_wakeup_event(data, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_process_fork"))
-		process_sched_fork_event(raw, event, cpu, timestamp, thread);
+		process_sched_fork_event(data, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_process_exit"))
 		process_sched_exit_event(event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "sched_migrate_task"))
-		process_sched_migrate_task_event(raw, event, cpu, timestamp, thread);
+		process_sched_migrate_task_event(data, event, cpu, timestamp, thread);
 }
 
 static int process_sample_event(event_t *event)
 {
+	struct sample_data data;
 	struct thread *thread;
-	u64 ip = event->ip.ip;
-	u64 timestamp = -1;
-	u32 cpu = -1;
-	u64 period = 1;
-	void *more_data = event->ip.__more_data;
 
 	if (!(sample_type & PERF_SAMPLE_RAW))
 		return 0;
 
-	thread = threads__findnew(event->ip.pid);
+	memset(&data, 0, sizeof(data));
+	data.time = -1;
+	data.cpu = -1;
+	data.period = -1;
 
-	if (sample_type & PERF_SAMPLE_TIME) {
-		timestamp = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
-
-	if (sample_type & PERF_SAMPLE_CPU) {
-		cpu = *(u32 *)more_data;
-		more_data += sizeof(u32);
-		more_data += sizeof(u32); /* reserved */
-	}
-
-	if (sample_type & PERF_SAMPLE_PERIOD) {
-		period = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	event__parse_sample(event, sample_type, &data);
 
 	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
-		event->ip.pid, event->ip.tid,
-		(void *)(long)ip,
-		(long long)period);
+		data.pid, data.tid,
+		(void *)(long)data.ip,
+		(long long)data.period);
 
+	thread = threads__findnew(data.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
@@ -1640,10 +1624,10 @@ static int process_sample_event(event_t *event)
 
 	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
 
-	if (profile_cpu != -1 && profile_cpu != (int) cpu)
+	if (profile_cpu != -1 && profile_cpu != (int)data.cpu)
 		return 0;
 
-	process_raw_event(event, more_data, cpu, timestamp, thread);
+	process_raw_event(event, data.raw_data, data.cpu, data.time, thread);
 
 	return 0;
 }
@@ -1679,11 +1663,18 @@ static struct perf_file_handler file_handler = {
 
 static int read_events(void)
 {
+	int err;
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
+
+	if (session == NULL)
+		return -ENOMEM;
+
 	register_idle_thread();
 	register_perf_file_handler(&file_handler);
 
-	return mmap_dispatch_perf_file(&header, input_name, 0, 0,
-				       &event__cwdlen, &event__cwd);
+	err = perf_session__process_events(session, 0, &event__cwdlen, &event__cwd);
+	perf_session__delete(session);
+	return err;
 }
 
 static void print_bad_events(void)
@@ -1724,9 +1715,9 @@ static void __cmd_lat(void)
 	read_events();
 	sort_lat();
 
-	printf("\n -----------------------------------------------------------------------------------------\n");
-	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms |\n");
-	printf(" -----------------------------------------------------------------------------------------\n");
+	printf("\n ---------------------------------------------------------------------------------------------------------------\n");
+	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at     |\n");
+	printf(" ---------------------------------------------------------------------------------------------------------------\n");
 
 	next = rb_first(&sorted_atom_root);
 
@@ -1902,13 +1893,18 @@ static int __cmd_record(int argc, const char **argv)
 
 int cmd_sched(int argc, const char **argv, const char *prefix __used)
 {
-	symbol__init(0);
-
 	argc = parse_options(argc, argv, sched_options, sched_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
 	if (!argc)
 		usage_with_options(sched_usage, sched_options);
 
+	/*
+	 * Aliased to 'perf trace' for now:
+	 */
+	if (!strcmp(argv[0], "trace"))
+		return cmd_trace(argc, argv, prefix);
+
+	symbol__init(0);
 	if (!strncmp(argv[0], "rec", 3)) {
 		return __cmd_record(argc, argv);
 	} else if (!strncmp(argv[0], "lat", 3)) {
@@ -1932,11 +1928,6 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
 				usage_with_options(replay_usage, replay_options);
 		}
 		__cmd_replay();
-	} else if (!strcmp(argv[0], "trace")) {
-		/*
-		 * Aliased to 'perf trace' for now:
-		 */
-		return cmd_trace(argc, argv, prefix);
 	} else {
 		usage_with_options(sched_usage, sched_options);
 	}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index cb58b6605fcc..759dd2b35fdb 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -302,12 +302,11 @@ process_exit_event(event_t *event)
 }
 
 struct trace_entry {
-	u32			size;
 	unsigned short		type;
 	unsigned char		flags;
 	unsigned char		preempt_count;
 	int			pid;
-	int			tgid;
+	int			lock_depth;
 };
 
 struct power_entry {
@@ -484,43 +483,22 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
 static int
 process_sample_event(event_t *event)
 {
-	int cursor = 0;
-	u64 addr = 0;
-	u64 stamp = 0;
-	u32 cpu = 0;
-	u32 pid = 0;
+	struct sample_data data;
 	struct trace_entry *te;
 
-	if (sample_type & PERF_SAMPLE_IP)
-		cursor++;
+	memset(&data, 0, sizeof(data));
 
-	if (sample_type & PERF_SAMPLE_TID) {
-		pid = event->sample.array[cursor]>>32;
-		cursor++;
-	}
-	if (sample_type & PERF_SAMPLE_TIME) {
-		stamp = event->sample.array[cursor++];
-
-		if (!first_time || first_time > stamp)
-			first_time = stamp;
-		if (last_time < stamp)
-			last_time = stamp;
+	event__parse_sample(event, sample_type, &data);
 
+	if (sample_type & PERF_SAMPLE_TIME) {
+		if (!first_time || first_time > data.time)
+			first_time = data.time;
+		if (last_time < data.time)
+			last_time = data.time;
 	}
-	if (sample_type & PERF_SAMPLE_ADDR)
-		addr = event->sample.array[cursor++];
-	if (sample_type & PERF_SAMPLE_ID)
-		cursor++;
-	if (sample_type & PERF_SAMPLE_STREAM_ID)
-		cursor++;
-	if (sample_type & PERF_SAMPLE_CPU)
-		cpu = event->sample.array[cursor++] & 0xFFFFFFFF;
-	if (sample_type & PERF_SAMPLE_PERIOD)
-		cursor++;
 
-	te = (void *)&event->sample.array[cursor];
-
-	if (sample_type & PERF_SAMPLE_RAW && te->size > 0) {
+	te = (void *)data.raw_data;
+	if (sample_type & PERF_SAMPLE_RAW && data.raw_size > 0) {
 		char *event_str;
 		struct power_entry *pe;
 
@@ -532,19 +510,19 @@ process_sample_event(event_t *event)
 			return 0;
 
 		if (strcmp(event_str, "power:power_start") == 0)
-			c_state_start(cpu, stamp, pe->value);
+			c_state_start(data.cpu, data.time, pe->value);
 
 		if (strcmp(event_str, "power:power_end") == 0)
-			c_state_end(cpu, stamp);
+			c_state_end(data.cpu, data.time);
 
 		if (strcmp(event_str, "power:power_frequency") == 0)
-			p_state_change(cpu, stamp, pe->value);
+			p_state_change(data.cpu, data.time, pe->value);
 
 		if (strcmp(event_str, "sched:sched_wakeup") == 0)
-			sched_wakeup(cpu, stamp, pid, te);
+			sched_wakeup(data.cpu, data.time, data.pid, te);
 
 		if (strcmp(event_str, "sched:sched_switch") == 0)
-			sched_switch(cpu, stamp, te);
+			sched_switch(data.cpu, data.time, te);
 	}
 	return 0;
 }
@@ -1081,15 +1059,17 @@ static struct perf_file_handler file_handler = {
 
 static int __cmd_timechart(void)
 {
-	struct perf_header *header;
+	struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
 	int ret;
 
+	if (session == NULL)
+		return -ENOMEM;
+
 	register_perf_file_handler(&file_handler);
 
-	ret = mmap_dispatch_perf_file(&header, input_name, 0, 0,
-				      &event__cwdlen, &event__cwd);
+	ret = perf_session__process_events(session, 0, &event__cwdlen, &event__cwd);
 	if (ret)
-		return EXIT_FAILURE;
+		goto out_delete;
 
 	process_samples();
 
@@ -1101,8 +1081,9 @@ static int __cmd_timechart(void)
 
 	pr_info("Written %2.1f seconds of trace to %s.\n",
 		(last_time - first_time) / 1000000000.0, output_name);
-
-	return EXIT_SUCCESS;
+out_delete:
+	perf_session__delete(session);
+	return ret;
 }
 
 static const char * const timechart_usage[] = {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index abb914aa7be6..0756664666f1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -7,6 +7,7 @@
 #include "util/header.h"
 #include "util/exec_cmd.h"
 #include "util/trace-event.h"
+#include "util/session.h"
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -61,63 +62,45 @@ static int cleanup_scripting(void)
 
 static char const		*input_name = "perf.data";
 
-static struct perf_header	*header;
+static struct perf_session 	*session;
 static u64			sample_type;
 
 static int process_sample_event(event_t *event)
 {
-	u64 ip = event->ip.ip;
-	u64 timestamp = -1;
-	u32 cpu = -1;
-	u64 period = 1;
-	void *more_data = event->ip.__more_data;
-	struct thread *thread = threads__findnew(event->ip.pid);
-
-	if (sample_type & PERF_SAMPLE_TIME) {
-		timestamp = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	struct sample_data data;
+	struct thread *thread;
 
-	if (sample_type & PERF_SAMPLE_CPU) {
-		cpu = *(u32 *)more_data;
-		more_data += sizeof(u32);
-		more_data += sizeof(u32); /* reserved */
-	}
+	memset(&data, 0, sizeof(data));
+	data.time = -1;
+	data.cpu = -1;
+	data.period = 1;
 
-	if (sample_type & PERF_SAMPLE_PERIOD) {
-		period = *(u64 *)more_data;
-		more_data += sizeof(u64);
-	}
+	event__parse_sample(event, sample_type, &data);
 
 	dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
 		event->header.misc,
-		event->ip.pid, event->ip.tid,
-		(void *)(long)ip,
-		(long long)period);
+		data.pid, data.tid,
+		(void *)(long)data.ip,
+		(long long)data.period);
 
+	thread = threads__findnew(event->ip.pid);
 	if (thread == NULL) {
 		pr_debug("problem processing %d event, skipping it.\n",
 			 event->header.type);
 		return -1;
 	}
 
-	dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
-
 	if (sample_type & PERF_SAMPLE_RAW) {
-		struct {
-			u32 size;
-			char data[0];
-		} *raw = more_data;
-
 		/*
 		 * FIXME: better resolve from pid from the struct trace_entry
 		 * field, although it should be the same than this perf
 		 * event pid
 		 */
-		scripting_ops->process_event(cpu, raw->data, raw->size,
-					     timestamp, thread->comm);
+		scripting_ops->process_event(data.cpu, data.raw_data,
+					     data.raw_size,
+					     data.time, thread->comm);
 	}
-	event__stats.total += period;
+	event__stats.total += data.period;
 
 	return 0;
 }
@@ -144,11 +127,18 @@ static struct perf_file_handler file_handler = {
 
 static int __cmd_trace(void)
 {
+	int err;
+
+	session = perf_session__new(input_name, O_RDONLY, 0);
+	if (session == NULL)
+		return -ENOMEM;
+
 	register_idle_thread();
 	register_perf_file_handler(&file_handler);
 
-	return mmap_dispatch_perf_file(&header, input_name,
-				       0, 0, &event__cwdlen, &event__cwd);
+	err = perf_session__process_events(session, 0, &event__cwdlen, &event__cwd);
+	perf_session__delete(session);
+	return err;
 }
 
 struct script_spec {
@@ -366,11 +356,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
 			return -1;
 		}
 
-		header = perf_header__new();
-		if (header == NULL)
-			return -1;
-
-		perf_header__read(header, input);
+		perf_header__read(&session->header, input);
 		err = scripting_ops->generate_script("perf-trace");
 		goto out;
 	}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 454d5d55f32d..75f941bfba9e 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -59,6 +59,18 @@
 #define cpu_relax()	asm volatile ("hint @pause" ::: "memory")
 #endif
 
+#ifdef __arm__
+#include "../../arch/arm/include/asm/unistd.h"
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define rmb()		asm volatile("mov r0, #0xffff0fff; mov lr, pc;" \
+				     "sub pc, r0, #95" ::: "r0", "lr", "cc", \
+				     "memory")
+#define cpu_relax()	asm volatile("":::"memory")
+#endif
+
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
index ca0bedf637c2..6d46dda53a29 100644
--- a/tools/perf/util/data_map.c
+++ b/tools/perf/util/data_map.c
@@ -100,11 +100,11 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	}
 }
 
-int perf_header__read_build_ids(int input, off_t offset, off_t size)
+int perf_header__read_build_ids(int input, u64 offset, u64 size)
 {
 	struct build_id_event bev;
 	char filename[PATH_MAX];
-	off_t limit = offset + size;
+	u64 limit = offset + size;
 	int err = -1;
 
 	while (offset < limit) {
@@ -129,23 +129,16 @@ out:
 	return err;
 }
 
-int mmap_dispatch_perf_file(struct perf_header **pheader,
-			    const char *input_name,
-			    int force,
-			    int full_paths,
-			    int *cwdlen,
-			    char **cwd)
+int perf_session__process_events(struct perf_session *self,
+				 int full_paths, int *cwdlen, char **cwd)
 {
 	int err;
-	struct perf_header *header;
 	unsigned long head, shift;
 	unsigned long offset = 0;
-	struct stat input_stat;
 	size_t	page_size;
 	u64 sample_type;
 	event_t *event;
 	uint32_t size;
-	int input;
 	char *buf;
 
 	if (curr_handler == NULL) {
@@ -155,56 +148,19 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
 
 	page_size = getpagesize();
 
-	input = open(input_name, O_RDONLY);
-	if (input < 0) {
-		pr_err("Failed to open file: %s", input_name);
-		if (!strcmp(input_name, "perf.data"))
-			pr_err("  (try 'perf record' first)");
-		pr_err("\n");
-		return -errno;
-	}
-
-	if (fstat(input, &input_stat) < 0) {
-		pr_err("failed to stat file");
-		err = -errno;
-		goto out_close;
-	}
-
-	err = -EACCES;
-	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
-		pr_err("file: %s not owned by current user or root\n",
-			input_name);
-		goto out_close;
-	}
-
-	if (input_stat.st_size == 0) {
-		pr_info("zero-sized file, nothing to do!\n");
-		goto done;
-	}
-
-	err = -ENOMEM;
-	header = perf_header__new();
-	if (header == NULL)
-		goto out_close;
-
-	err = perf_header__read(header, input);
-	if (err < 0)
-		goto out_delete;
-	*pheader = header;
-	head = header->data_offset;
-
-	sample_type = perf_header__sample_type(header);
+	head = self->header.data_offset;
+	sample_type = perf_header__sample_type(&self->header);
 
 	err = -EINVAL;
 	if (curr_handler->sample_type_check &&
 	    curr_handler->sample_type_check(sample_type) < 0)
-		goto out_delete;
+		goto out_err;
 
 	if (!full_paths) {
 		if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
 			pr_err("failed to get the current directory\n");
 			err = -errno;
-			goto out_delete;
+			goto out_err;
 		}
 		*cwd = __cwd;
 		*cwdlen = strlen(*cwd);
@@ -219,11 +175,11 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
 
 remap:
 	buf = mmap(NULL, page_size * mmap_window, PROT_READ,
-		   MAP_SHARED, input, offset);
+		   MAP_SHARED, self->fd, offset);
 	if (buf == MAP_FAILED) {
 		pr_err("failed to mmap file\n");
 		err = -errno;
-		goto out_delete;
+		goto out_err;
 	}
 
 more:
@@ -273,19 +229,14 @@ more:
 
 	head += size;
 
-	if (offset + head >= header->data_offset + header->data_size)
+	if (offset + head >= self->header.data_offset + self->header.data_size)
 		goto done;
 
-	if (offset + head < (unsigned long)input_stat.st_size)
+	if (offset + head < self->size)
 		goto more;
 
 done:
 	err = 0;
-out_close:
-	close(input);
-
+out_err:
 	return err;
-out_delete:
-	perf_header__delete(header);
-	goto out_close;
 }
diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h
index 3180ff7e3633..98c5b823388c 100644
--- a/tools/perf/util/data_map.h
+++ b/tools/perf/util/data_map.h
@@ -3,6 +3,7 @@
 
 #include "event.h"
 #include "header.h"
+#include "session.h"
 
 typedef int (*event_type_handler_t)(event_t *);
 
@@ -21,12 +22,8 @@ struct perf_file_handler {
 };
 
 void register_perf_file_handler(struct perf_file_handler *handler);
-int mmap_dispatch_perf_file(struct perf_header **pheader,
-			    const char *input_name,
-			    int force,
-			    int full_paths,
-			    int *cwdlen,
-			    char **cwd);
-int perf_header__read_build_ids(int input, off_t offset, off_t file_size);
+int perf_session__process_events(struct perf_session *self,
+				 int full_paths, int *cwdlen, char **cwd);
+int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
 
 #endif
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 414b89d1bde9..ba0de90cd3d4 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -254,13 +254,14 @@ void thread__find_addr_location(struct thread *self, u8 cpumode,
 				struct addr_location *al,
 				symbol_filter_t filter)
 {
-	struct thread *thread = al->thread = self;
+	struct map_groups *mg = &self->mg;
 
+	al->thread = self;
 	al->addr = addr;
 
 	if (cpumode & PERF_RECORD_MISC_KERNEL) {
 		al->level = 'k';
-		thread = kthread;
+		mg = kmaps;
 	} else if (cpumode & PERF_RECORD_MISC_USER)
 		al->level = '.';
 	else {
@@ -270,7 +271,7 @@ void thread__find_addr_location(struct thread *self, u8 cpumode,
 		return;
 	}
 try_again:
-	al->map = thread__find_map(thread, type, al->addr);
+	al->map = map_groups__find(mg, type, al->addr);
 	if (al->map == NULL) {
 		/*
 		 * If this is outside of all known maps, and is a negative
@@ -281,8 +282,8 @@ try_again:
 		 * "[vdso]" dso, but for now lets use the old trick of looking
 		 * in the whole kernel symbol list.
 		 */
-		if ((long long)al->addr < 0 && thread != kthread) {
-			thread = kthread;
+		if ((long long)al->addr < 0 && mg != kmaps) {
+			mg = kmaps;
 			goto try_again;
 		}
 		al->sym = NULL;
@@ -310,3 +311,70 @@ int event__preprocess_sample(const event_t *self, struct addr_location *al,
 			al->level == 'H' ? "[hypervisor]" : "<not found>");
 	return 0;
 }
+
+int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
+{
+	u64 *array = event->sample.array;
+
+	if (type & PERF_SAMPLE_IP) {
+		data->ip = event->ip.ip;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TID) {
+		u32 *p = (u32 *)array;
+		data->pid = p[0];
+		data->tid = p[1];
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_TIME) {
+		data->time = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ADDR) {
+		data->addr = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_ID) {
+		data->id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_STREAM_ID) {
+		data->stream_id = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_CPU) {
+		u32 *p = (u32 *)array;
+		data->cpu = *p;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_PERIOD) {
+		data->period = *array;
+		array++;
+	}
+
+	if (type & PERF_SAMPLE_READ) {
+		pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
+		return -1;
+	}
+
+	if (type & PERF_SAMPLE_CALLCHAIN) {
+		data->callchain = (struct ip_callchain *)array;
+		array += 1 + data->callchain->nr;
+	}
+
+	if (type & PERF_SAMPLE_RAW) {
+		u32 *p = (u32 *)array;
+		data->raw_size = *p;
+		p++;
+		data->raw_data = p;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index a4cc8105cf67..51a96c2effde 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -56,11 +56,25 @@ struct read_event {
 	u64 id;
 };
 
-struct sample_event{
+struct sample_event {
 	struct perf_event_header        header;
 	u64 array[];
 };
 
+struct sample_data {
+	u64 ip;
+	u32 pid, tid;
+	u64 time;
+	u64 addr;
+	u64 id;
+	u64 stream_id;
+	u32 cpu;
+	u64 period;
+	struct ip_callchain *callchain;
+	u32 raw_size;
+	void *raw_data;
+};
+
 #define BUILD_ID_SIZE 20
 
 struct build_id_event {
@@ -89,10 +103,11 @@ void event__print_totals(void);
 
 enum map_type {
 	MAP__FUNCTION = 0,
-
-	MAP__NR_TYPES,
+	MAP__VARIABLE,
 };
 
+#define MAP__NR_TYPES (MAP__VARIABLE + 1)
+
 struct map {
 	union {
 		struct rb_node	rb_node;
@@ -136,6 +151,8 @@ int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *self, FILE *fp);
 struct symbol *map__find_symbol(struct map *self, u64 addr,
 				symbol_filter_t filter);
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+					symbol_filter_t filter);
 void map__fixup_start(struct map *self);
 void map__fixup_end(struct map *self);
 
@@ -155,5 +172,6 @@ int event__process_task(event_t *self);
 struct addr_location;
 int event__preprocess_sample(const event_t *self, struct addr_location *al,
 			     symbol_filter_t filter);
+int event__parse_sample(event_t *event, u64 type, struct sample_data *data);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4805e6dfd23c..f2e8d8715111 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -58,35 +58,19 @@ int perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
 	return 0;
 }
 
-/*
- * Create new perf.data header:
- */
-struct perf_header *perf_header__new(void)
+int perf_header__init(struct perf_header *self)
 {
-	struct perf_header *self = zalloc(sizeof(*self));
-
-	if (self != NULL) {
-		self->size = 1;
-		self->attr = malloc(sizeof(void *));
-
-		if (self->attr == NULL) {
-			free(self);
-			self = NULL;
-		}
-	}
-
-	return self;
+	self->size = 1;
+	self->attr = malloc(sizeof(void *));
+	return self->attr == NULL ? -ENOMEM : 0;
 }
 
-void perf_header__delete(struct perf_header *self)
+void perf_header__exit(struct perf_header *self)
 {
 	int i;
-
 	for (i = 0; i < self->attrs; ++i)
-		perf_header_attr__delete(self->attr[i]);
-
+                perf_header_attr__delete(self->attr[i]);
 	free(self->attr);
-	free(self);
 }
 
 int perf_header__add_attr(struct perf_header *self,
@@ -187,7 +171,9 @@ static int do_write(int fd, const void *buf, size_t size)
 
 static int __dsos__write_buildid_table(struct list_head *head, int fd)
 {
+#define NAME_ALIGN	64
 	struct dso *pos;
+	static const char zero_buf[NAME_ALIGN];
 
 	list_for_each_entry(pos, head, node) {
 		int err;
@@ -197,14 +183,17 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
 		if (!pos->has_build_id)
 			continue;
 		len = pos->long_name_len + 1;
-		len = ALIGN(len, 64);
+		len = ALIGN(len, NAME_ALIGN);
 		memset(&b, 0, sizeof(b));
 		memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
 		b.header.size = sizeof(b) + len;
 		err = do_write(fd, &b, sizeof(b));
 		if (err < 0)
 			return err;
-		err = do_write(fd, pos->long_name, len);
+		err = do_write(fd, pos->long_name, pos->long_name_len + 1);
+		if (err < 0)
+			return err;
+		err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
 		if (err < 0)
 			return err;
 	}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d1dbe2b79c42..d118d05d3abe 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,8 +55,8 @@ struct perf_header {
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 };
 
-struct perf_header *perf_header__new(void);
-void perf_header__delete(struct perf_header *self);
+int perf_header__init(struct perf_header *self);
+void perf_header__exit(struct perf_header *self);
 
 int perf_header__read(struct perf_header *self, int fd);
 int perf_header__write(struct perf_header *self, int fd, bool at_exit);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 69f94fe9db20..76bdca640a9b 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -104,43 +104,64 @@ void map__fixup_end(struct map *self)
 
 #define DSO__DELETED "(deleted)"
 
-struct symbol *map__find_symbol(struct map *self, u64 addr,
-				symbol_filter_t filter)
+static int map__load(struct map *self, symbol_filter_t filter)
 {
-	if (!dso__loaded(self->dso, self->type)) {
-		int nr = dso__load(self->dso, self, filter);
-
-		if (nr < 0) {
-			if (self->dso->has_build_id) {
-				char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
-				build_id__sprintf(self->dso->build_id,
-						  sizeof(self->dso->build_id),
-						  sbuild_id);
-				pr_warning("%s with build id %s not found",
-					   self->dso->long_name, sbuild_id);
-			} else
-				pr_warning("Failed to open %s",
-					   self->dso->long_name);
-			pr_warning(", continuing without symbols\n");
-			return NULL;
-		} else if (nr == 0) {
-			const char *name = self->dso->long_name;
-			const size_t len = strlen(name);
-			const size_t real_len = len - sizeof(DSO__DELETED);
-
-			if (len > sizeof(DSO__DELETED) &&
-			    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
-				pr_warning("%.*s was updated, restart the long running apps that use it!\n",
-					   (int)real_len, name);
-			} else {
-				pr_warning("no symbols found in %s, maybe install a debug package?\n", name);
-			}
-			return NULL;
+	const char *name = self->dso->long_name;
+	int nr = dso__load(self->dso, self, filter);
+
+	if (nr < 0) {
+		if (self->dso->has_build_id) {
+			char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+			build_id__sprintf(self->dso->build_id,
+					  sizeof(self->dso->build_id),
+					  sbuild_id);
+			pr_warning("%s with build id %s not found",
+				   name, sbuild_id);
+		} else
+			pr_warning("Failed to open %s", name);
+
+		pr_warning(", continuing without symbols\n");
+		return -1;
+	} else if (nr == 0) {
+		const size_t len = strlen(name);
+		const size_t real_len = len - sizeof(DSO__DELETED);
+
+		if (len > sizeof(DSO__DELETED) &&
+		    strcmp(name + real_len + 1, DSO__DELETED) == 0) {
+			pr_warning("%.*s was updated, restart the long "
+				   "running apps that use it!\n",
+				   (int)real_len, name);
+		} else {
+			pr_warning("no symbols found in %s, maybe install "
+				   "a debug package?\n", name);
 		}
+
+		return -1;
 	}
 
-	return self->dso->find_symbol(self->dso, self->type, addr);
+	return 0;
+}
+
+struct symbol *map__find_symbol(struct map *self, u64 addr,
+				symbol_filter_t filter)
+{
+	if (!dso__loaded(self->dso, self->type) && map__load(self, filter) < 0)
+		return NULL;
+
+	return dso__find_symbol(self->dso, self->type, addr);
+}
+
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+					symbol_filter_t filter)
+{
+	if (!dso__loaded(self->dso, self->type) && map__load(self, filter) < 0)
+		return NULL;
+
+	if (!dso__sorted_by_name(self->dso, self->type))
+		dso__sort_by_name(self->dso, self->type);
+
+	return dso__find_symbol_by_name(self->dso, self->type, name);
 }
 
 struct map *map__clone(struct map *self)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9e5dbd66d34d..e5bc0fb016b2 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -197,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config)
 			if (id == config) {
 				closedir(evt_dir);
 				closedir(sys_dir);
-				path = zalloc(sizeof(path));
+				path = zalloc(sizeof(*path));
 				path->system = malloc(MAX_EVENT_LENGTH);
 				if (!path->system) {
 					free(path);
@@ -467,7 +467,6 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
 	while ((evt_ent = readdir(evt_dir))) {
 		char event_opt[MAX_EVOPT_LEN + 1];
 		int len;
-		unsigned int rem = MAX_EVOPT_LEN;
 
 		if (!strcmp(evt_ent->d_name, ".")
 		    || !strcmp(evt_ent->d_name, "..")
@@ -475,20 +474,12 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
 		    || !strcmp(evt_ent->d_name, "filter"))
 			continue;
 
-		len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s", sys_name,
-			       evt_ent->d_name);
+		len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
+			       evt_ent->d_name, flags ? ":" : "",
+			       flags ?: "");
 		if (len < 0)
 			return EVT_FAILED;
 
-		rem -= len;
-		if (flags) {
-			if (rem < strlen(flags) + 1)
-				return EVT_FAILED;
-
-			strcat(event_opt, ":");
-			strcat(event_opt, flags);
-		}
-
 		if (parse_events(NULL, event_opt, 0))
 			return EVT_FAILED;
 	}
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c
index 6d8af48c925e..efebd5b476b3 100644
--- a/tools/perf/util/parse-options.c
+++ b/tools/perf/util/parse-options.c
@@ -430,6 +430,9 @@ int usage_with_options_internal(const char * const *usagestr,
 		pos = fprintf(stderr, "    ");
 		if (opts->short_name)
 			pos += fprintf(stderr, "-%c", opts->short_name);
+		else
+			pos += fprintf(stderr, "    ");
+
 		if (opts->long_name && opts->short_name)
 			pos += fprintf(stderr, ", ");
 		if (opts->long_name)
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index cd7fbda5e2a5..d14a4585bcaf 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -48,6 +48,9 @@
 
 /* If there is no space to write, returns -E2BIG. */
 static int e_snprintf(char *str, size_t size, const char *format, ...)
+	__attribute__((format(printf, 3, 4)));
+
+static int e_snprintf(char *str, size_t size, const char *format, ...)
 {
 	int ret;
 	va_list ap;
@@ -258,7 +261,7 @@ int synthesize_perf_probe_event(struct probe_point *pp)
 		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function,
 				 offs, pp->retprobe ? "%return" : "", line);
 	else
-		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->file, line);
+		ret = e_snprintf(buf, MAX_CMDLEN, "%s%s", pp->file, line);
 	if (ret <= 0)
 		goto error;
 	len = ret;
@@ -373,14 +376,32 @@ static void clear_probe_point(struct probe_point *pp)
 		free(pp->args);
 	for (i = 0; i < pp->found; i++)
 		free(pp->probes[i]);
-	memset(pp, 0, sizeof(pp));
+	memset(pp, 0, sizeof(*pp));
+}
+
+/* Show an event */
+static void show_perf_probe_event(const char *group, const char *event,
+				  const char *place, struct probe_point *pp)
+{
+	int i;
+	char buf[128];
+
+	e_snprintf(buf, 128, "%s:%s", group, event);
+	printf("  %-40s (on %s", buf, place);
+
+	if (pp->nr_args > 0) {
+		printf(" with");
+		for (i = 0; i < pp->nr_args; i++)
+			printf(" %s", pp->args[i]);
+	}
+	printf(")\n");
 }
 
 /* List up current perf-probe events */
 void show_perf_probe_events(void)
 {
 	unsigned int i;
-	int fd;
+	int fd, nr;
 	char *group, *event;
 	struct probe_point pp;
 	struct strlist *rawlist;
@@ -393,8 +414,13 @@ void show_perf_probe_events(void)
 	for (i = 0; i < strlist__nr_entries(rawlist); i++) {
 		ent = strlist__entry(rawlist, i);
 		parse_trace_kprobe_event(ent->s, &group, &event, &pp);
+		/* Synthesize only event probe point */
+		nr = pp.nr_args;
+		pp.nr_args = 0;
 		synthesize_perf_probe_event(&pp);
-		printf("[%s:%s]\t%s\n", group, event, pp.probes[0]);
+		pp.nr_args = nr;
+		/* Show an event */
+		show_perf_probe_event(group, event, pp.probes[0], &pp);
 		free(group);
 		free(event);
 		clear_probe_point(&pp);
@@ -404,21 +430,28 @@ void show_perf_probe_events(void)
 }
 
 /* Get current perf-probe event names */
-static struct strlist *get_perf_event_names(int fd)
+static struct strlist *get_perf_event_names(int fd, bool include_group)
 {
 	unsigned int i;
 	char *group, *event;
+	char buf[128];
 	struct strlist *sl, *rawlist;
 	struct str_node *ent;
 
 	rawlist = get_trace_kprobe_event_rawlist(fd);
 
-	sl = strlist__new(false, NULL);
+	sl = strlist__new(true, NULL);
 	for (i = 0; i < strlist__nr_entries(rawlist); i++) {
 		ent = strlist__entry(rawlist, i);
 		parse_trace_kprobe_event(ent->s, &group, &event, NULL);
-		strlist__add(sl, event);
+		if (include_group) {
+			if (e_snprintf(buf, 128, "%s:%s", group, event) < 0)
+				die("Failed to copy group:event name.");
+			strlist__add(sl, buf);
+		} else
+			strlist__add(sl, event);
 		free(group);
+		free(event);
 	}
 
 	strlist__delete(rawlist);
@@ -426,24 +459,30 @@ static struct strlist *get_perf_event_names(int fd)
 	return sl;
 }
 
-static int write_trace_kprobe_event(int fd, const char *buf)
+static void write_trace_kprobe_event(int fd, const char *buf)
 {
 	int ret;
 
+	pr_debug("Writing event: %s\n", buf);
 	ret = write(fd, buf, strlen(buf));
 	if (ret <= 0)
-		die("Failed to create event.");
-	else
-		printf("Added new event: %s\n", buf);
-
-	return ret;
+		die("Failed to write event: %s", strerror(errno));
 }
 
 static void get_new_event_name(char *buf, size_t len, const char *base,
 			       struct strlist *namelist)
 {
 	int i, ret;
-	for (i = 0; i < MAX_EVENT_INDEX; i++) {
+
+	/* Try no suffix */
+	ret = e_snprintf(buf, len, "%s", base);
+	if (ret < 0)
+		die("snprintf() failed: %s", strerror(-ret));
+	if (!strlist__has_entry(namelist, buf))
+		return;
+
+	/* Try to add suffix */
+	for (i = 1; i < MAX_EVENT_INDEX; i++) {
 		ret = e_snprintf(buf, len, "%s_%d", base, i);
 		if (ret < 0)
 			die("snprintf() failed: %s", strerror(-ret));
@@ -464,7 +503,7 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
 
 	fd = open_kprobe_events(O_RDWR, O_APPEND);
 	/* Get current event names */
-	namelist = get_perf_event_names(fd);
+	namelist = get_perf_event_names(fd, false);
 
 	for (j = 0; j < nr_probes; j++) {
 		pp = probes + j;
@@ -476,9 +515,73 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes)
 				 PERFPROBE_GROUP, event,
 				 pp->probes[i]);
 			write_trace_kprobe_event(fd, buf);
+			printf("Added new event:\n");
+			/* Get the first parameter (probe-point) */
+			sscanf(pp->probes[i], "%s", buf);
+			show_perf_probe_event(PERFPROBE_GROUP, event,
+					      buf, pp);
 			/* Add added event name to namelist */
 			strlist__add(namelist, event);
 		}
 	}
+	/* Show how to use the event. */
+	printf("\nYou can now use it on all perf tools, such as:\n\n");
+	printf("\tperf record -e %s:%s -a sleep 1\n\n", PERFPROBE_GROUP, event);
+
+	strlist__delete(namelist);
+	close(fd);
+}
+
+static void del_trace_kprobe_event(int fd, const char *group,
+				   const char *event, struct strlist *namelist)
+{
+	char buf[128];
+
+	if (e_snprintf(buf, 128, "%s:%s", group, event) < 0)
+		die("Failed to copy event.");
+	if (!strlist__has_entry(namelist, buf)) {
+		pr_warning("Warning: event \"%s\" is not found.\n", buf);
+		return;
+	}
+	/* Convert from perf-probe event to trace-kprobe event */
+	if (e_snprintf(buf, 128, "-:%s/%s", group, event) < 0)
+		die("Failed to copy event.");
+
+	write_trace_kprobe_event(fd, buf);
+	printf("Remove event: %s:%s\n", group, event);
+}
+
+void del_trace_kprobe_events(struct strlist *dellist)
+{
+	int fd;
+	unsigned int i;
+	const char *group, *event;
+	char *p, *str;
+	struct str_node *ent;
+	struct strlist *namelist;
+
+	fd = open_kprobe_events(O_RDWR, O_APPEND);
+	/* Get current event names */
+	namelist = get_perf_event_names(fd, true);
+
+	for (i = 0; i < strlist__nr_entries(dellist); i++) {
+		ent = strlist__entry(dellist, i);
+		str = strdup(ent->s);
+		if (!str)
+			die("Failed to copy event.");
+		p = strchr(str, ':');
+		if (p) {
+			group = str;
+			*p = '\0';
+			event = p + 1;
+		} else {
+			group = PERFPROBE_GROUP;
+			event = str;
+		}
+		del_trace_kprobe_event(fd, group, event, namelist);
+		free(str);
+	}
+	strlist__delete(namelist);
 	close(fd);
 }
+
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 0c6fe56fe38a..f752159124ae 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -10,6 +10,7 @@ extern void parse_trace_kprobe_event(const char *str, char **group,
 				     char **event, struct probe_point *pp);
 extern int synthesize_trace_kprobe_event(struct probe_point *pp);
 extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes);
+extern void del_trace_kprobe_events(struct strlist *dellist);
 extern void show_perf_probe_events(void);
 
 /* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 293cdfc1b8ca..4585f1d86792 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -106,7 +106,7 @@ static int strtailcmp(const char *s1, const char *s2)
 {
 	int i1 = strlen(s1);
 	int i2 = strlen(s2);
-	while (--i1 > 0 && --i2 > 0) {
+	while (--i1 >= 0 && --i2 >= 0) {
 		if (s1[i1] != s2[i2])
 			return s1[i1] - s2[i2];
 	}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
new file mode 100644
index 000000000000..707ce1cb1621
--- /dev/null
+++ b/tools/perf/util/session.c
@@ -0,0 +1,80 @@
+#include <linux/kernel.h>
+
+#include <unistd.h>
+#include <sys/types.h>
+
+#include "session.h"
+#include "util.h"
+
+static int perf_session__open(struct perf_session *self, bool force)
+{
+	struct stat input_stat;
+
+	self->fd = open(self->filename, O_RDONLY);
+	if (self->fd < 0) {
+		pr_err("failed to open file: %s", self->filename);
+		if (!strcmp(self->filename, "perf.data"))
+			pr_err("  (try 'perf record' first)");
+		pr_err("\n");
+		return -errno;
+	}
+
+	if (fstat(self->fd, &input_stat) < 0)
+		goto out_close;
+
+	if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
+		pr_err("file %s not owned by current user or root\n",
+		       self->filename);
+		goto out_close;
+	}
+
+	if (!input_stat.st_size) {
+		pr_info("zero-sized file (%s), nothing to do!\n",
+			self->filename);
+		goto out_close;
+	}
+
+	if (perf_header__read(&self->header, self->fd) < 0) {
+		pr_err("incompatible file format");
+		goto out_close;
+	}
+
+	self->size = input_stat.st_size;
+	return 0;
+
+out_close:
+	close(self->fd);
+	self->fd = -1;
+	return -1;
+}
+
+struct perf_session *perf_session__new(const char *filename, int mode, bool force)
+{
+	size_t len = strlen(filename) + 1;
+	struct perf_session *self = zalloc(sizeof(*self) + len);
+
+	if (self == NULL)
+		goto out;
+
+	if (perf_header__init(&self->header) < 0)
+		goto out_delete;
+
+	memcpy(self->filename, filename, len);
+
+	if (mode == O_RDONLY && perf_session__open(self, force) < 0) {
+		perf_session__delete(self);
+		self = NULL;
+	}
+out:
+	return self;
+out_delete:
+	free(self);
+	return NULL;
+}
+
+void perf_session__delete(struct perf_session *self)
+{
+	perf_header__exit(&self->header);
+	close(self->fd);
+	free(self);
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
new file mode 100644
index 000000000000..f3699c8c8ed4
--- /dev/null
+++ b/tools/perf/util/session.h
@@ -0,0 +1,16 @@
+#ifndef __PERF_SESSION_H
+#define __PERF_SESSION_H
+
+#include "header.h"
+
+struct perf_session {
+	struct perf_header	header;
+	unsigned long		size;
+	int			fd;
+	char filename[0];
+};
+
+struct perf_session *perf_session__new(const char *filename, int mode, bool force);
+void perf_session__delete(struct perf_session *self);
+
+#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index fffcb937cdcb..d3d9fed74f1d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -29,11 +29,9 @@ enum dso_origin {
 };
 
 static void dsos__add(struct list_head *head, struct dso *dso);
-static struct map *thread__find_map_by_name(struct thread *self, char *name);
 static struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
-struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
 static int dso__load_kernel_sym(struct dso *self, struct map *map,
-				struct thread *thread, symbol_filter_t filter);
+				struct map_groups *mg, symbol_filter_t filter);
 unsigned int symbol__priv_size;
 static int vmlinux_path__nr_entries;
 static char **vmlinux_path;
@@ -43,19 +41,41 @@ static struct symbol_conf symbol_conf__defaults = {
 	.try_vmlinux_path = true,
 };
 
-static struct thread kthread_mem;
-struct thread *kthread = &kthread_mem;
+static struct map_groups kmaps_mem;
+struct map_groups *kmaps = &kmaps_mem;
 
 bool dso__loaded(const struct dso *self, enum map_type type)
 {
 	return self->loaded & (1 << type);
 }
 
+bool dso__sorted_by_name(const struct dso *self, enum map_type type)
+{
+	return self->sorted_by_name & (1 << type);
+}
+
 static void dso__set_loaded(struct dso *self, enum map_type type)
 {
 	self->loaded |= (1 << type);
 }
 
+static void dso__set_sorted_by_name(struct dso *self, enum map_type type)
+{
+	self->sorted_by_name |= (1 << type);
+}
+
+static bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+{
+	switch (map_type) {
+	case MAP__FUNCTION:
+		return symbol_type == 'T' || symbol_type == 'W';
+	case MAP__VARIABLE:
+		return symbol_type == 'D' || symbol_type == 'd';
+	default:
+		return false;
+	}
+}
+
 static void symbols__fixup_end(struct rb_root *self)
 {
 	struct rb_node *nd, *prevnd = rb_first(self);
@@ -79,7 +99,7 @@ static void symbols__fixup_end(struct rb_root *self)
 		curr->end = roundup(curr->start, 4096);
 }
 
-static void __thread__fixup_maps_end(struct thread *self, enum map_type type)
+static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
 {
 	struct map *prev, *curr;
 	struct rb_node *nd, *prevnd = rb_first(&self->maps[type]);
@@ -102,11 +122,11 @@ static void __thread__fixup_maps_end(struct thread *self, enum map_type type)
 	curr->end = ~0UL;
 }
 
-static void thread__fixup_maps_end(struct thread *self)
+static void map_groups__fixup_end(struct map_groups *self)
 {
 	int i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		__thread__fixup_maps_end(self, i);
+		__map_groups__fixup_end(self, i);
 }
 
 static struct symbol *symbol__new(u64 start, u64 len, const char *name)
@@ -164,11 +184,11 @@ struct dso *dso__new(const char *name)
 		dso__set_long_name(self, self->name);
 		self->short_name = self->name;
 		for (i = 0; i < MAP__NR_TYPES; ++i)
-			self->symbols[i] = RB_ROOT;
-		self->find_symbol = dso__find_symbol;
+			self->symbols[i] = self->symbol_names[i] = RB_ROOT;
 		self->slen_calculated = 0;
 		self->origin = DSO__ORIG_NOT_FOUND;
 		self->loaded = 0;
+		self->sorted_by_name = 0;
 		self->has_build_id = 0;
 	}
 
@@ -246,11 +266,85 @@ static struct symbol *symbols__find(struct rb_root *self, u64 ip)
 	return NULL;
 }
 
-struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr)
+struct symbol_name_rb_node {
+	struct rb_node	rb_node;
+	struct symbol	sym;
+};
+
+static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
+{
+	struct rb_node **p = &self->rb_node;
+	struct rb_node *parent = NULL;
+	struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s;
+
+	while (*p != NULL) {
+		parent = *p;
+		s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
+		if (strcmp(sym->name, s->sym.name) < 0)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&symn->rb_node, parent, p);
+	rb_insert_color(&symn->rb_node, self);
+}
+
+static void symbols__sort_by_name(struct rb_root *self, struct rb_root *source)
+{
+	struct rb_node *nd;
+
+	for (nd = rb_first(source); nd; nd = rb_next(nd)) {
+		struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+		symbols__insert_by_name(self, pos);
+	}
+}
+
+static struct symbol *symbols__find_by_name(struct rb_root *self, const char *name)
+{
+	struct rb_node *n;
+
+	if (self == NULL)
+		return NULL;
+
+	n = self->rb_node;
+
+	while (n) {
+		struct symbol_name_rb_node *s;
+		int cmp;
+
+		s = rb_entry(n, struct symbol_name_rb_node, rb_node);
+		cmp = strcmp(name, s->sym.name);
+
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return &s->sym;
+	}
+
+	return NULL;
+}
+
+struct symbol *dso__find_symbol(struct dso *self,
+				enum map_type type, u64 addr)
 {
 	return symbols__find(&self->symbols[type], addr);
 }
 
+struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
+					const char *name)
+{
+	return symbols__find_by_name(&self->symbol_names[type], name);
+}
+
+void dso__sort_by_name(struct dso *self, enum map_type type)
+{
+	dso__set_sorted_by_name(self, type);
+	return symbols__sort_by_name(&self->symbol_names[type],
+				     &self->symbols[type]);
+}
+
 int build_id__sprintf(u8 *self, int len, char *bf)
 {
 	char *bid = bf;
@@ -327,10 +421,7 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map)
 			continue;
 
 		symbol_type = toupper(line[len]);
-		/*
-		 * We're interested only in code ('T'ext)
-		 */
-		if (symbol_type != 'T' && symbol_type != 'W')
+		if (!symbol_type__is_a(symbol_type, map->type))
 			continue;
 
 		symbol_name = line + len + 2;
@@ -364,8 +455,8 @@ out_failure:
  * kernel range is broken in several maps, named [kernel].N, as we don't have
  * the original ELF section names vmlinux have.
  */
-static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread *thread,
-			       symbol_filter_t filter)
+static int dso__split_kallsyms(struct dso *self, struct map *map,
+			       struct map_groups *mg, symbol_filter_t filter)
 {
 	struct map *curr_map = map;
 	struct symbol *pos;
@@ -382,13 +473,13 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread
 
 		module = strchr(pos->name, '\t');
 		if (module) {
-			if (!thread->use_modules)
+			if (!mg->use_modules)
 				goto discard_symbol;
 
 			*module++ = '\0';
 
 			if (strcmp(self->name, module)) {
-				curr_map = thread__find_map_by_name(thread, module);
+				curr_map = map_groups__find_by_name(mg, map->type, module);
 				if (curr_map == NULL) {
 					pr_debug("/proc/{kallsyms,modules} "
 					         "inconsistency!\n");
@@ -419,7 +510,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread
 			}
 
 			curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
-			__thread__insert_map(thread, curr_map);
+			map_groups__insert(mg, curr_map);
 			++kernel_range;
 		}
 
@@ -440,7 +531,7 @@ discard_symbol:		rb_erase(&pos->rb_node, root);
 
 
 static int dso__load_kallsyms(struct dso *self, struct map *map,
-			      struct thread *thread, symbol_filter_t filter)
+			      struct map_groups *mg, symbol_filter_t filter)
 {
 	if (dso__load_all_kallsyms(self, map) < 0)
 		return -1;
@@ -448,13 +539,13 @@ static int dso__load_kallsyms(struct dso *self, struct map *map,
 	symbols__fixup_end(&self->symbols[map->type]);
 	self->origin = DSO__ORIG_KERNEL;
 
-	return dso__split_kallsyms(self, map, thread, filter);
+	return dso__split_kallsyms(self, map, mg, filter);
 }
 
 size_t kernel_maps__fprintf(FILE *fp)
 {
 	size_t printed = fprintf(fp, "Kernel maps:\n");
-	printed += thread__fprintf_maps(kthread, fp);
+	printed += map_groups__fprintf_maps(kmaps, fp);
 	return printed + fprintf(fp, "END kernel maps\n");
 }
 
@@ -544,6 +635,13 @@ static inline int elf_sym__is_function(const GElf_Sym *sym)
 	       sym->st_shndx != SHN_UNDEF;
 }
 
+static inline bool elf_sym__is_object(const GElf_Sym *sym)
+{
+	return elf_sym__type(sym) == STT_OBJECT &&
+		sym->st_name != 0 &&
+		sym->st_shndx != SHN_UNDEF;
+}
+
 static inline int elf_sym__is_label(const GElf_Sym *sym)
 {
 	return elf_sym__type(sym) == STT_NOTYPE &&
@@ -564,6 +662,12 @@ static inline int elf_sec__is_text(const GElf_Shdr *shdr,
 	return strstr(elf_sec__name(shdr, secstrs), "text") != NULL;
 }
 
+static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
+				    const Elf_Data *secstrs)
+{
+	return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
+}
+
 static inline const char *elf_sym__name(const GElf_Sym *sym,
 					const Elf_Data *symstrs)
 {
@@ -744,8 +848,32 @@ out:
 	return 0;
 }
 
+static bool elf_sym__is_a(GElf_Sym *self, enum map_type type)
+{
+	switch (type) {
+	case MAP__FUNCTION:
+		return elf_sym__is_function(self);
+	case MAP__VARIABLE:
+		return elf_sym__is_object(self);
+	default:
+		return false;
+	}
+}
+
+static bool elf_sec__is_a(GElf_Shdr *self, Elf_Data *secstrs, enum map_type type)
+{
+	switch (type) {
+	case MAP__FUNCTION:
+		return elf_sec__is_text(self, secstrs);
+	case MAP__VARIABLE:
+		return elf_sec__is_data(self, secstrs);
+	default:
+		return false;
+	}
+}
+
 static int dso__load_sym(struct dso *self, struct map *map,
-			 struct thread *thread, const char *name, int fd,
+			 struct map_groups *mg, const char *name, int fd,
 			 symbol_filter_t filter, int kernel, int kmodule)
 {
 	struct map *curr_map = map;
@@ -818,7 +946,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 		int is_label = elf_sym__is_label(&sym);
 		const char *section_name;
 
-		if (!is_label && !elf_sym__is_function(&sym))
+		if (!is_label && !elf_sym__is_a(&sym, map->type))
 			continue;
 
 		sec = elf_getscn(elf, sym.st_shndx);
@@ -827,7 +955,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 
 		gelf_getshdr(sec, &shdr);
 
-		if (is_label && !elf_sec__is_text(&shdr, secstrs))
+		if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
 			continue;
 
 		elf_name = elf_sym__name(&sym, symstrs);
@@ -849,7 +977,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 			snprintf(dso_name, sizeof(dso_name),
 				 "%s%s", self->short_name, section_name);
 
-			curr_map = thread__find_map_by_name(thread, dso_name);
+			curr_map = map_groups__find_by_name(mg, map->type, dso_name);
 			if (curr_map == NULL) {
 				u64 start = sym.st_value;
 
@@ -868,7 +996,7 @@ static int dso__load_sym(struct dso *self, struct map *map,
 				curr_map->map_ip = identity__map_ip;
 				curr_map->unmap_ip = identity__map_ip;
 				curr_dso->origin = DSO__ORIG_KERNEL;
-				__thread__insert_map(kthread, curr_map);
+				map_groups__insert(kmaps, curr_map);
 				dsos__add(&dsos__kernel, curr_dso);
 			} else
 				curr_dso = curr_map->dso;
@@ -938,8 +1066,9 @@ static bool __dsos__read_build_ids(struct list_head *head)
 
 bool dsos__read_build_ids(void)
 {
-	return __dsos__read_build_ids(&dsos__kernel) ||
-	       __dsos__read_build_ids(&dsos__user);
+	bool kbuildids = __dsos__read_build_ids(&dsos__kernel),
+	     ubuildids = __dsos__read_build_ids(&dsos__user);
+	return kbuildids || ubuildids;
 }
 
 /*
@@ -1093,7 +1222,7 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
 	dso__set_loaded(self, map->type);
 
 	if (self->kernel)
-		return dso__load_kernel_sym(self, map, kthread, filter);
+		return dso__load_kernel_sym(self, map, kmaps, filter);
 
 	name = malloc(size);
 	if (!name)
@@ -1179,11 +1308,12 @@ out:
 	return ret;
 }
 
-static struct map *thread__find_map_by_name(struct thread *self, char *name)
+struct map *map_groups__find_by_name(struct map_groups *self,
+				     enum map_type type, const char *name)
 {
 	struct rb_node *nd;
 
-	for (nd = rb_first(&self->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
 		struct map *map = rb_entry(nd, struct map, rb_node);
 
 		if (map->dso && strcmp(map->dso->name, name) == 0)
@@ -1227,7 +1357,7 @@ static int dsos__set_modules_path_dir(char *dirname)
 				 (int)(dot - dent->d_name), dent->d_name);
 
 			strxfrchar(dso_name, '-', '_');
-			map = thread__find_map_by_name(kthread, dso_name);
+			map = map_groups__find_by_name(kmaps, MAP__FUNCTION, dso_name);
 			if (map == NULL)
 				continue;
 
@@ -1280,7 +1410,7 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 	return self;
 }
 
-static int thread__create_module_maps(struct thread *self)
+static int map_groups__create_module_maps(struct map_groups *self)
 {
 	char *line = NULL;
 	size_t n;
@@ -1337,7 +1467,7 @@ static int thread__create_module_maps(struct thread *self)
 			dso->has_build_id = true;
 
 		dso->origin = DSO__ORIG_KMODULE;
-		__thread__insert_map(self, map);
+		map_groups__insert(self, map);
 		dsos__add(&dsos__kernel, dso);
 	}
 
@@ -1352,7 +1482,8 @@ out_failure:
 	return -1;
 }
 
-static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *thread,
+static int dso__load_vmlinux(struct dso *self, struct map *map,
+			     struct map_groups *mg,
 			     const char *vmlinux, symbol_filter_t filter)
 {
 	int err = -1, fd;
@@ -1386,14 +1517,14 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *t
 		return -1;
 
 	dso__set_loaded(self, map->type);
-	err = dso__load_sym(self, map, thread, self->long_name, fd, filter, 1, 0);
+	err = dso__load_sym(self, map, mg, self->long_name, fd, filter, 1, 0);
 	close(fd);
 
 	return err;
 }
 
 static int dso__load_kernel_sym(struct dso *self, struct map *map,
-				struct thread *thread, symbol_filter_t filter)
+				struct map_groups *mg, symbol_filter_t filter)
 {
 	int err;
 	bool is_kallsyms;
@@ -1403,7 +1534,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 		pr_debug("Looking at the vmlinux_path (%d entries long)\n",
 			 vmlinux_path__nr_entries);
 		for (i = 0; i < vmlinux_path__nr_entries; ++i) {
-			err = dso__load_vmlinux(self, map, thread,
+			err = dso__load_vmlinux(self, map, mg,
 						vmlinux_path[i], filter);
 			if (err > 0) {
 				pr_debug("Using %s for symbols\n",
@@ -1419,12 +1550,12 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
 	if (is_kallsyms)
 		goto do_kallsyms;
 
-	err = dso__load_vmlinux(self, map, thread, self->long_name, filter);
+	err = dso__load_vmlinux(self, map, mg, self->long_name, filter);
 	if (err <= 0) {
 		pr_info("The file %s cannot be used, "
 			"trying to use /proc/kallsyms...", self->long_name);
 do_kallsyms:
-		err = dso__load_kallsyms(self, map, thread, filter);
+		err = dso__load_kallsyms(self, map, mg, filter);
 		if (err > 0 && !is_kallsyms)
                         dso__set_long_name(self, strdup("[kernel.kallsyms]"));
 	}
@@ -1507,42 +1638,59 @@ size_t dsos__fprintf_buildid(FILE *fp)
 		__dsos__fprintf_buildid(&dsos__user, fp));
 }
 
-static int thread__create_kernel_map(struct thread *self, const char *vmlinux)
+static struct dso *dsos__create_kernel( const char *vmlinux)
 {
-	struct map *kmap;
 	struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
 
 	if (kernel == NULL)
-		return -1;
-
-	kmap = map__new2(0, kernel, MAP__FUNCTION);
-	if (kmap == NULL)
-		goto out_delete_kernel_dso;
+		return NULL;
 
-	kmap->map_ip	   = kmap->unmap_ip = identity__map_ip;
 	kernel->short_name = "[kernel]";
 	kernel->kernel	   = 1;
 
 	vdso = dso__new("[vdso]");
 	if (vdso == NULL)
-		goto out_delete_kernel_map;
+		goto out_delete_kernel_dso;
 	dso__set_loaded(vdso, MAP__FUNCTION);
 
 	if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
 				 sizeof(kernel->build_id)) == 0)
 		kernel->has_build_id = true;
 
-	__thread__insert_map(self, kmap);
 	dsos__add(&dsos__kernel, kernel);
 	dsos__add(&dsos__user, vdso);
 
-	return 0;
+	return kernel;
 
-out_delete_kernel_map:
-	map__delete(kmap);
 out_delete_kernel_dso:
 	dso__delete(kernel);
-	return -1;
+	return NULL;
+}
+
+static int map_groups__create_kernel_maps(struct map_groups *self, const char *vmlinux)
+{
+	struct map *functions, *variables;
+	struct dso *kernel = dsos__create_kernel(vmlinux);
+
+	if (kernel == NULL)
+		return -1;
+
+	functions = map__new2(0, kernel, MAP__FUNCTION);
+	if (functions == NULL)
+		return -1;
+
+	variables = map__new2(0, kernel, MAP__VARIABLE);
+	if (variables == NULL) {
+		map__delete(functions);
+		return -1;
+	}
+
+	functions->map_ip = functions->unmap_ip =
+		variables->map_ip = variables->unmap_ip = identity__map_ip;
+	map_groups__insert(self, functions);
+	map_groups__insert(self, variables);
+
+	return 0;
 }
 
 static void vmlinux_path__exit(void)
@@ -1606,23 +1754,26 @@ int symbol__init(struct symbol_conf *conf)
 
 	elf_version(EV_CURRENT);
 	symbol__priv_size = pconf->priv_size;
-	thread__init(kthread, 0);
+	if (pconf->sort_by_name)
+		symbol__priv_size += (sizeof(struct symbol_name_rb_node) -
+				      sizeof(struct symbol));
+	map_groups__init(kmaps);
 
 	if (pconf->try_vmlinux_path && vmlinux_path__init() < 0)
 		return -1;
 
-	if (thread__create_kernel_map(kthread, pconf->vmlinux_name) < 0) {
+	if (map_groups__create_kernel_maps(kmaps, pconf->vmlinux_name) < 0) {
 		vmlinux_path__exit();
 		return -1;
 	}
 
-	kthread->use_modules = pconf->use_modules;
-	if (pconf->use_modules && thread__create_module_maps(kthread) < 0)
+	kmaps->use_modules = pconf->use_modules;
+	if (pconf->use_modules && map_groups__create_module_maps(kmaps) < 0)
 		pr_debug("Failed to load list of modules in use, "
 			 "continuing...\n");
 	/*
 	 * Now that we have all the maps created, just set the ->end of them:
 	 */
-	thread__fixup_maps_end(kthread);
+	map_groups__fixup_end(kmaps);
 	return 0;
 }
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 17003efa0b39..cf99f88adf39 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -52,7 +52,8 @@ struct symbol {
 struct symbol_conf {
 	unsigned short	priv_size;
 	bool		try_vmlinux_path,
-			use_modules;
+			use_modules,
+			sort_by_name;
 	const char	*vmlinux_name;
 };
 
@@ -74,13 +75,13 @@ struct addr_location {
 struct dso {
 	struct list_head node;
 	struct rb_root	 symbols[MAP__NR_TYPES];
-	struct symbol    *(*find_symbol)(struct dso *self,
-					 enum map_type type, u64 addr);
+	struct rb_root	 symbol_names[MAP__NR_TYPES];
 	u8		 adjust_symbols:1;
 	u8		 slen_calculated:1;
 	u8		 has_build_id:1;
 	u8		 kernel:1;
 	unsigned char	 origin;
+	u8		 sorted_by_name;
 	u8		 loaded;
 	u8		 build_id[BUILD_ID_SIZE];
 	u16		 long_name_len;
@@ -93,6 +94,9 @@ struct dso *dso__new(const char *name);
 void dso__delete(struct dso *self);
 
 bool dso__loaded(const struct dso *self, enum map_type type);
+bool dso__sorted_by_name(const struct dso *self, enum map_type type);
+
+void dso__sort_by_name(struct dso *self, enum map_type type);
 
 struct dso *dsos__findnew(const char *name);
 int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
@@ -103,6 +107,9 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
 size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
 char dso__symtab_origin(const struct dso *self);
 void dso__set_build_id(struct dso *self, void *build_id);
+struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
+					const char *name);
 
 int filename__read_build_id(const char *filename, void *bf, size_t size);
 int sysfs__read_build_id(const char *filename, void *bf, size_t size);
@@ -113,8 +120,8 @@ size_t kernel_maps__fprintf(FILE *fp);
 
 int symbol__init(struct symbol_conf *conf);
 
-struct thread;
-struct thread *kthread;
+struct map_groups;
+struct map_groups *kmaps;
 extern struct list_head dsos__user, dsos__kernel;
 extern struct dso *vdso;
 #endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 603f5610861b..b68a00ea4121 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -9,11 +9,9 @@
 static struct rb_root threads;
 static struct thread *last_match;
 
-void thread__init(struct thread *self, pid_t pid)
+void map_groups__init(struct map_groups *self)
 {
 	int i;
-	self->pid = pid;
-	self->comm = NULL;
 	for (i = 0; i < MAP__NR_TYPES; ++i) {
 		self->maps[i] = RB_ROOT;
 		INIT_LIST_HEAD(&self->removed_maps[i]);
@@ -25,7 +23,8 @@ static struct thread *thread__new(pid_t pid)
 	struct thread *self = zalloc(sizeof(*self));
 
 	if (self != NULL) {
-		thread__init(self, pid);
+		map_groups__init(&self->mg);
+		self->pid = pid;
 		self->comm = malloc(32);
 		if (self->comm)
 			snprintf(self->comm, 32, ":%d", self->pid);
@@ -55,10 +54,11 @@ int thread__comm_len(struct thread *self)
 
 static const char *map_type__name[MAP__NR_TYPES] = {
 	[MAP__FUNCTION] = "Functions",
+	[MAP__VARIABLE] = "Variables",
 };
 
-static size_t __thread__fprintf_maps(struct thread *self,
-				     enum map_type type, FILE *fp)
+static size_t __map_groups__fprintf_maps(struct map_groups *self,
+					 enum map_type type, FILE *fp)
 {
 	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
 	struct rb_node *nd;
@@ -76,16 +76,16 @@ static size_t __thread__fprintf_maps(struct thread *self,
 	return printed;
 }
 
-size_t thread__fprintf_maps(struct thread *self, FILE *fp)
+size_t map_groups__fprintf_maps(struct map_groups *self, FILE *fp)
 {
 	size_t printed = 0, i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += __thread__fprintf_maps(self, i, fp);
+		printed += __map_groups__fprintf_maps(self, i, fp);
 	return printed;
 }
 
-static size_t __thread__fprintf_removed_maps(struct thread *self,
-					     enum map_type type, FILE *fp)
+static size_t __map_groups__fprintf_removed_maps(struct map_groups *self,
+						 enum map_type type, FILE *fp)
 {
 	struct map *pos;
 	size_t printed = 0;
@@ -101,20 +101,25 @@ static size_t __thread__fprintf_removed_maps(struct thread *self,
 	return printed;
 }
 
-static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp)
+static size_t map_groups__fprintf_removed_maps(struct map_groups *self, FILE *fp)
 {
 	size_t printed = 0, i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += __thread__fprintf_removed_maps(self, i, fp);
+		printed += __map_groups__fprintf_removed_maps(self, i, fp);
 	return printed;
 }
 
-static size_t thread__fprintf(struct thread *self, FILE *fp)
+static size_t map_groups__fprintf(struct map_groups *self, FILE *fp)
 {
-	size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm);
-	printed += thread__fprintf_removed_maps(self, fp);
+	size_t printed = map_groups__fprintf_maps(self, fp);
 	printed += fprintf(fp, "Removed maps:\n");
-	return printed + thread__fprintf_removed_maps(self, fp);
+	return printed + map_groups__fprintf_removed_maps(self, fp);
+}
+
+static size_t thread__fprintf(struct thread *self, FILE *fp)
+{
+	return fprintf(fp, "Thread %d %s\n", self->pid, self->comm) +
+	       map_groups__fprintf(&self->mg, fp);
 }
 
 struct thread *threads__findnew(pid_t pid)
@@ -168,7 +173,8 @@ struct thread *register_idle_thread(void)
 	return thread;
 }
 
-static void thread__remove_overlappings(struct thread *self, struct map *map)
+static void map_groups__remove_overlappings(struct map_groups *self,
+					    struct map *map)
 {
 	struct rb_root *root = &self->maps[map->type];
 	struct rb_node *next = rb_first(root);
@@ -238,12 +244,15 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
 
 void thread__insert_map(struct thread *self, struct map *map)
 {
-	thread__remove_overlappings(self, map);
-	maps__insert(&self->maps[map->type], map);
+	map_groups__remove_overlappings(&self->mg, map);
+	map_groups__insert(&self->mg, map);
 }
 
-static int thread__clone_maps(struct thread *self, struct thread *parent,
-			      enum map_type type)
+/*
+ * XXX This should not really _copy_ te maps, but refcount them.
+ */
+static int map_groups__clone(struct map_groups *self,
+			     struct map_groups *parent, enum map_type type)
 {
 	struct rb_node *nd;
 	for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
@@ -251,7 +260,7 @@ static int thread__clone_maps(struct thread *self, struct thread *parent,
 		struct map *new = map__clone(map);
 		if (new == NULL)
 			return -ENOMEM;
-		thread__insert_map(self, new);
+		map_groups__insert(self, new);
 	}
 	return 0;
 }
@@ -267,7 +276,7 @@ int thread__fork(struct thread *self, struct thread *parent)
 		return -ENOMEM;
 
 	for (i = 0; i < MAP__NR_TYPES; ++i)
-		if (thread__clone_maps(self, parent, i) < 0)
+		if (map_groups__clone(&self->mg, &parent->mg, i) < 0)
 			return -ENOMEM;
 	return 0;
 }
@@ -286,11 +295,11 @@ size_t threads__fprintf(FILE *fp)
 	return ret;
 }
 
-struct symbol *thread__find_symbol(struct thread *self,
-				   enum map_type type, u64 addr,
-				   symbol_filter_t filter)
+struct symbol *map_groups__find_symbol(struct map_groups *self,
+				       enum map_type type, u64 addr,
+				       symbol_filter_t filter)
 {
-	struct map *map = thread__find_map(self, type, addr);
+	struct map *map = map_groups__find(self, type, addr);
 
 	if (map != NULL)
 		return map__find_symbol(map, map->map_ip(map, addr), filter);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 686d6e914d9e..1751802a09ba 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -5,52 +5,66 @@
 #include <unistd.h>
 #include "symbol.h"
 
-struct thread {
-	struct rb_node		rb_node;
+struct map_groups {
 	struct rb_root		maps[MAP__NR_TYPES];
 	struct list_head	removed_maps[MAP__NR_TYPES];
-	pid_t			pid;
 	bool			use_modules;
+};
+
+struct thread {
+	struct rb_node		rb_node;
+	struct map_groups	mg;
+	pid_t			pid;
 	char			shortname[3];
 	char			*comm;
 	int			comm_len;
 };
 
-void thread__init(struct thread *self, pid_t pid);
+void map_groups__init(struct map_groups *self);
 int thread__set_comm(struct thread *self, const char *comm);
 int thread__comm_len(struct thread *self);
 struct thread *threads__findnew(pid_t pid);
 struct thread *register_idle_thread(void);
 void thread__insert_map(struct thread *self, struct map *map);
 int thread__fork(struct thread *self, struct thread *parent);
-size_t thread__fprintf_maps(struct thread *self, FILE *fp);
+size_t map_groups__fprintf_maps(struct map_groups *self, FILE *fp);
 size_t threads__fprintf(FILE *fp);
 
 void maps__insert(struct rb_root *maps, struct map *map);
 struct map *maps__find(struct rb_root *maps, u64 addr);
 
-static inline struct map *thread__find_map(struct thread *self,
+static inline void map_groups__insert(struct map_groups *self, struct map *map)
+{
+	 maps__insert(&self->maps[map->type], map);
+}
+
+static inline struct map *map_groups__find(struct map_groups *self,
 					   enum map_type type, u64 addr)
 {
-	return self ? maps__find(&self->maps[type], addr) : NULL;
+	return maps__find(&self->maps[type], addr);
 }
 
-static inline void __thread__insert_map(struct thread *self, struct map *map)
+static inline struct map *thread__find_map(struct thread *self,
+					   enum map_type type, u64 addr)
 {
-	 maps__insert(&self->maps[map->type], map);
+	return self ? map_groups__find(&self->mg, type, addr) : NULL;
 }
 
 void thread__find_addr_location(struct thread *self, u8 cpumode,
 				enum map_type type, u64 addr,
 				struct addr_location *al,
 				symbol_filter_t filter);
-struct symbol *thread__find_symbol(struct thread *self,
-				   enum map_type type, u64 addr,
-				   symbol_filter_t filter);
+struct symbol *map_groups__find_symbol(struct map_groups *self,
+				       enum map_type type, u64 addr,
+				       symbol_filter_t filter);
 
 static inline struct symbol *
-thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter)
+map_groups__find_function(struct map_groups *self, u64 addr,
+			  symbol_filter_t filter)
 {
-	return thread__find_symbol(self, MAP__FUNCTION, addr, filter);
+	return map_groups__find_symbol(self, MAP__FUNCTION, addr, filter);
 }
+
+struct map *map_groups__find_by_name(struct map_groups *self,
+				     enum map_type type, const char *name);
 #endif	/* __PERF_THREAD_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 0302405aa2ca..c5c32be040bf 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -177,7 +177,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
 		func_count++;
 	}
 
-	func_list = malloc_or_die(sizeof(*func_list) * func_count + 1);
+	func_list = malloc_or_die(sizeof(*func_list) * (func_count + 1));
 
 	i = 0;
 	while (list) {
@@ -1477,7 +1477,7 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok)
 			goto out_free;
 
 		field = malloc_or_die(sizeof(*field));
-		memset(field, 0, sizeof(field));
+		memset(field, 0, sizeof(*field));
 
 		value = arg_eval(arg);
 		field->value = strdup(value);
diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c
index 51e833fd58c3..a5ffe60db5d6 100644
--- a/tools/perf/util/trace-event-perl.c
+++ b/tools/perf/util/trace-event-perl.c
@@ -32,9 +32,6 @@
 
 void xs_init(pTHX);
 
-void boot_Perf__Trace__Context(pTHX_ CV *cv);
-void boot_DynaLoader(pTHX_ CV *cv);
-
 void xs_init(pTHX)
 {
 	const char *file = __FILE__;
@@ -573,26 +570,72 @@ struct scripting_ops perl_scripting_ops = {
 	.generate_script = perl_generate_script,
 };
 
-#ifdef NO_LIBPERL
-void setup_perl_scripting(void)
+static void print_unsupported_msg(void)
 {
 	fprintf(stderr, "Perl scripting not supported."
-		"  Install libperl and rebuild perf to enable it.  e.g. "
-		"apt-get install libperl-dev (ubuntu), yum install "
-		"perl-ExtUtils-Embed (Fedora), etc.\n");
+		"  Install libperl and rebuild perf to enable it.\n"
+		"For example:\n  # apt-get install libperl-dev (ubuntu)"
+		"\n  # yum install perl-ExtUtils-Embed (Fedora)"
+		"\n  etc.\n");
 }
-#else
-void setup_perl_scripting(void)
+
+static int perl_start_script_unsupported(const char *script __unused)
+{
+	print_unsupported_msg();
+
+	return -1;
+}
+
+static int perl_stop_script_unsupported(void)
+{
+	return 0;
+}
+
+static void perl_process_event_unsupported(int cpu __unused,
+					   void *data __unused,
+					   int size __unused,
+					   unsigned long long nsecs __unused,
+					   char *comm __unused)
+{
+}
+
+static int perl_generate_script_unsupported(const char *outfile __unused)
+{
+	print_unsupported_msg();
+
+	return -1;
+}
+
+struct scripting_ops perl_scripting_unsupported_ops = {
+	.name = "Perl",
+	.start_script = perl_start_script_unsupported,
+	.stop_script = perl_stop_script_unsupported,
+	.process_event = perl_process_event_unsupported,
+	.generate_script = perl_generate_script_unsupported,
+};
+
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
 {
 	int err;
-	err = script_spec_register("Perl", &perl_scripting_ops);
+	err = script_spec_register("Perl", scripting_ops);
 	if (err)
 		die("error registering Perl script extension");
 
-	err = script_spec_register("pl", &perl_scripting_ops);
+	err = script_spec_register("pl", scripting_ops);
 	if (err)
 		die("error registering pl script extension");
 
 	scripting_context = malloc(sizeof(struct scripting_context));
 }
+
+#ifdef NO_LIBPERL
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_unsupported_ops);
+}
+#else
+void setup_perl_scripting(void)
+{
+	register_perl_scripting(&perl_scripting_ops);
+}
 #endif
diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h
index 8fe0d866fe1a..e88fb26137bb 100644
--- a/tools/perf/util/trace-event-perl.h
+++ b/tools/perf/util/trace-event-perl.h
@@ -34,9 +34,13 @@ typedef int INTERP;
 #define dXSUB_SYS
 #define pTHX_
 static inline void newXS(const char *a, void *b, const char *c) {}
+static void boot_Perf__Trace__Context(pTHX_ CV *cv) {}
+static void boot_DynaLoader(pTHX_ CV *cv) {}
 #else
 #include <EXTERN.h>
 #include <perl.h>
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
 typedef PerlInterpreter * INTERP;
 #endif
 
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 342dfdd43f87..1744422cafcb 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -145,8 +145,9 @@ static void read_proc_kallsyms(void)
 	if (!size)
 		return;
 
-	buf = malloc_or_die(size);
+	buf = malloc_or_die(size + 1);
 	read_or_die(buf, size);
+	buf[size] = '\0';
 
 	parse_proc_kallsyms(buf, size);