summary refs log tree commit diff
path: root/samples/bpf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-15 15:04:25 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-15 15:04:25 -0700
commit9a76aba02a37718242d7cdc294f0a3901928aa57 (patch)
tree2040d038f85d2120f21af83b0793efd5af1864e3 /samples/bpf
parent0a957467c5fd46142bc9c52758ffc552d4c5e2f7 (diff)
parent26a1ccc6c117be8e33e0410fce8c5298b0015b99 (diff)
downloadlinux-9a76aba02a37718242d7cdc294f0a3901928aa57.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Highlights:

   - Gustavo A. R. Silva keeps working on the implicit switch fallthru
     changes.

   - Support 802.11ax High-Efficiency wireless in cfg80211 et al, From
     Luca Coelho.

   - Re-enable ASPM in r8169, from Kai-Heng Feng.

   - Add virtual XFRM interfaces, which avoids all of the limitations of
     existing IPSEC tunnels. From Steffen Klassert.

   - Convert GRO over to use a hash table, so that when we have many
     flows active we don't traverse a long list during accumluation.

   - Many new self tests for routing, TC, tunnels, etc. Too many
     contributors to mention them all, but I'm really happy to keep
     seeing this stuff.

   - Hardware timestamping support for dpaa_eth/fsl-fman from Yangbo Lu.

   - Lots of cleanups and fixes in L2TP code from Guillaume Nault.

   - Add IPSEC offload support to netdevsim, from Shannon Nelson.

   - Add support for slotting with non-uniform distribution to netem
     packet scheduler, from Yousuk Seung.

   - Add UDP GSO support to mlx5e, from Boris Pismenny.

   - Support offloading of Team LAG in NFP, from John Hurley.

   - Allow to configure TX queue selection based upon RX queue, from
     Amritha Nambiar.

   - Support ethtool ring size configuration in aquantia, from Anton
     Mikaev.

   - Support DSCP and flowlabel per-transport in SCTP, from Xin Long.

   - Support list based batching and stack traversal of SKBs, this is
     very exciting work. From Edward Cree.

   - Busyloop optimizations in vhost_net, from Toshiaki Makita.

   - Introduce the ETF qdisc, which allows time based transmissions. IGB
     can offload this in hardware. From Vinicius Costa Gomes.

   - Add parameter support to devlink, from Moshe Shemesh.

   - Several multiplication and division optimizations for BPF JIT in
     nfp driver, from Jiong Wang.

   - Lots of prepatory work to make more of the packet scheduler layer
     lockless, when possible, from Vlad Buslov.

   - Add ACK filter and NAT awareness to sch_cake packet scheduler, from
     Toke Høiland-Jørgensen.

   - Support regions and region snapshots in devlink, from Alex Vesker.

   - Allow to attach XDP programs to both HW and SW at the same time on
     a given device, with initial support in nfp. From Jakub Kicinski.

   - Add TLS RX offload and support in mlx5, from Ilya Lesokhin.

   - Use PHYLIB in r8169 driver, from Heiner Kallweit.

   - All sorts of changes to support Spectrum 2 in mlxsw driver, from
     Ido Schimmel.

   - PTP support in mv88e6xxx DSA driver, from Andrew Lunn.

   - Make TCP_USER_TIMEOUT socket option more accurate, from Jon
     Maxwell.

   - Support for templates in packet scheduler classifier, from Jiri
     Pirko.

   - IPV6 support in RDS, from Ka-Cheong Poon.

   - Native tproxy support in nf_tables, from Máté Eckl.

   - Maintain IP fragment queue in an rbtree, but optimize properly for
     in-order frags. From Peter Oskolkov.

   - Improvde handling of ACKs on hole repairs, from Yuchung Cheng"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1996 commits)
  bpf: test: fix spelling mistake "REUSEEPORT" -> "REUSEPORT"
  hv/netvsc: Fix NULL dereference at single queue mode fallback
  net: filter: mark expected switch fall-through
  xen-netfront: fix warn message as irq device name has '/'
  cxgb4: Add new T5 PCI device ids 0x50af and 0x50b0
  net: dsa: mv88e6xxx: missing unlock on error path
  rds: fix building with IPV6=m
  inet/connection_sock: prefer _THIS_IP_ to current_text_addr
  net: dsa: mv88e6xxx: bitwise vs logical bug
  net: sock_diag: Fix spectre v1 gadget in __sock_diag_cmd()
  ieee802154: hwsim: using right kind of iteration
  net: hns3: Add vlan filter setting by ethtool command -K
  net: hns3: Set tx ring' tc info when netdev is up
  net: hns3: Remove tx ring BD len register in hns3_enet
  net: hns3: Fix desc num set to default when setting channel
  net: hns3: Fix for phy link issue when using marvell phy driver
  net: hns3: Fix for information of phydev lost problem when down/up
  net: hns3: Fix for command format parsing error in hclge_is_all_function_id_zero
  net: hns3: Add support for serdes loopback selftest
  bnxt_en: take coredump_record structure off stack
  ...
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile25
-rw-r--r--samples/bpf/bpf_load.c3
-rw-r--r--samples/bpf/hash_func01.h55
-rw-r--r--samples/bpf/test_cgrp2_attach2.c21
-rw-r--r--samples/bpf/test_cgrp2_sock2.c2
-rw-r--r--samples/bpf/xdp_fwd_user.c34
-rw-r--r--samples/bpf/xdp_redirect_cpu_kern.c114
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c4
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c43
-rw-r--r--samples/bpf/xdp_rxq_info_user.c45
-rw-r--r--samples/bpf/xdp_sample_pkts_kern.c66
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c169
-rw-r--r--samples/bpf/xdpsock_user.c43
13 files changed, 590 insertions, 34 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index bd9f6c2a808e..36f9f41d094b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -52,6 +52,7 @@ hostprogs-y += xdp_adjust_tail
 hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
+hostprogs-y += xdp_sample_pkts
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -104,9 +105,10 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o syscall_tp_user.o
 cpustat-objs := bpf_load.o cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
-xdpsock-objs := bpf_load.o xdpsock_user.o
-xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
+xdpsock-objs := xdpsock_user.o
+xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
+xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -163,6 +165,7 @@ always += xdp_adjust_tail_kern.o
 always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
+always += xdp_sample_pkts_kern.o
 
 KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include
 KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -179,6 +182,7 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
 HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
+HOSTCFLAGS_xdp_sample_pkts_user.o += -I$(srctree)/tools/lib/bpf/
 
 KBUILD_HOSTLDLIBS		+= $(LIBBPF) -lelf
 HOSTLDLIBS_tracex4		+= -lrt
@@ -191,6 +195,8 @@ HOSTLDLIBS_xdpsock		+= -pthread
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
 LLC ?= llc
 CLANG ?= clang
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
 
 # Detect that we're cross compiling and use the cross compiler
 ifdef CROSS_COMPILE
@@ -198,6 +204,16 @@ HOSTCC = $(CROSS_COMPILE)gcc
 CLANG_ARCH_ARGS = -target $(ARCH)
 endif
 
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
+	EXTRA_CFLAGS += -g
+	LLC_FLAGS += -mattr=dwarfris
+	DWARF2BTF = y
+endif
+
 # Trick to allow make to be run from this directory
 all:
 	$(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR)
@@ -256,4 +272,7 @@ $(obj)/%.o: $(src)/%.c
 		-Wno-gnu-variable-sized-type-not-at-end \
 		-Wno-address-of-packed-member -Wno-tautological-compare \
 		-Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
+		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 89161c9ed466..904e775d1a44 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -107,6 +107,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		return -1;
 	}
 
+	if (prog_cnt == MAX_PROGS)
+		return -1;
+
 	fd = bpf_load_program(prog_type, prog, insns_cnt, license, kern_version,
 			      bpf_log_buf, BPF_LOG_BUF_SIZE);
 	if (fd < 0) {
diff --git a/samples/bpf/hash_func01.h b/samples/bpf/hash_func01.h
new file mode 100644
index 000000000000..38255812e376
--- /dev/null
+++ b/samples/bpf/hash_func01.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1
+ *
+ * Based on Paul Hsieh's (LGPG 2.1) hash function
+ * From: http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+#define get16bits(d) (*((const __u16 *) (d)))
+
+static __always_inline
+__u32 SuperFastHash (const char *data, int len, __u32 initval) {
+	__u32 hash = initval;
+	__u32 tmp;
+	int rem;
+
+	if (len <= 0 || data == NULL) return 0;
+
+	rem = len & 3;
+	len >>= 2;
+
+	/* Main loop */
+#pragma clang loop unroll(full)
+	for (;len > 0; len--) {
+		hash  += get16bits (data);
+		tmp    = (get16bits (data+2) << 11) ^ hash;
+		hash   = (hash << 16) ^ tmp;
+		data  += 2*sizeof (__u16);
+		hash  += hash >> 11;
+	}
+
+	/* Handle end cases */
+	switch (rem) {
+        case 3: hash += get16bits (data);
+                hash ^= hash << 16;
+                hash ^= ((signed char)data[sizeof (__u16)]) << 18;
+                hash += hash >> 11;
+                break;
+        case 2: hash += get16bits (data);
+                hash ^= hash << 11;
+                hash += hash >> 17;
+                break;
+        case 1: hash += (signed char)*data;
+                hash ^= hash << 10;
+                hash += hash >> 1;
+	}
+
+	/* Force "avalanching" of final 127 bits */
+	hash ^= hash << 3;
+	hash += hash >> 5;
+	hash ^= hash << 4;
+	hash += hash >> 17;
+	hash ^= hash << 25;
+	hash += hash >> 6;
+
+	return hash;
+}
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
index b453e6a161be..180f9d813bca 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -8,7 +8,8 @@
  *   information. The number of invocations of the program, which maps
  *   to the number of packets received, is stored to key 0. Key 1 is
  *   incremented on each iteration by the number of bytes stored in
- *   the skb.
+ *   the skb. The program also stores the number of received bytes
+ *   in the cgroup storage.
  *
  * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
  *
@@ -21,12 +22,15 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <sys/resource.h>
+#include <sys/time.h>
 #include <unistd.h>
 
 #include <linux/bpf.h>
 #include <bpf/bpf.h>
 
 #include "bpf_insn.h"
+#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 #define FOO		"/foo"
@@ -205,6 +209,8 @@ static int map_fd = -1;
 
 static int prog_load_cnt(int verdict, int val)
 {
+	int cgroup_storage_fd;
+
 	if (map_fd < 0)
 		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
 	if (map_fd < 0) {
@@ -212,6 +218,13 @@ static int prog_load_cnt(int verdict, int val)
 		return -1;
 	}
 
+	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+	if (cgroup_storage_fd < 0) {
+		printf("failed to create map '%s'\n", strerror(errno));
+		return -1;
+	}
+
 	struct bpf_insn prog[] = {
 		BPF_MOV32_IMM(BPF_REG_0, 0),
 		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
@@ -222,6 +235,11 @@ static int prog_load_cnt(int verdict, int val)
 		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
 		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+		BPF_MOV64_IMM(BPF_REG_2, 0),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+		BPF_MOV64_IMM(BPF_REG_1, val),
+		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
 		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
 		BPF_EXIT_INSN(),
 	};
@@ -237,6 +255,7 @@ static int prog_load_cnt(int verdict, int val)
 		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
 		return 0;
 	}
+	close(cgroup_storage_fd);
 	return ret;
 }
 
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
index 3b5be2364975..a9277b118c33 100644
--- a/samples/bpf/test_cgrp2_sock2.c
+++ b/samples/bpf/test_cgrp2_sock2.c
@@ -51,7 +51,7 @@ int main(int argc, char **argv)
 	if (argc > 3)
 		filter_id = atoi(argv[3]);
 
-	if (filter_id > prog_cnt) {
+	if (filter_id >= prog_cnt) {
 		printf("Invalid program id; program not found in file\n");
 		return EXIT_FAILURE;
 	}
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index a87a2048ed32..f88e1d7093d6 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -24,8 +24,7 @@
 #include <fcntl.h>
 #include <libgen.h>
 
-#include "bpf_load.h"
-#include "bpf_util.h"
+#include "bpf/libbpf.h"
 #include <bpf/bpf.h>
 
 
@@ -63,9 +62,15 @@ static void usage(const char *prog)
 
 int main(int argc, char **argv)
 {
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	const char *prog_name = "xdp_fwd";
+	struct bpf_program *prog;
 	char filename[PATH_MAX];
+	struct bpf_object *obj;
 	int opt, i, idx, err;
-	int prog_id = 0;
+	int prog_fd, map_fd;
 	int attach = 1;
 	int ret = 0;
 
@@ -75,7 +80,7 @@ int main(int argc, char **argv)
 			attach = 0;
 			break;
 		case 'D':
-			prog_id = 1;
+			prog_name = "xdp_fwd_direct";
 			break;
 		default:
 			usage(basename(argv[0]));
@@ -90,6 +95,7 @@ int main(int argc, char **argv)
 
 	if (attach) {
 		snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+		prog_load_attr.file = filename;
 
 		if (access(filename, O_RDONLY) < 0) {
 			printf("error accessing file %s: %s\n",
@@ -97,19 +103,25 @@ int main(int argc, char **argv)
 			return 1;
 		}
 
-		if (load_bpf_file(filename)) {
-			printf("%s", bpf_log_buf);
+		if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
 			return 1;
-		}
 
-		if (!prog_fd[prog_id]) {
-			printf("load_bpf_file: %s\n", strerror(errno));
+		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog_fd = bpf_program__fd(prog);
+		if (prog_fd < 0) {
+			printf("program not found: %s\n", strerror(prog_fd));
+			return 1;
+		}
+		map_fd = bpf_map__fd(bpf_object__find_map_by_name(obj,
+								  "tx_port"));
+		if (map_fd < 0) {
+			printf("map not found: %s\n", strerror(map_fd));
 			return 1;
 		}
 	}
 	if (attach) {
 		for (i = 1; i < 64; ++i)
-			bpf_map_update_elem(map_fd[0], &i, &i, 0);
+			bpf_map_update_elem(map_fd, &i, &i, 0);
 	}
 
 	for (i = optind; i < argc; ++i) {
@@ -126,7 +138,7 @@ int main(int argc, char **argv)
 			if (err)
 				ret = err;
 		} else {
-			err = do_attach(idx, prog_fd[prog_id], argv[i]);
+			err = do_attach(idx, prog_fd, argv[i]);
 			if (err)
 				ret = err;
 		}
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 4938dcbaecbf..a306d1c75622 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -13,6 +13,7 @@
 
 #include <uapi/linux/bpf.h>
 #include "bpf_helpers.h"
+#include "hash_func01.h"
 
 #define MAX_CPUS 64 /* WARNING - sync with _user.c */
 
@@ -134,7 +135,16 @@ bool parse_eth(struct ethhdr *eth, void *data_end,
 			return false;
 		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
 	}
-	/* TODO: Handle double VLAN tagged packet */
+	/* Handle double VLAN tagged packet */
+	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+		struct vlan_hdr *vlan_hdr;
+
+		vlan_hdr = (void *)eth + offset;
+		offset += sizeof(*vlan_hdr);
+		if ((void *)eth + offset > data_end)
+			return false;
+		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+	}
 
 	*eth_proto = ntohs(eth_type);
 	*l3_offset = offset;
@@ -452,6 +462,108 @@ int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
 	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
 }
 
+/* Hashing initval */
+#define INITVAL 15485863
+
+static __always_inline
+u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct iphdr *iph = data + nh_off;
+	u32 cpu_hash;
+
+	if (iph + 1 > data_end)
+		return 0;
+
+	cpu_hash = iph->saddr + iph->daddr;
+	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
+
+	return cpu_hash;
+}
+
+static __always_inline
+u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct ipv6hdr *ip6h = data + nh_off;
+	u32 cpu_hash;
+
+	if (ip6h + 1 > data_end)
+		return 0;
+
+	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
+	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
+	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
+	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
+	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
+
+	return cpu_hash;
+}
+
+/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
+ * hashing scheme is symmetric, meaning swapping IP src/dest still hit
+ * same CPU.
+ */
+SEC("xdp_cpu_map5_lb_hash_ip_pairs")
+int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	u8 ip_proto = IPPROTO_UDP;
+	struct datarec *rec;
+	u16 eth_proto = 0;
+	u64 l3_offset = 0;
+	u32 cpu_dest = 0;
+	u32 cpu_idx = 0;
+	u32 *cpu_lookup;
+	u32 *cpu_max;
+	u32 cpu_hash;
+	u32 key = 0;
+
+	/* Count RX packet in map */
+	rec = bpf_map_lookup_elem(&rx_cnt, &key);
+	if (!rec)
+		return XDP_ABORTED;
+	rec->processed++;
+
+	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
+	if (!cpu_max)
+		return XDP_ABORTED;
+
+	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+		return XDP_PASS; /* Just skip */
+
+	/* Hash for IPv4 and IPv6 */
+	switch (eth_proto) {
+	case ETH_P_IP:
+		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
+		break;
+	case ETH_P_IPV6:
+		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
+		break;
+	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
+	default:
+		cpu_hash = 0;
+	}
+
+	/* Choose CPU based on hash */
+	cpu_idx = cpu_hash % *cpu_max;
+
+	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+	if (!cpu_lookup)
+		return XDP_ABORTED;
+	cpu_dest = *cpu_lookup;
+
+	if (cpu_dest >= MAX_CPUS) {
+		rec->issue++;
+		return XDP_ABORTED;
+	}
+
+	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 4b4d78fffe30..9a6c7e0a6dd1 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -22,7 +22,7 @@ static const char *__doc__ =
 #define MAX_CPUS 64 /* WARNING - sync with _kern.c */
 
 /* How many xdp_progs are defined in _kern.c */
-#define MAX_PROG 5
+#define MAX_PROG 6
 
 /* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
  * use bpf/libbpf.h), but cannot as (currently) needed for XDP
@@ -567,7 +567,7 @@ int main(int argc, char **argv)
 	int added_cpus = 0;
 	int longindex = 0;
 	int interval = 2;
-	int prog_num = 0;
+	int prog_num = 5;
 	int add_cpu = -1;
 	__u32 qsize;
 	int opt;
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
index 3fd209291653..222a83eed1cb 100644
--- a/samples/bpf/xdp_rxq_info_kern.c
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -4,6 +4,8 @@
  *  Example howto extract XDP RX-queue info
  */
 #include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/in.h>
 #include "bpf_helpers.h"
 
 /* Config setup from with userspace
@@ -14,6 +16,12 @@
 struct config {
 	__u32 action;
 	int ifindex;
+	__u32 options;
+};
+enum cfg_options_flags {
+	NO_TOUCH = 0x0U,
+	READ_MEM = 0x1U,
+	SWAP_MAC = 0x2U,
 };
 struct bpf_map_def SEC("maps") config_map = {
 	.type		= BPF_MAP_TYPE_ARRAY,
@@ -45,6 +53,23 @@ struct bpf_map_def SEC("maps") rx_queue_index_map = {
 	.max_entries	= MAX_RXQs + 1,
 };
 
+static __always_inline
+void swap_src_dst_mac(void *data)
+{
+	unsigned short *p = data;
+	unsigned short dst[3];
+
+	dst[0] = p[0];
+	dst[1] = p[1];
+	dst[2] = p[2];
+	p[0] = p[3];
+	p[1] = p[4];
+	p[2] = p[5];
+	p[3] = dst[0];
+	p[4] = dst[1];
+	p[5] = dst[2];
+}
+
 SEC("xdp_prog0")
 int  xdp_prognum0(struct xdp_md *ctx)
 {
@@ -90,6 +115,24 @@ int  xdp_prognum0(struct xdp_md *ctx)
 	if (key == MAX_RXQs)
 		rxq_rec->issue++;
 
+	/* Default: Don't touch packet data, only count packets */
+	if (unlikely(config->options & (READ_MEM|SWAP_MAC))) {
+		struct ethhdr *eth = data;
+
+		if (eth + 1 > data_end)
+			return XDP_ABORTED;
+
+		/* Avoid compiler removing this: Drop non 802.3 Ethertypes */
+		if (ntohs(eth->h_proto) < ETH_P_802_3_MIN)
+			return XDP_ABORTED;
+
+		/* XDP_TX requires changing MAC-addrs, else HW may drop.
+		 * Can also be enabled with --swapmac (for test purposes)
+		 */
+		if (unlikely(config->options & SWAP_MAC))
+			swap_src_dst_mac(data);
+	}
+
 	return config->action;
 }
 
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index e4e9ba52bff0..248a7eab9531 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -50,6 +50,8 @@ static const struct option long_options[] = {
 	{"sec",		required_argument,	NULL, 's' },
 	{"no-separators", no_argument,		NULL, 'z' },
 	{"action",	required_argument,	NULL, 'a' },
+	{"readmem", 	no_argument,		NULL, 'r' },
+	{"swapmac", 	no_argument,		NULL, 'm' },
 	{0, 0, NULL,  0 }
 };
 
@@ -66,6 +68,12 @@ static void int_exit(int sig)
 struct config {
 	__u32 action;
 	int ifindex;
+	__u32 options;
+};
+enum cfg_options_flags {
+	NO_TOUCH = 0x0U,
+	READ_MEM = 0x1U,
+	SWAP_MAC = 0x2U,
 };
 #define XDP_ACTION_MAX (XDP_TX + 1)
 #define XDP_ACTION_MAX_STRLEN 11
@@ -109,6 +117,18 @@ static void list_xdp_actions(void)
 	printf("\n");
 }
 
+static char* options2str(enum cfg_options_flags flag)
+{
+	if (flag == NO_TOUCH)
+		return "no_touch";
+	if (flag & SWAP_MAC)
+		return "swapmac";
+	if (flag & READ_MEM)
+		return "read";
+	fprintf(stderr, "ERR: Unknown config option flags");
+	exit(EXIT_FAIL);
+}
+
 static void usage(char *argv[])
 {
 	int i;
@@ -305,7 +325,7 @@ static __u64 calc_errs_pps(struct datarec *r,
 
 static void stats_print(struct stats_record *stats_rec,
 			struct stats_record *stats_prev,
-			int action)
+			int action, __u32 cfg_opt)
 {
 	unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
 	unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -316,8 +336,8 @@ static void stats_print(struct stats_record *stats_rec,
 	int i;
 
 	/* Header */
-	printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
-	       ifname, ifindex, action2str(action));
+	printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n",
+	       ifname, ifindex, action2str(action), options2str(cfg_opt));
 
 	/* stats_global_map */
 	{
@@ -399,7 +419,7 @@ static inline void swap(struct stats_record **a, struct stats_record **b)
 	*b = tmp;
 }
 
-static void stats_poll(int interval, int action)
+static void stats_poll(int interval, int action, __u32 cfg_opt)
 {
 	struct stats_record *record, *prev;
 
@@ -410,7 +430,7 @@ static void stats_poll(int interval, int action)
 	while (1) {
 		swap(&prev, &record);
 		stats_collect(record);
-		stats_print(record, prev, action);
+		stats_print(record, prev, action, cfg_opt);
 		sleep(interval);
 	}
 
@@ -421,6 +441,7 @@ static void stats_poll(int interval, int action)
 
 int main(int argc, char **argv)
 {
+	__u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */
 	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
 	struct bpf_prog_load_attr prog_load_attr = {
 		.prog_type	= BPF_PROG_TYPE_XDP,
@@ -435,6 +456,7 @@ int main(int argc, char **argv)
 	int interval = 2;
 	__u32 key = 0;
 
+
 	char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
 	int action = XDP_PASS; /* Default action */
 	char *action_str = NULL;
@@ -496,6 +518,12 @@ int main(int argc, char **argv)
 			action_str = (char *)&action_str_buf;
 			strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
 			break;
+		case 'r':
+			cfg_options |= READ_MEM;
+			break;
+		case 'm':
+			cfg_options |= SWAP_MAC;
+			break;
 		case 'h':
 		error:
 		default:
@@ -523,6 +551,11 @@ int main(int argc, char **argv)
 	}
 	cfg.action = action;
 
+	/* XDP_TX requires changing MAC-addrs, else HW may drop */
+	if (action == XDP_TX)
+		cfg_options |= SWAP_MAC;
+	cfg.options = cfg_options;
+
 	/* Trick to pretty printf with thousands separators use %' */
 	if (use_separators)
 		setlocale(LC_NUMERIC, "en_US");
@@ -542,6 +575,6 @@ int main(int argc, char **argv)
 		return EXIT_FAIL_XDP;
 	}
 
-	stats_poll(interval, action);
+	stats_poll(interval, action, cfg_options);
 	return EXIT_OK;
 }
diff --git a/samples/bpf/xdp_sample_pkts_kern.c b/samples/bpf/xdp_sample_pkts_kern.c
new file mode 100644
index 000000000000..f7ca8b850978
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_kern.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define SAMPLE_SIZE 64ul
+#define MAX_CPUS 128
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = MAX_CPUS,
+};
+
+SEC("xdp_sample")
+int xdp_sample_prog(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+
+	/* Metadata will be in the perf event before the packet data. */
+	struct S {
+		u16 cookie;
+		u16 pkt_len;
+	} __packed metadata;
+
+	if (data < data_end) {
+		/* The XDP perf_event_output handler will use the upper 32 bits
+		 * of the flags argument as a number of bytes to include of the
+		 * packet payload in the event data. If the size is too big, the
+		 * call to bpf_perf_event_output will fail and return -EFAULT.
+		 *
+		 * See bpf_xdp_event_output in net/core/filter.c.
+		 *
+		 * The BPF_F_CURRENT_CPU flag means that the event output fd
+		 * will be indexed by the CPU number in the event map.
+		 */
+		u64 flags = BPF_F_CURRENT_CPU;
+		u16 sample_size;
+		int ret;
+
+		metadata.cookie = 0xdead;
+		metadata.pkt_len = (u16)(data_end - data);
+		sample_size = min(metadata.pkt_len, SAMPLE_SIZE);
+		flags |= (u64)sample_size << 32;
+
+		ret = bpf_perf_event_output(ctx, &my_map, flags,
+					    &metadata, sizeof(metadata));
+		if (ret)
+			bpf_printk("perf_event_output failed: %d\n", ret);
+	}
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
new file mode 100644
index 000000000000..8dd87c1eb560
--- /dev/null
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <net/if.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/sysinfo.h>
+#include <sys/ioctl.h>
+#include <signal.h>
+#include <libbpf.h>
+#include <bpf/bpf.h>
+
+#include "perf-sys.h"
+#include "trace_helpers.h"
+
+#define MAX_CPUS 128
+static int pmu_fds[MAX_CPUS], if_idx;
+static struct perf_event_mmap_page *headers[MAX_CPUS];
+static char *if_name;
+
+static int do_attach(int idx, int fd, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, fd, 0);
+	if (err < 0)
+		printf("ERROR: failed to attach program to %s\n", name);
+
+	return err;
+}
+
+static int do_detach(int idx, const char *name)
+{
+	int err;
+
+	err = bpf_set_link_xdp_fd(idx, -1, 0);
+	if (err < 0)
+		printf("ERROR: failed to detach program from %s\n", name);
+
+	return err;
+}
+
+#define SAMPLE_SIZE 64
+
+static int print_bpf_output(void *data, int size)
+{
+	struct {
+		__u16 cookie;
+		__u16 pkt_len;
+		__u8  pkt_data[SAMPLE_SIZE];
+	} __packed *e = data;
+	int i;
+
+	if (e->cookie != 0xdead) {
+		printf("BUG cookie %x sized %d\n",
+		       e->cookie, size);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+
+	printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len);
+	for (i = 0; i < 14 && i < e->pkt_len; i++)
+		printf("%02x ", e->pkt_data[i]);
+	printf("\n");
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_bpf_perf_event(int map_fd, int num)
+{
+	struct perf_event_attr attr = {
+		.sample_type = PERF_SAMPLE_RAW,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_BPF_OUTPUT,
+		.wakeup_events = 1, /* get an fd notification for every event */
+	};
+	int i;
+
+	for (i = 0; i < num; i++) {
+		int key = i;
+
+		pmu_fds[i] = sys_perf_event_open(&attr, -1/*pid*/, i/*cpu*/,
+						 -1/*group_fd*/, 0);
+
+		assert(pmu_fds[i] >= 0);
+		assert(bpf_map_update_elem(map_fd, &key,
+					   &pmu_fds[i], BPF_ANY) == 0);
+		ioctl(pmu_fds[i], PERF_EVENT_IOC_ENABLE, 0);
+	}
+}
+
+static void sig_handler(int signo)
+{
+	do_detach(if_idx, if_name);
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	struct bpf_object *obj;
+	struct bpf_map *map;
+	int prog_fd, map_fd;
+	char filename[256];
+	int ret, err, i;
+	int numcpus;
+
+	if (argc < 2) {
+		printf("Usage: %s <ifname>\n", argv[0]);
+		return 1;
+	}
+
+	numcpus = get_nprocs();
+	if (numcpus > MAX_CPUS)
+		numcpus = MAX_CPUS;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = filename;
+
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		return 1;
+
+	if (!prog_fd) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	map = bpf_map__next(NULL, obj);
+	if (!map) {
+		printf("finding a map in obj file failed\n");
+		return 1;
+	}
+	map_fd = bpf_map__fd(map);
+
+	if_idx = if_nametoindex(argv[1]);
+	if (!if_idx)
+		if_idx = strtoul(argv[1], NULL, 0);
+
+	if (!if_idx) {
+		fprintf(stderr, "Invalid ifname\n");
+		return 1;
+	}
+	if_name = argv[1];
+	err = do_attach(if_idx, prog_fd, argv[1]);
+	if (err)
+		return err;
+
+	if (signal(SIGINT, sig_handler) ||
+	    signal(SIGHUP, sig_handler) ||
+	    signal(SIGTERM, sig_handler)) {
+		perror("signal");
+		return 1;
+	}
+
+	test_bpf_perf_event(map_fd, numcpus);
+
+	for (i = 0; i < numcpus; i++)
+		if (perf_event_mmap_header(pmu_fds[i], &headers[i]) < 0)
+			return 1;
+
+	ret = perf_event_poller_multi(pmu_fds, headers, numcpus,
+				      print_bpf_output);
+	kill(0, SIGINT);
+	return ret;
+}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index 5904b1543831..4914788b6727 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -26,7 +26,7 @@
 #include <sys/types.h>
 #include <poll.h>
 
-#include "bpf_load.h"
+#include "bpf/libbpf.h"
 #include "bpf_util.h"
 #include <bpf/bpf.h>
 
@@ -145,8 +145,13 @@ static void dump_stats(void);
 	} while (0)
 
 #define barrier() __asm__ __volatile__("": : :"memory")
+#ifdef __aarch64__
+#define u_smp_rmb() __asm__ __volatile__("dmb ishld": : :"memory")
+#define u_smp_wmb() __asm__ __volatile__("dmb ishst": : :"memory")
+#else
 #define u_smp_rmb() barrier()
 #define u_smp_wmb() barrier()
+#endif
 #define likely(x) __builtin_expect(!!(x), 1)
 #define unlikely(x) __builtin_expect(!!(x), 0)
 
@@ -886,7 +891,13 @@ static void l2fwd(struct xdpsock *xsk)
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_prog_load_attr prog_load_attr = {
+		.prog_type	= BPF_PROG_TYPE_XDP,
+	};
+	int prog_fd, qidconf_map, xsks_map;
+	struct bpf_object *obj;
 	char xdp_filename[256];
+	struct bpf_map *map;
 	int i, ret, key = 0;
 	pthread_t pt;
 
@@ -899,24 +910,38 @@ int main(int argc, char **argv)
 	}
 
 	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
+	prog_load_attr.file = xdp_filename;
 
-	if (load_bpf_file(xdp_filename)) {
-		fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf);
+	if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+		exit(EXIT_FAILURE);
+	if (prog_fd < 0) {
+		fprintf(stderr, "ERROR: no program found: %s\n",
+			strerror(prog_fd));
 		exit(EXIT_FAILURE);
 	}
 
-	if (!prog_fd[0]) {
-		fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n",
-			strerror(errno));
+	map = bpf_object__find_map_by_name(obj, "qidconf_map");
+	qidconf_map = bpf_map__fd(map);
+	if (qidconf_map < 0) {
+		fprintf(stderr, "ERROR: no qidconf map found: %s\n",
+			strerror(qidconf_map));
+		exit(EXIT_FAILURE);
+	}
+
+	map = bpf_object__find_map_by_name(obj, "xsks_map");
+	xsks_map = bpf_map__fd(map);
+	if (xsks_map < 0) {
+		fprintf(stderr, "ERROR: no xsks map found: %s\n",
+			strerror(xsks_map));
 		exit(EXIT_FAILURE);
 	}
 
-	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) {
+	if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) {
 		fprintf(stderr, "ERROR: link set xdp fd failed\n");
 		exit(EXIT_FAILURE);
 	}
 
-	ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0);
+	ret = bpf_map_update_elem(qidconf_map, &key, &opt_queue, 0);
 	if (ret) {
 		fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n");
 		exit(EXIT_FAILURE);
@@ -933,7 +958,7 @@ int main(int argc, char **argv)
 	/* ...and insert them into the map. */
 	for (i = 0; i < num_socks; i++) {
 		key = i;
-		ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0);
+		ret = bpf_map_update_elem(xsks_map, &key, &xsks[i]->sfd, 0);
 		if (ret) {
 			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
 			exit(EXIT_FAILURE);