author		Jakub Kicinski <kuba@kernel.org>	2022-07-09 12:24:15 -0700
committer	Jakub Kicinski <kuba@kernel.org>	2022-07-09 12:24:16 -0700
commit		0076cad30135f95bf9a144269906f9b7a4eb542c (patch)
tree		1a48680205d7b23123a3864c25c814d6d0dfbd8e
parent		877d4e3cedd18cd5a4cef7685b64af72f8322ac1 (diff)
parent		24bdfdd2ec343c94adf38fb5bc699f12e543713b (diff)
download	linux-0076cad30135f95bf9a144269906f9b7a4eb542c.tar.gz
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2022-07-09

We've added 94 non-merge commits during the last 19 day(s) which contain
a total of 125 files changed, 5141 insertions(+), 6701 deletions(-).

The main changes are:

1) Add a new way of performing BTF type queries to BPF, from Daniel Müller.

2) Add inlining of calls to the bpf_loop() helper when its function callback is
   statically known, from Eduard Zingerman.

3) Implement BPF TCP CC framework usability improvements, from Jörn-Thorben Hinz.

4) Add LSM flavor for attaching per-cgroup BPF programs to existing LSM
   hooks, from Stanislav Fomichev. (A usage sketch follows this list.)

5) Remove all deprecated libbpf APIs in prep for 1.0 release, from Andrii Nakryiko.

6) Add benchmarks around local_storage to BPF selftests, from Dave Marchevsky.

7) AF_XDP sample removal (given move to libxdp) and various improvements around AF_XDP
   selftests, from Magnus Karlsson & Maciej Fijalkowski.

8) Add bpftool improvements for memcg probing and bash completion, from Quentin Monnet.

9) Add arm64 JIT support for bpf2bpf calls coupled with tail calls, from Jakub Sitnicki.

10) Sockmap optimizations that improve the throughput of UDP transmissions
    by 61%, from Cong Wang.

11) Rework perf's BPF prologue code to remove deprecated functions, from Jiri Olsa.

12) Fix sockmap teardown path to avoid sleepable sk_psock_stop, from John Fastabend.

13) Fix libbpf's cleanup around legacy kprobe/uprobe on error case, from Chuang Wang.

14) Fix libbpf's bpf_helpers.h to work with gcc for the case of its sec/pragma
    macro, from James Hilliard.

15) Fix libbpf's pt_regs macros for riscv to use a0 for RC register, from Yixun Lan.

16) Fix bpftool to show the name of type BPF_OBJ_LINK, from Yafang Shao.
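
As a usage sketch for 4): a per-cgroup LSM program and its attachment. The
hook, program, and cgroup names below are illustrative, not from this series;
the attach flow assumes a libbpf skeleton and bpf_program__attach_cgroup(),
as exercised by the lsm_cgroup selftest.

/* BPF side (sketch): */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("lsm_cgroup/socket_post_create")
int BPF_PROG(deny_raw, struct socket *sock, int family,
	     int type, int protocol, int kern)
{
	if (type == SOCK_RAW)
		return 0;	/* deny; kernel maps this to -EPERM */
	return 1;		/* allow */
}

/* User-space side (sketch, skeleton assumed): */
int cgroup_fd = open("/sys/fs/cgroup/my-service", O_RDONLY);

skel->links.deny_raw = bpf_program__attach_cgroup(skel->progs.deny_raw,
						  cgroup_fd);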

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (94 commits)
  selftests/bpf: Fix xdp_synproxy build failure if CONFIG_NF_CONNTRACK=m/n
  bpf: Correctly propagate errors up from bpf_core_composites_match
  libbpf: Disable SEC pragma macro on GCC
  bpf: Check attach_func_proto more carefully in check_return_code
  selftests/bpf: Add test involving restrict type qualifier
  bpftool: Add support for KIND_RESTRICT to gen min_core_btf command
  MAINTAINERS: Add entry for AF_XDP selftests files
  selftests, xsk: Rename AF_XDP testing app
  bpf, docs: Remove deprecated xsk libbpf APIs description
  selftests/bpf: Add benchmark for local_storage RCU Tasks Trace usage
  libbpf, riscv: Use a0 for RC register
  libbpf: Remove unnecessary usdt_rel_ip assignments
  selftests/bpf: Fix few more compiler warnings
  selftests/bpf: Fix bogus uninitialized variable warning
  bpftool: Remove zlib feature test from Makefile
  libbpf: Cleanup the legacy uprobe_event on failed add/attach_event()
  libbpf: Fix wrong variable used in perf_event_uprobe_open_legacy()
  libbpf: Cleanup the legacy kprobe_event on failed add/attach_event()
  selftests/bpf: Add type match test against kernel's task_struct
  selftests/bpf: Add nested type to type based tests
  ...
====================

Link: https://lore.kernel.org/r/20220708233145.32365-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--  Documentation/bpf/instruction-set.rst | 2
-rw-r--r--  Documentation/bpf/libbpf/libbpf_naming_convention.rst | 13
-rw-r--r--  MAINTAINERS | 3
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 9
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 30
-rw-r--r--  include/linux/bpf-cgroup-defs.h | 13
-rw-r--r--  include/linux/bpf-cgroup.h | 9
-rw-r--r--  include/linux/bpf.h | 47
-rw-r--r--  include/linux/bpf_lsm.h | 7
-rw-r--r--  include/linux/bpf_verifier.h | 12
-rw-r--r--  include/linux/btf_ids.h | 3
-rw-r--r--  include/linux/filter.h | 1
-rw-r--r--  include/linux/net.h | 4
-rw-r--r--  include/net/tcp.h | 1
-rw-r--r--  include/net/udp.h | 3
-rw-r--r--  include/uapi/linux/bpf.h | 5
-rw-r--r--  kernel/bpf/bpf_iter.c | 9
-rw-r--r--  kernel/bpf/bpf_lsm.c | 81
-rw-r--r--  kernel/bpf/bpf_struct_ops.c | 7
-rw-r--r--  kernel/bpf/btf.c | 94
-rw-r--r--  kernel/bpf/cgroup.c | 350
-rw-r--r--  kernel/bpf/core.c | 15
-rw-r--r--  kernel/bpf/syscall.c | 18
-rw-r--r--  kernel/bpf/trampoline.c | 262
-rw-r--r--  kernel/bpf/verifier.c | 238
-rw-r--r--  kernel/trace/trace_uprobe.c | 2
-rw-r--r--  lib/test_bpf.c | 4
-rw-r--r--  net/core/filter.c | 65
-rw-r--r--  net/core/skmsg.c | 48
-rw-r--r--  net/core/sock_map.c | 2
-rw-r--r--  net/ipv4/af_inet.c | 3
-rw-r--r--  net/ipv4/bpf_tcp_ca.c | 39
-rw-r--r--  net/ipv4/tcp.c | 44
-rw-r--r--  net/ipv4/udp.c | 11
-rw-r--r--  net/ipv6/af_inet6.c | 3
-rw-r--r--  net/unix/af_unix.c | 23
-rw-r--r--  samples/bpf/Makefile | 9
-rw-r--r--  samples/bpf/xdp1_kern.c | 11
-rw-r--r--  samples/bpf/xdp2_kern.c | 11
-rw-r--r--  samples/bpf/xdp_tx_iptunnel_kern.c | 2
-rw-r--r--  samples/bpf/xdpsock.h | 19
-rw-r--r--  samples/bpf/xdpsock_ctrl_proc.c | 190
-rw-r--r--  samples/bpf/xdpsock_kern.c | 24
-rw-r--r--  samples/bpf/xdpsock_user.c | 2019
-rw-r--r--  samples/bpf/xsk_fwd.c | 1085
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-feature.rst | 12
-rw-r--r--  tools/bpf/bpftool/Makefile | 11
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool | 28
-rw-r--r--  tools/bpf/bpftool/cgroup.c | 109
-rw-r--r--  tools/bpf/bpftool/common.c | 72
-rw-r--r--  tools/bpf/bpftool/feature.c | 59
-rw-r--r--  tools/bpf/bpftool/gen.c | 109
-rw-r--r--  tools/bpf/bpftool/main.h | 2
-rw-r--r--  tools/include/linux/btf_ids.h | 35
-rw-r--r--  tools/include/uapi/linux/bpf.h | 5
-rw-r--r--  tools/lib/bpf/Build | 2
-rw-r--r--  tools/lib/bpf/Makefile | 2
-rw-r--r--  tools/lib/bpf/bpf.c | 200
-rw-r--r--  tools/lib/bpf/bpf.h | 98
-rw-r--r--  tools/lib/bpf/bpf_core_read.h | 11
-rw-r--r--  tools/lib/bpf/bpf_helpers.h | 13
-rw-r--r--  tools/lib/bpf/bpf_tracing.h | 2
-rw-r--r--  tools/lib/bpf/btf.c | 183
-rw-r--r--  tools/lib/bpf/btf.h | 86
-rw-r--r--  tools/lib/bpf/btf_dump.c | 23
-rw-r--r--  tools/lib/bpf/libbpf.c | 1537
-rw-r--r--  tools/lib/bpf/libbpf.h | 469
-rw-r--r--  tools/lib/bpf/libbpf.map | 114
-rw-r--r--  tools/lib/bpf/libbpf_common.h | 16
-rw-r--r--  tools/lib/bpf/libbpf_internal.h | 24
-rw-r--r--  tools/lib/bpf/libbpf_legacy.h | 28
-rw-r--r--  tools/lib/bpf/libbpf_probes.c | 125
-rw-r--r--  tools/lib/bpf/netlink.c | 62
-rw-r--r--  tools/lib/bpf/relo_core.c | 366
-rw-r--r--  tools/lib/bpf/relo_core.h | 6
-rw-r--r--  tools/lib/bpf/usdt.c | 6
-rw-r--r--  tools/perf/util/bpf-loader.c | 204
-rw-r--r--  tools/testing/selftests/bpf/.gitignore | 2
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 10
-rw-r--r--  tools/testing/selftests/bpf/bench.c | 97
-rw-r--r--  tools/testing/selftests/bpf/bench.h | 16
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_local_storage.c | 287
-rw-r--r--  tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c | 281
-rwxr-xr-x  tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh | 24
-rwxr-xr-x  tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh | 11
-rw-r--r--  tools/testing/selftests/bpf/benchs/run_common.sh | 17
-rw-r--r--  tools/testing/selftests/bpf/bpf_legacy.h | 9
-rw-r--r--  tools/testing/selftests/bpf/config | 6
-rw-r--r--  tools/testing/selftests/bpf/network_helpers.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_loop.c | 62
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c | 61
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/btf.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/core_reloc.c | 75
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c | 4
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c | 313
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/resolve_btfids.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/sock_fields.c | 1
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/usdt.c | 2
-rw-r--r--  tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c | 2
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_loop.c | 114
-rw-r--r--  tools/testing/selftests/bpf/progs/bpf_tracing_net.h | 1
-rw-r--r--  tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c | 3
-rw-r--r--  tools/testing/selftests/bpf/progs/core_reloc_types.h | 112
-rw-r--r--  tools/testing/selftests/bpf/progs/local_storage_bench.c | 104
-rw-r--r--  tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c | 67
-rw-r--r--  tools/testing/selftests/bpf/progs/lsm_cgroup.c | 180
-rw-r--r--  tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c | 14
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c | 35
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c | 21
-rw-r--r--  tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c | 60
-rw-r--r--  tools/testing/selftests/bpf/progs/test_btf_haskv.c | 51
-rw-r--r--  tools/testing/selftests/bpf/progs/test_btf_newkv.c | 18
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c | 19
-rw-r--r--  tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c | 49
-rw-r--r--  tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c | 24
-rwxr-xr-x  tools/testing/selftests/bpf/test_bpftool_synctypes.py | 20
-rw-r--r--  tools/testing/selftests/bpf/test_btf.h | 2
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 367
-rwxr-xr-x  tools/testing/selftests/bpf/test_xsk.sh | 6
-rw-r--r--  tools/testing/selftests/bpf/verifier/bpf_loop_inline.c | 263
-rw-r--r--  tools/testing/selftests/bpf/xsk.c (renamed from tools/lib/bpf/xsk.c) | 92
-rw-r--r--  tools/testing/selftests/bpf/xsk.h (renamed from tools/lib/bpf/xsk.h) | 30
-rwxr-xr-x  tools/testing/selftests/bpf/xsk_prereqs.sh | 4
-rw-r--r--  tools/testing/selftests/bpf/xskxceiver.c (renamed from tools/testing/selftests/bpf/xdpxceiver.c) | 25
-rw-r--r--  tools/testing/selftests/bpf/xskxceiver.h (renamed from tools/testing/selftests/bpf/xdpxceiver.h) | 6
125 files changed, 5141 insertions, 6701 deletions
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 9e27fbdb2206..1b0e6711dec9 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -351,7 +351,7 @@ These instructions have seven implicit operands:
  * Register R0 is an implicit output which contains the data fetched from
    the packet.
  * Registers R1-R5 are scratch registers that are clobbered after a call to
-   ``BPF_ABS | BPF_LD`` or ``BPF_IND`` | BPF_LD instructions.
+   ``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
 
 These instructions have an implicit program exit condition as well. When an
 eBPF program is trying to access the data beyond the packet boundary, the
diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
index f86360f734a8..c5ac97f3d4c4 100644
--- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst
+++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst
@@ -9,8 +9,8 @@ described here. It's recommended to follow these conventions whenever a
 new function or type is added to keep libbpf API clean and consistent.
 
 All types and functions provided by libbpf API should have one of the
-following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
-``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
+following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``btf_dump_``,
+``ring_buffer_``, ``perf_buffer_``.
 
 System call wrappers
 --------------------
@@ -59,15 +59,6 @@ Auxiliary functions and types that don't fit well in any of categories
 described above should have ``libbpf_`` prefix, e.g.
 ``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
 
-AF_XDP functions
--------------------
-
-AF_XDP functions should have an ``xsk_`` prefix, e.g.
-``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
-of both low-level ring access functions and high-level configuration
-functions. These can be mixed and matched. Note that these functions
-are not reentrant for performance reasons.
-
 ABI
 ---
 
diff --git a/MAINTAINERS b/MAINTAINERS
index c4b709e8536c..14b07494e86a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21917,8 +21917,7 @@ F:	include/uapi/linux/if_xdp.h
 F:	include/uapi/linux/xdp_diag.h
 F:	include/net/netns/xdp.h
 F:	net/xdp/
-F:	samples/bpf/xdpsock*
-F:	tools/lib/bpf/xsk*
+F:	tools/testing/selftests/bpf/*xsk*
 
 XEN BLOCK SUBSYSTEM
 M:	Roger Pau Monné <roger.pau@citrix.com>
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 42f2e9a8616c..f08a4447d363 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -246,6 +246,7 @@ static bool is_lsi_offset(int offset, int scale)
 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
 	const struct bpf_prog *prog = ctx->prog;
+	const bool is_main_prog = prog->aux->func_idx == 0;
 	const u8 r6 = bpf2a64[BPF_REG_6];
 	const u8 r7 = bpf2a64[BPF_REG_7];
 	const u8 r8 = bpf2a64[BPF_REG_8];
@@ -299,7 +300,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 	/* Set up BPF prog stack base register */
 	emit(A64_MOV(1, fp, A64_SP), ctx);
 
-	if (!ebpf_from_cbpf) {
+	if (!ebpf_from_cbpf && is_main_prog) {
 		/* Initialize tail_call_cnt */
 		emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
@@ -1530,3 +1531,9 @@ void bpf_jit_free_exec(void *addr)
 {
 	return vfree(addr);
 }
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+	return true;
+}
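
For 9), the shape of program this now allows on arm64 — a tail call issued
from inside a bpf2bpf subprogram — is sketched below with illustrative names.
Note the prologue change above, which initializes tail_call_cnt only in the
main program so subprograms share one counter:

struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

static __noinline int subprog(struct __sk_buff *skb)
{
	/* tail call from a bpf2bpf callee: previously rejected by the
	 * arm64 JIT, now supported with a shared tail-call counter
	 */
	bpf_tail_call(skb, &jmp_table, 0);
	return 0;
}

SEC("tc")
int entry(struct __sk_buff *skb)
{
	return subprog(skb);
}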
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c98b8c0ed3b8..b88f43c9f050 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1771,6 +1771,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			   struct bpf_tramp_link *l, int stack_size,
 			   int run_ctx_off, bool save_ret)
 {
+	void (*exit)(struct bpf_prog *prog, u64 start,
+		     struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit;
+	u64 (*enter)(struct bpf_prog *prog,
+		     struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter;
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
 	int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
@@ -1789,15 +1793,21 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	 */
 	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off);
 
+	if (p->aux->sleepable) {
+		enter = __bpf_prog_enter_sleepable;
+		exit = __bpf_prog_exit_sleepable;
+	} else if (p->expected_attach_type == BPF_LSM_CGROUP) {
+		enter = __bpf_prog_enter_lsm_cgroup;
+		exit = __bpf_prog_exit_lsm_cgroup;
+	}
+
 	/* arg1: mov rdi, progs[i] */
 	emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
 	/* arg2: lea rsi, [rbp - ctx_cookie_off] */
 	EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
 
-	if (emit_call(&prog,
-		      p->aux->sleepable ? __bpf_prog_enter_sleepable :
-		      __bpf_prog_enter, prog))
-			return -EINVAL;
+	if (emit_call(&prog, enter, prog))
+		return -EINVAL;
 	/* remember prog start time returned by __bpf_prog_enter */
 	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
 
@@ -1841,10 +1851,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
 	/* arg3: lea rdx, [rbp - run_ctx_off] */
 	EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
-	if (emit_call(&prog,
-		      p->aux->sleepable ? __bpf_prog_exit_sleepable :
-		      __bpf_prog_exit, prog))
-			return -EINVAL;
+	if (emit_call(&prog, exit, prog))
+		return -EINVAL;
 
 	*pprog = prog;
 	return 0;
@@ -2492,3 +2500,9 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 		return ERR_PTR(-EINVAL);
 	return dst;
 }
+
+/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+	return true;
+}
diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
index 695d1224a71b..7b121bd780eb 100644
--- a/include/linux/bpf-cgroup-defs.h
+++ b/include/linux/bpf-cgroup-defs.h
@@ -10,6 +10,13 @@
 
 struct bpf_prog_array;
 
+#ifdef CONFIG_BPF_LSM
+/* Maximum number of concurrently attachable per-cgroup LSM hooks. */
+#define CGROUP_LSM_NUM 10
+#else
+#define CGROUP_LSM_NUM 0
+#endif
+
 enum cgroup_bpf_attach_type {
 	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
 	CGROUP_INET_INGRESS = 0,
@@ -35,6 +42,8 @@ enum cgroup_bpf_attach_type {
 	CGROUP_INET4_GETSOCKNAME,
 	CGROUP_INET6_GETSOCKNAME,
 	CGROUP_INET_SOCK_RELEASE,
+	CGROUP_LSM_START,
+	CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1,
 	MAX_CGROUP_BPF_ATTACH_TYPE
 };
 
@@ -47,8 +56,8 @@ struct cgroup_bpf {
 	 * have either zero or one element
 	 * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
 	 */
-	struct list_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
-	u32 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
+	struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
+	u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
 
 	/* list of cgroup shared storages */
 	struct list_head storages;
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 669d96d074ad..2bd1b5f8de9b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,6 +23,13 @@ struct ctl_table;
 struct ctl_table_header;
 struct task_struct;
 
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+				       const struct bpf_insn *insn);
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+					 const struct bpf_insn *insn);
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+					  const struct bpf_insn *insn);
+
 #ifdef CONFIG_CGROUP_BPF
 
 #define CGROUP_ATYPE(type) \
@@ -95,7 +102,7 @@ struct bpf_cgroup_link {
 };
 
 struct bpf_prog_list {
-	struct list_head node;
+	struct hlist_node node;
 	struct bpf_prog *prog;
 	struct bpf_cgroup_link *link;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0edd7d2c0064..2b21f2a3452f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -56,6 +56,8 @@ typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
 					struct bpf_iter_aux_info *aux);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+typedef unsigned int (*bpf_func_t)(const void *,
+				   const struct bpf_insn *);
 struct bpf_iter_seq_info {
 	const struct seq_operations *seq_ops;
 	bpf_iter_init_seq_priv_t init_seq_private;
@@ -792,6 +794,10 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
 u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
 				       struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
+					struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
+					struct bpf_tramp_run_ctx *run_ctx);
 void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
 void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 
@@ -879,8 +885,7 @@ struct bpf_dispatcher {
 static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
 	const void *ctx,
 	const struct bpf_insn *insnsi,
-	unsigned int (*bpf_func)(const void *,
-				 const struct bpf_insn *))
+	bpf_func_t bpf_func)
 {
 	return bpf_func(ctx, insnsi);
 }
@@ -909,8 +914,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 	noinline __nocfi unsigned int bpf_dispatcher_##name##_func(	\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
-		unsigned int (*bpf_func)(const void *,			\
-					 const struct bpf_insn *))	\
+		bpf_func_t bpf_func)					\
 	{								\
 		return bpf_func(ctx, insnsi);				\
 	}								\
@@ -921,8 +925,7 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 	unsigned int bpf_dispatcher_##name##_func(			\
 		const void *ctx,					\
 		const struct bpf_insn *insnsi,				\
-		unsigned int (*bpf_func)(const void *,			\
-					 const struct bpf_insn *));	\
+		bpf_func_t bpf_func);					\
 	extern struct bpf_dispatcher bpf_dispatcher_##name;
 #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
 #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
@@ -1061,6 +1064,7 @@ struct bpf_prog_aux {
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
 	u32 verified_insns;
+	int cgroup_atype; /* enum cgroup_bpf_attach_type */
 	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
@@ -1168,6 +1172,11 @@ struct bpf_tramp_link {
 	u64 cookie;
 };
 
+struct bpf_shim_tramp_link {
+	struct bpf_tramp_link link;
+	struct bpf_trampoline *trampoline;
+};
+
 struct bpf_tracing_link {
 	struct bpf_tramp_link link;
 	enum bpf_attach_type attach_type;
@@ -1246,6 +1255,9 @@ struct bpf_dummy_ops {
 int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
 			    union bpf_attr __user *uattr);
 #endif
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+				    int cgroup_atype);
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog);
 #else
 static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
 {
@@ -1269,6 +1281,14 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
 {
 	return -EINVAL;
 }
+static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+						  int cgroup_atype)
+{
+	return -EOPNOTSUPP;
+}
+static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+}
 #endif
 
 struct bpf_array {
@@ -1286,6 +1306,9 @@ struct bpf_array {
 #define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33
 
+/* Maximum number of loops for bpf_loop */
+#define BPF_MAX_LOOPS	BIT(23)
+
 #define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 				 BPF_F_RDONLY_PROG |	\
 				 BPF_F_WRONLY |		\
@@ -2363,9 +2386,13 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
+extern const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
 extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
+extern const struct bpf_func_proto bpf_set_retval_proto;
+extern const struct bpf_func_proto bpf_get_retval_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -2519,4 +2546,12 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
 void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
 int bpf_dynptr_check_size(u32 size);
 
+#ifdef CONFIG_BPF_LSM
+void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
+void bpf_cgroup_atype_put(int cgroup_atype);
+#else
+static inline void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) {}
+static inline void bpf_cgroup_atype_put(int cgroup_atype) {}
+#endif /* CONFIG_BPF_LSM */
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 479c101546ad..4bcf76a9bb06 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -42,6 +42,8 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto;
 extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
 void bpf_inode_storage_free(struct inode *inode);
 
+void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
+
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -65,6 +67,11 @@ static inline void bpf_inode_storage_free(struct inode *inode)
 {
 }
 
+static inline void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+					   bpf_func_t *bpf_func)
+{
+}
+
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3930c963fa67..81b19669efba 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -344,6 +344,14 @@ struct bpf_verifier_state_list {
 	int miss_cnt, hit_cnt;
 };
 
+struct bpf_loop_inline_state {
+	int initialized:1; /* set to true upon first entry */
+	int fit_for_inline:1; /* true if callback function is the same
+			       * at each call and flags are always zero
+			       */
+	u32 callback_subprogno; /* valid when fit_for_inline is true */
+};
+
 /* Possible states for alu_state member. */
 #define BPF_ALU_SANITIZE_SRC		(1U << 0)
 #define BPF_ALU_SANITIZE_DST		(1U << 1)
@@ -373,6 +381,10 @@ struct bpf_insn_aux_data {
 				u32 mem_size;	/* mem_size for non-struct typed var */
 			};
 		} btf_var;
+		/* if instruction is a call to bpf_loop this field tracks
+		 * the state of the relevant registers to make decision about inlining
+		 */
+		struct bpf_loop_inline_state loop_inline_state;
 	};
 	u64 map_key_state; /* constant (32 bit) key tracking for maps */
 	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
index 335a19092368..252a4befeab1 100644
--- a/include/linux/btf_ids.h
+++ b/include/linux/btf_ids.h
@@ -179,7 +179,8 @@ extern struct btf_id_set name;
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)			\
-	BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock)
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d0cbb31b1b4d..4c1a8b247545 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -914,6 +914,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
+bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_helper_changes_pkt_data(void *func);
 
diff --git a/include/linux/net.h b/include/linux/net.h
index 12093f4db50c..a03485e8cbb2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -152,6 +152,8 @@ struct module;
 struct sk_buff;
 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
 			       unsigned int, size_t);
+typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *);
+
 
 struct proto_ops {
 	int		family;
@@ -214,6 +216,8 @@ struct proto_ops {
 	 */
 	int		(*read_sock)(struct sock *sk, read_descriptor_t *desc,
 				     sk_read_actor_t recv_actor);
+	/* This is different from read_sock(), it reads an entire skb at a time. */
+	int		(*read_skb)(struct sock *sk, skb_read_actor_t recv_actor);
 	int		(*sendpage_locked)(struct sock *sk, struct page *page,
 					   int offset, size_t size, int flags);
 	int		(*sendmsg_locked)(struct sock *sk, struct msghdr *msg,
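
To make the new hook concrete: unlike read_sock(), which iterates via a
read_descriptor_t, read_skb() hands the actor one complete skb per call. A
minimal sketch of an actor follows (the body is illustrative; sockmap's
sk_psock_verdict_recv in net/core/skmsg.c is the in-tree user, and TCP, UDP,
and AF_UNIX wire up their read_skb implementations in this series):

static int my_recv_actor(struct sock *sk, struct sk_buff *skb)
{
	/* consume the whole skb; return bytes used, or <= 0 to stop */
	return skb->len;
}

/* caller side (sketch):
 *	copied = sk->sk_socket->ops->read_skb(sk, my_recv_actor);
 */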
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c21a9b516f1e..8e48dc56837b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -672,6 +672,7 @@ void tcp_get_info(struct sock *, struct tcp_info *);
 /* Read 'sendfile()'-style from a TCP socket */
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		  sk_read_actor_t recv_actor);
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 
 void tcp_initialize_rcv_mss(struct sock *sk);
 
diff --git a/include/net/udp.h b/include/net/udp.h
index b60eea2e3fae..987f7fc7c0aa 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -306,8 +306,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
 			       struct sk_buff *skb);
 struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
-		  sk_read_actor_t recv_actor);
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e81362891596..379e68fb866f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -998,6 +998,7 @@ enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
+	BPF_LSM_CGROUP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1431,6 +1432,7 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
+		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -6075,6 +6077,8 @@ struct bpf_prog_info {
 	__u64 run_cnt;
 	__u64 recursion_misses;
 	__u32 verified_insns;
+	__u32 attach_btf_obj_id;
+	__u32 attach_btf_id;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -6782,6 +6786,7 @@ enum bpf_core_relo_kind {
 	BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
 	BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
 	BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
+	BPF_CORE_TYPE_MATCHES = 12,          /* type match in target kernel */
 };
 
 /*
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index d5d96ceca105..7e8fd49406f6 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -723,9 +723,6 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
-/* maximum number of loops */
-#define MAX_LOOPS	BIT(23)
-
 BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
 	   u64, flags)
 {
@@ -733,9 +730,13 @@ BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
 	u64 ret;
 	u32 i;
 
+	/* Note: these safety checks are also verified when bpf_loop
+	 * is inlined, be careful to modify this code in sync. See
+	 * function verifier.c:inline_bpf_loop.
+	 */
 	if (flags)
 		return -EINVAL;
-	if (nr_loops > MAX_LOOPS)
+	if (nr_loops > BPF_MAX_LOOPS)
 		return -E2BIG;
 
 	for (i = 0; i < nr_loops; i++) {
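
To tie 2) to the checks above: a bpf_loop() call is eligible for inlining
when the callback is statically known and flags is a constant zero, as in
this sketch (illustrative names; assumes vmlinux.h and bpf_helpers.h):

static int add_index(__u32 index, void *ctx)
{
	*(__u64 *)ctx += index;
	return 0;	/* 0 = continue, 1 = break out early */
}

SEC("tc")
int sum_example(struct __sk_buff *skb)
{
	__u64 sum = 0;

	/* callback and flags are compile-time constants, so the
	 * verifier can emit the loop inline instead of a helper call;
	 * the nr_loops/flags checks above are re-emitted inline
	 */
	bpf_loop(100, add_index, &sum, 0);
	return 0;
}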
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index c1351df9f7ee..d469b7f3deef 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -16,6 +16,7 @@
 #include <linux/bpf_local_storage.h>
 #include <linux/btf_ids.h>
 #include <linux/ima.h>
+#include <linux/bpf-cgroup.h>
 
 /* For every LSM hook that allows attachment of BPF programs, declare a nop
  * function where a BPF program can be attached.
@@ -35,6 +36,57 @@ BTF_SET_START(bpf_lsm_hooks)
 #undef LSM_HOOK
 BTF_SET_END(bpf_lsm_hooks)
 
+/* List of LSM hooks that should operate on 'current' cgroup regardless
+ * of function signature.
+ */
+BTF_SET_START(bpf_lsm_current_hooks)
+/* operate on freshly allocated sk without any cgroup association */
+BTF_ID(func, bpf_lsm_sk_alloc_security)
+BTF_ID(func, bpf_lsm_sk_free_security)
+BTF_SET_END(bpf_lsm_current_hooks)
+
+/* List of LSM hooks that trigger while the socket is properly locked.
+ */
+BTF_SET_START(bpf_lsm_locked_sockopt_hooks)
+BTF_ID(func, bpf_lsm_socket_sock_rcv_skb)
+BTF_ID(func, bpf_lsm_sock_graft)
+BTF_ID(func, bpf_lsm_inet_csk_clone)
+BTF_ID(func, bpf_lsm_inet_conn_established)
+BTF_SET_END(bpf_lsm_locked_sockopt_hooks)
+
+/* List of LSM hooks that trigger while the socket is _not_ locked,
+ * but it's ok to call bpf_{g,s}etsockopt because the socket is still
+ * in the early init phase.
+ */
+BTF_SET_START(bpf_lsm_unlocked_sockopt_hooks)
+BTF_ID(func, bpf_lsm_socket_post_create)
+BTF_ID(func, bpf_lsm_socket_socketpair)
+BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)
+
+void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
+			     bpf_func_t *bpf_func)
+{
+	const struct btf_param *args;
+
+	if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
+	    btf_id_set_contains(&bpf_lsm_current_hooks,
+				prog->aux->attach_btf_id)) {
+		*bpf_func = __cgroup_bpf_run_lsm_current;
+		return;
+	}
+
+	args = btf_params(prog->aux->attach_func_proto);
+
+#ifdef CONFIG_NET
+	if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
+		*bpf_func = __cgroup_bpf_run_lsm_socket;
+	else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
+		*bpf_func = __cgroup_bpf_run_lsm_sock;
+	else
+#endif
+		*bpf_func = __cgroup_bpf_run_lsm_current;
+}
+
 int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
 			const struct bpf_prog *prog)
 {
@@ -158,6 +210,35 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
 	case BPF_FUNC_get_attach_cookie:
 		return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
+	case BPF_FUNC_get_local_storage:
+		return prog->expected_attach_type == BPF_LSM_CGROUP ?
+			&bpf_get_local_storage_proto : NULL;
+	case BPF_FUNC_set_retval:
+		return prog->expected_attach_type == BPF_LSM_CGROUP ?
+			&bpf_set_retval_proto : NULL;
+	case BPF_FUNC_get_retval:
+		return prog->expected_attach_type == BPF_LSM_CGROUP ?
+			&bpf_get_retval_proto : NULL;
+	case BPF_FUNC_setsockopt:
+		if (prog->expected_attach_type != BPF_LSM_CGROUP)
+			return NULL;
+		if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
+					prog->aux->attach_btf_id))
+			return &bpf_sk_setsockopt_proto;
+		if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
+					prog->aux->attach_btf_id))
+			return &bpf_unlocked_sk_setsockopt_proto;
+		return NULL;
+	case BPF_FUNC_getsockopt:
+		if (prog->expected_attach_type != BPF_LSM_CGROUP)
+			return NULL;
+		if (btf_id_set_contains(&bpf_lsm_locked_sockopt_hooks,
+					prog->aux->attach_btf_id))
+			return &bpf_sk_getsockopt_proto;
+		if (btf_id_set_contains(&bpf_lsm_unlocked_sockopt_hooks,
+					prog->aux->attach_btf_id))
+			return &bpf_unlocked_sk_getsockopt_proto;
+		return NULL;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
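
A sketch of how the helpers exposed above combine in a BPF_LSM_CGROUP
program (hook and errno value are illustrative; per the series' convention,
returning 1 allows, while returning 0 rejects with -EPERM unless
bpf_set_retval() overrides the error):

SEC("lsm_cgroup/socket_bind")
int BPF_PROG(restrict_bind, struct socket *sock,
	     struct sockaddr *address, int addrlen)
{
	/* assumes <errno.h> for EACCES */
	bpf_set_retval(-EACCES);
	return 0;	/* reject with the retval set above */
}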
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d9a3c9207240..7e0068c3399c 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -503,10 +503,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		goto unlock;
 	}
 
-	/* Error during st_ops->reg().  It is very unlikely since
-	 * the above init_member() should have caught it earlier
-	 * before reg().  The only possibility is if there was a race
-	 * in registering the struct_ops (under the same name) to
+	/* Error during st_ops->reg(). Can happen if this struct_ops needs to be
+	 * verified as a whole, after all init_member() calls. Can also happen if
+	 * there was a race in registering the struct_ops (under the same name) to
 	 * a sub-system through different struct_ops's maps.
 	 */
 	set_memory_nx((long)st_map->image, 1);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index d003d4d8242a..4423045b8ff3 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5368,6 +5368,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 
 	if (arg == nr_args) {
 		switch (prog->expected_attach_type) {
+		case BPF_LSM_CGROUP:
 		case BPF_LSM_MAC:
 		case BPF_TRACE_FEXIT:
 			/* When LSM programs are attached to void LSM hooks
@@ -7421,87 +7422,6 @@ EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
 
 #define MAX_TYPES_ARE_COMPAT_DEPTH 2
 
-static
-int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
-				const struct btf *targ_btf, __u32 targ_id,
-				int level)
-{
-	const struct btf_type *local_type, *targ_type;
-	int depth = 32; /* max recursion depth */
-
-	/* caller made sure that names match (ignoring flavor suffix) */
-	local_type = btf_type_by_id(local_btf, local_id);
-	targ_type = btf_type_by_id(targ_btf, targ_id);
-	if (btf_kind(local_type) != btf_kind(targ_type))
-		return 0;
-
-recur:
-	depth--;
-	if (depth < 0)
-		return -EINVAL;
-
-	local_type = btf_type_skip_modifiers(local_btf, local_id, &local_id);
-	targ_type = btf_type_skip_modifiers(targ_btf, targ_id, &targ_id);
-	if (!local_type || !targ_type)
-		return -EINVAL;
-
-	if (btf_kind(local_type) != btf_kind(targ_type))
-		return 0;
-
-	switch (btf_kind(local_type)) {
-	case BTF_KIND_UNKN:
-	case BTF_KIND_STRUCT:
-	case BTF_KIND_UNION:
-	case BTF_KIND_ENUM:
-	case BTF_KIND_FWD:
-	case BTF_KIND_ENUM64:
-		return 1;
-	case BTF_KIND_INT:
-		/* just reject deprecated bitfield-like integers; all other
-		 * integers are by default compatible between each other
-		 */
-		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
-	case BTF_KIND_PTR:
-		local_id = local_type->type;
-		targ_id = targ_type->type;
-		goto recur;
-	case BTF_KIND_ARRAY:
-		local_id = btf_array(local_type)->type;
-		targ_id = btf_array(targ_type)->type;
-		goto recur;
-	case BTF_KIND_FUNC_PROTO: {
-		struct btf_param *local_p = btf_params(local_type);
-		struct btf_param *targ_p = btf_params(targ_type);
-		__u16 local_vlen = btf_vlen(local_type);
-		__u16 targ_vlen = btf_vlen(targ_type);
-		int i, err;
-
-		if (local_vlen != targ_vlen)
-			return 0;
-
-		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
-			if (level <= 0)
-				return -EINVAL;
-
-			btf_type_skip_modifiers(local_btf, local_p->type, &local_id);
-			btf_type_skip_modifiers(targ_btf, targ_p->type, &targ_id);
-			err = __bpf_core_types_are_compat(local_btf, local_id,
-							  targ_btf, targ_id,
-							  level - 1);
-			if (err <= 0)
-				return err;
-		}
-
-		/* tail recurse for return type check */
-		btf_type_skip_modifiers(local_btf, local_type->type, &local_id);
-		btf_type_skip_modifiers(targ_btf, targ_type->type, &targ_id);
-		goto recur;
-	}
-	default:
-		return 0;
-	}
-}
-
 /* Check local and target types for compatibility. This check is used for
  * type-based CO-RE relocations and follow slightly different rules than
  * field-based relocations. This function assumes that root types were already
@@ -7524,11 +7444,19 @@ recur:
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id)
 {
-	return __bpf_core_types_are_compat(local_btf, local_id,
-					   targ_btf, targ_id,
+	return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
 					   MAX_TYPES_ARE_COMPAT_DEPTH);
 }
 
+#define MAX_TYPES_MATCH_DEPTH 2
+
+int bpf_core_types_match(const struct btf *local_btf, u32 local_id,
+			 const struct btf *targ_btf, u32 targ_id)
+{
+	return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false,
+				      MAX_TYPES_MATCH_DEPTH);
+}
+
 static bool bpf_core_is_flavor_sep(const char *s)
 {
 	/* check X___Y name pattern, where X and Y are not underscores */
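
Item 1) exposes this matching logic to programs as the BPF_CORE_TYPE_MATCHES
relocation, reachable from BPF C through libbpf's new bpf_core_type_matches()
macro in bpf_core_read.h. A sketch (the ___local flavor suffix is the usual
CO-RE convention and is ignored during matching):

struct task_struct___local {
	int pid;
} __attribute__((preserve_access_index));

SEC("raw_tp/sys_enter")
int check_match(void *ctx)
{
	if (bpf_core_type_matches(struct task_struct___local)) {
		/* the target kernel's task_struct has members that
		 * line up with the local definition by name and type
		 */
	}
	return 0;
}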
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 7a394f7c205c..59b7eb60d5b4 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -14,6 +14,8 @@
 #include <linux/string.h>
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/bpf_lsm.h>
+#include <linux/bpf_verifier.h>
 #include <net/sock.h>
 #include <net/bpf_sk_storage.h>
 
@@ -61,6 +63,132 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
 	return run_ctx.retval;
 }
 
+unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
+				       const struct bpf_insn *insn)
+{
+	const struct bpf_prog *shim_prog;
+	struct sock *sk;
+	struct cgroup *cgrp;
+	int ret = 0;
+	u64 *args;
+
+	args = (u64 *)ctx;
+	sk = (void *)(unsigned long)args[0];
+	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	if (likely(cgrp))
+		ret = bpf_prog_run_array_cg(&cgrp->bpf,
+					    shim_prog->aux->cgroup_atype,
+					    ctx, bpf_prog_run, 0, NULL);
+	return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
+					 const struct bpf_insn *insn)
+{
+	const struct bpf_prog *shim_prog;
+	struct socket *sock;
+	struct cgroup *cgrp;
+	int ret = 0;
+	u64 *args;
+
+	args = (u64 *)ctx;
+	sock = (void *)(unsigned long)args[0];
+	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
+	if (likely(cgrp))
+		ret = bpf_prog_run_array_cg(&cgrp->bpf,
+					    shim_prog->aux->cgroup_atype,
+					    ctx, bpf_prog_run, 0, NULL);
+	return ret;
+}
+
+unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
+					  const struct bpf_insn *insn)
+{
+	const struct bpf_prog *shim_prog;
+	struct cgroup *cgrp;
+	int ret = 0;
+
+	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
+	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
+
+	/* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
+	cgrp = task_dfl_cgroup(current);
+	if (likely(cgrp))
+		ret = bpf_prog_run_array_cg(&cgrp->bpf,
+					    shim_prog->aux->cgroup_atype,
+					    ctx, bpf_prog_run, 0, NULL);
+	return ret;
+}
+
+#ifdef CONFIG_BPF_LSM
+struct cgroup_lsm_atype {
+	u32 attach_btf_id;
+	int refcnt;
+};
+
+static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];
+
+static enum cgroup_bpf_attach_type
+bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
+{
+	int i;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	if (attach_type != BPF_LSM_CGROUP)
+		return to_cgroup_bpf_attach_type(attach_type);
+
+	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
+		if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
+			return CGROUP_LSM_START + i;
+
+	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
+		if (cgroup_lsm_atype[i].attach_btf_id == 0)
+			return CGROUP_LSM_START + i;
+
+	return -E2BIG;
+
+}
+
+void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
+{
+	int i = cgroup_atype - CGROUP_LSM_START;
+
+	lockdep_assert_held(&cgroup_mutex);
+
+	WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
+		     cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);
+
+	cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
+	cgroup_lsm_atype[i].refcnt++;
+}
+
+void bpf_cgroup_atype_put(int cgroup_atype)
+{
+	int i = cgroup_atype - CGROUP_LSM_START;
+
+	mutex_lock(&cgroup_mutex);
+	if (--cgroup_lsm_atype[i].refcnt <= 0)
+		cgroup_lsm_atype[i].attach_btf_id = 0;
+	WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
+	mutex_unlock(&cgroup_mutex);
+}
+#else
+static enum cgroup_bpf_attach_type
+bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
+{
+	if (attach_type != BPF_LSM_CGROUP)
+		return to_cgroup_bpf_attach_type(attach_type);
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_BPF_LSM */
+
 void cgroup_bpf_offline(struct cgroup *cgrp)
 {
 	cgroup_get(cgrp);
@@ -157,15 +285,22 @@ static void cgroup_bpf_release(struct work_struct *work)
 	mutex_lock(&cgroup_mutex);
 
 	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
-		struct list_head *progs = &cgrp->bpf.progs[atype];
-		struct bpf_prog_list *pl, *pltmp;
+		struct hlist_head *progs = &cgrp->bpf.progs[atype];
+		struct bpf_prog_list *pl;
+		struct hlist_node *pltmp;
 
-		list_for_each_entry_safe(pl, pltmp, progs, node) {
-			list_del(&pl->node);
-			if (pl->prog)
+		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
+			hlist_del(&pl->node);
+			if (pl->prog) {
+				if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
+					bpf_trampoline_unlink_cgroup_shim(pl->prog);
 				bpf_prog_put(pl->prog);
-			if (pl->link)
+			}
+			if (pl->link) {
+				if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
+					bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
 				bpf_cgroup_link_auto_detach(pl->link);
+			}
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
 		}
@@ -217,12 +352,12 @@ static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
 /* count number of elements in the list.
  * it's slow but the list cannot be long
  */
-static u32 prog_list_length(struct list_head *head)
+static u32 prog_list_length(struct hlist_head *head)
 {
 	struct bpf_prog_list *pl;
 	u32 cnt = 0;
 
-	list_for_each_entry(pl, head, node) {
+	hlist_for_each_entry(pl, head, node) {
 		if (!prog_list_prog(pl))
 			continue;
 		cnt++;
@@ -291,7 +426,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
 		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
 			continue;
 
-		list_for_each_entry(pl, &p->bpf.progs[atype], node) {
+		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
 			if (!prog_list_prog(pl))
 				continue;
 
@@ -342,7 +477,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
 		cgroup_bpf_get(p);
 
 	for (i = 0; i < NR; i++)
-		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
+		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);
 
 	INIT_LIST_HEAD(&cgrp->bpf.storages);
 
@@ -418,7 +553,7 @@ cleanup:
 
 #define BPF_CGROUP_MAX_PROGS 64
 
-static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
 					       struct bpf_prog *prog,
 					       struct bpf_cgroup_link *link,
 					       struct bpf_prog *replace_prog,
@@ -428,12 +563,12 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 
 	/* single-attach case */
 	if (!allow_multi) {
-		if (list_empty(progs))
+		if (hlist_empty(progs))
 			return NULL;
-		return list_first_entry(progs, typeof(*pl), node);
+		return hlist_entry(progs->first, typeof(*pl), node);
 	}
 
-	list_for_each_entry(pl, progs, node) {
+	hlist_for_each_entry(pl, progs, node) {
 		if (prog && pl->prog == prog && prog != replace_prog)
 			/* disallow attaching the same prog twice */
 			return ERR_PTR(-EINVAL);
@@ -444,7 +579,7 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
 
 	/* direct prog multi-attach w/ replacement case */
 	if (replace_prog) {
-		list_for_each_entry(pl, progs, node) {
+		hlist_for_each_entry(pl, progs, node) {
 			if (pl->prog == replace_prog)
 				/* a match found */
 				return pl;
@@ -478,9 +613,10 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
 	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
+	struct bpf_prog *new_prog = prog ? : link->link.prog;
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog_list *pl;
-	struct list_head *progs;
+	struct hlist_head *progs;
 	int err;
 
 	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
@@ -494,7 +630,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 		/* replace_prog implies BPF_F_REPLACE, and vice versa */
 		return -EINVAL;
 
-	atype = to_cgroup_bpf_attach_type(type);
+	atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
 	if (atype < 0)
 		return -EINVAL;
 
@@ -503,7 +639,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	if (!hierarchy_allows_attach(cgrp, atype))
 		return -EPERM;
 
-	if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
+	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
 		/* Disallow attaching non-overridable on top
 		 * of existing overridable in this cgroup.
 		 * Disallow attaching multi-prog if overridable or none
@@ -525,12 +661,22 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	if (pl) {
 		old_prog = pl->prog;
 	} else {
+		struct hlist_node *last = NULL;
+
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
 			bpf_cgroup_storages_free(new_storage);
 			return -ENOMEM;
 		}
-		list_add_tail(&pl->node, progs);
+		if (hlist_empty(progs))
+			hlist_add_head(&pl->node, progs);
+		else
+			hlist_for_each(last, progs) {
+				if (last->next)
+					continue;
+				hlist_add_behind(&pl->node, last);
+				break;
+			}
 	}
 
 	pl->prog = prog;
@@ -538,17 +684,30 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
 	bpf_cgroup_storages_assign(pl->storage, storage);
 	cgrp->bpf.flags[atype] = saved_flags;
 
+	if (type == BPF_LSM_CGROUP) {
+		err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
+		if (err)
+			goto cleanup;
+	}
+
 	err = update_effective_progs(cgrp, atype);
 	if (err)
-		goto cleanup;
+		goto cleanup_trampoline;
 
-	if (old_prog)
+	if (old_prog) {
+		if (type == BPF_LSM_CGROUP)
+			bpf_trampoline_unlink_cgroup_shim(old_prog);
 		bpf_prog_put(old_prog);
-	else
+	} else {
 		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
+	}
 	bpf_cgroup_storages_link(new_storage, cgrp, type);
 	return 0;
 
+cleanup_trampoline:
+	if (type == BPF_LSM_CGROUP)
+		bpf_trampoline_unlink_cgroup_shim(new_prog);
+
 cleanup:
 	if (old_prog) {
 		pl->prog = old_prog;
@@ -556,7 +715,7 @@ cleanup:
 	}
 	bpf_cgroup_storages_free(new_storage);
 	if (!old_prog) {
-		list_del(&pl->node);
+		hlist_del(&pl->node);
 		kfree(pl);
 	}
 	return err;
@@ -587,7 +746,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_array *progs;
 	struct bpf_prog_list *pl;
-	struct list_head *head;
+	struct hlist_head *head;
 	struct cgroup *cg;
 	int pos;
 
@@ -603,7 +762,7 @@ static void replace_effective_prog(struct cgroup *cgrp,
 				continue;
 
 			head = &cg->bpf.progs[atype];
-			list_for_each_entry(pl, head, node) {
+			hlist_for_each_entry(pl, head, node) {
 				if (!prog_list_prog(pl))
 					continue;
 				if (pl->link == link)
@@ -637,10 +796,10 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog *old_prog;
 	struct bpf_prog_list *pl;
-	struct list_head *progs;
+	struct hlist_head *progs;
 	bool found = false;
 
-	atype = to_cgroup_bpf_attach_type(link->type);
+	atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
 	if (atype < 0)
 		return -EINVAL;
 
@@ -649,7 +808,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
 	if (link->link.prog->type != new_prog->type)
 		return -EINVAL;
 
-	list_for_each_entry(pl, progs, node) {
+	hlist_for_each_entry(pl, progs, node) {
 		if (pl->link == link) {
 			found = true;
 			break;
@@ -688,7 +847,7 @@ out_unlock:
 	return ret;
 }
 
-static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
 					       struct bpf_prog *prog,
 					       struct bpf_cgroup_link *link,
 					       bool allow_multi)
@@ -696,14 +855,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 	struct bpf_prog_list *pl;
 
 	if (!allow_multi) {
-		if (list_empty(progs))
+		if (hlist_empty(progs))
 			/* report error when trying to detach and nothing is attached */
 			return ERR_PTR(-ENOENT);
 
 		/* to maintain backward compatibility NONE and OVERRIDE cgroups
 		 * allow detaching with invalid FD (prog==NULL) in legacy mode
 		 */
-		return list_first_entry(progs, typeof(*pl), node);
+		return hlist_entry(progs->first, typeof(*pl), node);
 	}
 
 	if (!prog && !link)
@@ -713,7 +872,7 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 		return ERR_PTR(-EINVAL);
 
 	/* find the prog or link and detach it */
-	list_for_each_entry(pl, progs, node) {
+	hlist_for_each_entry(pl, progs, node) {
 		if (pl->prog == prog && pl->link == link)
 			return pl;
 	}
@@ -737,7 +896,7 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
 	struct cgroup_subsys_state *css;
 	struct bpf_prog_array *progs;
 	struct bpf_prog_list *pl;
-	struct list_head *head;
+	struct hlist_head *head;
 	struct cgroup *cg;
 	int pos;
 
@@ -754,7 +913,7 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
 				continue;
 
 			head = &cg->bpf.progs[atype];
-			list_for_each_entry(pl, head, node) {
+			hlist_for_each_entry(pl, head, node) {
 				if (!prog_list_prog(pl))
 					continue;
 				if (pl->prog == prog && pl->link == link)
@@ -791,10 +950,16 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog *old_prog;
 	struct bpf_prog_list *pl;
-	struct list_head *progs;
+	struct hlist_head *progs;
+	u32 attach_btf_id = 0;
 	u32 flags;
 
-	atype = to_cgroup_bpf_attach_type(type);
+	if (prog)
+		attach_btf_id = prog->aux->attach_btf_id;
+	if (link)
+		attach_btf_id = link->link.prog->aux->attach_btf_id;
+
+	atype = bpf_cgroup_atype_find(type, attach_btf_id);
 	if (atype < 0)
 		return -EINVAL;
 
@@ -822,13 +987,17 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	}
 
 	/* now can actually delete it from this cgroup list */
-	list_del(&pl->node);
+	hlist_del(&pl->node);
+
 	kfree(pl);
-	if (list_empty(progs))
+	if (hlist_empty(progs))
 		/* last program was detached, reset flags to zero */
 		cgrp->bpf.flags[atype] = 0;
-	if (old_prog)
+	if (old_prog) {
+		if (type == BPF_LSM_CGROUP)
+			bpf_trampoline_unlink_cgroup_shim(old_prog);
 		bpf_prog_put(old_prog);
+	}
 	static_branch_dec(&cgroup_bpf_enabled_key[atype]);
 	return 0;
 }
@@ -848,57 +1017,90 @@ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 			      union bpf_attr __user *uattr)
 {
+	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
 	enum bpf_attach_type type = attr->query.attach_type;
+	enum cgroup_bpf_attach_type from_atype, to_atype;
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog_array *effective;
-	struct list_head *progs;
-	struct bpf_prog *prog;
 	int cnt, ret = 0, i;
+	int total_cnt = 0;
 	u32 flags;
 
-	atype = to_cgroup_bpf_attach_type(type);
-	if (atype < 0)
-		return -EINVAL;
-
-	progs = &cgrp->bpf.progs[atype];
-	flags = cgrp->bpf.flags[atype];
+	if (type == BPF_LSM_CGROUP) {
+		if (attr->query.prog_cnt && prog_ids && !prog_attach_flags)
+			return -EINVAL;
 
-	effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
-					      lockdep_is_held(&cgroup_mutex));
+		from_atype = CGROUP_LSM_START;
+		to_atype = CGROUP_LSM_END;
+		flags = 0;
+	} else {
+		from_atype = to_cgroup_bpf_attach_type(type);
+		if (from_atype < 0)
+			return -EINVAL;
+		to_atype = from_atype;
+		flags = cgrp->bpf.flags[from_atype];
+	}
 
-	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
-		cnt = bpf_prog_array_length(effective);
-	else
-		cnt = prog_list_length(progs);
+	for (atype = from_atype; atype <= to_atype; atype++) {
+		if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
+							      lockdep_is_held(&cgroup_mutex));
+			total_cnt += bpf_prog_array_length(effective);
+		} else {
+			total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
+		}
+	}
 
 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
 		return -EFAULT;
-	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
+	if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
 		return -EFAULT;
-	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
+	if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
 		/* return early if user requested only program count + flags */
 		return 0;
-	if (attr->query.prog_cnt < cnt) {
-		cnt = attr->query.prog_cnt;
+
+	if (attr->query.prog_cnt < total_cnt) {
+		total_cnt = attr->query.prog_cnt;
 		ret = -ENOSPC;
 	}
 
-	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
-		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
-	} else {
-		struct bpf_prog_list *pl;
-		u32 id;
-
-		i = 0;
-		list_for_each_entry(pl, progs, node) {
-			prog = prog_list_prog(pl);
-			id = prog->aux->id;
-			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
-				return -EFAULT;
-			if (++i == cnt)
-				break;
+	for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
+		if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
+							      lockdep_is_held(&cgroup_mutex));
+			cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
+			ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
+		} else {
+			struct hlist_head *progs;
+			struct bpf_prog_list *pl;
+			struct bpf_prog *prog;
+			u32 id;
+
+			progs = &cgrp->bpf.progs[atype];
+			cnt = min_t(int, prog_list_length(progs), total_cnt);
+			i = 0;
+			hlist_for_each_entry(pl, progs, node) {
+				prog = prog_list_prog(pl);
+				id = prog->aux->id;
+				if (copy_to_user(prog_ids + i, &id, sizeof(id)))
+					return -EFAULT;
+				if (++i == cnt)
+					break;
+			}
 		}
+
+		if (prog_attach_flags) {
+			flags = cgrp->bpf.flags[atype];
+
+			for (i = 0; i < cnt; i++)
+				if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags)))
+					return -EFAULT;
+			prog_attach_flags += cnt;
+		}
+
+		prog_ids += cnt;
+		total_cnt -= cnt;
 	}
 	return ret;
 }
@@ -987,6 +1189,8 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
 
 	WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
 				    cg_link->type));
+	if (cg_link->type == BPF_LSM_CGROUP)
+		bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);
 
 	cg = cg_link->cgroup;
 	cg_link->cgroup = NULL;
@@ -1331,7 +1535,7 @@ BPF_CALL_0(bpf_get_retval)
 	return ctx->retval;
 }
 
-static const struct bpf_func_proto bpf_get_retval_proto = {
+const struct bpf_func_proto bpf_get_retval_proto = {
 	.func		= bpf_get_retval,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
@@ -1346,7 +1550,7 @@ BPF_CALL_1(bpf_set_retval, int, retval)
 	return 0;
 }
 
-static const struct bpf_func_proto bpf_set_retval_proto = {
+const struct bpf_func_proto bpf_set_retval_proto = {
 	.func		= bpf_set_retval,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b5ffebcce6cc..805c2ad5c793 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -107,6 +107,9 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 	fp->aux->prog = fp;
 	fp->jit_requested = ebpf_jit_enabled();
 	fp->blinding_requested = bpf_jit_blinding_enabled(fp);
+#ifdef CONFIG_CGROUP_BPF
+	aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID;
+#endif
 
 	INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
 	mutex_init(&fp->aux->used_maps_mutex);
@@ -2570,6 +2573,10 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 #ifdef CONFIG_BPF_SYSCALL
 	bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
 #endif
+#ifdef CONFIG_CGROUP_BPF
+	if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID)
+		bpf_cgroup_atype_put(aux->cgroup_atype);
+#endif
 	bpf_free_used_maps(aux);
 	bpf_free_used_btfs(aux);
 	if (bpf_prog_is_dev_bound(aux))
@@ -2666,6 +2673,8 @@ const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
 const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
+const struct bpf_func_proto bpf_set_retval_proto __weak;
+const struct bpf_func_proto bpf_get_retval_proto __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
@@ -2729,6 +2738,12 @@ bool __weak bpf_jit_needs_zext(void)
 	return false;
 }
 
+/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
+bool __weak bpf_jit_supports_subprog_tailcalls(void)
+{
+	return false;
+}
+
 bool __weak bpf_jit_supports_kfunc_call(void)
 {
 	return false;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d5af5b99f0d..ab688d85b2c6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3416,6 +3416,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 		return BPF_PROG_TYPE_SK_LOOKUP;
 	case BPF_XDP:
 		return BPF_PROG_TYPE_XDP;
+	case BPF_LSM_CGROUP:
+		return BPF_PROG_TYPE_LSM;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -3469,6 +3471,11 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_LSM:
+		if (ptype == BPF_PROG_TYPE_LSM &&
+		    prog->expected_attach_type != BPF_LSM_CGROUP)
+			return -EINVAL;
+
 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 		break;
 	default:
@@ -3506,13 +3513,14 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_LSM:
 		return cgroup_bpf_prog_detach(attr, ptype);
 	default:
 		return -EINVAL;
 	}
 }
 
-#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
+#define BPF_PROG_QUERY_LAST_FIELD query.prog_attach_flags
 
 static int bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
@@ -3548,6 +3556,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_SYSCTL:
 	case BPF_CGROUP_GETSOCKOPT:
 	case BPF_CGROUP_SETSOCKOPT:
+	case BPF_LSM_CGROUP:
 		return cgroup_bpf_prog_query(attr, uattr);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_query(attr, uattr);
@@ -4058,6 +4067,11 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 
 	if (prog->aux->btf)
 		info.btf_id = btf_obj_id(prog->aux->btf);
+	info.attach_btf_id = prog->aux->attach_btf_id;
+	if (prog->aux->attach_btf)
+		info.attach_btf_obj_id = btf_obj_id(prog->aux->attach_btf);
+	else if (prog->aux->dst_prog)
+		info.attach_btf_obj_id = btf_obj_id(prog->aux->dst_prog->aux->attach_btf);
 
 	ulen = info.nr_func_info;
 	info.nr_func_info = prog->aux->func_info_cnt;
@@ -4540,6 +4554,8 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 			ret = bpf_raw_tp_link_attach(prog, NULL);
 		else if (prog->expected_attach_type == BPF_TRACE_ITER)
 			ret = bpf_iter_link_attach(attr, uattr, prog);
+		else if (prog->expected_attach_type == BPF_LSM_CGROUP)
+			ret = cgroup_bpf_link_attach(attr, prog);
 		else
 			ret = bpf_tracing_prog_attach(prog,
 						      attr->link_create.target_fd,
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 93c7675f0c9e..6cd226584c33 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -11,6 +11,8 @@
 #include <linux/rcupdate_wait.h>
 #include <linux/module.h>
 #include <linux/static_call.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf_lsm.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -410,7 +412,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 	}
 }
 
-int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 {
 	enum bpf_tramp_prog_type kind;
 	struct bpf_tramp_link *link_exiting;
@@ -418,44 +420,33 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
 	int cnt = 0, i;
 
 	kind = bpf_attach_type_to_tramp(link->link.prog);
-	mutex_lock(&tr->mutex);
-	if (tr->extension_prog) {
+	if (tr->extension_prog)
 		/* cannot attach fentry/fexit if extension prog is attached.
 		 * cannot overwrite extension prog either.
 		 */
-		err = -EBUSY;
-		goto out;
-	}
+		return -EBUSY;
 
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		cnt += tr->progs_cnt[i];
 
 	if (kind == BPF_TRAMP_REPLACE) {
 		/* Cannot attach extension if fentry/fexit are in use. */
-		if (cnt) {
-			err = -EBUSY;
-			goto out;
-		}
+		if (cnt)
+			return -EBUSY;
 		tr->extension_prog = link->link.prog;
-		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
-					 link->link.prog->bpf_func);
-		goto out;
-	}
-	if (cnt >= BPF_MAX_TRAMP_LINKS) {
-		err = -E2BIG;
-		goto out;
+		return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL,
+					  link->link.prog->bpf_func);
 	}
-	if (!hlist_unhashed(&link->tramp_hlist)) {
+	if (cnt >= BPF_MAX_TRAMP_LINKS)
+		return -E2BIG;
+	if (!hlist_unhashed(&link->tramp_hlist))
 		/* prog already linked */
-		err = -EBUSY;
-		goto out;
-	}
+		return -EBUSY;
 	hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) {
 		if (link_exiting->link.prog != link->link.prog)
 			continue;
 		/* prog already linked */
-		err = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
 
 	hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
@@ -465,34 +456,220 @@ int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline
 		hlist_del_init(&link->tramp_hlist);
 		tr->progs_cnt[kind]--;
 	}
-out:
+	return err;
+}
+
+int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+{
+	int err;
+
+	mutex_lock(&tr->mutex);
+	err = __bpf_trampoline_link_prog(link, tr);
 	mutex_unlock(&tr->mutex);
 	return err;
 }
 
-/* bpf_trampoline_unlink_prog() should never fail. */
-int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
 {
 	enum bpf_tramp_prog_type kind;
 	int err;
 
 	kind = bpf_attach_type_to_tramp(link->link.prog);
-	mutex_lock(&tr->mutex);
 	if (kind == BPF_TRAMP_REPLACE) {
 		WARN_ON_ONCE(!tr->extension_prog);
 		err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP,
 					 tr->extension_prog->bpf_func, NULL);
 		tr->extension_prog = NULL;
-		goto out;
+		return err;
 	}
 	hlist_del_init(&link->tramp_hlist);
 	tr->progs_cnt[kind]--;
-	err = bpf_trampoline_update(tr);
-out:
+	return bpf_trampoline_update(tr);
+}
+
+/* bpf_trampoline_unlink_prog() should never fail. */
+int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr)
+{
+	int err;
+
+	mutex_lock(&tr->mutex);
+	err = __bpf_trampoline_unlink_prog(link, tr);
+	mutex_unlock(&tr->mutex);
+	return err;
+}
+
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+static void bpf_shim_tramp_link_release(struct bpf_link *link)
+{
+	struct bpf_shim_tramp_link *shim_link =
+		container_of(link, struct bpf_shim_tramp_link, link.link);
+
+	/* paired with 'shim_link->trampoline = tr' in bpf_trampoline_link_cgroup_shim */
+	if (!shim_link->trampoline)
+		return;
+
+	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline));
+	bpf_trampoline_put(shim_link->trampoline);
+}
+
+static void bpf_shim_tramp_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_shim_tramp_link *shim_link =
+		container_of(link, struct bpf_shim_tramp_link, link.link);
+
+	kfree(shim_link);
+}
+
+static const struct bpf_link_ops bpf_shim_tramp_link_lops = {
+	.release = bpf_shim_tramp_link_release,
+	.dealloc = bpf_shim_tramp_link_dealloc,
+};
+
+static struct bpf_shim_tramp_link *cgroup_shim_alloc(const struct bpf_prog *prog,
+						     bpf_func_t bpf_func,
+						     int cgroup_atype)
+{
+	struct bpf_shim_tramp_link *shim_link = NULL;
+	struct bpf_prog *p;
+
+	shim_link = kzalloc(sizeof(*shim_link), GFP_USER);
+	if (!shim_link)
+		return NULL;
+
+	p = bpf_prog_alloc(1, 0);
+	if (!p) {
+		kfree(shim_link);
+		return NULL;
+	}
+
+	p->jited = false;
+	p->bpf_func = bpf_func;
+
+	p->aux->cgroup_atype = cgroup_atype;
+	p->aux->attach_func_proto = prog->aux->attach_func_proto;
+	p->aux->attach_btf_id = prog->aux->attach_btf_id;
+	p->aux->attach_btf = prog->aux->attach_btf;
+	btf_get(p->aux->attach_btf);
+	p->type = BPF_PROG_TYPE_LSM;
+	p->expected_attach_type = BPF_LSM_MAC;
+	bpf_prog_inc(p);
+	bpf_link_init(&shim_link->link.link, BPF_LINK_TYPE_UNSPEC,
+		      &bpf_shim_tramp_link_lops, p);
+	bpf_cgroup_atype_get(p->aux->attach_btf_id, cgroup_atype);
+
+	return shim_link;
+}
+
+static struct bpf_shim_tramp_link *cgroup_shim_find(struct bpf_trampoline *tr,
+						    bpf_func_t bpf_func)
+{
+	struct bpf_tramp_link *link;
+	int kind;
+
+	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+		hlist_for_each_entry(link, &tr->progs_hlist[kind], tramp_hlist) {
+			struct bpf_prog *p = link->link.prog;
+
+			if (p->bpf_func == bpf_func)
+				return container_of(link, struct bpf_shim_tramp_link, link);
+		}
+	}
+
+	return NULL;
+}
+
+int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
+				    int cgroup_atype)
+{
+	struct bpf_shim_tramp_link *shim_link = NULL;
+	struct bpf_attach_target_info tgt_info = {};
+	struct bpf_trampoline *tr;
+	bpf_func_t bpf_func;
+	u64 key;
+	int err;
+
+	err = bpf_check_attach_target(NULL, prog, NULL,
+				      prog->aux->attach_btf_id,
+				      &tgt_info);
+	if (err)
+		return err;
+
+	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+					 prog->aux->attach_btf_id);
+
+	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+	tr = bpf_trampoline_get(key, &tgt_info);
+	if (!tr)
+		return -ENOMEM;
+
+	mutex_lock(&tr->mutex);
+
+	shim_link = cgroup_shim_find(tr, bpf_func);
+	if (shim_link) {
+		/* Reusing an existing shim attached by another program. */
+		bpf_link_inc(&shim_link->link.link);
+
+		mutex_unlock(&tr->mutex);
+		bpf_trampoline_put(tr); /* bpf_trampoline_get above */
+		return 0;
+	}
+
+	/* Allocate and install new shim. */
+
+	shim_link = cgroup_shim_alloc(prog, bpf_func, cgroup_atype);
+	if (!shim_link) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	err = __bpf_trampoline_link_prog(&shim_link->link, tr);
+	if (err)
+		goto err;
+
+	shim_link->trampoline = tr;
+	/* note, we're still holding tr refcnt from above */
+
 	mutex_unlock(&tr->mutex);
+
+	return 0;
+err:
+	mutex_unlock(&tr->mutex);
+
+	if (shim_link)
+		bpf_link_put(&shim_link->link.link);
+
+	/* have to release tr while _not_ holding its mutex */
+	bpf_trampoline_put(tr); /* bpf_trampoline_get above */
+
 	return err;
 }
 
+void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
+{
+	struct bpf_shim_tramp_link *shim_link = NULL;
+	struct bpf_trampoline *tr;
+	bpf_func_t bpf_func;
+	u64 key;
+
+	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
+					 prog->aux->attach_btf_id);
+
+	bpf_lsm_find_cgroup_shim(prog, &bpf_func);
+	tr = bpf_trampoline_lookup(key);
+	if (WARN_ON_ONCE(!tr))
+		return;
+
+	mutex_lock(&tr->mutex);
+	shim_link = cgroup_shim_find(tr, bpf_func);
+	mutex_unlock(&tr->mutex);
+
+	if (shim_link)
+		bpf_link_put(&shim_link->link.link);
+
+	bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */
+}
+#endif
+
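
The shim machinery above lets a single trampoline-attached stub per LSM hook fan out to the per-cgroup effective arrays; program authors never see it. A hedged sketch of the BPF side (libbpf in this series maps the "lsm_cgroup/" section prefix to BPF_PROG_TYPE_LSM with expected_attach_type BPF_LSM_CGROUP; the hook and the blanket-deny policy are illustrative):

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	/* Reject bind(2) for every socket in the attached cgroup:
	 * returning 1 allows the operation, 0 rejects it.
	 */
	SEC("lsm_cgroup/socket_bind")
	int BPF_PROG(deny_bind, struct socket *sock,
		     struct sockaddr *address, int addrlen)
	{
		return 0;
	}

	char _license[] SEC("license") = "GPL";

Attachment goes through the usual cgroup path, e.g. bpf_prog_attach(prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0), which is what ends up calling bpf_trampoline_link_cgroup_shim() above.
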
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info)
 {
@@ -625,6 +802,31 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
 	rcu_read_unlock();
 }
 
+u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
+					struct bpf_tramp_run_ctx *run_ctx)
+	__acquires(RCU)
+{
+	/* Runtime stats are exported via actual BPF_LSM_CGROUP
+	 * programs, not the shims.
+	 */
+	rcu_read_lock();
+	migrate_disable();
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
+	return NO_START_TIME;
+}
+
+void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
+					struct bpf_tramp_run_ctx *run_ctx)
+	__releases(RCU)
+{
+	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
+	migrate_enable();
+	rcu_read_unlock();
+}
+
 u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 {
 	rcu_read_lock_trace();
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a4012b35fdbd..328cfab3af60 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6153,7 +6153,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
 
 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
 {
-	return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+	return env->prog->jit_requested &&
+	       bpf_jit_supports_subprog_tailcalls();
 }
 
 static int check_map_func_compatibility(struct bpf_verifier_env *env,
@@ -7121,6 +7122,41 @@ static int check_get_func_ip(struct bpf_verifier_env *env)
 	return -ENOTSUPP;
 }
 
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+	return &env->insn_aux_data[env->insn_idx];
+}
+
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *reg = &regs[BPF_REG_4];
+	bool reg_is_null = register_is_null(reg);
+
+	if (reg_is_null)
+		mark_chain_precision(env, BPF_REG_4);
+
+	return reg_is_null;
+}
+
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
+
+	if (!state->initialized) {
+		state->initialized = 1;
+		state->fit_for_inline = loop_flag_is_zero(env);
+		state->callback_subprogno = subprogno;
+		return;
+	}
+
+	if (!state->fit_for_inline)
+		return;
+
+	state->fit_for_inline = (loop_flag_is_zero(env) &&
+				 state->callback_subprogno == subprogno);
+}
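
update_loop_inline_state() marks a bpf_loop() call as fit for inlining only when the flags argument (R4) is provably zero on every verification path and the callback subprog is the same each time. A sketch of a program satisfying both conditions (section name and scaffolding are illustrative; assumes the usual vmlinux.h/bpf_helpers.h includes):

	static long add_one(__u64 index, void *ctx)
	{
		(*(__u64 *)ctx)++;
		return 0;	/* 0 continues the loop, 1 breaks out */
	}

	SEC("tc")
	int count(struct __sk_buff *skb)
	{
		__u64 sum = 0;

		/* flags == 0 and a single static callback: inlinable */
		bpf_loop(100, add_one, &sum, 0);
		return sum != 100;
	}
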
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			     int *insn_idx_p)
 {
@@ -7273,6 +7309,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		err = check_bpf_snprintf_call(env, regs);
 		break;
 	case BPF_FUNC_loop:
+		update_loop_inline_state(env, meta.subprogno);
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_loop_callback_state);
 		break;
@@ -7282,6 +7319,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 				reg_type_str(env, regs[BPF_REG_1].type));
 			return -EACCES;
 		}
+		break;
+	case BPF_FUNC_set_retval:
+		if (env->prog->expected_attach_type == BPF_LSM_CGROUP) {
+			if (!env->prog->aux->attach_func_proto->type) {
+				/* Make sure programs attached to void
+				 * hooks don't try to modify the return value.
+				 */
+				verbose(env, "BPF_LSM_CGROUP programs attached to void LSM hooks can't modify the return value!\n");
+				return -EINVAL;
+			}
+		}
+		break;
 	}
 
 	if (err)
@@ -7679,11 +7728,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 	return true;
 }
 
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
-	return &env->insn_aux_data[env->insn_idx];
-}
-
 enum {
 	REASON_BOUNDS	= -1,
 	REASON_TYPE	= -2,
@@ -9054,7 +9098,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	if (opcode == BPF_END || opcode == BPF_NEG) {
 		if (opcode == BPF_NEG) {
-			if (BPF_SRC(insn->code) != 0 ||
+			if (BPF_SRC(insn->code) != BPF_K ||
 			    insn->src_reg != BPF_REG_0 ||
 			    insn->off != 0 || insn->imm != 0) {
 				verbose(env, "BPF_NEG uses reserved fields\n");
@@ -10381,11 +10425,21 @@ static int check_return_code(struct bpf_verifier_env *env)
 	const bool is_subprog = frame->subprogno;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
-	if (!is_subprog &&
-	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
-	     prog_type == BPF_PROG_TYPE_LSM) &&
-	    !prog->aux->attach_func_proto->type)
-		return 0;
+	if (!is_subprog) {
+		switch (prog_type) {
+		case BPF_PROG_TYPE_LSM:
+			if (prog->expected_attach_type == BPF_LSM_CGROUP)
+				/* See below: the allowed range is 1 or 0-1, depending on the hook. */
+				break;
+			fallthrough;
+		case BPF_PROG_TYPE_STRUCT_OPS:
+			if (!prog->aux->attach_func_proto->type)
+				return 0;
+			break;
+		default:
+			break;
+		}
+	}
 
 	/* eBPF calling convention is such that R0 is used
 	 * to return the value from eBPF program.
@@ -10476,6 +10530,22 @@ static int check_return_code(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_SK_LOOKUP:
 		range = tnum_range(SK_DROP, SK_PASS);
 		break;
+
+	case BPF_PROG_TYPE_LSM:
+		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
+			/* Regular BPF_PROG_TYPE_LSM programs can return
+			 * any value.
+			 */
+			return 0;
+		}
+		if (!env->prog->aux->attach_func_proto->type) {
+			/* Make sure programs attached to void
+			 * hooks don't try to modify the return value.
+			 */
+			range = tnum_range(1, 1);
+		}
+		break;
+
 	case BPF_PROG_TYPE_EXT:
 		/* freplace program can return anything as its return value
 		 * depends on the to-be-replaced kernel func or bpf program.
@@ -10492,6 +10562,10 @@ static int check_return_code(struct bpf_verifier_env *env)
 
 	if (!tnum_in(range, reg->var_off)) {
 		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
+		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
+		    prog_type == BPF_PROG_TYPE_LSM &&
+		    !prog->aux->attach_func_proto->type)
+			verbose(env, "Note, BPF_LSM_CGROUP programs attached to void LSM hooks can't modify the return value!\n");
 		return -EINVAL;
 	}
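
Taken together with the BPF_FUNC_set_retval check added to check_helper_call() above, the BPF_LSM_CGROUP contract is: programs on hooks with a non-void return must return 0 (reject) or 1 (allow) and may set a custom errno via bpf_set_retval(), while programs on void hooks must return 1 and cannot touch the return value. A hedged sketch for a non-void hook (the hook and errno choice are illustrative):

	SEC("lsm_cgroup/socket_create")
	int BPF_PROG(no_raw_sockets, int family, int type, int protocol, int kern)
	{
		if (type == SOCK_RAW) {
			bpf_set_retval(-EPERM);	/* errno seen by the caller */
			return 0;		/* reject */
		}
		return 1;			/* allow */
	}
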
 
@@ -14296,6 +14370,142 @@ patch_call_imm:
 	return 0;
 }
 
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+					int position,
+					s32 stack_base,
+					u32 callback_subprogno,
+					u32 *cnt)
+{
+	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+	int reg_loop_max = BPF_REG_6;
+	int reg_loop_cnt = BPF_REG_7;
+	int reg_loop_ctx = BPF_REG_8;
+
+	struct bpf_prog *new_prog;
+	u32 callback_start;
+	u32 call_insn_offset;
+	s32 callback_offset;
+
+	/* This represents an inlined version of bpf_iter.c:bpf_loop;
+	 * take care to keep the two in sync when modifying either one.
+	 */
+	struct bpf_insn insn_buf[] = {
+		/* Return error and jump to the end of the patch if
+		 * expected number of iterations is too big.
+		 */
+		BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+		BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+		BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+		/* spill R6, R7, R8 to use these as loop vars */
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+		/* initialize loop vars */
+		BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+		BPF_MOV32_IMM(reg_loop_cnt, 0),
+		BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+		/* loop header,
+		 * if reg_loop_cnt >= reg_loop_max skip the loop body
+		 */
+		BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+		/* callback call,
+		 * correct callback offset would be set after patching
+		 */
+		BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+		BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+		BPF_CALL_REL(0),
+		/* increment loop counter */
+		BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+		/* jump to loop header if callback returned 0 */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+		/* return value of bpf_loop,
+		 * set R0 to the number of iterations
+		 */
+		BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+		/* restore original values of R6, R7, R8 */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+		BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+	};
+
+	*cnt = ARRAY_SIZE(insn_buf);
+	new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+	if (!new_prog)
+		return new_prog;
+
+	/* callback start is known only after patching */
+	callback_start = env->subprog_info[callback_subprogno].start;
+	/* Note: insn_buf[12] is the offset of the BPF_CALL_REL instruction */
+	call_insn_offset = position + 12;
+	callback_offset = callback_start - call_insn_offset - 1;
+	new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+	return new_prog;
+}
+
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+	return insn->code == (BPF_JMP | BPF_CALL) &&
+		insn->src_reg == 0 &&
+		insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main), check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found, they are replaced with the
+ * instruction sequence produced by inline_bpf_loop(), and the
+ * subprog's stack_depth is increased by the size of three registers.
+ * This stack space is used to spill the values of R6, R7 and R8,
+ * which hold the loop bound, counter and context variables.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	int i, cur_subprog = 0, cnt, delta = 0;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	u16 stack_depth = subprogs[cur_subprog].stack_depth;
+	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+	u16 stack_depth_extra = 0;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		struct bpf_loop_inline_state *inline_state =
+			&env->insn_aux_data[i + delta].loop_inline_state;
+
+		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+			struct bpf_prog *new_prog;
+
+			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+			new_prog = inline_bpf_loop(env,
+						   i + delta,
+						   -(stack_depth + stack_depth_extra),
+						   inline_state->callback_subprogno,
+						   &cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta     += cnt - 1;
+			env->prog  = new_prog;
+			insn       = new_prog->insnsi + i + delta;
+		}
+
+		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+			subprogs[cur_subprog].stack_depth += stack_depth_extra;
+			cur_subprog++;
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+			stack_depth_extra = 0;
+		}
+	}
+
+	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+	return 0;
+}
+
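
To make the stack bookkeeping concrete, consider a subprog already using stack_depth = 20 bytes when its bpf_loop() call is inlined:

	stack_depth_roundup = round_up(20, 8) - 20   = 4
	stack_depth_extra   = 3 * BPF_REG_SIZE + 4   = 28
	spill base          = -(20 + 28)             = -48

so R6, R7 and R8 are spilled to the 8-byte-aligned slots at -48, -40 and -32, and the subprog's stack_depth grows to 48.
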
 static void free_states(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state_list *sl, *sln;
@@ -14715,6 +14925,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 		fallthrough;
 	case BPF_MODIFY_RETURN:
 	case BPF_LSM_MAC:
+	case BPF_LSM_CGROUP:
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
 		if (!btf_type_is_func(t)) {
@@ -15033,6 +15244,9 @@ skip_full_check:
 		ret = check_max_stack_depth(env);
 
 	/* instruction rewrites happen after this point */
+	if (ret == 0)
+		ret = optimize_bpf_loop(env);
+
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 1f5351cae25f..88ba5b4bd0c5 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1343,6 +1343,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 	int size, esize;
 	int rctx;
 
+#ifdef CONFIG_BPF_EVENTS
 	if (bpf_prog_array_valid(call)) {
 		u32 ret;
 
@@ -1350,6 +1351,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
 		if (!ret)
 			return;
 	}
+#endif /* CONFIG_BPF_EVENTS */
 
 	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 2a7836e115b4..5820704165a6 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -14733,9 +14733,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = {
 		.build_skb = build_test_skb_linear_no_head_frag,
 		.features = NETIF_F_SG | NETIF_F_FRAGLIST |
 			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO |
-			    NETIF_F_LLTX_BIT | NETIF_F_GRO |
+			    NETIF_F_LLTX | NETIF_F_GRO |
 			    NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
-			    NETIF_F_HW_VLAN_STAG_TX_BIT
+			    NETIF_F_HW_VLAN_STAG_TX
 	}
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 994d91680b12..4ef77ec5255e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5012,8 +5012,8 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static int _bpf_setsockopt(struct sock *sk, int level, int optname,
-			   char *optval, int optlen)
+static int __bpf_setsockopt(struct sock *sk, int level, int optname,
+			    char *optval, int optlen)
 {
 	char devname[IFNAMSIZ];
 	int val, valbool;
@@ -5024,8 +5024,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 	if (!sk_fullsock(sk))
 		return -EINVAL;
 
-	sock_owned_by_me(sk);
-
 	if (level == SOL_SOCKET) {
 		if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
 			return -EINVAL;
@@ -5258,14 +5256,20 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 	return ret;
 }
 
-static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 			   char *optval, int optlen)
 {
+	if (sk_fullsock(sk))
+		sock_owned_by_me(sk);
+	return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+static int __bpf_getsockopt(struct sock *sk, int level, int optname,
+			    char *optval, int optlen)
+{
 	if (!sk_fullsock(sk))
 		goto err_clear;
 
-	sock_owned_by_me(sk);
-
 	if (level == SOL_SOCKET) {
 		if (optlen != sizeof(int))
 			goto err_clear;
@@ -5360,6 +5364,14 @@ err_clear:
 	return -EINVAL;
 }
 
+static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+			   char *optval, int optlen)
+{
+	if (sk_fullsock(sk))
+		sock_owned_by_me(sk);
+	return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
 BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
 	   int, optname, char *, optval, int, optlen)
 {
@@ -5400,6 +5412,40 @@ const struct bpf_func_proto bpf_sk_getsockopt_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
+	   int, optname, char *, optval, int, optlen)
+{
+	return __bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
+	.func		= bpf_unlocked_sk_setsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level,
+	   int, optname, char *, optval, int, optlen)
+{
+	return __bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
+	.func		= bpf_unlocked_sk_getsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
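
These _unlocked variants back bpf_setsockopt()/bpf_getsockopt() in attach points where the socket lock is not guaranteed to be held (in this series, the BPF_LSM_CGROUP hooks), which is why the sock_owned_by_me() assertion is skipped. A hedged sketch in the spirit of the selftests (hook and option choice are illustrative):

	SEC("lsm_cgroup/socket_post_create")
	int BPF_PROG(set_tos, struct socket *sock, int family,
		     int type, int protocol, int kern)
	{
		int tos = 0x10;	/* IPTOS_LOWDELAY */

		if (family == AF_INET)
			bpf_setsockopt(sock->sk, SOL_IP, IP_TOS,
				       &tos, sizeof(tos));
		return 1;	/* allow */
	}
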
+
 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
@@ -6470,8 +6516,8 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
 		 u64 flags)
 {
 	struct sock *sk = NULL;
-	u8 family = AF_UNSPEC;
 	struct net *net;
+	u8 family;
 	int sdif;
 
 	if (len == sizeof(tuple->ipv4))
@@ -6481,8 +6527,7 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
 	else
 		return NULL;
 
-	if (unlikely(family == AF_UNSPEC || flags ||
-		     !((s32)netns_id < 0 || netns_id <= S32_MAX)))
+	if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
 		goto out;
 
 	if (family == AF_INET)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fc69154bbc88..266d3b7b7d0b 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -497,23 +497,27 @@ bool sk_msg_is_readable(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_msg_is_readable);
 
-static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
-						  struct sk_buff *skb)
+static struct sk_msg *alloc_sk_msg(void)
 {
 	struct sk_msg *msg;
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
+	if (unlikely(!msg))
 		return NULL;
+	sg_init_marker(msg->sg.data, NR_MSG_FRAG_IDS);
+	return msg;
+}
 
-	if (!sk_rmem_schedule(sk, skb, skb->truesize))
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+						  struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
 		return NULL;
 
-	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
-	if (unlikely(!msg))
+	if (!sk_rmem_schedule(sk, skb, skb->truesize))
 		return NULL;
 
-	sk_msg_init(msg);
-	return msg;
+	return alloc_sk_msg();
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
@@ -590,13 +594,12 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
 static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
 				     u32 off, u32 len)
 {
-	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	struct sk_msg *msg = alloc_sk_msg();
 	struct sock *sk = psock->sk;
 	int err;
 
 	if (unlikely(!msg))
 		return -EAGAIN;
-	sk_msg_init(msg);
 	skb_set_owner_r(skb, sk);
 	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
@@ -1165,21 +1168,14 @@ static void sk_psock_done_strp(struct sk_psock *psock)
 }
 #endif /* CONFIG_BPF_STREAM_PARSER */
 
-static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
-				 unsigned int offset, size_t orig_len)
+static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
 {
-	struct sock *sk = (struct sock *)desc->arg.data;
 	struct sk_psock *psock;
 	struct bpf_prog *prog;
 	int ret = __SK_DROP;
-	int len = orig_len;
+	int len = skb->len;
 
-	/* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */
-	skb = skb_clone(skb, GFP_ATOMIC);
-	if (!skb) {
-		desc->error = -ENOMEM;
-		return 0;
-	}
+	skb_get(skb);
 
 	rcu_read_lock();
 	psock = sk_psock(sk);
@@ -1192,12 +1188,10 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
 	if (!prog)
 		prog = READ_ONCE(psock->progs.skb_verdict);
 	if (likely(prog)) {
-		skb->sk = sk;
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
 		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
-		skb->sk = NULL;
 	}
 	if (sk_psock_verdict_apply(psock, skb, ret) < 0)
 		len = 0;
@@ -1209,16 +1203,10 @@ out:
 static void sk_psock_verdict_data_ready(struct sock *sk)
 {
 	struct socket *sock = sk->sk_socket;
-	read_descriptor_t desc;
 
-	if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
+	if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
 		return;
-
-	desc.arg.data = sk;
-	desc.error = 0;
-	desc.count = 1;
-
-	sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
+	sock->ops->read_skb(sk, sk_psock_verdict_recv);
 }
 
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9f08ccfaf6da..028813dfecb0 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1578,7 +1578,7 @@ void sock_map_destroy(struct sock *sk)
 	saved_destroy = psock->saved_destroy;
 	sock_map_remove_links(sk, psock);
 	rcu_read_unlock();
-	sk_psock_stop(psock, true);
+	sk_psock_stop(psock, false);
 	sk_psock_put(sk, psock);
 	saved_destroy(sk);
 }
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index da81f56fdd1c..7abd652a558f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1040,6 +1040,7 @@ const struct proto_ops inet_stream_ops = {
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
+	.read_skb	   = tcp_read_skb,
 	.sendmsg_locked    = tcp_sendmsg_locked,
 	.sendpage_locked   = tcp_sendpage_locked,
 	.peek_len	   = tcp_peek_len,
@@ -1067,7 +1068,7 @@ const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.read_sock	   = udp_read_sock,
+	.read_skb	   = udp_read_skb,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index f79ab942f03b..7a181631b995 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -14,18 +14,6 @@
 /* "extern" is to avoid sparse warning.  It is only used in bpf_struct_ops.c. */
 extern struct bpf_struct_ops bpf_tcp_congestion_ops;
 
-static u32 optional_ops[] = {
-	offsetof(struct tcp_congestion_ops, init),
-	offsetof(struct tcp_congestion_ops, release),
-	offsetof(struct tcp_congestion_ops, set_state),
-	offsetof(struct tcp_congestion_ops, cwnd_event),
-	offsetof(struct tcp_congestion_ops, in_ack_event),
-	offsetof(struct tcp_congestion_ops, pkts_acked),
-	offsetof(struct tcp_congestion_ops, min_tso_segs),
-	offsetof(struct tcp_congestion_ops, sndbuf_expand),
-	offsetof(struct tcp_congestion_ops, cong_control),
-};
-
 static u32 unsupported_ops[] = {
 	offsetof(struct tcp_congestion_ops, get_info),
 };
@@ -51,18 +39,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
 	return 0;
 }
 
-static bool is_optional(u32 member_offset)
-{
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(optional_ops); i++) {
-		if (member_offset == optional_ops[i])
-			return true;
-	}
-
-	return false;
-}
-
 static bool is_unsupported(u32 member_offset)
 {
 	unsigned int i;
@@ -111,6 +87,12 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log,
 	}
 
 	switch (off) {
+	case offsetof(struct sock, sk_pacing_rate):
+		end = offsetofend(struct sock, sk_pacing_rate);
+		break;
+	case offsetof(struct sock, sk_pacing_status):
+		end = offsetofend(struct sock, sk_pacing_status);
+		break;
 	case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv):
 		end = offsetofend(struct inet_connection_sock, icsk_ca_priv);
 		break;
@@ -240,7 +222,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
 {
 	const struct tcp_congestion_ops *utcp_ca;
 	struct tcp_congestion_ops *tcp_ca;
-	int prog_fd;
 	u32 moff;
 
 	utcp_ca = (const struct tcp_congestion_ops *)udata;
@@ -262,14 +243,6 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
 		return 1;
 	}
 
-	if (!btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL))
-		return 0;
-
-	/* Ensure bpf_prog is provided for compulsory func ptr */
-	prog_fd = (int)(*(unsigned long *)(udata + moff));
-	if (!prog_fd && !is_optional(moff) && !is_unsupported(moff))
-		return -EINVAL;
-
 	return 0;
 }
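
With the optional_ops[] table and the prog_fd check in init_member() gone, any tcp_congestion_ops member not listed in unsupported_ops[] may simply be left unset; the required callbacks are instead validated at registration time by the core TCP code (ssthresh, undo_cwnd, and one of cong_avoid/cong_control). A hedged sketch of a minimal BPF congestion control relying on this, assuming the selftests' bpf_tcp_helpers.h scaffolding:

	SEC("struct_ops/minimal_ssthresh")
	__u32 BPF_PROG(minimal_ssthresh, struct sock *sk)
	{
		return tcp_sk(sk)->snd_ssthresh;
	}

	SEC("struct_ops/minimal_undo_cwnd")
	__u32 BPF_PROG(minimal_undo_cwnd, struct sock *sk)
	{
		return tcp_sk(sk)->snd_cwnd;
	}

	SEC("struct_ops/minimal_cong_avoid")
	void BPF_PROG(minimal_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
	{
	}

	SEC(".struct_ops")
	struct tcp_congestion_ops minimal = {
		.ssthresh	= (void *)minimal_ssthresh,
		.undo_cwnd	= (void *)minimal_undo_cwnd,
		.cong_avoid	= (void *)minimal_cong_avoid,
		.name		= "bpf_minimal",
	};
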
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d2ca56aa18eb..21bdee88383b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1734,6 +1734,50 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 }
 EXPORT_SYMBOL(tcp_read_sock);
 
+int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 seq = tp->copied_seq;
+	struct sk_buff *skb;
+	int copied = 0;
+	u32 offset;
+
+	if (sk->sk_state == TCP_LISTEN)
+		return -ENOTCONN;
+
+	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+		int used;
+
+		__skb_unlink(skb, &sk->sk_receive_queue);
+		used = recv_actor(sk, skb);
+		if (used <= 0) {
+			if (!copied)
+				copied = used;
+			break;
+		}
+		seq += used;
+		copied += used;
+
+		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
+			consume_skb(skb);
+			++seq;
+			break;
+		}
+		consume_skb(skb);
+		break;
+	}
+	WRITE_ONCE(tp->copied_seq, seq);
+
+	tcp_rcv_space_adjust(sk);
+
+	/* Clean up the data we have read: this will trigger ACKs as needed. */
+	if (copied > 0)
+		tcp_cleanup_rbuf(sk, copied);
+
+	return copied;
+}
+EXPORT_SYMBOL(tcp_read_skb);
+
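
read_skb replaces the (desc, offset, len) actor contract with whole-skb delivery: tcp_read_skb() unlinks each skb, hands it to the actor, and consumes it afterwards. A minimal sketch of the new actor shape (sk_psock_verdict_recv() in net/core/skmsg.c above is the real in-tree user):

	static int my_recv_actor(struct sock *sk, struct sk_buff *skb)
	{
		/* The caller still owns and frees the skb; take a reference
		 * (as sk_psock_verdict_recv() does via skb_get()) if the
		 * data must outlive this call.
		 */
		return skb->len;	/* bytes accepted; <= 0 stops the loop */
	}
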
 int tcp_peek_len(struct socket *sock)
 {
 	return tcp_inq(sock->sk);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6172b4750a88..2516078aa03e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1797,8 +1797,7 @@ busy_check:
 }
 EXPORT_SYMBOL(__skb_recv_udp);
 
-int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
-		  sk_read_actor_t recv_actor)
+int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	int copied = 0;
 
@@ -1820,7 +1819,8 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 			continue;
 		}
 
-		used = recv_actor(desc, skb, 0, skb->len);
+		WARN_ON(!skb_set_owner_sk_safe(skb, sk));
+		used = recv_actor(sk, skb);
 		if (used <= 0) {
 			if (!copied)
 				copied = used;
@@ -1831,13 +1831,12 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		}
 
 		kfree_skb(skb);
-		if (!desc->count)
-			break;
+		break;
 	}
 
 	return copied;
 }
-EXPORT_SYMBOL(udp_read_sock);
+EXPORT_SYMBOL(udp_read_skb);
 
 /*
  * 	This should be easy, if there is something there we
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 658823e91eca..0ee0770e79aa 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -702,6 +702,7 @@ const struct proto_ops inet6_stream_ops = {
 	.sendpage_locked   = tcp_sendpage_locked,
 	.splice_read	   = tcp_splice_read,
 	.read_sock	   = tcp_read_sock,
+	.read_skb	   = tcp_read_skb,
 	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	   = inet6_compat_ioctl,
@@ -727,7 +728,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet6_sendmsg,		/* retpoline's sake */
 	.recvmsg	   = inet6_recvmsg,		/* retpoline's sake */
-	.read_sock	   = udp_read_sock,
+	.read_skb	   = udp_read_skb,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 	.set_peek_off	   = sk_set_peek_off,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 784b4b30ce9a..bf338b782fc4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -763,10 +763,8 @@ static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 				       unsigned int flags);
 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
-			  sk_read_actor_t recv_actor);
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
-				 sk_read_actor_t recv_actor);
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 			      int, int);
 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -820,7 +818,7 @@ static const struct proto_ops unix_stream_ops = {
 	.shutdown =	unix_shutdown,
 	.sendmsg =	unix_stream_sendmsg,
 	.recvmsg =	unix_stream_recvmsg,
-	.read_sock =	unix_stream_read_sock,
+	.read_skb =	unix_stream_read_skb,
 	.mmap =		sock_no_mmap,
 	.sendpage =	unix_stream_sendpage,
 	.splice_read =	unix_stream_splice_read,
@@ -845,7 +843,7 @@ static const struct proto_ops unix_dgram_ops = {
 	.listen =	sock_no_listen,
 	.shutdown =	unix_shutdown,
 	.sendmsg =	unix_dgram_sendmsg,
-	.read_sock =	unix_read_sock,
+	.read_skb =	unix_read_skb,
 	.recvmsg =	unix_dgram_recvmsg,
 	.mmap =		sock_no_mmap,
 	.sendpage =	sock_no_sendpage,
@@ -2506,8 +2504,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
 	return __unix_dgram_recvmsg(sk, msg, size, flags);
 }
 
-static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
-			  sk_read_actor_t recv_actor)
+static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	int copied = 0;
 
@@ -2522,7 +2519,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
 		if (!skb)
 			return err;
 
-		used = recv_actor(desc, skb, 0, skb->len);
+		used = recv_actor(sk, skb);
 		if (used <= 0) {
 			if (!copied)
 				copied = used;
@@ -2533,8 +2530,7 @@ static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
 		}
 
 		kfree_skb(skb);
-		if (!desc->count)
-			break;
+		break;
 	}
 
 	return copied;
@@ -2669,13 +2665,12 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
 }
 #endif
 
-static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
-				 sk_read_actor_t recv_actor)
+static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
 		return -ENOTCONN;
 
-	return unix_read_sock(sk, desc, recv_actor);
+	return unix_read_skb(sk, recv_actor);
 }
 
 static int unix_stream_read_generic(struct unix_stream_read_state *state,
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 03e3d3529ac9..5002a5b9a7da 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -45,9 +45,6 @@ tprogs-y += xdp_rxq_info
 tprogs-y += syscall_tp
 tprogs-y += cpustat
 tprogs-y += xdp_adjust_tail
-tprogs-y += xdpsock
-tprogs-y += xdpsock_ctrl_proc
-tprogs-y += xsk_fwd
 tprogs-y += xdp_fwd
 tprogs-y += task_fd_query
 tprogs-y += xdp_sample_pkts
@@ -109,9 +106,6 @@ xdp_rxq_info-objs := xdp_rxq_info_user.o
 syscall_tp-objs := syscall_tp_user.o
 cpustat-objs := cpustat_user.o
 xdp_adjust_tail-objs := xdp_adjust_tail_user.o
-xdpsock-objs := xdpsock_user.o
-xdpsock_ctrl_proc-objs := xdpsock_ctrl_proc.o
-xsk_fwd-objs := xsk_fwd.o
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o
@@ -179,7 +173,6 @@ always-y += xdp_sample_pkts_kern.o
 always-y += ibumad_kern.o
 always-y += hbm_out_kern.o
 always-y += hbm_edt_kern.o
-always-y += xdpsock_kern.o
 
 ifeq ($(ARCH), arm)
 # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
@@ -224,8 +217,6 @@ TPROGLDLIBS_tracex4		+= -lrt
 TPROGLDLIBS_trace_output	+= -lrt
 TPROGLDLIBS_map_perf_test	+= -lrt
 TPROGLDLIBS_test_overhead	+= -lrt
-TPROGLDLIBS_xdpsock		+= -pthread -lcap
-TPROGLDLIBS_xsk_fwd		+= -pthread
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 # make M=samples/bpf LLC=~/git/llvm-project/llvm/build/bin/llc CLANG=~/git/llvm-project/llvm/build/bin/clang
diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c
index f0c5d95084de..0a5c704badd0 100644
--- a/samples/bpf/xdp1_kern.c
+++ b/samples/bpf/xdp1_kern.c
@@ -39,11 +39,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
 	return ip6h->nexthdr;
 }
 
-SEC("xdp1")
+#define XDPBUFSIZE	64
+SEC("xdp.frags")
 int xdp_prog1(struct xdp_md *ctx)
 {
-	void *data_end = (void *)(long)ctx->data_end;
-	void *data = (void *)(long)ctx->data;
+	__u8 pkt[XDPBUFSIZE] = {};
+	void *data_end = &pkt[XDPBUFSIZE-1];
+	void *data = pkt;
 	struct ethhdr *eth = data;
 	int rc = XDP_DROP;
 	long *value;
@@ -51,6 +53,9 @@ int xdp_prog1(struct xdp_md *ctx)
 	u64 nh_off;
 	u32 ipproto;
 
+	if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+		return rc;
+
 	nh_off = sizeof(*eth);
 	if (data + nh_off > data_end)
 		return rc;
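
Copying the headers into a stack buffer with bpf_xdp_load_bytes(), instead of dereferencing ctx->data directly, is what lets these samples carry the SEC("xdp.frags") annotation and run on multi-buffer frames whose first fragment may not contain the full headers. For reference, the helper's signature:

	long bpf_xdp_load_bytes(struct xdp_md *xdp_md, __u32 offset, void *buf, __u32 len);

It returns 0 on success and a negative error when the requested range is unavailable, which is why the program bails out with XDP_DROP when the copy fails.
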
diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c
index d8a64ab077b0..3332ba6bb95f 100644
--- a/samples/bpf/xdp2_kern.c
+++ b/samples/bpf/xdp2_kern.c
@@ -55,11 +55,13 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end)
 	return ip6h->nexthdr;
 }
 
-SEC("xdp1")
+#define XDPBUFSIZE	64
+SEC("xdp.frags")
 int xdp_prog1(struct xdp_md *ctx)
 {
-	void *data_end = (void *)(long)ctx->data_end;
-	void *data = (void *)(long)ctx->data;
+	__u8 pkt[XDPBUFSIZE] = {};
+	void *data_end = &pkt[XDPBUFSIZE-1];
+	void *data = pkt;
 	struct ethhdr *eth = data;
 	int rc = XDP_DROP;
 	long *value;
@@ -67,6 +69,9 @@ int xdp_prog1(struct xdp_md *ctx)
 	u64 nh_off;
 	u32 ipproto;
 
+	if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt)))
+		return rc;
+
 	nh_off = sizeof(*eth);
 	if (data + nh_off > data_end)
 		return rc;
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
index 575d57e4b8d6..0e2bca3a3fff 100644
--- a/samples/bpf/xdp_tx_iptunnel_kern.c
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -212,7 +212,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
 	return XDP_TX;
 }
 
-SEC("xdp_tx_iptunnel")
+SEC("xdp.frags")
 int _xdp_tx_iptunnel(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h
deleted file mode 100644
index fd70cce60712..000000000000
--- a/samples/bpf/xdpsock.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
- * Copyright(c) 2019 Intel Corporation.
- */
-
-#ifndef XDPSOCK_H_
-#define XDPSOCK_H_
-
-#define MAX_SOCKS 4
-
-#define SOCKET_NAME "sock_cal_bpf_fd"
-#define MAX_NUM_OF_CLIENTS 10
-
-#define CLOSE_CONN  1
-
-typedef __u64 u64;
-typedef __u32 u32;
-
-#endif /* XDPSOCK_H */
diff --git a/samples/bpf/xdpsock_ctrl_proc.c b/samples/bpf/xdpsock_ctrl_proc.c
deleted file mode 100644
index 28b5f2a9fa08..000000000000
--- a/samples/bpf/xdpsock_ctrl_proc.c
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation. */
-
-#include <errno.h>
-#include <getopt.h>
-#include <libgen.h>
-#include <net/if.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <unistd.h>
-
-#include <bpf/bpf.h>
-#include <bpf/xsk.h>
-#include "xdpsock.h"
-
-/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-static const char *opt_if = "";
-
-static struct option long_options[] = {
-	{"interface", required_argument, 0, 'i'},
-	{0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
-	const char *str =
-		"  Usage: %s [OPTIONS]\n"
-		"  Options:\n"
-		"  -i, --interface=n	Run on interface n\n"
-		"\n";
-	fprintf(stderr, "%s\n", str);
-
-	exit(0);
-}
-
-static void parse_command_line(int argc, char **argv)
-{
-	int option_index, c;
-
-	opterr = 0;
-
-	for (;;) {
-		c = getopt_long(argc, argv, "i:",
-				long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'i':
-			opt_if = optarg;
-			break;
-		default:
-			usage(basename(argv[0]));
-		}
-	}
-}
-
-static int send_xsks_map_fd(int sock, int fd)
-{
-	char cmsgbuf[CMSG_SPACE(sizeof(int))];
-	struct msghdr msg;
-	struct iovec iov;
-	int value = 0;
-
-	if (fd == -1) {
-		fprintf(stderr, "Incorrect fd = %d\n", fd);
-		return -1;
-	}
-	iov.iov_base = &value;
-	iov.iov_len = sizeof(int);
-
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	msg.msg_flags = 0;
-	msg.msg_control = cmsgbuf;
-	msg.msg_controllen = CMSG_LEN(sizeof(int));
-
-	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
-
-	cmsg->cmsg_level = SOL_SOCKET;
-	cmsg->cmsg_type = SCM_RIGHTS;
-	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
-
-	*(int *)CMSG_DATA(cmsg) = fd;
-	int ret = sendmsg(sock, &msg, 0);
-
-	if (ret == -1) {
-		fprintf(stderr, "Sendmsg failed with %s", strerror(errno));
-		return -errno;
-	}
-
-	return ret;
-}
-
-int
-main(int argc, char **argv)
-{
-	struct sockaddr_un server;
-	int listening = 1;
-	int rval, msgsock;
-	int ifindex = 0;
-	int flag = 1;
-	int cmd = 0;
-	int sock;
-	int err;
-	int xsks_map_fd;
-
-	parse_command_line(argc, argv);
-
-	ifindex = if_nametoindex(opt_if);
-	if (ifindex == 0) {
-		fprintf(stderr, "Unable to get ifindex for Interface %s. Reason:%s",
-			opt_if, strerror(errno));
-		return -errno;
-	}
-
-	sock = socket(AF_UNIX, SOCK_STREAM, 0);
-	if (sock < 0) {
-		fprintf(stderr, "Opening socket stream failed: %s", strerror(errno));
-		return -errno;
-	}
-
-	server.sun_family = AF_UNIX;
-	strcpy(server.sun_path, SOCKET_NAME);
-
-	setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(int));
-
-	if (bind(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un))) {
-		fprintf(stderr, "Binding to socket stream failed: %s", strerror(errno));
-		return -errno;
-	}
-
-	listen(sock, MAX_NUM_OF_CLIENTS);
-
-	err = xsk_setup_xdp_prog(ifindex, &xsks_map_fd);
-	if (err) {
-		fprintf(stderr, "Setup of xdp program failed\n");
-		goto close_sock;
-	}
-
-	while (listening) {
-		msgsock = accept(sock, 0, 0);
-		if (msgsock == -1) {
-			fprintf(stderr, "Error accepting connection: %s", strerror(errno));
-			err = -errno;
-			goto close_sock;
-		}
-		err = send_xsks_map_fd(msgsock, xsks_map_fd);
-		if (err <= 0) {
-			fprintf(stderr, "Error %d sending xsks_map_fd\n", err);
-			goto cleanup;
-		}
-		do {
-			rval = read(msgsock, &cmd, sizeof(int));
-			if (rval < 0) {
-				fprintf(stderr, "Error reading stream message");
-			} else {
-				if (cmd != CLOSE_CONN)
-					fprintf(stderr, "Recv unknown cmd = %d\n", cmd);
-				listening = 0;
-				break;
-			}
-		} while (rval > 0);
-	}
-	close(msgsock);
-	close(sock);
-	unlink(SOCKET_NAME);
-
-	/* Unset fd for given ifindex */
-	err = bpf_xdp_detach(ifindex, 0, NULL);
-	if (err) {
-		fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex);
-		return err;
-	}
-
-	return 0;
-
-cleanup:
-	close(msgsock);
-close_sock:
-	close(sock);
-	unlink(SOCKET_NAME);
-	return err;
-}
diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c
deleted file mode 100644
index 05430484375c..000000000000
--- a/samples/bpf/xdpsock_kern.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "xdpsock.h"
-
-/* This XDP program is only needed for the XDP_SHARED_UMEM mode.
- * If you do not use this mode, libbpf can supply an XDP program for you.
- */
-
-struct {
-	__uint(type, BPF_MAP_TYPE_XSKMAP);
-	__uint(max_entries, MAX_SOCKS);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
-} xsks_map SEC(".maps");
-
-static unsigned int rr;
-
-SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
-{
-	rr = (rr + 1) & (MAX_SOCKS - 1);
-
-	return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
-}
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
deleted file mode 100644
index be7d2572e3e6..000000000000
--- a/samples/bpf/xdpsock_user.c
+++ /dev/null
@@ -1,2019 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2017 - 2018 Intel Corporation. */
-
-#include <errno.h>
-#include <getopt.h>
-#include <libgen.h>
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <linux/if_xdp.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-#include <linux/limits.h>
-#include <linux/udp.h>
-#include <arpa/inet.h>
-#include <locale.h>
-#include <net/ethernet.h>
-#include <netinet/ether.h>
-#include <net/if.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/capability.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/un.h>
-#include <time.h>
-#include <unistd.h>
-#include <sched.h>
-
-#include <bpf/libbpf.h>
-#include <bpf/xsk.h>
-#include <bpf/bpf.h>
-#include "xdpsock.h"
-
-/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-#ifndef SOL_XDP
-#define SOL_XDP 283
-#endif
-
-#ifndef AF_XDP
-#define AF_XDP 44
-#endif
-
-#ifndef PF_XDP
-#define PF_XDP AF_XDP
-#endif
-
-#define NUM_FRAMES (4 * 1024)
-#define MIN_PKT_SIZE 64
-
-#define DEBUG_HEXDUMP 0
-
-#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
-#define VLAN_PRIO_SHIFT		13
-#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
-#define VLAN_VID__DEFAULT	1
-#define VLAN_PRI__DEFAULT	0
-
-#define NSEC_PER_SEC		1000000000UL
-#define NSEC_PER_USEC		1000
-
-#define SCHED_PRI__DEFAULT	0
-
-typedef __u64 u64;
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8  u8;
-
-static unsigned long prev_time;
-static long tx_cycle_diff_min;
-static long tx_cycle_diff_max;
-static double tx_cycle_diff_ave;
-static long tx_cycle_cnt;
-
-enum benchmark_type {
-	BENCH_RXDROP = 0,
-	BENCH_TXONLY = 1,
-	BENCH_L2FWD = 2,
-};
-
-static enum benchmark_type opt_bench = BENCH_RXDROP;
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static const char *opt_if = "";
-static int opt_ifindex;
-static int opt_queue;
-static unsigned long opt_duration;
-static unsigned long start_time;
-static bool benchmark_done;
-static u32 opt_batch_size = 64;
-static int opt_pkt_count;
-static u16 opt_pkt_size = MIN_PKT_SIZE;
-static u32 opt_pkt_fill_pattern = 0x12345678;
-static bool opt_vlan_tag;
-static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT;
-static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT;
-static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe,
-					 0x9e, 0x7f, 0x71 }};
-static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7,
-					 0x98, 0x3a, 0xc0 }};
-static bool opt_extra_stats;
-static bool opt_quiet;
-static bool opt_app_stats;
-static const char *opt_irq_str = "";
-static u32 irq_no;
-static int irqs_at_init = -1;
-static u32 sequence;
-static int opt_poll;
-static int opt_interval = 1;
-static int opt_retries = 3;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
-static u32 opt_umem_flags;
-static int opt_unaligned_chunks;
-static int opt_mmap_flags;
-static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-static int opt_timeout = 1000;
-static bool opt_need_wakeup = true;
-static u32 opt_num_xsks = 1;
-static u32 prog_id;
-static bool opt_busy_poll;
-static bool opt_reduced_cap;
-static clockid_t opt_clock = CLOCK_MONOTONIC;
-static unsigned long opt_tx_cycle_ns;
-static int opt_schpolicy = SCHED_OTHER;
-static int opt_schprio = SCHED_PRI__DEFAULT;
-static bool opt_tstamp;
-
-struct vlan_ethhdr {
-	unsigned char h_dest[6];
-	unsigned char h_source[6];
-	__be16 h_vlan_proto;
-	__be16 h_vlan_TCI;
-	__be16 h_vlan_encapsulated_proto;
-};
-
-#define PKTGEN_MAGIC 0xbe9be955
-struct pktgen_hdr {
-	__be32 pgh_magic;
-	__be32 seq_num;
-	__be32 tv_sec;
-	__be32 tv_usec;
-};
-
-struct xsk_ring_stats {
-	unsigned long rx_npkts;
-	unsigned long tx_npkts;
-	unsigned long rx_dropped_npkts;
-	unsigned long rx_invalid_npkts;
-	unsigned long tx_invalid_npkts;
-	unsigned long rx_full_npkts;
-	unsigned long rx_fill_empty_npkts;
-	unsigned long tx_empty_npkts;
-	unsigned long prev_rx_npkts;
-	unsigned long prev_tx_npkts;
-	unsigned long prev_rx_dropped_npkts;
-	unsigned long prev_rx_invalid_npkts;
-	unsigned long prev_tx_invalid_npkts;
-	unsigned long prev_rx_full_npkts;
-	unsigned long prev_rx_fill_empty_npkts;
-	unsigned long prev_tx_empty_npkts;
-};
-
-struct xsk_driver_stats {
-	unsigned long intrs;
-	unsigned long prev_intrs;
-};
-
-struct xsk_app_stats {
-	unsigned long rx_empty_polls;
-	unsigned long fill_fail_polls;
-	unsigned long copy_tx_sendtos;
-	unsigned long tx_wakeup_sendtos;
-	unsigned long opt_polls;
-	unsigned long prev_rx_empty_polls;
-	unsigned long prev_fill_fail_polls;
-	unsigned long prev_copy_tx_sendtos;
-	unsigned long prev_tx_wakeup_sendtos;
-	unsigned long prev_opt_polls;
-};
-
-struct xsk_umem_info {
-	struct xsk_ring_prod fq;
-	struct xsk_ring_cons cq;
-	struct xsk_umem *umem;
-	void *buffer;
-};
-
-struct xsk_socket_info {
-	struct xsk_ring_cons rx;
-	struct xsk_ring_prod tx;
-	struct xsk_umem_info *umem;
-	struct xsk_socket *xsk;
-	struct xsk_ring_stats ring_stats;
-	struct xsk_app_stats app_stats;
-	struct xsk_driver_stats drv_stats;
-	u32 outstanding_tx;
-};
-
-static const struct clockid_map {
-	const char *name;
-	clockid_t clockid;
-} clockids_map[] = {
-	{ "REALTIME", CLOCK_REALTIME },
-	{ "TAI", CLOCK_TAI },
-	{ "BOOTTIME", CLOCK_BOOTTIME },
-	{ "MONOTONIC", CLOCK_MONOTONIC },
-	{ NULL }
-};
-
-static const struct sched_map {
-	const char *name;
-	int policy;
-} schmap[] = {
-	{ "OTHER", SCHED_OTHER },
-	{ "FIFO", SCHED_FIFO },
-	{ NULL }
-};
-
-static int num_socks;
-struct xsk_socket_info *xsks[MAX_SOCKS];
-int sock;
-
-static int get_clockid(clockid_t *id, const char *name)
-{
-	const struct clockid_map *clk;
-
-	for (clk = clockids_map; clk->name; clk++) {
-		if (strcasecmp(clk->name, name) == 0) {
-			*id = clk->clockid;
-			return 0;
-		}
-	}
-
-	return -1;
-}
-
-static int get_schpolicy(int *policy, const char *name)
-{
-	const struct sched_map *sch;
-
-	for (sch = schmap; sch->name; sch++) {
-		if (strcasecmp(sch->name, name) == 0) {
-			*policy = sch->policy;
-			return 0;
-		}
-	}
-
-	return -1;
-}
-
-static unsigned long get_nsecs(void)
-{
-	struct timespec ts;
-
-	clock_gettime(opt_clock, &ts);
-	return ts.tv_sec * 1000000000UL + ts.tv_nsec;
-}
-
-static void print_benchmark(bool running)
-{
-	const char *bench_str = "INVALID";
-
-	if (opt_bench == BENCH_RXDROP)
-		bench_str = "rxdrop";
-	else if (opt_bench == BENCH_TXONLY)
-		bench_str = "txonly";
-	else if (opt_bench == BENCH_L2FWD)
-		bench_str = "l2fwd";
-
-	printf("%s:%d %s ", opt_if, opt_queue, bench_str);
-	if (opt_xdp_flags & XDP_FLAGS_SKB_MODE)
-		printf("xdp-skb ");
-	else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE)
-		printf("xdp-drv ");
-	else
-		printf("	");
-
-	if (opt_poll)
-		printf("poll() ");
-
-	if (running) {
-		printf("running...");
-		fflush(stdout);
-	}
-}
-
-static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk)
-{
-	struct xdp_statistics stats;
-	socklen_t optlen;
-	int err;
-
-	optlen = sizeof(stats);
-	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-	if (err)
-		return err;
-
-	if (optlen == sizeof(struct xdp_statistics)) {
-		xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped;
-		xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs;
-		xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs;
-		xsk->ring_stats.rx_full_npkts = stats.rx_ring_full;
-		xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs;
-		xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static void dump_app_stats(long dt)
-{
-	int i;
-
-	for (i = 0; i < num_socks && xsks[i]; i++) {
-		char *fmt = "%-18s %'-14.0f %'-14lu\n";
-		double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps,
-				tx_wakeup_sendtos_ps, opt_polls_ps;
-
-		rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls -
-					xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt;
-		fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls -
-					xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt;
-		copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos -
-					xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt;
-		tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos -
-					xsks[i]->app_stats.prev_tx_wakeup_sendtos)
-										* 1000000000. / dt;
-		opt_polls_ps = (xsks[i]->app_stats.opt_polls -
-					xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt;
-
-		printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count");
-		printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls);
-		printf(fmt, "fill fail polls", fill_fail_polls_ps,
-							xsks[i]->app_stats.fill_fail_polls);
-		printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps,
-							xsks[i]->app_stats.copy_tx_sendtos);
-		printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps,
-							xsks[i]->app_stats.tx_wakeup_sendtos);
-		printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls);
-
-		xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls;
-		xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls;
-		xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos;
-		xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
-		xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
-	}
-
-	if (opt_tx_cycle_ns) {
-		printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n",
-		       "", "period", "min", "ave", "max", "cycle");
-		printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n",
-		       "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min,
-		       (long)(tx_cycle_diff_ave / tx_cycle_cnt),
-		       tx_cycle_diff_max, tx_cycle_cnt);
-	}
-}
-
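-/* Scan /proc/interrupts for the first line containing opt_irq_str and
- * remember its IRQ number for later per-CPU interrupt accounting.
- */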
-static bool get_interrupt_number(void)
-{
-	FILE *f_int_proc;
-	char line[4096];
-	bool found = false;
-
-	f_int_proc = fopen("/proc/interrupts", "r");
-	if (f_int_proc == NULL) {
-		printf("Failed to open /proc/interrupts.\n");
-		return found;
-	}
-
-	while (!feof(f_int_proc) && !found) {
-		/* Make sure to read a full line at a time */
-		if (fgets(line, sizeof(line), f_int_proc) == NULL ||
-				line[strlen(line) - 1] != '\n') {
-			printf("Error reading from interrupts file\n");
-			break;
-		}
-
-		/* Extract interrupt number from line */
-		if (strstr(line, opt_irq_str) != NULL) {
-			irq_no = atoi(line);
-			found = true;
-			break;
-		}
-	}
-
-	fclose(f_int_proc);
-
-	return found;
-}
-
-static int get_irqs(void)
-{
-	char count_path[PATH_MAX];
-	int total_intrs = -1;
-	FILE *f_count_proc;
-	char line[4096];
-
-	snprintf(count_path, sizeof(count_path),
-		"/sys/kernel/irq/%i/per_cpu_count", irq_no);
-	f_count_proc = fopen(count_path, "r");
-	if (f_count_proc == NULL) {
-		printf("Failed to open %s\n", count_path);
-		return total_intrs;
-	}
-
-	if (fgets(line, sizeof(line), f_count_proc) == NULL ||
-			line[strlen(line) - 1] != '\n') {
-		printf("Error reading from %s\n", count_path);
-	} else {
-		static const char com[2] = ",";
-		char *token;
-
-		total_intrs = 0;
-		token = strtok(line, com);
-		while (token != NULL) {
-			/* sum up interrupts across all cores */
-			total_intrs += atoi(token);
-			token = strtok(NULL, com);
-		}
-	}
-
-	fclose(f_count_proc);
-
-	return total_intrs;
-}
-
-static void dump_driver_stats(long dt)
-{
-	int i;
-
-	for (i = 0; i < num_socks && xsks[i]; i++) {
-		char *fmt = "%-18s %'-14.0f %'-14lu\n";
-		double intrs_ps;
-		int n_ints = get_irqs();
-
-		if (n_ints < 0) {
-			printf("error getting intr info for intr %i\n", irq_no);
-			return;
-		}
-		xsks[i]->drv_stats.intrs = n_ints - irqs_at_init;
-
-		intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) *
-			 1000000000. / dt;
-
-		printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count");
-		printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs);
-
-		xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs;
-	}
-}
-
-static void dump_stats(void)
-{
-	unsigned long now = get_nsecs();
-	long dt = now - prev_time;
-	int i;
-
-	prev_time = now;
-
-	for (i = 0; i < num_socks && xsks[i]; i++) {
-		char *fmt = "%-18s %'-14.0f %'-14lu\n";
-		double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps,
-			tx_invalid_pps, tx_empty_pps;
-
-		rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) *
-			 1000000000. / dt;
-		tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) *
-			 1000000000. / dt;
-
-		printf("\n sock%d@", i);
-		print_benchmark(false);
-		printf("\n");
-
-		printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts",
-		       dt / 1000000000.);
-		printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts);
-		printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts);
-
-		xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts;
-		xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts;
-
-		if (opt_extra_stats) {
-			if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) {
-				dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts -
-						xsks[i]->ring_stats.prev_rx_dropped_npkts) *
-							1000000000. / dt;
-				rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts -
-						xsks[i]->ring_stats.prev_rx_invalid_npkts) *
-							1000000000. / dt;
-				tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts -
-						xsks[i]->ring_stats.prev_tx_invalid_npkts) *
-							1000000000. / dt;
-				full_pps = (xsks[i]->ring_stats.rx_full_npkts -
-						xsks[i]->ring_stats.prev_rx_full_npkts) *
-							1000000000. / dt;
-				fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts -
-						xsks[i]->ring_stats.prev_rx_fill_empty_npkts) *
-							1000000000. / dt;
-				tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts -
-						xsks[i]->ring_stats.prev_tx_empty_npkts) *
-							1000000000. / dt;
-
-				printf(fmt, "rx dropped", dropped_pps,
-				       xsks[i]->ring_stats.rx_dropped_npkts);
-				printf(fmt, "rx invalid", rx_invalid_pps,
-				       xsks[i]->ring_stats.rx_invalid_npkts);
-				printf(fmt, "tx invalid", tx_invalid_pps,
-				       xsks[i]->ring_stats.tx_invalid_npkts);
-				printf(fmt, "rx queue full", full_pps,
-				       xsks[i]->ring_stats.rx_full_npkts);
-				printf(fmt, "fill ring empty", fill_empty_pps,
-				       xsks[i]->ring_stats.rx_fill_empty_npkts);
-				printf(fmt, "tx ring empty", tx_empty_pps,
-				       xsks[i]->ring_stats.tx_empty_npkts);
-
-				xsks[i]->ring_stats.prev_rx_dropped_npkts =
-					xsks[i]->ring_stats.rx_dropped_npkts;
-				xsks[i]->ring_stats.prev_rx_invalid_npkts =
-					xsks[i]->ring_stats.rx_invalid_npkts;
-				xsks[i]->ring_stats.prev_tx_invalid_npkts =
-					xsks[i]->ring_stats.tx_invalid_npkts;
-				xsks[i]->ring_stats.prev_rx_full_npkts =
-					xsks[i]->ring_stats.rx_full_npkts;
-				xsks[i]->ring_stats.prev_rx_fill_empty_npkts =
-					xsks[i]->ring_stats.rx_fill_empty_npkts;
-				xsks[i]->ring_stats.prev_tx_empty_npkts =
-					xsks[i]->ring_stats.tx_empty_npkts;
-			} else {
-				printf("%-15s\n", "Error retrieving extra stats");
-			}
-		}
-	}
-
-	if (opt_app_stats)
-		dump_app_stats(dt);
-	if (irq_no)
-		dump_driver_stats(dt);
-}
-
-static bool is_benchmark_done(void)
-{
-	if (opt_duration > 0) {
-		unsigned long dt = (get_nsecs() - start_time);
-
-		if (dt >= opt_duration)
-			benchmark_done = true;
-	}
-	return benchmark_done;
-}
-
-static void *poller(void *arg)
-{
-	(void)arg;
-	while (!is_benchmark_done()) {
-		sleep(opt_interval);
-		dump_stats();
-	}
-
-	return NULL;
-}
-
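-/* Detach the XDP program, but only if the program currently attached to
- * the interface is still the one this process loaded.
- */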
-static void remove_xdp_program(void)
-{
-	u32 curr_prog_id = 0;
-
-	if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) {
-		printf("bpf_xdp_query_id failed\n");
-		exit(EXIT_FAILURE);
-	}
-
-	if (prog_id == curr_prog_id)
-		bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL);
-	else if (!curr_prog_id)
-		printf("couldn't find a prog id on a given interface\n");
-	else
-		printf("program on interface changed, not removing\n");
-}
-
-static void int_exit(int sig)
-{
-	benchmark_done = true;
-}
-
-static void __exit_with_error(int error, const char *file, const char *func,
-			      int line)
-{
-	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
-		line, error, strerror(error));
-
-	if (opt_num_xsks > 1)
-		remove_xdp_program();
-	exit(EXIT_FAILURE);
-}
-
-#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-
-static void xdpsock_cleanup(void)
-{
-	struct xsk_umem *umem = xsks[0]->umem->umem;
-	int i, cmd = CLOSE_CONN;
-
-	dump_stats();
-	for (i = 0; i < num_socks; i++)
-		xsk_socket__delete(xsks[i]->xsk);
-	(void)xsk_umem__delete(umem);
-
-	if (opt_reduced_cap) {
-		if (write(sock, &cmd, sizeof(int)) < 0)
-			exit_with_error(errno);
-	}
-
-	if (opt_num_xsks > 1)
-		remove_xdp_program();
-}
-
-static void swap_mac_addresses(void *data)
-{
-	struct ether_header *eth = (struct ether_header *)data;
-	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
-	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
-	struct ether_addr tmp;
-
-	tmp = *src_addr;
-	*src_addr = *dst_addr;
-	*dst_addr = tmp;
-}
-
-static void hex_dump(void *pkt, size_t length, u64 addr)
-{
-	const unsigned char *address = (unsigned char *)pkt;
-	const unsigned char *line = address;
-	size_t line_size = 32;
-	unsigned char c;
-	char buf[32];
-	int i = 0;
-
-	if (!DEBUG_HEXDUMP)
-		return;
-
-	sprintf(buf, "addr=%llu", addr);
-	printf("length = %zu\n", length);
-	printf("%s | ", buf);
-	while (length-- > 0) {
-		printf("%02X ", *address++);
-		if (!(++i % line_size) || (length == 0 && i % line_size)) {
-			if (length == 0) {
-				while (i++ % line_size)
-					printf("__ ");
-			}
-			printf(" | ");	/* right close */
-			while (line < address) {
-				c = *line++;
-				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
-			}
-			printf("\n");
-			if (length > 0)
-				printf("%s | ", buf);
-		}
-	}
-	printf("\n");
-}
-
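-/* Fill a buffer with a repeating 32-bit pattern stored in network byte
- * order; the second loop handles sizes that are not a multiple of 4 bytes.
- */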
-static void *memset32_htonl(void *dest, u32 val, u32 size)
-{
-	u32 *ptr = (u32 *)dest;
-	int i;
-
-	val = htonl(val);
-
-	for (i = 0; i < (size & (~0x3)); i += 4)
-		ptr[i >> 2] = val;
-
-	for (; i < size; i++)
-		((char *)dest)[i] = ((char *)&val)[i & 3];
-
-	return dest;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
-	/* add up 16-bit and 16-bit for 16+c bit */
-	x = (x & 0xffff) + (x >> 16);
-	/* add up carry.. */
-	x = (x & 0xffff) + (x >> 16);
-	return x;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static unsigned int do_csum(const unsigned char *buff, int len)
-{
-	unsigned int result = 0;
-	int odd;
-
-	if (len <= 0)
-		goto out;
-	odd = 1 & (unsigned long)buff;
-	if (odd) {
-#ifdef __LITTLE_ENDIAN
-		result += (*buff << 8);
-#else
-		result = *buff;
-#endif
-		len--;
-		buff++;
-	}
-	if (len >= 2) {
-		if (2 & (unsigned long)buff) {
-			result += *(unsigned short *)buff;
-			len -= 2;
-			buff += 2;
-		}
-		if (len >= 4) {
-			const unsigned char *end = buff +
-						   ((unsigned int)len & ~3);
-			unsigned int carry = 0;
-
-			do {
-				unsigned int w = *(unsigned int *)buff;
-
-				buff += 4;
-				result += carry;
-				result += w;
-				carry = (w > result);
-			} while (buff < end);
-			result += carry;
-			result = (result & 0xffff) + (result >> 16);
-		}
-		if (len & 2) {
-			result += *(unsigned short *)buff;
-			buff += 2;
-		}
-	}
-	if (len & 1)
-#ifdef __LITTLE_ENDIAN
-		result += *buff;
-#else
-		result += (*buff << 8);
-#endif
-	result = from32to16(result);
-	if (odd)
-		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
-	return result;
-}
-
-/*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- *	This function code has been taken from
- *	Linux kernel lib/checksum.c
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	return (__sum16)~do_csum(iph, ihl * 4);
-}
-
-/*
- * Fold a partial checksum
- * This function code has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __sum16 csum_fold(__wsum csum)
-{
-	u32 sum = (u32)csum;
-
-	sum = (sum & 0xffff) + (sum >> 16);
-	sum = (sum & 0xffff) + (sum >> 16);
-	return (__sum16)~sum;
-}
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline u32 from64to32(u64 x)
-{
-	/* add up 32-bit and 32-bit for 32+c bit */
-	x = (x & 0xffffffff) + (x >> 32);
-	/* add up carry.. */
-	x = (x & 0xffffffff) + (x >> 32);
-	return (u32)x;
-}
-
-__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-			  __u32 len, __u8 proto, __wsum sum);
-
-/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-			  __u32 len, __u8 proto, __wsum sum)
-{
-	unsigned long long s = (u32)sum;
-
-	s += (u32)saddr;
-	s += (u32)daddr;
-#ifdef __BIG_ENDIAN__
-	s += proto + len;
-#else
-	s += (proto + len) << 8;
-#endif
-	return (__wsum)from64to32(s);
-}
-
-/*
- * This function has been taken from
- * Linux kernel include/asm-generic/checksum.h
- */
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
-		  __u8 proto, __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
-}
-
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
-			   u8 proto, u16 *udp_pkt)
-{
-	u32 csum = 0;
-	u32 cnt = 0;
-
-	/* udp hdr and data */
-	for (; cnt < len; cnt += 2)
-		csum += udp_pkt[cnt >> 1];
-
-	return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
-}
-
-#define ETH_FCS_SIZE 4
-
-#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \
-		      sizeof(struct ethhdr))
-#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0)
-#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \
-		      sizeof(struct udphdr) + PKTGEN_HDR_SIZE)
-#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \
-			   sizeof(struct udphdr))
-#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \
-			 ETH_FCS_SIZE)
-
-#define PKT_SIZE		(opt_pkt_size - ETH_FCS_SIZE)
-#define IP_PKT_SIZE		(PKT_SIZE - ETH_HDR_SIZE)
-#define UDP_PKT_SIZE		(IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE	(UDP_PKT_SIZE - \
-				 (sizeof(struct udphdr) + PKTGEN_HDR_SIZE))
-
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-
-static void gen_eth_hdr_data(void)
-{
-	struct pktgen_hdr *pktgen_hdr;
-	struct udphdr *udp_hdr;
-	struct iphdr *ip_hdr;
-
-	if (opt_vlan_tag) {
-		struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data;
-		u16 vlan_tci = 0;
-
-		udp_hdr = (struct udphdr *)(pkt_data +
-					    sizeof(struct vlan_ethhdr) +
-					    sizeof(struct iphdr));
-		ip_hdr = (struct iphdr *)(pkt_data +
-					  sizeof(struct vlan_ethhdr));
-		pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
-						   sizeof(struct vlan_ethhdr) +
-						   sizeof(struct iphdr) +
-						   sizeof(struct udphdr));
-		/* ethernet & VLAN header */
-		memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
-		memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN);
-		veth_hdr->h_vlan_proto = htons(ETH_P_8021Q);
-		vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK;
-		vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
-		veth_hdr->h_vlan_TCI = htons(vlan_tci);
-		veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP);
-	} else {
-		struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
-
-		udp_hdr = (struct udphdr *)(pkt_data +
-					    sizeof(struct ethhdr) +
-					    sizeof(struct iphdr));
-		ip_hdr = (struct iphdr *)(pkt_data +
-					  sizeof(struct ethhdr));
-		pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
-						   sizeof(struct ethhdr) +
-						   sizeof(struct iphdr) +
-						   sizeof(struct udphdr));
-		/* ethernet header */
-		memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
-		memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN);
-		eth_hdr->h_proto = htons(ETH_P_IP);
-	}
-
-
-	/* IP header */
-	ip_hdr->version = IPVERSION;
-	ip_hdr->ihl = 0x5; /* 20 byte header */
-	ip_hdr->tos = 0x0;
-	ip_hdr->tot_len = htons(IP_PKT_SIZE);
-	ip_hdr->id = 0;
-	ip_hdr->frag_off = 0;
-	ip_hdr->ttl = IPDEFTTL;
-	ip_hdr->protocol = IPPROTO_UDP;
-	ip_hdr->saddr = htonl(0x0a0a0a10);
-	ip_hdr->daddr = htonl(0x0a0a0a20);
-
-	/* IP header checksum */
-	ip_hdr->check = 0;
-	ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
-
-	/* UDP header */
-	udp_hdr->source = htons(0x1000);
-	udp_hdr->dest = htons(0x1000);
-	udp_hdr->len = htons(UDP_PKT_SIZE);
-
-	if (opt_tstamp)
-		pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC);
-
-	/* UDP data */
-	memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
-		       UDP_PKT_DATA_SIZE);
-
-	/* UDP header checksum */
-	udp_hdr->check = 0;
-	udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
-				  IPPROTO_UDP, (u16 *)udp_hdr);
-}
-
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
-	memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
-	       PKT_SIZE);
-}
-
-static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
-{
-	struct xsk_umem_info *umem;
-	struct xsk_umem_config cfg = {
-		/* We recommend that you set the fill ring size >= HW RX ring size +
-		 * AF_XDP RX ring size. Make sure you fill up the fill ring
-		 * with buffers at regular intervals; with this setting you will
-		 * avoid allocation failures in the driver. Such failures are
-		 * usually quite expensive, since drivers have not been written
-		 * to assume that allocation failures are common. For regular
-		 * sockets, kernel-allocated memory is used, which only runs out
-		 * in OOM situations that should be rare.
-		 */
-		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
-		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-		.frame_size = opt_xsk_frame_size,
-		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
-		.flags = opt_umem_flags
-	};
-	int ret;
-
-	umem = calloc(1, sizeof(*umem));
-	if (!umem)
-		exit_with_error(errno);
-
-	ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
-			       &cfg);
-	if (ret)
-		exit_with_error(-ret);
-
-	umem->buffer = buffer;
-	return umem;
-}
-
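-/* Pre-populate the fill ring with twice the default number of descriptors
- * so the kernel has receive buffers available from the very first packet.
- */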
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
-{
-	int ret, i;
-	u32 idx;
-
-	ret = xsk_ring_prod__reserve(&umem->fq,
-				     XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx);
-	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2)
-		exit_with_error(-ret);
-	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++)
-		*xsk_ring_prod__fill_addr(&umem->fq, idx++) =
-			i * opt_xsk_frame_size;
-	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2);
-}
-
-static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
-						    bool rx, bool tx)
-{
-	struct xsk_socket_config cfg;
-	struct xsk_socket_info *xsk;
-	struct xsk_ring_cons *rxr;
-	struct xsk_ring_prod *txr;
-	int ret;
-
-	xsk = calloc(1, sizeof(*xsk));
-	if (!xsk)
-		exit_with_error(errno);
-
-	xsk->umem = umem;
-	cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-	if (opt_num_xsks > 1 || opt_reduced_cap)
-		cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
-	else
-		cfg.libbpf_flags = 0;
-	cfg.xdp_flags = opt_xdp_flags;
-	cfg.bind_flags = opt_xdp_bind_flags;
-
-	rxr = rx ? &xsk->rx : NULL;
-	txr = tx ? &xsk->tx : NULL;
-	ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem,
-				 rxr, txr, &cfg);
-	if (ret)
-		exit_with_error(-ret);
-
-	ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id);
-	if (ret)
-		exit_with_error(-ret);
-
-	xsk->app_stats.rx_empty_polls = 0;
-	xsk->app_stats.fill_fail_polls = 0;
-	xsk->app_stats.copy_tx_sendtos = 0;
-	xsk->app_stats.tx_wakeup_sendtos = 0;
-	xsk->app_stats.opt_polls = 0;
-	xsk->app_stats.prev_rx_empty_polls = 0;
-	xsk->app_stats.prev_fill_fail_polls = 0;
-	xsk->app_stats.prev_copy_tx_sendtos = 0;
-	xsk->app_stats.prev_tx_wakeup_sendtos = 0;
-	xsk->app_stats.prev_opt_polls = 0;
-
-	return xsk;
-}
-
-static struct option long_options[] = {
-	{"rxdrop", no_argument, 0, 'r'},
-	{"txonly", no_argument, 0, 't'},
-	{"l2fwd", no_argument, 0, 'l'},
-	{"interface", required_argument, 0, 'i'},
-	{"queue", required_argument, 0, 'q'},
-	{"poll", no_argument, 0, 'p'},
-	{"xdp-skb", no_argument, 0, 'S'},
-	{"xdp-native", no_argument, 0, 'N'},
-	{"interval", required_argument, 0, 'n'},
-	{"retries", required_argument, 0, 'O'},
-	{"zero-copy", no_argument, 0, 'z'},
-	{"copy", no_argument, 0, 'c'},
-	{"frame-size", required_argument, 0, 'f'},
-	{"no-need-wakeup", no_argument, 0, 'm'},
-	{"unaligned", no_argument, 0, 'u'},
-	{"shared-umem", no_argument, 0, 'M'},
-	{"force", no_argument, 0, 'F'},
-	{"duration", required_argument, 0, 'd'},
-	{"clock", required_argument, 0, 'w'},
-	{"batch-size", required_argument, 0, 'b'},
-	{"tx-pkt-count", required_argument, 0, 'C'},
-	{"tx-pkt-size", required_argument, 0, 's'},
-	{"tx-pkt-pattern", required_argument, 0, 'P'},
-	{"tx-vlan", no_argument, 0, 'V'},
-	{"tx-vlan-id", required_argument, 0, 'J'},
-	{"tx-vlan-pri", required_argument, 0, 'K'},
-	{"tx-dmac", required_argument, 0, 'G'},
-	{"tx-smac", required_argument, 0, 'H'},
-	{"tx-cycle", required_argument, 0, 'T'},
-	{"tstamp", no_argument, 0, 'y'},
-	{"policy", required_argument, 0, 'W'},
-	{"schpri", required_argument, 0, 'U'},
-	{"extra-stats", no_argument, 0, 'x'},
-	{"quiet", no_argument, 0, 'Q'},
-	{"app-stats", no_argument, 0, 'a'},
-	{"irq-string", no_argument, 0, 'I'},
-	{"busy-poll", no_argument, 0, 'B'},
-	{"reduce-cap", no_argument, 0, 'R'},
-	{0, 0, 0, 0}
-};
-
-static void usage(const char *prog)
-{
-	const char *str =
-		"  Usage: %s [OPTIONS]\n"
-		"  Options:\n"
-		"  -r, --rxdrop		Discard all incoming packets (default)\n"
-		"  -t, --txonly		Only send packets\n"
-		"  -l, --l2fwd		MAC swap L2 forwarding\n"
-		"  -i, --interface=n	Run on interface n\n"
-		"  -q, --queue=n	Use queue n (default 0)\n"
-		"  -p, --poll		Use poll syscall\n"
-		"  -S, --xdp-skb=n	Use XDP skb-mod\n"
-		"  -N, --xdp-native=n	Enforce XDP native mode\n"
-		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
-		"  -O, --retries=n	Specify time-out retries (1s interval) attempt (default 3).\n"
-		"  -z, --zero-copy      Force zero-copy mode.\n"
-		"  -c, --copy           Force copy mode.\n"
-		"  -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
-		"  -f, --frame-size=n   Set the frame size (must be a power of two in aligned mode, default is %d).\n"
-		"  -u, --unaligned	Enable unaligned chunk placement\n"
-		"  -M, --shared-umem	Enable XDP_SHARED_UMEM (cannot be used with -R)\n"
-		"  -F, --force		Force loading the XDP prog\n"
-		"  -d, --duration=n	Duration in secs to run command.\n"
-		"			Default: forever.\n"
-		"  -w, --clock=CLOCK	Clock NAME (default MONOTONIC).\n"
-		"  -b, --batch-size=n	Batch size for sending or receiving\n"
-		"			packets. Default: %d\n"
-		"  -C, --tx-pkt-count=n	Number of packets to send.\n"
-		"			Default: Continuous packets.\n"
-		"  -s, --tx-pkt-size=n	Transmit packet size.\n"
-		"			(Default: %d bytes)\n"
-		"			Min size: %d, Max size %d.\n"
-		"  -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
-		"  -V, --tx-vlan        Send VLAN tagged  packets (For -t|--txonly)\n"
-		"  -J, --tx-vlan-id=n   Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n"
-		"  -K, --tx-vlan-pri=n  Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
-		"  -G, --tx-dmac=<MAC>  Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
-		"  -H, --tx-smac=<MAC>  Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
-		"  -T, --tx-cycle=n     Tx cycle time in micro-seconds (For -t|--txonly).\n"
-		"  -y, --tstamp         Add time-stamp to packet (For -t|--txonly).\n"
-		"  -W, --policy=POLICY  Schedule policy. Default: SCHED_OTHER\n"
-		"  -U, --schpri=n       Schedule priority. Default: %d\n"
-		"  -x, --extra-stats	Display extra statistics.\n"
-		"  -Q, --quiet          Do not display any stats.\n"
-		"  -a, --app-stats	Display application (syscall) statistics.\n"
-		"  -I, --irq-string	Display driver interrupt statistics for interface associated with irq-string.\n"
-		"  -B, --busy-poll      Busy poll.\n"
-		"  -R, --reduce-cap	Use reduced capabilities (cannot be used with -M)\n"
-		"\n";
-	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
-		opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
-		XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern,
-		VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT,
-		SCHED_PRI__DEFAULT);
-
-	exit(EXIT_FAILURE);
-}
-
-static void parse_command_line(int argc, char **argv)
-{
-	int option_index, c;
-
-	opterr = 0;
-
-	for (;;) {
-		c = getopt_long(argc, argv,
-				"Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR",
-				long_options, &option_index);
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'r':
-			opt_bench = BENCH_RXDROP;
-			break;
-		case 't':
-			opt_bench = BENCH_TXONLY;
-			break;
-		case 'l':
-			opt_bench = BENCH_L2FWD;
-			break;
-		case 'i':
-			opt_if = optarg;
-			break;
-		case 'q':
-			opt_queue = atoi(optarg);
-			break;
-		case 'p':
-			opt_poll = 1;
-			break;
-		case 'S':
-			opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
-			opt_xdp_bind_flags |= XDP_COPY;
-			break;
-		case 'N':
-			/* default, set below */
-			break;
-		case 'n':
-			opt_interval = atoi(optarg);
-			break;
-		case 'w':
-			if (get_clockid(&opt_clock, optarg)) {
-				fprintf(stderr,
-					"ERROR: Invalid clock %s. Default to CLOCK_MONOTONIC.\n",
-					optarg);
-				opt_clock = CLOCK_MONOTONIC;
-			}
-			break;
-		case 'O':
-			opt_retries = atoi(optarg);
-			break;
-		case 'z':
-			opt_xdp_bind_flags |= XDP_ZEROCOPY;
-			break;
-		case 'c':
-			opt_xdp_bind_flags |= XDP_COPY;
-			break;
-		case 'u':
-			opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-			opt_unaligned_chunks = 1;
-			opt_mmap_flags = MAP_HUGETLB;
-			break;
-		case 'F':
-			opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
-			break;
-		case 'f':
-			opt_xsk_frame_size = atoi(optarg);
-			break;
-		case 'm':
-			opt_need_wakeup = false;
-			opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP;
-			break;
-		case 'M':
-			opt_num_xsks = MAX_SOCKS;
-			break;
-		case 'd':
-			opt_duration = atoi(optarg);
-			opt_duration *= 1000000000;
-			break;
-		case 'b':
-			opt_batch_size = atoi(optarg);
-			break;
-		case 'C':
-			opt_pkt_count = atoi(optarg);
-			break;
-		case 's':
-			opt_pkt_size = atoi(optarg);
-			if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
-			    opt_pkt_size < MIN_PKT_SIZE) {
-				fprintf(stderr,
-					"ERROR: Invalid frame size %d\n",
-					opt_pkt_size);
-				usage(basename(argv[0]));
-			}
-			break;
-		case 'P':
-			opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
-			break;
-		case 'V':
-			opt_vlan_tag = true;
-			break;
-		case 'J':
-			opt_pkt_vlan_id = atoi(optarg);
-			break;
-		case 'K':
-			opt_pkt_vlan_pri = atoi(optarg);
-			break;
-		case 'G':
-			if (!ether_aton_r(optarg,
-					  (struct ether_addr *)&opt_txdmac)) {
-				fprintf(stderr, "Invalid dmac address:%s\n",
-					optarg);
-				usage(basename(argv[0]));
-			}
-			break;
-		case 'H':
-			if (!ether_aton_r(optarg,
-					  (struct ether_addr *)&opt_txsmac)) {
-				fprintf(stderr, "Invalid smac address:%s\n",
-					optarg);
-				usage(basename(argv[0]));
-			}
-			break;
-		case 'T':
-			opt_tx_cycle_ns = atoi(optarg);
-			opt_tx_cycle_ns *= NSEC_PER_USEC;
-			break;
-		case 'y':
-			opt_tstamp = 1;
-			break;
-		case 'W':
-			if (get_schpolicy(&opt_schpolicy, optarg)) {
-				fprintf(stderr,
-					"ERROR: Invalid policy %s. Default to SCHED_OTHER.\n",
-					optarg);
-				opt_schpolicy = SCHED_OTHER;
-			}
-			break;
-		case 'U':
-			opt_schprio = atoi(optarg);
-			break;
-		case 'x':
-			opt_extra_stats = 1;
-			break;
-		case 'Q':
-			opt_quiet = 1;
-			break;
-		case 'a':
-			opt_app_stats = 1;
-			break;
-		case 'I':
-			opt_irq_str = optarg;
-			if (get_interrupt_number())
-				irqs_at_init = get_irqs();
-			if (irqs_at_init < 0) {
-				fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str);
-				usage(basename(argv[0]));
-			}
-			break;
-		case 'B':
-			opt_busy_poll = 1;
-			break;
-		case 'R':
-			opt_reduced_cap = true;
-			break;
-		default:
-			usage(basename(argv[0]));
-		}
-	}
-
-	if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
-		opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
-
-	opt_ifindex = if_nametoindex(opt_if);
-	if (!opt_ifindex) {
-		fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
-			opt_if);
-		usage(basename(argv[0]));
-	}
-
-	if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) &&
-	    !opt_unaligned_chunks) {
-		fprintf(stderr, "--frame-size=%d is not a power of two\n",
-			opt_xsk_frame_size);
-		usage(basename(argv[0]));
-	}
-
-	if (opt_reduced_cap && opt_num_xsks > 1) {
-		fprintf(stderr, "ERROR: -M and -R cannot be used together\n");
-		usage(basename(argv[0]));
-	}
-}
-
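-/* Wake up the kernel Tx path with a zero-length sendto(). The listed errno
- * values are transient conditions that are safe to ignore here.
- */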
-static void kick_tx(struct xsk_socket_info *xsk)
-{
-	int ret;
-
-	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN ||
-	    errno == EBUSY || errno == ENETDOWN)
-		return;
-	exit_with_error(errno);
-}
-
-static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk)
-{
-	struct xsk_umem_info *umem = xsk->umem;
-	u32 idx_cq = 0, idx_fq = 0;
-	unsigned int rcvd;
-	size_t ndescs;
-
-	if (!xsk->outstanding_tx)
-		return;
-
-	/* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to
-	 * really send the packets. In zero-copy mode we do not have to do this, since Tx
-	 * is driven by the NAPI loop. So as an optimization, we do not have to call
-	 * sendto() all the time in zero-copy mode for l2fwd.
-	 */
-	if (opt_xdp_bind_flags & XDP_COPY) {
-		xsk->app_stats.copy_tx_sendtos++;
-		kick_tx(xsk);
-	}
-
-	ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
-		xsk->outstanding_tx;
-
-	/* re-add completed Tx buffers */
-	rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq);
-	if (rcvd > 0) {
-		unsigned int i;
-		int ret;
-
-		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-		while (ret != rcvd) {
-			if (ret < 0)
-				exit_with_error(-ret);
-			if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&umem->fq)) {
-				xsk->app_stats.fill_fail_polls++;
-				recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL,
-					 NULL);
-			}
-			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-		}
-
-		for (i = 0; i < rcvd; i++)
-			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) =
-				*xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
-
-		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
-		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
-		xsk->outstanding_tx -= rcvd;
-	}
-}
-
-static inline void complete_tx_only(struct xsk_socket_info *xsk,
-				    int batch_size)
-{
-	unsigned int rcvd;
-	u32 idx;
-
-	if (!xsk->outstanding_tx)
-		return;
-
-	if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
-		xsk->app_stats.tx_wakeup_sendtos++;
-		kick_tx(xsk);
-	}
-
-	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
-	if (rcvd > 0) {
-		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
-		xsk->outstanding_tx -= rcvd;
-	}
-}
-
-static void rx_drop(struct xsk_socket_info *xsk)
-{
-	unsigned int rcvd, i;
-	u32 idx_rx = 0, idx_fq = 0;
-	int ret;
-
-	rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
-	if (!rcvd) {
-		if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-			xsk->app_stats.rx_empty_polls++;
-			recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-		}
-		return;
-	}
-
-	ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
-	while (ret != rcvd) {
-		if (ret < 0)
-			exit_with_error(-ret);
-		if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-			xsk->app_stats.fill_fail_polls++;
-			recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-		}
-		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
-	}
-
-	for (i = 0; i < rcvd; i++) {
-		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
-		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
-		u64 orig = xsk_umem__extract_addr(addr);
-
-		addr = xsk_umem__add_offset_to_addr(addr);
-		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-
-		hex_dump(pkt, len, addr);
-		*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
-	}
-
-	xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
-	xsk_ring_cons__release(&xsk->rx, rcvd);
-	xsk->ring_stats.rx_npkts += rcvd;
-}
-
-static void rx_drop_all(void)
-{
-	struct pollfd fds[MAX_SOCKS] = {};
-	int i, ret;
-
-	for (i = 0; i < num_socks; i++) {
-		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
-		fds[i].events = POLLIN;
-	}
-
-	for (;;) {
-		if (opt_poll) {
-			for (i = 0; i < num_socks; i++)
-				xsks[i]->app_stats.opt_polls++;
-			ret = poll(fds, num_socks, opt_timeout);
-			if (ret <= 0)
-				continue;
-		}
-
-		for (i = 0; i < num_socks; i++)
-			rx_drop(xsks[i]);
-
-		if (benchmark_done)
-			break;
-	}
-}
-
-static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb,
-		   int batch_size, unsigned long tx_ns)
-{
-	u32 idx, tv_sec, tv_usec;
-	unsigned int i;
-
-	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
-				      batch_size) {
-		complete_tx_only(xsk, batch_size);
-		if (benchmark_done)
-			return 0;
-	}
-
-	if (opt_tstamp) {
-		tv_sec = (u32)(tx_ns / NSEC_PER_SEC);
-		tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000);
-	}
-
-	for (i = 0; i < batch_size; i++) {
-		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
-								  idx + i);
-		tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
-		tx_desc->len = PKT_SIZE;
-
-		if (opt_tstamp) {
-			struct pktgen_hdr *pktgen_hdr;
-			u64 addr = tx_desc->addr;
-			char *pkt;
-
-			pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-			pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET);
-
-			pktgen_hdr->seq_num = htonl(sequence++);
-			pktgen_hdr->tv_sec = htonl(tv_sec);
-			pktgen_hdr->tv_usec = htonl(tv_usec);
-
-			hex_dump(pkt, PKT_SIZE, addr);
-		}
-	}
-
-	xsk_ring_prod__submit(&xsk->tx, batch_size);
-	xsk->ring_stats.tx_npkts += batch_size;
-	xsk->outstanding_tx += batch_size;
-	*frame_nb += batch_size;
-	*frame_nb %= NUM_FRAMES;
-	complete_tx_only(xsk, batch_size);
-
-	return batch_size;
-}
-
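-/* Return the size of the next Tx batch: the configured batch size, capped
- * by the number of packets still to send when --tx-pkt-count is set.
- */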
-static inline int get_batch_size(int pkt_cnt)
-{
-	if (!opt_pkt_count)
-		return opt_batch_size;
-
-	if (pkt_cnt + opt_batch_size <= opt_pkt_count)
-		return opt_batch_size;
-
-	return opt_pkt_count - pkt_cnt;
-}
-
-static void complete_tx_only_all(void)
-{
-	bool pending;
-	int i;
-
-	do {
-		pending = false;
-		for (i = 0; i < num_socks; i++) {
-			if (xsks[i]->outstanding_tx) {
-				complete_tx_only(xsks[i], opt_batch_size);
-				pending = !!xsks[i]->outstanding_tx;
-			}
-		}
-		sleep(1);
-	} while (pending && opt_retries-- > 0);
-}
-
-static void tx_only_all(void)
-{
-	struct pollfd fds[MAX_SOCKS] = {};
-	u32 frame_nb[MAX_SOCKS] = {};
-	unsigned long next_tx_ns = 0;
-	int pkt_cnt = 0;
-	int i, ret;
-
-	if (opt_poll && opt_tx_cycle_ns) {
-		fprintf(stderr,
-			"Error: --poll and --tx-cycles are both set\n");
-		return;
-	}
-
-	for (i = 0; i < num_socks; i++) {
-		fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
-		fds[i].events = POLLOUT;
-	}
-
-	if (opt_tx_cycle_ns) {
-		/* Align Tx time to micro-second boundary */
-		next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) *
-			     NSEC_PER_USEC;
-		next_tx_ns += opt_tx_cycle_ns;
-
-		/* Initialize periodic Tx scheduling variance */
-		tx_cycle_diff_min = 1000000000;
-		tx_cycle_diff_max = 0;
-		tx_cycle_diff_ave = 0.0;
-	}
-
-	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
-		int batch_size = get_batch_size(pkt_cnt);
-		unsigned long tx_ns = 0;
-		struct timespec next;
-		int tx_cnt = 0;
-		long diff;
-		int err;
-
-		if (opt_poll) {
-			for (i = 0; i < num_socks; i++)
-				xsks[i]->app_stats.opt_polls++;
-			ret = poll(fds, num_socks, opt_timeout);
-			if (ret <= 0)
-				continue;
-
-			if (!(fds[0].revents & POLLOUT))
-				continue;
-		}
-
-		if (opt_tx_cycle_ns) {
-			next.tv_sec = next_tx_ns / NSEC_PER_SEC;
-			next.tv_nsec = next_tx_ns % NSEC_PER_SEC;
-			err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL);
-			if (err) {
-				if (err != EINTR)
-					fprintf(stderr,
-						"clock_nanosleep failed. Err:%d errno:%d\n",
-						err, errno);
-				break;
-			}
-
-			/* Measure periodic Tx scheduling variance */
-			tx_ns = get_nsecs();
-			diff = tx_ns - next_tx_ns;
-			if (diff < tx_cycle_diff_min)
-				tx_cycle_diff_min = diff;
-
-			if (diff > tx_cycle_diff_max)
-				tx_cycle_diff_max = diff;
-
-			tx_cycle_diff_ave += (double)diff;
-			tx_cycle_cnt++;
-		} else if (opt_tstamp) {
-			tx_ns = get_nsecs();
-		}
-
-		for (i = 0; i < num_socks; i++)
-			tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns);
-
-		pkt_cnt += tx_cnt;
-
-		if (benchmark_done)
-			break;
-
-		if (opt_tx_cycle_ns)
-			next_tx_ns += opt_tx_cycle_ns;
-	}
-
-	if (opt_pkt_count)
-		complete_tx_only_all();
-}
-
-static void l2fwd(struct xsk_socket_info *xsk)
-{
-	unsigned int rcvd, i;
-	u32 idx_rx = 0, idx_tx = 0;
-	int ret;
-
-	complete_tx_l2fwd(xsk);
-
-	rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
-	if (!rcvd) {
-		if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
-			xsk->app_stats.rx_empty_polls++;
-			recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
-		}
-		return;
-	}
-	xsk->ring_stats.rx_npkts += rcvd;
-
-	ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-	while (ret != rcvd) {
-		if (ret < 0)
-			exit_with_error(-ret);
-		complete_tx_l2fwd(xsk);
-		if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->tx)) {
-			xsk->app_stats.tx_wakeup_sendtos++;
-			kick_tx(xsk);
-		}
-		ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx);
-	}
-
-	for (i = 0; i < rcvd; i++) {
-		u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
-		u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len;
-		u64 orig = addr;
-
-		addr = xsk_umem__add_offset_to_addr(addr);
-		char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
-
-		swap_mac_addresses(pkt);
-
-		hex_dump(pkt, len, addr);
-		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig;
-		xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len;
-	}
-
-	xsk_ring_prod__submit(&xsk->tx, rcvd);
-	xsk_ring_cons__release(&xsk->rx, rcvd);
-
-	xsk->ring_stats.tx_npkts += rcvd;
-	xsk->outstanding_tx += rcvd;
-}
-
-static void l2fwd_all(void)
-{
-	struct pollfd fds[MAX_SOCKS] = {};
-	int i, ret;
-
-	for (;;) {
-		if (opt_poll) {
-			for (i = 0; i < num_socks; i++) {
-				fds[i].fd = xsk_socket__fd(xsks[i]->xsk);
-				fds[i].events = POLLOUT | POLLIN;
-				xsks[i]->app_stats.opt_polls++;
-			}
-			ret = poll(fds, num_socks, opt_timeout);
-			if (ret <= 0)
-				continue;
-		}
-
-		for (i = 0; i < num_socks; i++)
-			l2fwd(xsks[i]);
-
-		if (benchmark_done)
-			break;
-	}
-}
-
-static void load_xdp_program(char **argv, struct bpf_object **obj)
-{
-	struct bpf_prog_load_attr prog_load_attr = {
-		.prog_type      = BPF_PROG_TYPE_XDP,
-	};
-	char xdp_filename[256];
-	int prog_fd;
-
-	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]);
-	prog_load_attr.file = xdp_filename;
-
-	if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd))
-		exit(EXIT_FAILURE);
-	if (prog_fd < 0) {
-		fprintf(stderr, "ERROR: no program found: %s\n",
-			strerror(-prog_fd));
-		exit(EXIT_FAILURE);
-	}
-
-	if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) {
-		fprintf(stderr, "ERROR: link set xdp fd failed\n");
-		exit(EXIT_FAILURE);
-	}
-}
-
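-/* Insert every AF_XDP socket fd into the xsks_map so that the XDP program
- * can redirect received packets to the matching socket.
- */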
-static void enter_xsks_into_map(struct bpf_object *obj)
-{
-	struct bpf_map *map;
-	int i, xsks_map;
-
-	map = bpf_object__find_map_by_name(obj, "xsks_map");
-	xsks_map = bpf_map__fd(map);
-	if (xsks_map < 0) {
-		fprintf(stderr, "ERROR: no xsks map found: %s\n",
-			strerror(-xsks_map));
-		exit(EXIT_FAILURE);
-	}
-
-	for (i = 0; i < num_socks; i++) {
-		int fd = xsk_socket__fd(xsks[i]->xsk);
-		int key, ret;
-
-		key = i;
-		ret = bpf_map_update_elem(xsks_map, &key, &fd, 0);
-		if (ret) {
-			fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i);
-			exit(EXIT_FAILURE);
-		}
-	}
-}
-
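-/* Opt the socket into preferred busy polling: poll for up to 20 usecs per
- * syscall, processing at most one batch of packets per poll.
- */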
-static void apply_setsockopt(struct xsk_socket_info *xsk)
-{
-	int sock_opt;
-
-	if (!opt_busy_poll)
-		return;
-
-	sock_opt = 1;
-	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
-		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
-		exit_with_error(errno);
-
-	sock_opt = 20;
-	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
-		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
-		exit_with_error(errno);
-
-	sock_opt = opt_batch_size;
-	if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
-		       (void *)&sock_opt, sizeof(sock_opt)) < 0)
-		exit_with_error(errno);
-}
-
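-/* Receive the xsks_map file descriptor that the control process passes
- * over the Unix domain socket as SCM_RIGHTS ancillary data.
- */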
-static int recv_xsks_map_fd_from_ctrl_node(int sock, int *_fd)
-{
-	char cms[CMSG_SPACE(sizeof(int))];
-	struct cmsghdr *cmsg;
-	struct msghdr msg;
-	struct iovec iov;
-	int value;
-	int len;
-
-	iov.iov_base = &value;
-	iov.iov_len = sizeof(int);
-
-	msg.msg_name = 0;
-	msg.msg_namelen = 0;
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	msg.msg_flags = 0;
-	msg.msg_control = (caddr_t)cms;
-	msg.msg_controllen = sizeof(cms);
-
-	len = recvmsg(sock, &msg, 0);
-
-	if (len < 0) {
-		fprintf(stderr, "Recvmsg failed length incorrect.\n");
-		return -EINVAL;
-	}
-
-	if (len == 0) {
-		fprintf(stderr, "Recvmsg failed no data\n");
-		return -EINVAL;
-	}
-
-	cmsg = CMSG_FIRSTHDR(&msg);
-	*_fd = *(int *)CMSG_DATA(cmsg);
-
-	return 0;
-}
-
-static int
-recv_xsks_map_fd(int *xsks_map_fd)
-{
-	struct sockaddr_un server;
-	int err;
-
-	sock = socket(AF_UNIX, SOCK_STREAM, 0);
-	if (sock < 0) {
-		fprintf(stderr, "Error opening socket stream: %s", strerror(errno));
-		return errno;
-	}
-
-	server.sun_family = AF_UNIX;
-	strcpy(server.sun_path, SOCKET_NAME);
-
-	if (connect(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) {
-		close(sock);
-		fprintf(stderr, "Error connecting stream socket: %s", strerror(errno));
-		return errno;
-	}
-
-	err = recv_xsks_map_fd_from_ctrl_node(sock, xsks_map_fd);
-	if (err) {
-		fprintf(stderr, "Error %d receiving fd\n", err);
-		return err;
-	}
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
-	struct __user_cap_data_struct data[2] = { { 0 } };
-	bool rx = false, tx = false;
-	struct sched_param schparam;
-	struct xsk_umem_info *umem;
-	struct bpf_object *obj;
-	int xsks_map_fd = 0;
-	pthread_t pt;
-	int i, ret;
-	void *bufs;
-
-	parse_command_line(argc, argv);
-
-	if (opt_reduced_cap) {
-		if (capget(&hdr, data)  < 0)
-			fprintf(stderr, "Error getting capabilities\n");
-
-		data->effective &= CAP_TO_MASK(CAP_NET_RAW);
-		data->permitted &= CAP_TO_MASK(CAP_NET_RAW);
-
-		if (capset(&hdr, data) < 0)
-			fprintf(stderr, "Setting capabilities failed\n");
-
-		if (capget(&hdr, data)  < 0) {
-			fprintf(stderr, "Error getting capabilities\n");
-		} else {
-			fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n",
-				data[0].effective, data[0].inheritable, data[0].permitted);
-			fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n",
-				data[1].effective, data[1].inheritable, data[1].permitted);
-		}
-	} else {
-		/* Use libbpf 1.0 API mode */
-		libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
-		if (opt_num_xsks > 1)
-			load_xdp_program(argv, &obj);
-	}
-
-	/* Reserve memory for the umem. Use hugepages if unaligned chunk mode */
-	bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size,
-		    PROT_READ | PROT_WRITE,
-		    MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0);
-	if (bufs == MAP_FAILED) {
-		printf("ERROR: mmap failed\n");
-		exit(EXIT_FAILURE);
-	}
-
-	/* Create sockets... */
-	umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size);
-	if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) {
-		rx = true;
-		xsk_populate_fill_ring(umem);
-	}
-	if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY)
-		tx = true;
-	for (i = 0; i < opt_num_xsks; i++)
-		xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
-
-	for (i = 0; i < opt_num_xsks; i++)
-		apply_setsockopt(xsks[i]);
-
-	if (opt_bench == BENCH_TXONLY) {
-		if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN)
-			opt_pkt_size = PKTGEN_SIZE_MIN;
-
-		gen_eth_hdr_data();
-
-		for (i = 0; i < NUM_FRAMES; i++)
-			gen_eth_frame(umem, i * opt_xsk_frame_size);
-	}
-
-	if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
-		enter_xsks_into_map(obj);
-
-	if (opt_reduced_cap) {
-		ret = recv_xsks_map_fd(&xsks_map_fd);
-		if (ret) {
-			fprintf(stderr, "Error %d receiving xsks_map_fd\n", ret);
-			exit_with_error(ret);
-		}
-		if (xsks[0]->xsk) {
-			ret = xsk_socket__update_xskmap(xsks[0]->xsk, xsks_map_fd);
-			if (ret) {
-				fprintf(stderr, "Update of BPF map failed(%d)\n", ret);
-				exit_with_error(ret);
-			}
-		}
-	}
-
-	signal(SIGINT, int_exit);
-	signal(SIGTERM, int_exit);
-	signal(SIGABRT, int_exit);
-
-	setlocale(LC_ALL, "");
-
-	prev_time = get_nsecs();
-	start_time = prev_time;
-
-	if (!opt_quiet) {
-		ret = pthread_create(&pt, NULL, poller, NULL);
-		if (ret)
-			exit_with_error(ret);
-	}
-
-	/* Configure sched priority for better wake-up accuracy */
-	memset(&schparam, 0, sizeof(schparam));
-	schparam.sched_priority = opt_schprio;
-	ret = sched_setscheduler(0, opt_schpolicy, &schparam);
-	if (ret) {
-		fprintf(stderr, "Error(%d) in setting priority(%d): %s\n",
-			errno, opt_schprio, strerror(errno));
-		goto out;
-	}
-
-	if (opt_bench == BENCH_RXDROP)
-		rx_drop_all();
-	else if (opt_bench == BENCH_TXONLY)
-		tx_only_all();
-	else
-		l2fwd_all();
-
-out:
-	benchmark_done = true;
-
-	if (!opt_quiet)
-		pthread_join(pt, NULL);
-
-	xdpsock_cleanup();
-
-	munmap(bufs, NUM_FRAMES * opt_xsk_frame_size);
-
-	return 0;
-}
diff --git a/samples/bpf/xsk_fwd.c b/samples/bpf/xsk_fwd.c
deleted file mode 100644
index 2324e18ccc7e..000000000000
--- a/samples/bpf/xsk_fwd.c
+++ /dev/null
@@ -1,1085 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 2020 Intel Corporation. */
-
-#define _GNU_SOURCE
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
-#include <sched.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <time.h>
-#include <unistd.h>
-#include <getopt.h>
-#include <netinet/ether.h>
-#include <net/if.h>
-
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <linux/if_xdp.h>
-
-#include <bpf/libbpf.h>
-#include <bpf/xsk.h>
-#include <bpf/bpf.h>
-
-/* libbpf APIs for AF_XDP are deprecated starting from v0.7 */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-typedef __u64 u64;
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8  u8;
-
-/* This program illustrates packet forwarding between multiple AF_XDP
- * sockets in a multi-threaded environment. All threads share a common
- * buffer pool, with each socket having its own private buffer cache.
- *
- * Example 1: Single thread handling two sockets. The packets received by socket
- * A (interface IFA, queue QA) are forwarded to socket B (interface IFB, queue
- * QB), while the packets received by socket B are forwarded to socket A. The
- * thread is running on CPU core X:
- *
- *         ./xsk_fwd -i IFA -q QA -i IFB -q QB -c X
- *
- * Example 2: Two threads, each handling two sockets. The thread running on CPU
- * core X forwards all the packets received by socket A to socket B, and all the
- * packets received by socket B to socket A. The thread running on CPU core Y is
- * performing the same packet forwarding between sockets C and D:
- *
- *         ./xsk_fwd -i IFA -q QA -i IFB -q QB -i IFC -q QC -i IFD -q QD
- *         -c X -c Y
- */
-
-/*
- * Buffer pool and buffer cache
- *
- * For packet forwarding, the packet buffers are typically allocated from the
- * pool for packet reception and freed back to the pool for further reuse once
- * the packet transmission is completed.
- *
- * The buffer pool is shared between multiple threads. In order to minimize the
- * access latency to the shared buffer pool, each thread creates one (or
- * several) buffer caches, which, unlike the buffer pool, are private to the
- * thread that creates them and therefore cannot be shared with other threads.
- * The access to the shared pool is only needed either (A) when the cache gets
- * empty due to repeated buffer allocations and it needs to be replenished from
- * the pool, or (B) when the cache gets full due to repeated buffer frees and
- * it needs to be flushed back to the pool.
- *
- * In a packet forwarding system, a packet received on any input port can
- * potentially be transmitted on any output port, depending on the forwarding
- * configuration. For AF_XDP sockets, for this to work with zero-copy of the
- * packet buffers, it is required that the buffer pool memory fits into the
- * UMEM area shared by all the sockets.
- */
-
-struct bpool_params {
-	u32 n_buffers;
-	u32 buffer_size;
-	int mmap_flags;
-
-	u32 n_users_max;
-	u32 n_buffers_per_slab;
-};
-
-/* This buffer pool implementation organizes the buffers into equally sized
- * slabs of *n_buffers_per_slab*. Initially, there are *n_slabs* slabs in the
- * pool that are completely filled with buffer pointers (full slabs).
- *
- * Each buffer cache has a slab for buffer allocation and a slab for buffer
- * free, with both of these slabs initially empty. When the cache's allocation
- * slab goes empty, it is swapped with one of the available full slabs from the
- * pool, if any is available. When the cache's free slab goes full, it is
- * swapped for one of the empty slabs from the pool, which is guaranteed to
- * succeed.
- *
- * Partially filled slabs never get traded between the cache and the pool
- * (except when the cache itself is destroyed), which enables fast operation
- * through pointer swapping.
- */
-struct bpool {
-	struct bpool_params params;
-	pthread_mutex_t lock;
-	void *addr;
-
-	u64 **slabs;
-	u64 **slabs_reserved;
-	u64 *buffers;
-	u64 *buffers_reserved;
-
-	u64 n_slabs;
-	u64 n_slabs_reserved;
-	u64 n_buffers;
-
-	u64 n_slabs_available;
-	u64 n_slabs_reserved_available;
-
-	struct xsk_umem_config umem_cfg;
-	struct xsk_ring_prod umem_fq;
-	struct xsk_ring_cons umem_cq;
-	struct xsk_umem *umem;
-};
-
-static struct bpool *
-bpool_init(struct bpool_params *params,
-	   struct xsk_umem_config *umem_cfg)
-{
-	u64 n_slabs, n_slabs_reserved, n_buffers, n_buffers_reserved;
-	u64 slabs_size, slabs_reserved_size;
-	u64 buffers_size, buffers_reserved_size;
-	u64 total_size, i;
-	struct bpool *bp;
-	u8 *p;
-	int status;
-
-	/* Use libbpf 1.0 API mode */
-	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
-	/* bpool internals dimensioning. */
-	n_slabs = (params->n_buffers + params->n_buffers_per_slab - 1) /
-		params->n_buffers_per_slab;
-	n_slabs_reserved = params->n_users_max * 2;
-	n_buffers = n_slabs * params->n_buffers_per_slab;
-	n_buffers_reserved = n_slabs_reserved * params->n_buffers_per_slab;
-
-	slabs_size = n_slabs * sizeof(u64 *);
-	slabs_reserved_size = n_slabs_reserved * sizeof(u64 *);
-	buffers_size = n_buffers * sizeof(u64);
-	buffers_reserved_size = n_buffers_reserved * sizeof(u64);
-
-	total_size = sizeof(struct bpool) +
-		slabs_size + slabs_reserved_size +
-		buffers_size + buffers_reserved_size;
-
-	/* bpool memory allocation. */
-	p = calloc(total_size, sizeof(u8));
-	if (!p)
-		return NULL;
-
-	/* bpool memory initialization. */
-	bp = (struct bpool *)p;
-	memcpy(&bp->params, params, sizeof(*params));
-	bp->params.n_buffers = n_buffers;
-
-	bp->slabs = (u64 **)&p[sizeof(struct bpool)];
-	bp->slabs_reserved = (u64 **)&p[sizeof(struct bpool) +
-		slabs_size];
-	bp->buffers = (u64 *)&p[sizeof(struct bpool) +
-		slabs_size + slabs_reserved_size];
-	bp->buffers_reserved = (u64 *)&p[sizeof(struct bpool) +
-		slabs_size + slabs_reserved_size + buffers_size];
-
-	bp->n_slabs = n_slabs;
-	bp->n_slabs_reserved = n_slabs_reserved;
-	bp->n_buffers = n_buffers;
-
-	for (i = 0; i < n_slabs; i++)
-		bp->slabs[i] = &bp->buffers[i * params->n_buffers_per_slab];
-	bp->n_slabs_available = n_slabs;
-
-	for (i = 0; i < n_slabs_reserved; i++)
-		bp->slabs_reserved[i] = &bp->buffers_reserved[i *
-			params->n_buffers_per_slab];
-	bp->n_slabs_reserved_available = n_slabs_reserved;
-
-	for (i = 0; i < n_buffers; i++)
-		bp->buffers[i] = i * params->buffer_size;
-
-	/* lock. */
-	status = pthread_mutex_init(&bp->lock, NULL);
-	if (status) {
-		free(p);
-		return NULL;
-	}
-
-	/* mmap. */
-	bp->addr = mmap(NULL,
-			n_buffers * params->buffer_size,
-			PROT_READ | PROT_WRITE,
-			MAP_PRIVATE | MAP_ANONYMOUS | params->mmap_flags,
-			-1,
-			0);
-	if (bp->addr == MAP_FAILED) {
-		pthread_mutex_destroy(&bp->lock);
-		free(p);
-		return NULL;
-	}
-
-	/* umem. */
-	status = xsk_umem__create(&bp->umem,
-				  bp->addr,
-				  bp->params.n_buffers * bp->params.buffer_size,
-				  &bp->umem_fq,
-				  &bp->umem_cq,
-				  umem_cfg);
-	if (status) {
-		munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
-		pthread_mutex_destroy(&bp->lock);
-		free(p);
-		return NULL;
-	}
-	memcpy(&bp->umem_cfg, umem_cfg, sizeof(*umem_cfg));
-
-	return bp;
-}
-
-static void
-bpool_free(struct bpool *bp)
-{
-	if (!bp)
-		return;
-
-	xsk_umem__delete(bp->umem);
-	munmap(bp->addr, bp->params.n_buffers * bp->params.buffer_size);
-	pthread_mutex_destroy(&bp->lock);
-	free(bp);
-}
-
-struct bcache {
-	struct bpool *bp;
-
-	u64 *slab_cons;
-	u64 *slab_prod;
-
-	u64 n_buffers_cons;
-	u64 n_buffers_prod;
-};
-
-static u32
-bcache_slab_size(struct bcache *bc)
-{
-	struct bpool *bp = bc->bp;
-
-	return bp->params.n_buffers_per_slab;
-}
-
-static struct bcache *
-bcache_init(struct bpool *bp)
-{
-	struct bcache *bc;
-
-	bc = calloc(1, sizeof(struct bcache));
-	if (!bc)
-		return NULL;
-
-	bc->bp = bp;
-	bc->n_buffers_cons = 0;
-	bc->n_buffers_prod = 0;
-
-	pthread_mutex_lock(&bp->lock);
-	if (bp->n_slabs_reserved_available == 0) {
-		pthread_mutex_unlock(&bp->lock);
-		free(bc);
-		return NULL;
-	}
-
-	bc->slab_cons = bp->slabs_reserved[bp->n_slabs_reserved_available - 1];
-	bc->slab_prod = bp->slabs_reserved[bp->n_slabs_reserved_available - 2];
-	bp->n_slabs_reserved_available -= 2;
-	pthread_mutex_unlock(&bp->lock);
-
-	return bc;
-}
-
-static void
-bcache_free(struct bcache *bc)
-{
-	struct bpool *bp;
-
-	if (!bc)
-		return;
-
-	/* In order to keep this example simple, the case of freeing any
-	 * existing buffers from the cache back to the pool is ignored.
-	 */
-
-	bp = bc->bp;
-	pthread_mutex_lock(&bp->lock);
-	bp->slabs_reserved[bp->n_slabs_reserved_available] = bc->slab_prod;
-	bp->slabs_reserved[bp->n_slabs_reserved_available + 1] = bc->slab_cons;
-	bp->n_slabs_reserved_available += 2;
-	pthread_mutex_unlock(&bp->lock);
-
-	free(bc);
-}
-
-/* To work correctly, the implementation requires that the *n_buffers* input
- * argument is never greater than the buffer pool's *n_buffers_per_slab*. This
- * is typically the case, the one exception being when a large number of
- * buffers is allocated at init time (e.g. for the UMEM fill queue setup).
- * A usage sketch follows this function.
- */
-static inline u32
-bcache_cons_check(struct bcache *bc, u32 n_buffers)
-{
-	struct bpool *bp = bc->bp;
-	u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
-	u64 n_buffers_cons = bc->n_buffers_cons;
-	u64 n_slabs_available;
-	u64 *slab_full;
-
-	/*
-	 * Consumer slab is not empty: Use what's available locally. Do not
-	 * look for more buffers from the pool when the ask can only be
-	 * partially satisfied.
-	 */
-	if (n_buffers_cons)
-		return (n_buffers_cons < n_buffers) ?
-			n_buffers_cons :
-			n_buffers;
-
-	/*
-	 * Consumer slab is empty: look to trade the current consumer slab
-	 * (empty) for a full slab from the pool, if any is available.
-	 */
-	pthread_mutex_lock(&bp->lock);
-	n_slabs_available = bp->n_slabs_available;
-	if (!n_slabs_available) {
-		pthread_mutex_unlock(&bp->lock);
-		return 0;
-	}
-
-	n_slabs_available--;
-	slab_full = bp->slabs[n_slabs_available];
-	bp->slabs[n_slabs_available] = bc->slab_cons;
-	bp->n_slabs_available = n_slabs_available;
-	pthread_mutex_unlock(&bp->lock);
-
-	bc->slab_cons = slab_full;
-	bc->n_buffers_cons = n_buffers_per_slab;
-	return n_buffers;
-}
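A minimal consumer-side sketch of how this pairs with bcache_cons() below (illustrative only; bc, n and buf are hypothetical caller state, with n never exceeding n_buffers_per_slab as required above):

	u32 avail = bcache_cons_check(bc, n);
	u32 k;

	for (k = 0; k < avail; k++)	/* pop only what the check granted */
		buf[k] = bcache_cons(bc);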
-
-static inline u64
-bcache_cons(struct bcache *bc)
-{
-	u64 n_buffers_cons = bc->n_buffers_cons - 1;
-	u64 buffer;
-
-	buffer = bc->slab_cons[n_buffers_cons];
-	bc->n_buffers_cons = n_buffers_cons;
-	return buffer;
-}
-
-static inline void
-bcache_prod(struct bcache *bc, u64 buffer)
-{
-	struct bpool *bp = bc->bp;
-	u64 n_buffers_per_slab = bp->params.n_buffers_per_slab;
-	u64 n_buffers_prod = bc->n_buffers_prod;
-	u64 n_slabs_available;
-	u64 *slab_empty;
-
-	/*
-	 * Producer slab is not yet full: store the current buffer to it.
-	 */
-	if (n_buffers_prod < n_buffers_per_slab) {
-		bc->slab_prod[n_buffers_prod] = buffer;
-		bc->n_buffers_prod = n_buffers_prod + 1;
-		return;
-	}
-
-	/*
-	 * Producer slab is full: trade the cache's current producer slab
-	 * (full) for an empty slab from the pool, then store the current
-	 * buffer to the new producer slab. As one full slab exists in the
-	 * cache, it is guaranteed that there is at least one empty slab
-	 * available in the pool.
-	 */
-	pthread_mutex_lock(&bp->lock);
-	n_slabs_available = bp->n_slabs_available;
-	slab_empty = bp->slabs[n_slabs_available];
-	bp->slabs[n_slabs_available] = bc->slab_prod;
-	bp->n_slabs_available = n_slabs_available + 1;
-	pthread_mutex_unlock(&bp->lock);
-
-	slab_empty[0] = buffer;
-	bc->slab_prod = slab_empty;
-	bc->n_buffers_prod = 1;
-}
-
-/*
- * Port
- *
- * Each of the forwarding ports sits on top of an AF_XDP socket. In order for
- * packet forwarding to happen with no packet buffer copy, all the sockets need
- * to share the same UMEM area, which is used as the buffer pool memory.
- */
-#ifndef MAX_BURST_RX
-#define MAX_BURST_RX 64
-#endif
-
-#ifndef MAX_BURST_TX
-#define MAX_BURST_TX 64
-#endif
-
-struct burst_rx {
-	u64 addr[MAX_BURST_RX];
-	u32 len[MAX_BURST_RX];
-};
-
-struct burst_tx {
-	u64 addr[MAX_BURST_TX];
-	u32 len[MAX_BURST_TX];
-	u32 n_pkts;
-};
-
-struct port_params {
-	struct xsk_socket_config xsk_cfg;
-	struct bpool *bp;
-	const char *iface;
-	u32 iface_queue;
-};
-
-struct port {
-	struct port_params params;
-
-	struct bcache *bc;
-
-	struct xsk_ring_cons rxq;
-	struct xsk_ring_prod txq;
-	struct xsk_ring_prod umem_fq;
-	struct xsk_ring_cons umem_cq;
-	struct xsk_socket *xsk;
-	int umem_fq_initialized;
-
-	u64 n_pkts_rx;
-	u64 n_pkts_tx;
-};
-
-static void
-port_free(struct port *p)
-{
-	if (!p)
-		return;
-
-	/* To keep this example simple, the code to free the buffers from the
-	 * socket's receive and transmit queues, as well as from the UMEM fill
-	 * and completion queues, is not included.
-	 */
-
-	if (p->xsk)
-		xsk_socket__delete(p->xsk);
-
-	bcache_free(p->bc);
-
-	free(p);
-}
-
-static struct port *
-port_init(struct port_params *params)
-{
-	struct port *p;
-	u32 umem_fq_size, pos = 0;
-	int status, i;
-
-	/* Memory allocation and initialization. */
-	p = calloc(1, sizeof(struct port));
-	if (!p)
-		return NULL;
-
-	memcpy(&p->params, params, sizeof(p->params));
-	umem_fq_size = params->bp->umem_cfg.fill_size;
-
-	/* bcache. */
-	p->bc = bcache_init(params->bp);
-	if (!p->bc ||
-	    (bcache_slab_size(p->bc) < umem_fq_size) ||
-	    (bcache_cons_check(p->bc, umem_fq_size) < umem_fq_size)) {
-		port_free(p);
-		return NULL;
-	}
-
-	/* xsk socket. */
-	status = xsk_socket__create_shared(&p->xsk,
-					   params->iface,
-					   params->iface_queue,
-					   params->bp->umem,
-					   &p->rxq,
-					   &p->txq,
-					   &p->umem_fq,
-					   &p->umem_cq,
-					   &params->xsk_cfg);
-	if (status) {
-		port_free(p);
-		return NULL;
-	}
-
-	/* umem fq. */
-	xsk_ring_prod__reserve(&p->umem_fq, umem_fq_size, &pos);
-
-	for (i = 0; i < umem_fq_size; i++)
-		*xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
-			bcache_cons(p->bc);
-
-	xsk_ring_prod__submit(&p->umem_fq, umem_fq_size);
-	p->umem_fq_initialized = 1;
-
-	return p;
-}
-
-static inline u32
-port_rx_burst(struct port *p, struct burst_rx *b)
-{
-	u32 n_pkts, pos, i;
-
-	/* Free buffers for FQ replenish. */
-	n_pkts = ARRAY_SIZE(b->addr);
-
-	n_pkts = bcache_cons_check(p->bc, n_pkts);
-	if (!n_pkts)
-		return 0;
-
-	/* RXQ. */
-	n_pkts = xsk_ring_cons__peek(&p->rxq, n_pkts, &pos);
-	if (!n_pkts) {
-		if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
-			struct pollfd pollfd = {
-				.fd = xsk_socket__fd(p->xsk),
-				.events = POLLIN,
-			};
-
-			poll(&pollfd, 1, 0);
-		}
-		return 0;
-	}
-
-	for (i = 0; i < n_pkts; i++) {
-		b->addr[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->addr;
-		b->len[i] = xsk_ring_cons__rx_desc(&p->rxq, pos + i)->len;
-	}
-
-	xsk_ring_cons__release(&p->rxq, n_pkts);
-	p->n_pkts_rx += n_pkts;
-
-	/* UMEM FQ. */
-	for ( ; ; ) {
-		int status;
-
-		status = xsk_ring_prod__reserve(&p->umem_fq, n_pkts, &pos);
-		if (status == n_pkts)
-			break;
-
-		if (xsk_ring_prod__needs_wakeup(&p->umem_fq)) {
-			struct pollfd pollfd = {
-				.fd = xsk_socket__fd(p->xsk),
-				.events = POLLIN,
-			};
-
-			poll(&pollfd, 1, 0);
-		}
-	}
-
-	for (i = 0; i < n_pkts; i++)
-		*xsk_ring_prod__fill_addr(&p->umem_fq, pos + i) =
-			bcache_cons(p->bc);
-
-	xsk_ring_prod__submit(&p->umem_fq, n_pkts);
-
-	return n_pkts;
-}
-
-static inline void
-port_tx_burst(struct port *p, struct burst_tx *b)
-{
-	u32 n_pkts, pos, i;
-	int status;
-
-	/* UMEM CQ. */
-	n_pkts = p->params.bp->umem_cfg.comp_size;
-
-	n_pkts = xsk_ring_cons__peek(&p->umem_cq, n_pkts, &pos);
-
-	for (i = 0; i < n_pkts; i++) {
-		u64 addr = *xsk_ring_cons__comp_addr(&p->umem_cq, pos + i);
-
-		bcache_prod(p->bc, addr);
-	}
-
-	xsk_ring_cons__release(&p->umem_cq, n_pkts);
-
-	/* TXQ. */
-	n_pkts = b->n_pkts;
-
-	for ( ; ; ) {
-		status = xsk_ring_prod__reserve(&p->txq, n_pkts, &pos);
-		if (status == n_pkts)
-			break;
-
-		if (xsk_ring_prod__needs_wakeup(&p->txq))
-			sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT,
-			       NULL, 0);
-	}
-
-	for (i = 0; i < n_pkts; i++) {
-		xsk_ring_prod__tx_desc(&p->txq, pos + i)->addr = b->addr[i];
-		xsk_ring_prod__tx_desc(&p->txq, pos + i)->len = b->len[i];
-	}
-
-	xsk_ring_prod__submit(&p->txq, n_pkts);
-	if (xsk_ring_prod__needs_wakeup(&p->txq))
-		sendto(xsk_socket__fd(p->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-	p->n_pkts_tx += n_pkts;
-}
-
-/*
- * Thread
- *
- * Packet forwarding threads.
- */
-#ifndef MAX_PORTS_PER_THREAD
-#define MAX_PORTS_PER_THREAD 16
-#endif
-
-struct thread_data {
-	struct port *ports_rx[MAX_PORTS_PER_THREAD];
-	struct port *ports_tx[MAX_PORTS_PER_THREAD];
-	u32 n_ports_rx;
-	struct burst_rx burst_rx;
-	struct burst_tx burst_tx[MAX_PORTS_PER_THREAD];
-	u32 cpu_core_id;
-	int quit;
-};
-
-static void swap_mac_addresses(void *data)
-{
-	struct ether_header *eth = (struct ether_header *)data;
-	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
-	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
-	struct ether_addr tmp;
-
-	tmp = *src_addr;
-	*src_addr = *dst_addr;
-	*dst_addr = tmp;
-}
-
-static void *
-thread_func(void *arg)
-{
-	struct thread_data *t = arg;
-	cpu_set_t cpu_cores;
-	u32 i;
-
-	CPU_ZERO(&cpu_cores);
-	CPU_SET(t->cpu_core_id, &cpu_cores);
-	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores);
-
-	for (i = 0; !t->quit; i = (i + 1) & (t->n_ports_rx - 1)) {
-		struct port *port_rx = t->ports_rx[i];
-		struct port *port_tx = t->ports_tx[i];
-		struct burst_rx *brx = &t->burst_rx;
-		struct burst_tx *btx = &t->burst_tx[i];
-		u32 n_pkts, j;
-
-		/* RX. */
-		n_pkts = port_rx_burst(port_rx, brx);
-		if (!n_pkts)
-			continue;
-
-		/* Process & TX. */
-		for (j = 0; j < n_pkts; j++) {
-			u64 addr = xsk_umem__add_offset_to_addr(brx->addr[j]);
-			u8 *pkt = xsk_umem__get_data(port_rx->params.bp->addr,
-						     addr);
-
-			swap_mac_addresses(pkt);
-
-			btx->addr[btx->n_pkts] = brx->addr[j];
-			btx->len[btx->n_pkts] = brx->len[j];
-			btx->n_pkts++;
-
-			if (btx->n_pkts == MAX_BURST_TX) {
-				port_tx_burst(port_tx, btx);
-				btx->n_pkts = 0;
-			}
-		}
-	}
-
-	return NULL;
-}
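Note that the round-robin step above, i = (i + 1) & (t->n_ports_rx - 1), only wraps correctly when n_ports_rx is a power of two; parse_args() merely checks that ports divide evenly across threads. A defensive sketch (forward_one() is a hypothetical helper standing in for the loop body) wraps for any non-zero port count:

	for (i = 0; !t->quit; i = (i + 1) % t->n_ports_rx)
		forward_one(t, i);	/* RX, process, TX for port pair i */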
-
-/*
- * Process
- */
-static const struct bpool_params bpool_params_default = {
-	.n_buffers = 64 * 1024,
-	.buffer_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-	.mmap_flags = 0,
-
-	.n_users_max = 16,
-	.n_buffers_per_slab = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
-};
-
-static const struct xsk_umem_config umem_cfg_default = {
-	.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2,
-	.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-	.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-	.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
-	.flags = 0,
-};
-
-static const struct port_params port_params_default = {
-	.xsk_cfg = {
-		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
-		.libbpf_flags = 0,
-		.xdp_flags = XDP_FLAGS_DRV_MODE,
-		.bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY,
-	},
-
-	.bp = NULL,
-	.iface = NULL,
-	.iface_queue = 0,
-};
-
-#ifndef MAX_PORTS
-#define MAX_PORTS 64
-#endif
-
-#ifndef MAX_THREADS
-#define MAX_THREADS 64
-#endif
-
-static struct bpool_params bpool_params;
-static struct xsk_umem_config umem_cfg;
-static struct bpool *bp;
-
-static struct port_params port_params[MAX_PORTS];
-static struct port *ports[MAX_PORTS];
-static u64 n_pkts_rx[MAX_PORTS];
-static u64 n_pkts_tx[MAX_PORTS];
-static int n_ports;
-
-static pthread_t threads[MAX_THREADS];
-static struct thread_data thread_data[MAX_THREADS];
-static int n_threads;
-
-static void
-print_usage(char *prog_name)
-{
-	const char *usage =
-		"Usage:\n"
-		"\t%s [ -b SIZE ] -c CORE -i INTERFACE [ -q QUEUE ]\n"
-		"\n"
-		"-c CORE        CPU core to run a packet forwarding thread\n"
-		"               on. May be invoked multiple times.\n"
-		"\n"
-		"-b SIZE        Number of buffers in the buffer pool shared\n"
-		"               by all the forwarding threads. Default: %u.\n"
-		"\n"
-		"-i INTERFACE   Network interface. Each (INTERFACE, QUEUE)\n"
-		"               pair specifies one forwarding port. May be\n"
-		"               invoked multiple times.\n"
-		"\n"
-		"-q QUEUE       Network interface queue for RX and TX. Each\n"
-		"               (INTERFACE, QUEUE) pair specified one\n"
-		"               forwarding port. Default: %u. May be invoked\n"
-		"               multiple times.\n"
-		"\n";
-	printf(usage,
-	       prog_name,
-	       bpool_params_default.n_buffers,
-	       port_params_default.iface_queue);
-}
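For example, assuming the sample binary is built as xsk_fwd, one forwarding thread pinned to core 2, shuttling packets between queues 0 and 1 of eth0, would be started with:

	./xsk_fwd -i eth0 -q 0 -i eth0 -q 1 -c 2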
-
-static int
-parse_args(int argc, char **argv)
-{
-	struct option lgopts[] = {
-		{ NULL,  0, 0, 0 }
-	};
-	int opt, option_index;
-
-	/* Parse the input arguments. */
-	for ( ; ;) {
-		opt = getopt_long(argc, argv, "b:c:i:q:", lgopts, &option_index);
-		if (opt == EOF)
-			break;
-
-		switch (opt) {
-		case 'b':
-			bpool_params.n_buffers = atoi(optarg);
-			break;
-
-		case 'c':
-			if (n_threads == MAX_THREADS) {
-				printf("Max number of threads (%d) reached.\n",
-				       MAX_THREADS);
-				return -1;
-			}
-
-			thread_data[n_threads].cpu_core_id = atoi(optarg);
-			n_threads++;
-			break;
-
-		case 'i':
-			if (n_ports == MAX_PORTS) {
-				printf("Max number of ports (%d) reached.\n",
-				       MAX_PORTS);
-				return -1;
-			}
-
-			port_params[n_ports].iface = optarg;
-			port_params[n_ports].iface_queue = 0;
-			n_ports++;
-			break;
-
-		case 'q':
-			if (n_ports == 0) {
-				printf("No port specified for queue.\n");
-				return -1;
-			}
-			port_params[n_ports - 1].iface_queue = atoi(optarg);
-			break;
-
-		default:
-			printf("Illegal argument.\n");
-			return -1;
-		}
-	}
-
-	optind = 1; /* reset getopt lib */
-
-	/* Check the input arguments. */
-	if (!n_ports) {
-		printf("No ports specified.\n");
-		return -1;
-	}
-
-	if (!n_threads) {
-		printf("No threads specified.\n");
-		return -1;
-	}
-
-	if (n_ports % n_threads) {
-		printf("Ports cannot be evenly distributed to threads.\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-static void
-print_port(u32 port_id)
-{
-	struct port *port = ports[port_id];
-
-	printf("Port %u: interface = %s, queue = %u\n",
-	       port_id, port->params.iface, port->params.iface_queue);
-}
-
-static void
-print_thread(u32 thread_id)
-{
-	struct thread_data *t = &thread_data[thread_id];
-	u32 i;
-
-	printf("Thread %u (CPU core %u): ",
-	       thread_id, t->cpu_core_id);
-
-	for (i = 0; i < t->n_ports_rx; i++) {
-		struct port *port_rx = t->ports_rx[i];
-		struct port *port_tx = t->ports_tx[i];
-
-		printf("(%s, %u) -> (%s, %u), ",
-		       port_rx->params.iface,
-		       port_rx->params.iface_queue,
-		       port_tx->params.iface,
-		       port_tx->params.iface_queue);
-	}
-
-	printf("\n");
-}
-
-static void
-print_port_stats_separator(void)
-{
-	printf("+-%4s-+-%12s-+-%13s-+-%12s-+-%13s-+\n",
-	       "----",
-	       "------------",
-	       "-------------",
-	       "------------",
-	       "-------------");
-}
-
-static void
-print_port_stats_header(void)
-{
-	print_port_stats_separator();
-	printf("| %4s | %12s | %13s | %12s | %13s |\n",
-	       "Port",
-	       "RX packets",
-	       "RX rate (pps)",
-	       "TX packets",
-	       "TX_rate (pps)");
-	print_port_stats_separator();
-}
-
-static void
-print_port_stats_trailer(void)
-{
-	print_port_stats_separator();
-	printf("\n");
-}
-
-static void
-print_port_stats(int port_id, u64 ns_diff)
-{
-	struct port *p = ports[port_id];
-	double rx_pps, tx_pps;
-
-	rx_pps = (p->n_pkts_rx - n_pkts_rx[port_id]) * 1000000000. / ns_diff;
-	tx_pps = (p->n_pkts_tx - n_pkts_tx[port_id]) * 1000000000. / ns_diff;
-
-	printf("| %4d | %12llu | %13.0f | %12llu | %13.0f |\n",
-	       port_id,
-	       p->n_pkts_rx,
-	       rx_pps,
-	       p->n_pkts_tx,
-	       tx_pps);
-
-	n_pkts_rx[port_id] = p->n_pkts_rx;
-	n_pkts_tx[port_id] = p->n_pkts_tx;
-}
-
-static void
-print_port_stats_all(u64 ns_diff)
-{
-	int i;
-
-	print_port_stats_header();
-	for (i = 0; i < n_ports; i++)
-		print_port_stats(i, ns_diff);
-	print_port_stats_trailer();
-}
-
-static int quit;
-
-static void
-signal_handler(int sig)
-{
-	quit = 1;
-}
-
-static void remove_xdp_program(void)
-{
-	int i;
-
-	for (i = 0 ; i < n_ports; i++)
-		bpf_xdp_detach(if_nametoindex(port_params[i].iface),
-			       port_params[i].xsk_cfg.xdp_flags, NULL);
-}
-
-int main(int argc, char **argv)
-{
-	struct timespec time;
-	u64 ns0;
-	int i;
-
-	/* Parse args. */
-	memcpy(&bpool_params, &bpool_params_default,
-	       sizeof(struct bpool_params));
-	memcpy(&umem_cfg, &umem_cfg_default,
-	       sizeof(struct xsk_umem_config));
-	for (i = 0; i < MAX_PORTS; i++)
-		memcpy(&port_params[i], &port_params_default,
-		       sizeof(struct port_params));
-
-	if (parse_args(argc, argv)) {
-		print_usage(argv[0]);
-		return -1;
-	}
-
-	/* Buffer pool initialization. */
-	bp = bpool_init(&bpool_params, &umem_cfg);
-	if (!bp) {
-		printf("Buffer pool initialization failed.\n");
-		return -1;
-	}
-	printf("Buffer pool created successfully.\n");
-
-	/* Ports initialization. */
-	for (i = 0; i < MAX_PORTS; i++)
-		port_params[i].bp = bp;
-
-	for (i = 0; i < n_ports; i++) {
-		ports[i] = port_init(&port_params[i]);
-		if (!ports[i]) {
-			printf("Port %d initialization failed.\n", i);
-			return -1;
-		}
-		print_port(i);
-	}
-	printf("All ports created successfully.\n");
-
-	/* Threads. */
-	for (i = 0; i < n_threads; i++) {
-		struct thread_data *t = &thread_data[i];
-		u32 n_ports_per_thread = n_ports / n_threads, j;
-
-		for (j = 0; j < n_ports_per_thread; j++) {
-			t->ports_rx[j] = ports[i * n_ports_per_thread + j];
-			t->ports_tx[j] = ports[i * n_ports_per_thread +
-				(j + 1) % n_ports_per_thread];
-		}
-
-		t->n_ports_rx = n_ports_per_thread;
-
-		print_thread(i);
-	}
-
-	for (i = 0; i < n_threads; i++) {
-		int status;
-
-		status = pthread_create(&threads[i],
-					NULL,
-					thread_func,
-					&thread_data[i]);
-		if (status) {
-			printf("Thread %d creation failed.\n", i);
-			return -1;
-		}
-	}
-	printf("All threads created successfully.\n");
-
-	/* Print statistics. */
-	signal(SIGINT, signal_handler);
-	signal(SIGTERM, signal_handler);
-	signal(SIGABRT, signal_handler);
-
-	clock_gettime(CLOCK_MONOTONIC, &time);
-	ns0 = time.tv_sec * 1000000000UL + time.tv_nsec;
-	for ( ; !quit; ) {
-		u64 ns1, ns_diff;
-
-		sleep(1);
-		clock_gettime(CLOCK_MONOTONIC, &time);
-		ns1 = time.tv_sec * 1000000000UL + time.tv_nsec;
-		ns_diff = ns1 - ns0;
-		ns0 = ns1;
-
-		print_port_stats_all(ns_diff);
-	}
-
-	/* Threads completion. */
-	printf("Quit.\n");
-	for (i = 0; i < n_threads; i++)
-		thread_data[i].quit = 1;
-
-	for (i = 0; i < n_threads; i++)
-		pthread_join(threads[i], NULL);
-
-	for (i = 0; i < n_ports; i++)
-		port_free(ports[i]);
-
-	bpool_free(bp);
-
-	remove_xdp_program();
-
-	return 0;
-}
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 4ce9a77bc1e0..e44039f89be7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -24,9 +24,11 @@ FEATURE COMMANDS
 ================
 
 |	**bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]]
+|	**bpftool** **feature list_builtins** *GROUP*
 |	**bpftool** **feature help**
 |
 |	*COMPONENT* := { **kernel** | **dev** *NAME* }
+|	*GROUP* := { **prog_types** | **map_types** | **attach_types** | **link_types** | **helpers** }
 
 DESCRIPTION
 ===========
@@ -70,6 +72,16 @@ DESCRIPTION
 		  The keywords **full**, **macros** and **prefix** have the
 		  same role as when probing the kernel.
 
+	**bpftool feature list_builtins** *GROUP*
+		  List items known to bpftool. These can be BPF program types
+		  (**prog_types**), BPF map types (**map_types**), attach types
+		  (**attach_types**), link types (**link_types**), or BPF helper
+		  functions (**helpers**). The command does not probe the system, but
+		  simply lists the elements that bpftool knows from compilation time,
+		  as provided from libbpf (for all object types) or from the BPF UAPI
+		  header (list of helpers). This can be used in scripts to iterate over
+		  BPF types or helpers.
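+		  For example, **bpftool feature list_builtins helpers** prints
+		  the name of every BPF helper function known to this version
+		  of bpftool, one per line.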
+
 	**bpftool feature help**
 		  Print short help message.
 
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c19e0e4c41bd..6b5b3a99f79d 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -93,10 +93,8 @@ INSTALL ?= install
 RM ?= rm -f
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args zlib libcap \
-	clang-bpf-co-re
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
-	clang-bpf-co-re
+FEATURE_TESTS = libbfd disassembler-four-args libcap clang-bpf-co-re
+FEATURE_DISPLAY = libbfd disassembler-four-args libcap clang-bpf-co-re
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -204,11 +202,6 @@ $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@
 
-$(OUTPUT)feature.o:
-ifneq ($(feature-zlib), 1)
-	$(error "No zlib found")
-endif
-
 $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP)
 	$(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@
 
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 91f89a9a5b36..dc1641e3670e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -703,15 +703,8 @@ _bpftool()
                             return 0
                             ;;
                         type)
-                            local BPFTOOL_MAP_CREATE_TYPES='hash array \
-                                prog_array perf_event_array percpu_hash \
-                                percpu_array stack_trace cgroup_array lru_hash \
-                                lru_percpu_hash lpm_trie array_of_maps \
-                                hash_of_maps devmap devmap_hash sockmap cpumap \
-                                xskmap sockhash cgroup_storage reuseport_sockarray \
-                                percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops ringbuf inode_storage task_storage \
-                                bloom_filter'
+                            local BPFTOOL_MAP_CREATE_TYPES="$(bpftool feature list_builtins map_types 2>/dev/null | \
+                                grep -v '^unspec$')"
                             COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
                             return 0
                             ;;
@@ -1039,14 +1032,8 @@ _bpftool()
                     return 0
                     ;;
                 attach|detach)
-                    local BPFTOOL_CGROUP_ATTACH_TYPES='cgroup_inet_ingress cgroup_inet_egress \
-                        cgroup_inet_sock_create cgroup_sock_ops cgroup_device cgroup_inet4_bind \
-                        cgroup_inet6_bind cgroup_inet4_post_bind cgroup_inet6_post_bind \
-                        cgroup_inet4_connect cgroup_inet6_connect cgroup_inet4_getpeername \
-                        cgroup_inet6_getpeername cgroup_inet4_getsockname cgroup_inet6_getsockname \
-                        cgroup_udp4_sendmsg cgroup_udp6_sendmsg cgroup_udp4_recvmsg \
-                        cgroup_udp6_recvmsg cgroup_sysctl cgroup_getsockopt cgroup_setsockopt \
-                        cgroup_inet_sock_release'
+                    local BPFTOOL_CGROUP_ATTACH_TYPES="$(bpftool feature list_builtins attach_types 2>/dev/null | \
+                        grep '^cgroup_')"
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
                     # Check for $prev = $command first
@@ -1175,9 +1162,14 @@ _bpftool()
                     _bpftool_once_attr 'full unprivileged'
                     return 0
                     ;;
+                list_builtins)
+                    [[ $prev != "$command" ]] && return 0
+                    COMPREPLY=( $( compgen -W 'prog_types map_types \
+                        attach_types link_types helpers' -- "$cur" ) )
+                    ;;
                 *)
                     [[ $prev == $object ]] && \
-                        COMPREPLY=( $( compgen -W 'help probe' -- "$cur" ) )
+                        COMPREPLY=( $( compgen -W 'help list_builtins probe' -- "$cur" ) )
                     ;;
             esac
             ;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 42421fe47a58..cced668fb2a3 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 
 #include <bpf/bpf.h>
+#include <bpf/btf.h>
 
 #include "main.h"
 
@@ -36,6 +37,8 @@
 	"                        cgroup_inet_sock_release }"
 
 static unsigned int query_flags;
+static struct btf *btf_vmlinux;
+static __u32 btf_vmlinux_id;
 
 static enum bpf_attach_type parse_attach_type(const char *str)
 {
@@ -64,11 +67,38 @@ static enum bpf_attach_type parse_attach_type(const char *str)
 	return __MAX_BPF_ATTACH_TYPE;
 }
 
+static void guess_vmlinux_btf_id(__u32 attach_btf_obj_id)
+{
+	struct bpf_btf_info btf_info = {};
+	__u32 btf_len = sizeof(btf_info);
+	char name[16] = {};
+	int err;
+	int fd;
+
+	btf_info.name = ptr_to_u64(name);
+	btf_info.name_len = sizeof(name);
+
+	fd = bpf_btf_get_fd_by_id(attach_btf_obj_id);
+	if (fd < 0)
+		return;
+
+	err = bpf_obj_get_info_by_fd(fd, &btf_info, &btf_len);
+	if (err)
+		goto out;
+
+	if (btf_info.kernel_btf && strncmp(name, "vmlinux", sizeof(name)) == 0)
+		btf_vmlinux_id = btf_info.id;
+
+out:
+	close(fd);
+}
+
 static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 			 const char *attach_flags_str,
 			 int level)
 {
 	char prog_name[MAX_PROG_FULL_NAME];
+	const char *attach_btf_name = NULL;
 	struct bpf_prog_info info = {};
 	const char *attach_type_str;
 	__u32 info_len = sizeof(info);
@@ -84,6 +114,20 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 	}
 
 	attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+
+	if (btf_vmlinux) {
+		if (!btf_vmlinux_id)
+			guess_vmlinux_btf_id(info.attach_btf_obj_id);
+
+		if (btf_vmlinux_id == info.attach_btf_obj_id &&
+		    info.attach_btf_id < btf__type_cnt(btf_vmlinux)) {
+			const struct btf_type *t =
+				btf__type_by_id(btf_vmlinux, info.attach_btf_id);
+			attach_btf_name =
+				btf__name_by_offset(btf_vmlinux, t->name_off);
+		}
+	}
+
 	get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name));
 	if (json_output) {
 		jsonw_start_object(json_wtr);
@@ -95,6 +139,10 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 		jsonw_string_field(json_wtr, "attach_flags",
 				   attach_flags_str);
 		jsonw_string_field(json_wtr, "name", prog_name);
+		if (attach_btf_name)
+			jsonw_string_field(json_wtr, "attach_btf_name", attach_btf_name);
+		jsonw_uint_field(json_wtr, "attach_btf_obj_id", info.attach_btf_obj_id);
+		jsonw_uint_field(json_wtr, "attach_btf_id", info.attach_btf_id);
 		jsonw_end_object(json_wtr);
 	} else {
 		printf("%s%-8u ", level ? "    " : "", info.id);
@@ -102,7 +150,13 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 			printf("%-15s", attach_type_str);
 		else
 			printf("type %-10u", attach_type);
-		printf(" %-15s %-15s\n", attach_flags_str, prog_name);
+		printf(" %-15s %-15s", attach_flags_str, prog_name);
+		if (attach_btf_name)
+			printf(" %-15s", attach_btf_name);
+		else if (info.attach_btf_id)
+			printf(" attach_btf_obj_id=%d attach_btf_id=%d",
+			       info.attach_btf_obj_id, info.attach_btf_id);
+		printf("\n");
 	}
 
 	close(prog_fd);
@@ -144,40 +198,49 @@ static int cgroup_has_attached_progs(int cgroup_fd)
 static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
 				   int level)
 {
+	LIBBPF_OPTS(bpf_prog_query_opts, p);
+	__u32 prog_attach_flags[1024] = {0};
 	const char *attach_flags_str;
 	__u32 prog_ids[1024] = {0};
-	__u32 prog_cnt, iter;
-	__u32 attach_flags;
 	char buf[32];
+	__u32 iter;
 	int ret;
 
-	prog_cnt = ARRAY_SIZE(prog_ids);
-	ret = bpf_prog_query(cgroup_fd, type, query_flags, &attach_flags,
-			     prog_ids, &prog_cnt);
+	p.query_flags = query_flags;
+	p.prog_cnt = ARRAY_SIZE(prog_ids);
+	p.prog_ids = prog_ids;
+	p.prog_attach_flags = prog_attach_flags;
+
+	ret = bpf_prog_query_opts(cgroup_fd, type, &p);
 	if (ret)
 		return ret;
 
-	if (prog_cnt == 0)
+	if (p.prog_cnt == 0)
 		return 0;
 
-	switch (attach_flags) {
-	case BPF_F_ALLOW_MULTI:
-		attach_flags_str = "multi";
-		break;
-	case BPF_F_ALLOW_OVERRIDE:
-		attach_flags_str = "override";
-		break;
-	case 0:
-		attach_flags_str = "";
-		break;
-	default:
-		snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
-		attach_flags_str = buf;
-	}
+	for (iter = 0; iter < p.prog_cnt; iter++) {
+		__u32 attach_flags;
+
+		attach_flags = prog_attach_flags[iter] ?: p.attach_flags;
+
+		switch (attach_flags) {
+		case BPF_F_ALLOW_MULTI:
+			attach_flags_str = "multi";
+			break;
+		case BPF_F_ALLOW_OVERRIDE:
+			attach_flags_str = "override";
+			break;
+		case 0:
+			attach_flags_str = "";
+			break;
+		default:
+			snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
+			attach_flags_str = buf;
+		}
 
-	for (iter = 0; iter < prog_cnt; iter++)
 		show_bpf_prog(prog_ids[iter], type,
 			      attach_flags_str, level);
+	}
 
 	return 0;
 }
@@ -233,6 +296,7 @@ static int do_show(int argc, char **argv)
 		printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
 		       "AttachFlags", "Name");
 
+	btf_vmlinux = libbpf_find_kernel_btf();
 	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
 		/*
 		 * Not all attach types may be supported, so it's expected,
@@ -296,6 +360,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
 		printf("%s\n", fpath);
 	}
 
+	btf_vmlinux = libbpf_find_kernel_btf();
 	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
 		show_attached_bpf_progs(cgroup_fd, type, ftw->level);
 
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index a0d4acd7c54a..067e9ea59e3b 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -13,14 +13,17 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <linux/limits.h>
-#include <linux/magic.h>
 #include <net/if.h>
 #include <sys/mount.h>
 #include <sys/resource.h>
 #include <sys/stat.h>
 #include <sys/vfs.h>
 
+#include <linux/filter.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <linux/unistd.h>
+
 #include <bpf/bpf.h>
 #include <bpf/hashmap.h>
 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
@@ -73,11 +76,73 @@ static bool is_bpffs(char *path)
 	return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
 }
 
+/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
+ * memcg-based memory accounting for BPF maps and programs. This was done in
+ * commit 97306be45fbe ("Merge branch 'switch to memcg-based memory
+ * accounting'"), in Linux 5.11.
+ *
+ * Libbpf also offers to probe for memcg-based accounting vs rlimit, but does
+ * so by checking for the availability of a given BPF helper and this has
+ * failed on some kernels with backports in the past, see commit 6b4384ff1088
+ * ("Revert "bpftool: Use libbpf 1.0 API mode instead of RLIMIT_MEMLOCK"").
+ * Instead, we can probe by lowering the process-based rlimit to 0, trying to
+ * load a BPF object, and resetting the rlimit. If the load succeeds then
+ * memcg-based accounting is supported.
+ *
+ * This would be too dangerous to do in the library, because multithreaded
+ * applications might attempt to load items while the rlimit is at 0. Given
+ * that bpftool is single-threaded, this is fine to do here.
+ */
+static bool known_to_need_rlimit(void)
+{
+	struct rlimit rlim_init, rlim_cur_zero = {};
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	size_t insn_cnt = ARRAY_SIZE(insns);
+	union bpf_attr attr;
+	int prog_fd, err;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = ptr_to_u64(insns);
+	attr.insn_cnt = insn_cnt;
+	attr.license = ptr_to_u64("GPL");
+
+	if (getrlimit(RLIMIT_MEMLOCK, &rlim_init))
+		return false;
+
+	/* Drop the soft limit to zero. Keep the hard limit at its current
+	 * value, because lowering it would be a permanent operation for
+	 * unprivileged users.
+	 */
+	rlim_cur_zero.rlim_max = rlim_init.rlim_max;
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_cur_zero))
+		return false;
+
+	/* Do not use bpf_prog_load() from libbpf here, because it calls
+	 * bump_rlimit_memlock(), interfering with the current probe.
+	 */
+	prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+	err = errno;
+
+	/* reset soft rlimit to its initial value */
+	setrlimit(RLIMIT_MEMLOCK, &rlim_init);
+
+	if (prog_fd < 0)
+		return err == EPERM;
+
+	close(prog_fd);
+	return false;
+}
+
 void set_max_rlimit(void)
 {
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
 
-	setrlimit(RLIMIT_MEMLOCK, &rinf);
+	if (known_to_need_rlimit())
+		setrlimit(RLIMIT_MEMLOCK, &rinf);
 }
 
 static int
@@ -251,6 +316,7 @@ const char *get_fd_type_name(enum bpf_obj_type type)
 		[BPF_OBJ_UNKNOWN]	= "unknown",
 		[BPF_OBJ_PROG]		= "prog",
 		[BPF_OBJ_MAP]		= "map",
+		[BPF_OBJ_LINK]		= "link",
 	};
 
 	if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index bac4ef428a02..7ecabf7947fb 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -1258,6 +1258,58 @@ exit_close_json:
 	return 0;
 }
 
+static const char *get_helper_name(unsigned int id)
+{
+	if (id >= ARRAY_SIZE(helper_name))
+		return NULL;
+
+	return helper_name[id];
+}
+
+static int do_list_builtins(int argc, char **argv)
+{
+	const char *(*get_name)(unsigned int id);
+	unsigned int id = 0;
+
+	if (argc < 1)
+		usage();
+
+	if (is_prefix(*argv, "prog_types")) {
+		get_name = (const char *(*)(unsigned int))libbpf_bpf_prog_type_str;
+	} else if (is_prefix(*argv, "map_types")) {
+		get_name = (const char *(*)(unsigned int))libbpf_bpf_map_type_str;
+	} else if (is_prefix(*argv, "attach_types")) {
+		get_name = (const char *(*)(unsigned int))libbpf_bpf_attach_type_str;
+	} else if (is_prefix(*argv, "link_types")) {
+		get_name = (const char *(*)(unsigned int))libbpf_bpf_link_type_str;
+	} else if (is_prefix(*argv, "helpers")) {
+		get_name = get_helper_name;
+	} else {
+		p_err("expected 'prog_types', 'map_types', 'attach_types', 'link_types' or 'helpers', got: %s", *argv);
+		return -1;
+	}
+
+	if (json_output)
+		jsonw_start_array(json_wtr);	/* root array */
+
+	while (true) {
+		const char *name;
+
+		name = get_name(id++);
+		if (!name)
+			break;
+		if (json_output)
+			jsonw_string(json_wtr, name);
+		else
+			printf("%s\n", name);
+	}
+
+	if (json_output)
+		jsonw_end_array(json_wtr);	/* root array */
+
+	return 0;
+}
+
 static int do_help(int argc, char **argv)
 {
 	if (json_output) {
@@ -1267,9 +1319,11 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+		"       %1$s %2$s list_builtins GROUP\n"
 		"       %1$s %2$s help\n"
 		"\n"
 		"       COMPONENT := { kernel | dev NAME }\n"
+		"       GROUP := { prog_types | map_types | attach_types | link_types | helpers }\n"
 		"       " HELP_SPEC_OPTIONS " }\n"
 		"",
 		bin_name, argv[-2]);
@@ -1278,8 +1332,9 @@ static int do_help(int argc, char **argv)
 }
 
 static const struct cmd cmds[] = {
-	{ "probe",	do_probe },
-	{ "help",	do_help },
+	{ "probe",		do_probe },
+	{ "list_builtins",	do_list_builtins },
+	{ "help",		do_help },
 	{ 0 }
 };
 
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 480cbd859359..1cf53bb01936 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1762,6 +1762,7 @@ btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_poi
 		}
 		break;
 	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_TYPEDEF:
 		err = btfgen_mark_type(info, btf_type->type, follow_pointers);
@@ -1856,6 +1857,112 @@ static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_sp
 	return 0;
 }
 
+/* Mark types, members, and member types. Compared to btfgen_record_field_relo,
+ * this function does not rely on the target spec for inferring members, but
+ * uses the associated BTF.
+ *
+ * The `behind_ptr` argument is used to stop marking of composite types reached
+ * through a pointer. This way, we can keep BTF size in check while providing
+ * reasonable match semantics.
+ */
+static int btfgen_mark_type_match(struct btfgen_info *info, __u32 type_id, bool behind_ptr)
+{
+	const struct btf_type *btf_type;
+	struct btf *btf = info->src_btf;
+	struct btf_type *cloned_type;
+	int i, err;
+
+	if (type_id == 0)
+		return 0;
+
+	btf_type = btf__type_by_id(btf, type_id);
+	/* mark type on cloned BTF as used */
+	cloned_type = (struct btf_type *)btf__type_by_id(info->marked_btf, type_id);
+	cloned_type->name_off = MARKED;
+
+	switch (btf_kind(btf_type)) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_INT:
+	case BTF_KIND_FLOAT:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *m = btf_members(btf_type);
+		__u16 vlen = btf_vlen(btf_type);
+
+		if (behind_ptr)
+			break;
+
+		for (i = 0; i < vlen; i++, m++) {
+			/* mark member */
+			btfgen_mark_member(info, type_id, i);
+
+			/* mark member's type */
+			err = btfgen_mark_type_match(info, m->type, false);
+			if (err)
+				return err;
+		}
+		break;
+	}
+	case BTF_KIND_CONST:
+	case BTF_KIND_FWD:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+		return btfgen_mark_type_match(info, btf_type->type, behind_ptr);
+	case BTF_KIND_PTR:
+		return btfgen_mark_type_match(info, btf_type->type, true);
+	case BTF_KIND_ARRAY: {
+		struct btf_array *array;
+
+		array = btf_array(btf_type);
+		/* mark array type */
+		err = btfgen_mark_type_match(info, array->type, false);
+		/* mark array's index type */
+		err = err ? : btfgen_mark_type_match(info, array->index_type, false);
+		if (err)
+			return err;
+		break;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		__u16 vlen = btf_vlen(btf_type);
+		struct btf_param *param;
+
+		/* mark ret type */
+		err = btfgen_mark_type_match(info, btf_type->type, false);
+		if (err)
+			return err;
+
+		/* mark parameters types */
+		param = btf_params(btf_type);
+		for (i = 0; i < vlen; i++) {
+			err = btfgen_mark_type_match(info, param->type, false);
+			if (err)
+				return err;
+			param++;
+		}
+		break;
+	}
+	/* tells if some other type needs to be handled */
+	default:
+		p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int btfgen_record_type_match_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
+{
+	return btfgen_mark_type_match(info, targ_spec->root_type_id, false);
+}
+
 static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec)
 {
 	return btfgen_mark_type(info, targ_spec->root_type_id, true);
@@ -1882,6 +1989,8 @@ static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *r
 	case BPF_CORE_TYPE_EXISTS:
 	case BPF_CORE_TYPE_SIZE:
 		return btfgen_record_type_relo(info, res);
+	case BPF_CORE_TYPE_MATCHES:
+		return btfgen_record_type_match_relo(info, res);
 	case BPF_CORE_ENUMVAL_EXISTS:
 	case BPF_CORE_ENUMVAL_VALUE:
 		return btfgen_record_enumval_relo(info, res);
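To make the behind_ptr cutoff in btfgen_mark_type_match() above concrete, consider an illustrative pair of types (not from the patch): marking the outer struct visits its members and their types, while a struct reached only through a pointer is marked as a type without descending into its members.

	struct inner {			/* marked (reached via PTR), but ...  */
		int x;			/* ... members like this are skipped  */
	};

	struct outer {			/* marked, members descended into     */
		struct inner *p;
		int y;
	};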
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 589cb76b227a..5e5060c2ac04 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -63,8 +63,6 @@ static inline void *u64_to_ptr(__u64 ptr)
 #define HELP_SPEC_LINK							\
 	"LINK := { id LINK_ID | pinned FILE }"
 
-extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
-
 /* keep in sync with the definition in skeleton/pid_iter.bpf.c */
 enum bpf_obj_type {
 	BPF_OBJ_UNKNOWN,
diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h
index 57890b357f85..71e54b1e3796 100644
--- a/tools/include/linux/btf_ids.h
+++ b/tools/include/linux/btf_ids.h
@@ -73,7 +73,7 @@ asm(							\
 __BTF_ID_LIST(name, local)				\
 extern u32 name[];
 
-#define BTF_ID_LIST_GLOBAL(name)			\
+#define BTF_ID_LIST_GLOBAL(name, n)			\
 __BTF_ID_LIST(name, globl)
 
 /* The BTF_ID_LIST_SINGLE macro defines a BTF_ID_LIST with
@@ -82,6 +82,9 @@ __BTF_ID_LIST(name, globl)
 #define BTF_ID_LIST_SINGLE(name, prefix, typename)	\
 	BTF_ID_LIST(name) \
 	BTF_ID(prefix, typename)
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) \
+	BTF_ID_LIST_GLOBAL(name, 1)			  \
+	BTF_ID(prefix, typename)
 
 /*
  * The BTF_ID_UNUSED macro defines 4 zero bytes.
@@ -143,13 +146,14 @@ extern struct btf_id_set name;
 
 #else
 
-#define BTF_ID_LIST(name) static u32 name[5];
+#define BTF_ID_LIST(name) static u32 __maybe_unused name[5];
 #define BTF_ID(prefix, name)
 #define BTF_ID_UNUSED
-#define BTF_ID_LIST_GLOBAL(name) u32 name[1];
-#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1];
-#define BTF_SET_START(name) static struct btf_id_set name = { 0 };
-#define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 };
+#define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n];
+#define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1];
+#define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1];
+#define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 };
+#define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 };
 #define BTF_SET_END(name)
 
 #endif /* CONFIG_DEBUG_INFO_BTF */
@@ -172,7 +176,10 @@ extern struct btf_id_set name;
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock)		\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)			\
 	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)			\
-	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_MPTCP, mptcp_sock)			\
+	BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket)
 
 enum {
 #define BTF_SOCK_TYPE(name, str) name,
@@ -184,4 +191,18 @@ MAX_BTF_SOCK_TYPE,
 extern u32 btf_sock_ids[];
 #endif
 
+#define BTF_TRACING_TYPE_xxx	\
+	BTF_TRACING_TYPE(BTF_TRACING_TYPE_TASK, task_struct)	\
+	BTF_TRACING_TYPE(BTF_TRACING_TYPE_FILE, file)		\
+	BTF_TRACING_TYPE(BTF_TRACING_TYPE_VMA, vm_area_struct)
+
+enum {
+#define BTF_TRACING_TYPE(name, type) name,
+BTF_TRACING_TYPE_xxx
+#undef BTF_TRACING_TYPE
+MAX_BTF_TRACING_TYPE,
+};
+
+extern u32 btf_tracing_ids[];
+
 #endif
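For reference, expanding the BTF_TRACING_TYPE_xxx list above with BTF_TRACING_TYPE(name, type) defined as `name,` yields the following enumerators:

	enum {
		BTF_TRACING_TYPE_TASK,
		BTF_TRACING_TYPE_FILE,
		BTF_TRACING_TYPE_VMA,
		MAX_BTF_TRACING_TYPE,
	};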
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e81362891596..379e68fb866f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -998,6 +998,7 @@ enum bpf_attach_type {
 	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
 	BPF_PERF_EVENT,
 	BPF_TRACE_KPROBE_MULTI,
+	BPF_LSM_CGROUP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -1431,6 +1432,7 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
+		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
@@ -6075,6 +6077,8 @@ struct bpf_prog_info {
 	__u64 run_cnt;
 	__u64 recursion_misses;
 	__u32 verified_insns;
+	__u32 attach_btf_obj_id;
+	__u32 attach_btf_id;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -6782,6 +6786,7 @@ enum bpf_core_relo_kind {
 	BPF_CORE_TYPE_SIZE = 9,              /* type size in bytes */
 	BPF_CORE_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
 	BPF_CORE_ENUMVAL_VALUE = 11,         /* enum value integer value */
+	BPF_CORE_TYPE_MATCHES = 12,          /* type match in target kernel */
 };
 
 /*
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 31a1a9015902..5a3dfb56d78f 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
-	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
+	    netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
 	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
 	    usdt.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index a1265b152027..4c904ef0b47e 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -237,7 +237,7 @@ install_lib: all_cmd
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
-SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h	     \
+SRC_HDRS := bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h	     \
 	    bpf_helpers.h bpf_tracing.h bpf_endian.h bpf_core_read.h	     \
 	    skel_internal.h libbpf_version.h usdt.bpf.h
 GEN_HDRS := $(BPF_GENERATED)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 240186aac8e6..5eb0df90eb2b 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -147,10 +147,6 @@ int bump_rlimit_memlock(void)
 {
 	struct rlimit rlim;
 
-	/* this the default in libbpf 1.0, but for now user has to opt-in explicitly */
-	if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK))
-		return 0;
-
 	/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
 	if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT))
 		return 0;
@@ -233,11 +229,10 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,
 	return info;
 }
 
-DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0)
-int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
-		         const char *prog_name, const char *license,
-		         const struct bpf_insn *insns, size_t insn_cnt,
-		         const struct bpf_prog_load_opts *opts)
+int bpf_prog_load(enum bpf_prog_type prog_type,
+		  const char *prog_name, const char *license,
+		  const struct bpf_insn *insns, size_t insn_cnt,
+		  const struct bpf_prog_load_opts *opts)
 {
 	void *finfo = NULL, *linfo = NULL;
 	const char *func_info, *line_info;
@@ -384,94 +379,6 @@ done:
 	return libbpf_err_errno(fd);
 }
 
-__attribute__((alias("bpf_load_program_xattr2")))
-int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-			   char *log_buf, size_t log_buf_sz);
-
-static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr,
-				   char *log_buf, size_t log_buf_sz)
-{
-	LIBBPF_OPTS(bpf_prog_load_opts, p);
-
-	if (!load_attr || !log_buf != !log_buf_sz)
-		return libbpf_err(-EINVAL);
-
-	p.expected_attach_type = load_attr->expected_attach_type;
-	switch (load_attr->prog_type) {
-	case BPF_PROG_TYPE_STRUCT_OPS:
-	case BPF_PROG_TYPE_LSM:
-		p.attach_btf_id = load_attr->attach_btf_id;
-		break;
-	case BPF_PROG_TYPE_TRACING:
-	case BPF_PROG_TYPE_EXT:
-		p.attach_btf_id = load_attr->attach_btf_id;
-		p.attach_prog_fd = load_attr->attach_prog_fd;
-		break;
-	default:
-		p.prog_ifindex = load_attr->prog_ifindex;
-		p.kern_version = load_attr->kern_version;
-	}
-	p.log_level = load_attr->log_level;
-	p.log_buf = log_buf;
-	p.log_size = log_buf_sz;
-	p.prog_btf_fd = load_attr->prog_btf_fd;
-	p.func_info_rec_size = load_attr->func_info_rec_size;
-	p.func_info_cnt = load_attr->func_info_cnt;
-	p.func_info = load_attr->func_info;
-	p.line_info_rec_size = load_attr->line_info_rec_size;
-	p.line_info_cnt = load_attr->line_info_cnt;
-	p.line_info = load_attr->line_info;
-	p.prog_flags = load_attr->prog_flags;
-
-	return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license,
-			     load_attr->insns, load_attr->insns_cnt, &p);
-}
-
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-		     size_t insns_cnt, const char *license,
-		     __u32 kern_version, char *log_buf,
-		     size_t log_buf_sz)
-{
-	struct bpf_load_program_attr load_attr;
-
-	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
-	load_attr.prog_type = type;
-	load_attr.expected_attach_type = 0;
-	load_attr.name = NULL;
-	load_attr.insns = insns;
-	load_attr.insns_cnt = insns_cnt;
-	load_attr.license = license;
-	load_attr.kern_version = kern_version;
-
-	return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);
-}
-
-int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
-		       size_t insns_cnt, __u32 prog_flags, const char *license,
-		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
-		       int log_level)
-{
-	union bpf_attr attr;
-	int fd;
-
-	bump_rlimit_memlock();
-
-	memset(&attr, 0, sizeof(attr));
-	attr.prog_type = type;
-	attr.insn_cnt = (__u32)insns_cnt;
-	attr.insns = ptr_to_u64(insns);
-	attr.license = ptr_to_u64(license);
-	attr.log_buf = ptr_to_u64(log_buf);
-	attr.log_size = log_buf_sz;
-	attr.log_level = log_level;
-	log_buf[0] = 0;
-	attr.kern_version = kern_version;
-	attr.prog_flags = prog_flags;
-
-	fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);
-	return libbpf_err_errno(fd);
-}
-
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags)
 {
@@ -888,80 +795,48 @@ int bpf_iter_create(int link_fd)
 	return libbpf_err_errno(fd);
 }
 
-int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
-		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+int bpf_prog_query_opts(int target_fd,
+			enum bpf_attach_type type,
+			struct bpf_prog_query_opts *opts)
 {
 	union bpf_attr attr;
 	int ret;
 
+	if (!OPTS_VALID(opts, bpf_prog_query_opts))
+		return libbpf_err(-EINVAL);
+
 	memset(&attr, 0, sizeof(attr));
+
 	attr.query.target_fd	= target_fd;
 	attr.query.attach_type	= type;
-	attr.query.query_flags	= query_flags;
-	attr.query.prog_cnt	= *prog_cnt;
-	attr.query.prog_ids	= ptr_to_u64(prog_ids);
+	attr.query.query_flags	= OPTS_GET(opts, query_flags, 0);
+	attr.query.prog_cnt	= OPTS_GET(opts, prog_cnt, 0);
+	attr.query.prog_ids	= ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
+	attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
 
 	ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
 
-	if (attach_flags)
-		*attach_flags = attr.query.attach_flags;
-	*prog_cnt = attr.query.prog_cnt;
-
-	return libbpf_err_errno(ret);
-}
-
-int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
-		      void *data_out, __u32 *size_out, __u32 *retval,
-		      __u32 *duration)
-{
-	union bpf_attr attr;
-	int ret;
-
-	memset(&attr, 0, sizeof(attr));
-	attr.test.prog_fd = prog_fd;
-	attr.test.data_in = ptr_to_u64(data);
-	attr.test.data_out = ptr_to_u64(data_out);
-	attr.test.data_size_in = size;
-	attr.test.repeat = repeat;
-
-	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
-
-	if (size_out)
-		*size_out = attr.test.data_size_out;
-	if (retval)
-		*retval = attr.test.retval;
-	if (duration)
-		*duration = attr.test.duration;
+	OPTS_SET(opts, attach_flags, attr.query.attach_flags);
+	OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
 
 	return libbpf_err_errno(ret);
 }
 
-int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
 {
-	union bpf_attr attr;
+	LIBBPF_OPTS(bpf_prog_query_opts, opts);
 	int ret;
 
-	if (!test_attr->data_out && test_attr->data_size_out > 0)
-		return libbpf_err(-EINVAL);
-
-	memset(&attr, 0, sizeof(attr));
-	attr.test.prog_fd = test_attr->prog_fd;
-	attr.test.data_in = ptr_to_u64(test_attr->data_in);
-	attr.test.data_out = ptr_to_u64(test_attr->data_out);
-	attr.test.data_size_in = test_attr->data_size_in;
-	attr.test.data_size_out = test_attr->data_size_out;
-	attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
-	attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
-	attr.test.ctx_size_in = test_attr->ctx_size_in;
-	attr.test.ctx_size_out = test_attr->ctx_size_out;
-	attr.test.repeat = test_attr->repeat;
+	opts.query_flags = query_flags;
+	opts.prog_ids = prog_ids;
+	opts.prog_cnt = *prog_cnt;
 
-	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	ret = bpf_prog_query_opts(target_fd, type, &opts);
 
-	test_attr->data_size_out = attr.test.data_size_out;
-	test_attr->ctx_size_out = attr.test.ctx_size_out;
-	test_attr->retval = attr.test.retval;
-	test_attr->duration = attr.test.duration;
+	if (attach_flags)
+		*attach_flags = opts.attach_flags;
+	*prog_cnt = opts.prog_cnt;
 
 	return libbpf_err_errno(ret);
 }
@@ -1162,27 +1037,6 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_loa
 	return libbpf_err_errno(fd);
 }
 
-int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log)
-{
-	LIBBPF_OPTS(bpf_btf_load_opts, opts);
-	int fd;
-
-retry:
-	if (do_log && log_buf && log_buf_size) {
-		opts.log_buf = log_buf;
-		opts.log_size = log_buf_size;
-		opts.log_level = 1;
-	}
-
-	fd = bpf_btf_load(btf, btf_size, &opts);
-	if (fd < 0 && !do_log && log_buf && log_buf_size) {
-		do_log = true;
-		goto retry;
-	}
-
-	return libbpf_err_errno(fd);
-}
-
 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
 		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
 		      __u64 *probe_addr)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index cabc03703e29..88a7cc4bd76f 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -103,54 +103,6 @@ LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
 			     const char *prog_name, const char *license,
 			     const struct bpf_insn *insns, size_t insn_cnt,
 			     const struct bpf_prog_load_opts *opts);
-/* this "specialization" should go away in libbpf 1.0 */
-LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type,
-				    const char *prog_name, const char *license,
-				    const struct bpf_insn *insns, size_t insn_cnt,
-				    const struct bpf_prog_load_opts *opts);
-
-/* This is an elaborate way to not conflict with deprecated bpf_prog_load()
- * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone.
- * With this approach, if someone is calling bpf_prog_load() with
- * 4 arguments, they will use the deprecated API, which keeps backwards
- * compatibility (both source code and binary). If bpf_prog_load() is called
- * with 6 arguments, though, it gets redirected to __bpf_prog_load.
- * So looking forward to libbpf 1.0 when this hack will be gone and
- * __bpf_prog_load() will be called just bpf_prog_load().
- */
-#ifndef bpf_prog_load
-#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__)
-#define ___bpf_prog_load4(file, type, pobj, prog_fd) \
-	bpf_prog_load_deprecated(file, type, pobj, prog_fd)
-#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \
-	bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts)
-#endif /* bpf_prog_load */
-
-struct bpf_load_program_attr {
-	enum bpf_prog_type prog_type;
-	enum bpf_attach_type expected_attach_type;
-	const char *name;
-	const struct bpf_insn *insns;
-	size_t insns_cnt;
-	const char *license;
-	union {
-		__u32 kern_version;
-		__u32 attach_prog_fd;
-	};
-	union {
-		__u32 prog_ifindex;
-		__u32 attach_btf_id;
-	};
-	__u32 prog_btf_fd;
-	__u32 func_info_rec_size;
-	const void *func_info;
-	__u32 func_info_cnt;
-	__u32 line_info_rec_size;
-	const void *line_info;
-	__u32 line_info_cnt;
-	__u32 log_level;
-	__u32 prog_flags;
-};
 
 /* Flags to direct loading requirements */
 #define MAPS_RELAX_COMPAT	0x01
@@ -158,22 +110,6 @@ struct bpf_load_program_attr {
 /* Recommended log buffer size */
 #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
-				      char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
-				const struct bpf_insn *insns, size_t insns_cnt,
-				const char *license, __u32 kern_version,
-				char *log_buf, size_t log_buf_sz);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")
-LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
-				  const struct bpf_insn *insns,
-				  size_t insns_cnt, __u32 prog_flags,
-				  const char *license, __u32 kern_version,
-				  char *log_buf, size_t log_buf_sz,
-				  int log_level);
-
 struct bpf_btf_load_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 
@@ -187,10 +123,6 @@ struct bpf_btf_load_opts {
 LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size,
 			    const struct bpf_btf_load_opts *opts);
 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead")
-LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
-			    __u32 log_buf_size, bool do_log);
-
 LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
 				   __u64 flags);
 
@@ -353,10 +285,6 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd,
 				     enum bpf_attach_type type,
 				     const struct bpf_prog_attach_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead")
-LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
-				     enum bpf_attach_type type,
-				     const struct bpf_prog_attach_opts *opts);
 LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 				enum bpf_attach_type type);
@@ -422,17 +350,6 @@ struct bpf_prog_test_run_attr {
			     * out: length of ctx_out */
 };
 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
-
-/*
- * bpf_prog_test_run does not check that data_out is large enough. Consider
- * using bpf_prog_test_run_opts instead.
- */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_test_run_opts() instead")
-LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
-				 __u32 size, void *data_out, __u32 *size_out,
-				 __u32 *retval, __u32 *duration);
 LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
@@ -442,9 +359,24 @@ LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
+
+struct bpf_prog_query_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 query_flags;
+	__u32 attach_flags; /* output argument */
+	__u32 *prog_ids;
+	__u32 prog_cnt; /* input+output argument */
+	__u32 *prog_attach_flags;
+};
+#define bpf_prog_query_opts__last_field prog_attach_flags
+
+LIBBPF_API int bpf_prog_query_opts(int target_fd,
+				   enum bpf_attach_type type,
+				   struct bpf_prog_query_opts *opts);
 LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
 			      __u32 query_flags, __u32 *attach_flags,
 			      __u32 *prog_ids, __u32 *prog_cnt);
+
 LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
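
The new bpf_prog_query_opts() carries the same information as bpf_prog_query()
but through an extensible opts struct; a minimal sketch (cgroup_fd is an
assumed, already-open cgroup directory fd):

	__u32 ids[16];
	LIBBPF_OPTS(bpf_prog_query_opts, opts,
		.prog_ids = ids,
		.prog_cnt = 16,	/* in: array capacity; out: number attached */
	);
	int err = bpf_prog_query_opts(cgroup_fd, BPF_CGROUP_INET_INGRESS, &opts);

	if (!err)
		printf("%u program(s) attached\n", opts.prog_cnt);
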
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index fd48b1ff59ca..496e6a8ee0dc 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -29,6 +29,7 @@ enum bpf_type_id_kind {
 enum bpf_type_info_kind {
 	BPF_TYPE_EXISTS = 0,		/* type existence in target kernel */
 	BPF_TYPE_SIZE = 1,		/* type size in target kernel */
+	BPF_TYPE_MATCHES = 2,		/* type match in target kernel */
 };
 
 /* second argument to __builtin_preserve_enum_value() built-in */
@@ -184,6 +185,16 @@ enum bpf_enum_value_kind {
 	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS)
 
 /*
+ * Convenience macro to check that the provided named type
+ * (struct/union/enum/typedef) "matches" that in a target kernel.
+ * Returns:
+ *    1, if the type matches in the target kernel's BTF;
+ *    0, if the type does not match any in the target kernel
+ */
+#define bpf_core_type_matches(type)					    \
+	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_MATCHES)
+
+/*
  * Convenience macro to get the byte size of a provided named type
  * (struct/union/enum/typedef) in a target kernel.
  * Returns:
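
In BPF program code, the new macro slots in next to the existing
bpf_core_type_exists() check; a sketch, where struct sample___v2/___v1 and
the handle_*() helpers are hypothetical:

	if (bpf_core_type_matches(struct sample___v2))
		err = handle_v2(ctx);	/* layout-compatible with kernel BTF */
	else if (bpf_core_type_exists(struct sample___v1))
		err = handle_v1(ctx);

Note the builtin requires a compiler that understands BPF_TYPE_MATCHES.
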
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index fb04eaf367f1..7349b16b8e2f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -22,12 +22,25 @@
  * To allow use of SEC() with externs (e.g., for extern .maps declarations),
  * make sure __attribute__((unused)) doesn't trigger compilation warning.
  */
+#if __GNUC__ && !__clang__
+
+/*
+ * Pragma macros are broken on GCC
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
+ */
+#define SEC(name) __attribute__((section(name), used))
+
+#else
+
 #define SEC(name) \
 	_Pragma("GCC diagnostic push")					    \
 	_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"")	    \
 	__attribute__((section(name), used))				    \
 	_Pragma("GCC diagnostic pop")					    \
 
+#endif
+
 /* Avoid 'linux/stddef.h' definition of '__always_inline'. */
 #undef __always_inline
 #define __always_inline inline __attribute__((always_inline))
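
With the split above, the same program source builds with both clang and the
GCC BPF backend (assuming the usual <linux/bpf.h> and <bpf/bpf_helpers.h>
includes); e.g.:

	SEC("xdp")
	int xdp_pass(struct xdp_md *ctx)
	{
		return XDP_PASS;
	}
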
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 01ce121c302d..11f9096407fc 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -233,7 +233,7 @@ struct pt_regs___arm64 {
 #define __PT_PARM5_REG a4
 #define __PT_RET_REG ra
 #define __PT_FP_REG s0
-#define __PT_RC_REG a5
+#define __PT_RC_REG a0
 #define __PT_SP_REG sp
 #define __PT_IP_REG pc
 /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
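
With a0 as the return-value register, kretprobe programs on riscv now read
the correct value through the PT_REGS_RC()-based macros; a sketch:

	SEC("kretprobe/do_unlinkat")
	int BPF_KRETPROBE(do_unlinkat_exit, long ret)
	{
		/* `ret` is populated from PT_REGS_RC(), i.e. a0 on riscv */
		bpf_printk("do_unlinkat returned %ld", ret);
		return 0;
	}
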
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index ae1520f7e1b0..2d14f1a52d7a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -448,11 +448,6 @@ static int btf_parse_type_sec(struct btf *btf)
 	return 0;
 }
 
-__u32 btf__get_nr_types(const struct btf *btf)
-{
-	return btf->start_id + btf->nr_types - 1;
-}
-
 __u32 btf__type_cnt(const struct btf *btf)
 {
 	return btf->start_id + btf->nr_types;
@@ -1408,92 +1403,6 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
 	return btf__load_from_kernel_by_id_split(id, NULL);
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
-{
-	struct btf *res;
-	int err;
-
-	*btf = NULL;
-	res = btf__load_from_kernel_by_id(id);
-	err = libbpf_get_error(res);
-
-	if (err)
-		return libbpf_err(err);
-
-	*btf = res;
-	return 0;
-}
-
-int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
-			 __u32 expected_key_size, __u32 expected_value_size,
-			 __u32 *key_type_id, __u32 *value_type_id)
-{
-	const struct btf_type *container_type;
-	const struct btf_member *key, *value;
-	const size_t max_name = 256;
-	char container_name[max_name];
-	__s64 key_size, value_size;
-	__s32 container_id;
-
-	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
-		pr_warn("map:%s length of '____btf_map_%s' is too long\n",
-			map_name, map_name);
-		return libbpf_err(-EINVAL);
-	}
-
-	container_id = btf__find_by_name(btf, container_name);
-	if (container_id < 0) {
-		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
-			 map_name, container_name);
-		return libbpf_err(container_id);
-	}
-
-	container_type = btf__type_by_id(btf, container_id);
-	if (!container_type) {
-		pr_warn("map:%s cannot find BTF type for container_id:%u\n",
-			map_name, container_id);
-		return libbpf_err(-EINVAL);
-	}
-
-	if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
-		pr_warn("map:%s container_name:%s is an invalid container struct\n",
-			map_name, container_name);
-		return libbpf_err(-EINVAL);
-	}
-
-	key = btf_members(container_type);
-	value = key + 1;
-
-	key_size = btf__resolve_size(btf, key->type);
-	if (key_size < 0) {
-		pr_warn("map:%s invalid BTF key_type_size\n", map_name);
-		return libbpf_err(key_size);
-	}
-
-	if (expected_key_size != key_size) {
-		pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
-			map_name, (__u32)key_size, expected_key_size);
-		return libbpf_err(-EINVAL);
-	}
-
-	value_size = btf__resolve_size(btf, value->type);
-	if (value_size < 0) {
-		pr_warn("map:%s invalid BTF value_type_size\n", map_name);
-		return libbpf_err(value_size);
-	}
-
-	if (expected_value_size != value_size) {
-		pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
-			map_name, (__u32)value_size, expected_value_size);
-		return libbpf_err(-EINVAL);
-	}
-
-	*key_type_id = key->type;
-	*value_type_id = value->type;
-
-	return 0;
-}
-
 static void btf_invalidate_raw_data(struct btf *btf)
 {
 	if (btf->raw_data) {
@@ -2965,81 +2874,6 @@ const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
 	return btf_ext->data;
 }
 
-static int btf_ext_reloc_info(const struct btf *btf,
-			      const struct btf_ext_info *ext_info,
-			      const char *sec_name, __u32 insns_cnt,
-			      void **info, __u32 *cnt)
-{
-	__u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
-	__u32 i, record_size, existing_len, records_len;
-	struct btf_ext_info_sec *sinfo;
-	const char *info_sec_name;
-	__u64 remain_len;
-	void *data;
-
-	record_size = ext_info->rec_size;
-	sinfo = ext_info->info;
-	remain_len = ext_info->len;
-	while (remain_len > 0) {
-		records_len = sinfo->num_info * record_size;
-		info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
-		if (strcmp(info_sec_name, sec_name)) {
-			remain_len -= sec_hdrlen + records_len;
-			sinfo = (void *)sinfo + sec_hdrlen + records_len;
-			continue;
-		}
-
-		existing_len = (*cnt) * record_size;
-		data = realloc(*info, existing_len + records_len);
-		if (!data)
-			return libbpf_err(-ENOMEM);
-
-		memcpy(data + existing_len, sinfo->data, records_len);
-		/* adjust insn_off only, the rest data will be passed
-		 * to the kernel.
-		 */
-		for (i = 0; i < sinfo->num_info; i++) {
-			__u32 *insn_off;
-
-			insn_off = data + existing_len + (i * record_size);
-			*insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
-		}
-		*info = data;
-		*cnt += sinfo->num_info;
-		return 0;
-	}
-
-	return libbpf_err(-ENOENT);
-}
-
-int btf_ext__reloc_func_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **func_info, __u32 *cnt)
-{
-	return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
-				  insns_cnt, func_info, cnt);
-}
-
-int btf_ext__reloc_line_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **line_info, __u32 *cnt)
-{
-	return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
-				  insns_cnt, line_info, cnt);
-}
-
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
-{
-	return btf_ext->func_info.rec_size;
-}
-
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
-{
-	return btf_ext->line_info.rec_size;
-}
-
 struct btf_dedup;
 
 static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);
@@ -3189,9 +3023,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d);
  * deduplicating structs/unions is described in greater details in comments for
  * `btf_dedup_is_equiv` function.
  */
-
-DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0)
-int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts)
+int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
 {
 	struct btf_dedup *d;
 	int err;
@@ -3251,19 +3083,6 @@ done:
 	return libbpf_err(err);
 }
 
-COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2)
-int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts)
-{
-	LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext);
-
-	if (unused_opts) {
-		pr_warn("please use new version of btf__dedup() that supports options\n");
-		return libbpf_err(-ENOTSUP);
-	}
-
-	return btf__dedup(btf, &opts);
-}
-
 #define BTF_UNPROCESSED_ID ((__u32)-1)
 #define BTF_IN_PROGRESS_ID ((__u32)-2)
 
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 9fb416eb5644..583760df83b4 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -120,20 +120,12 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
 
 LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
 LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead")
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 
-LIBBPF_DEPRECATED_SINCE(0, 6, "intended for internal libbpf use only")
-LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
-LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead")
-LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
 				   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
 					const char *type_name, __u32 kind);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__type_cnt() instead; note that btf__get_nr_types() == btf__type_cnt() - 1")
-LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
 LIBBPF_API __u32 btf__type_cnt(const struct btf *btf);
 LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
@@ -150,29 +142,10 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_DEPRECATED_SINCE(0, 7, "this API is not necessary when BTF-defined maps are used")
-LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
-				    __u32 expected_key_size,
-				    __u32 expected_value_size,
-				    __u32 *key_type_id, __u32 *value_type_id);
 
 LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);
 LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
 LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_func_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **func_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions")
-int btf_ext__reloc_line_info(const struct btf *btf,
-			     const struct btf_ext *btf_ext,
-			     const char *sec_name, __u32 insns_cnt,
-			     void **line_info, __u32 *cnt);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info is deprecated; write custom func_info parsing to fetch rec_size")
-__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info is deprecated; write custom line_info parsing to fetch rec_size")
-__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
@@ -259,22 +232,12 @@ struct btf_dedup_opts {
 
 LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
 
-LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead")
-LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts);
-#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__)
-#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts)
-#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)
-
 struct btf_dump;
 
 struct btf_dump_opts {
-	union {
-		size_t sz;
-		void *ctx; /* DEPRECATED: will be gone in v1.0 */
-	};
+	size_t sz;
 };
+#define btf_dump_opts__last_field sz
 
 typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
 
@@ -283,51 +246,6 @@ LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
 					  void *ctx,
 					  const struct btf_dump_opts *opts);
 
-LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
-						 btf_dump_printf_fn_t printf_fn,
-						 void *ctx,
-						 const struct btf_dump_opts *opts);
-
-LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
-						     const struct btf_ext *btf_ext,
-						     const struct btf_dump_opts *opts,
-						     btf_dump_printf_fn_t printf_fn);
-
-/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the
- * type of 4th argument. If it's btf_dump's print callback, use deprecated
- * API; otherwise, choose the new btf_dump__new(). ___libbpf_override()
- * doesn't work here because both variants have 4 input arguments.
- *
- * (void *) casts are necessary to avoid compilation warnings about type
- * mismatches, because even though __builtin_choose_expr() only ever evaluates
- * one side the other side still has to satisfy type constraints (this is
- * compiler implementation limitation which might be lifted eventually,
- * according to the documentation). So passing struct btf_ext in place of
- * btf_dump_printf_fn_t would be generating compilation warning.  Casting to
- * void * avoids this issue.
- *
- * Also, two type compatibility checks for a function and function pointer are
- * required because passing function reference into btf_dump__new() as
- * btf_dump__new(..., my_callback, ...) and as btf_dump__new(...,
- * &my_callback, ...) (not explicit ampersand in the latter case) actually
- * differs as far as __builtin_types_compatible_p() is concerned. Thus two
- * checks are combined to detect callback argument.
- *
- * The rest works just like in case of ___libbpf_override() usage with symbol
- * versioning.
- *
- * C++ compilers don't support __builtin_types_compatible_p(), so at least
- * don't screw up compilation for them and let C++ users pick btf_dump__new
- * vs btf_dump__new_deprecated explicitly.
- */
-#ifndef __cplusplus
-#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr(				\
-	__builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) ||		\
-	__builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)),	\
-	btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4),	\
-	btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4))
-#endif
-
 LIBBPF_API void btf_dump__free(struct btf_dump *d);
 
 LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
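
btf_dump__new() now has a single, fixed signature; a minimal sketch (type_id
is assumed to be a valid id in btf):

	static void print_cb(void *ctx, const char *fmt, va_list args)
	{
		vfprintf(stdout, fmt, args);
	}

	struct btf_dump *d = btf_dump__new(btf, print_cb, NULL, NULL);

	if (!d)
		return -errno;	/* errno is set on failure */
	err = btf_dump__dump_type(d, type_id);
	btf_dump__free(d);
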
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index f5275f819027..400e84fd0578 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -144,15 +144,17 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
 static int btf_dump_mark_referenced(struct btf_dump *d);
 static int btf_dump_resize(struct btf_dump *d);
 
-DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0)
-struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf,
-				      btf_dump_printf_fn_t printf_fn,
-				      void *ctx,
-				      const struct btf_dump_opts *opts)
+struct btf_dump *btf_dump__new(const struct btf *btf,
+			       btf_dump_printf_fn_t printf_fn,
+			       void *ctx,
+			       const struct btf_dump_opts *opts)
 {
 	struct btf_dump *d;
 	int err;
 
+	if (!OPTS_VALID(opts, btf_dump_opts))
+		return libbpf_err_ptr(-EINVAL);
+
 	if (!printf_fn)
 		return libbpf_err_ptr(-EINVAL);
 
@@ -188,17 +190,6 @@ err:
 	return libbpf_err_ptr(err);
 }
 
-COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4)
-struct btf_dump *btf_dump__new_deprecated(const struct btf *btf,
-					  const struct btf_ext *btf_ext,
-					  const struct btf_dump_opts *opts,
-					  btf_dump_printf_fn_t printf_fn)
-{
-	if (!printf_fn)
-		return libbpf_err_ptr(-EINVAL);
-	return btf_dump__new_v0_6_0(btf, printf_fn, opts ? opts->ctx : NULL, opts);
-}
-
 static int btf_dump_resize(struct btf_dump *d)
 {
 	int err, last_id = btf__type_cnt(d->btf) - 1;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 49e359cd34df..cb49408eb298 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -31,7 +31,6 @@
 #include <linux/bpf.h>
 #include <linux/btf.h>
 #include <linux/filter.h>
-#include <linux/list.h>
 #include <linux/limits.h>
 #include <linux/perf_event.h>
 #include <linux/ring_buffer.h>
@@ -107,6 +106,7 @@ static const char * const attach_type_name[] = {
 	[BPF_TRACE_FEXIT]		= "trace_fexit",
 	[BPF_MODIFY_RETURN]		= "modify_return",
 	[BPF_LSM_MAC]			= "lsm_mac",
+	[BPF_LSM_CGROUP]		= "lsm_cgroup",
 	[BPF_SK_LOOKUP]			= "sk_lookup",
 	[BPF_TRACE_ITER]		= "trace_iter",
 	[BPF_XDP_DEVMAP]		= "xdp_devmap",
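
This is the userspace-visible name for the new BPF_LSM_CGROUP attach type. On
the program side, the selftests in this series use section names of the form
below; the return semantics are assumed here to follow other cgroup programs
(non-zero allows):

	SEC("lsm_cgroup/socket_bind")
	int BPF_PROG(bind_hook, struct socket *sock, struct sockaddr *address,
		     int addrlen)
	{
		return 1;	/* assumed cgroup-style allow */
	}
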
@@ -279,12 +279,9 @@ static inline __u64 ptr_to_u64(const void *ptr)
 	return (__u64) (unsigned long) ptr;
 }
 
-/* this goes away in libbpf 1.0 */
-enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
-
 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
 {
-	libbpf_mode = mode;
+	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
 	return 0;
 }
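
Applications written against pre-1.0 libbpf can keep the call; it now
unconditionally succeeds:

	/* harmless no-op as of v1.0, kept for source compatibility */
	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
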
 
@@ -347,12 +344,8 @@ enum sec_def_flags {
 	SEC_ATTACH_BTF = 4,
 	/* BPF program type allows sleeping/blocking in kernel */
 	SEC_SLEEPABLE = 8,
-	/* allow non-strict prefix matching */
-	SEC_SLOPPY_PFX = 16,
 	/* BPF program support non-linear XDP buffer */
-	SEC_XDP_FRAGS = 32,
-	/* deprecated sec definitions not supposed to be used */
-	SEC_DEPRECATED = 64,
+	SEC_XDP_FRAGS = 16,
 };
 
 struct bpf_sec_def {
@@ -372,9 +365,10 @@ struct bpf_sec_def {
  * linux/filter.h.
  */
 struct bpf_program {
-	const struct bpf_sec_def *sec_def;
+	char *name;
 	char *sec_name;
 	size_t sec_idx;
+	const struct bpf_sec_def *sec_def;
 	/* this program's instruction offset (in number of instructions)
 	 * within its containing ELF section
 	 */
@@ -394,12 +388,6 @@ struct bpf_program {
 	 */
 	size_t sub_insn_off;
 
-	char *name;
-	/* name with / replaced by _; makes recursive pinning
-	 * in bpf_object__pin_programs easier
-	 */
-	char *pin_name;
-
 	/* instructions that belong to BPF program; insns[0] is located at
 	 * sec_insn_off instruction within its ELF section in ELF file, so
 	 * when mapping ELF file instruction index to the local instruction,
@@ -420,24 +408,19 @@ struct bpf_program {
 	size_t log_size;
 	__u32 log_level;
 
-	struct {
-		int nr;
-		int *fds;
-	} instances;
-	bpf_program_prep_t preprocessor;
-
 	struct bpf_object *obj;
-	void *priv;
-	bpf_program_clear_priv_t clear_priv;
 
+	int fd;
 	bool autoload;
 	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
+
 	int prog_ifindex;
 	__u32 attach_btf_obj_fd;
 	__u32 attach_btf_id;
 	__u32 attach_prog_fd;
+
 	void *func_info;
 	__u32 func_info_rec_size;
 	__u32 func_info_cnt;
@@ -484,6 +467,14 @@ enum libbpf_map_type {
 	LIBBPF_MAP_KCONFIG,
 };
 
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+};
+
 struct bpf_map {
 	struct bpf_object *obj;
 	char *name;
@@ -504,8 +495,6 @@ struct bpf_map {
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
 	__u32 btf_vmlinux_value_type_id;
-	void *priv;
-	bpf_map_clear_priv_t clear_priv;
 	enum libbpf_map_type libbpf_type;
 	void *mmaped;
 	struct bpf_struct_ops *st_ops;
@@ -568,8 +557,6 @@ struct extern_desc {
 	};
 };
 
-static LIST_HEAD(bpf_objects_list);
-
 struct module_btf {
 	struct btf *btf;
 	char *name;
@@ -638,12 +625,6 @@ struct bpf_object {
 
 	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
 	struct elf_state efile;
-	/*
-	 * All loaded bpf_object are linked in a list, which is
-	 * hidden to caller. bpf_objects__<func> handlers deal with
-	 * all objects.
-	 */
-	struct list_head list;
 
 	struct btf *btf;
 	struct btf_ext *btf_ext;
@@ -669,9 +650,6 @@ struct bpf_object {
 	size_t log_size;
 	__u32 log_level;
 
-	void *priv;
-	bpf_object_clear_priv_t clear_priv;
-
 	int *fd_array;
 	size_t fd_array_cap;
 	size_t fd_array_cnt;
@@ -693,25 +671,10 @@ static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
 
 void bpf_program__unload(struct bpf_program *prog)
 {
-	int i;
-
 	if (!prog)
 		return;
 
-	/*
-	 * If the object is opened but the program was never loaded,
-	 * it is possible that prog->instances.nr == -1.
-	 */
-	if (prog->instances.nr > 0) {
-		for (i = 0; i < prog->instances.nr; i++)
-			zclose(prog->instances.fds[i]);
-	} else if (prog->instances.nr != -1) {
-		pr_warn("Internal error: instances.nr is %d\n",
-			prog->instances.nr);
-	}
-
-	prog->instances.nr = -1;
-	zfree(&prog->instances.fds);
+	zclose(prog->fd);
 
 	zfree(&prog->func_info);
 	zfree(&prog->line_info);
@@ -722,16 +685,9 @@ static void bpf_program__exit(struct bpf_program *prog)
 	if (!prog)
 		return;
 
-	if (prog->clear_priv)
-		prog->clear_priv(prog, prog->priv);
-
-	prog->priv = NULL;
-	prog->clear_priv = NULL;
-
 	bpf_program__unload(prog);
 	zfree(&prog->name);
 	zfree(&prog->sec_name);
-	zfree(&prog->pin_name);
 	zfree(&prog->insns);
 	zfree(&prog->reloc_desc);
 
@@ -740,26 +696,6 @@ static void bpf_program__exit(struct bpf_program *prog)
 	prog->sec_idx = -1;
 }
 
-static char *__bpf_program__pin_name(struct bpf_program *prog)
-{
-	char *name, *p;
-
-	if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
-		name = strdup(prog->name);
-	else
-		name = strdup(prog->sec_name);
-
-	if (!name)
-		return NULL;
-
-	p = name;
-
-	while ((p = strchr(p, '/')))
-		*p = '_';
-
-	return name;
-}
-
 static bool insn_is_subprog_call(const struct bpf_insn *insn)
 {
 	return BPF_CLASS(insn->code) == BPF_JMP &&
@@ -801,6 +737,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	prog->insns_cnt = prog->sec_insn_cnt;
 
 	prog->type = BPF_PROG_TYPE_UNSPEC;
+	prog->fd = -1;
 
 	/* libbpf's convention for SEC("?abc...") is that it's just like
 	 * SEC("abc...") but the corresponding bpf_program starts out with
@@ -814,9 +751,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		prog->autoload = true;
 	}
 
-	prog->instances.fds = NULL;
-	prog->instances.nr = -1;
-
 	/* inherit object's log_level */
 	prog->log_level = obj->log_level;
 
@@ -828,10 +762,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 	if (!prog->name)
 		goto errout;
 
-	prog->pin_name = __bpf_program__pin_name(prog);
-	if (!prog->pin_name)
-		goto errout;
-
 	prog->insns = malloc(insn_data_sz);
 	if (!prog->insns)
 		goto errout;
@@ -1313,7 +1243,6 @@ static struct bpf_object *bpf_object__new(const char *path,
 					  size_t obj_buf_sz,
 					  const char *obj_name)
 {
-	bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
 	struct bpf_object *obj;
 	char *end;
 
@@ -1351,9 +1280,6 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->kern_version = get_kernel_version();
 	obj->loaded = false;
 
-	INIT_LIST_HEAD(&obj->list);
-	if (!strict)
-		list_add(&obj->list, &bpf_objects_list);
 	return obj;
 }
 
@@ -1386,10 +1312,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)
 	}
 
 	if (obj->efile.obj_buf_sz > 0) {
-		/*
-		 * obj_buf should have been validated by
-		 * bpf_object__open_buffer().
-		 */
+		/* obj_buf should have been validated by bpf_object__open_mem(). */
 		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
 	} else {
 		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
@@ -2052,143 +1975,6 @@ static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 	return 0;
 }
 
-static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
-{
-	Elf_Data *symbols = obj->efile.symbols;
-	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
-	Elf_Data *data = NULL;
-	Elf_Scn *scn;
-
-	if (obj->efile.maps_shndx < 0)
-		return 0;
-
-	if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) {
-		pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n");
-		return -EOPNOTSUPP;
-	}
-
-	if (!symbols)
-		return -EINVAL;
-
-	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
-	data = elf_sec_data(obj, scn);
-	if (!scn || !data) {
-		pr_warn("elf: failed to get legacy map definitions for %s\n",
-			obj->path);
-		return -EINVAL;
-	}
-
-	/*
-	 * Count number of maps. Each map has a name.
-	 * Array of maps is not supported: only the first element is
-	 * considered.
-	 *
-	 * TODO: Detect array of map and report error.
-	 */
-	nr_syms = symbols->d_size / sizeof(Elf64_Sym);
-	for (i = 0; i < nr_syms; i++) {
-		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-
-		if (sym->st_shndx != obj->efile.maps_shndx)
-			continue;
-		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
-			continue;
-		nr_maps++;
-	}
-	/* Assume equally sized map definitions */
-	pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
-		 nr_maps, data->d_size, obj->path);
-
-	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
-		pr_warn("elf: unable to determine legacy map definition size in %s\n",
-			obj->path);
-		return -EINVAL;
-	}
-	map_def_sz = data->d_size / nr_maps;
-
-	/* Fill obj->maps using data in "maps" section.  */
-	for (i = 0; i < nr_syms; i++) {
-		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
-		const char *map_name;
-		struct bpf_map_def *def;
-		struct bpf_map *map;
-
-		if (sym->st_shndx != obj->efile.maps_shndx)
-			continue;
-		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
-			continue;
-
-		map = bpf_object__add_map(obj);
-		if (IS_ERR(map))
-			return PTR_ERR(map);
-
-		map_name = elf_sym_str(obj, sym->st_name);
-		if (!map_name) {
-			pr_warn("failed to get map #%d name sym string for obj %s\n",
-				i, obj->path);
-			return -LIBBPF_ERRNO__FORMAT;
-		}
-
-		pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name);
-
-		if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
-			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
-			return -ENOTSUP;
-		}
-
-		map->libbpf_type = LIBBPF_MAP_UNSPEC;
-		map->sec_idx = sym->st_shndx;
-		map->sec_offset = sym->st_value;
-		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
-			 map_name, map->sec_idx, map->sec_offset);
-		if (sym->st_value + map_def_sz > data->d_size) {
-			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
-				obj->path, map_name);
-			return -EINVAL;
-		}
-
-		map->name = strdup(map_name);
-		if (!map->name) {
-			pr_warn("map '%s': failed to alloc map name\n", map_name);
-			return -ENOMEM;
-		}
-		pr_debug("map %d is \"%s\"\n", i, map->name);
-		def = (struct bpf_map_def *)(data->d_buf + sym->st_value);
-		/*
-		 * If the definition of the map in the object file fits in
-		 * bpf_map_def, copy it.  Any extra fields in our version
-		 * of bpf_map_def will default to zero as a result of the
-		 * calloc above.
-		 */
-		if (map_def_sz <= sizeof(struct bpf_map_def)) {
-			memcpy(&map->def, def, map_def_sz);
-		} else {
-			/*
-			 * Here the map structure being read is bigger than what
-			 * we expect, truncate if the excess bits are all zero.
-			 * If they are not zero, reject this map as
-			 * incompatible.
-			 */
-			char *b;
-
-			for (b = ((char *)def) + sizeof(struct bpf_map_def);
-			     b < ((char *)def) + map_def_sz; b++) {
-				if (*b != 0) {
-					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
-						obj->path, map_name);
-					if (strict)
-						return -EINVAL;
-				}
-			}
-			memcpy(&map->def, def, sizeof(struct bpf_map_def));
-		}
-
-		/* btf info may not exist but fill it in if it does exist */
-		(void) bpf_map_find_btf_info(obj, map);
-	}
-	return 0;
-}
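
With legacy SEC("maps") parsing removed, struct bpf_map_def survives only as
an internal mirror for BTF-defined maps, which look like this on the program
side (my_map is an illustrative name):

	struct {
		__uint(type, BPF_MAP_TYPE_HASH);
		__uint(max_entries, 1024);
		__type(key, __u32);
		__type(value, __u64);
	} my_map SEC(".maps");
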
-
 const struct btf_type *
 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 {
@@ -2306,6 +2092,13 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
 	return bpf_map__set_pin_path(map, buf);
 }
 
+/* should match definition in bpf_helpers.h */
+enum libbpf_pin_type {
+	LIBBPF_PIN_NONE,
+	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+	LIBBPF_PIN_BY_NAME,
+};
+
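
The enum mirrors bpf_helpers.h so BTF-defined maps can request pinning
declaratively; e.g. a map pinned under the default /sys/fs/bpf:

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, __u32);
		__type(value, __u64);
		__uint(pinning, LIBBPF_PIN_BY_NAME);	/* -> /sys/fs/bpf/counters */
	} counters SEC(".maps");
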
 int parse_btf_map_def(const char *map_name, struct btf *btf,
 		      const struct btf_type *def_t, bool strict,
 		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
@@ -2738,12 +2531,11 @@ static int bpf_object__init_maps(struct bpf_object *obj,
 {
 	const char *pin_root_path;
 	bool strict;
-	int err;
+	int err = 0;
 
 	strict = !OPTS_GET(opts, relaxed_maps, false);
 	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
 
-	err = bpf_object__init_user_maps(obj, strict);
 	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
 	err = err ?: bpf_object__init_global_data_maps(obj);
 	err = err ?: bpf_object__init_kconfig_map(obj);
@@ -3061,11 +2853,6 @@ static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
 	return libbpf_err(err);
 }
 
-int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
-{
-	return btf_finalize_data(obj, btf);
-}
-
 static int bpf_object__finalize_btf(struct bpf_object *obj)
 {
 	int err;
@@ -4022,41 +3809,8 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 	return 0;
 }
 
-struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
-				  const char *title)
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
 {
-	struct bpf_program *pos;
-
-	bpf_object__for_each_program(pos, obj) {
-		if (pos->sec_name && !strcmp(pos->sec_name, title))
-			return pos;
-	}
-	return errno = ENOENT, NULL;
-}
-
-static bool prog_is_subprog(const struct bpf_object *obj,
-			    const struct bpf_program *prog)
-{
-	/* For legacy reasons, libbpf supports an entry-point BPF programs
-	 * without SEC() attribute, i.e., those in the .text section. But if
-	 * there are 2 or more such programs in the .text section, they all
-	 * must be subprograms called from entry-point BPF programs in
-	 * designated SEC()'tions, otherwise there is no way to distinguish
-	 * which of those programs should be loaded vs which are a subprogram.
-	 * Similarly, if there is a function/program in .text and at least one
-	 * other BPF program with custom SEC() attribute, then we just assume
-	 * .text programs are subprograms (even if they are not called from
-	 * other programs), because libbpf never explicitly supported mixing
-	 * SEC()-designated BPF programs and .text entry-point BPF programs.
-	 *
-	 * In libbpf 1.0 strict mode, we always consider .text
-	 * programs to be subprograms.
-	 */
-
-	if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
-		return prog->sec_idx == obj->efile.text_shndx;
-
 	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
@@ -4397,9 +4151,7 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat
 
 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 {
-	struct bpf_map_def *def = &map->def;
-	__u32 key_type_id = 0, value_type_id = 0;
-	int ret;
+	int id;
 
 	if (!obj->btf)
 		return -ENOENT;
@@ -4408,31 +4160,22 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 	 * For struct_ops map, it does not need btf_key_type_id and
 	 * btf_value_type_id.
 	 */
-	if (map->sec_idx == obj->efile.btf_maps_shndx ||
-	    bpf_map__is_struct_ops(map))
+	if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
 		return 0;
 
-	if (!bpf_map__is_internal(map)) {
-		pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n");
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
-					   def->value_size, &key_type_id,
-					   &value_type_id);
-#pragma GCC diagnostic pop
-	} else {
-		/*
-		 * LLVM annotates global data differently in BTF, that is,
-		 * only as '.data', '.bss' or '.rodata'.
-		 */
-		ret = btf__find_by_name(obj->btf, map->real_name);
-	}
-	if (ret < 0)
-		return ret;
+	/*
+	 * LLVM annotates global data differently in BTF, that is,
+	 * only as '.data', '.bss' or '.rodata'.
+	 */
+	if (!bpf_map__is_internal(map))
+		return -ENOENT;
+
+	id = btf__find_by_name(obj->btf, map->real_name);
+	if (id < 0)
+		return id;
 
-	map->btf_key_type_id = key_type_id;
-	map->btf_value_type_id = bpf_map__is_internal(map) ?
-				 ret : value_type_id;
+	map->btf_key_type_id = 0;
+	map->btf_value_type_id = id;
 	return 0;
 }
 
@@ -4564,14 +4307,6 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 	return 0;
 }
 
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
-{
-	if (!map || !max_entries)
-		return libbpf_err(-EINVAL);
-
-	return bpf_map__set_max_entries(map, max_entries);
-}
-
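
bpf_map__resize() was a thin wrapper; callers use the setter it wrapped
directly:

	err = bpf_map__set_max_entries(map, 4096);	/* before bpf_object__load() */
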
 static int
 bpf_object__probe_loading(struct bpf_object *obj)
 {
@@ -5732,77 +5467,13 @@ err_out:
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id)
 {
-	const struct btf_type *local_type, *targ_type;
-	int depth = 32; /* max recursion depth */
-
-	/* caller made sure that names match (ignoring flavor suffix) */
-	local_type = btf__type_by_id(local_btf, local_id);
-	targ_type = btf__type_by_id(targ_btf, targ_id);
-	if (!btf_kind_core_compat(local_type, targ_type))
-		return 0;
-
-recur:
-	depth--;
-	if (depth < 0)
-		return -EINVAL;
-
-	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
-	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-	if (!local_type || !targ_type)
-		return -EINVAL;
-
-	if (!btf_kind_core_compat(local_type, targ_type))
-		return 0;
-
-	switch (btf_kind(local_type)) {
-	case BTF_KIND_UNKN:
-	case BTF_KIND_STRUCT:
-	case BTF_KIND_UNION:
-	case BTF_KIND_ENUM:
-	case BTF_KIND_ENUM64:
-	case BTF_KIND_FWD:
-		return 1;
-	case BTF_KIND_INT:
-		/* just reject deprecated bitfield-like integers; all other
-		 * integers are by default compatible between each other
-		 */
-		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
-	case BTF_KIND_PTR:
-		local_id = local_type->type;
-		targ_id = targ_type->type;
-		goto recur;
-	case BTF_KIND_ARRAY:
-		local_id = btf_array(local_type)->type;
-		targ_id = btf_array(targ_type)->type;
-		goto recur;
-	case BTF_KIND_FUNC_PROTO: {
-		struct btf_param *local_p = btf_params(local_type);
-		struct btf_param *targ_p = btf_params(targ_type);
-		__u16 local_vlen = btf_vlen(local_type);
-		__u16 targ_vlen = btf_vlen(targ_type);
-		int i, err;
-
-		if (local_vlen != targ_vlen)
-			return 0;
-
-		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
-			skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
-			skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
-			err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
-			if (err <= 0)
-				return err;
-		}
+	return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
+}
 
-		/* tail recurse for return type check */
-		skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
-		skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
-		goto recur;
-	}
-	default:
-		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
-			btf_kind_str(local_type), local_id, targ_id);
-		return 0;
-	}
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
+			 const struct btf *targ_btf, __u32 targ_id)
+{
+	return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
 }
 
 static size_t bpf_core_hash_fn(const void *key, void *ctx)
@@ -6926,11 +6597,6 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
 	if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
 		opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
 
-	if (def & SEC_DEPRECATED) {
-		pr_warn("SEC(\"%s\") is deprecated, please see https://github.com/libbpf/libbpf/wiki/Libbpf-1.0-migration-guide#bpf-program-sec-annotation-deprecations for details\n",
-			prog->sec_name);
-	}
-
 	if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
 		int btf_obj_fd = 0, btf_type_id = 0, err;
 		const char *attach_name;
@@ -6973,10 +6639,9 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog,
 
 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
 
-static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog,
-					 struct bpf_insn *insns, int insns_cnt,
-					 const char *license, __u32 kern_version,
-					 int *prog_fd)
+static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
+				struct bpf_insn *insns, int insns_cnt,
+				const char *license, __u32 kern_version, int *prog_fd)
 {
 	LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
 	const char *prog_name = NULL;
@@ -7343,93 +7008,6 @@ static int bpf_program_record_relos(struct bpf_program *prog)
 	return 0;
 }
 
-static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
-				const char *license, __u32 kern_ver)
-{
-	int err = 0, fd, i;
-
-	if (obj->loaded) {
-		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
-		return libbpf_err(-EINVAL);
-	}
-
-	if (prog->instances.nr < 0 || !prog->instances.fds) {
-		if (prog->preprocessor) {
-			pr_warn("Internal error: can't load program '%s'\n",
-				prog->name);
-			return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
-		}
-
-		prog->instances.fds = malloc(sizeof(int));
-		if (!prog->instances.fds) {
-			pr_warn("Not enough memory for BPF fds\n");
-			return libbpf_err(-ENOMEM);
-		}
-		prog->instances.nr = 1;
-		prog->instances.fds[0] = -1;
-	}
-
-	if (!prog->preprocessor) {
-		if (prog->instances.nr != 1) {
-			pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
-				prog->name, prog->instances.nr);
-		}
-		if (obj->gen_loader)
-			bpf_program_record_relos(prog);
-		err = bpf_object_load_prog_instance(obj, prog,
-						    prog->insns, prog->insns_cnt,
-						    license, kern_ver, &fd);
-		if (!err)
-			prog->instances.fds[0] = fd;
-		goto out;
-	}
-
-	for (i = 0; i < prog->instances.nr; i++) {
-		struct bpf_prog_prep_result result;
-		bpf_program_prep_t preprocessor = prog->preprocessor;
-
-		memset(&result, 0, sizeof(result));
-		err = preprocessor(prog, i, prog->insns,
-				   prog->insns_cnt, &result);
-		if (err) {
-			pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
-				i, prog->name);
-			goto out;
-		}
-
-		if (!result.new_insn_ptr || !result.new_insn_cnt) {
-			pr_debug("Skip loading the %dth instance of program '%s'\n",
-				 i, prog->name);
-			prog->instances.fds[i] = -1;
-			if (result.pfd)
-				*result.pfd = -1;
-			continue;
-		}
-
-		err = bpf_object_load_prog_instance(obj, prog,
-						    result.new_insn_ptr, result.new_insn_cnt,
-						    license, kern_ver, &fd);
-		if (err) {
-			pr_warn("Loading the %dth instance of program '%s' failed\n",
-				i, prog->name);
-			goto out;
-		}
-
-		if (result.pfd)
-			*result.pfd = fd;
-		prog->instances.fds[i] = fd;
-	}
-out:
-	if (err)
-		pr_warn("failed to load program '%s'\n", prog->name);
-	return libbpf_err(err);
-}
-
-int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver)
-{
-	return bpf_object_load_prog(prog->obj, prog, license, kern_ver);
-}
-
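
With the instances/preprocessor machinery gone, programs are loaded only as
part of their object; a sketch of the remaining flow:

	err = bpf_object__load(obj);
	if (err)
		goto cleanup;
	prog_fd = bpf_program__fd(prog);	/* valid once the object is loaded */
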
 static int
 bpf_object__load_progs(struct bpf_object *obj, int log_level)
 {
@@ -7453,9 +7031,16 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 			continue;
 		}
 		prog->log_level |= log_level;
-		err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version);
-		if (err)
+
+		if (obj->gen_loader)
+			bpf_program_record_relos(prog);
+
+		err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
+					   obj->license, obj->kern_version, &prog->fd);
+		if (err) {
+			pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
 			return err;
+		}
 	}
 
 	bpf_object__free_relocs(obj);
@@ -7481,13 +7066,6 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
 		prog->type = prog->sec_def->prog_type;
 		prog->expected_attach_type = prog->sec_def->expected_attach_type;
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
-		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
-			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
-#pragma GCC diagnostic pop
-
 		/* sec_def can have custom callback which should be called
 		 * after bpf_program is initialized to adjust its properties
 		 */
@@ -7593,36 +7171,6 @@ out:
 	return ERR_PTR(err);
 }
 
-static struct bpf_object *
-__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
-{
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-		.relaxed_maps = flags & MAPS_RELAX_COMPAT,
-	);
-
-	/* param validation */
-	if (!attr->file)
-		return NULL;
-
-	pr_debug("loading %s\n", attr->file);
-	return bpf_object_open(attr->file, NULL, 0, &opts);
-}
-
-struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
-{
-	return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
-}
-
-struct bpf_object *bpf_object__open(const char *path)
-{
-	struct bpf_object_open_attr attr = {
-		.file		= path,
-		.prog_type	= BPF_PROG_TYPE_UNSPEC,
-	};
-
-	return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
-}
-
 struct bpf_object *
 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 {
@@ -7634,6 +7182,11 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 	return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
 }
 
+struct bpf_object *bpf_object__open(const char *path)
+{
+	return bpf_object__open_file(path, NULL);
+}
+
 struct bpf_object *
 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 		     const struct bpf_object_open_opts *opts)
@@ -7644,23 +7197,6 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
 }
 
-struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
-			const char *name)
-{
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-		.object_name = name,
-		/* wrong default, but backwards-compatible */
-		.relaxed_maps = true,
-	);
-
-	/* returning NULL is wrong, but backwards-compatible */
-	if (!obj_buf || obj_buf_sz == 0)
-		return errno = EINVAL, NULL;
-
-	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts));
-}
-
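
bpf_object__open_mem() is the surviving in-memory open; roughly (buf/buf_sz
are assumed to hold the ELF object bytes):

	LIBBPF_OPTS(bpf_object_open_opts, opts,
		.object_name = "my_obj",	/* optional explicit name */
	);
	struct bpf_object *obj = bpf_object__open_mem(buf, buf_sz, &opts);

	if (!obj)
		return -errno;	/* NULL + errno on failure */
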
 static int bpf_object_unload(struct bpf_object *obj)
 {
 	size_t i;
@@ -8093,11 +7629,6 @@ out:
 	return libbpf_err(err);
 }
 
-int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
-{
-	return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path);
-}
-
 int bpf_object__load(struct bpf_object *obj)
 {
 	return bpf_object_load(obj, 0, NULL);
@@ -8155,11 +7686,16 @@ static int check_path(const char *path)
 	return err;
 }
 
-static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance)
+int bpf_program__pin(struct bpf_program *prog, const char *path)
 {
 	char *cp, errmsg[STRERR_BUFSIZE];
 	int err;
 
+	if (prog->fd < 0) {
+		pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
+		return libbpf_err(-EINVAL);
+	}
+
 	err = make_parent_dir(path);
 	if (err)
 		return libbpf_err(err);
@@ -8168,170 +7704,35 @@ static int bpf_program_pin_instance(struct bpf_program *prog, const char *path,
 	if (err)
 		return libbpf_err(err);
 
-	if (prog == NULL) {
-		pr_warn("invalid program pointer\n");
-		return libbpf_err(-EINVAL);
-	}
-
-	if (instance < 0 || instance >= prog->instances.nr) {
-		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
-			instance, prog->name, prog->instances.nr);
-		return libbpf_err(-EINVAL);
-	}
-
-	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+	if (bpf_obj_pin(prog->fd, path)) {
 		err = -errno;
 		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-		pr_warn("failed to pin program: %s\n", cp);
+		pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
 		return libbpf_err(err);
 	}
-	pr_debug("pinned program '%s'\n", path);
 
+	pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
 	return 0;
 }
 
-static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance)
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
 {
 	int err;
 
-	err = check_path(path);
-	if (err)
-		return libbpf_err(err);
-
-	if (prog == NULL) {
-		pr_warn("invalid program pointer\n");
-		return libbpf_err(-EINVAL);
-	}
-
-	if (instance < 0 || instance >= prog->instances.nr) {
-		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
-			instance, prog->name, prog->instances.nr);
+	if (prog->fd < 0) {
+		pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
 		return libbpf_err(-EINVAL);
 	}
 
-	err = unlink(path);
-	if (err != 0)
-		return libbpf_err(-errno);
-
-	pr_debug("unpinned program '%s'\n", path);
-
-	return 0;
-}
-
-__attribute__((alias("bpf_program_pin_instance")))
-int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance);
-
-__attribute__((alias("bpf_program_unpin_instance")))
-int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance);
-
-int bpf_program__pin(struct bpf_program *prog, const char *path)
-{
-	int i, err;
-
-	err = make_parent_dir(path);
-	if (err)
-		return libbpf_err(err);
-
-	err = check_path(path);
-	if (err)
-		return libbpf_err(err);
-
-	if (prog == NULL) {
-		pr_warn("invalid program pointer\n");
-		return libbpf_err(-EINVAL);
-	}
-
-	if (prog->instances.nr <= 0) {
-		pr_warn("no instances of prog %s to pin\n", prog->name);
-		return libbpf_err(-EINVAL);
-	}
-
-	if (prog->instances.nr == 1) {
-		/* don't create subdirs when pinning single instance */
-		return bpf_program_pin_instance(prog, path, 0);
-	}
-
-	for (i = 0; i < prog->instances.nr; i++) {
-		char buf[PATH_MAX];
-		int len;
-
-		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
-		if (len < 0) {
-			err = -EINVAL;
-			goto err_unpin;
-		} else if (len >= PATH_MAX) {
-			err = -ENAMETOOLONG;
-			goto err_unpin;
-		}
-
-		err = bpf_program_pin_instance(prog, buf, i);
-		if (err)
-			goto err_unpin;
-	}
-
-	return 0;
-
-err_unpin:
-	for (i = i - 1; i >= 0; i--) {
-		char buf[PATH_MAX];
-		int len;
-
-		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
-		if (len < 0)
-			continue;
-		else if (len >= PATH_MAX)
-			continue;
-
-		bpf_program_unpin_instance(prog, buf, i);
-	}
-
-	rmdir(path);
-
-	return libbpf_err(err);
-}
-
-int bpf_program__unpin(struct bpf_program *prog, const char *path)
-{
-	int i, err;
-
 	err = check_path(path);
 	if (err)
 		return libbpf_err(err);
 
-	if (prog == NULL) {
-		pr_warn("invalid program pointer\n");
-		return libbpf_err(-EINVAL);
-	}
-
-	if (prog->instances.nr <= 0) {
-		pr_warn("no instances of prog %s to pin\n", prog->name);
-		return libbpf_err(-EINVAL);
-	}
-
-	if (prog->instances.nr == 1) {
-		/* don't create subdirs when pinning single instance */
-		return bpf_program_unpin_instance(prog, path, 0);
-	}
-
-	for (i = 0; i < prog->instances.nr; i++) {
-		char buf[PATH_MAX];
-		int len;
-
-		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
-		if (len < 0)
-			return libbpf_err(-EINVAL);
-		else if (len >= PATH_MAX)
-			return libbpf_err(-ENAMETOOLONG);
-
-		err = bpf_program_unpin_instance(prog, buf, i);
-		if (err)
-			return err;
-	}
-
-	err = rmdir(path);
+	err = unlink(path);
 	if (err)
 		return libbpf_err(-errno);
 
+	pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
 	return 0;
 }
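
Pinning now operates on the single program fd, with no per-instance
subdirectories; the path is illustrative:

	err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
	/* ... */
	err = bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");
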
 
@@ -8578,8 +7979,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 		char buf[PATH_MAX];
 		int len;
 
-		len = snprintf(buf, PATH_MAX, "%s/%s", path,
-			       prog->pin_name);
+		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
 		if (len < 0) {
 			err = -EINVAL;
 			goto err_unpin_programs;
@@ -8600,8 +8000,7 @@ err_unpin_programs:
 		char buf[PATH_MAX];
 		int len;
 
-		len = snprintf(buf, PATH_MAX, "%s/%s", path,
-			       prog->pin_name);
+		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
 		if (len < 0)
 			continue;
 		else if (len >= PATH_MAX)
@@ -8625,8 +8024,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 		char buf[PATH_MAX];
 		int len;
 
-		len = snprintf(buf, PATH_MAX, "%s/%s", path,
-			       prog->pin_name);
+		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
 		if (len < 0)
 			return libbpf_err(-EINVAL);
 		else if (len >= PATH_MAX)
@@ -8659,11 +8057,6 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 
 static void bpf_map__destroy(struct bpf_map *map)
 {
-	if (map->clear_priv)
-		map->clear_priv(map, map->priv);
-	map->priv = NULL;
-	map->clear_priv = NULL;
-
 	if (map->inner_map) {
 		bpf_map__destroy(map->inner_map);
 		zfree(&map->inner_map);
@@ -8699,9 +8092,6 @@ void bpf_object__close(struct bpf_object *obj)
 	if (IS_ERR_OR_NULL(obj))
 		return;
 
-	if (obj->clear_priv)
-		obj->clear_priv(obj, obj->priv);
-
 	usdt_manager_free(obj->usdt_man);
 	obj->usdt_man = NULL;
 
@@ -8728,33 +8118,9 @@ void bpf_object__close(struct bpf_object *obj)
 	}
 	zfree(&obj->programs);
 
-	list_del(&obj->list);
 	free(obj);
 }
 
-struct bpf_object *
-bpf_object__next(struct bpf_object *prev)
-{
-	struct bpf_object *next;
-	bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
-
-	if (strict)
-		return NULL;
-
-	if (!prev)
-		next = list_first_entry(&bpf_objects_list,
-					struct bpf_object,
-					list);
-	else
-		next = list_next_entry(prev, list);
-
-	/* Empty list is noticed here so don't need checking on entry. */
-	if (&next->list == &bpf_objects_list)
-		return NULL;
-
-	return next;
-}
-
 const char *bpf_object__name(const struct bpf_object *obj)
 {
 	return obj ? obj->name : libbpf_err_ptr(-EINVAL);
@@ -8785,22 +8151,6 @@ int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 	return 0;
 }
 
-int bpf_object__set_priv(struct bpf_object *obj, void *priv,
-			 bpf_object_clear_priv_t clear_priv)
-{
-	if (obj->priv && obj->clear_priv)
-		obj->clear_priv(obj, obj->priv);
-
-	obj->priv = priv;
-	obj->clear_priv = clear_priv;
-	return 0;
-}
-
-void *bpf_object__priv(const struct bpf_object *obj)
-{
-	return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
-}
-
 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
 {
 	struct bpf_gen *gen;
@@ -8844,12 +8194,6 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 }
 
 struct bpf_program *
-bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
-{
-	return bpf_object__next_program(obj, prev);
-}
-
-struct bpf_program *
 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 {
 	struct bpf_program *prog = prev;
@@ -8862,12 +8206,6 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 }
 
 struct bpf_program *
-bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
-{
-	return bpf_object__prev_program(obj, next);
-}
-
-struct bpf_program *
 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
 {
 	struct bpf_program *prog = next;
@@ -8879,22 +8217,6 @@ bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
 	return prog;
 }
 
-int bpf_program__set_priv(struct bpf_program *prog, void *priv,
-			  bpf_program_clear_priv_t clear_priv)
-{
-	if (prog->priv && prog->clear_priv)
-		prog->clear_priv(prog, prog->priv);
-
-	prog->priv = priv;
-	prog->clear_priv = clear_priv;
-	return 0;
-}
-
-void *bpf_program__priv(const struct bpf_program *prog)
-{
-	return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
-}
-
 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 {
 	prog->prog_ifindex = ifindex;
@@ -8910,22 +8232,6 @@ const char *bpf_program__section_name(const struct bpf_program *prog)
 	return prog->sec_name;
 }
 
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
-{
-	const char *title;
-
-	title = prog->sec_name;
-	if (needs_copy) {
-		title = strdup(title);
-		if (!title) {
-			pr_warn("failed to strdup program title\n");
-			return libbpf_err_ptr(-ENOMEM);
-		}
-	}
-
-	return title;
-}
-
 bool bpf_program__autoload(const struct bpf_program *prog)
 {
 	return prog->autoload;
@@ -8940,18 +8246,6 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 	return 0;
 }
 
-static int bpf_program_nth_fd(const struct bpf_program *prog, int n);
-
-int bpf_program__fd(const struct bpf_program *prog)
-{
-	return bpf_program_nth_fd(prog, 0);
-}
-
-size_t bpf_program__size(const struct bpf_program *prog)
-{
-	return prog->insns_cnt * BPF_INSN_SZ;
-}
-
 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
 {
 	return prog->insns;
@@ -8982,58 +8276,15 @@ int bpf_program__set_insns(struct bpf_program *prog,
 	return 0;
 }
 
-int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
-			  bpf_program_prep_t prep)
-{
-	int *instances_fds;
-
-	if (nr_instances <= 0 || !prep)
-		return libbpf_err(-EINVAL);
-
-	if (prog->instances.nr > 0 || prog->instances.fds) {
-		pr_warn("Can't set pre-processor after loading\n");
-		return libbpf_err(-EINVAL);
-	}
-
-	instances_fds = malloc(sizeof(int) * nr_instances);
-	if (!instances_fds) {
-		pr_warn("alloc memory failed for fds\n");
-		return libbpf_err(-ENOMEM);
-	}
-
-	/* fill all fd with -1 */
-	memset(instances_fds, -1, sizeof(int) * nr_instances);
-
-	prog->instances.nr = nr_instances;
-	prog->instances.fds = instances_fds;
-	prog->preprocessor = prep;
-	return 0;
-}
-
-__attribute__((alias("bpf_program_nth_fd")))
-int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-static int bpf_program_nth_fd(const struct bpf_program *prog, int n)
+int bpf_program__fd(const struct bpf_program *prog)
 {
-	int fd;
-
 	if (!prog)
 		return libbpf_err(-EINVAL);
 
-	if (n >= prog->instances.nr || n < 0) {
-		pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
-			n, prog->name, prog->instances.nr);
-		return libbpf_err(-EINVAL);
-	}
-
-	fd = prog->instances.fds[n];
-	if (fd < 0) {
-		pr_warn("%dth instance of program '%s' is invalid\n",
-			n, prog->name);
+	if (prog->fd < 0)
 		return libbpf_err(-ENOENT);
-	}
 
-	return fd;
+	return prog->fd;
 }
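With per-program instances gone, bpf_program__fd() is a plain accessor for the single prog->fd, so call sites need no nth-fd indirection:

	int fd = bpf_program__fd(prog);

	if (fd < 0) /* -EINVAL for a NULL prog, -ENOENT before bpf_object__load() */
		return fd;
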
 
 __alias(bpf_program__type)
@@ -9053,39 +8304,6 @@ int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 	return 0;
 }
 
-static bool bpf_program__is_type(const struct bpf_program *prog,
-				 enum bpf_prog_type type)
-{
-	return prog ? (prog->type == type) : false;
-}
-
-#define BPF_PROG_TYPE_FNS(NAME, TYPE)				\
-int bpf_program__set_##NAME(struct bpf_program *prog)		\
-{								\
-	if (!prog)						\
-		return libbpf_err(-EINVAL);			\
-	return bpf_program__set_type(prog, TYPE);			\
-}								\
-								\
-bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
-{								\
-	return bpf_program__is_type(prog, TYPE);		\
-}								\
-
-BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
-BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
-BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
-BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
-BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
-BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
-BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
-BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
-BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
-BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
-BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
-BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
-BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
-
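The generated set_NAME()/is_NAME() helpers collapse into the two generic accessors that remain; a typical migration, using XDP as the example (handle_xdp() is illustrative):

	/* was bpf_program__set_xdp(); must still run before bpf_object__load() */
	err = bpf_program__set_type(prog, BPF_PROG_TYPE_XDP);

	if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP) /* was bpf_program__is_xdp() */
		handle_xdp(prog);
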
 __alias(bpf_program__expected_attach_type)
 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 
@@ -9172,9 +8390,9 @@ static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_li
 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 
 static const struct bpf_sec_def section_defs[] = {
-	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE),
+	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
+	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
 	SEC_DEF("kprobe+",		KPROBE,	0, SEC_NONE, attach_kprobe),
 	SEC_DEF("uprobe+",		KPROBE,	0, SEC_NONE, attach_uprobe),
 	SEC_DEF("uprobe.s+",		KPROBE,	0, SEC_SLEEPABLE, attach_uprobe),
@@ -9185,8 +8403,8 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("kretprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
 	SEC_DEF("usdt+",		KPROBE,	0, SEC_NONE, attach_usdt),
 	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE),
-	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX | SEC_DEPRECATED),
-	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
+	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE),
+	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE),
 	SEC_DEF("tracepoint+",		TRACEPOINT, 0, SEC_NONE, attach_tp),
 	SEC_DEF("tp+",			TRACEPOINT, 0, SEC_NONE, attach_tp),
 	SEC_DEF("raw_tracepoint+",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
@@ -9203,55 +8421,54 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("freplace+",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
 	SEC_DEF("lsm+",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s+",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
+	SEC_DEF("lsm_cgroup+",		LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
 	SEC_DEF("iter+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
 	SEC_DEF("iter.s+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
 	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
 	SEC_DEF("xdp.frags/devmap",	XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
 	SEC_DEF("xdp/devmap",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
-	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
 	SEC_DEF("xdp.frags/cpumap",	XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
 	SEC_DEF("xdp/cpumap",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
-	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE | SEC_DEPRECATED),
 	SEC_DEF("xdp.frags",		XDP, BPF_XDP, SEC_XDP_FRAGS),
-	SEC_DEF("xdp",			XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("perf_event",		PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("lwt_in",		LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("lwt_out",		LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("lwt_xmit",		LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("lwt_seg6local",	LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup_skb/ingress",	CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup_skb/egress",	CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/skb",		CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/sock_create",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/sock_release",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/sock",		CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/post_bind4",	CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/post_bind6",	CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/dev",		CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("sockops",		SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("sk_skb/stream_parser",	SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("sk_skb",		SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
-	SEC_DEF("sk_msg",		SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("lirc_mode2",		LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("flow_dissector",	FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/bind4",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
-	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+	SEC_DEF("xdp",			XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
+	SEC_DEF("perf_event",		PERF_EVENT, 0, SEC_NONE),
+	SEC_DEF("lwt_in",		LWT_IN, 0, SEC_NONE),
+	SEC_DEF("lwt_out",		LWT_OUT, 0, SEC_NONE),
+	SEC_DEF("lwt_xmit",		LWT_XMIT, 0, SEC_NONE),
+	SEC_DEF("lwt_seg6local",	LWT_SEG6LOCAL, 0, SEC_NONE),
+	SEC_DEF("sockops",		SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
+	SEC_DEF("sk_skb/stream_parser",	SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
+	SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
+	SEC_DEF("sk_skb",		SK_SKB, 0, SEC_NONE),
+	SEC_DEF("sk_msg",		SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
+	SEC_DEF("lirc_mode2",		LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
+	SEC_DEF("flow_dissector",	FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
+	SEC_DEF("cgroup_skb/ingress",	CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
+	SEC_DEF("cgroup_skb/egress",	CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
+	SEC_DEF("cgroup/skb",		CGROUP_SKB, 0, SEC_NONE),
+	SEC_DEF("cgroup/sock_create",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/sock_release",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/sock",		CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
+	SEC_DEF("cgroup/post_bind4",	CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/post_bind6",	CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/bind4",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
+	SEC_DEF("cgroup/dev",		CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
 	SEC_DEF("struct_ops+",		STRUCT_OPS, 0, SEC_NONE),
-	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
+	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
 };
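With SEC_SLOPPY_PFX dropped, these section names match exactly instead of as prefixes, so BPF-side code needs the canonical spelling. A sketch, assuming the usual <bpf/bpf_helpers.h> setup:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* used to load via sloppy prefix match against "xdp"; no longer recognized: */
	SEC("xdp_dump")
	int xdp_dump_old(struct xdp_md *ctx) { return XDP_PASS; }

	/* exact-match spelling required going forward: */
	SEC("xdp")
	int xdp_dump(struct xdp_md *ctx) { return XDP_PASS; }
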
 
 static size_t custom_sec_def_cnt;
@@ -9346,8 +8563,7 @@ int libbpf_unregister_prog_handler(int handler_id)
 	return 0;
 }
 
-static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name,
-			    bool allow_sloppy)
+static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
 {
 	size_t len = strlen(sec_def->sec);
 
@@ -9372,17 +8588,6 @@ static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_n
 		return false;
 	}
 
-	/* SEC_SLOPPY_PFX definitions are allowed to be just prefix
-	 * matches, unless strict section name mode
-	 * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the
-	 * match has to be exact.
-	 */
-	if (allow_sloppy && str_has_pfx(sec_name, sec_def->sec))
-		return true;
-
-	/* Definitions not marked SEC_SLOPPY_PFX (e.g.,
-	 * SEC("syscall")) are exact matches in both modes.
-	 */
 	return strcmp(sec_name, sec_def->sec) == 0;
 }
 
@@ -9390,20 +8595,18 @@ static const struct bpf_sec_def *find_sec_def(const char *sec_name)
 {
 	const struct bpf_sec_def *sec_def;
 	int i, n;
-	bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME, allow_sloppy;
 
 	n = custom_sec_def_cnt;
 	for (i = 0; i < n; i++) {
 		sec_def = &custom_sec_defs[i];
-		if (sec_def_matches(sec_def, sec_name, false))
+		if (sec_def_matches(sec_def, sec_name))
 			return sec_def;
 	}
 
 	n = ARRAY_SIZE(section_defs);
 	for (i = 0; i < n; i++) {
 		sec_def = &section_defs[i];
-		allow_sloppy = (sec_def->cookie & SEC_SLOPPY_PFX) && !strict;
-		if (sec_def_matches(sec_def, sec_name, allow_sloppy))
+		if (sec_def_matches(sec_def, sec_name))
 			return sec_def;
 	}
 
@@ -9656,6 +8859,7 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 		*kind = BTF_KIND_TYPEDEF;
 		break;
 	case BPF_LSM_MAC:
+	case BPF_LSM_CGROUP:
 		*prefix = BTF_LSM_PREFIX;
 		*kind = BTF_KIND_FUNC;
 		break;
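Together with the SEC("lsm_cgroup+") definition above, this lets BPF_LSM_CGROUP programs resolve their BTF target through the same bpf_lsm_ prefix as plain lsm. A minimal BPF-side sketch — hook name and return convention follow the lsm_cgroup selftests, and vmlinux.h is assumed:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	SEC("lsm_cgroup/socket_bind")
	int BPF_PROG(allow_bind, struct socket *sock, struct sockaddr *address,
		     int addrlen)
	{
		return 1; /* 1 allows; 0 is mapped to -EPERM for this cgroup */
	}
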
@@ -9859,11 +9063,6 @@ int bpf_map__fd(const struct bpf_map *map)
 	return map ? map->fd : libbpf_err(-EINVAL);
 }
 
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
-{
-	return map ? &map->def : libbpf_err_ptr(-EINVAL);
-}
-
 static bool map_uses_real_name(const struct bpf_map *map)
 {
 	/* Since libbpf started to support custom .data.* and .rodata.* maps,
@@ -9978,27 +9177,6 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
 	return map ? map->btf_value_type_id : 0;
 }
 
-int bpf_map__set_priv(struct bpf_map *map, void *priv,
-		     bpf_map_clear_priv_t clear_priv)
-{
-	if (!map)
-		return libbpf_err(-EINVAL);
-
-	if (map->priv) {
-		if (map->clear_priv)
-			map->clear_priv(map, map->priv);
-	}
-
-	map->priv = priv;
-	map->clear_priv = clear_priv;
-	return 0;
-}
-
-void *bpf_map__priv(const struct bpf_map *map)
-{
-	return map ? map->priv : libbpf_err_ptr(-EINVAL);
-}
-
 int bpf_map__set_initial_value(struct bpf_map *map,
 			       const void *data, size_t size)
 {
@@ -10018,11 +9196,6 @@ const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
 	return map->mmaped;
 }
 
-bool bpf_map__is_offload_neutral(const struct bpf_map *map)
-{
-	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
-}
-
 bool bpf_map__is_internal(const struct bpf_map *map)
 {
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
@@ -10084,12 +9257,6 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 }
 
 struct bpf_map *
-bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
-{
-	return bpf_object__next_map(obj, prev);
-}
-
-struct bpf_map *
 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 {
 	if (prev == NULL)
@@ -10099,12 +9266,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 }
 
 struct bpf_map *
-bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
-{
-	return bpf_object__prev_map(obj, next);
-}
-
-struct bpf_map *
 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
 {
 	if (next == NULL) {
@@ -10149,12 +9310,6 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
 }
 
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
-{
-	return libbpf_err_ptr(-ENOTSUP);
-}
-
 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
 			   size_t value_sz, bool check_value_sz)
 {
@@ -10275,95 +9430,6 @@ long libbpf_get_error(const void *ptr)
 	return -errno;
 }
 
-__attribute__((alias("bpf_prog_load_xattr2")))
-int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-			struct bpf_object **pobj, int *prog_fd);
-
-static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr,
-				struct bpf_object **pobj, int *prog_fd)
-{
-	struct bpf_object_open_attr open_attr = {};
-	struct bpf_program *prog, *first_prog = NULL;
-	struct bpf_object *obj;
-	struct bpf_map *map;
-	int err;
-
-	if (!attr)
-		return libbpf_err(-EINVAL);
-	if (!attr->file)
-		return libbpf_err(-EINVAL);
-
-	open_attr.file = attr->file;
-	open_attr.prog_type = attr->prog_type;
-
-	obj = __bpf_object__open_xattr(&open_attr, 0);
-	err = libbpf_get_error(obj);
-	if (err)
-		return libbpf_err(-ENOENT);
-
-	bpf_object__for_each_program(prog, obj) {
-		enum bpf_attach_type attach_type = attr->expected_attach_type;
-		/*
-		 * to preserve backwards compatibility, bpf_prog_load treats
-		 * attr->prog_type, if specified, as an override to whatever
-		 * bpf_object__open guessed
-		 */
-		if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
-			prog->type = attr->prog_type;
-			prog->expected_attach_type = attach_type;
-		}
-		if (bpf_program__type(prog) == BPF_PROG_TYPE_UNSPEC) {
-			/*
-			 * we haven't guessed from section name and user
-			 * didn't provide a fallback type, too bad...
-			 */
-			bpf_object__close(obj);
-			return libbpf_err(-EINVAL);
-		}
-
-		prog->prog_ifindex = attr->ifindex;
-		prog->log_level = attr->log_level;
-		prog->prog_flags |= attr->prog_flags;
-		if (!first_prog)
-			first_prog = prog;
-	}
-
-	bpf_object__for_each_map(map, obj) {
-		if (map->def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
-			map->map_ifindex = attr->ifindex;
-	}
-
-	if (!first_prog) {
-		pr_warn("object file doesn't contain bpf program\n");
-		bpf_object__close(obj);
-		return libbpf_err(-ENOENT);
-	}
-
-	err = bpf_object__load(obj);
-	if (err) {
-		bpf_object__close(obj);
-		return libbpf_err(err);
-	}
-
-	*pobj = obj;
-	*prog_fd = bpf_program__fd(first_prog);
-	return 0;
-}
-
-COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1)
-int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
-			     struct bpf_object **pobj, int *prog_fd)
-{
-	struct bpf_prog_load_attr attr;
-
-	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
-	attr.file = file;
-	attr.prog_type = type;
-	attr.expected_attach_type = 0;
-
-	return bpf_prog_load_xattr2(&attr, pobj, prog_fd);
-}
-
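Users of the removed bpf_prog_load_xattr()/bpf_prog_load_deprecated() are expected to open and load explicitly; a sketch of the equivalent flow ("prog.o" is a placeholder, error handling condensed):

	struct bpf_object *obj;
	struct bpf_program *prog;
	int err, prog_fd;

	obj = bpf_object__open_file("prog.o", NULL);
	err = libbpf_get_error(obj);
	if (err)
		return err;

	err = bpf_object__load(obj);
	if (err) {
		bpf_object__close(obj);
		return err;
	}

	prog = bpf_object__next_program(obj, NULL); /* the removed API returned the first program's fd */
	if (!prog) {
		bpf_object__close(obj);
		return -ENOENT;
	}
	prog_fd = bpf_program__fd(prog);
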
 /* Replace link's underlying BPF program with the new one */
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
@@ -10811,10 +9877,11 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
 	}
 	type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
 	if (type < 0) {
+		err = type;
 		pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
 			kfunc_name, offset,
-			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
-		return type;
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		goto err_clean_legacy;
 	}
 	attr.size = sizeof(attr);
 	attr.config = type;
@@ -10828,9 +9895,14 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
 		err = -errno;
 		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		return err;
+		goto err_clean_legacy;
 	}
 	return pfd;
+
+err_clean_legacy:
+	/* Clear the newly added legacy kprobe_event */
+	remove_kprobe_event_legacy(probe_name, retprobe);
+	return err;
 }
 
 struct bpf_link *
@@ -10887,7 +9959,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 			prog->name, retprobe ? "kretprobe" : "kprobe",
 			func_name, offset,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		goto err_out;
+		goto err_clean_legacy;
 	}
 	if (legacy) {
 		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
@@ -10898,6 +9970,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 	}
 
 	return link;
+
+err_clean_legacy:
+	if (legacy)
+		remove_kprobe_event_legacy(legacy_probe, retprobe);
 err_out:
 	free(legacy_probe);
 	return libbpf_err_ptr(err);
@@ -11172,9 +10248,10 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
 	}
 	type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
 	if (type < 0) {
+		err = type;
 		pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
 			binary_path, offset, err);
-		return type;
+		goto err_clean_legacy;
 	}
 
 	memset(&attr, 0, sizeof(attr));
@@ -11189,9 +10266,14 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
 	if (pfd < 0) {
 		err = -errno;
 		pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
-		return err;
+		goto err_clean_legacy;
 	}
 	return pfd;
+
+err_clean_legacy:
+	/* Clear the newly added legacy uprobe_event */
+	remove_uprobe_event_legacy(probe_name, retprobe);
+	return err;
 }
 
 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
@@ -11525,7 +10607,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 			prog->name, retprobe ? "uretprobe" : "uprobe",
 			binary_path, func_offset,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-		goto err_out;
+		goto err_clean_legacy;
 	}
 	if (legacy) {
 		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
@@ -11535,10 +10617,13 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 		perf_link->legacy_is_retprobe = retprobe;
 	}
 	return link;
+
+err_clean_legacy:
+	if (legacy)
+		remove_uprobe_event_legacy(legacy_probe, retprobe);
 err_out:
 	free(legacy_probe);
 	return libbpf_err_ptr(err);
-
 }
 
 /* Format of u[ret]probe section definition supporting auto-attach:
@@ -12150,6 +11235,9 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 	return link;
 }
 
+typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+							  void *private_data);
+
 static enum bpf_perf_event_ret
 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 		       void **copy_mem, size_t *copy_size,
@@ -12198,12 +11286,6 @@ perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	return libbpf_err(ret);
 }
 
-__attribute__((alias("perf_event_read_simple")))
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
-			   void **copy_mem, size_t *copy_size,
-			   bpf_perf_event_print_t fn, void *private_data);
-
 struct perf_buffer;
 
 struct perf_buffer_params {
@@ -12337,12 +11419,11 @@ error:
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
 					      struct perf_buffer_params *p);
 
-DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0)
-struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
-					    perf_buffer_sample_fn sample_cb,
-					    perf_buffer_lost_fn lost_cb,
-					    void *ctx,
-					    const struct perf_buffer_opts *opts)
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+				     perf_buffer_sample_fn sample_cb,
+				     perf_buffer_lost_fn lost_cb,
+				     void *ctx,
+				     const struct perf_buffer_opts *opts)
 {
 	struct perf_buffer_params p = {};
 	struct perf_event_attr attr = {};
@@ -12364,22 +11445,10 @@ struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
 	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
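With the v0.6.0 variant promoted to the one and only perf_buffer__new(), callbacks are passed directly; a sketch in which map_fd, on_sample() and on_lost() are the caller's own (opts may be NULL):

	static void on_sample(void *ctx, int cpu, void *data, __u32 size) { /* ... */ }
	static void on_lost(void *ctx, int cpu, __u64 cnt) { /* ... */ }

	struct perf_buffer *pb;
	int err;

	pb = perf_buffer__new(map_fd, 8 /* pages per CPU, power of 2 */,
			      on_sample, on_lost, NULL, NULL);
	if (!pb)
		return -errno;
	while ((err = perf_buffer__poll(pb, 100 /* ms */)) >= 0)
		; /* callbacks fire from here */
	perf_buffer__free(pb);
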
 
-COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4)
-struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
-						const struct perf_buffer_opts *opts)
-{
-	return perf_buffer__new_v0_6_0(map_fd, page_cnt,
-				       opts ? opts->sample_cb : NULL,
-				       opts ? opts->lost_cb : NULL,
-				       opts ? opts->ctx : NULL,
-				       NULL);
-}
-
-DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0)
-struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
-						struct perf_event_attr *attr,
-						perf_buffer_event_fn event_cb, void *ctx,
-						const struct perf_buffer_raw_opts *opts)
+struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
+					 struct perf_event_attr *attr,
+					 perf_buffer_event_fn event_cb, void *ctx,
+					 const struct perf_buffer_raw_opts *opts)
 {
 	struct perf_buffer_params p = {};
 
@@ -12399,20 +11468,6 @@ struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt,
 	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
-COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4)
-struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
-						    const struct perf_buffer_raw_opts *opts)
-{
-	LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts,
-		.cpu_cnt = opts->cpu_cnt,
-		.cpus = opts->cpus,
-		.map_keys = opts->map_keys,
-	);
-
-	return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr,
-					   opts->event_cb, opts->ctx, &inner_opts);
-}
-
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
 					      struct perf_buffer_params *p)
 {
@@ -12714,254 +11769,6 @@ int perf_buffer__consume(struct perf_buffer *pb)
 	return 0;
 }
 
-struct bpf_prog_info_array_desc {
-	int	array_offset;	/* e.g. offset of jited_prog_insns */
-	int	count_offset;	/* e.g. offset of jited_prog_len */
-	int	size_offset;	/* > 0: offset of rec size,
-				 * < 0: fix size of -size_offset
-				 */
-};
-
-static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
-	[BPF_PROG_INFO_JITED_INSNS] = {
-		offsetof(struct bpf_prog_info, jited_prog_insns),
-		offsetof(struct bpf_prog_info, jited_prog_len),
-		-1,
-	},
-	[BPF_PROG_INFO_XLATED_INSNS] = {
-		offsetof(struct bpf_prog_info, xlated_prog_insns),
-		offsetof(struct bpf_prog_info, xlated_prog_len),
-		-1,
-	},
-	[BPF_PROG_INFO_MAP_IDS] = {
-		offsetof(struct bpf_prog_info, map_ids),
-		offsetof(struct bpf_prog_info, nr_map_ids),
-		-(int)sizeof(__u32),
-	},
-	[BPF_PROG_INFO_JITED_KSYMS] = {
-		offsetof(struct bpf_prog_info, jited_ksyms),
-		offsetof(struct bpf_prog_info, nr_jited_ksyms),
-		-(int)sizeof(__u64),
-	},
-	[BPF_PROG_INFO_JITED_FUNC_LENS] = {
-		offsetof(struct bpf_prog_info, jited_func_lens),
-		offsetof(struct bpf_prog_info, nr_jited_func_lens),
-		-(int)sizeof(__u32),
-	},
-	[BPF_PROG_INFO_FUNC_INFO] = {
-		offsetof(struct bpf_prog_info, func_info),
-		offsetof(struct bpf_prog_info, nr_func_info),
-		offsetof(struct bpf_prog_info, func_info_rec_size),
-	},
-	[BPF_PROG_INFO_LINE_INFO] = {
-		offsetof(struct bpf_prog_info, line_info),
-		offsetof(struct bpf_prog_info, nr_line_info),
-		offsetof(struct bpf_prog_info, line_info_rec_size),
-	},
-	[BPF_PROG_INFO_JITED_LINE_INFO] = {
-		offsetof(struct bpf_prog_info, jited_line_info),
-		offsetof(struct bpf_prog_info, nr_jited_line_info),
-		offsetof(struct bpf_prog_info, jited_line_info_rec_size),
-	},
-	[BPF_PROG_INFO_PROG_TAGS] = {
-		offsetof(struct bpf_prog_info, prog_tags),
-		offsetof(struct bpf_prog_info, nr_prog_tags),
-		-(int)sizeof(__u8) * BPF_TAG_SIZE,
-	},
-
-};
-
-static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
-					   int offset)
-{
-	__u32 *array = (__u32 *)info;
-
-	if (offset >= 0)
-		return array[offset / sizeof(__u32)];
-	return -(int)offset;
-}
-
-static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
-					   int offset)
-{
-	__u64 *array = (__u64 *)info;
-
-	if (offset >= 0)
-		return array[offset / sizeof(__u64)];
-	return -(int)offset;
-}
-
-static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
-					 __u32 val)
-{
-	__u32 *array = (__u32 *)info;
-
-	if (offset >= 0)
-		array[offset / sizeof(__u32)] = val;
-}
-
-static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
-					 __u64 val)
-{
-	__u64 *array = (__u64 *)info;
-
-	if (offset >= 0)
-		array[offset / sizeof(__u64)] = val;
-}
-
-struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays)
-{
-	struct bpf_prog_info_linear *info_linear;
-	struct bpf_prog_info info = {};
-	__u32 info_len = sizeof(info);
-	__u32 data_len = 0;
-	int i, err;
-	void *ptr;
-
-	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
-		return libbpf_err_ptr(-EINVAL);
-
-	/* step 1: get array dimensions */
-	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
-	if (err) {
-		pr_debug("can't get prog info: %s", strerror(errno));
-		return libbpf_err_ptr(-EFAULT);
-	}
-
-	/* step 2: calculate total size of all arrays */
-	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-		bool include_array = (arrays & (1UL << i)) > 0;
-		struct bpf_prog_info_array_desc *desc;
-		__u32 count, size;
-
-		desc = bpf_prog_info_array_desc + i;
-
-		/* kernel is too old to support this field */
-		if (info_len < desc->array_offset + sizeof(__u32) ||
-		    info_len < desc->count_offset + sizeof(__u32) ||
-		    (desc->size_offset > 0 && info_len < desc->size_offset))
-			include_array = false;
-
-		if (!include_array) {
-			arrays &= ~(1UL << i);	/* clear the bit */
-			continue;
-		}
-
-		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
-		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-
-		data_len += count * size;
-	}
-
-	/* step 3: allocate continuous memory */
-	data_len = roundup(data_len, sizeof(__u64));
-	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
-	if (!info_linear)
-		return libbpf_err_ptr(-ENOMEM);
-
-	/* step 4: fill data to info_linear->info */
-	info_linear->arrays = arrays;
-	memset(&info_linear->info, 0, sizeof(info));
-	ptr = info_linear->data;
-
-	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-		struct bpf_prog_info_array_desc *desc;
-		__u32 count, size;
-
-		if ((arrays & (1UL << i)) == 0)
-			continue;
-
-		desc  = bpf_prog_info_array_desc + i;
-		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
-		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-		bpf_prog_info_set_offset_u32(&info_linear->info,
-					     desc->count_offset, count);
-		bpf_prog_info_set_offset_u32(&info_linear->info,
-					     desc->size_offset, size);
-		bpf_prog_info_set_offset_u64(&info_linear->info,
-					     desc->array_offset,
-					     ptr_to_u64(ptr));
-		ptr += count * size;
-	}
-
-	/* step 5: call syscall again to get required arrays */
-	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
-	if (err) {
-		pr_debug("can't get prog info: %s", strerror(errno));
-		free(info_linear);
-		return libbpf_err_ptr(-EFAULT);
-	}
-
-	/* step 6: verify the data */
-	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-		struct bpf_prog_info_array_desc *desc;
-		__u32 v1, v2;
-
-		if ((arrays & (1UL << i)) == 0)
-			continue;
-
-		desc = bpf_prog_info_array_desc + i;
-		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
-		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
-						   desc->count_offset);
-		if (v1 != v2)
-			pr_warn("%s: mismatch in element count\n", __func__);
-
-		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
-		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
-						   desc->size_offset);
-		if (v1 != v2)
-			pr_warn("%s: mismatch in rec size\n", __func__);
-	}
-
-	/* step 7: update info_len and data_len */
-	info_linear->info_len = sizeof(struct bpf_prog_info);
-	info_linear->data_len = data_len;
-
-	return info_linear;
-}
-
-void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
-{
-	int i;
-
-	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-		struct bpf_prog_info_array_desc *desc;
-		__u64 addr, offs;
-
-		if ((info_linear->arrays & (1UL << i)) == 0)
-			continue;
-
-		desc = bpf_prog_info_array_desc + i;
-		addr = bpf_prog_info_read_offset_u64(&info_linear->info,
-						     desc->array_offset);
-		offs = addr - ptr_to_u64(info_linear->data);
-		bpf_prog_info_set_offset_u64(&info_linear->info,
-					     desc->array_offset, offs);
-	}
-}
-
-void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
-{
-	int i;
-
-	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
-		struct bpf_prog_info_array_desc *desc;
-		__u64 addr, offs;
-
-		if ((info_linear->arrays & (1UL << i)) == 0)
-			continue;
-
-		desc = bpf_prog_info_array_desc + i;
-		offs = bpf_prog_info_read_offset_u64(&info_linear->info,
-						     desc->array_offset);
-		addr = offs + ptr_to_u64(info_linear->data);
-		bpf_prog_info_set_offset_u64(&info_linear->info,
-					     desc->array_offset, addr);
-	}
-}
-
 int bpf_program__set_attach_target(struct bpf_program *prog,
 				   int attach_prog_fd,
 				   const char *attach_func_name)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index fa27969da0da..e4d5353f757b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -101,11 +101,6 @@ LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
 /* Hide internal to user */
 struct bpf_object;
 
-struct bpf_object_open_attr {
-	const char *file;
-	enum bpf_prog_type prog_type;
-};
-
 struct bpf_object_open_opts {
 	/* size of this struct, for forward/backward compatibility */
 	size_t sz;
@@ -118,21 +113,12 @@ struct bpf_object_open_opts {
 	const char *object_name;
 	/* parse map definitions non-strictly, allowing extra attributes/data */
 	bool relaxed_maps;
-	/* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
-	 * Value is ignored. Relocations always are processed non-strictly.
-	 * Non-relocatable instructions are replaced with invalid ones to
-	 * prevent accidental errors.
-	 * */
-	LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect")
-	bool relaxed_core_relocs;
 	/* maps that set the 'pinning' attribute in their definition will have
 	 * their pin_path attribute set to a file in this directory, and be
 	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
 	 */
 	const char *pin_root_path;
-
-	LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program")
-	__u32 attach_prog_fd;
+	long :0;
 	/* Additional kernel config content that augments and overrides
 	 * system Kconfig for CONFIG_xxx externs.
 	 */
@@ -215,20 +201,10 @@ LIBBPF_API struct bpf_object *
 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 		     const struct bpf_object_open_opts *opts);
 
-/* deprecated bpf_object__open variants */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open_mem() instead")
-LIBBPF_API struct bpf_object *
-bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
-			const char *name);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open_file() instead")
-LIBBPF_API struct bpf_object *
-bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+/* Load/unload object into/from kernel */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
 
-enum libbpf_pin_type {
-	LIBBPF_PIN_NONE,
-	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
-	LIBBPF_PIN_BY_NAME,
-};
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
 
 /* pin_maps and unpin_maps can both be called with a NULL path, in which case
  * they will use the pin_path attribute of each map (and ignore all maps that
@@ -242,20 +218,6 @@ LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
 LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
 					  const char *path);
 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
-LIBBPF_API void bpf_object__close(struct bpf_object *object);
-
-struct bpf_object_load_attr {
-	struct bpf_object *obj;
-	int log_level;
-	const char *target_btf_path;
-};
-
-/* Load/unload object into/from kernel */
-LIBBPF_API int bpf_object__load(struct bpf_object *obj);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead")
-LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
-LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead")
-LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
 
 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
@@ -265,29 +227,10 @@ struct btf;
 LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
 LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
 
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead")
-LIBBPF_API struct bpf_program *
-bpf_object__find_program_by_title(const struct bpf_object *obj,
-				  const char *title);
 LIBBPF_API struct bpf_program *
 bpf_object__find_program_by_name(const struct bpf_object *obj,
 				 const char *name);
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "track bpf_objects in application code instead")
-struct bpf_object *bpf_object__next(struct bpf_object *prev);
-#define bpf_object__for_each_safe(pos, tmp)			\
-	for ((pos) = bpf_object__next(NULL),		\
-		(tmp) = bpf_object__next(pos);		\
-	     (pos) != NULL;				\
-	     (pos) = (tmp), (tmp) = bpf_object__next(tmp))
-
-typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
-				    bpf_object_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
-
 LIBBPF_API int
 libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 			 enum bpf_attach_type *expected_attach_type);
@@ -298,9 +241,7 @@ LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
 
 /* Accessors of bpf_program */
 struct bpf_program;
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_program() instead")
-struct bpf_program *bpf_program__next(struct bpf_program *prog,
-				      const struct bpf_object *obj);
+
 LIBBPF_API struct bpf_program *
 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog);
 
@@ -309,33 +250,17 @@ bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog)
 	     (pos) != NULL;					\
 	     (pos) = bpf_object__next_program((obj), (pos)))
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_program() instead")
-struct bpf_program *bpf_program__prev(struct bpf_program *prog,
-				      const struct bpf_object *obj);
 LIBBPF_API struct bpf_program *
 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *prog);
 
-typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
-				     bpf_program_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
 LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
 					 __u32 ifindex);
 
 LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
-LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead")
-const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);
 LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
 
-/* returns program size in bytes */
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insn_cnt() instead")
-LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
-
 struct bpf_insn;
 
 /**
@@ -388,17 +313,7 @@ LIBBPF_API int bpf_program__set_insns(struct bpf_program *prog,
  */
 LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog);
 
-LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead")
-LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version);
 LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
-					 const char *path,
-					 int instance);
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
-					   const char *path,
-					   int instance);
 
 /**
  * @brief **bpf_program__pin()** pins the BPF program to a file
@@ -698,99 +613,6 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_iter(const struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts);
 
-/*
- * Libbpf allows callers to adjust BPF programs before being loaded
- * into kernel. One program in an object file can be transformed into
- * multiple variants to be attached to different hooks.
- *
- * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
- * form an API for this purpose.
- *
- * - bpf_program_prep_t:
- *   Defines a 'preprocessor', which is a caller defined function
- *   passed to libbpf through bpf_program__set_prep(), and will be
- *   called before program is loaded. The processor should adjust
- *   the program one time for each instance according to the instance id
- *   passed to it.
- *
- * - bpf_program__set_prep:
- *   Attaches a preprocessor to a BPF program. The number of instances
- *   that should be created is also passed through this function.
- *
- * - bpf_program__nth_fd:
- *   After the program is loaded, get resulting FD of a given instance
- *   of the BPF program.
- *
- * If bpf_program__set_prep() is not used, the program would be loaded
- * without adjustment during bpf_object__load(). The program has only
- * one instance. In this case bpf_program__fd(prog) is equal to
- * bpf_program__nth_fd(prog, 0).
- */
-struct bpf_prog_prep_result {
-	/*
-	 * If not NULL, load new instruction array.
-	 * If set to NULL, don't load this instance.
-	 */
-	struct bpf_insn *new_insn_ptr;
-	int new_insn_cnt;
-
-	/* If not NULL, result FD is written to it. */
-	int *pfd;
-};
-
-/*
- * Parameters of bpf_program_prep_t:
- *  - prog:	The bpf_program being loaded.
- *  - n:	Index of instance being generated.
- *  - insns:	BPF instructions array.
- *  - insns_cnt:Number of instructions in insns.
- *  - res:	Output parameter, result of transformation.
- *
- * Return value:
- *  - Zero:	pre-processing success.
- *  - Non-zero:	pre-processing error, stop loading.
- */
-typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
-				  struct bpf_insn *insns, int insns_cnt,
-				  struct bpf_prog_prep_result *res);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions")
-LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
-				     bpf_program_prep_t prep);
-
-LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")
-LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
-
-/*
- * Adjust type of BPF program. Default is kprobe.
- */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__set_type() instead")
-LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-
 LIBBPF_API enum bpf_prog_type bpf_program__type(const struct bpf_program *prog);
 
 /**
@@ -853,47 +675,6 @@ LIBBPF_API int
 bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
 			       const char *attach_func_name);
 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_struct_ops(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_extension(const struct bpf_program *prog);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_program__type() instead")
-LIBBPF_API bool bpf_program__is_sk_lookup(const struct bpf_program *prog);
-
-/*
- * No need for __attribute__((packed)), all members of 'bpf_map_def'
- * are all aligned.  In addition, using __attribute__((packed))
- * would trigger a -Wpacked warning message, and lead to an error
- * if -Werror is set.
- */
-struct bpf_map_def {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-	unsigned int map_flags;
-};
-
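With struct bpf_map_def and bpf_map__def() out of the public API, each field has a dedicated getter; e.g.:

	/* was: const struct bpf_map_def *def = bpf_map__def(map); */
	enum bpf_map_type type = bpf_map__type(map);
	__u32 key_size	       = bpf_map__key_size(map);
	__u32 value_size       = bpf_map__value_size(map);
	__u32 max_entries      = bpf_map__max_entries(map);
	__u32 map_flags	       = bpf_map__map_flags(map);
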
 /**
  * @brief **bpf_object__find_map_by_name()** returns BPF map of
  * the given name, if it exists within the passed BPF object
@@ -908,16 +689,6 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
 LIBBPF_API int
 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
 
-/*
- * Get bpf_map through the offset of corresponding struct bpf_map_def
- * in the BPF object file.
- */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
-struct bpf_map *
-bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
-struct bpf_map *bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
 LIBBPF_API struct bpf_map *
 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
 
@@ -927,8 +698,6 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map);
 	     (pos) = bpf_object__next_map((obj), (pos)))
 #define bpf_map__for_each bpf_object__for_each_map
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__prev_map() instead")
-struct bpf_map *bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
 LIBBPF_API struct bpf_map *
 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *map);
 
@@ -962,9 +731,6 @@ LIBBPF_API bool bpf_map__autocreate(const struct bpf_map *map);
  */
 LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-/* get map definition */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead")
-const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
 /* get map name */
 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
 /* get/set map type */
@@ -973,8 +739,6 @@ LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
 /* get/set map size (max_entries) */
 LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__set_max_entries() instead")
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
 /* get/set map flags */
 LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
@@ -997,17 +761,9 @@ LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
 LIBBPF_API __u64 bpf_map__map_extra(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);
 
-typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
-				 bpf_map_clear_priv_t clear_priv);
-LIBBPF_DEPRECATED_SINCE(0, 7, "storage via set_priv/priv is deprecated")
-LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
 					  const void *data, size_t size);
 LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
-LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 
 /**
  * @brief **bpf_map__is_internal()** tells the caller whether or not the
@@ -1130,65 +886,6 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
 LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
 				     const void *cur_key, void *next_key, size_t key_sz);
 
-/**
- * @brief **libbpf_get_error()** extracts the error code from the passed
- * pointer
- * @param ptr pointer returned from libbpf API function
- * @return error code; or 0 if no error occurred
- *
- * Many libbpf API functions which return pointers have logic to encode error
- * codes as pointers, and do not return NULL. Meaning **libbpf_get_error()**
- * should be used on the return value from these functions immediately after
- * calling the API function, with no intervening calls that could clobber the
- * `errno` variable. Consult the individual function's documentation to verify
- * whether this logic applies.
- *
- * For these API functions, if `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)`
- * is enabled, NULL is returned on error instead.
- *
- * If ptr is NULL, then errno should be already set by the failing
- * API, because libbpf never returns NULL on success and it now always
- * sets errno on error.
- *
- * Example usage:
- *
- *   struct perf_buffer *pb;
- *
- *   pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES, &opts);
- *   err = libbpf_get_error(pb);
- *   if (err) {
- *	  pb = NULL;
- *	  fprintf(stderr, "failed to open perf buffer: %d\n", err);
- *	  goto cleanup;
- *   }
- */
-LIBBPF_API long libbpf_get_error(const void *ptr);
-
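The libbpf_get_error() documentation and declaration are removed from libbpf.h here, while the function stays defined in libbpf.c (as the earlier hunk shows); under the 1.0 conventions the comment describes, a plain errno check is equivalent:

	struct bpf_object *obj = bpf_object__open_file("prog.o", NULL);

	if (!obj) {
		int err = -errno; /* same value libbpf_get_error(obj) would report */
		fprintf(stderr, "open failed: %d\n", err);
		return err;
	}
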
-struct bpf_prog_load_attr {
-	const char *file;
-	enum bpf_prog_type prog_type;
-	enum bpf_attach_type expected_attach_type;
-	int ifindex;
-	int log_level;
-	int prog_flags;
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead")
-LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
-				   struct bpf_object **pobj, int *prog_fd);
-LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead")
-LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type,
-					struct bpf_object **pobj, int *prog_fd);
-
-/* XDP related API */
-struct xdp_link_info {
-	__u32 prog_id;
-	__u32 drv_prog_id;
-	__u32 hw_prog_id;
-	__u32 skb_prog_id;
-	__u8 attach_mode;
-};
-
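The deleted XDP helpers map onto the bpf_xdp_*() API that superseded them; a sketch with ifindex and prog_fd assumed, flags from <linux/if_link.h>, and error checks condensed:

	__u32 prog_id;
	int err;

	err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_SKB_MODE, NULL); /* was bpf_set_link_xdp_fd() */
	if (!err)
		err = bpf_xdp_query_id(ifindex, XDP_FLAGS_SKB_MODE, &prog_id); /* was bpf_get_link_xdp_id() */
	if (!err)
		err = bpf_xdp_detach(ifindex, XDP_FLAGS_SKB_MODE, NULL);
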
 struct bpf_xdp_set_link_opts {
 	size_t sz;
 	int old_fd;
@@ -1196,17 +893,6 @@ struct bpf_xdp_set_link_opts {
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
-LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead")
-LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
-					const struct bpf_xdp_set_link_opts *opts);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead")
-LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead")
-LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-				     size_t info_size, __u32 flags);
-
 struct bpf_xdp_attach_opts {
 	size_t sz;
 	int old_prog_fd;
@@ -1305,17 +991,7 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
 
 /* common use perf buffer options */
 struct perf_buffer_opts {
-	union {
-		size_t sz;
-		struct { /* DEPRECATED: will be removed in v1.0 */
-			/* if specified, sample_cb is called for each sample */
-			perf_buffer_sample_fn sample_cb;
-			/* if specified, lost_cb is called for each batch of lost samples */
-			perf_buffer_lost_fn lost_cb;
-			/* ctx is provided to sample_cb and lost_cb */
-			void *ctx;
-		};
-	};
+	size_t sz;
 };
 #define perf_buffer_opts__last_field sz
 
@@ -1336,21 +1012,6 @@ perf_buffer__new(int map_fd, size_t page_cnt,
 		 perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
 		 const struct perf_buffer_opts *opts);
 
-LIBBPF_API struct perf_buffer *
-perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt,
-			perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,
-			const struct perf_buffer_opts *opts);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead")
-struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt,
-						const struct perf_buffer_opts *opts);
-
-#define perf_buffer__new(...) ___libbpf_overload(___perf_buffer_new, __VA_ARGS__)
-#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \
-	perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts)
-#define ___perf_buffer_new3(map_fd, page_cnt, opts) \
-	perf_buffer__new_deprecated(map_fd, page_cnt, opts)
-
 enum bpf_perf_event_ret {
 	LIBBPF_PERF_EVENT_DONE	= 0,
 	LIBBPF_PERF_EVENT_ERROR	= -1,
@@ -1364,21 +1025,9 @@ typedef enum bpf_perf_event_ret
 
 /* raw perf buffer options, giving most power and control */
 struct perf_buffer_raw_opts {
-	union {
-		struct {
-			size_t sz;
-			long :0;
-			long :0;
-		};
-		struct { /* DEPRECATED: will be removed in v1.0 */
-			/* perf event attrs passed directly into perf_event_open() */
-			struct perf_event_attr *attr;
-			/* raw event callback */
-			perf_buffer_event_fn event_cb;
-			/* ctx is provided to event_cb */
-			void *ctx;
-		};
-	};
+	size_t sz;
+	long :0;
+	long :0;
 	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
 	 * max_entries of given PERF_EVENT_ARRAY map)
 	 */
@@ -1390,26 +1039,13 @@ struct perf_buffer_raw_opts {
 };
 #define perf_buffer_raw_opts__last_field map_keys
 
+struct perf_event_attr;
+
 LIBBPF_API struct perf_buffer *
 perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
 		     perf_buffer_event_fn event_cb, void *ctx,
 		     const struct perf_buffer_raw_opts *opts);
 
-LIBBPF_API struct perf_buffer *
-perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr,
-			    perf_buffer_event_fn event_cb, void *ctx,
-			    const struct perf_buffer_raw_opts *opts);
-
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead")
-struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt,
-						    const struct perf_buffer_raw_opts *opts);
-
-#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__)
-#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \
-	perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts)
-#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \
-	perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts)
-
 LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
@@ -1418,15 +1054,6 @@ LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_id
 LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
 
-typedef enum bpf_perf_event_ret
-	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
-				  void *private_data);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or  perf_buffer__consume() instead")
-LIBBPF_API enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
-			   void **copy_mem, size_t *copy_size,
-			   bpf_perf_event_print_t fn, void *private_data);
-
 struct bpf_prog_linfo;
 struct bpf_prog_info;
 
@@ -1448,14 +1075,6 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
  * user, causing subsequent probes to fail. In this case, the caller may want
  * to adjust that limit with setrlimit().
  */
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead")
-LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead")
-LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead")
-LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex);
-LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection")
-LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex);
 
 /**
  * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports
@@ -1499,72 +1118,6 @@ LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void
 LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type,
 				       enum bpf_func_id helper_id, const void *opts);
 
-/*
- * Get bpf_prog_info in continuous memory
- *
- * struct bpf_prog_info has multiple arrays. The user has option to choose
- * arrays to fetch from kernel. The following APIs provide an uniform way to
- * fetch these data. All arrays in bpf_prog_info are stored in a single
- * continuous memory region. This makes it easy to store the info in a
- * file.
- *
- * Before writing bpf_prog_info_linear to files, it is necessary to
- * translate pointers in bpf_prog_info to offsets. Helper functions
- * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
- * are introduced to switch between pointers and offsets.
- *
- * Examples:
- *   # To fetch map_ids and prog_tags:
- *   __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
- *           (1UL << BPF_PROG_INFO_PROG_TAGS);
- *   struct bpf_prog_info_linear *info_linear =
- *           bpf_program__get_prog_info_linear(fd, arrays);
- *
- *   # To save data in file
- *   bpf_program__bpil_addr_to_offs(info_linear);
- *   write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
- *
- *   # To read data from file
- *   read(f, info_linear, <proper_size>);
- *   bpf_program__bpil_offs_to_addr(info_linear);
- */
-enum bpf_prog_info_array {
-	BPF_PROG_INFO_FIRST_ARRAY = 0,
-	BPF_PROG_INFO_JITED_INSNS = 0,
-	BPF_PROG_INFO_XLATED_INSNS,
-	BPF_PROG_INFO_MAP_IDS,
-	BPF_PROG_INFO_JITED_KSYMS,
-	BPF_PROG_INFO_JITED_FUNC_LENS,
-	BPF_PROG_INFO_FUNC_INFO,
-	BPF_PROG_INFO_LINE_INFO,
-	BPF_PROG_INFO_JITED_LINE_INFO,
-	BPF_PROG_INFO_PROG_TAGS,
-	BPF_PROG_INFO_LAST_ARRAY,
-};
-
-struct bpf_prog_info_linear {
-	/* size of struct bpf_prog_info, when the tool is compiled */
-	__u32			info_len;
-	/* total bytes allocated for data, round up to 8 bytes */
-	__u32			data_len;
-	/* which arrays are included in data */
-	__u64			arrays;
-	struct bpf_prog_info	info;
-	__u8			data[];
-};
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API struct bpf_prog_info_linear *
-bpf_program__get_prog_info_linear(int fd, __u64 arrays);
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API void
-bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
-
-LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")
-LIBBPF_API void
-bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
-
 /**
  * @brief **libbpf_num_possible_cpus()** is a helper function to get the
  * number of possible CPUs that the host kernel supports and expects.
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 116a2a8ee7c2..94b589ecfeaa 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -1,29 +1,14 @@
 LIBBPF_0.0.1 {
 	global:
 		bpf_btf_get_fd_by_id;
-		bpf_create_map;
-		bpf_create_map_in_map;
-		bpf_create_map_in_map_node;
-		bpf_create_map_name;
-		bpf_create_map_node;
-		bpf_create_map_xattr;
-		bpf_load_btf;
-		bpf_load_program;
-		bpf_load_program_xattr;
 		bpf_map__btf_key_type_id;
 		bpf_map__btf_value_type_id;
-		bpf_map__def;
 		bpf_map__fd;
-		bpf_map__is_offload_neutral;
 		bpf_map__name;
-		bpf_map__next;
 		bpf_map__pin;
-		bpf_map__prev;
-		bpf_map__priv;
 		bpf_map__reuse_fd;
 		bpf_map__set_ifindex;
 		bpf_map__set_inner_map_fd;
-		bpf_map__set_priv;
 		bpf_map__unpin;
 		bpf_map_delete_elem;
 		bpf_map_get_fd_by_id;
@@ -38,79 +23,37 @@ LIBBPF_0.0.1 {
 		bpf_object__btf_fd;
 		bpf_object__close;
 		bpf_object__find_map_by_name;
-		bpf_object__find_map_by_offset;
-		bpf_object__find_program_by_title;
 		bpf_object__kversion;
 		bpf_object__load;
 		bpf_object__name;
-		bpf_object__next;
 		bpf_object__open;
-		bpf_object__open_buffer;
-		bpf_object__open_xattr;
 		bpf_object__pin;
 		bpf_object__pin_maps;
 		bpf_object__pin_programs;
-		bpf_object__priv;
-		bpf_object__set_priv;
-		bpf_object__unload;
 		bpf_object__unpin_maps;
 		bpf_object__unpin_programs;
-		bpf_perf_event_read_simple;
 		bpf_prog_attach;
 		bpf_prog_detach;
 		bpf_prog_detach2;
 		bpf_prog_get_fd_by_id;
 		bpf_prog_get_next_id;
-		bpf_prog_load;
-		bpf_prog_load_xattr;
 		bpf_prog_query;
-		bpf_prog_test_run;
-		bpf_prog_test_run_xattr;
 		bpf_program__fd;
-		bpf_program__is_kprobe;
-		bpf_program__is_perf_event;
-		bpf_program__is_raw_tracepoint;
-		bpf_program__is_sched_act;
-		bpf_program__is_sched_cls;
-		bpf_program__is_socket_filter;
-		bpf_program__is_tracepoint;
-		bpf_program__is_xdp;
-		bpf_program__load;
-		bpf_program__next;
-		bpf_program__nth_fd;
 		bpf_program__pin;
-		bpf_program__pin_instance;
-		bpf_program__prev;
-		bpf_program__priv;
 		bpf_program__set_expected_attach_type;
 		bpf_program__set_ifindex;
-		bpf_program__set_kprobe;
-		bpf_program__set_perf_event;
-		bpf_program__set_prep;
-		bpf_program__set_priv;
-		bpf_program__set_raw_tracepoint;
-		bpf_program__set_sched_act;
-		bpf_program__set_sched_cls;
-		bpf_program__set_socket_filter;
-		bpf_program__set_tracepoint;
 		bpf_program__set_type;
-		bpf_program__set_xdp;
-		bpf_program__title;
 		bpf_program__unload;
 		bpf_program__unpin;
-		bpf_program__unpin_instance;
 		bpf_prog_linfo__free;
 		bpf_prog_linfo__new;
 		bpf_prog_linfo__lfind_addr_func;
 		bpf_prog_linfo__lfind;
 		bpf_raw_tracepoint_open;
-		bpf_set_link_xdp_fd;
 		bpf_task_fd_query;
-		bpf_verify_program;
 		btf__fd;
 		btf__find_by_name;
 		btf__free;
-		btf__get_from_id;
 		btf__name_by_offset;
 		btf__new;
 		btf__resolve_size;
@@ -127,48 +70,24 @@ LIBBPF_0.0.1 {
 
 LIBBPF_0.0.2 {
 	global:
-		bpf_probe_helper;
-		bpf_probe_map_type;
-		bpf_probe_prog_type;
-		bpf_map__resize;
 		bpf_map_lookup_elem_flags;
 		bpf_object__btf;
 		bpf_object__find_map_fd_by_name;
-		bpf_get_link_xdp_id;
-		btf__dedup;
-		btf__get_map_kv_tids;
-		btf__get_nr_types;
 		btf__get_raw_data;
-		btf__load;
 		btf_ext__free;
-		btf_ext__func_info_rec_size;
 		btf_ext__get_raw_data;
-		btf_ext__line_info_rec_size;
 		btf_ext__new;
-		btf_ext__reloc_func_info;
-		btf_ext__reloc_line_info;
-		xsk_umem__create;
-		xsk_socket__create;
-		xsk_umem__delete;
-		xsk_socket__delete;
-		xsk_umem__fd;
-		xsk_socket__fd;
-		bpf_program__get_prog_info_linear;
-		bpf_program__bpil_addr_to_offs;
-		bpf_program__bpil_offs_to_addr;
 } LIBBPF_0.0.1;
 
 LIBBPF_0.0.3 {
 	global:
 		bpf_map__is_internal;
 		bpf_map_freeze;
-		btf__finalize_data;
 } LIBBPF_0.0.2;
 
 LIBBPF_0.0.4 {
 	global:
 		bpf_link__destroy;
-		bpf_object__load_xattr;
 		bpf_program__attach_kprobe;
 		bpf_program__attach_perf_event;
 		bpf_program__attach_raw_tracepoint;
@@ -176,14 +95,10 @@ LIBBPF_0.0.4 {
 		bpf_program__attach_uprobe;
 		btf_dump__dump_type;
 		btf_dump__free;
-		btf_dump__new;
 		btf__parse_elf;
 		libbpf_num_possible_cpus;
 		perf_buffer__free;
-		perf_buffer__new;
-		perf_buffer__new_raw;
 		perf_buffer__poll;
-		xsk_umem__create;
 } LIBBPF_0.0.3;
 
 LIBBPF_0.0.5 {
@@ -193,7 +108,6 @@ LIBBPF_0.0.5 {
 
 LIBBPF_0.0.6 {
 	global:
-		bpf_get_link_xdp_info;
 		bpf_map__get_pin_path;
 		bpf_map__is_pinned;
 		bpf_map__set_pin_path;
@@ -202,9 +116,6 @@ LIBBPF_0.0.6 {
 		bpf_program__attach_trace;
 		bpf_program__get_expected_attach_type;
 		bpf_program__get_type;
-		bpf_program__is_tracing;
-		bpf_program__set_tracing;
-		bpf_program__size;
 		btf__find_by_name_kind;
 		libbpf_find_vmlinux_btf_id;
 } LIBBPF_0.0.5;
@@ -224,14 +135,8 @@ LIBBPF_0.0.7 {
 		bpf_object__detach_skeleton;
 		bpf_object__load_skeleton;
 		bpf_object__open_skeleton;
-		bpf_probe_large_insn_limit;
-		bpf_prog_attach_xattr;
 		bpf_program__attach;
 		bpf_program__name;
-		bpf_program__is_extension;
-		bpf_program__is_struct_ops;
-		bpf_program__set_extension;
-		bpf_program__set_struct_ops;
 		btf__align_of;
 		libbpf_find_kernel_btf;
 } LIBBPF_0.0.6;
@@ -250,10 +155,7 @@ LIBBPF_0.0.8 {
 		bpf_prog_attach_opts;
 		bpf_program__attach_cgroup;
 		bpf_program__attach_lsm;
-		bpf_program__is_lsm;
 		bpf_program__set_attach_target;
-		bpf_program__set_lsm;
-		bpf_set_link_xdp_fd_opts;
 } LIBBPF_0.0.7;
 
 LIBBPF_0.0.9 {
@@ -291,9 +193,7 @@ LIBBPF_0.1.0 {
 		bpf_map__value_size;
 		bpf_program__attach_xdp;
 		bpf_program__autoload;
-		bpf_program__is_sk_lookup;
 		bpf_program__set_autoload;
-		bpf_program__set_sk_lookup;
 		btf__parse;
 		btf__parse_raw;
 		btf__pointer_size;
@@ -336,7 +236,6 @@ LIBBPF_0.2.0 {
 		perf_buffer__buffer_fd;
 		perf_buffer__epoll_fd;
 		perf_buffer__consume_buffer;
-		xsk_socket__create_shared;
 } LIBBPF_0.1.0;
 
 LIBBPF_0.3.0 {
@@ -348,8 +247,6 @@ LIBBPF_0.3.0 {
 		btf__new_empty_split;
 		btf__new_split;
 		ring_buffer__epoll_fd;
-		xsk_setup_xdp_prog;
-		xsk_socket__update_xskmap;
 } LIBBPF_0.2.0;
 
 LIBBPF_0.4.0 {
@@ -397,7 +294,6 @@ LIBBPF_0.6.0 {
 		bpf_object__next_program;
 		bpf_object__prev_map;
 		bpf_object__prev_program;
-		bpf_prog_load_deprecated;
 		bpf_prog_load;
 		bpf_program__flags;
 		bpf_program__insn_cnt;
@@ -407,18 +303,14 @@ LIBBPF_0.6.0 {
 		btf__add_decl_tag;
 		btf__add_type_tag;
 		btf__dedup;
-		btf__dedup_deprecated;
 		btf__raw_data;
 		btf__type_cnt;
 		btf_dump__new;
-		btf_dump__new_deprecated;
 		libbpf_major_version;
 		libbpf_minor_version;
 		libbpf_version_string;
 		perf_buffer__new;
-		perf_buffer__new_deprecated;
 		perf_buffer__new_raw;
-		perf_buffer__new_raw_deprecated;
 } LIBBPF_0.5.0;
 
 LIBBPF_0.7.0 {
@@ -434,10 +326,11 @@ LIBBPF_0.7.0 {
 		bpf_xdp_detach;
 		bpf_xdp_query;
 		bpf_xdp_query_id;
+		btf_ext__raw_data;
 		libbpf_probe_bpf_helper;
 		libbpf_probe_bpf_map_type;
 		libbpf_probe_bpf_prog_type;
-		libbpf_set_memlock_rlim_max;
+		libbpf_set_memlock_rlim;
 } LIBBPF_0.6.0;
 
 LIBBPF_0.8.0 {
@@ -462,12 +355,11 @@ LIBBPF_0.8.0 {
 
 LIBBPF_1.0.0 {
 	global:
+		bpf_prog_query_opts;
 		btf__add_enum64;
 		btf__add_enum64_value;
 		libbpf_bpf_attach_type_str;
 		libbpf_bpf_link_type_str;
 		libbpf_bpf_map_type_str;
 		libbpf_bpf_prog_type_str;
-
-	local: *;
 };
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index 000e37798ff2..9a7937f339df 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -30,20 +30,10 @@
 /* Add checks for other versions below when planning deprecation of API symbols
  * with the LIBBPF_DEPRECATED_SINCE macro.
  */
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6)
-#define __LIBBPF_MARK_DEPRECATED_0_6(X) X
+#if __LIBBPF_CURRENT_VERSION_GEQ(1, 0)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X) X
 #else
-#define __LIBBPF_MARK_DEPRECATED_0_6(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7)
-#define __LIBBPF_MARK_DEPRECATED_0_7(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_7(X)
-#endif
-#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8)
-#define __LIBBPF_MARK_DEPRECATED_0_8(X) X
-#else
-#define __LIBBPF_MARK_DEPRECATED_0_8(X)
+#define __LIBBPF_MARK_DEPRECATED_1_0(X)
 #endif
 
 /* This set of internal macros allows to do "function overloading" based on
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index a1ad145ffa74..9cd7829cbe41 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -15,7 +15,6 @@
 #include <linux/err.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include "libbpf_legacy.h"
 #include "relo_core.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
@@ -478,8 +477,6 @@ int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void
 __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
 				 __u32 kind);
 
-extern enum libbpf_strict_mode libbpf_mode;
-
 typedef int (*kallsyms_cb_t)(unsigned long long sym_addr, char sym_type,
 			     const char *sym_name, void *ctx);
 
@@ -498,12 +495,8 @@ static inline int libbpf_err(int ret)
  */
 static inline int libbpf_err_errno(int ret)
 {
-	if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
-		/* errno is already assumed to be set on error */
-		return ret < 0 ? -errno : ret;
-
-	/* legacy: on error return -1 directly and don't touch errno */
-	return ret;
+	/* errno is already assumed to be set on error */
+	return ret < 0 ? -errno : ret;
 }
 
 /* handle error for pointer-returning APIs, err is assumed to be < 0 always */
@@ -511,12 +504,7 @@ static inline void *libbpf_err_ptr(int err)
 {
 	/* set errno on error, this doesn't break anything */
 	errno = -err;
-
-	if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
-		return NULL;
-
-	/* legacy: encode err as ptr */
-	return ERR_PTR(err);
+	return NULL;
 }
 
 /* handle pointer-returning APIs' error handling */
@@ -526,11 +514,7 @@ static inline void *libbpf_ptr(void *ret)
 	if (IS_ERR(ret))
 		errno = -PTR_ERR(ret);
 
-	if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
-		return IS_ERR(ret) ? NULL : ret;
-
-	/* legacy: pass-through original pointer */
-	return ret;
+	return IS_ERR(ret) ? NULL : ret;
 }
 
 static inline bool str_is_empty(const char *s)
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index d7bcbd01f66f..5b7e0155db6a 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -20,6 +20,11 @@
 extern "C" {
 #endif
 
+/* As of libbpf 1.0, libbpf_set_strict_mode() and enum libbpf_strict_mode have
+ * no effect. They are left in libbpf_legacy.h so that applications that
+ * prepared for libbpf 1.0 before the final release by calling
+ * libbpf_set_strict_mode() still work with libbpf 1.0+ without any changes.
+ */
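+/* E.g., this typical pre-1.0 opt-in is now a harmless no-op on 1.0+:
+ *
+ *   libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ */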
 enum libbpf_strict_mode {
 	/* Turn on all supported strict features of libbpf to simulate libbpf
 	 * v1.0 behavior.
@@ -71,8 +76,8 @@ enum libbpf_strict_mode {
 	 * first BPF program or map creation operation. This is done only if
 	 * kernel is too old to support memcg-based memory accounting for BPF
 	 * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY,
-	 * but it can be overriden with libbpf_set_memlock_rlim_max() API.
-	 * Note that libbpf_set_memlock_rlim_max() needs to be called before
+	 * but it can be overridden with libbpf_set_memlock_rlim() API.
+	 * Note that libbpf_set_memlock_rlim() needs to be called before
 	 * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load()
 	 * operation.
 	 */
@@ -88,6 +93,25 @@ enum libbpf_strict_mode {
 
 LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
 
+/**
+ * @brief **libbpf_get_error()** extracts the error code from the passed
+ * pointer
+ * @param ptr pointer returned from libbpf API function
+ * @return error code; or 0 if no error occurred
+ *
+ * Note: as of libbpf 1.0 this function is neither necessary nor recommended.
+ * libbpf no longer returns error codes embedded in the pointer itself.
+ * Instead, NULL is returned on error and the error code is passed through the
+ * thread-local errno variable. **libbpf_get_error()** simply returns -errno
+ * if it receives NULL, which is correct only if errno hasn't been modified
+ * between the libbpf API call and the corresponding **libbpf_get_error()**
+ * call. Prefer to check the return value for NULL and use errno directly.
+ *
+ * This API is kept in libbpf 1.0 so that applications that were made
+ * 1.0-ready before the final libbpf 1.0 release keep working without changes.
+ */
+LIBBPF_API long libbpf_get_error(const void *ptr);
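+
+/* Example of the errno-based convention described above (a sketch only; the
+ * "events" map, PERF_BUFFER_PAGES and handle_sample() are placeholders):
+ *
+ *   struct perf_buffer *pb;
+ *
+ *   pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES,
+ *                         handle_sample, NULL, NULL, NULL);
+ *   if (!pb) {
+ *       fprintf(stderr, "failed to open perf buffer: %d\n", -errno);
+ *       goto cleanup;
+ *   }
+ */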
+
 #define DECLARE_LIBBPF_OPTS LIBBPF_OPTS
 
 /* "Discouraged" APIs which don't follow consistent libbpf naming patterns.
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 97b06cede56f..0b5398786bf3 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -17,47 +17,14 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 
-static bool grep(const char *buffer, const char *pattern)
-{
-	return !!strstr(buffer, pattern);
-}
-
-static int get_vendor_id(int ifindex)
-{
-	char ifname[IF_NAMESIZE], path[64], buf[8];
-	ssize_t len;
-	int fd;
-
-	if (!if_indextoname(ifindex, ifname))
-		return -1;
-
-	snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
-
-	fd = open(path, O_RDONLY | O_CLOEXEC);
-	if (fd < 0)
-		return -1;
-
-	len = read(fd, buf, sizeof(buf));
-	close(fd);
-	if (len < 0)
-		return -1;
-	if (len >= (ssize_t)sizeof(buf))
-		return -1;
-	buf[len] = '\0';
-
-	return strtol(buf, NULL, 0);
-}
-
 static int probe_prog_load(enum bpf_prog_type prog_type,
 			   const struct bpf_insn *insns, size_t insns_cnt,
-			   char *log_buf, size_t log_buf_sz,
-			   __u32 ifindex)
+			   char *log_buf, size_t log_buf_sz)
 {
 	LIBBPF_OPTS(bpf_prog_load_opts, opts,
 		.log_buf = log_buf,
 		.log_size = log_buf_sz,
 		.log_level = log_buf ? 1 : 0,
-		.prog_ifindex = ifindex,
 	);
 	int fd, err, exp_err = 0;
 	const char *exp_msg = NULL;
@@ -161,31 +128,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
 	if (opts)
 		return libbpf_err(-EINVAL);
 
-	ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0);
+	ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0);
 	return libbpf_err(ret);
 }
 
-bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
-{
-	struct bpf_insn insns[2] = {
-		BPF_MOV64_IMM(BPF_REG_0, 0),
-		BPF_EXIT_INSN()
-	};
-
-	/* prefer libbpf_probe_bpf_prog_type() unless offload is requested */
-	if (ifindex == 0)
-		return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1;
-
-	if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
-		/* nfp returns -EINVAL on exit(0) with TC offload */
-		insns[0].imm = 2;
-
-	errno = 0;
-	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
-
-	return errno != EINVAL && errno != EOPNOTSUPP;
-}
-
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 			 const char *str_sec, size_t str_len)
 {
@@ -242,15 +188,13 @@ static int load_local_storage_btf(void)
 				     strs, sizeof(strs));
 }
 
-static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
+static int probe_map_create(enum bpf_map_type map_type)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, opts);
 	int key_size, value_size, max_entries;
 	__u32 btf_key_type_id = 0, btf_value_type_id = 0;
 	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
 
-	opts.map_ifindex = ifindex;
-
 	key_size	= sizeof(__u32);
 	value_size	= sizeof(__u32);
 	max_entries	= 1;
@@ -326,12 +270,6 @@ static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)
 
 	if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 	    map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
-		/* TODO: probe for device, once libbpf has a function to create
-		 * map-in-map for offload
-		 */
-		if (ifindex)
-			goto cleanup;
-
 		fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL,
 					  sizeof(__u32), sizeof(__u32), 1, NULL);
 		if (fd_inner < 0)
@@ -370,15 +308,10 @@ int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts)
 	if (opts)
 		return libbpf_err(-EINVAL);
 
-	ret = probe_map_create(map_type, 0);
+	ret = probe_map_create(map_type);
 	return libbpf_err(ret);
 }
 
-bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
-{
-	return probe_map_create(map_type, ifindex) == 1;
-}
-
 int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id,
 			    const void *opts)
 {
@@ -407,7 +340,7 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
 	}
 
 	buf[0] = '\0';
-	ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0);
+	ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf));
 	if (ret < 0)
 		return libbpf_err(ret);
 
@@ -427,51 +360,3 @@ int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helpe
 		return 0;
 	return 1; /* assume supported */
 }
-
-bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
-		      __u32 ifindex)
-{
-	struct bpf_insn insns[2] = {
-		BPF_EMIT_CALL(id),
-		BPF_EXIT_INSN()
-	};
-	char buf[4096] = {};
-	bool res;
-
-	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);
-	res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
-
-	if (ifindex) {
-		switch (get_vendor_id(ifindex)) {
-		case 0x19ee: /* Netronome specific */
-			res = res && !grep(buf, "not supported by FW") &&
-				!grep(buf, "unsupported function id");
-			break;
-		default:
-			break;
-		}
-	}
-
-	return res;
-}
-
-/*
- * Probe for availability of kernel commit (5.3):
- *
- * c04c0d2b968a ("bpf: increase complexity limit and maximum program size")
- */
-bool bpf_probe_large_insn_limit(__u32 ifindex)
-{
-	struct bpf_insn insns[BPF_MAXINSNS + 1];
-	int i;
-
-	for (i = 0; i < BPF_MAXINSNS; i++)
-		insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
-	insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
-
-	errno = 0;
-	probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0,
-			ifindex);
-
-	return errno != E2BIG && errno != EINVAL;
-}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index cbc8967d5402..6c013168032d 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -27,6 +27,14 @@ typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
 typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
 			      void *cookie);
 
+struct xdp_link_info {
+	__u32 prog_id;
+	__u32 drv_prog_id;
+	__u32 hw_prog_id;
+	__u32 skb_prog_id;
+	__u8 attach_mode;
+};
+
 struct xdp_id_md {
 	int ifindex;
 	__u32 flags;
@@ -288,31 +296,6 @@ int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *o
 	return bpf_xdp_attach(ifindex, -1, flags, opts);
 }
 
-int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
-			     const struct bpf_xdp_set_link_opts *opts)
-{
-	int old_fd = -1, ret;
-
-	if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
-		return libbpf_err(-EINVAL);
-
-	if (OPTS_HAS(opts, old_fd)) {
-		old_fd = OPTS_GET(opts, old_fd, -1);
-		flags |= XDP_FLAGS_REPLACE;
-	}
-
-	ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
-	return libbpf_err(ret);
-}
-
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
-{
-	int ret;
-
-	ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
-	return libbpf_err(ret);
-}
-
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
 			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
 {
@@ -413,30 +396,6 @@ int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts)
 	return 0;
 }
 
-int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
-			  size_t info_size, __u32 flags)
-{
-	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
-	size_t sz;
-	int err;
-
-	if (!info_size)
-		return libbpf_err(-EINVAL);
-
-	err = bpf_xdp_query(ifindex, flags, &opts);
-	if (err)
-		return libbpf_err(err);
-
-	/* struct xdp_link_info field layout matches struct bpf_xdp_query_opts
-	 * layout after sz field
-	 */
-	sz = min(info_size, offsetofend(struct xdp_link_info, attach_mode));
-	memcpy(info, &opts.prog_id, sz);
-	memset((void *)info + sz, 0, info_size - sz);
-
-	return 0;
-}
-
 int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 {
 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
@@ -463,11 +422,6 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 }
 
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
-{
-	return bpf_xdp_query_id(ifindex, flags, prog_id);
-}
-
 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
 
 static int clsact_config(struct libbpf_nla_req *req)
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 6ad3c3891a9a..c4b0e81ae293 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -95,6 +95,7 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
 	case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id";
 	case BPF_CORE_TYPE_ID_TARGET: return "target_type_id";
 	case BPF_CORE_TYPE_EXISTS: return "type_exists";
+	case BPF_CORE_TYPE_MATCHES: return "type_matches";
 	case BPF_CORE_TYPE_SIZE: return "type_size";
 	case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists";
 	case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";
@@ -123,6 +124,7 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
 	case BPF_CORE_TYPE_ID_LOCAL:
 	case BPF_CORE_TYPE_ID_TARGET:
 	case BPF_CORE_TYPE_EXISTS:
+	case BPF_CORE_TYPE_MATCHES:
 	case BPF_CORE_TYPE_SIZE:
 		return true;
 	default:
@@ -141,6 +143,86 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
 	}
 }
 
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+				const struct btf *targ_btf, __u32 targ_id, int level)
+{
+	const struct btf_type *local_type, *targ_type;
+	int depth = 32; /* max recursion depth */
+
+	/* caller made sure that names match (ignoring flavor suffix) */
+	local_type = btf_type_by_id(local_btf, local_id);
+	targ_type = btf_type_by_id(targ_btf, targ_id);
+	if (!btf_kind_core_compat(local_type, targ_type))
+		return 0;
+
+recur:
+	depth--;
+	if (depth < 0)
+		return -EINVAL;
+
+	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!local_type || !targ_type)
+		return -EINVAL;
+
+	if (!btf_kind_core_compat(local_type, targ_type))
+		return 0;
+
+	switch (btf_kind(local_type)) {
+	case BTF_KIND_UNKN:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_ENUM:
+	case BTF_KIND_FWD:
+	case BTF_KIND_ENUM64:
+		return 1;
+	case BTF_KIND_INT:
+		/* just reject deprecated bitfield-like integers; all other
+		 * integers are by default compatible between each other
+		 */
+		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
+	case BTF_KIND_PTR:
+		local_id = local_type->type;
+		targ_id = targ_type->type;
+		goto recur;
+	case BTF_KIND_ARRAY:
+		local_id = btf_array(local_type)->type;
+		targ_id = btf_array(targ_type)->type;
+		goto recur;
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *local_p = btf_params(local_type);
+		struct btf_param *targ_p = btf_params(targ_type);
+		__u16 local_vlen = btf_vlen(local_type);
+		__u16 targ_vlen = btf_vlen(targ_type);
+		int i, err;
+
+		if (local_vlen != targ_vlen)
+			return 0;
+
+		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+			if (level <= 0)
+				return -EINVAL;
+
+			skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
+			skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
+			err = __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id,
+							  level - 1);
+			if (err <= 0)
+				return err;
+		}
+
+		/* tail recurse for return type check */
+		skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
+		skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
+		goto recur;
+	}
+	default:
+		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+			btf_kind_str(local_type), local_id, targ_id);
+		return 0;
+	}
+}
+
 /*
  * Turn bpf_core_relo into a low- and high-level spec representation,
  * validating correctness along the way, as well as calculating resulting
@@ -171,7 +253,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
  *   - field 'a' access (corresponds to '2' in low-level spec);
  *   - array element #3 access (corresponds to '3' in low-level spec).
  *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * Type-based relocations (TYPE_EXISTS/TYPE_MATCHES/TYPE_SIZE,
  * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
  * spec and raw_spec are kept empty.
  *
@@ -488,9 +570,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 	targ_spec->relo_kind = local_spec->relo_kind;
 
 	if (core_relo_is_type_based(local_spec->relo_kind)) {
-		return bpf_core_types_are_compat(local_spec->btf,
-						 local_spec->root_type_id,
-						 targ_btf, targ_id);
+		if (local_spec->relo_kind == BPF_CORE_TYPE_MATCHES)
+			return bpf_core_types_match(local_spec->btf,
+						    local_spec->root_type_id,
+						    targ_btf, targ_id);
+		else
+			return bpf_core_types_are_compat(local_spec->btf,
+							 local_spec->root_type_id,
+							 targ_btf, targ_id);
 	}
 
 	local_acc = &local_spec->spec[0];
@@ -739,6 +826,7 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
 			*validate = false;
 		break;
 	case BPF_CORE_TYPE_EXISTS:
+	case BPF_CORE_TYPE_MATCHES:
 		*val = 1;
 		break;
 	case BPF_CORE_TYPE_SIZE:
@@ -1330,3 +1418,273 @@ int bpf_core_calc_relo_insn(const char *prog_name,
 
 	return 0;
 }
+
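+/* Compare type names on their "essential" part only, i.e. with any CO-RE
+ * flavor suffix ignored: e.g., a local "task_struct___old" name-matches a
+ * target "task_struct".
+ */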
+static bool bpf_core_names_match(const struct btf *local_btf, size_t local_name_off,
+				 const struct btf *targ_btf, size_t targ_name_off)
+{
+	const char *local_n, *targ_n;
+	size_t local_len, targ_len;
+
+	local_n = btf__name_by_offset(local_btf, local_name_off);
+	targ_n = btf__name_by_offset(targ_btf, targ_name_off);
+
+	if (str_is_empty(targ_n))
+		return str_is_empty(local_n);
+
+	targ_len = bpf_core_essential_name_len(targ_n);
+	local_len = bpf_core_essential_name_len(local_n);
+
+	return targ_len == local_len && strncmp(local_n, targ_n, local_len) == 0;
+}
+
+static int bpf_core_enums_match(const struct btf *local_btf, const struct btf_type *local_t,
+				const struct btf *targ_btf, const struct btf_type *targ_t)
+{
+	__u16 local_vlen = btf_vlen(local_t);
+	__u16 targ_vlen = btf_vlen(targ_t);
+	int i, j;
+
+	if (local_t->size != targ_t->size)
+		return 0;
+
+	if (local_vlen > targ_vlen)
+		return 0;
+
+	/* iterate over the local enum's variants and make sure each has
+	 * a symbolic name correspondent in the target
+	 */
+	for (i = 0; i < local_vlen; i++) {
+		bool matched = false;
+		__u32 local_n_off, targ_n_off;
+
+		local_n_off = btf_is_enum(local_t) ? btf_enum(local_t)[i].name_off :
+						     btf_enum64(local_t)[i].name_off;
+
+		for (j = 0; j < targ_vlen; j++) {
+			targ_n_off = btf_is_enum(targ_t) ? btf_enum(targ_t)[j].name_off :
+							   btf_enum64(targ_t)[j].name_off;
+
+			if (bpf_core_names_match(local_btf, local_n_off, targ_btf, targ_n_off)) {
+				matched = true;
+				break;
+			}
+		}
+
+		if (!matched)
+			return 0;
+	}
+	return 1;
+}
+
+static int bpf_core_composites_match(const struct btf *local_btf, const struct btf_type *local_t,
+				     const struct btf *targ_btf, const struct btf_type *targ_t,
+				     bool behind_ptr, int level)
+{
+	const struct btf_member *local_m = btf_members(local_t);
+	__u16 local_vlen = btf_vlen(local_t);
+	__u16 targ_vlen = btf_vlen(targ_t);
+	int i, j, err;
+
+	if (local_vlen > targ_vlen)
+		return 0;
+
+	/* check that all local members have a match in the target */
+	for (i = 0; i < local_vlen; i++, local_m++) {
+		const struct btf_member *targ_m = btf_members(targ_t);
+		bool matched = false;
+
+		for (j = 0; j < targ_vlen; j++, targ_m++) {
+			if (!bpf_core_names_match(local_btf, local_m->name_off,
+						  targ_btf, targ_m->name_off))
+				continue;
+
+			err = __bpf_core_types_match(local_btf, local_m->type, targ_btf,
+						     targ_m->type, behind_ptr, level - 1);
+			if (err < 0)
+				return err;
+			if (err > 0) {
+				matched = true;
+				break;
+			}
+		}
+
+		if (!matched)
+			return 0;
+	}
+	return 1;
+}
+
+/* Check that two types "match". This function assumes that root types were
+ * already checked for name match.
+ *
+ * The matching relation is defined as follows:
+ * - modifiers and typedefs are stripped (and, hence, effectively ignored)
+ * - generally speaking types need to be of same kind (struct vs. struct, union
+ *   vs. union, etc.)
+ *   - exceptions are struct/union behind a pointer which could also match a
+ *     forward declaration of a struct or union, respectively, and enum vs.
+ *     enum64 (see below)
+ * Then, depending on type:
+ * - integers:
+ *   - match if size and signedness match
+ * - arrays & pointers:
+ *   - target types are recursively matched
+ * - structs & unions:
+ *   - local members need to exist in target with the same name
+ *   - for each member we recursively check match unless it is already behind a
+ *     pointer, in which case we only check matching names and compatible kind
+ * - enums:
+ *   - local variants have to have a match in target by symbolic name (but not
+ *     numeric value)
+ *   - size has to match (but enum may match enum64 and vice versa)
+ * - function pointers:
+ *   - number and position of arguments in local type has to match target
+ *   - for each argument and the return value we recursively check match
+ */
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+			   __u32 targ_id, bool behind_ptr, int level)
+{
+	const struct btf_type *local_t, *targ_t;
+	int depth = 32; /* max recursion depth */
+	__u16 local_k, targ_k;
+
+	if (level <= 0)
+		return -EINVAL;
+
+	local_t = btf_type_by_id(local_btf, local_id);
+	targ_t = btf_type_by_id(targ_btf, targ_id);
+
+recur:
+	depth--;
+	if (depth < 0)
+		return -EINVAL;
+
+	local_t = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+	targ_t = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+	if (!local_t || !targ_t)
+		return -EINVAL;
+
+	/* While the name check happens after typedefs are skipped, root-level
+	 * typedefs would still be name-matched as that's the contract with
+	 * callers.
+	 */
+	if (!bpf_core_names_match(local_btf, local_t->name_off, targ_btf, targ_t->name_off))
+		return 0;
+
+	local_k = btf_kind(local_t);
+	targ_k = btf_kind(targ_t);
+
+	switch (local_k) {
+	case BTF_KIND_UNKN:
+		return local_k == targ_k;
+	case BTF_KIND_FWD: {
+		bool local_f = BTF_INFO_KFLAG(local_t->info);
+
+		if (behind_ptr) {
+			if (local_k == targ_k)
+				return local_f == BTF_INFO_KFLAG(targ_t->info);
+
+			/* for forward declarations kflag dictates whether the
+			 * target is a struct (0) or union (1)
+			 */
+			return (targ_k == BTF_KIND_STRUCT && !local_f) ||
+			       (targ_k == BTF_KIND_UNION && local_f);
+		} else {
+			if (local_k != targ_k)
+				return 0;
+
+			/* match if the forward declaration is for the same kind */
+			return local_f == BTF_INFO_KFLAG(targ_t->info);
+		}
+	}
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		if (!btf_is_any_enum(targ_t))
+			return 0;
+
+		return bpf_core_enums_match(local_btf, local_t, targ_btf, targ_t);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		if (behind_ptr) {
+			bool targ_f = BTF_INFO_KFLAG(targ_t->info);
+
+			if (local_k == targ_k)
+				return 1;
+
+			if (targ_k != BTF_KIND_FWD)
+				return 0;
+
+			return (local_k == BTF_KIND_UNION) == targ_f;
+		} else {
+			if (local_k != targ_k)
+				return 0;
+
+			return bpf_core_composites_match(local_btf, local_t, targ_btf, targ_t,
+							 behind_ptr, level);
+		}
+	case BTF_KIND_INT: {
+		__u8 local_sgn;
+		__u8 targ_sgn;
+
+		if (local_k != targ_k)
+			return 0;
+
+		local_sgn = btf_int_encoding(local_t) & BTF_INT_SIGNED;
+		targ_sgn = btf_int_encoding(targ_t) & BTF_INT_SIGNED;
+
+		return local_t->size == targ_t->size && local_sgn == targ_sgn;
+	}
+	case BTF_KIND_PTR:
+		if (local_k != targ_k)
+			return 0;
+
+		behind_ptr = true;
+
+		local_id = local_t->type;
+		targ_id = targ_t->type;
+		goto recur;
+	case BTF_KIND_ARRAY: {
+		const struct btf_array *local_array = btf_array(local_t);
+		const struct btf_array *targ_array = btf_array(targ_t);
+
+		if (local_k != targ_k)
+			return 0;
+
+		if (local_array->nelems != targ_array->nelems)
+			return 0;
+
+		local_id = local_array->type;
+		targ_id = targ_array->type;
+		goto recur;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *local_p = btf_params(local_t);
+		struct btf_param *targ_p = btf_params(targ_t);
+		__u16 local_vlen = btf_vlen(local_t);
+		__u16 targ_vlen = btf_vlen(targ_t);
+		int i, err;
+
+		if (local_k != targ_k)
+			return 0;
+
+		if (local_vlen != targ_vlen)
+			return 0;
+
+		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
+			err = __bpf_core_types_match(local_btf, local_p->type, targ_btf,
+						     targ_p->type, behind_ptr, level - 1);
+			if (err <= 0)
+				return err;
+		}
+
+		/* tail recurse for return type check */
+		local_id = local_t->type;
+		targ_id = targ_t->type;
+		goto recur;
+	}
+	default:
+		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
+			btf_kind_str(local_t), local_id, targ_id);
+		return 0;
+	}
+}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
index 7df0da082f2c..1c0566daf8e8 100644
--- a/tools/lib/bpf/relo_core.h
+++ b/tools/lib/bpf/relo_core.h
@@ -68,8 +68,14 @@ struct bpf_core_relo_res {
 	__u32 new_type_id;
 };
 
+int __bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+				const struct btf *targ_btf, __u32 targ_id, int level);
 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 			      const struct btf *targ_btf, __u32 targ_id);
+int __bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+			   __u32 targ_id, bool behind_ptr, int level);
+int bpf_core_types_match(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf,
+			 __u32 targ_id);
 
 size_t bpf_core_essential_name_len(const char *name);
 
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 5159207cbfd9..d18e37982344 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -652,11 +652,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
 		 *
 		 *   [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
 		 */
-		usdt_rel_ip = usdt_abs_ip = note.loc_addr;
-		if (base_addr) {
+		usdt_abs_ip = note.loc_addr;
+		if (base_addr)
 			usdt_abs_ip += base_addr - note.base_addr;
-			usdt_rel_ip += base_addr - note.base_addr;
-		}
 
 		/* When attaching uprobes (which is what USDTs basically are)
 		 * kernel expects file offset to be specified, not a relative
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f8ad581ea247..6bd7c288e820 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -9,6 +9,7 @@
 #include <linux/bpf.h>
 #include <bpf/libbpf.h>
 #include <bpf/bpf.h>
+#include <linux/filter.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -49,6 +50,7 @@ struct bpf_prog_priv {
 	struct bpf_insn *insns_buf;
 	int nr_types;
 	int *type_mapping;
+	int *prologue_fds;
 };
 
 struct bpf_perf_object {
@@ -56,6 +58,11 @@ struct bpf_perf_object {
 	struct bpf_object *obj;
 };
 
+struct bpf_preproc_result {
+	struct bpf_insn *new_insn_ptr;
+	int new_insn_cnt;
+};
+
 static LIST_HEAD(bpf_objects_list);
 static struct hashmap *bpf_program_hash;
 static struct hashmap *bpf_map_hash;
@@ -86,6 +93,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev)
 	     (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp))
 
 static bool libbpf_initialized;
+static int libbpf_sec_handler;
 
 static int bpf_perf_object__add(struct bpf_object *obj)
 {
@@ -99,12 +107,76 @@ static int bpf_perf_object__add(struct bpf_object *obj)
 	return perf_obj ? 0 : -ENOMEM;
 }
 
+static void *program_priv(const struct bpf_program *prog)
+{
+	void *priv;
+
+	if (IS_ERR_OR_NULL(bpf_program_hash))
+		return NULL;
+	if (!hashmap__find(bpf_program_hash, prog, &priv))
+		return NULL;
+	return priv;
+}
+
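+/* Placeholder instructions that zero the argument registers; for programs
+ * that need a prologue they are prepended before object load and later
+ * replaced by the real generated prologue (see bpf_object__load_prologue).
+ */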
+static struct bpf_insn prologue_init_insn[] = {
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_MOV64_IMM(BPF_REG_5, 0),
+};
+
+static int libbpf_prog_prepare_load_fn(struct bpf_program *prog,
+				       struct bpf_prog_load_opts *opts __maybe_unused,
+				       long cookie __maybe_unused)
+{
+	size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn);
+	size_t orig_insn_cnt, insn_cnt, init_size, orig_size;
+	struct bpf_prog_priv *priv = program_priv(prog);
+	const struct bpf_insn *orig_insn;
+	struct bpf_insn *insn;
+
+	if (IS_ERR_OR_NULL(priv)) {
+		pr_debug("bpf: failed to get private field\n");
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
+
+	if (!priv->need_prologue)
+		return 0;
+
+	/* prepend initialization code to program instructions */
+	orig_insn = bpf_program__insns(prog);
+	orig_insn_cnt = bpf_program__insn_cnt(prog);
+	init_size = init_size_cnt * sizeof(*insn);
+	orig_size = orig_insn_cnt * sizeof(*insn);
+
+	insn_cnt = orig_insn_cnt + init_size_cnt;
+	insn = malloc(insn_cnt * sizeof(*insn));
+	if (!insn)
+		return -ENOMEM;
+
+	memcpy(insn, prologue_init_insn, init_size);
+	memcpy((char *) insn + init_size, orig_insn, orig_size);
+	bpf_program__set_insns(prog, insn, insn_cnt);
+	return 0;
+}
+
 static int libbpf_init(void)
 {
+	LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts,
+		.prog_prepare_load_fn = libbpf_prog_prepare_load_fn,
+	);
+
 	if (libbpf_initialized)
 		return 0;
 
 	libbpf_set_print(libbpf_perf_print);
+	libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE,
+							  0, &handler_opts);
+	if (libbpf_sec_handler < 0) {
+		pr_debug("bpf: failed to register libbpf section handler: %d\n",
+			 libbpf_sec_handler);
+		return -BPF_LOADER_ERRNO__INTERNAL;
+	}
 	libbpf_initialized = true;
 	return 0;
 }
@@ -188,14 +260,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
 	return obj;
 }
 
+static void close_prologue_programs(struct bpf_prog_priv *priv)
+{
+	struct perf_probe_event *pev;
+	int i, fd;
+
+	if (!priv->need_prologue)
+		return;
+	pev = &priv->pev;
+	for (i = 0; i < pev->ntevs; i++) {
+		fd = priv->prologue_fds[i];
+		if (fd != -1)
+			close(fd);
+	}
+}
+
 static void
 clear_prog_priv(const struct bpf_program *prog __maybe_unused,
 		void *_priv)
 {
 	struct bpf_prog_priv *priv = _priv;
 
+	close_prologue_programs(priv);
 	cleanup_perf_probe_events(&priv->pev, 1);
 	zfree(&priv->insns_buf);
+	zfree(&priv->prologue_fds);
 	zfree(&priv->type_mapping);
 	zfree(&priv->sys_name);
 	zfree(&priv->evt_name);
@@ -243,17 +332,6 @@ static bool ptr_equal(const void *key1, const void *key2,
 	return key1 == key2;
 }
 
-static void *program_priv(const struct bpf_program *prog)
-{
-	void *priv;
-
-	if (IS_ERR_OR_NULL(bpf_program_hash))
-		return NULL;
-	if (!hashmap__find(bpf_program_hash, prog, &priv))
-		return NULL;
-	return priv;
-}
-
 static int program_set_priv(struct bpf_program *prog, void *priv)
 {
 	void *old_priv;
@@ -558,8 +636,8 @@ static int bpf__prepare_probe(void)
 
 static int
 preproc_gen_prologue(struct bpf_program *prog, int n,
-		     struct bpf_insn *orig_insns, int orig_insns_cnt,
-		     struct bpf_prog_prep_result *res)
+		     const struct bpf_insn *orig_insns, int orig_insns_cnt,
+		     struct bpf_preproc_result *res)
 {
 	struct bpf_prog_priv *priv = program_priv(prog);
 	struct probe_trace_event *tev;
@@ -607,7 +685,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
 
 	res->new_insn_ptr = buf;
 	res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
-	res->pfd = NULL;
 	return 0;
 
 errout:
@@ -715,7 +792,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
 	struct bpf_prog_priv *priv = program_priv(prog);
 	struct perf_probe_event *pev;
 	bool need_prologue = false;
-	int err, i;
+	int i;
 
 	if (IS_ERR_OR_NULL(priv)) {
 		pr_debug("Internal error when hook preprocessor\n");
@@ -753,6 +830,13 @@ static int hook_load_preprocessor(struct bpf_program *prog)
 		return -ENOMEM;
 	}
 
+	priv->prologue_fds = malloc(sizeof(int) * pev->ntevs);
+	if (!priv->prologue_fds) {
+		pr_debug("Not enough memory: alloc prologue fds failed\n");
+		return -ENOMEM;
+	}
+	memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs);
+
 	priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
 	if (!priv->type_mapping) {
 		pr_debug("Not enough memory: alloc type_mapping failed\n");
@@ -761,13 +845,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
 	memset(priv->type_mapping, -1,
 	       sizeof(int) * pev->ntevs);
 
-	err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
-	if (err)
-		return err;
-
-	err = bpf_program__set_prep(prog, priv->nr_types,
-				    preproc_gen_prologue);
-	return err;
+	return map_prologue(pev, priv->type_mapping, &priv->nr_types);
 }
 
 int bpf__probe(struct bpf_object *obj)
@@ -874,6 +952,77 @@ int bpf__unprobe(struct bpf_object *obj)
 	return ret;
 }
 
+static int bpf_object__load_prologue(struct bpf_object *obj)
+{
+	int init_cnt = ARRAY_SIZE(prologue_init_insn);
+	const struct bpf_insn *orig_insns;
+	struct bpf_preproc_result res;
+	struct perf_probe_event *pev;
+	struct bpf_program *prog;
+	int orig_insns_cnt;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct bpf_prog_priv *priv = program_priv(prog);
+		int err, i, fd;
+
+		if (IS_ERR_OR_NULL(priv)) {
+			pr_debug("bpf: failed to get private field\n");
+			return -BPF_LOADER_ERRNO__INTERNAL;
+		}
+
+		if (!priv->need_prologue)
+			continue;
+
+		/*
+		 * For each program that needs a prologue we do the following:
+		 *
+		 * - take its current instructions and use them
+		 *   to generate the new code with prologue
+		 * - load the new instructions with bpf_prog_load
+		 *   and keep the fd in prologue_fds
+		 * - the new fd will be used in bpf__foreach_event
+		 *   to connect this program with its perf evsel
+		 */
+		orig_insns = bpf_program__insns(prog);
+		orig_insns_cnt = bpf_program__insn_cnt(prog);
+
+		pev = &priv->pev;
+		for (i = 0; i < pev->ntevs; i++) {
+			/*
+			 * Skip the artificial prologue_init_insn instructions
+			 * (init_cnt), so the prologue can be generated in
+			 * their place.
+			 */
+			err = preproc_gen_prologue(prog, i,
+						   orig_insns + init_cnt,
+						   orig_insns_cnt - init_cnt,
+						   &res);
+			if (err)
+				return err;
+
+			fd = bpf_prog_load(bpf_program__get_type(prog),
+					   bpf_program__name(prog), "GPL",
+					   res.new_insn_ptr,
+					   res.new_insn_cnt, NULL);
+			if (fd < 0) {
+				char bf[128];
+
+				libbpf_strerror(-errno, bf, sizeof(bf));
+				pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n",
+					 -errno, bf);
+				return -errno;
+			}
+			priv->prologue_fds[i] = fd;
+		}
+		/*
+		 * We no longer need the original program,
+		 * so we can unload it.
+		 */
+		bpf_program__unload(prog);
+	}
+	return 0;
+}
+
 int bpf__load(struct bpf_object *obj)
 {
 	int err;
@@ -885,7 +1034,7 @@ int bpf__load(struct bpf_object *obj)
 		pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
 		return err;
 	}
-	return 0;
+	return bpf_object__load_prologue(obj);
 }
 
 int bpf__foreach_event(struct bpf_object *obj,
@@ -920,13 +1069,10 @@ int bpf__foreach_event(struct bpf_object *obj,
 		for (i = 0; i < pev->ntevs; i++) {
 			tev = &pev->tevs[i];
 
-			if (priv->need_prologue) {
-				int type = priv->type_mapping[i];
-
-				fd = bpf_program__nth_fd(prog, type);
-			} else {
+			if (priv->need_prologue)
+				fd = priv->prologue_fds[i];
+			else
 				fd = bpf_program__fd(prog);
-			}
 
 			if (fd < 0) {
 				pr_debug("bpf: failed to get file descriptor\n");
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index ca2f47f45670..3a8cb2404ea6 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -41,6 +41,6 @@ test_cpp
 /bench
 *.ko
 *.tmp
-xdpxceiver
+xskxceiver
 xdp_redirect_multi
 xdp_synproxy
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index cb8e552e1418..8d59ec7f4c2d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
 	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-	xdpxceiver xdp_redirect_multi xdp_synproxy
+	xskxceiver xdp_redirect_multi xdp_synproxy
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 
@@ -230,6 +230,8 @@ $(OUTPUT)/xdping: $(TESTING_HELPERS)
 $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
 $(OUTPUT)/test_maps: $(TESTING_HELPERS)
 $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
+$(OUTPUT)/xsk.o: $(BPFOBJ)
+$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o
 
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
@@ -571,6 +573,8 @@ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
 $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
 $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
 $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
+$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
+$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -583,7 +587,9 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 		 $(OUTPUT)/bench_bloom_filter_map.o \
 		 $(OUTPUT)/bench_bpf_loop.o \
 		 $(OUTPUT)/bench_strncmp.o \
-		 $(OUTPUT)/bench_bpf_hashmap_full_update.o
+		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
+		 $(OUTPUT)/bench_local_storage.o \
+		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index d8aa62be996b..c1f20a147462 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
 	       hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
 }
 
+void
+grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+	int i;
+
+	memset(gp_stat, 0, sizeof(struct basic_stats));
+
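+	/* mean of per-iteration average grace-period latency, in usec per GP;
+	 * dividing by (0.0 + res_cnt) keeps the arithmetic in floating point
+	 */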
+	for (i = 0; i < res_cnt; i++)
+		gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
+	if (res_cnt > 1) {
+		for (i = 0; i < res_cnt; i++)
+			gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+	}
+	gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
+void
+grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+	int i;
+
+	memset(gp_stat, 0, sizeof(struct basic_stats));
+	for (i = 0; i < res_cnt; i++)
+		gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
+	if (res_cnt > 1) {
+		for (i = 0; i < res_cnt; i++)
+			gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+	}
+	gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
 void hits_drops_report_final(struct bench_res res[], int res_cnt)
 {
 	int i;
@@ -150,6 +187,53 @@ void ops_report_final(struct bench_res res[], int res_cnt)
 	printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
 }
 
+void local_storage_report_progress(int iter, struct bench_res *res,
+				   long delta_ns)
+{
+	double important_hits_per_sec, hits_per_sec;
+	double delta_sec = delta_ns / 1000000000.0;
+
+	hits_per_sec = res->hits / 1000000.0 / delta_sec;
+	important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec;
+
+	printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+	printf("hits %8.3lfM/s ", hits_per_sec);
+	printf("important_hits %8.3lfM/s\n", important_hits_per_sec);
+}
+
+void local_storage_report_final(struct bench_res res[], int res_cnt)
+{
+	double important_hits_mean = 0.0, important_hits_stddev = 0.0;
+	double hits_mean = 0.0, hits_stddev = 0.0;
+	int i;
+
+	for (i = 0; i < res_cnt; i++) {
+		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+		important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt);
+	}
+
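+	/* sample standard deviation across iterations (note the res_cnt - 1
+	 * denominator)
+	 */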
+	if (res_cnt > 1)  {
+		for (i = 0; i < res_cnt; i++) {
+			hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+				       (hits_mean - res[i].hits / 1000000.0) /
+				       (res_cnt - 1.0);
+			important_hits_stddev +=
+				       (important_hits_mean - res[i].important_hits / 1000000.0) *
+				       (important_hits_mean - res[i].important_hits / 1000000.0) /
+				       (res_cnt - 1.0);
+		}
+
+		hits_stddev = sqrt(hits_stddev);
+		important_hits_stddev = sqrt(important_hits_stddev);
+	}
+	printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ",
+	       hits_mean, hits_stddev);
+	printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean);
+	printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n",
+	       important_hits_mean, important_hits_stddev);
+}
+
 const char *argp_program_version = "benchmark";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 const char argp_program_doc[] =
@@ -188,13 +272,18 @@ static const struct argp_option opts[] = {
 extern struct argp bench_ringbufs_argp;
 extern struct argp bench_bloom_map_argp;
 extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_local_storage_argp;
+extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
 	{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
 	{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+	{ &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
 	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
+		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
 	{},
 };
 
@@ -397,6 +486,10 @@ extern const struct bench bench_bpf_loop;
 extern const struct bench bench_strncmp_no_helper;
 extern const struct bench bench_strncmp_helper;
 extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_local_storage_cache_seq_get;
+extern const struct bench bench_local_storage_cache_interleaved_get;
+extern const struct bench bench_local_storage_cache_hashmap_control;
+extern const struct bench bench_local_storage_tasks_trace;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -432,6 +525,10 @@ static const struct bench *benchs[] = {
 	&bench_strncmp_no_helper,
 	&bench_strncmp_helper,
 	&bench_bpf_hashmap_full_update,
+	&bench_local_storage_cache_seq_get,
+	&bench_local_storage_cache_interleaved_get,
+	&bench_local_storage_cache_hashmap_control,
+	&bench_local_storage_tasks_trace,
 };
 
 static void setup_benchmark()
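
For readers following the math: the two grace_period_*_basic_stats() helpers
above fold everything into single passes, but what they compute is simply the
mean of the per-iteration averages (gp_ns / 1000.0 / gp_ct, i.e. microseconds
per grace period, or stime / gp_ct) and the Bessel-corrected sample standard
deviation (dividing by res_cnt - 1). A minimal standalone sketch of the
equivalent two-pass computation, assuming a plain array of per-iteration
samples (not part of the patch):

	#include <math.h>

	static void basic_stats_sketch(const double *samples, int n,
				       double *mean, double *stddev)
	{
		double m = 0.0, var = 0.0;
		int i;

		/* mean of the samples */
		for (i = 0; i < n; i++)
			m += samples[i] / n;
		/* sample variance; zero when only one sample exists */
		if (n > 1) {
			for (i = 0; i < n; i++)
				var += (samples[i] - m) * (samples[i] - m) / (n - 1.0);
		}
		*mean = m;
		*stddev = sqrt(var);
	}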
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index fb3e213df3dc..d748255877e2 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -30,10 +30,19 @@ struct env {
 	struct cpu_set cons_cpus;
 };
 
+struct basic_stats {
+	double mean;
+	double stddev;
+};
+
 struct bench_res {
 	long hits;
 	long drops;
 	long false_hits;
+	long important_hits;
+	unsigned long gp_ns;
+	unsigned long gp_ct;
+	unsigned int stime;
 };
 
 struct bench {
@@ -61,6 +70,13 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
 void false_hits_report_final(struct bench_res res[], int res_cnt);
 void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
 void ops_report_final(struct bench_res res[], int res_cnt);
+void local_storage_report_progress(int iter, struct bench_res *res,
+				   long delta_ns);
+void local_storage_report_final(struct bench_res res[], int res_cnt);
+void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt,
+				      struct basic_stats *gp_stat);
+void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt,
+				    struct basic_stats *gp_stat);
 
 static inline __u64 get_time_ns(void)
 {
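
A note on units for the new bench_res fields: gp_ns accumulates nanoseconds
and gp_ct counts grace periods, but stime comes from /proc/<pid>/stat and is
therefore in kernel clock ticks, not nanoseconds. A hedged sketch of the
conversion, should absolute time be needed (the tick rate is discovered at
runtime; 100 Hz is common but not guaranteed):

	#include <unistd.h>

	/* Convert a /proc/<pid>/stat tick count to seconds. */
	static double ticks_to_seconds(unsigned int stime_ticks)
	{
		return (double)stime_ticks / (double)sysconf(_SC_CLK_TCK);
	}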
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
new file mode 100644
index 000000000000..5a378c84e81f
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include <linux/btf.h>
+
+#include "local_storage_bench.skel.h"
+#include "bench.h"
+
+#include <test_btf.h>
+
+static struct {
+	__u32 nr_maps;
+	__u32 hashmap_nr_keys_used;
+} args = {
+	.nr_maps = 1000,
+	.hashmap_nr_keys_used = 1000,
+};
+
+enum {
+	ARG_NR_MAPS = 6000,
+	ARG_HASHMAP_NR_KEYS_USED = 6001,
+};
+
+static const struct argp_option opts[] = {
+	{ "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
+		"Set number of local_storage maps"},
+	{ "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
+		0, "When doing hashmap test, set number of hashmap keys test uses"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	long ret;
+
+	switch (key) {
+	case ARG_NR_MAPS:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid nr_maps");
+			argp_usage(state);
+		}
+		args.nr_maps = ret;
+		break;
+	case ARG_HASHMAP_NR_KEYS_USED:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid hashmap_nr_keys_used");
+			argp_usage(state);
+		}
+		args.hashmap_nr_keys_used = ret;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_local_storage_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+/* Keep in sync w/ array of maps in bpf */
+#define MAX_NR_MAPS 1000
+/* Keep in sync w/ same define in bpf */
+#define HASHMAP_SZ 4194304
+
+static void validate(void)
+{
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		exit(1);
+	}
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.nr_maps > MAX_NR_MAPS) {
+		fprintf(stderr, "nr_maps must be <= 1000\n");
+		exit(1);
+	}
+
+	if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
+		fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
+		exit(1);
+	}
+}
+
+static struct {
+	struct local_storage_bench *skel;
+	void *bpf_obj;
+	struct bpf_map *array_of_maps;
+} ctx;
+
+static void prepopulate_hashmap(int fd)
+{
+	int i, key, val;
+
+	/* local_storage gets will have the BPF_LOCAL_STORAGE_GET_F_CREATE flag
+	 * set, so pre-populate the hashmap to keep the comparison fair
+	 */
+	for (i = 0; i < HASHMAP_SZ; i++) {
+		key = val = i;
+		if (bpf_map_update_elem(fd, &key, &val, 0)) {
+			fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
+			exit(1);
+		}
+	}
+}
+
+static void __setup(struct bpf_program *prog, bool hashmap)
+{
+	struct bpf_map *inner_map;
+	int i, fd, mim_fd, err;
+
+	LIBBPF_OPTS(bpf_map_create_opts, create_opts);
+
+	if (!hashmap)
+		create_opts.map_flags = BPF_F_NO_PREALLOC;
+
+	ctx.skel->rodata->num_maps = args.nr_maps;
+	ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
+	inner_map = bpf_map__inner_map(ctx.array_of_maps);
+	create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
+	create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);
+
+	err = local_storage_bench__load(ctx.skel);
+	if (err) {
+		fprintf(stderr, "Error loading skeleton\n");
+		goto err_out;
+	}
+
+	create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);
+
+	mim_fd = bpf_map__fd(ctx.array_of_maps);
+	if (mim_fd < 0) {
+		fprintf(stderr, "Error getting map_in_map fd\n");
+		goto err_out;
+	}
+
+	for (i = 0; i < args.nr_maps; i++) {
+		if (hashmap)
+			fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+					    sizeof(int), HASHMAP_SZ, &create_opts);
+		else
+			fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
+					    sizeof(int), 0, &create_opts);
+		if (fd < 0) {
+			fprintf(stderr, "Error creating map %d: %d\n", i, fd);
+			goto err_out;
+		}
+
+		if (hashmap)
+			prepopulate_hashmap(fd);
+
+		err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
+		if (err) {
+			fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
+			goto err_out;
+		}
+	}
+
+	if (!bpf_program__attach(prog)) {
+		fprintf(stderr, "Error attaching bpf program\n");
+		goto err_out;
+	}
+
+	return;
+err_out:
+	exit(1);
+}
+
+static void hashmap_setup(void)
+{
+	struct local_storage_bench *skel;
+
+	setup_libbpf();
+
+	skel = local_storage_bench__open();
+	ctx.skel = skel;
+	ctx.array_of_maps = skel->maps.array_of_hash_maps;
+	skel->rodata->use_hashmap = 1;
+	skel->rodata->interleave = 0;
+
+	__setup(skel->progs.get_local, true);
+}
+
+static void local_storage_cache_get_setup(void)
+{
+	struct local_storage_bench *skel;
+
+	setup_libbpf();
+
+	skel = local_storage_bench__open();
+	ctx.skel = skel;
+	ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+	skel->rodata->use_hashmap = 0;
+	skel->rodata->interleave = 0;
+
+	__setup(skel->progs.get_local, false);
+}
+
+static void local_storage_cache_get_interleaved_setup(void)
+{
+	struct local_storage_bench *skel;
+
+	setup_libbpf();
+
+	skel = local_storage_bench__open();
+	ctx.skel = skel;
+	ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+	skel->rodata->use_hashmap = 0;
+	skel->rodata->interleave = 1;
+
+	__setup(skel->progs.get_local, false);
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+	res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
+}
+
+static inline void trigger_bpf_program(void)
+{
+	syscall(__NR_getpgid);
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void *producer(void *input)
+{
+	while (true)
+		trigger_bpf_program();
+
+	return NULL;
+}
+
+/* The cache sequential and interleaved get benchs test local_storage get
+ * performance; specifically, they demonstrate the performance cliff of the
+ * current list-plus-cache local_storage model.
+ *
+ * cache sequential get: call bpf_task_storage_get on n maps in order
+ * cache interleaved get: like "sequential get", but interleave 4 calls to the
+ *	'important' map (idx 0 in array_of_maps) for every 10 calls. The goal
+ *	is to mimic an environment where many progs are accessing their
+ *	local_storage maps, with 'our' prog needing to access its map more
+ *	often than others.
+ */
+const struct bench bench_local_storage_cache_seq_get = {
+	.name = "local-storage-cache-seq-get",
+	.validate = validate,
+	.setup = local_storage_cache_get_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = local_storage_report_progress,
+	.report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_interleaved_get = {
+	.name = "local-storage-cache-int-get",
+	.validate = validate,
+	.setup = local_storage_cache_get_interleaved_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = local_storage_report_progress,
+	.report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_hashmap_control = {
+	.name = "local-storage-cache-hashmap-control",
+	.validate = validate,
+	.setup = hashmap_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = local_storage_report_progress,
+	.report_final = local_storage_report_final,
+};
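
These three benches follow the harness contract seen throughout bench.c: the
BPF program bumps global counters, measure() snapshots and resets them once
per reporting interval, and the report hooks turn the deltas into throughput.
A simplified sketch of the snapshot step (this assumes atomic_swap() in
bench.h wraps __atomic_exchange_n, which matches its use above):

	static long counter;	/* incremented from the BPF side via the skeleton */

	static long snapshot_and_reset(void)
	{
		/* read the interval's count and zero it in one atomic step */
		return __atomic_exchange_n(&counter, 0, __ATOMIC_RELAXED);
	}

An individual bench is then invoked as, e.g.,
"./bench --nr_maps 32 local-storage-cache-seq-get".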
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
new file mode 100644
index 000000000000..43f109d93130
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+
+#include <sys/prctl.h>
+#include "local_storage_rcu_tasks_trace_bench.skel.h"
+#include "bench.h"
+
+#include <signal.h>
+
+static struct {
+	__u32 nr_procs;
+	__u32 kthread_pid;
+	bool quiet;
+} args = {
+	.nr_procs = 1000,
+	.kthread_pid = 0,
+	.quiet = false,
+};
+
+enum {
+	ARG_NR_PROCS = 7000,
+	ARG_KTHREAD_PID = 7001,
+	ARG_QUIET = 7002,
+};
+
+static const struct argp_option opts[] = {
+	{ "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
+		"Set number of user processes to spin up"},
+	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
+		"Pid of rcu_tasks_trace kthread for ticks tracking"},
+	{ "quiet", ARG_QUIET, "{0,1}", 0,
+		"If true, don't report progress"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	long ret;
+
+	switch (key) {
+	case ARG_NR_PROCS:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1 || ret > UINT_MAX) {
+			fprintf(stderr, "invalid nr_procs\n");
+			argp_usage(state);
+		}
+		args.nr_procs = ret;
+		break;
+	case ARG_KTHREAD_PID:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 1) {
+			fprintf(stderr, "invalid kthread_pid\n");
+			argp_usage(state);
+		}
+		args.kthread_pid = ret;
+		break;
+	case ARG_QUIET:
+		ret = strtol(arg, NULL, 10);
+		if (ret < 0 || ret > 1) {
+			fprintf(stderr, "invalid quiet %ld\n", ret);
+			argp_usage(state);
+		}
+		args.quiet = ret;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_local_storage_rcu_tasks_trace_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+#define MAX_SLEEP_PROCS 150000
+
+static void validate(void)
+{
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		exit(1);
+	}
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.nr_procs > MAX_SLEEP_PROCS) {
+		fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
+			MAX_SLEEP_PROCS);
+		exit(1);
+	}
+}
+
+static long kthread_pid_ticks(void)
+{
+	char procfs_path[100];
+	long stime;
+	FILE *f;
+
+	if (!args.kthread_pid)
+		return -1;
+
+	sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
+	f = fopen(procfs_path, "r");
+	if (!f) {
+		fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
+		goto err_out;
+	}
+	if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
+		fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
+		goto err_out;
+	}
+	fclose(f);
+	return stime;
+
+err_out:
+	if (f)
+		fclose(f);
+	exit(1);
+	return 0;
+}
+
+static struct {
+	struct local_storage_rcu_tasks_trace_bench *skel;
+	long prev_kthread_stime;
+} ctx;
+
+static void sleep_and_loop(void)
+{
+	while (true) {
+		sleep(rand() % 4);
+		syscall(__NR_getpgid);
+	}
+}
+
+static void local_storage_tasks_trace_setup(void)
+{
+	int i, err, forkret, runner_pid;
+
+	runner_pid = getpid();
+
+	for (i = 0; i < args.nr_procs; i++) {
+		forkret = fork();
+		if (forkret < 0) {
+			fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
+				args.nr_procs);
+			goto err_out;
+		}
+
+		if (!forkret) {
+			err = prctl(PR_SET_PDEATHSIG, SIGKILL);
+			if (err < 0) {
+				fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
+				goto err_out;
+			}
+
+			if (getppid() != runner_pid) {
+				fprintf(stderr, "Runner died while spinning up procs, exiting\n");
+				goto err_out;
+			}
+			sleep_and_loop();
+		}
+	}
+	printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);
+
+	setup_libbpf();
+
+	ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "Error doing open_and_load, exiting\n");
+		goto err_out;
+	}
+
+	ctx.prev_kthread_stime = kthread_pid_ticks();
+
+	if (!bpf_program__attach(ctx.skel->progs.get_local)) {
+		fprintf(stderr, "Error attaching bpf program\n");
+		goto err_out;
+	}
+
+	if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
+		fprintf(stderr, "Error attaching bpf program\n");
+		goto err_out;
+	}
+
+	if (!bpf_program__attach(ctx.skel->progs.postgp)) {
+		fprintf(stderr, "Error attaching bpf program\n");
+		goto err_out;
+	}
+
+	return;
+err_out:
+	exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+	long ticks;
+
+	res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
+	res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
+	ticks = kthread_pid_ticks();
+	res->stime = ticks - ctx.prev_kthread_stime;
+	ctx.prev_kthread_stime = ticks;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void *producer(void *input)
+{
+	while (true)
+		syscall(__NR_getpgid);
+	return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+	if (ctx.skel->bss->unexpected) {
+		fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
+		fprintf(stderr, "Data can't be trusted, exiting\n");
+		exit(1);
+	}
+
+	if (args.quiet)
+		return;
+
+	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
+	       iter, res->gp_ns / (double)res->gp_ct);
+	printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
+	       iter, res->stime / (double)res->gp_ct);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+	struct basic_stats gp_stat;
+
+	grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
+	printf("SUMMARY tasks_trace grace period latency");
+	printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
+	grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
+	printf("SUMMARY ticks per tasks_trace grace period");
+	printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
+}
+
+/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
+ * of RCU Tasks-Trace.
+ *
+ * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
+ * from a sleep() loop, and creating/destroying BPF task-local storage on
+ * wakeup. The number of forked tasks is configurable.
+ *
+ * Exercising code paths which call call_rcu_tasks_trace while there are many
+ * thousands of tasks on the system should result in RCU Tasks-Trace having to
+ * do a noticeable amount of work.
+ *
+ * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
+ * after the grace period has ended, or by measuring grace period latency.
+ *
+ * This benchmark uses both approaches, attaching to the
+ * rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp functions to measure
+ * grace period latency, and using /proc/PID/stat to measure
+ * rcu_tasks_trace_kthread kernel ticks.
+ */
+const struct bench bench_local_storage_tasks_trace = {
+	.name = "local-storage-tasks-trace",
+	.validate = validate,
+	.setup = local_storage_tasks_trace_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = report_progress,
+	.report_final = report_final,
+};
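
One subtlety in kthread_pid_ticks() above: stime is field 15 of
/proc/<pid>/stat per proc(5), and the fourteen %*s skips only work because
rcu_tasks_trace_kthread's comm field contains no spaces. A hedged sketch of a
more robust variant (read_stime_sketch and its offsets are illustrative, not
part of the patch):

	#include <stdio.h>
	#include <string.h>

	static long read_stime_sketch(FILE *f)
	{
		char buf[512], *p;
		long stime;

		if (!fgets(buf, sizeof(buf), f))
			return -1;
		/* comm may contain spaces; skip past its closing ')' */
		p = strrchr(buf, ')');
		if (!p)
			return -1;
		/* 12 fields sit between comm and stime (field 15 overall) */
		if (sscanf(p + 1, " %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld",
			   &stime) != 1)
			return -1;
		return stime;
	}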
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
new file mode 100755
index 000000000000..2eb2b513a173
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Hashmap Control"
+for i in 10 1000 10000 100000 4194304; do
+subtitle "num keys: $i"
+	summarize_local_storage "hashmap (control) sequential    get: "\
+		"$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)"
+	printf "\n"
+done
+
+header "Local Storage"
+for i in 1 10 16 17 24 32 100 1000; do
+subtitle "num_maps: $i"
+	summarize_local_storage "local_storage cache sequential  get: "\
+		"$(./bench --nr_maps $i local-storage-cache-seq-get)"
+	summarize_local_storage "local_storage cache interleaved get: "\
+		"$(./bench --nr_maps $i local-storage-cache-int-get)"
+	printf "\n"
+done
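
The num_maps sweep above, with the adjacent values 16 and 17, is presumably
chosen to straddle the local_storage cache size (BPF_LOCAL_STORAGE_CACHE_SIZE,
16 at the time of writing), which is where the list-plus-cache model's
performance cliff should become visible.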
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
new file mode 100755
index 000000000000..5dac1f02892c
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+kthread_pid=`pgrep rcu_tasks_trace_kthread`
+
+if [ -z "$kthread_pid" ]; then
+	echo "error: Couldn't find rcu_tasks_trace_kthread"
+	exit 1
+fi
+
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
index 6c5e6023a69f..d9f40af82006 100644
--- a/tools/testing/selftests/bpf/benchs/run_common.sh
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -41,6 +41,16 @@ function ops()
 	echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
 }
 
+function local_storage()
+{
+	echo -n "hits throughput: "
+	echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+	echo -n -e ", hits latency: "
+	echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+	echo -n ", important_hits throughput: "
+	echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+}
+
 function total()
 {
 	echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -67,6 +77,13 @@ function summarize_ops()
 	printf "%-20s %s\n" "$bench" "$(ops $summary)"
 }
 
+function summarize_local_storage()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s\n" "$bench" "$(local_storage $summary)"
+}
+
 function summarize_total()
 {
 	bench="$1"
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 719ab56cdb5d..845209581440 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,15 +2,6 @@
 #ifndef __BPF_LEGACY__
 #define __BPF_LEGACY__
 
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
-	struct ____btf_map_##name {				\
-		type_key key;					\
-		type_val value;					\
-	};							\
-	struct ____btf_map_##name				\
-	__attribute__ ((section(".maps." #name), used))		\
-		____btf_map_##name = { }
-
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
  */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3b3edc0fc8a6..c05904d631ec 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -57,3 +57,9 @@ CONFIG_FPROBE=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_MPTCP=y
+CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IP_NF_RAW=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 59cf81ec55af..bec15558fd93 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -436,7 +436,7 @@ struct nstoken *open_netns(const char *name)
 	int err;
 	struct nstoken *token;
 
-	token = malloc(sizeof(struct nstoken));
+	token = calloc(1, sizeof(struct nstoken));
 	if (!ASSERT_OK_PTR(token, "malloc token"))
 		return NULL;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
index 380d7a2072e3..4cd8a25afe68 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel)
 	bpf_link__destroy(link);
 }
 
+static void check_non_constant_callback(struct bpf_loop *skel)
+{
+	struct bpf_link *link =
+		bpf_program__attach(skel->progs.prog_non_constant_callback);
+
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	skel->bss->callback_selector = 0x0F;
+	usleep(1);
+	ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1");
+
+	skel->bss->callback_selector = 0xF0;
+	usleep(1);
+	ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2");
+
+	bpf_link__destroy(link);
+}
+
+static void check_stack(struct bpf_loop *skel)
+{
+	struct bpf_link *link = bpf_program__attach(skel->progs.stack_check);
+	const int max_key = 12;
+	int key;
+	int map_fd;
+
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	map_fd = bpf_map__fd(skel->maps.map1);
+
+	if (!ASSERT_GE(map_fd, 0, "bpf_map__fd"))
+		goto out;
+
+	for (key = 1; key <= max_key; ++key) {
+		int val = key;
+		int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
+
+		if (!ASSERT_OK(err, "bpf_map_update_elem"))
+			goto out;
+	}
+
+	usleep(1);
+
+	for (key = 1; key <= max_key; ++key) {
+		int val;
+		int err = bpf_map_lookup_elem(map_fd, &key, &val);
+
+		if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+			goto out;
+		if (!ASSERT_EQ(val, key + 1, "bad value in the map"))
+			goto out;
+	}
+
+out:
+	bpf_link__destroy(link);
+}
+
 void test_bpf_loop(void)
 {
 	struct bpf_loop *skel;
@@ -140,6 +198,10 @@ void test_bpf_loop(void)
 		check_invalid_flags(skel);
 	if (test__start_subtest("check_nested_calls"))
 		check_nested_calls(skel);
+	if (test__start_subtest("check_non_constant_callback"))
+		check_non_constant_callback(skel);
+	if (test__start_subtest("check_stack"))
+		check_stack(skel);
 
 	bpf_loop__destroy(skel);
 }
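
In check_stack() above, each key is expected to read back as key + 1 because
stack_check and stack_check_outer_callback (in progs/bpf_loop.c, below) each
load six map values into locals, call bpf_loop(), and store the incremented
values back. If bpf_loop inlining allocated stack slots incorrectly, the
locals spanning the call would be clobbered and the read-back would fail.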
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e9a9a31b2ffe..2959a52ced06 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -9,6 +9,9 @@
 #include "bpf_cubic.skel.h"
 #include "bpf_tcp_nogpl.skel.h"
 #include "bpf_dctcp_release.skel.h"
+#include "tcp_ca_write_sk_pacing.skel.h"
+#include "tcp_ca_incompl_cong_ops.skel.h"
+#include "tcp_ca_unsupp_cong_op.skel.h"
 
 #ifndef ENOTSUPP
 #define ENOTSUPP 524
@@ -322,6 +325,58 @@ static void test_rel_setsockopt(void)
 	bpf_dctcp_release__destroy(rel_skel);
 }
 
+static void test_write_sk_pacing(void)
+{
+	struct tcp_ca_write_sk_pacing *skel;
+	struct bpf_link *link;
+
+	skel = tcp_ca_write_sk_pacing__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
+	ASSERT_OK_PTR(link, "attach_struct_ops");
+
+	bpf_link__destroy(link);
+	tcp_ca_write_sk_pacing__destroy(skel);
+}
+
+static void test_incompl_cong_ops(void)
+{
+	struct tcp_ca_incompl_cong_ops *skel;
+	struct bpf_link *link;
+
+	skel = tcp_ca_incompl_cong_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	/* That cong_avoid() and cong_control() are missing is only reported at
+	 * this point:
+	 */
+	link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
+	ASSERT_ERR_PTR(link, "attach_struct_ops");
+
+	bpf_link__destroy(link);
+	tcp_ca_incompl_cong_ops__destroy(skel);
+}
+
+static void test_unsupp_cong_op(void)
+{
+	libbpf_print_fn_t old_print_fn;
+	struct tcp_ca_unsupp_cong_op *skel;
+
+	err_str = "attach to unsupported member get_info";
+	found = false;
+	old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+	skel = tcp_ca_unsupp_cong_op__open_and_load();
+	ASSERT_NULL(skel, "open_and_load");
+	ASSERT_EQ(found, true, "expected_err_msg");
+
+	tcp_ca_unsupp_cong_op__destroy(skel);
+	libbpf_set_print(old_print_fn);
+}
+
 void test_bpf_tcp_ca(void)
 {
 	if (test__start_subtest("dctcp"))
@@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void)
 		test_dctcp_fallback();
 	if (test__start_subtest("rel_setsockopt"))
 		test_rel_setsockopt();
+	if (test__start_subtest("write_sk_pacing"))
+		test_write_sk_pacing();
+	if (test__start_subtest("incompl_cong_ops"))
+		test_incompl_cong_ops();
+	if (test__start_subtest("unsupp_cong_op"))
+		test_unsupp_cong_op();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index edb387163baa..941b0100bafa 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -34,7 +34,6 @@ static bool always_log;
 #undef CHECK
 #define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
 
-#define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
 
 #define NAME_NTH(N) (0xfffe0000 | N)
@@ -4652,7 +4651,6 @@ struct btf_file_test {
 };
 
 static struct btf_file_test file_tests[] = {
-	{ .file = "test_btf_haskv.o", },
 	{ .file = "test_btf_newkv.o", },
 	{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
 };
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 2f92feb809be..c8655ba9a88f 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -543,7 +543,6 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test)
 	return 0;
 }
 
-
 static const struct core_reloc_test_case test_cases[] = {
 	/* validate we can find kernel image and use its BTF for relocs */
 	{
@@ -556,6 +555,7 @@ static const struct core_reloc_test_case test_cases[] = {
 			.valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
 			.comm = "test_progs",
 			.comm_len = sizeof("test_progs"),
+			.local_task_struct_matches = true,
 		},
 		.output_len = sizeof(struct core_reloc_kernel_output),
 		.raw_tp_name = "sys_enter",
@@ -752,9 +752,10 @@ static const struct core_reloc_test_case test_cases[] = {
 	SIZE_CASE(size___diff_offs),
 	SIZE_ERR_CASE(size___err_ambiguous),
 
-	/* validate type existence and size relocations */
+	/* validate type existence, match, and size relocations */
 	TYPE_BASED_CASE(type_based, {
 		.struct_exists = 1,
+		.complex_struct_exists = 1,
 		.union_exists = 1,
 		.enum_exists = 1,
 		.typedef_named_struct_exists = 1,
@@ -763,8 +764,24 @@ static const struct core_reloc_test_case test_cases[] = {
 		.typedef_int_exists = 1,
 		.typedef_enum_exists = 1,
 		.typedef_void_ptr_exists = 1,
+		.typedef_restrict_ptr_exists = 1,
 		.typedef_func_proto_exists = 1,
 		.typedef_arr_exists = 1,
+
+		.struct_matches = 1,
+		.complex_struct_matches = 1,
+		.union_matches = 1,
+		.enum_matches = 1,
+		.typedef_named_struct_matches = 1,
+		.typedef_anon_struct_matches = 1,
+		.typedef_struct_ptr_matches = 1,
+		.typedef_int_matches = 1,
+		.typedef_enum_matches = 1,
+		.typedef_void_ptr_matches = 1,
+		.typedef_restrict_ptr_matches = 1,
+		.typedef_func_proto_matches = 1,
+		.typedef_arr_matches = 1,
+
 		.struct_sz = sizeof(struct a_struct),
 		.union_sz = sizeof(union a_union),
 		.enum_sz = sizeof(enum an_enum),
@@ -780,6 +797,45 @@ static const struct core_reloc_test_case test_cases[] = {
 	TYPE_BASED_CASE(type_based___all_missing, {
 		/* all zeros */
 	}),
+	TYPE_BASED_CASE(type_based___diff, {
+		.struct_exists = 1,
+		.complex_struct_exists = 1,
+		.union_exists = 1,
+		.enum_exists = 1,
+		.typedef_named_struct_exists = 1,
+		.typedef_anon_struct_exists = 1,
+		.typedef_struct_ptr_exists = 1,
+		.typedef_int_exists = 1,
+		.typedef_enum_exists = 1,
+		.typedef_void_ptr_exists = 1,
+		.typedef_func_proto_exists = 1,
+		.typedef_arr_exists = 1,
+
+		.struct_matches = 1,
+		.complex_struct_matches = 1,
+		.union_matches = 1,
+		.enum_matches = 1,
+		.typedef_named_struct_matches = 1,
+		.typedef_anon_struct_matches = 1,
+		.typedef_struct_ptr_matches = 1,
+		.typedef_int_matches = 0,
+		.typedef_enum_matches = 1,
+		.typedef_void_ptr_matches = 1,
+		.typedef_func_proto_matches = 0,
+		.typedef_arr_matches = 0,
+
+		.struct_sz = sizeof(struct a_struct___diff),
+		.union_sz = sizeof(union a_union___diff),
+		.enum_sz = sizeof(enum an_enum___diff),
+		.typedef_named_struct_sz = sizeof(named_struct_typedef___diff),
+		.typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff),
+		.typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff),
+		.typedef_int_sz = sizeof(int_typedef___diff),
+		.typedef_enum_sz = sizeof(enum_typedef___diff),
+		.typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff),
+		.typedef_func_proto_sz = sizeof(func_proto_typedef___diff),
+		.typedef_arr_sz = sizeof(arr_typedef___diff),
+	}),
 	TYPE_BASED_CASE(type_based___diff_sz, {
 		.struct_exists = 1,
 		.union_exists = 1,
@@ -792,6 +848,19 @@ static const struct core_reloc_test_case test_cases[] = {
 		.typedef_void_ptr_exists = 1,
 		.typedef_func_proto_exists = 1,
 		.typedef_arr_exists = 1,
+
+		.struct_matches = 0,
+		.union_matches = 0,
+		.enum_matches = 0,
+		.typedef_named_struct_matches = 0,
+		.typedef_anon_struct_matches = 0,
+		.typedef_struct_ptr_matches = 1,
+		.typedef_int_matches = 0,
+		.typedef_enum_matches = 0,
+		.typedef_void_ptr_matches = 1,
+		.typedef_func_proto_matches = 0,
+		.typedef_arr_matches = 0,
+
 		.struct_sz = sizeof(struct a_struct___diff_sz),
 		.union_sz = sizeof(union a_union___diff_sz),
 		.enum_sz = sizeof(enum an_enum___diff_sz),
@@ -806,10 +875,12 @@ static const struct core_reloc_test_case test_cases[] = {
 	}),
 	TYPE_BASED_CASE(type_based___incompat, {
 		.enum_exists = 1,
+		.enum_matches = 1,
 		.enum_sz = sizeof(enum an_enum),
 	}),
 	TYPE_BASED_CASE(type_based___fn_wrong_args, {
 		.struct_exists = 1,
+		.struct_matches = 1,
 		.struct_sz = sizeof(struct a_struct),
 	}),
 
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 5b93d5d0bd93..335917df0614 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -329,7 +329,7 @@ static int get_syms(char ***symsp, size_t *cntp)
 	struct hashmap *map;
 	char buf[256];
 	FILE *f;
-	int err;
+	int err = 0;
 
 	/*
 	 * The available_filter_functions contains many duplicates,
@@ -407,7 +407,7 @@ static void test_bench_attach(void)
 	double attach_delta, detach_delta;
 	struct bpf_link *link = NULL;
 	char **syms = NULL;
-	size_t cnt, i;
+	size_t cnt = 0, i;
 
 	if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
new file mode 100644
index 000000000000..1102e4f42d2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "lsm_cgroup.skel.h"
+#include "lsm_cgroup_nonvoid.skel.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static struct btf *btf;
+
+static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)
+{
+	LIBBPF_OPTS(bpf_prog_query_opts, p);
+	int cnt = 0;
+	int i;
+
+	ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+	if (!attach_func)
+		return p.prog_cnt;
+
+	/* When attach_func is provided, count the number of progs that
+	 * attach to the given symbol.
+	 */
+
+	if (!btf)
+		btf = btf__load_vmlinux_btf();
+	if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux"))
+		return -1;
+
+	p.prog_ids = malloc(sizeof(u32) * p.prog_cnt);
+	p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt);
+	ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+	for (i = 0; i < p.prog_cnt; i++) {
+		struct bpf_prog_info info = {};
+		__u32 info_len = sizeof(info);
+		int fd;
+
+		fd = bpf_prog_get_fd_by_id(p.prog_ids[i]);
+		ASSERT_GE(fd, 0, "prog_get_fd_by_id");
+		ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd");
+		close(fd);
+
+		if (info.attach_btf_id ==
+		    btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC))
+			cnt++;
+	}
+
+	free(p.prog_ids);
+	free(p.prog_attach_flags);
+
+	return cnt;
+}
+
+static void test_lsm_cgroup_functional(void)
+{
+	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1;
+	int listen_fd, client_fd, accepted_fd;
+	struct lsm_cgroup *skel = NULL;
+	int post_create_prog_fd2 = -1;
+	int post_create_prog_fd = -1;
+	int bind_link_fd2 = -1;
+	int bind_prog_fd2 = -1;
+	int alloc_prog_fd = -1;
+	int bind_prog_fd = -1;
+	int bind_link_fd = -1;
+	int clone_prog_fd = -1;
+	int err, fd, prio;
+	socklen_t socklen;
+
+	cgroup_fd3 = test__join_cgroup("/sock_policy_empty");
+	if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup"))
+		goto close_cgroup;
+
+	cgroup_fd2 = test__join_cgroup("/sock_policy_reuse");
+	if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse"))
+		goto close_cgroup;
+
+	cgroup_fd = test__join_cgroup("/sock_policy");
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+		goto close_cgroup;
+
+	skel = lsm_cgroup__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		goto close_cgroup;
+
+	post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+	post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2);
+	bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+	bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2);
+	alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc);
+	clone_prog_fd = bpf_program__fd(skel->progs.socket_clone);
+
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count");
+	err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+	if (err == -ENOTSUPP) {
+		test__skip();
+		goto close_cgroup;
+	}
+	if (!ASSERT_OK(err, "attach alloc_prog_fd"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count");
+
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count");
+	err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+	if (!ASSERT_OK(err, "attach clone_prog_fd"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count");
+
+	/* Make sure replacing works. */
+
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count");
+	err = bpf_prog_attach(post_create_prog_fd, cgroup_fd,
+			      BPF_LSM_CGROUP, 0);
+	if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+	attach_opts.replace_prog_fd = post_create_prog_fd;
+	err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd,
+				   BPF_LSM_CGROUP, &attach_opts);
+	if (!ASSERT_OK(err, "prog replace post_create_prog_fd"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+	/* Try the same attach/replace via link API. */
+
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count");
+	bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd,
+				       BPF_LSM_CGROUP, NULL);
+	if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+	update_opts.old_prog_fd = bind_prog_fd;
+	update_opts.flags = BPF_F_REPLACE;
+
+	err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts);
+	if (!ASSERT_OK(err, "link update bind_prog_fd"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+	/* Attach another instance of bind program to another cgroup.
+	 * This should trigger the reuse of the trampoline shim (two
+	 * programs attaching to the same btf_id).
+	 */
+
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count");
+	bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2,
+					BPF_LSM_CGROUP, NULL);
+	if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2"))
+		goto detach_cgroup;
+	ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+	ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count");
+
+	/* AF_UNIX is prohibited. */
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+	close(fd);
+
+	/* AF_INET6 gets default policy (sk_priority). */
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+		goto detach_cgroup;
+
+	prio = 0;
+	socklen = sizeof(prio);
+	ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+		  "getsockopt");
+	ASSERT_EQ(prio, 123, "sk_priority");
+
+	close(fd);
+
+	/* TX-only AF_PACKET is allowed. */
+
+	ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0,
+		  "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+	fd = socket(AF_PACKET, SOCK_RAW, 0);
+	ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+	/* TX-only AF_PACKET can not be rebound. */
+
+	struct sockaddr_ll sa = {
+		.sll_family = AF_PACKET,
+		.sll_protocol = htons(ETH_P_ALL),
+	};
+	ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0,
+		  "bind(ETH_P_ALL)");
+
+	close(fd);
+
+	/* Trigger passive open. */
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	ASSERT_GE(listen_fd, 0, "start_server");
+	client_fd = connect_to_fd(listen_fd, 0);
+	ASSERT_GE(client_fd, 0, "connect_to_fd");
+	accepted_fd = accept(listen_fd, NULL, NULL);
+	ASSERT_GE(accepted_fd, 0, "accept");
+
+	prio = 0;
+	socklen = sizeof(prio);
+	ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+		  "getsockopt");
+	ASSERT_EQ(prio, 234, "sk_priority");
+
+	/* These are replaced and never called. */
+	ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create");
+	ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind");
+
+	/* AF_INET6+SOCK_STREAM
+	 * AF_PACKET+SOCK_RAW
+	 * listen_fd
+	 * client_fd
+	 * accepted_fd
+	 */
+	ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+
+	/* start_server
+	 * bind(ETH_P_ALL)
+	 */
+	ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2");
+	/* Single accept(). */
+	ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone");
+
+	/* AF_UNIX+SOCK_STREAM (failed)
+	 * AF_INET6+SOCK_STREAM
+	 * AF_PACKET+SOCK_RAW (failed)
+	 * AF_PACKET+SOCK_RAW
+	 * listen_fd
+	 * client_fd
+	 * accepted_fd
+	 */
+	ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc");
+
+	close(listen_fd);
+	close(client_fd);
+	close(accepted_fd);
+
+	/* Make sure other cgroup doesn't trigger the programs. */
+
+	if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup"))
+		goto detach_cgroup;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+		goto detach_cgroup;
+
+	prio = 0;
+	socklen = sizeof(prio);
+	ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+		  "getsockopt");
+	ASSERT_EQ(prio, 0, "sk_priority");
+
+	close(fd);
+
+detach_cgroup:
+	ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd,
+				   BPF_LSM_CGROUP), 0, "detach_create");
+	close(bind_link_fd);
+	/* Don't close bind_link_fd2, exercise cgroup release cleanup. */
+	ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd,
+				   BPF_LSM_CGROUP), 0, "detach_alloc");
+	ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd,
+				   BPF_LSM_CGROUP), 0, "detach_clone");
+
+close_cgroup:
+	close(cgroup_fd);
+	close(cgroup_fd2);
+	close(cgroup_fd3);
+	lsm_cgroup__destroy(skel);
+}
+
+static void test_lsm_cgroup_nonvoid(void)
+{
+	struct lsm_cgroup_nonvoid *skel = NULL;
+
+	skel = lsm_cgroup_nonvoid__open_and_load();
+	ASSERT_NULL(skel, "open succeeds");
+	lsm_cgroup_nonvoid__destroy(skel);
+}
+
+void test_lsm_cgroup(void)
+{
+	if (test__start_subtest("functional"))
+		test_lsm_cgroup_functional();
+	if (test__start_subtest("nonvoid"))
+		test_lsm_cgroup_nonvoid();
+	btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index f4a13d9dd5c8..c197261d02e2 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -44,7 +44,7 @@ BTF_ID(union,   U)
 BTF_ID(func,    func)
 
 extern __u32 test_list_global[];
-BTF_ID_LIST_GLOBAL(test_list_global)
+BTF_ID_LIST_GLOBAL(test_list_global, 1)
 BTF_ID_UNUSED
 BTF_ID(typedef, S)
 BTF_ID(typedef, T)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 9d211b5c22c4..7d23166c77af 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -394,7 +394,6 @@ void serial_test_sock_fields(void)
 	test();
 
 done:
-	test_sock_fields__detach(skel);
 	test_sock_fields__destroy(skel);
 	if (child_cg_fd >= 0)
 		close(child_cg_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 5f733d50b0d7..9ad9da0f215e 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -12,7 +12,7 @@ int lets_test_this(int);
 
 static volatile int idx = 2;
 static volatile __u64 bla = 0xFEDCBA9876543210ULL;
-static volatile short nums[] = {-1, -2, -3, };
+static volatile short nums[] = {-1, -2, -3, -4};
 
 static volatile struct {
 	int x;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index fb77a123fe89..874a846e298c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -63,7 +63,7 @@ static bool expect_str(char *buf, size_t size, const char *str, const char *name
 static void test_synproxy(bool xdp)
 {
 	int server_fd = -1, client_fd = -1, accept_fd = -1;
-	char *prog_id, *prog_id_end;
+	char *prog_id = NULL, *prog_id_end;
 	struct nstoken *ns = NULL;
 	FILE *ctrl_file = NULL;
 	char buf[CMD_OUT_BUF_SIZE];
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
index e08565282759..de1fc82d2710 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -11,11 +11,19 @@ struct callback_ctx {
 	int output;
 };
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 32);
+	__type(key, int);
+	__type(value, int);
+} map1 SEC(".maps");
+
 /* These should be set by the user program */
 u32 nested_callback_nr_loops;
 u32 stop_index = -1;
 u32 nr_loops;
 int pid;
+int callback_selector;
 
 /* Making these global variables so that the userspace program
  * can verify the output through the skeleton
@@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx)
 
 	return 0;
 }
+
+static int callback_set_f0(int i, void *ctx)
+{
+	g_output = 0xF0;
+	return 0;
+}
+
+static int callback_set_0f(int i, void *ctx)
+{
+	g_output = 0x0F;
+	return 0;
+}
+
+/*
+ * A non-constant callback is a corner case for the bpf_loop inlining logic.
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_non_constant_callback(void *ctx)
+{
+	struct callback_ctx data = {};
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	int (*callback)(int i, void *ctx);
+
+	g_output = 0;
+
+	if (callback_selector == 0x0F)
+		callback = callback_set_0f;
+	else
+		callback = callback_set_f0;
+
+	bpf_loop(1, callback, NULL, 0);
+
+	return 0;
+}
+
+static int stack_check_inner_callback(void *ctx)
+{
+	return 0;
+}
+
+static int map1_lookup_elem(int key)
+{
+	int *val = bpf_map_lookup_elem(&map1, &key);
+
+	return val ? *val : -1;
+}
+
+static void map1_update_elem(int key, int val)
+{
+	bpf_map_update_elem(&map1, &key, &val, BPF_ANY);
+}
+
+static int stack_check_outer_callback(void *ctx)
+{
+	int a = map1_lookup_elem(1);
+	int b = map1_lookup_elem(2);
+	int c = map1_lookup_elem(3);
+	int d = map1_lookup_elem(4);
+	int e = map1_lookup_elem(5);
+	int f = map1_lookup_elem(6);
+
+	bpf_loop(1, stack_check_inner_callback, NULL, 0);
+
+	map1_update_elem(1, a + 1);
+	map1_update_elem(2, b + 1);
+	map1_update_elem(3, c + 1);
+	map1_update_elem(4, d + 1);
+	map1_update_elem(5, e + 1);
+	map1_update_elem(6, f + 1);
+
+	return 0;
+}
+
+/* Some of the local variables in stack_check and
+ * stack_check_outer_callback would be allocated on the stack by the
+ * compiler. This test verifies that the stack content for these
+ * variables is preserved between calls to bpf_loop (which might be an
+ * issue if loop inlining allocated stack slots incorrectly).
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int stack_check(void *ctx)
+{
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	int a = map1_lookup_elem(7);
+	int b = map1_lookup_elem(8);
+	int c = map1_lookup_elem(9);
+	int d = map1_lookup_elem(10);
+	int e = map1_lookup_elem(11);
+	int f = map1_lookup_elem(12);
+
+	bpf_loop(1, stack_check_outer_callback, NULL, 0);
+
+	map1_update_elem(7, a + 1);
+	map1_update_elem(8, b + 1);
+	map1_update_elem(9, c + 1);
+	map1_update_elem(10, d + 1);
+	map1_update_elem(11, e + 1);
+	map1_update_elem(12, f + 1);
+
+	return 0;
+}
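
prog_non_constant_callback above deliberately defeats the bpf_loop() call
inlining added in this series: the callback address is only known at runtime,
so the kernel has to fall back to the ordinary helper call, and the test
checks that the selected callback's output still comes through on that path.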
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 1c1289ba5fc5..98dd2c4815f0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -8,6 +8,7 @@
 #define SOL_SOCKET		1
 #define SO_SNDBUF		7
 #define __SO_ACCEPTCON		(1 << 16)
+#define SO_PRIORITY		12
 
 #define SOL_TCP			6
 #define TCP_CONGESTION		13
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
new file mode 100644
index 000000000000..57ae2c258928
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 26e103302c05..fd8e1b4c6762 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -13,6 +13,7 @@ struct core_reloc_kernel_output {
 	int valid[10];
 	char comm[sizeof("test_progs")];
 	int comm_len;
+	bool local_task_struct_matches;
 };
 
 /*
@@ -860,10 +861,11 @@ struct core_reloc_size___err_ambiguous2 {
 };
 
 /*
- * TYPE EXISTENCE & SIZE
+ * TYPE EXISTENCE, MATCH & SIZE
  */
 struct core_reloc_type_based_output {
 	bool struct_exists;
+	bool complex_struct_exists;
 	bool union_exists;
 	bool enum_exists;
 	bool typedef_named_struct_exists;
@@ -872,9 +874,24 @@ struct core_reloc_type_based_output {
 	bool typedef_int_exists;
 	bool typedef_enum_exists;
 	bool typedef_void_ptr_exists;
+	bool typedef_restrict_ptr_exists;
 	bool typedef_func_proto_exists;
 	bool typedef_arr_exists;
 
+	bool struct_matches;
+	bool complex_struct_matches;
+	bool union_matches;
+	bool enum_matches;
+	bool typedef_named_struct_matches;
+	bool typedef_anon_struct_matches;
+	bool typedef_struct_ptr_matches;
+	bool typedef_int_matches;
+	bool typedef_enum_matches;
+	bool typedef_void_ptr_matches;
+	bool typedef_restrict_ptr_matches;
+	bool typedef_func_proto_matches;
+	bool typedef_arr_matches;
+
 	int struct_sz;
 	int union_sz;
 	int enum_sz;
@@ -892,6 +909,14 @@ struct a_struct {
 	int x;
 };
 
+struct a_complex_struct {
+	union {
+		struct a_struct * restrict a;
+		void *b;
+	} x;
+	volatile long y;
+};
+
 union a_union {
 	int y;
 	int z;
@@ -916,6 +941,7 @@ typedef int int_typedef;
 typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
 
 typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
 
 typedef int (*func_proto_typedef)(long);
 
@@ -923,22 +949,86 @@ typedef char arr_typedef[20];
 
 struct core_reloc_type_based {
 	struct a_struct f1;
-	union a_union f2;
-	enum an_enum f3;
-	named_struct_typedef f4;
-	anon_struct_typedef f5;
-	struct_ptr_typedef f6;
-	int_typedef f7;
-	enum_typedef f8;
-	void_ptr_typedef f9;
-	func_proto_typedef f10;
-	arr_typedef f11;
+	struct a_complex_struct f2;
+	union a_union f3;
+	enum an_enum f4;
+	named_struct_typedef f5;
+	anon_struct_typedef f6;
+	struct_ptr_typedef f7;
+	int_typedef f8;
+	enum_typedef f9;
+	void_ptr_typedef f10;
+	restrict_ptr_typedef f11;
+	func_proto_typedef f12;
+	arr_typedef f13;
 };
 
 /* no types in target */
 struct core_reloc_type_based___all_missing {
 };
 
+/* different member orders, enum variant values, signedness, etc */
+struct a_struct___diff {
+	int x;
+	int a;
+};
+
+struct a_struct___forward;
+
+struct a_complex_struct___diff {
+	union {
+		struct a_struct___forward *a;
+		void *b;
+	} x;
+	volatile long y;
+};
+
+union a_union___diff {
+	int z;
+	int y;
+};
+
+typedef struct a_struct___diff named_struct_typedef___diff;
+
+typedef struct { int z, x, y; } anon_struct_typedef___diff;
+
+typedef struct {
+	int c;
+	int b;
+	int a;
+} *struct_ptr_typedef___diff;
+
+enum an_enum___diff {
+	AN_ENUM_VAL2___diff = 0,
+	AN_ENUM_VAL1___diff = 42,
+	AN_ENUM_VAL3___diff = 1,
+};
+
+typedef unsigned int int_typedef___diff;
+
+typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff;
+
+typedef const void *void_ptr_typedef___diff;
+
+typedef int_typedef___diff (*func_proto_typedef___diff)(long);
+
+typedef char arr_typedef___diff[3];
+
+struct core_reloc_type_based___diff {
+	struct a_struct___diff f1;
+	struct a_complex_struct___diff f2;
+	union a_union___diff f3;
+	enum an_enum___diff f4;
+	named_struct_typedef___diff f5;
+	anon_struct_typedef___diff f6;
+	struct_ptr_typedef___diff f7;
+	int_typedef___diff f8;
+	enum_typedef___diff f9;
+	void_ptr_typedef___diff f10;
+	func_proto_typedef___diff f11;
+	arr_typedef___diff f12;
+};
+
 /* different type sizes, extra modifiers, anon vs named enums, etc */
 struct a_struct___diff_sz {
 	long x;
diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c
new file mode 100644
index 000000000000..2c3234c5b73a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define HASHMAP_SZ 4194304
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1000);
+	__type(key, int);
+	__type(value, int);
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+		__uint(map_flags, BPF_F_NO_PREALLOC);
+		__type(key, int);
+		__type(value, int);
+	});
+} array_of_local_storage_maps SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1000);
+	__type(key, int);
+	__type(value, int);
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_HASH);
+		__uint(max_entries, HASHMAP_SZ);
+		__type(key, int);
+		__type(value, int);
+	});
+} array_of_hash_maps SEC(".maps");
+
+long important_hits;
+long hits;
+
+/* set from user-space */
+const volatile unsigned int use_hashmap;
+const volatile unsigned int hashmap_num_keys;
+const volatile unsigned int num_maps;
+const volatile unsigned int interleave;
+
+struct loop_ctx {
+	struct task_struct *task;
+	long loop_hits;
+	long loop_important_hits;
+};
+
+static int do_lookup(unsigned int elem, struct loop_ctx *lctx)
+{
+	void *map, *inner_map;
+	int idx = 0;
+
+	if (use_hashmap)
+		map = &array_of_hash_maps;
+	else
+		map = &array_of_local_storage_maps;
+
+	inner_map = bpf_map_lookup_elem(map, &elem);
+	if (!inner_map)
+		return -1;
+
+	if (use_hashmap) {
+		idx = bpf_get_prandom_u32() % hashmap_num_keys;
+		bpf_map_lookup_elem(inner_map, &idx);
+	} else {
+		bpf_task_storage_get(inner_map, lctx->task, &idx,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	}
+
+	lctx->loop_hits++;
+	if (!elem)
+		lctx->loop_important_hits++;
+	return 0;
+}
+
+static long loop(u32 index, void *ctx)
+{
+	struct loop_ctx *lctx = (struct loop_ctx *)ctx;
+	unsigned int map_idx = index % num_maps;
+
+	do_lookup(map_idx, lctx);
+	if (interleave && map_idx % 3 == 0)
+		do_lookup(0, lctx);
+	return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+	struct loop_ctx lctx;
+
+	lctx.task = bpf_get_current_task_btf();
+	lctx.loop_hits = 0;
+	lctx.loop_important_hits = 0;
+	bpf_loop(10000, &loop, &lctx, 0);
+	__sync_add_and_fetch(&hits, lctx.loop_hits);
+	__sync_add_and_fetch(&important_hits, lctx.loop_important_hits);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
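
Worked example for the interleave mode: map_idx % 3 == 0 triggers an extra
lookup of map 0, so over any run of ten consecutive indices that doesn't wrap
(e.g. 0-9, hitting 0, 3, 6, 9) four extra "important" lookups occur, which is
the "4 calls for every 10" ratio described in bench_local_storage.c.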
diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
new file mode 100644
index 000000000000..03bf69f49075
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, int);
+} task_storage SEC(".maps");
+
+long hits;
+long gp_hits;
+long gp_times;
+long current_gp_start;
+long unexpected;
+bool postgp_seen;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+	struct task_struct *task;
+	int idx;
+	int *s;
+
+	idx = 0;
+	task = bpf_get_current_task_btf();
+	s = bpf_task_storage_get(&task_storage, task, &idx,
+				 BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!s)
+		return 0;
+
+	*s = 3;
+	bpf_task_storage_delete(&task_storage, task);
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_pregp_step")
+int pregp_step(struct pt_regs *ctx)
+{
+	current_gp_start = bpf_ktime_get_ns();
+	return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_postgp")
+int postgp(struct pt_regs *ctx)
+{
+	if (!current_gp_start && postgp_seen) {
+		/* This will only happen if the prog tracing
+		 * rcu_tasks_trace_pregp_step doesn't execute before this prog.
+		 */
+		__sync_add_and_fetch(&unexpected, 1);
+		return 0;
+	}
+
+	__sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start);
+	__sync_add_and_fetch(&gp_hits, 1);
+	current_gp_start = 0;
+	postgp_seen = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
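A consumer of these counters would derive the average grace-period length in
user space. A short sketch, again assuming a bpftool-generated skeleton with
the globals above living in .bss:

	long gp_hits = skel->bss->gp_hits;
	long gp_ns = skel->bss->gp_times;
	double avg_gp_ms = gp_hits ? (double)gp_ns / gp_hits / 1e6 : 0.0;

	if (skel->bss->unexpected)
		fprintf(stderr, "pregp hook missed %ld time(s)\n",
			skel->bss->unexpected);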
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
new file mode 100644
index 000000000000..4f2d60b87b75
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef AF_UNIX
+#define AF_UNIX 1
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+	__type(key, __u64);
+	__type(value, __u64);
+} cgroup_storage SEC(".maps");
+
+int called_socket_post_create;
+int called_socket_post_create2;
+int called_socket_bind;
+int called_socket_bind2;
+int called_socket_alloc;
+int called_socket_clone;
+
+static __always_inline int test_local_storage(void)
+{
+	__u64 *val;
+
+	val = bpf_get_local_storage(&cgroup_storage, 0);
+	if (!val)
+		return 0;
+	*val += 1;
+
+	return 1;
+}
+
+static __always_inline int real_create(struct socket *sock, int family,
+				       int protocol)
+{
+	struct sock *sk;
+	int prio = 123;
+
+	/* Reject non-tx-only AF_PACKET. */
+	if (family == AF_PACKET && protocol != 0)
+		return 0; /* EPERM */
+
+	sk = sock->sk;
+	if (!sk)
+		return 1;
+
+	/* The rest of the sockets get default policy. */
+	if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+		return 0; /* EPERM */
+
+	/* Make sure bpf_getsockopt is allowed and works. */
+	prio = 0;
+	if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+		return 0; /* EPERM */
+	if (prio != 123)
+		return 0; /* EPERM */
+
+	/* Can access cgroup local storage. */
+	if (!test_local_storage())
+		return 0; /* EPERM */
+
+	return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+	     int type, int protocol, int kern)
+{
+	called_socket_post_create++;
+	return real_create(sock, family, protocol);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create2, struct socket *sock, int family,
+	     int type, int protocol, int kern)
+{
+	called_socket_post_create2++;
+	return real_create(sock, family, protocol);
+}
+
+static __always_inline int real_bind(struct socket *sock,
+				     struct sockaddr *address,
+				     int addrlen)
+{
+	struct sockaddr_ll sa = {};
+
+	if (sock->sk->__sk_common.skc_family != AF_PACKET)
+		return 1;
+
+	if (sock->sk->sk_kern_sock)
+		return 1;
+
+	bpf_probe_read_kernel(&sa, sizeof(sa), address);
+	if (sa.sll_protocol)
+		return 0; /* EPERM */
+
+	/* Can access cgroup local storage. */
+	if (!test_local_storage())
+		return 0; /* EPERM */
+
+	return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+	     int addrlen)
+{
+	called_socket_bind++;
+	return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address,
+	     int addrlen)
+{
+	called_socket_bind2++;
+	return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */
+SEC("lsm_cgroup/sk_alloc_security")
+int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
+{
+	called_socket_alloc++;
+	if (family == AF_UNIX)
+		return 0; /* EPERM */
+
+	/* Can access cgroup local storage. */
+	if (!test_local_storage())
+		return 0; /* EPERM */
+
+	return 1;
+}
+
+/* __cgroup_bpf_run_lsm_sock */
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+	int prio = 234;
+
+	if (!newsk)
+		return 1;
+
+	/* Accepted request sockets get a different priority. */
+	if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+		return 1;
+
+	/* Make sure bpf_getsockopt is allowed and works. */
+	prio = 0;
+	if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+		return 1;
+	if (prio != 234)
+		return 1;
+
+	/* Can access cgroup local storage. */
+	if (!test_local_storage())
+		return 1;
+
+	called_socket_clone++;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
new file mode 100644
index 000000000000..6cb0f161f417
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+	/* Cannot return any errors from void LSM hooks. */
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
new file mode 100644
index 000000000000..7bb872fb22dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+	return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/incompl_cong_ops_ssthresh")
+__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops incompl_cong_ops = {
+	/* Intentionally leaving out any of the required cong_avoid() and
+	 * cong_control() here.
+	 */
+	.ssthresh = (void *)incompl_cong_ops_ssthresh,
+	.undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
+	.name = "bpf_incompl_ops",
+};
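Because the required cong_avoid()/cong_control() callback is missing, the
kernel is expected to refuse to register this struct_ops. A sketch of how a
test might assert that, assuming libbpf's standard struct_ops attach API:

	struct bpf_link *link;

	link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
	if (link) {
		fprintf(stderr, "incomplete cong_ops unexpectedly registered\n");
		bpf_link__destroy(link);
	}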
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
new file mode 100644
index 000000000000..c06f4a41c21a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/unsupp_cong_op_get_info")
+size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
+		union tcp_cc_info *info)
+{
+	return 0;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops unsupp_cong_op = {
+	.get_info = (void *)unsupp_cong_op_get_info,
+	.name = "bpf_unsupp_op",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
new file mode 100644
index 000000000000..43447704cf0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define USEC_PER_SEC 1000000UL
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+	return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/write_sk_pacing_init")
+void BPF_PROG(write_sk_pacing_init, struct sock *sk)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+	__sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
+				     SK_PACING_NEEDED);
+#else
+	sk->sk_pacing_status = SK_PACING_NEEDED;
+#endif
+}
+
+SEC("struct_ops/write_sk_pacing_cong_control")
+void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
+	      const struct rate_sample *rs)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
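+	/* tp->srtt_us holds 8 * smoothed RTT in usec; the << 3 below
+	 * scales the numerator to match, so rate works out to
+	 * cwnd * mss / srtt in bytes per second.
+	 */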
+	unsigned long rate =
+		((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
+		(tp->srtt_us ?: 1U << 3);
+	sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
+SEC("struct_ops/write_sk_pacing_ssthresh")
+__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/write_sk_pacing_undo_cwnd")
+__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops write_sk_pacing = {
+	.init = (void *)write_sk_pacing_init,
+	.cong_control = (void *)write_sk_pacing_cong_control,
+	.ssthresh = (void *)write_sk_pacing_ssthresh,
+	.undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
+	.name = "bpf_w_sk_pacing",
+};
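Once the struct_ops map is attached, the congestion control above is selected
per socket by the .name it registers. A sketch (skeleton and socket fd names
are illustrative):

	#include <netinet/tcp.h>

	static const char cc[] = "bpf_w_sk_pacing";
	struct bpf_link *link;

	link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
	if (link)
		setsockopt(sock_fd, IPPROTO_TCP, TCP_CONGESTION,
			   cc, sizeof(cc) - 1);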
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
deleted file mode 100644
index 07c94df13660..000000000000
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
-
-struct ipv_counts {
-	unsigned int v4;
-	unsigned int v6;
-};
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-struct bpf_map_def SEC("maps") btf_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct ipv_counts),
-	.max_entries = 4,
-};
-#pragma GCC diagnostic pop
-
-BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-
-__attribute__((noinline))
-int test_long_fname_2(void)
-{
-	struct ipv_counts *counts;
-	int key = 0;
-
-	counts = bpf_map_lookup_elem(&btf_map, &key);
-	if (!counts)
-		return 0;
-
-	counts->v6++;
-
-	return 0;
-}
-
-__attribute__((noinline))
-int test_long_fname_1(void)
-{
-	return test_long_fname_2();
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(void *arg)
-{
-	return test_long_fname_1();
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 762671a2e90c..251854a041b5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -9,19 +9,6 @@ struct ipv_counts {
 	unsigned int v6;
 };
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-/* just to validate we can handle maps in multiple sections */
-struct bpf_map_def SEC("maps") btf_map_legacy = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(long long),
-	.max_entries = 4,
-};
-#pragma GCC diagnostic pop
-
-BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
-
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 4);
@@ -41,11 +28,6 @@ int test_long_fname_2(void)
 
 	counts->v6++;
 
-	/* just verify we can reference both maps */
-	counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
-	if (!counts)
-		return 0;
-
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index 145028b52ad8..a17dd83eae67 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -21,6 +21,7 @@ struct core_reloc_kernel_output {
 	/* we have test_progs[-flavor], so cut flavor part */
 	char comm[sizeof("test_progs")];
 	int comm_len;
+	bool local_task_struct_matches;
 };
 
 struct task_struct {
@@ -30,11 +31,25 @@ struct task_struct {
 	struct task_struct *group_leader;
 };
 
+struct mm_struct___wrong {
+	int abc_whatever_should_not_exist;
+};
+
+struct task_struct___local {
+	int pid;
+	struct mm_struct___wrong *mm;
+};
+
 #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
 
 SEC("raw_tracepoint/sys_enter")
 int test_core_kernel(void *ctx)
 {
+	/* Support for the BPF_TYPE_MATCHES argument to the
+	 * __builtin_preserve_type_info builtin was added at some point during
+	 * development of clang 15, and it's what we require for this test.
+	 */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
 	struct task_struct *task = (void *)bpf_get_current_task();
 	struct core_reloc_kernel_output *out = (void *)&data.out;
 	uint64_t pid_tgid = bpf_get_current_pid_tgid();
@@ -93,6 +108,10 @@ int test_core_kernel(void *ctx)
 		group_leader, group_leader, group_leader, group_leader,
 		comm);
 
+	out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local);
+#else
+	data.skip = true;
+#endif
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
index fb60f8195c53..2edb4df35e6e 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -19,6 +19,14 @@ struct a_struct {
 	int x;
 };
 
+struct a_complex_struct {
+	union {
+		struct a_struct *a;
+		void *b;
+	} x;
+	volatile long y;
+};
+
 union a_union {
 	int y;
 	int z;
@@ -43,6 +51,7 @@ typedef int int_typedef;
 typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
 
 typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
 
 typedef int (*func_proto_typedef)(long);
 
@@ -50,6 +59,7 @@ typedef char arr_typedef[20];
 
 struct core_reloc_type_based_output {
 	bool struct_exists;
+	bool complex_struct_exists;
 	bool union_exists;
 	bool enum_exists;
 	bool typedef_named_struct_exists;
@@ -58,9 +68,24 @@ struct core_reloc_type_based_output {
 	bool typedef_int_exists;
 	bool typedef_enum_exists;
 	bool typedef_void_ptr_exists;
+	bool typedef_restrict_ptr_exists;
 	bool typedef_func_proto_exists;
 	bool typedef_arr_exists;
 
+	bool struct_matches;
+	bool complex_struct_matches;
+	bool union_matches;
+	bool enum_matches;
+	bool typedef_named_struct_matches;
+	bool typedef_anon_struct_matches;
+	bool typedef_struct_ptr_matches;
+	bool typedef_int_matches;
+	bool typedef_enum_matches;
+	bool typedef_void_ptr_matches;
+	bool typedef_restrict_ptr_matches;
+	bool typedef_func_proto_matches;
+	bool typedef_arr_matches;
+
 	int struct_sz;
 	int union_sz;
 	int enum_sz;
@@ -77,10 +102,17 @@ struct core_reloc_type_based_output {
 SEC("raw_tracepoint/sys_enter")
 int test_core_type_based(void *ctx)
 {
-#if __has_builtin(__builtin_preserve_type_info)
+	/* Support for the BPF_TYPE_MATCHES argument to the
+	 * __builtin_preserve_type_info builtin was added at some point during
+	 * development of clang 15, and it's what we require for this test.
+	 * Part of it could run with merely __builtin_preserve_type_info (which
+	 * could be checked separately), but we have to find an upper bound.
+	 */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
 	struct core_reloc_type_based_output *out = (void *)&data.out;
 
 	out->struct_exists = bpf_core_type_exists(struct a_struct);
+	out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct);
 	out->union_exists = bpf_core_type_exists(union a_union);
 	out->enum_exists = bpf_core_type_exists(enum an_enum);
 	out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
@@ -89,9 +121,24 @@ int test_core_type_based(void *ctx)
 	out->typedef_int_exists = bpf_core_type_exists(int_typedef);
 	out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
 	out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+	out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef);
 	out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
 	out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
 
+	out->struct_matches = bpf_core_type_matches(struct a_struct);
+	out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct);
+	out->union_matches = bpf_core_type_matches(union a_union);
+	out->enum_matches = bpf_core_type_matches(enum an_enum);
+	out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef);
+	out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef);
+	out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef);
+	out->typedef_int_matches = bpf_core_type_matches(int_typedef);
+	out->typedef_enum_matches = bpf_core_type_matches(enum_typedef);
+	out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef);
+	out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef);
+	out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef);
+	out->typedef_arr_matches = bpf_core_type_matches(arr_typedef);
+
 	out->struct_sz = bpf_core_type_size(struct a_struct);
 	out->union_sz = bpf_core_type_size(union a_union);
 	out->enum_sz = bpf_core_type_size(enum an_enum);
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 9fd62e94b5e6..736686e903f6 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -77,16 +77,30 @@ struct {
 	__uint(max_entries, MAX_ALLOWED_PORTS);
 } allowed_ports SEC(".maps");
 
+/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in
+ * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally.
+ */
+
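+/* preserve_access_index records CO-RE relocations for this local definition,
+ * so its field offsets are fixed up against the kernel's real struct
+ * bpf_ct_opts at program load time.
+ */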
+struct bpf_ct_opts___local {
+	s32 netns_id;
+	s32 error;
+	u8 l4proto;
+	u8 dir;
+	u8 reserved[2];
+} __attribute__((preserve_access_index));
+
+#define BPF_F_CURRENT_NETNS (-1)
+
 extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
 					 struct bpf_sock_tuple *bpf_tuple,
 					 __u32 len_tuple,
-					 struct bpf_ct_opts *opts,
+					 struct bpf_ct_opts___local *opts,
 					 __u32 len_opts) __ksym;
 
 extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
 					 struct bpf_sock_tuple *bpf_tuple,
 					 u32 len_tuple,
-					 struct bpf_ct_opts *opts,
+					 struct bpf_ct_opts___local *opts,
 					 u32 len_opts) __ksym;
 
 extern void bpf_ct_release(struct nf_conn *ct) __ksym;
@@ -393,7 +407,7 @@ static __always_inline int tcp_dissect(void *data, void *data_end,
 
 static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
 {
-	struct bpf_ct_opts ct_lookup_opts = {
+	struct bpf_ct_opts___local ct_lookup_opts = {
 		.netns_id = BPF_F_CURRENT_NETNS,
 		.l4proto = IPPROTO_TCP,
 	};
@@ -714,10 +728,6 @@ static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
 static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
 					   struct header_pointers *hdr, bool xdp)
 {
-	struct bpf_ct_opts ct_lookup_opts = {
-		.netns_id = BPF_F_CURRENT_NETNS,
-		.l4proto = IPPROTO_TCP,
-	};
 	int ret;
 
 	ret = tcp_dissect(data, data_end, hdr);
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index e443e6542cb9..a6410bebe603 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -471,12 +471,6 @@ class BashcompExtractor(FileExtractor):
     def get_prog_attach_types(self):
         return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
 
-    def get_map_types(self):
-        return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
-
-    def get_cgroup_attach_types(self):
-        return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
-
 def verify(first_set, second_set, message):
     """
     Print all values that differ between two sets.
@@ -516,17 +510,12 @@ def main():
     man_map_types = man_map_info.get_map_types()
     man_map_info.close()
 
-    bashcomp_info = BashcompExtractor()
-    bashcomp_map_types = bashcomp_info.get_map_types()
-
     verify(source_map_types, help_map_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):')
     verify(source_map_types, man_map_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):')
     verify(help_map_options, man_map_options,
             f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
-    verify(source_map_types, bashcomp_map_types,
-            f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
 
     # Attach types (names)
 
@@ -542,8 +531,10 @@ def main():
     man_prog_attach_types = man_prog_info.get_attach_types()
     man_prog_info.close()
 
-    bashcomp_info.reset_read() # We stopped at map types, rewind
+
+    bashcomp_info = BashcompExtractor()
     bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+    bashcomp_info.close()
 
     verify(source_prog_attach_types, help_prog_attach_types,
             f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
@@ -568,17 +559,12 @@ def main():
     man_cgroup_attach_types = man_cgroup_info.get_attach_types()
     man_cgroup_info.close()
 
-    bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
-    bashcomp_info.close()
-
     verify(source_cgroup_attach_types, help_cgroup_attach_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
     verify(source_cgroup_attach_types, man_cgroup_attach_types,
             f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
     verify(help_cgroup_options, man_cgroup_options,
             f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
-    verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
-            f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
 
     # Options for remaining commands
 
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 38782bd47fdc..fb4f4714eeb4 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -4,6 +4,8 @@
 #ifndef _TEST_BTF_H
 #define _TEST_BTF_H
 
+#define BTF_END_RAW 0xdeadbeef
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen)			\
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 372579c9f45e..f9d553fbf68a 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -51,12 +51,24 @@
 #endif
 
 #define MAX_INSNS	BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS	32
+#define MAX_UNEXPECTED_INSNS	32
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
 #define MAX_NR_MAPS	23
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
+#define MAX_FUNC_INFOS	8
+#define MAX_BTF_STRINGS	256
+#define MAX_BTF_TYPES	256
+
+#define INSN_OFF_MASK	((__s16)0xFFFF)
+#define INSN_IMM_MASK	((__s32)0xFFFFFFFF)
+#define SKIP_INSNS()	BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
+#define DEFAULT_LIBBPF_LOG_LEVEL	4
+#define VERBOSE_LIBBPF_LOG_LEVEL	1
 
 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
 #define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
@@ -79,6 +91,23 @@ struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
 	struct bpf_insn	*fill_insns;
+	/* If specified, the test engine looks for this sequence of
+	 * instructions in the BPF program after loading, which allows
+	 * testing rewrites applied by the verifier.  Use the values
+	 * INSN_OFF_MASK and INSN_IMM_MASK to mask the `off` and `imm`
+	 * fields if their content does not matter.  The test case
+	 * fails if the specified instructions are not found.
+	 *
+	 * The sequence can be split into sub-sequences by adding a
+	 * SKIP_INSNS instruction at the end of each sub-sequence.  In
+	 * that case the sub-sequences are searched for one after
+	 * another.
+	 */
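+	/* For example, to require that a BPF-to-BPF (pseudo) call is
+	 * present while ignoring its off/imm encoding (cf. the bpf_loop
+	 * inlining tests):
+	 *
+	 *   .expected_insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0,
+	 *		BPF_PSEUDO_CALL, INSN_OFF_MASK, INSN_IMM_MASK) },
+	 */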
+	struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+	/* If specified, the test engine applies the same pattern-matching
+	 * logic as for `expected_insns`.  If the specified pattern is
+	 * matched, the test case is marked as failed.
+	 */
+	struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
 	int fixup_map_hash_8b[MAX_FIXUPS];
 	int fixup_map_hash_48b[MAX_FIXUPS];
 	int fixup_map_hash_16b[MAX_FIXUPS];
@@ -135,6 +164,14 @@ struct bpf_test {
 	};
 	enum bpf_attach_type expected_attach_type;
 	const char *kfunc;
+	struct bpf_func_info func_info[MAX_FUNC_INFOS];
+	int func_info_cnt;
+	char btf_strings[MAX_BTF_STRINGS];
+	/* A set of BTF types to load when specified,
+	 * use macro definitions from test_btf.h,
+	 * must end with BTF_END_RAW
+	 */
+	__u32 btf_types[MAX_BTF_TYPES];
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self)
 	}
 }
 
+static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->fill_insns;
+	/* This test was added to catch a specific use-after-free
+	 * error, which happened upon BPF program reallocation.
+	 * Reallocation is handled by core.c:bpf_prog_realloc, which
+	 * reuses old memory if the page boundary is not crossed.  The
+	 * value of `len` is chosen to cross this boundary on bpf_loop
+	 * patching.
+	 */
+	const int len = getpagesize() - 25;
+	int callback_load_idx;
+	int callback_idx;
+	int i = 0;
+
+	insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1);
+	callback_load_idx = i;
+	insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW,
+				 BPF_REG_2, BPF_PSEUDO_FUNC, 0,
+				 777 /* filled below */);
+	insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0);
+	insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0);
+	insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0);
+	insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop);
+
+	while (i < len - 3)
+		insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+	insn[i++] = BPF_EXIT_INSN();
+
+	callback_idx = i;
+	insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+	insn[i++] = BPF_EXIT_INSN();
+
+	insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1;
+	self->func_info[1].insn_off = callback_idx;
+	self->prog_len = i;
+	assert(i == len);
+}
+
 /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
 #define BPF_SK_LOOKUP(func)						\
 	/* struct bpf_sock_tuple tuple = {} */				\
@@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = {
 	BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
 };
 
-static int load_btf(void)
+static char bpf_vlog[UINT_MAX >> 8];
+
+static int load_btf_spec(__u32 *types, int types_len,
+			 const char *strings, int strings_len)
 {
 	struct btf_header hdr = {
 		.magic = BTF_MAGIC,
 		.version = BTF_VERSION,
 		.hdr_len = sizeof(struct btf_header),
-		.type_len = sizeof(btf_raw_types),
-		.str_off = sizeof(btf_raw_types),
-		.str_len = sizeof(btf_str_sec),
+		.type_len = types_len,
+		.str_off = types_len,
+		.str_len = strings_len,
 	};
 	void *ptr, *raw_btf;
 	int btf_fd;
+	LIBBPF_OPTS(bpf_btf_load_opts, opts,
+		    .log_buf = bpf_vlog,
+		    .log_size = sizeof(bpf_vlog),
+		    .log_level = (verbose
+				  ? VERBOSE_LIBBPF_LOG_LEVEL
+				  : DEFAULT_LIBBPF_LOG_LEVEL),
+	);
 
-	ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
-			       sizeof(btf_str_sec));
+	raw_btf = malloc(sizeof(hdr) + types_len + strings_len);
 
+	ptr = raw_btf;
 	memcpy(ptr, &hdr, sizeof(hdr));
 	ptr += sizeof(hdr);
-	memcpy(ptr, btf_raw_types, hdr.type_len);
+	memcpy(ptr, types, hdr.type_len);
 	ptr += hdr.type_len;
-	memcpy(ptr, btf_str_sec, hdr.str_len);
+	memcpy(ptr, strings, hdr.str_len);
 	ptr += hdr.str_len;
 
-	btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL);
-	free(raw_btf);
+	btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts);
 	if (btf_fd < 0)
-		return -1;
-	return btf_fd;
+		printf("Failed to load BTF spec: '%s'\n", strerror(errno));
+
+	free(raw_btf);
+
+	return btf_fd < 0 ? -1 : btf_fd;
+}
+
+static int load_btf(void)
+{
+	return load_btf_spec(btf_raw_types, sizeof(btf_raw_types),
+			     btf_str_sec, sizeof(btf_str_sec));
+}
+
+static int load_btf_for_test(struct bpf_test *test)
+{
+	int types_num = 0;
+	int types_len;
+
+	while (types_num < MAX_BTF_TYPES &&
+	       test->btf_types[types_num] != BTF_END_RAW)
+		++types_num;
+
+	types_len = types_num * sizeof(test->btf_types[0]);
+
+	return load_btf_spec(test->btf_types, types_len,
+			     test->btf_strings, sizeof(test->btf_strings));
 }
 
 static int create_map_spin_lock(void)
@@ -770,8 +878,6 @@ static int create_map_kptr(void)
 	return fd;
 }
 
-static char bpf_vlog[UINT_MAX >> 8];
-
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			  struct bpf_insn *prog, int *map_fds)
 {
@@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp)
 	return true;
 }
 
+static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 xlated_prog_len;
+	__u32 buf_element_size = sizeof(struct bpf_insn);
+
+	if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("bpf_obj_get_info_by_fd failed");
+		return -1;
+	}
+
+	xlated_prog_len = info.xlated_prog_len;
+	if (xlated_prog_len % buf_element_size) {
+		printf("Program length %d is not multiple of %d\n",
+		       xlated_prog_len, buf_element_size);
+		return -1;
+	}
+
+	*cnt = xlated_prog_len / buf_element_size;
+	*buf = calloc(*cnt, buf_element_size);
+	if (!*buf) {
+		perror("can't allocate xlated program buffer");
+		return -ENOMEM;
+	}
+
+	bzero(&info, sizeof(info));
+	info.xlated_prog_len = xlated_prog_len;
+	info.xlated_prog_insns = (__u64)*buf;
+	if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) {
+		perror("second bpf_obj_get_info_by_fd failed");
+		goto out_free_buf;
+	}
+
+	return 0;
+
+out_free_buf:
+	free(*buf);
+	return -1;
+}
+
+static bool is_null_insn(struct bpf_insn *insn)
+{
+	struct bpf_insn null_insn = {};
+
+	return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+	struct bpf_insn skip_insn = SKIP_INSNS();
+
+	return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+	int i;
+
+	for (i = 0; i < max_len; ++i) {
+		if (is_null_insn(&seq[i]))
+			return i;
+	}
+	return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+	struct bpf_insn orig_masked;
+
+	memcpy(&orig_masked, orig, sizeof(orig_masked));
+	if (masked->imm == INSN_IMM_MASK)
+		orig_masked.imm = INSN_IMM_MASK;
+	if (masked->off == INSN_OFF_MASK)
+		orig_masked.off = INSN_OFF_MASK;
+
+	return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+			    int seq_len, int subseq_len)
+{
+	int i, j;
+
+	if (subseq_len > seq_len)
+		return -1;
+
+	for (i = 0; i < seq_len - subseq_len + 1; ++i) {
+		bool found = true;
+
+		for (j = 0; j < subseq_len; ++j) {
+			if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+				found = false;
+				break;
+			}
+		}
+		if (found)
+			return i;
+	}
+
+	return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+	int i;
+
+	for (i = 0; i < len; ++i)
+		if (is_skip_insn(&seq[i]))
+			return i;
+
+	return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` can be found in
+ * `seq` one after another. Sub-sequences within `subseqs` are
+ * separated by a single SKIP_INSNS() marker instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+				  int seq_len, int max_subseqs_len)
+{
+	int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+	while (subseqs_len > 0) {
+		int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+		int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+		int subseq_idx = find_insn_subseq(seq, subseqs,
+						  seq_len, cur_subseq_len);
+
+		if (subseq_idx < 0)
+			return false;
+		seq += subseq_idx + cur_subseq_len;
+		seq_len -= subseq_idx + cur_subseq_len;
+		subseqs += cur_subseq_len + 1;
+		subseqs_len -= cur_subseq_len + 1;
+	}
+
+	return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+	int i;
+
+	printf("  addr  op d s off  imm\n");
+	for (i = 0; i < cnt; ++i) {
+		struct bpf_insn *insn = &buf[i];
+
+		if (is_null_insn(insn))
+			break;
+
+		if (is_skip_insn(insn))
+			printf("  ...\n");
+		else
+			printf("  %04x: %02x %1x %x %04hx %08x\n",
+			       i, insn->code, insn->dst_reg,
+			       insn->src_reg, insn->off, insn->imm);
+	}
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+	struct bpf_insn *buf;
+	int cnt;
+	bool result = true;
+	bool check_expected = !is_null_insn(test->expected_insns);
+	bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+	if (!check_expected && !check_unexpected)
+		goto out;
+
+	if (get_xlated_program(fd_prog, &buf, &cnt)) {
+		printf("FAIL: can't get xlated program\n");
+		result = false;
+		goto out;
+	}
+
+	if (check_expected &&
+	    !find_all_insn_subseqs(buf, test->expected_insns,
+				   cnt, MAX_EXPECTED_INSNS)) {
+		printf("FAIL: can't find expected subsequence of instructions\n");
+		result = false;
+		if (verbose) {
+			printf("Program:\n");
+			print_insn(buf, cnt);
+			printf("Expected subsequence:\n");
+			print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+		}
+	}
+
+	if (check_unexpected &&
+	    find_all_insn_subseqs(buf, test->unexpected_insns,
+				  cnt, MAX_UNEXPECTED_INSNS)) {
+		printf("FAIL: found unexpected subsequence of instructions\n");
+		result = false;
+		if (verbose) {
+			printf("Program:\n");
+			print_insn(buf, cnt);
+			printf("Un-expected subsequence:\n");
+			print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+		}
+	}
+
+	free(buf);
+ out:
+	return result;
+}
+
 static void do_test_single(struct bpf_test *test, bool unpriv,
 			   int *passes, int *errors)
 {
-	int fd_prog, expected_ret, alignment_prevented_execution;
+	int fd_prog, btf_fd, expected_ret, alignment_prevented_execution;
 	int prog_len, prog_type = test->prog_type;
 	struct bpf_insn *prog = test->insns;
 	LIBBPF_OPTS(bpf_prog_load_opts, opts);
@@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	__u32 pflags;
 	int i, err;
 
+	fd_prog = -1;
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		map_fds[i] = -1;
+	btf_fd = -1;
 
 	if (!prog_type)
 		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
@@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	opts.expected_attach_type = test->expected_attach_type;
 	if (verbose)
-		opts.log_level = 1;
+		opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL;
 	else if (expected_ret == VERBOSE_ACCEPT)
 		opts.log_level = 2;
 	else
-		opts.log_level = 4;
+		opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
 	opts.prog_flags = pflags;
 
 	if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
@@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		opts.attach_btf_id = attach_btf_id;
 	}
 
+	if (test->btf_types[0] != 0) {
+		btf_fd = load_btf_for_test(test);
+		if (btf_fd < 0)
+			goto fail_log;
+		opts.prog_btf_fd = btf_fd;
+	}
+
+	if (test->func_info_cnt != 0) {
+		opts.func_info = test->func_info;
+		opts.func_info_cnt = test->func_info_cnt;
+		opts.func_info_rec_size = sizeof(test->func_info[0]);
+	}
+
 	opts.log_buf = bpf_vlog;
 	opts.log_size = sizeof(bpf_vlog);
 	fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
@@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	if (verbose)
 		printf(", verifier log:\n%s", bpf_vlog);
 
+	if (!check_xlated_program(test, fd_prog))
+		goto fail_log;
+
 	run_errs = 0;
 	run_successes = 0;
 	if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
@@ -1305,6 +1637,7 @@ close_fds:
 	if (test->fill_insns)
 		free(test->fill_insns);
 	close(fd_prog);
+	close(btf_fd);
 	for (i = 0; i < MAX_NR_MAPS; i++)
 		close(map_fds[i]);
 	sched_yield();
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 567500299231..096a957594cd 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -47,7 +47,7 @@
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
 #
-# See the source xdpxceiver.c for information on each test
+# See the source xskxceiver.c for information on each test
 #
 # Kernel configuration:
 # ---------------------
@@ -160,14 +160,14 @@ statusList=()
 
 TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
 
-execxdpxceiver
+exec_xskxceiver
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
 TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
 busy_poll=1
 
 setup_vethPairs
-execxdpxceiver
+exec_xskxceiver
 
 ## END TESTS
 
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
new file mode 100644
index 000000000000..2d0023659d88
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -0,0 +1,263 @@
+#define BTF_TYPES \
+	.btf_strings = "\0int\0i\0ctx\0callback\0main\0", \
+	.btf_types = { \
+	/* 1: int   */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \
+	/* 2: int*  */ BTF_PTR_ENC(1), \
+	/* 3: void* */ BTF_PTR_ENC(0), \
+	/* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \
+		BTF_FUNC_PROTO_ARG_ENC(7, 3), \
+	/* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \
+		BTF_FUNC_PROTO_ARG_ENC(5, 1), \
+		BTF_FUNC_PROTO_ARG_ENC(7, 2), \
+	/* 6: main      */ BTF_FUNC_ENC(20, 4), \
+	/* 7: callback  */ BTF_FUNC_ENC(11, 5), \
+	BTF_END_RAW \
+	}
+
+#define MAIN_TYPE	6
+#define CALLBACK_TYPE	7
+
+/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF
+ * fields for pseudo calls
+ */
+#define PSEUDO_CALL_INSN() \
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \
+		     INSN_OFF_MASK, INSN_IMM_MASK)
+
+/* can't use BPF_FUNC_loop constant,
+ * do_misc_fixups adjusts the IMM field
+ */
+#define HELPER_CALL_INSN() \
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK)
+
+{
+	"inline simple bpf_loop call",
+	.insns = {
+	/* main */
+	/* force verifier state branching to verify logic on first and
+	 * subsequent bpf_loop insn processing steps
+	 */
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* callback */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.expected_insns = { PSEUDO_CALL_INSN() },
+	.unexpected_insns = { HELPER_CALL_INSN() },
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result = ACCEPT,
+	.runs = 0,
+	.func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
+	.func_info_cnt = 2,
+	BTF_TYPES
+},
+{
+	"don't inline bpf_loop call, flags non-zero",
+	.insns = {
+	/* main */
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+	BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+	BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1),
+	BPF_JMP_IMM(BPF_JA, 0, 0, -10),
+	/* callback */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.expected_insns = { HELPER_CALL_INSN() },
+	.unexpected_insns = { PSEUDO_CALL_INSN() },
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result = ACCEPT,
+	.runs = 0,
+	.func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+	.func_info_cnt = 2,
+	BTF_TYPES
+},
+{
+	"don't inline bpf_loop call, callback non-constant",
+	.insns = {
+	/* main */
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */
+
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* callback */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	/* callback #2 */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.expected_insns = { HELPER_CALL_INSN() },
+	.unexpected_insns = { PSEUDO_CALL_INSN() },
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result = ACCEPT,
+	.runs = 0,
+	.func_info = {
+		{ 0, MAIN_TYPE },
+		{ 14, CALLBACK_TYPE },
+		{ 16, CALLBACK_TYPE }
+	},
+	.func_info_cnt = 3,
+	BTF_TYPES
+},
+{
+	"bpf_loop_inline and a dead func",
+	.insns = {
+	/* main */
+
+	/* A reference to callback #1 to make the verifier count it as
+	 * a func. This reference is overwritten below, leaving
+	 * callback #1 dead.
+	 */
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* callback */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	/* callback #2 */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.expected_insns = { PSEUDO_CALL_INSN() },
+	.unexpected_insns = { HELPER_CALL_INSN() },
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result = ACCEPT,
+	.runs = 0,
+	.func_info = {
+		{ 0, MAIN_TYPE },
+		{ 10, CALLBACK_TYPE },
+		{ 12, CALLBACK_TYPE }
+	},
+	.func_info_cnt = 3,
+	BTF_TYPES
+},
+{
+	"bpf_loop_inline stack locations for loop vars",
+	.insns = {
+	/* main */
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+	/* bpf_loop call #1 */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	/* bpf_loop call #2 */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	/* call func and exit */
+	BPF_CALL_REL(2),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* func */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+	BPF_RAW_INSN(0, 0, 0, 0, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* callback */
+	BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.expected_insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+	SKIP_INSNS(),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+	SKIP_INSNS(),
+	/* offsets are the same as in the first call */
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+	SKIP_INSNS(),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+	SKIP_INSNS(),
+	/* offsets differ from main because of the different offset
+	 * in the BPF_ST_MEM instruction
+	 */
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40),
+	},
+	.unexpected_insns = { HELPER_CALL_INSN() },
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result = ACCEPT,
+	.func_info = {
+		{ 0, MAIN_TYPE },
+		{ 16, MAIN_TYPE },
+		{ 25, CALLBACK_TYPE },
+	},
+	.func_info_cnt = 3,
+	BTF_TYPES
+},
+{
+	"inline bpf_loop call in a big program",
+	.insns = {},
+	.fill_helper = bpf_fill_big_prog_with_loop_1,
+	.expected_insns = { PSEUDO_CALL_INSN() },
+	.unexpected_insns = { HELPER_CALL_INSN() },
+	.result = ACCEPT,
+	.func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+	.func_info_cnt = 2,
+	BTF_TYPES
+},
+
+#undef HELPER_CALL_INSN
+#undef PSEUDO_CALL_INSN
+#undef CALLBACK_TYPE
+#undef MAIN_TYPE
+#undef BTF_TYPES
diff --git a/tools/lib/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index af136f73b09d..f2721a4ae7c5 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -30,16 +30,10 @@
 #include <sys/types.h>
 #include <linux/if_link.h>
 
-#include "bpf.h"
-#include "libbpf.h"
-#include "libbpf_internal.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include "xsk.h"
 
-/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal
- * uses of deprecated APIs
- */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
 #ifndef SOL_XDP
  #define SOL_XDP 283
 #endif
@@ -52,6 +46,8 @@
  #define PF_XDP AF_XDP
 #endif
 
+#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
+
 enum xsk_prog {
 	XSK_PROG_FALLBACK,
 	XSK_PROG_REDIRECT_FLAGS,
@@ -286,11 +282,10 @@ out_mmap:
 	return err;
 }
 
-DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
-			    __u64 size, struct xsk_ring_prod *fill,
-			    struct xsk_ring_cons *comp,
-			    const struct xsk_umem_config *usr_config)
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
+		     __u64 size, struct xsk_ring_prod *fill,
+		     struct xsk_ring_cons *comp,
+		     const struct xsk_umem_config *usr_config)
 {
 	struct xdp_umem_reg mr;
 	struct xsk_umem *umem;
@@ -351,25 +346,9 @@ struct xsk_umem_config_v1 {
 	__u32 frame_headroom;
 };
 
-COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
-			    __u64 size, struct xsk_ring_prod *fill,
-			    struct xsk_ring_cons *comp,
-			    const struct xsk_umem_config *usr_config)
-{
-	struct xsk_umem_config config;
-
-	memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
-	config.flags = 0;
-
-	return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
-					&config);
-}
-
 static enum xsk_prog get_xsk_prog(void)
 {
 	enum xsk_prog detected = XSK_PROG_FALLBACK;
-	__u32 size_out, retval, duration;
 	char data_in = 0, data_out;
 	struct bpf_insn insns[] = {
 		BPF_LD_MAP_FD(BPF_REG_1, 0),
@@ -378,6 +357,12 @@ static enum xsk_prog get_xsk_prog(void)
 		BPF_EMIT_CALL(BPF_FUNC_redirect_map),
 		BPF_EXIT_INSN(),
 	};
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.data_in = &data_in,
+		.data_size_in = 1,
+		.data_out = &data_out,
+	);
+
 	int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns);
 
 	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);
@@ -392,8 +377,8 @@ static enum xsk_prog get_xsk_prog(void)
 		return detected;
 	}
 
-	ret = bpf_prog_test_run(prog_fd, 0, &data_in, 1, &data_out, &size_out, &retval, &duration);
-	if (!ret && retval == XDP_PASS)
+	ret = bpf_prog_test_run_opts(prog_fd, &opts);
+	if (!ret && opts.retval == XDP_PASS)
 		detected = XSK_PROG_REDIRECT_FLAGS;
 	close(prog_fd);
 	close(map_fd);
@@ -510,7 +495,7 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk)
 	int link_fd;
 	int err;
 
-	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+	err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
 	if (err) {
 		pr_warn("getting XDP prog id failed\n");
 		return err;
@@ -536,6 +521,25 @@ static int xsk_create_bpf_link(struct xsk_socket *xsk)
 	return 0;
 }
 
+/* Copy up to sz - 1 bytes from the zero-terminated src string and ensure that
+ * dst is a zero-terminated string no matter what (unless sz == 0, in which
+ * case it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but
+ * differs in what is returned. Given this is an internal helper, it's trivial
+ * to extend when necessary. Use this instead of strncpy inside libbpf source
+ * code.
+ */
+static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+	size_t i;
+
+	if (sz == 0)
+		return;
+
+	sz--;
+	for (i = 0; i < sz && src[i]; i++)
+		dst[i] = src[i];
+	dst[i] = '\0';
+}
+
 static int xsk_get_max_queues(struct xsk_socket *xsk)
 {
 	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
@@ -792,8 +796,8 @@ static int xsk_init_xdp_res(struct xsk_socket *xsk,
 	if (ctx->has_bpf_link)
 		err = xsk_create_bpf_link(xsk);
 	else
-		err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
-					  xsk->config.xdp_flags);
+		err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd,
+				     xsk->config.xdp_flags, NULL);
 
 	if (err)
 		goto err_attach_xdp_prog;
@@ -811,7 +815,7 @@ err_set_bpf_maps:
 	if (ctx->has_bpf_link)
 		close(ctx->link_fd);
 	else
-		bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+		bpf_xdp_detach(ctx->ifindex, 0, NULL);
 err_attach_xdp_prog:
 	close(ctx->prog_fd);
 err_load_xdp_prog:
@@ -862,7 +866,7 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
 	if (ctx->has_bpf_link)
 		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
 	else
-		err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+		err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id);
 
 	if (err)
 		return err;
@@ -876,6 +880,11 @@ static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
 	return err;
 }
 
+int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd)
+{
+	return __xsk_setup_xdp_prog(xsk, xsks_map_fd);
+}
+
 static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
 				   __u32 queue_id)
 {
@@ -954,6 +963,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
 	ctx->fill = fill;
 	ctx->comp = comp;
 	list_add(&ctx->list, &umem->ctx_list);
+	ctx->has_bpf_link = xsk_probe_bpf_link();
 	return ctx;
 }
 
@@ -1055,7 +1065,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 		}
 	}
 	xsk->ctx = ctx;
-	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
 
 	if (rx && !rx_setup_done) {
 		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
@@ -1147,8 +1156,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 		goto out_mmap_tx;
 	}
 
-	ctx->prog_fd = -1;
-
 	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
 		err = __xsk_setup_xdp_prog(xsk, NULL);
 		if (err)
@@ -1229,7 +1236,10 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 
 	ctx = xsk->ctx;
 	umem = ctx->umem;
-	if (ctx->prog_fd != -1) {
+
+	xsk_put_ctx(ctx, true);
+
+	if (!ctx->refcount) {
 		xsk_delete_bpf_maps(xsk);
 		close(ctx->prog_fd);
 		if (ctx->has_bpf_link)
@@ -1248,8 +1258,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 		}
 	}
 
-	xsk_put_ctx(ctx, true);
-
 	umem->refcount--;
 	/* Do not close an fd that also has an associated umem connected
 	 * to it.
diff --git a/tools/lib/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 64e9c57fd792..997723b0bfb2 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -9,15 +9,15 @@
  * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
  */
 
-#ifndef __LIBBPF_XSK_H
-#define __LIBBPF_XSK_H
+#ifndef __XSK_H
+#define __XSK_H
 
 #include <stdio.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <linux/if_xdp.h>
 
-#include "libbpf.h"
+#include <bpf/libbpf.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -251,9 +251,7 @@ static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
 	return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
 }
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_umem__fd(const struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_socket__fd(const struct xsk_socket *xsk);
 
 #define XSK_RING_CONS__DEFAULT_NUM_DESCS      2048
@@ -271,9 +269,8 @@ struct xsk_umem_config {
 	__u32 flags;
 };
 
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
+int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd);
 int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
 
 /* Flags for the libbpf_flags field. */
@@ -288,32 +285,17 @@ struct xsk_socket_config {
 };
 
 /* Set config to NULL to get the default configuration. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_umem__create(struct xsk_umem **umem,
 		     void *umem_area, __u64 size,
 		     struct xsk_ring_prod *fill,
 		     struct xsk_ring_cons *comp,
 		     const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
-			    void *umem_area, __u64 size,
-			    struct xsk_ring_prod *fill,
-			    struct xsk_ring_cons *comp,
-			    const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
-int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
-			    void *umem_area, __u64 size,
-			    struct xsk_ring_prod *fill,
-			    struct xsk_ring_cons *comp,
-			    const struct xsk_umem_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_socket__create(struct xsk_socket **xsk,
 		       const char *ifname, __u32 queue_id,
 		       struct xsk_umem *umem,
 		       struct xsk_ring_cons *rx,
 		       struct xsk_ring_prod *tx,
 		       const struct xsk_socket_config *config);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			      const char *ifname,
 			      __u32 queue_id, struct xsk_umem *umem,
@@ -324,13 +306,11 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			      const struct xsk_socket_config *config);
 
 /* Returns 0 for success and -EBUSY if the umem is still in use. */
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 int xsk_umem__delete(struct xsk_umem *umem);
-LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "AF_XDP support deprecated and moved to libxdp")
 void xsk_socket__delete(struct xsk_socket *xsk);
 
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
 
-#endif /* __LIBBPF_XSK_H */
+#endif /* __XSK_H */
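
With the header now private to the selftests tree, the LIBBPF_API export
and LIBBPF_DEPRECATED_SINCE() annotations are dropped and libbpf itself
is pulled in via its installed include path instead of as a sibling
source file. A hypothetical consumer inside tools/testing/selftests/bpf/
would look like this (get_umem_fd() is illustrative, not from the tree):

	#include <bpf/libbpf.h>		/* resolved from the libbpf include path */
	#include "xsk.h"		/* local copy, plain prototypes */

	static int get_umem_fd(const struct xsk_umem *umem)
	{
		/* same prototype as before, minus the deprecation warning */
		return xsk_umem__fd(umem);
	}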
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 684e813803ec..a0b71723a818 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,7 +8,7 @@ ksft_xfail=2
 ksft_xpass=3
 ksft_skip=4
 
-XSKOBJ=xdpxceiver
+XSKOBJ=xskxceiver
 
 validate_root_exec()
 {
@@ -77,7 +77,7 @@ validate_ip_utility()
 	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
 }
 
-execxdpxceiver()
+exec_xskxceiver()
 {
         if [[ $busy_poll -eq 1 ]]; then
 	        ARGS+="-b "
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index e5992a6b5e09..74d56d971baf 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -97,12 +97,12 @@
 #include <time.h>
 #include <unistd.h>
 #include <stdatomic.h>
-#include <bpf/xsk.h>
-#include "xdpxceiver.h"
+#include "xsk.h"
+#include "xskxceiver.h"
 #include "../kselftest.h"
 
 /* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
- * Until xdpxceiver is either moved or re-writed into libxdp, suppress
+ * Until xskxceiver is either moved or rewritten into libxdp, suppress
  * deprecation warnings in this file
  */
 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
@@ -1085,6 +1085,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
 	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
 	int ret, ifindex;
 	void *bufs;
 	u32 i;
@@ -1130,10 +1131,26 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (!ifindex)
 		exit_with_error(errno);
 
-	ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd);
+	ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd);
 	if (ret)
 		exit_with_error(-ret);
 
+	ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts);
+	if (ret)
+		exit_with_error(-ret);
+
+	if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) {
+		if (opts.attach_mode != XDP_ATTACHED_SKB) {
+			ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n");
+			exit_with_error(-EINVAL);
+		}
+	} else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) {
+		if (opts.attach_mode != XDP_ATTACHED_DRV) {
+			ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n");
+			exit_with_error(-EINVAL);
+		}
+	}
+
 	ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
 	if (ret)
 		exit_with_error(-ret);
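
The block added above cross-checks that the kernel really attached the
program in the requested mode: bpf_xdp_query() fills opts.attach_mode
with one of the XDP_ATTACHED_* values from linux/if_link.h. A condensed
sketch of the SKB-mode case, assuming ifindex and the selftests'
exit_with_error() helper are in scope:

	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
	int ret = bpf_xdp_query(ifindex, XDP_FLAGS_SKB_MODE, &opts);

	if (ret)
		exit_with_error(-ret);
	if (opts.attach_mode != XDP_ATTACHED_SKB)
		/* asked for generic/SKB mode, kernel attached otherwise */
		exit_with_error(-EINVAL);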
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 8f672b0fe0e1..3d17053f98e5 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -2,8 +2,8 @@
  * Copyright(c) 2020 Intel Corporation.
  */
 
-#ifndef XDPXCEIVER_H_
-#define XDPXCEIVER_H_
+#ifndef XSKXCEIVER_H_
+#define XSKXCEIVER_H_
 
 #ifndef SOL_XDP
 #define SOL_XDP 283
@@ -169,4 +169,4 @@ pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
 
 int pkts_in_flight;
 
-#endif				/* XDPXCEIVER_H */
+#endif				/* XSKXCEIVER_H_ */