summary refs log tree commit diff
path: root/include/trace
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 11:56:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-11-15 11:56:19 -0800
commit5bbcc0f595fadb4cac0eddc4401035ec0bd95b09 (patch)
tree3b65e490cc36a6c6fecac1fa24d9e0ac9ced4455 /include/trace
parent892204e06cb9e89fbc4b299a678f9ca358e97cac (diff)
parent50895b9de1d3e0258e015e8e55128d835d9a9f19 (diff)
downloadlinux-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Highlights:

   1) Maintain the TCP retransmit queue using an rbtree, with 1GB
      windows at 100Gb this really has become necessary. From Eric
      Dumazet.

   2) Multi-program support for cgroup+bpf, from Alexei Starovoitov.

   3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew
      Lunn.

   4) Add meter action support to openvswitch, from Andy Zhou.

   5) Add a data meta pointer for BPF accessible packets, from Daniel
      Borkmann.

   6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet.

   7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli.

   8) More work to move the RTNL mutex down, from Florian Westphal.

   9) Add 'bpftool' utility, to help with bpf program introspection.
      From Jakub Kicinski.

  10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper
      Dangaard Brouer.

  11) Support 'blocks' of transformations in the packet scheduler which
      can span multiple network devices, from Jiri Pirko.

  12) TC flower offload support in cxgb4, from Kumar Sanghvi.

  13) Priority based stream scheduler for SCTP, from Marcelo Ricardo
      Leitner.

  14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg.

  15) Add RED qdisc offloadability, and use it in mlxsw driver. From
      Nogah Frankel.

  16) eBPF based device controller for cgroup v2, from Roman Gushchin.

  17) Add some fundamental tracepoints for TCP, from Song Liu.

  18) Remove garbage collection from ipv6 route layer, this is a
      significant accomplishment. From Wei Wang.

  19) Add multicast route offload support to mlxsw, from Yotam Gigi"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits)
  tcp: highest_sack fix
  geneve: fix fill_info when link down
  bpf: fix lockdep splat
  net: cdc_ncm: GetNtbFormat endian fix
  openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start
  netem: remove unnecessary 64 bit modulus
  netem: use 64 bit divide by rate
  tcp: Namespace-ify sysctl_tcp_default_congestion_control
  net: Protect iterations over net::fib_notifier_ops in fib_seq_sum()
  ipv6: set all.accept_dad to 0 by default
  uapi: fix linux/tls.h userspace compilation error
  usbnet: ipheth: prevent TX queue timeouts when device not ready
  vhost_net: conditionally enable tx polling
  uapi: fix linux/rxrpc.h userspace compilation errors
  net: stmmac: fix LPI transitioning for dwmac4
  atm: horizon: Fix irq release error
  net-sysfs: trigger netlink notification on ifalias change via sysfs
  openvswitch: Using kfree_rcu() to simplify the code
  openvswitch: Make local function ovs_nsh_key_attr_size() static
  openvswitch: Fix return value check in ovs_meter_cmd_features()
  ...
Diffstat (limited to 'include/trace')
-rw-r--r--include/trace/events/bpf.h5
-rw-r--r--include/trace/events/fib6.h6
-rw-r--r--include/trace/events/sock.h2
-rw-r--r--include/trace/events/tcp.h299
-rw-r--r--include/trace/events/xdp.h80
-rw-r--r--include/trace/perf.h6
6 files changed, 388 insertions, 10 deletions
diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index cc749d7099fb..150185647e6b 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -5,6 +5,9 @@
 #if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_BPF_H
 
+/* These are only used within the BPF_SYSCALL code */
+#ifdef CONFIG_BPF_SYSCALL
+
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/fs.h>
@@ -346,7 +349,7 @@ TRACE_EVENT(bpf_map_next_key,
 		  __print_hex(__get_dynamic_array(nxt), __entry->key_len),
 		  __entry->key_trunc ? " ..." : "")
 );
-
+#endif /* CONFIG_BPF_SYSCALL */
 #endif /* _TRACE_BPF_H */
 
 #include <trace/define_trace.h>
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index d46e24702765..7e8d48a81b91 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -13,9 +13,9 @@
 TRACE_EVENT(fib6_table_lookup,
 
 	TP_PROTO(const struct net *net, const struct rt6_info *rt,
-		 u32 tb_id, const struct flowi6 *flp),
+		 struct fib6_table *table, const struct flowi6 *flp),
 
-	TP_ARGS(net, rt, tb_id, flp),
+	TP_ARGS(net, rt, table, flp),
 
 	TP_STRUCT__entry(
 		__field(	u32,	tb_id		)
@@ -35,7 +35,7 @@ TRACE_EVENT(fib6_table_lookup,
 	TP_fast_assign(
 		struct in6_addr *in6;
 
-		__entry->tb_id = tb_id;
+		__entry->tb_id = table->tb6_id;
 		__entry->oif = flp->flowi6_oif;
 		__entry->iif = flp->flowi6_iif;
 		__entry->tos = ip6_tclass(flp->flowlabel);
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 6d31c0520ef3..ec4dade24466 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -48,7 +48,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
 		strncpy(__entry->name, prot->name, 32);
 		__entry->sysctl_mem = prot->sysctl_mem;
 		__entry->allocated = allocated;
-		__entry->sysctl_rmem = prot->sysctl_rmem[0];
+		__entry->sysctl_rmem = sk_get_rmem0(sk, prot);
 		__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
 	),
 
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index 000000000000..07cccca6cbf1
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,299 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/tracepoint.h>
+#include <net/ipv6.h>
+
+#define tcp_state_name(state)	{ state, #state }
+#define show_tcp_state_name(val)			\
+	__print_symbolic(val,				\
+		tcp_state_name(TCP_ESTABLISHED),	\
+		tcp_state_name(TCP_SYN_SENT),		\
+		tcp_state_name(TCP_SYN_RECV),		\
+		tcp_state_name(TCP_FIN_WAIT1),		\
+		tcp_state_name(TCP_FIN_WAIT2),		\
+		tcp_state_name(TCP_TIME_WAIT),		\
+		tcp_state_name(TCP_CLOSE),		\
+		tcp_state_name(TCP_CLOSE_WAIT),		\
+		tcp_state_name(TCP_LAST_ACK),		\
+		tcp_state_name(TCP_LISTEN),		\
+		tcp_state_name(TCP_CLOSING),		\
+		tcp_state_name(TCP_NEW_SYN_RECV))
+
+/*
+ * tcp event with arguments sk and skb
+ *
+ * Note: this class requires a valid sk pointer; while skb pointer could
+ *       be NULL.
+ */
+DECLARE_EVENT_CLASS(tcp_event_sk_skb,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+	TP_ARGS(sk, skb),
+
+	TP_STRUCT__entry(
+		__field(const void *, skbaddr)
+		__field(const void *, skaddr)
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->skbaddr = skb;
+		__entry->skaddr = sk;
+
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 =  inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (sk->sk_family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = sk->sk_v6_rcv_saddr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = sk->sk_v6_daddr;
+		} else
+#endif
+		{
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+		}
+	),
+
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+		  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6)
+);
+
+DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+	TP_ARGS(sk, skb)
+);
+
+/*
+ * skb of trace_tcp_send_reset is the skb that caused RST. In case of
+ * active reset, skb should be NULL
+ */
+DEFINE_EVENT(tcp_event_sk_skb, tcp_send_reset,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
+
+	TP_ARGS(sk, skb)
+);
+
+/*
+ * tcp event with arguments sk
+ *
+ * Note: this class requires a valid sk pointer.
+ */
+DECLARE_EVENT_CLASS(tcp_event_sk,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk),
+
+	TP_STRUCT__entry(
+		__field(const void *, skaddr)
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->skaddr = sk;
+
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 =  inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (sk->sk_family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = sk->sk_v6_rcv_saddr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = sk->sk_v6_daddr;
+		} else
+#endif
+		{
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+		}
+	),
+
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+		  __entry->sport, __entry->dport,
+		  __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6)
+);
+
+DEFINE_EVENT(tcp_event_sk, tcp_receive_reset,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
+DEFINE_EVENT(tcp_event_sk, tcp_destroy_sock,
+
+	TP_PROTO(const struct sock *sk),
+
+	TP_ARGS(sk)
+);
+
+TRACE_EVENT(tcp_set_state,
+
+	TP_PROTO(const struct sock *sk, const int oldstate, const int newstate),
+
+	TP_ARGS(sk, oldstate, newstate),
+
+	TP_STRUCT__entry(
+		__field(const void *, skaddr)
+		__field(int, oldstate)
+		__field(int, newstate)
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->skaddr = sk;
+		__entry->oldstate = oldstate;
+		__entry->newstate = newstate;
+
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 =  inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (sk->sk_family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = sk->sk_v6_rcv_saddr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = sk->sk_v6_daddr;
+		} else
+#endif
+		{
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+		}
+	),
+
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
+		  __entry->sport, __entry->dport,
+		  __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  show_tcp_state_name(__entry->oldstate),
+		  show_tcp_state_name(__entry->newstate))
+);
+
+TRACE_EVENT(tcp_retransmit_synack,
+
+	TP_PROTO(const struct sock *sk, const struct request_sock *req),
+
+	TP_ARGS(sk, req),
+
+	TP_STRUCT__entry(
+		__field(const void *, skaddr)
+		__field(const void *, req)
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+	),
+
+	TP_fast_assign(
+		struct inet_request_sock *ireq = inet_rsk(req);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->skaddr = sk;
+		__entry->req = req;
+
+		__entry->sport = ireq->ir_num;
+		__entry->dport = ntohs(ireq->ir_rmt_port);
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = ireq->ir_loc_addr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 = ireq->ir_rmt_addr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (sk->sk_family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			*pin6 = ireq->ir_v6_loc_addr;
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			*pin6 = ireq->ir_v6_rmt_addr;
+		} else
+#endif
+		{
+			pin6 = (struct in6_addr *)__entry->saddr_v6;
+			ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
+			pin6 = (struct in6_addr *)__entry->daddr_v6;
+			ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
+		}
+	),
+
+	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+		  __entry->sport, __entry->dport,
+		  __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6)
+);
+
+#endif /* _TRACE_TCP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 810e94160c12..4cd0f05d0113 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -137,14 +137,90 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
 		  __entry->map_id, __entry->map_index)
 );
 
+#define devmap_ifindex(fwd, map)				\
+	(!fwd ? 0 :						\
+	 (!map ? 0 :						\
+	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	   ((struct net_device *)fwd)->ifindex : 0)))
+
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
-	 trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0,	\
+	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\
 				0, map, idx)
 
 #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err)	\
-	 trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0,	\
+	 trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map),	\
 				    err, map, idx)
 
+TRACE_EVENT(xdp_cpumap_kthread,
+
+	TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
+		 int sched),
+
+	TP_ARGS(map_id, processed, drops, sched),
+
+	TP_STRUCT__entry(
+		__field(int, map_id)
+		__field(u32, act)
+		__field(int, cpu)
+		__field(unsigned int, drops)
+		__field(unsigned int, processed)
+		__field(int, sched)
+	),
+
+	TP_fast_assign(
+		__entry->map_id		= map_id;
+		__entry->act		= XDP_REDIRECT;
+		__entry->cpu		= smp_processor_id();
+		__entry->drops		= drops;
+		__entry->processed	= processed;
+		__entry->sched	= sched;
+	),
+
+	TP_printk("kthread"
+		  " cpu=%d map_id=%d action=%s"
+		  " processed=%u drops=%u"
+		  " sched=%d",
+		  __entry->cpu, __entry->map_id,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->processed, __entry->drops,
+		  __entry->sched)
+);
+
+TRACE_EVENT(xdp_cpumap_enqueue,
+
+	TP_PROTO(int map_id, unsigned int processed,  unsigned int drops,
+		 int to_cpu),
+
+	TP_ARGS(map_id, processed, drops, to_cpu),
+
+	TP_STRUCT__entry(
+		__field(int, map_id)
+		__field(u32, act)
+		__field(int, cpu)
+		__field(unsigned int, drops)
+		__field(unsigned int, processed)
+		__field(int, to_cpu)
+	),
+
+	TP_fast_assign(
+		__entry->map_id		= map_id;
+		__entry->act		= XDP_REDIRECT;
+		__entry->cpu		= smp_processor_id();
+		__entry->drops		= drops;
+		__entry->processed	= processed;
+		__entry->to_cpu		= to_cpu;
+	),
+
+	TP_printk("enqueue"
+		  " cpu=%d map_id=%d action=%s"
+		  " processed=%u drops=%u"
+		  " to_cpu=%d",
+		  __entry->cpu, __entry->map_id,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->processed, __entry->drops,
+		  __entry->to_cpu)
+);
+
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
diff --git a/include/trace/perf.h b/include/trace/perf.h
index e4b249821684..dbc6c74defc3 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -35,7 +35,6 @@ perf_trace_##call(void *__data, proto)					\
 	struct trace_event_call *event_call = __data;			\
 	struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
 	struct trace_event_raw_##call *entry;				\
-	struct bpf_prog *prog = event_call->prog;			\
 	struct pt_regs *__regs;						\
 	u64 __count = 1;						\
 	struct task_struct *__task = NULL;				\
@@ -47,8 +46,9 @@ perf_trace_##call(void *__data, proto)					\
 	__data_size = trace_event_get_offsets_##call(&__data_offsets, args); \
 									\
 	head = this_cpu_ptr(event_call->perf_events);			\
-	if (!prog && __builtin_constant_p(!__task) && !__task &&	\
-				hlist_empty(head))			\
+	if (!bpf_prog_array_valid(event_call) &&			\
+	    __builtin_constant_p(!__task) && !__task &&			\
+	    hlist_empty(head))						\
 		return;							\
 									\
 	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\