author     Linus Torvalds <torvalds@linux-foundation.org>  2017-07-05 12:31:59 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-07-05 12:31:59 -0700
commit     5518b69b76680a4f2df96b1deca260059db0c2de
tree       f33cd1519c8efb4590500f2f9617400be233238c  /include/net/sock.h
parent     8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73
parent     0e72582270c07850b92cac351c8b97d4f9c123b9
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
 "Reasonably busy this cycle, but perhaps not as busy as in the 4.12
  merge window:

   1) Several optimizations for UDP processing under high load from
      Paolo Abeni.

   2) Support pacing internally in TCP when using the sch_fq packet
      scheduler for this purpose is not practical. From Eric Dumazet.

   3) Support multiple filter chains per qdisc, from Jiri Pirko.

   4) Move to 1ms TCP timestamp clock, from Eric Dumazet.

   5) Add batch dequeueing to vhost_net, from Jason Wang.

   6) Flesh out SCTP checksum offload support more completely, from
      Davide Caratti.

   7) More plumbing of extended netlink ACKs, from David Ahern, Pablo
      Neira Ayuso, and Matthias Schiffer.

   8) Add devlink support to nfp driver, from Simon Horman.

   9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa
      Prabhu.

  10) Add stack depth tracking to BPF verifier and use this information
      in the various eBPF JITs. From Alexei Starovoitov.

  11) Support XDP on qed device VFs, from Yuval Mintz.

  12) Introduce BPF PROG ID for better introspection of installed BPF
      programs. From Martin KaFai Lau.

  13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann.

  14) For loads, allow narrower accesses in bpf verifier checking, from
      Yonghong Song.

  15) Support MIPS in the BPF selftests and samples infrastructure, the
      MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David
      Daney.

  16) Support kernel based TLS, from Dave Watson and others.

  17) Completely remove DST garbage collection, from Wei Wang.

  18) Allow installing TCP MD5 rules using prefixes, from Ivan
      Delalande.

  19) Add XDP support to Intel i40e driver, from Björn Töpel.

  20) Add support for TC flower offload in nfp driver, from Simon
      Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub
      Kicinski, and Bert van Leeuwen.

  21) IPSEC offloading support in mlx5, from Ilan Tayari.

  22) Add HW PTP support to macb driver, from Rafal Ozieblo.

  23) Networking refcount_t conversions, from Elena Reshetova; a sketch
      of the conversion pattern is shown after the shortlog below.

  24) Add sock_ops support to BPF, from Lawrence Brakmo. This is useful
      for tuning the TCP sockopt settings of a group of applications,
      currently via CGROUPs; a rough sock_ops sketch follows the
      shortlog below"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits)
  net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap
  dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap
  cxgb4: Support for get_ts_info ethtool method
  cxgb4: Add PTP Hardware Clock (PHC) support
  cxgb4: time stamping interface for PTP
  nfp: default to chained metadata prepend format
  nfp: remove legacy MAC address lookup
  nfp: improve order of interfaces in breakout mode
  net: macb: remove extraneous return when MACB_EXT_DESC is defined
  bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case
  bpf: fix return in load_bpf_file
  mpls: fix rtm policy in mpls_getroute
  net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t
  net, ax25: convert ax25_route.refcount from atomic_t to refcount_t
  net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t
  net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t
  net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t
  net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t
  net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t
  net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t
  ...
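
Most of the sock.h diff below is an instance of item 23: plain atomic_t
reference counts become refcount_t, which warns and saturates instead of
silently wrapping on overflow or underflow. A minimal kernel-style sketch
of the conversion pattern, on a hypothetical struct foo rather than
anything taken from this diff:

	#include <linux/refcount.h>
	#include <linux/slab.h>

	struct foo {
		refcount_t refcnt;		/* was: atomic_t refcnt; */
		/* ... payload ... */
	};

	static struct foo *foo_alloc(void)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (f)
			refcount_set(&f->refcnt, 1);	/* was atomic_set(&f->refcnt, 1) */
		return f;
	}

	static void foo_hold(struct foo *f)
	{
		refcount_inc(&f->refcnt);	/* was atomic_inc(); WARNs on inc-from-0 or saturation */
	}

	static void foo_put(struct foo *f)
	{
		if (refcount_dec_and_test(&f->refcnt))	/* was atomic_dec_and_test() */
			kfree(f);
	}

The sock_hold()/sock_put() hunks in the diff follow this same shape, with
refcount_read() standing in for atomic_read() in the debug checks.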
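
Item 24's sock_ops hooks run a program of type BPF_PROG_TYPE_SOCK_OPS at a
few points in a TCP connection's life, for sockets belonging to the cgroup
the program is attached to. A rough sketch in the style of the sample
programs that accompany the series; the file name, include paths, and the
reply values here are illustrative assumptions, not code from this merge:

	/* sockops_tune.c (hypothetical name) */
	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"		/* SEC() macro, as in samples/bpf */

	SEC("sockops")
	int bpf_tcp_tune(struct bpf_sock_ops *skops)
	{
		int rv = -1;			/* -1: keep the kernel default */

		switch (skops->op) {
		case BPF_SOCK_OPS_RWND_INIT:
			rv = 40;		/* initial receive window (illustrative) */
			break;
		case BPF_SOCK_OPS_TIMEOUT_INIT:
			rv = 10;		/* initial SYN RTO (illustrative) */
			break;
		}

		skops->reply = rv;		/* value handed back to the TCP stack */
		return 1;
	}

	char _license[] SEC("license") = "GPL";
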
Diffstat (limited to 'include/net/sock.h')
 include/net/sock.h | 65
 1 file changed, 29 insertions(+), 36 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index f97da141d920..48e4d5c38f85 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -66,6 +66,7 @@
 #include <linux/poll.h>
 
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <net/dst.h>
 #include <net/checksum.h>
 #include <net/tcp_states.h>
@@ -219,7 +220,7 @@ struct sock_common {
 		u32		skc_tw_rcv_nxt; /* struct tcp_timewait_sock  */
 	};
 
-	atomic_t		skc_refcnt;
+	refcount_t		skc_refcnt;
 	/* private: */
 	int                     skc_dontcopy_end[0];
 	union {
@@ -253,6 +254,7 @@ struct sock_common {
   *	@sk_ll_usec: usecs to busypoll when there is no data
   *	@sk_allocation: allocation mode
   *	@sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
+  *	@sk_pacing_status: Pacing status (requested, handled by sch_fq)
   *	@sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_padding: unused element for alignment
@@ -389,14 +391,14 @@ struct sock {
 
 	/* ===== cache line for TX ===== */
 	int			sk_wmem_queued;
-	atomic_t		sk_wmem_alloc;
+	refcount_t		sk_wmem_alloc;
 	unsigned long		sk_tsq_flags;
 	struct sk_buff		*sk_send_head;
 	struct sk_buff_head	sk_write_queue;
 	__s32			sk_peek_off;
 	int			sk_write_pending;
 	__u32			sk_dst_pending_confirm;
-	/* Note: 32bit hole on 64bit arches */
+	u32			sk_pacing_status; /* see enum sk_pacing */
 	long			sk_sndtimeo;
 	struct timer_list	sk_timer;
 	__u32			sk_priority;
@@ -475,6 +477,12 @@ struct sock {
 	struct rcu_head		sk_rcu;
 };
 
+enum sk_pacing {
+	SK_PACING_NONE		= 0,
+	SK_PACING_NEEDED	= 1,
+	SK_PACING_FQ		= 2,
+};
+
 #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
 
 #define rcu_dereference_sk_user_data(sk)	rcu_dereference(__sk_user_data((sk)))
@@ -604,7 +612,7 @@ static inline bool __sk_del_node_init(struct sock *sk)
 
 static __always_inline void sock_hold(struct sock *sk)
 {
-	atomic_inc(&sk->sk_refcnt);
+	refcount_inc(&sk->sk_refcnt);
 }
 
 /* Ungrab socket in the context, which assumes that socket refcnt
@@ -612,7 +620,7 @@ static __always_inline void sock_hold(struct sock *sk)
  */
 static __always_inline void __sock_put(struct sock *sk)
 {
-	atomic_dec(&sk->sk_refcnt);
+	refcount_dec(&sk->sk_refcnt);
 }
 
 static inline bool sk_del_node_init(struct sock *sk)
@@ -621,7 +629,7 @@ static inline bool sk_del_node_init(struct sock *sk)
 
 	if (rc) {
 		/* paranoid for a while -acme */
-		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
 	return rc;
@@ -643,7 +651,7 @@ static inline bool sk_nulls_del_node_init_rcu(struct sock *sk)
 
 	if (rc) {
 		/* paranoid for a while -acme */
-		WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
+		WARN_ON(refcount_read(&sk->sk_refcnt) == 1);
 		__sock_put(sk);
 	}
 	return rc;
@@ -900,7 +908,10 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 
 static inline void sk_incoming_cpu_update(struct sock *sk)
 {
-	sk->sk_incoming_cpu = raw_smp_processor_id();
+	int cpu = raw_smp_processor_id();
+
+	if (unlikely(sk->sk_incoming_cpu != cpu))
+		sk->sk_incoming_cpu = cpu;
 }
 
 static inline void sock_rps_record_flow_hash(__u32 hash)
@@ -1073,6 +1084,7 @@ struct proto {
 	bool			(*stream_memory_free)(const struct sock *sk);
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
+	void			(*leave_memory_pressure)(struct sock *sk);
 	atomic_long_t		*memory_allocated;	/* Current allocated memory. */
 	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
 	/*
@@ -1081,7 +1093,7 @@ struct proto {
 	 * All the __sk_mem_schedule() is of this nature: accounting
 	 * is strict, actions are advisory and have some latency.
 	 */
-	int			*memory_pressure;
+	unsigned long		*memory_pressure;
 	long			*sysctl_mem;
 	int			*sysctl_wmem;
 	int			*sysctl_rmem;
@@ -1133,9 +1145,9 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
 
 static inline void sk_refcnt_debug_release(const struct sock *sk)
 {
-	if (atomic_read(&sk->sk_refcnt) != 1)
+	if (refcount_read(&sk->sk_refcnt) != 1)
 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
-		       sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
+		       sk->sk_prot->name, sk, refcount_read(&sk->sk_refcnt));
 }
 #else /* SOCK_REFCNT_DEBUG */
 #define sk_refcnt_debug_inc(sk) do { } while (0)
@@ -1186,25 +1198,6 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 	return !!*sk->sk_prot->memory_pressure;
 }
 
-static inline void sk_leave_memory_pressure(struct sock *sk)
-{
-	int *memory_pressure = sk->sk_prot->memory_pressure;
-
-	if (!memory_pressure)
-		return;
-
-	if (*memory_pressure)
-		*memory_pressure = 0;
-}
-
-static inline void sk_enter_memory_pressure(struct sock *sk)
-{
-	if (!sk->sk_prot->enter_memory_pressure)
-		return;
-
-	sk->sk_prot->enter_memory_pressure(sk);
-}
-
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
@@ -1644,7 +1637,7 @@ void sock_init_data(struct socket *sock, struct sock *sk);
 /* Ungrab socket and destroy it, if it was the last reference. */
 static inline void sock_put(struct sock *sk)
 {
-	if (atomic_dec_and_test(&sk->sk_refcnt))
+	if (refcount_dec_and_test(&sk->sk_refcnt))
 		sk_free(sk);
 }
 /* Generic version of sock_put(), dealing with all sockets
@@ -1919,7 +1912,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
  */
 static inline int sk_wmem_alloc_get(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) - 1;
+	return refcount_read(&sk->sk_wmem_alloc) - 1;
 }
 
 /**
@@ -2034,8 +2027,8 @@ void sk_reset_timer(struct sock *sk, struct timer_list *timer,
 
 void sk_stop_timer(struct sock *sk, struct timer_list *timer);
 
-int __sk_queue_drop_skb(struct sock *sk, struct sk_buff *skb,
-			unsigned int flags,
+int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
+			struct sk_buff *skb, unsigned int flags,
 			void (*destructor)(struct sock *sk,
 					   struct sk_buff *skb));
 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
@@ -2062,7 +2055,7 @@ static inline unsigned long sock_wspace(struct sock *sk)
 	int amt = 0;
 
 	if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
-		amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+		amt = sk->sk_sndbuf - refcount_read(&sk->sk_wmem_alloc);
 		if (amt < 0)
 			amt = 0;
 	}
@@ -2143,7 +2136,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
  */
 static inline bool sock_writeable(const struct sock *sk)
 {
-	return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+	return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
 }
 
 static inline gfp_t gfp_any(void)