summary refs log tree commit diff
path: root/include/net
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2016-06-06 09:37:16 -0700
committerDavid S. Miller <davem@davemloft.net>2016-06-07 16:37:14 -0700
commitedb09eb17ed89eaa82a52dd306beac93e292b485 (patch)
tree1f241506d6b781b65d1033925f1c1ce6a39c3394 /include/net
parentf9eb8aea2a1e12fc2f584d1627deeb957435a801 (diff)
downloadlinux-edb09eb17ed89eaa82a52dd306beac93e292b485.tar.gz
net: sched: do not acquire qdisc spinlock in qdisc/class stats dump
Large tc dumps (tc -s {qdisc|class} sh dev ethX) done by Google BwE host
agent [1] are problematic at scale :

For each qdisc/class found in the dump, we currently lock the root qdisc
spinlock in order to get stats. Sampling stats every 5 seconds from
thousands of HTB classes is a challenge when the root qdisc spinlock is
under high pressure. Not only the dumps take time, they also slow
down the fast path (queue/dequeue packets) by 10 % to 20 % in some cases.

An audit of existing qdiscs showed that sch_fq_codel is the only qdisc
that might need the qdisc lock in fq_codel_dump_stats() and
fq_codel_dump_class_stats()

In v2 of this patch, I now use the Qdisc running seqcount to provide
consistent reads of packets/bytes counters, regardless of 32/64 bit arches.

I also changed rate estimators to use the same infrastructure
so that they no longer need to lock root qdisc lock.

[1]
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43838.pdf

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Kevin Athey <kda@google.com>
Cc: Xiaotian Pei <xiaotian@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r--include/net/gen_stats.h12
-rw-r--r--include/net/sch_generic.h8
2 files changed, 16 insertions, 4 deletions
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 610cd397890e..231e121cc7d9 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -33,10 +33,12 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 				 spinlock_t *lock, struct gnet_dump *d,
 				 int padattr);
 
-int gnet_stats_copy_basic(struct gnet_dump *d,
+int gnet_stats_copy_basic(const seqcount_t *running,
+			  struct gnet_dump *d,
 			  struct gnet_stats_basic_cpu __percpu *cpu,
 			  struct gnet_stats_basic_packed *b);
-void __gnet_stats_copy_basic(struct gnet_stats_basic_packed *bstats,
+void __gnet_stats_copy_basic(const seqcount_t *running,
+			     struct gnet_stats_basic_packed *bstats,
 			     struct gnet_stats_basic_cpu __percpu *cpu,
 			     struct gnet_stats_basic_packed *b);
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
@@ -52,13 +54,15 @@ int gnet_stats_finish_copy(struct gnet_dump *d);
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 		      struct gnet_stats_rate_est64 *rate_est,
-		      spinlock_t *stats_lock, struct nlattr *opt);
+		      spinlock_t *stats_lock,
+		      seqcount_t *running, struct nlattr *opt);
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est64 *rate_est);
 int gen_replace_estimator(struct gnet_stats_basic_packed *bstats,
 			  struct gnet_stats_basic_cpu __percpu *cpu_bstats,
 			  struct gnet_stats_rate_est64 *rate_est,
-			  spinlock_t *stats_lock, struct nlattr *opt);
+			  spinlock_t *stats_lock,
+			  seqcount_t *running, struct nlattr *opt);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 			  const struct gnet_stats_rate_est64 *rate_est);
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index bff8d895ef8a..c4f5749342ec 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -314,6 +314,14 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
 	return qdisc_lock(root);
 }
 
+static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+{
+	struct Qdisc *root = qdisc_root_sleeping(qdisc);
+
+	ASSERT_RTNL();
+	return &root->running;
+}
+
 static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->dev;