Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/cls_flow.c    | 182
-rw-r--r--  net/sched/sch_api.c     |  14
-rw-r--r--  net/sched/sch_choke.c   | 161
-rw-r--r--  net/sched/sch_generic.c |   9
-rw-r--r--  net/sched/sch_gred.c    |  81
-rw-r--r--  net/sched/sch_hfsc.c    |  10
-rw-r--r--  net/sched/sch_multiq.c  |   6
-rw-r--r--  net/sched/sch_netem.c   | 278
-rw-r--r--  net/sched/sch_qfq.c     |  17
-rw-r--r--  net/sched/sch_red.c     |  57
-rw-r--r--  net/sched/sch_sfb.c     |  17
-rw-r--r--  net/sched/sch_sfq.c     | 369
-rw-r--r--  net/sched/sch_tbf.c     |   1
-rw-r--r--  net/sched/sch_teql.c    |   8
14 files changed, 648 insertions(+), 562 deletions(-)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 7b582300d051..1d8bd0dbcd1f 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,6 +26,8 @@
 #include <net/pkt_cls.h>
 #include <net/ip.h>
 #include <net/route.h>
+#include <net/flow_keys.h>
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #endif
@@ -66,134 +68,37 @@ static inline u32 addr_fold(void *addr)
 	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
 }
 
-static u32 flow_get_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	__be32 *data = NULL, hdata;
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		data = skb_header_pointer(skb,
-					  nhoff + offsetof(struct iphdr,
-							   saddr),
-					  4, &hdata);
-		break;
-	case htons(ETH_P_IPV6):
-		data = skb_header_pointer(skb,
-					 nhoff + offsetof(struct ipv6hdr,
-							  saddr.s6_addr32[3]),
-					 4, &hdata);
-		break;
-	}
-
-	if (data)
-		return ntohl(*data);
+	if (flow->src)
+		return ntohl(flow->src);
 	return addr_fold(skb->sk);
 }
 
-static u32 flow_get_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	__be32 *data = NULL, hdata;
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		data = skb_header_pointer(skb,
-					  nhoff + offsetof(struct iphdr,
-							   daddr),
-					  4, &hdata);
-		break;
-	case htons(ETH_P_IPV6):
-		data = skb_header_pointer(skb,
-					 nhoff + offsetof(struct ipv6hdr,
-							  daddr.s6_addr32[3]),
-					 4, &hdata);
-		break;
-	}
-
-	if (data)
-		return ntohl(*data);
+	if (flow->dst)
+		return ntohl(flow->dst);
 	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 }
 
-static u32 flow_get_proto(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	__u8 *data = NULL, hdata;
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-		data = skb_header_pointer(skb,
-					  nhoff + offsetof(struct iphdr,
-							   protocol),
-					  1, &hdata);
-		break;
-	case htons(ETH_P_IPV6):
-		data = skb_header_pointer(skb,
-					 nhoff + offsetof(struct ipv6hdr,
-							  nexthdr),
-					 1, &hdata);
-		break;
-	}
-	if (data)
-		return *data;
-	return 0;
+	return flow->ip_proto;
 }
 
-/* helper function to get either src or dst port */
-static __be16 *flow_get_proto_common(const struct sk_buff *skb, int nhoff,
-				     __be16 *_port, int dst)
+static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	__be16 *port = NULL;
-	int poff;
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP): {
-		struct iphdr *iph, _iph;
-
-		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
-		if (!iph)
-			break;
-		if (ip_is_fragment(iph))
-			break;
-		poff = proto_ports_offset(iph->protocol);
-		if (poff >= 0)
-			port = skb_header_pointer(skb,
-					nhoff + iph->ihl * 4 + poff + dst,
-					sizeof(*_port), _port);
-		break;
-	}
-	case htons(ETH_P_IPV6): {
-		struct ipv6hdr *iph, _iph;
-
-		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
-		if (!iph)
-			break;
-		poff = proto_ports_offset(iph->nexthdr);
-		if (poff >= 0)
-			port = skb_header_pointer(skb,
-					nhoff + sizeof(*iph) + poff + dst,
-					sizeof(*_port), _port);
-		break;
-	}
-	}
-
-	return port;
-}
-
-static u32 flow_get_proto_src(const struct sk_buff *skb, int nhoff)
-{
-	__be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 0);
-
-	if (port)
-		return ntohs(*port);
+	if (flow->ports)
+		return ntohs(flow->port16[0]);
 
 	return addr_fold(skb->sk);
 }
 
-static u32 flow_get_proto_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-	__be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 2);
-
-	if (port)
-		return ntohs(*port);
+	if (flow->ports)
+		return ntohs(flow->port16[1]);
 
 	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 }
@@ -239,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 })
 #endif
 
-static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -248,10 +153,10 @@ static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
 	}
 fallback:
-	return flow_get_src(skb, nhoff);
+	return flow_get_src(skb, flow);
 }
 
-static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -260,21 +165,21 @@ static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
 		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
 	}
 fallback:
-	return flow_get_dst(skb, nhoff);
+	return flow_get_dst(skb, flow);
 }
 
-static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
 	return ntohs(CTTUPLE(skb, src.u.all));
 fallback:
-	return flow_get_proto_src(skb, nhoff);
+	return flow_get_proto_src(skb, flow);
 }
 
-static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, int nhoff)
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
 	return ntohs(CTTUPLE(skb, dst.u.all));
 fallback:
-	return flow_get_proto_dst(skb, nhoff);
+	return flow_get_proto_dst(skb, flow);
 }
 
 static u32 flow_get_rtclassid(const struct sk_buff *skb)
@@ -314,21 +219,19 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 	return skb_get_rxhash(skb);
 }
 
-static u32 flow_key_get(struct sk_buff *skb, int key)
+static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
 {
-	int nhoff = skb_network_offset(skb);
-
 	switch (key) {
 	case FLOW_KEY_SRC:
-		return flow_get_src(skb, nhoff);
+		return flow_get_src(skb, flow);
 	case FLOW_KEY_DST:
-		return flow_get_dst(skb, nhoff);
+		return flow_get_dst(skb, flow);
 	case FLOW_KEY_PROTO:
-		return flow_get_proto(skb, nhoff);
+		return flow_get_proto(skb, flow);
 	case FLOW_KEY_PROTO_SRC:
-		return flow_get_proto_src(skb, nhoff);
+		return flow_get_proto_src(skb, flow);
 	case FLOW_KEY_PROTO_DST:
-		return flow_get_proto_dst(skb, nhoff);
+		return flow_get_proto_dst(skb, flow);
 	case FLOW_KEY_IIF:
 		return flow_get_iif(skb);
 	case FLOW_KEY_PRIORITY:
@@ -338,13 +241,13 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 	case FLOW_KEY_NFCT:
 		return flow_get_nfct(skb);
 	case FLOW_KEY_NFCT_SRC:
-		return flow_get_nfct_src(skb, nhoff);
+		return flow_get_nfct_src(skb, flow);
 	case FLOW_KEY_NFCT_DST:
-		return flow_get_nfct_dst(skb, nhoff);
+		return flow_get_nfct_dst(skb, flow);
 	case FLOW_KEY_NFCT_PROTO_SRC:
-		return flow_get_nfct_proto_src(skb, nhoff);
+		return flow_get_nfct_proto_src(skb, flow);
 	case FLOW_KEY_NFCT_PROTO_DST:
-		return flow_get_nfct_proto_dst(skb, nhoff);
+		return flow_get_nfct_proto_dst(skb, flow);
 	case FLOW_KEY_RTCLASSID:
 		return flow_get_rtclassid(skb);
 	case FLOW_KEY_SKUID:
@@ -361,6 +264,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 	}
 }
 
+#define FLOW_KEYS_NEEDED ((1 << FLOW_KEY_SRC) | 		\
+			  (1 << FLOW_KEY_DST) |			\
+			  (1 << FLOW_KEY_PROTO) |		\
+			  (1 << FLOW_KEY_PROTO_SRC) |		\
+			  (1 << FLOW_KEY_PROTO_DST) | 		\
+			  (1 << FLOW_KEY_NFCT_SRC) |		\
+			  (1 << FLOW_KEY_NFCT_DST) |		\
+			  (1 << FLOW_KEY_NFCT_PROTO_SRC) |	\
+			  (1 << FLOW_KEY_NFCT_PROTO_DST))
+
 static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			 struct tcf_result *res)
 {
@@ -372,17 +285,20 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	int r;
 
 	list_for_each_entry(f, &head->filters, list) {
-		u32 keys[f->nkeys];
+		u32 keys[FLOW_KEY_MAX + 1];
+		struct flow_keys flow_keys;
 
 		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
 			continue;
 
 		keymask = f->keymask;
+		if (keymask & FLOW_KEYS_NEEDED)
+			skb_flow_dissect(skb, &flow_keys);
 
 		for (n = 0; n < f->nkeys; n++) {
 			key = ffs(keymask) - 1;
 			keymask &= ~(1 << key);
-			keys[n] = flow_key_get(skb, key);
+			keys[n] = flow_key_get(skb, key, &flow_keys);
 		}
 
 		if (f->mode == FLOW_MODE_HASH)
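
The rewrite above replaces four hand-rolled IPv4/IPv6 header walks with one
skb_flow_dissect() call per matched filter, and FLOW_KEYS_NEEDED gates that
call so filters using only metadata keys (iif, priority, mark, ...) never
touch the headers. A minimal sketch of the consumption pattern, assuming the
struct flow_keys layout that <net/flow_keys.h> introduces (src, dst, a
ports/port16[] union, ip_proto); the helper name is illustrative:

    /* Hypothetical helper, not part of the patch: dissect once, then
     * derive as many per-key values as needed from the cached keys.
     */
    static u32 example_flow_hash(const struct sk_buff *skb)
    {
    	struct flow_keys keys;

    	skb_flow_dissect(skb, &keys);	/* one linear header parse */
    	return jhash_3words((__force u32)keys.dst,
    			    (__force u32)keys.src ^ keys.ip_proto,
    			    (__force u32)keys.ports, 0);
    }
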
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index dca6c1a576f7..3d8981fde301 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -618,20 +618,24 @@ void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
 }
 EXPORT_SYMBOL(qdisc_class_hash_remove);
 
-/* Allocate an unique handle from space managed by kernel */
-
+/* Allocate a unique handle from space managed by kernel
+ * Possible range is [8000-FFFF]:0000 (0x8000 values)
+ */
 static u32 qdisc_alloc_handle(struct net_device *dev)
 {
-	int i = 0x10000;
+	int i = 0x8000;
 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
 
 	do {
 		autohandle += TC_H_MAKE(0x10000U, 0);
 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
 			autohandle = TC_H_MAKE(0x80000000U, 0);
-	} while	(qdisc_lookup(dev, autohandle) && --i > 0);
+		if (!qdisc_lookup(dev, autohandle))
+			return autohandle;
+		cond_resched();
+	} while	(--i > 0);
 
-	return i > 0 ? autohandle : 0;
+	return 0;
 }
 
 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
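
The reworked allocator caps the search at the real size of the auto-handle
space: stepping the major by one per iteration covers majors 8000 through
FFFF, and cond_resched() keeps a nearly full table from stalling the CPU.
A sketch of the stepping logic in isolation (TC_H_MAKE and TC_H_ROOT are the
real macros; the wrapper function is illustrative):

    static u32 example_next_autohandle(u32 h)
    {
    	h += TC_H_MAKE(0x10000U, 0);		/* step the 16-bit major */
    	if (h == TC_H_MAKE(TC_H_ROOT, 0))	/* FFFF:0000 is reserved */
    		h = TC_H_MAKE(0x80000000U, 0);	/* wrap to 8000:0000 */
    	return h;
    }
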
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 3422b25df9e4..e465064d39a3 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -19,10 +19,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/red.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/ipv6.h>
-#include <net/ipv6.h>
+#include <net/flow_keys.h>
 
 /*
    CHOKe stateless AQM for fair bandwidth allocation
@@ -60,6 +57,7 @@ struct choke_sched_data {
 	struct red_parms parms;
 
 /* Variables */
+	struct red_vars  vars;
 	struct tcf_proto *filter_list;
 	struct {
 		u32	prob_drop;	/* Early probability drops */
@@ -142,85 +140,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
 	--sch->q.qlen;
 }
 
-/*
- * Compare flow of two packets
- *  Returns true only if source and destination address and port match.
- *          false for special cases
- */
-static bool choke_match_flow(struct sk_buff *skb1,
-			     struct sk_buff *skb2)
-{
-	int off1, off2, poff;
-	const u32 *ports1, *ports2;
-	u8 ip_proto;
-	__u32 hash1;
-
-	if (skb1->protocol != skb2->protocol)
-		return false;
-
-	/* Use hash value as quick check
-	 * Assumes that __skb_get_rxhash makes IP header and ports linear
-	 */
-	hash1 = skb_get_rxhash(skb1);
-	if (!hash1 || hash1 != skb_get_rxhash(skb2))
-		return false;
-
-	/* Probably match, but be sure to avoid hash collisions */
-	off1 = skb_network_offset(skb1);
-	off2 = skb_network_offset(skb2);
-
-	switch (skb1->protocol) {
-	case __constant_htons(ETH_P_IP): {
-		const struct iphdr *ip1, *ip2;
-
-		ip1 = (const struct iphdr *) (skb1->data + off1);
-		ip2 = (const struct iphdr *) (skb2->data + off2);
-
-		ip_proto = ip1->protocol;
-		if (ip_proto != ip2->protocol ||
-		    ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
-			return false;
-
-		if (ip_is_fragment(ip1) | ip_is_fragment(ip2))
-			ip_proto = 0;
-		off1 += ip1->ihl * 4;
-		off2 += ip2->ihl * 4;
-		break;
-	}
-
-	case __constant_htons(ETH_P_IPV6): {
-		const struct ipv6hdr *ip1, *ip2;
-
-		ip1 = (const struct ipv6hdr *) (skb1->data + off1);
-		ip2 = (const struct ipv6hdr *) (skb2->data + off2);
-
-		ip_proto = ip1->nexthdr;
-		if (ip_proto != ip2->nexthdr ||
-		    ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
-		    ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
-			return false;
-		off1 += 40;
-		off2 += 40;
-	}
-
-	default: /* Maybe compare MAC header here? */
-		return false;
-	}
-
-	poff = proto_ports_offset(ip_proto);
-	if (poff < 0)
-		return true;
-
-	off1 += poff;
-	off2 += poff;
-
-	ports1 = (__force u32 *)(skb1->data + off1);
-	ports2 = (__force u32 *)(skb2->data + off2);
-	return *ports1 == *ports2;
-}
-
 struct choke_skb_cb {
-	u16 classid;
+	u16			classid;
+	u8			keys_valid;
+	struct flow_keys	keys;
 };
 
 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -241,6 +164,32 @@ static u16 choke_get_classid(const struct sk_buff *skb)
 }
 
 /*
+ * Compare flow of two packets
+ *  Returns true only if source and destination address and port match.
+ *          false for special cases
+ */
+static bool choke_match_flow(struct sk_buff *skb1,
+			     struct sk_buff *skb2)
+{
+	if (skb1->protocol != skb2->protocol)
+		return false;
+
+	if (!choke_skb_cb(skb1)->keys_valid) {
+		choke_skb_cb(skb1)->keys_valid = 1;
+		skb_flow_dissect(skb1, &choke_skb_cb(skb1)->keys);
+	}
+
+	if (!choke_skb_cb(skb2)->keys_valid) {
+		choke_skb_cb(skb2)->keys_valid = 1;
+		skb_flow_dissect(skb2, &choke_skb_cb(skb2)->keys);
+	}
+
+	return !memcmp(&choke_skb_cb(skb1)->keys,
+		       &choke_skb_cb(skb2)->keys,
+		       sizeof(struct flow_keys));
+}
+
+/*
  * Classify flow using either:
  *  1. pre-existing classification result in skb
  *  2. fast internal classification
@@ -317,7 +266,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
 static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct choke_sched_data *q = qdisc_priv(sch);
-	struct red_parms *p = &q->parms;
+	const struct red_parms *p = &q->parms;
 	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 
 	if (q->filter_list) {
@@ -326,14 +275,15 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			goto other_drop;	/* Packet was eaten by filter */
 	}
 
+	choke_skb_cb(skb)->keys_valid = 0;
 	/* Compute average queue usage (see RED) */
-	p->qavg = red_calc_qavg(p, sch->q.qlen);
-	if (red_is_idling(p))
-		red_end_of_idle_period(p);
+	q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
+	if (red_is_idling(&q->vars))
+		red_end_of_idle_period(&q->vars);
 
 	/* Is queue small? */
-	if (p->qavg <= p->qth_min)
-		p->qcount = -1;
+	if (q->vars.qavg <= p->qth_min)
+		q->vars.qcount = -1;
 	else {
 		unsigned int idx;
 
@@ -345,8 +295,8 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 
 		/* Queue is large, always mark/drop */
-		if (p->qavg > p->qth_max) {
-			p->qcount = -1;
+		if (q->vars.qavg > p->qth_max) {
+			q->vars.qcount = -1;
 
 			sch->qstats.overlimits++;
 			if (use_harddrop(q) || !use_ecn(q) ||
@@ -356,10 +306,10 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			}
 
 			q->stats.forced_mark++;
-		} else if (++p->qcount) {
-			if (red_mark_probability(p, p->qavg)) {
-				p->qcount = 0;
-				p->qR = red_random(p);
+		} else if (++q->vars.qcount) {
+			if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
+				q->vars.qcount = 0;
+				q->vars.qR = red_random(p);
 
 				sch->qstats.overlimits++;
 				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
@@ -370,7 +320,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				q->stats.prob_mark++;
 			}
 		} else
-			p->qR = red_random(p);
+			q->vars.qR = red_random(p);
 	}
 
 	/* Admit new packet */
@@ -404,8 +354,8 @@ static struct sk_buff *choke_dequeue(struct Qdisc *sch)
 	struct sk_buff *skb;
 
 	if (q->head == q->tail) {
-		if (!red_is_idling(&q->parms))
-			red_start_of_idle_period(&q->parms);
+		if (!red_is_idling(&q->vars))
+			red_start_of_idle_period(&q->vars);
 		return NULL;
 	}
 
@@ -428,8 +378,8 @@ static unsigned int choke_drop(struct Qdisc *sch)
 	if (len > 0)
 		q->stats.other++;
 	else {
-		if (!red_is_idling(&q->parms))
-			red_start_of_idle_period(&q->parms);
+		if (!red_is_idling(&q->vars))
+			red_start_of_idle_period(&q->vars);
 	}
 
 	return len;
@@ -439,12 +389,13 @@ static void choke_reset(struct Qdisc *sch)
 {
 	struct choke_sched_data *q = qdisc_priv(sch);
 
-	red_restart(&q->parms);
+	red_restart(&q->vars);
 }
 
 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
 	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
+	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
 };
 
 
@@ -466,6 +417,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 	int err;
 	struct sk_buff **old = NULL;
 	unsigned int mask;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -478,6 +430,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_CHOKE_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+
 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
 	if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -527,10 +481,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
 		      ctl->Plog, ctl->Scell_log,
-		      nla_data(tb[TCA_CHOKE_STAB]));
+		      nla_data(tb[TCA_CHOKE_STAB]),
+		      max_P);
+	red_set_vars(&q->vars);
 
 	if (q->head == q->tail)
-		red_end_of_idle_period(&q->parms);
+		red_end_of_idle_period(&q->vars);
 
 	sch_tree_unlock(sch);
 	choke_free(old);
@@ -561,6 +517,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
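
choke_match_flow() runs once per random probe against an already-queued
packet, so the version above caches the dissected keys in the qdisc cb area
and compares them with a single memcmp() instead of re-hashing and re-parsing
headers. The lazy-fill idiom it relies on, as a sketch over the cb fields the
patch adds:

    /* Sketch: each packet is dissected at most once, however many
     * comparisons it later takes part in; choke_enqueue() clears
     * keys_valid before the packet can be matched.
     */
    static const struct flow_keys *example_choke_keys(struct sk_buff *skb)
    {
    	struct choke_skb_cb *cb = choke_skb_cb(skb);

    	if (!cb->keys_valid) {
    		cb->keys_valid = 1;
    		skb_flow_dissect(skb, &cb->keys);
    	}
    	return &cb->keys;
    }
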
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca2798804..67fc573e013a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_frozen_or_stopped(txq)) {
+		if (!netif_xmit_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_frozen_or_stopped(txq))
+	if (!netif_xmit_frozen_or_stopped(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		ret = dev_requeue_skb(skb, q);
 	}
 
-	if (ret && netif_tx_queue_frozen_or_stopped(txq))
+	if (ret && netif_xmit_frozen_or_stopped(txq))
 		ret = 0;
 
 	return ret;
@@ -242,10 +242,11 @@ static void dev_watchdog(unsigned long arg)
 				 * old device drivers set dev->trans_start
 				 */
 				trans_start = txq->trans_start ? : dev->trans_start;
-				if (netif_tx_queue_stopped(txq) &&
+				if (netif_xmit_stopped(txq) &&
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
+					txq->trans_timeout++;
 					break;
 				}
 			}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 6cd8ddfb512d..0b15236be7b6 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -34,13 +34,14 @@ struct gred_sched;
 
 struct gred_sched_data {
 	u32		limit;		/* HARD maximal queue length	*/
-	u32      	DP;		/* the drop pramaters */
+	u32		DP;		/* the drop parameters */
 	u32		bytesin;	/* bytes seen on virtualQ so far*/
 	u32		packetsin;	/* packets seen on virtualQ so far*/
 	u32		backlog;	/* bytes on the virtualQ */
 	u8		prio;		/* the prio of this vq */
 
 	struct red_parms parms;
+	struct red_vars  vars;
 	struct red_stats stats;
 };
 
@@ -55,7 +56,7 @@ struct gred_sched {
 	u32		red_flags;
 	u32 		DPs;
 	u32 		def;
-	struct red_parms wred_set;
+	struct red_vars wred_set;
 };
 
 static inline int gred_wred_mode(struct gred_sched *table)
@@ -125,17 +126,17 @@ static inline u16 tc_index_to_dp(struct sk_buff *skb)
 	return skb->tc_index & GRED_VQ_MASK;
 }
 
-static inline void gred_load_wred_set(struct gred_sched *table,
+static inline void gred_load_wred_set(const struct gred_sched *table,
 				      struct gred_sched_data *q)
 {
-	q->parms.qavg = table->wred_set.qavg;
-	q->parms.qidlestart = table->wred_set.qidlestart;
+	q->vars.qavg = table->wred_set.qavg;
+	q->vars.qidlestart = table->wred_set.qidlestart;
 }
 
 static inline void gred_store_wred_set(struct gred_sched *table,
 				       struct gred_sched_data *q)
 {
-	table->wred_set.qavg = q->parms.qavg;
+	table->wred_set.qavg = q->vars.qavg;
 }
 
 static inline int gred_use_ecn(struct gred_sched *t)
@@ -170,7 +171,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				goto drop;
 		}
 
-		/* fix tc_index? --could be controvesial but needed for
+		/* fix tc_index? --could be controversial but needed for
 		   requeueing */
 		skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
 	}
@@ -181,8 +182,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 		for (i = 0; i < t->DPs; i++) {
 			if (t->tab[i] && t->tab[i]->prio < q->prio &&
-			    !red_is_idling(&t->tab[i]->parms))
-				qavg += t->tab[i]->parms.qavg;
+			    !red_is_idling(&t->tab[i]->vars))
+				qavg += t->tab[i]->vars.qavg;
 		}
 
 	}
@@ -193,15 +194,17 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (gred_wred_mode(t))
 		gred_load_wred_set(t, q);
 
-	q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));
+	q->vars.qavg = red_calc_qavg(&q->parms,
+				     &q->vars,
+				     gred_backlog(t, q, sch));
 
-	if (red_is_idling(&q->parms))
-		red_end_of_idle_period(&q->parms);
+	if (red_is_idling(&q->vars))
+		red_end_of_idle_period(&q->vars);
 
 	if (gred_wred_mode(t))
 		gred_store_wred_set(t, q);
 
-	switch (red_action(&q->parms, q->parms.qavg + qavg)) {
+	switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) {
 	case RED_DONT_MARK:
 		break;
 
@@ -260,7 +263,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch)
 			q->backlog -= qdisc_pkt_len(skb);
 
 			if (!q->backlog && !gred_wred_mode(t))
-				red_start_of_idle_period(&q->parms);
+				red_start_of_idle_period(&q->vars);
 		}
 
 		return skb;
@@ -293,7 +296,7 @@ static unsigned int gred_drop(struct Qdisc *sch)
 			q->stats.other++;
 
 			if (!q->backlog && !gred_wred_mode(t))
-				red_start_of_idle_period(&q->parms);
+				red_start_of_idle_period(&q->vars);
 		}
 
 		qdisc_drop(skb, sch);
@@ -320,7 +323,7 @@ static void gred_reset(struct Qdisc *sch)
 		if (!q)
 			continue;
 
-		red_restart(&q->parms);
+		red_restart(&q->vars);
 		q->backlog = 0;
 	}
 }
@@ -379,29 +382,31 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 }
 
 static inline int gred_change_vq(struct Qdisc *sch, int dp,
-				 struct tc_gred_qopt *ctl, int prio, u8 *stab)
+				 struct tc_gred_qopt *ctl, int prio,
+				 u8 *stab, u32 max_P,
+				 struct gred_sched_data **prealloc)
 {
 	struct gred_sched *table = qdisc_priv(sch);
-	struct gred_sched_data *q;
+	struct gred_sched_data *q = table->tab[dp];
 
-	if (table->tab[dp] == NULL) {
-		table->tab[dp] = kzalloc(sizeof(*q), GFP_ATOMIC);
-		if (table->tab[dp] == NULL)
+	if (!q) {
+		table->tab[dp] = q = *prealloc;
+		*prealloc = NULL;
+		if (!q)
 			return -ENOMEM;
 	}
 
-	q = table->tab[dp];
 	q->DP = dp;
 	q->prio = prio;
 	q->limit = ctl->limit;
 
 	if (q->backlog == 0)
-		red_end_of_idle_period(&q->parms);
+		red_end_of_idle_period(&q->vars);
 
 	red_set_parms(&q->parms,
 		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
-		      ctl->Scell_log, stab);
-
+		      ctl->Scell_log, stab, max_P);
+	red_set_vars(&q->vars);
 	return 0;
 }
 
@@ -409,6 +414,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
 	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
 	[TCA_GRED_STAB]		= { .len = 256 },
 	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
+	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
 };
 
 static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -418,6 +424,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	struct nlattr *tb[TCA_GRED_MAX + 1];
 	int err, prio = GRED_DEF_PRIO;
 	u8 *stab;
+	u32 max_P;
+	struct gred_sched_data *prealloc;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -433,6 +441,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_GRED_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+
 	err = -EINVAL;
 	ctl = nla_data(tb[TCA_GRED_PARMS]);
 	stab = nla_data(tb[TCA_GRED_STAB]);
@@ -455,9 +465,10 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 			prio = ctl->prio;
 	}
 
+	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
 	sch_tree_lock(sch);
 
-	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
+	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc);
 	if (err < 0)
 		goto errout_locked;
 
@@ -471,6 +482,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 
 errout_locked:
 	sch_tree_unlock(sch);
+	kfree(prealloc);
 errout:
 	return err;
 }
@@ -498,6 +510,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct gred_sched *table = qdisc_priv(sch);
 	struct nlattr *parms, *opts = NULL;
 	int i;
+	u32 max_p[MAX_DPs];
 	struct tc_gred_sopt sopt = {
 		.DPs	= table->DPs,
 		.def_DP	= table->def,
@@ -509,6 +522,14 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+
+	for (i = 0; i < MAX_DPs; i++) {
+		struct gred_sched_data *q = table->tab[i];
+
+		max_p[i] = q ? q->parms.max_P : 0;
+	}
+	NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p);
+
 	parms = nla_nest_start(skb, TCA_GRED_PARMS);
 	if (parms == NULL)
 		goto nla_put_failure;
@@ -545,12 +566,12 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 		opt.bytesin	= q->bytesin;
 
 		if (gred_wred_mode(table)) {
-			q->parms.qidlestart =
-				table->tab[table->def]->parms.qidlestart;
-			q->parms.qavg = table->tab[table->def]->parms.qavg;
+			q->vars.qidlestart =
+				table->tab[table->def]->vars.qidlestart;
+			q->vars.qavg = table->tab[table->def]->vars.qavg;
 		}
 
-		opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
+		opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg);
 
 append_opt:
 		if (nla_append(skb, sizeof(opt), &opt) < 0)
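
GRED shows the point of the series-wide red_parms/red_vars split most
clearly: each virtual queue keeps its own mutable red_vars, and wred_set
(previously a whole red_parms) shrinks to just the state WRED mode actually
shares between DPs. The calling convention after the split, sketched with the
real red_* helpers and an illustrative container:

    struct example_vq {
    	struct red_parms parms;	/* thresholds, Wlog/Plog, stab, max_P */
    	struct red_vars  vars;	/* qavg, qcount, qR, qidlestart */
    };

    static void example_red_step(struct example_vq *vq, unsigned int backlog)
    {
    	vq->vars.qavg = red_calc_qavg(&vq->parms, &vq->vars, backlog);
    	if (red_is_idling(&vq->vars))
    		red_end_of_idle_period(&vq->vars);
    }
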
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 6488e6425652..9bdca2e011e9 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1368,6 +1368,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	struct tc_hfsc_stats xstats;
 
 	cl->qstats.qlen = cl->qdisc->q.qlen;
+	cl->qstats.backlog = cl->qdisc->qstats.backlog;
 	xstats.level   = cl->level;
 	xstats.period  = cl->cl_vtperiod;
 	xstats.work    = cl->cl_total;
@@ -1561,6 +1562,15 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
 	struct hfsc_sched *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_hfsc_qopt qopt;
+	struct hfsc_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
+
+	sch->qstats.backlog = 0;
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+			sch->qstats.backlog += cl->qdisc->qstats.backlog;
+	}
 
 	qopt.defcls = q->defcls;
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950e0e77..49131d7a7446 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), curband))) {
 			qdisc = q->queues[curband];
 			skb = qdisc->ops->peek(qdisc);
 			if (skb)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a4ab207cdc59..e7e1d0b57b3d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <linux/reciprocal_div.h>
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -66,7 +67,11 @@
 */
 
 struct netem_sched_data {
+	/* internal t(ime)fifo qdisc uses sch->q and sch->limit */
+
+	/* optional qdisc for classful handling (NULL at netem init) */
 	struct Qdisc	*qdisc;
+
 	struct qdisc_watchdog watchdog;
 
 	psched_tdiff_t latency;
@@ -79,6 +84,11 @@ struct netem_sched_data {
 	u32 duplicate;
 	u32 reorder;
 	u32 corrupt;
+	u32 rate;
+	s32 packet_overhead;
+	u32 cell_size;
+	u32 cell_size_reciprocal;
+	s32 cell_overhead;
 
 	struct crndstate {
 		u32 last;
@@ -111,7 +121,9 @@ struct netem_sched_data {
 
 };
 
-/* Time stamp put into socket buffer control block */
+/* Time stamp put into socket buffer control block
+ * Only valid when skbs are in our internal t(ime)fifo queue.
+ */
 struct netem_skb_cb {
 	psched_time_t	time_to_send;
 };
@@ -298,6 +310,51 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
+static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
+{
+	u64 ticks;
+
+	len += q->packet_overhead;
+
+	if (q->cell_size) {
+		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
+
+		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
+			cells++;
+		len = cells * (q->cell_size + q->cell_overhead);
+	}
+
+	ticks = (u64)len * NSEC_PER_SEC;
+
+	do_div(ticks, q->rate);
+	return PSCHED_NS2TICKS(ticks);
+}
+
+static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+{
+	struct sk_buff_head *list = &sch->q;
+	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+	struct sk_buff *skb;
+
+	if (likely(skb_queue_len(list) < sch->limit)) {
+		skb = skb_peek_tail(list);
+		/* Optimize for add at tail */
+		if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
+			return qdisc_enqueue_tail(nskb, sch);
+
+		skb_queue_reverse_walk(list, skb) {
+			if (tnext >= netem_skb_cb(skb)->time_to_send)
+				break;
+		}
+
+		__skb_queue_after(list, skb, nskb);
+		sch->qstats.backlog += qdisc_pkt_len(nskb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	return qdisc_reshape_fail(nskb, sch);
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -371,9 +428,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				  &q->delay_cor, q->delay_dist);
 
 		now = psched_get_time();
+
+		if (q->rate) {
+			struct sk_buff_head *list = &sch->q;
+
+			delay += packet_len_2_sched_time(skb->len, q);
+
+			if (!skb_queue_empty(list)) {
+				/*
+				 * Last packet in queue is reference point (now).
+				 * First packet in queue is already in flight,
+				 * calculate this time bonus and subtract it
+				 * from delay.
+				 */
+				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
+			}
+		}
+
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = qdisc_enqueue(skb, q->qdisc);
+		ret = tfifo_enqueue(skb, sch);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -382,9 +457,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		cb->time_to_send = psched_get_time();
 		q->counter = 0;
 
-		__skb_queue_head(&q->qdisc->q, skb);
-		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
-		q->qdisc->qstats.requeues++;
+		__skb_queue_head(&sch->q, skb);
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		sch->qstats.requeues++;
 		ret = NET_XMIT_SUCCESS;
 	}
 
@@ -395,19 +470,20 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		}
 	}
 
-	sch->q.qlen++;
 	return NET_XMIT_SUCCESS;
 }
 
 static unsigned int netem_drop(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
-	unsigned int len = 0;
+	unsigned int len;
 
-	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
-		sch->q.qlen--;
+	len = qdisc_queue_drop(sch);
+	if (!len && q->qdisc && q->qdisc->ops->drop)
+		len = q->qdisc->ops->drop(q->qdisc);
+	if (len)
 		sch->qstats.drops++;
-	}
+
 	return len;
 }
 
@@ -419,16 +495,16 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (qdisc_is_throttled(sch))
 		return NULL;
 
-	skb = q->qdisc->ops->peek(q->qdisc);
+tfifo_dequeue:
+	skb = qdisc_peek_head(sch);
 	if (skb) {
 		const struct netem_skb_cb *cb = netem_skb_cb(skb);
-		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
-		if (cb->time_to_send <= now) {
-			skb = qdisc_dequeue_peeked(q->qdisc);
+		if (cb->time_to_send <= psched_get_time()) {
+			skb = qdisc_dequeue_tail(sch);
 			if (unlikely(!skb))
-				return NULL;
+				goto qdisc_dequeue;
 
 #ifdef CONFIG_NET_CLS_ACT
 			/*
@@ -439,15 +515,37 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 				skb->tstamp.tv64 = 0;
 #endif
 
-			sch->q.qlen--;
+			if (q->qdisc) {
+				int err = qdisc_enqueue(skb, q->qdisc);
+
+				if (unlikely(err != NET_XMIT_SUCCESS)) {
+					if (net_xmit_drop_count(err)) {
+						sch->qstats.drops++;
+						qdisc_tree_decrease_qlen(sch, 1);
+					}
+				}
+				goto tfifo_dequeue;
+			}
+deliver:
 			qdisc_unthrottled(sch);
 			qdisc_bstats_update(sch, skb);
 			return skb;
 		}
 
+		if (q->qdisc) {
+			skb = q->qdisc->ops->dequeue(q->qdisc);
+			if (skb)
+				goto deliver;
+		}
 		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
+qdisc_dequeue:
+	if (q->qdisc) {
+		skb = q->qdisc->ops->dequeue(q->qdisc);
+		if (skb)
+			goto deliver;
+	}
 	return NULL;
 }
 
@@ -455,8 +553,9 @@ static void netem_reset(struct Qdisc *sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	qdisc_reset(q->qdisc);
-	sch->q.qlen = 0;
+	qdisc_reset_queue(sch);
+	if (q->qdisc)
+		qdisc_reset(q->qdisc);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -536,6 +635,19 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 	init_crandom(&q->corrupt_cor, r->correlation);
 }
 
+static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_rate *r = nla_data(attr);
+
+	q->rate = r->rate;
+	q->packet_overhead = r->packet_overhead;
+	q->cell_size = r->cell_size;
+	if (q->cell_size)
+		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
+	q->cell_overhead = r->cell_overhead;
+}
+
 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -549,7 +661,7 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 		case NETEM_LOSS_GI: {
 			const struct tc_netem_gimodel *gi = nla_data(la);
 
-			if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
+			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
 				pr_info("netem: incorrect gi model size\n");
 				return -EINVAL;
 			}
@@ -568,8 +680,8 @@ static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 		case NETEM_LOSS_GE: {
 			const struct tc_netem_gemodel *ge = nla_data(la);
 
-			if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
-				pr_info("netem: incorrect gi model size\n");
+			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
+				pr_info("netem: incorrect ge model size\n");
 				return -EINVAL;
 			}
 
@@ -595,6 +707,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
+	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 };
 
@@ -632,11 +745,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ret < 0)
 		return ret;
 
-	ret = fifo_set_limit(q->qdisc, qopt->limit);
-	if (ret) {
-		pr_info("netem: can't set fifo limit\n");
-		return ret;
-	}
+	sch->limit = qopt->limit;
 
 	q->latency = qopt->latency;
 	q->jitter = qopt->jitter;
@@ -667,6 +776,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_CORRUPT])
 		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 
+	if (tb[TCA_NETEM_RATE])
+		get_rate(sch, tb[TCA_NETEM_RATE]);
+
 	q->loss_model = CLG_RANDOM;
 	if (tb[TCA_NETEM_LOSS])
 		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -674,88 +786,6 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	return ret;
 }
 
-/*
- * Special case version of FIFO queue for use by netem.
- * It queues in order based on timestamps in skb's
- */
-struct fifo_sched_data {
-	u32 limit;
-	psched_time_t oldest;
-};
-
-static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct sk_buff_head *list = &sch->q;
-	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
-	struct sk_buff *skb;
-
-	if (likely(skb_queue_len(list) < q->limit)) {
-		/* Optimize for add at tail */
-		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
-			q->oldest = tnext;
-			return qdisc_enqueue_tail(nskb, sch);
-		}
-
-		skb_queue_reverse_walk(list, skb) {
-			const struct netem_skb_cb *cb = netem_skb_cb(skb);
-
-			if (tnext >= cb->time_to_send)
-				break;
-		}
-
-		__skb_queue_after(list, skb, nskb);
-
-		sch->qstats.backlog += qdisc_pkt_len(nskb);
-
-		return NET_XMIT_SUCCESS;
-	}
-
-	return qdisc_reshape_fail(nskb, sch);
-}
-
-static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-
-	if (opt) {
-		struct tc_fifo_qopt *ctl = nla_data(opt);
-		if (nla_len(opt) < sizeof(*ctl))
-			return -EINVAL;
-
-		q->limit = ctl->limit;
-	} else
-		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
-
-	q->oldest = PSCHED_PASTPERFECT;
-	return 0;
-}
-
-static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
-	struct fifo_sched_data *q = qdisc_priv(sch);
-	struct tc_fifo_qopt opt = { .limit = q->limit };
-
-	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
-	return skb->len;
-
-nla_put_failure:
-	return -1;
-}
-
-static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
-	.id		=	"tfifo",
-	.priv_size	=	sizeof(struct fifo_sched_data),
-	.enqueue	=	tfifo_enqueue,
-	.dequeue	=	qdisc_dequeue_head,
-	.peek		=	qdisc_peek_head,
-	.drop		=	qdisc_queue_drop,
-	.init		=	tfifo_init,
-	.reset		=	qdisc_reset_queue,
-	.change		=	tfifo_init,
-	.dump		=	tfifo_dump,
-};
-
 static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -767,18 +797,9 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	q->loss_model = CLG_RANDOM;
-	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
-				     TC_H_MAKE(sch->handle, 1));
-	if (!q->qdisc) {
-		pr_notice("netem: qdisc create tfifo qdisc failed\n");
-		return -ENOMEM;
-	}
-
 	ret = netem_change(sch, opt);
-	if (ret) {
+	if (ret)
 		pr_info("netem: change failed\n");
-		qdisc_destroy(q->qdisc);
-	}
 	return ret;
 }
 
@@ -787,7 +808,8 @@ static void netem_destroy(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 
 	qdisc_watchdog_cancel(&q->watchdog);
-	qdisc_destroy(q->qdisc);
+	if (q->qdisc)
+		qdisc_destroy(q->qdisc);
 	dist_free(q->delay_dist);
 }
 
@@ -847,6 +869,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
 	struct tc_netem_corrupt corrupt;
+	struct tc_netem_rate rate;
 
 	qopt.latency = q->latency;
 	qopt.jitter = q->jitter;
@@ -869,6 +892,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	corrupt.correlation = q->corrupt_cor.rho;
 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
+	rate.rate = q->rate;
+	rate.packet_overhead = q->packet_overhead;
+	rate.cell_size = q->cell_size;
+	rate.cell_overhead = q->cell_overhead;
+	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
+
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;
 
@@ -884,7 +913,7 @@ static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	if (cl != 1) 	/* only one class */
+	if (cl != 1 || !q->qdisc) 	/* only one class */
 		return -ENOENT;
 
 	tcm->tcm_handle |= TC_H_MIN(1);
@@ -898,14 +927,13 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	if (new == NULL)
-		new = &noop_qdisc;
-
 	sch_tree_lock(sch);
 	*old = q->qdisc;
 	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
+	if (*old) {
+		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+		qdisc_reset(*old);
+	}
 	sch_tree_unlock(sch);
 
 	return 0;
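
With the tfifo child qdisc folded into netem's own sch->q, the new
TCA_NETEM_RATE option can charge serialization delay at enqueue time.
Stripped of the packet_overhead and cell-rounding terms,
packet_len_2_sched_time() reduces to len * NSEC_PER_SEC / rate; assuming
rate is in bytes per second as tc encodes it, a 1500-byte packet at
125000 B/s (1 Mbit/s) costs 12,000,000 ns, i.e. 12 ms. A sketch of that
core arithmetic:

    static u64 example_tx_time_ns(unsigned int len, u32 rate_bytes_per_sec)
    {
    	u64 ns = (u64)len * NSEC_PER_SEC;

    	do_div(ns, rate_bytes_per_sec);	/* ns now holds the quotient */
    	return ns;
    }
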
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 7b0325459e71..e68cb440756a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -211,6 +211,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	struct nlattr *tb[TCA_QFQ_MAX + 1];
 	u32 weight, lmax, inv_w;
 	int i, err;
+	int delta_w;
 
 	if (tca[TCA_OPTIONS] == NULL) {
 		pr_notice("qfq: no options\n");
@@ -232,9 +233,10 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	inv_w = ONE_FP / weight;
 	weight = ONE_FP / inv_w;
-	if (q->wsum + weight > QFQ_MAX_WSUM) {
+	delta_w = weight - (cl ? ONE_FP / cl->inv_w : 0);
+	if (q->wsum + delta_w > QFQ_MAX_WSUM) {
 		pr_notice("qfq: total weight out of range (%u + %u)\n",
-			  weight, q->wsum);
+			  delta_w, q->wsum);
 		return -EINVAL;
 	}
 
@@ -256,13 +258,12 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 				return err;
 		}
 
-		sch_tree_lock(sch);
-		if (tb[TCA_QFQ_WEIGHT]) {
-			q->wsum = weight - ONE_FP / cl->inv_w;
+		if (inv_w != cl->inv_w) {
+			sch_tree_lock(sch);
+			q->wsum += delta_w;
 			cl->inv_w = inv_w;
+			sch_tree_unlock(sch);
 		}
-		sch_tree_unlock(sch);
-
 		return 0;
 	}
 
@@ -277,7 +278,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	i = qfq_calc_index(cl->inv_w, cl->lmax);
 
 	cl->grp = &q->groups[i];
-	q->wsum += weight;
 
 	cl->qdisc = qdisc_create_dflt(sch->dev_queue,
 				      &pfifo_qdisc_ops, classid);
@@ -294,6 +294,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			return err;
 		}
 	}
+	q->wsum += weight;
 
 	sch_tree_lock(sch);
 	qdisc_class_hash_insert(&q->clhash, &cl->common);
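
The weight fix above charges only the delta against the limit: an existing
class already contributes ONE_FP / cl->inv_w to q->wsum, so re-checking its
full new weight would double-count. A sketch with illustrative numbers (the
in-tree QFQ_MAX_WSUM is larger):

    /* With max_wsum = 64 and wsum = 60, raising one class from weight 10
     * to 12 must pass (60 + 2 <= 64); charging the whole new weight
     * (60 + 12 > 64) would wrongly reject the change.
     */
    static bool example_weight_change_ok(u32 wsum, u32 w_old, u32 w_new,
    				     u32 max_wsum)
    {
    	s32 delta_w = (s32)w_new - (s32)w_old;

    	return (s32)wsum + delta_w <= (s32)max_wsum;
    }
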
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d617161f8dd3..a5cc3012cf42 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,7 +39,9 @@
 struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
+	struct timer_list	adapt_timer;
 	struct red_parms	parms;
+	struct red_vars		vars;
 	struct red_stats	stats;
 	struct Qdisc		*qdisc;
 };
@@ -60,12 +62,14 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct Qdisc *child = q->qdisc;
 	int ret;
 
-	q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
+	q->vars.qavg = red_calc_qavg(&q->parms,
+				     &q->vars,
+				     child->qstats.backlog);
 
-	if (red_is_idling(&q->parms))
-		red_end_of_idle_period(&q->parms);
+	if (red_is_idling(&q->vars))
+		red_end_of_idle_period(&q->vars);
 
-	switch (red_action(&q->parms, q->parms.qavg)) {
+	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
 	case RED_DONT_MARK:
 		break;
 
@@ -116,8 +120,8 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch)
 		qdisc_bstats_update(sch, skb);
 		sch->q.qlen--;
 	} else {
-		if (!red_is_idling(&q->parms))
-			red_start_of_idle_period(&q->parms);
+		if (!red_is_idling(&q->vars))
+			red_start_of_idle_period(&q->vars);
 	}
 	return skb;
 }
@@ -143,8 +147,8 @@ static unsigned int red_drop(struct Qdisc *sch)
 		return len;
 	}
 
-	if (!red_is_idling(&q->parms))
-		red_start_of_idle_period(&q->parms);
+	if (!red_is_idling(&q->vars))
+		red_start_of_idle_period(&q->vars);
 
 	return 0;
 }
@@ -155,18 +159,21 @@ static void red_reset(struct Qdisc *sch)
 
 	qdisc_reset(q->qdisc);
 	sch->q.qlen = 0;
-	red_restart(&q->parms);
+	red_restart(&q->vars);
 }
 
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->adapt_timer);
 	qdisc_destroy(q->qdisc);
 }
 
 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
+	[TCA_RED_MAX_P] = { .type = NLA_U32 },
 };
 
 static int red_change(struct Qdisc *sch, struct nlattr *opt)
@@ -176,6 +183,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_red_qopt *ctl;
 	struct Qdisc *child = NULL;
 	int err;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -188,6 +196,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_RED_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+
 	ctl = nla_data(tb[TCA_RED_PARMS]);
 
 	if (ctl->limit > 0) {
@@ -205,22 +215,42 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 		q->qdisc = child;
 	}
 
-	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
-				 ctl->Plog, ctl->Scell_log,
-				 nla_data(tb[TCA_RED_STAB]));
+	red_set_parms(&q->parms,
+		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
+		      ctl->Plog, ctl->Scell_log,
+		      nla_data(tb[TCA_RED_STAB]),
+		      max_P);
+	red_set_vars(&q->vars);
+
+	del_timer(&q->adapt_timer);
+	if (ctl->flags & TC_RED_ADAPTATIVE)
+		mod_timer(&q->adapt_timer, jiffies + HZ/2);
 
 	if (!q->qdisc->q.qlen)
-		red_start_of_idle_period(&q->parms);
+		red_start_of_idle_period(&q->vars);
 
 	sch_tree_unlock(sch);
 	return 0;
 }
 
+static inline void red_adaptative_timer(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct red_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+	spin_lock(root_lock);
+	red_adaptative_algo(&q->parms, &q->vars);
+	mod_timer(&q->adapt_timer, jiffies + HZ/2);
+	spin_unlock(root_lock);
+}
+
 static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
+	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
 	return red_change(sch, opt);
 }
 
@@ -243,6 +273,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
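
The adapt_timer added above fires every 500 ms and lets
red_adaptative_algo() nudge parms.max_P under the root qdisc lock, which is
what makes TCA_RED_MAX_P worth dumping back to userspace. The attribute
encodes probability as fixed point scaled by 2^32 — an assumption taken from
the way tc computes it, worth verifying against your headers. A
userspace-side sketch of the encoding:

    static u32 example_prob_to_max_P(double p)	/* expects 0.0 <= p < 1.0 */
    {
    	return (u32)(p * 4294967296.0);		/* p * 2^32 */
    }
    /* example_prob_to_max_P(0.02) == 0x051eb851, i.e. 2% marking prob. */
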
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index e83c272c0325..96e42cae4c7a 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,6 +26,7 @@
 #include <net/ip.h>
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
+#include <net/flow_keys.h>
 
 /*
  * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -286,6 +287,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	u32 minqlen = ~0;
 	u32 r, slot, salt, sfbhash;
 	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	struct flow_keys keys;
 
 	if (unlikely(sch->q.qlen >= q->limit)) {
 		sch->qstats.overlimits++;
@@ -309,13 +311,19 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		/* If using external classifiers, get result and record it. */
 		if (!sfb_classify(skb, q, &ret, &salt))
 			goto other_drop;
+		keys.src = salt;
+		keys.dst = 0;
+		keys.ports = 0;
 	} else {
-		salt = skb_get_rxhash(skb);
+		skb_flow_dissect(skb, &keys);
 	}
 
 	slot = q->slot;
 
-	sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+	sfbhash = jhash_3words((__force u32)keys.dst,
+			       (__force u32)keys.src,
+			       (__force u32)keys.ports,
+			       q->bins[slot].perturbation);
 	if (!sfbhash)
 		sfbhash = 1;
 	sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -347,7 +355,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (unlikely(p_min >= SFB_MAX_PROB)) {
 		/* Inelastic flow */
 		if (q->double_buffering) {
-			sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+			sfbhash = jhash_3words((__force u32)keys.dst,
+					       (__force u32)keys.src,
+					       (__force u32)keys.ports,
+					       q->bins[slot].perturbation);
 			if (!sfbhash)
 				sfbhash = 1;
 			sfb_skb_cb(skb)->hashes[slot] = sfbhash;
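
Both hashing sites in sfb_enqueue() now feed jhash_3words() with the
dissected keys plus the per-slot perturbation; the external-classifier branch
keeps working by fabricating a flow_keys with the classifier result in .src
and zeroed .dst/.ports. The shared pattern as a sketch (zero is reserved to
mean "no hash", hence the fixup):

    static u32 example_sfb_hash(const struct flow_keys *keys, u32 perturbation)
    {
    	u32 h = jhash_3words((__force u32)keys->dst,
    			     (__force u32)keys->src,
    			     (__force u32)keys->ports, perturbation);

    	return h ? : 1;	/* 0 would read as "hash not set" */
    }
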
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 4f5510e2bd6f..0a7964009e8c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -17,14 +17,13 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/init.h>
-#include <linux/ipv6.h>
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
+#include <net/flow_keys.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -67,16 +66,18 @@
 	SFQ is superior for this purpose.
 
 	IMPLEMENTATION:
-	This implementation limits maximal queue length to 128;
-	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
-	The only goal of this restrictions was that all data
-	fit into one 4K page on 32bit arches.
+	This implementation limits :
+	- maximal queue length per flow to 127 packets.
+	- max mtu to 2^18-1;
+	- max 65408 flows,
+	- number of hash buckets to 65536.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128 /* max number of packets per flow */
-#define SFQ_SLOTS		128 /* max number of flows */
-#define SFQ_EMPTY_SLOT		255
+#define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
+#define SFQ_DEFAULT_FLOWS	128
+#define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
+#define SFQ_EMPTY_SLOT		0xffff
 #define SFQ_DEFAULT_HASH_DIVISOR 1024
 
 /* We use 16 bits to store allot, and want to handle packets up to 64K
@@ -85,13 +86,13 @@
 #define SFQ_ALLOT_SHIFT		3
 #define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
-/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
-typedef unsigned char sfq_index;
+/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
+typedef u16 sfq_index;
 
 /*
  * We dont use pointers to save space.
- * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
- * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
  * are 'pointers' to dep[] array
  */
 struct sfq_head {
@@ -103,28 +104,38 @@ struct sfq_slot {
 	struct sk_buff	*skblist_next;
 	struct sk_buff	*skblist_prev;
 	sfq_index	qlen; /* number of skbs in skblist */
-	sfq_index	next; /* next slot in sfq chain */
+	sfq_index	next; /* next slot in sfq RR chain */
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
 };
 
 struct sfq_sched_data {
-/* Parameters */
-	int		perturb_period;
-	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
-	int		limit;
+/* frequently used fields */
+	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-/* Variables */
-	struct tcf_proto *filter_list;
-	struct timer_list perturb_timer;
+	unsigned int	maxflows;	/* number of flows in flows array */
+	int		headdrop;
+	int		maxdepth;	/* limit of packets per flow */
+
 	u32		perturbation;
+	struct tcf_proto *filter_list;
 	sfq_index	cur_depth;	/* depth of longest slot */
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
-	sfq_index	*ht;		/* Hash table (divisor slots) */
-	struct sfq_slot	slots[SFQ_SLOTS];
-	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
+	sfq_index	*ht;		/* Hash table ('divisor' slots) */
+	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
+
+	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
+					/* Linked lists of slots, indexed by depth
+					 * dep[0] : list of unused flows
+					 * dep[1] : list of flows with 1 packet
+					 * dep[X] : list of flows with X packets
+					 */
+
+	int		perturb_period;
+	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
+	struct timer_list perturb_timer;
 };
 
 /*
@@ -132,66 +143,36 @@ struct sfq_sched_data {
  */
 static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
 {
-	if (val < SFQ_SLOTS)
+	if (val < SFQ_MAX_FLOWS)
 		return &q->slots[val].dep;
-	return &q->dep[val - SFQ_SLOTS];
+	return &q->dep[val - SFQ_MAX_FLOWS];
 }
 
-static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+/*
+ * In order to be able to quickly rehash our queue when timer changes
+ * q->perturbation, we store flow_keys in skb->cb[]
+ */
+struct sfq_skb_cb {
+	struct flow_keys	keys;
+};
+
+static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
 {
-	return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
+	BUILD_BUG_ON(sizeof(skb->cb) <
+		     sizeof(struct qdisc_skb_cb) + sizeof(struct sfq_skb_cb));
+	return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
-static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(const struct sfq_sched_data *q,
+			     const struct sk_buff *skb)
 {
-	u32 h, h2;
-
-	switch (skb->protocol) {
-	case htons(ETH_P_IP):
-	{
-		const struct iphdr *iph;
-		int poff;
-
-		if (!pskb_network_may_pull(skb, sizeof(*iph)))
-			goto err;
-		iph = ip_hdr(skb);
-		h = (__force u32)iph->daddr;
-		h2 = (__force u32)iph->saddr ^ iph->protocol;
-		if (ip_is_fragment(iph))
-			break;
-		poff = proto_ports_offset(iph->protocol);
-		if (poff >= 0 &&
-		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
-			iph = ip_hdr(skb);
-			h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
-		}
-		break;
-	}
-	case htons(ETH_P_IPV6):
-	{
-		const struct ipv6hdr *iph;
-		int poff;
-
-		if (!pskb_network_may_pull(skb, sizeof(*iph)))
-			goto err;
-		iph = ipv6_hdr(skb);
-		h = (__force u32)iph->daddr.s6_addr32[3];
-		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
-		poff = proto_ports_offset(iph->nexthdr);
-		if (poff >= 0 &&
-		    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
-			iph = ipv6_hdr(skb);
-			h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
-		}
-		break;
-	}
-	default:
-err:
-		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
-		h2 = (unsigned long)skb->sk;
-	}
+	const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
+	unsigned int hash;
 
-	return sfq_fold_hash(q, h, h2);
+	hash = jhash_3words((__force u32)keys->dst,
+			    (__force u32)keys->src ^ keys->ip_proto,
+			    (__force u32)keys->ports, q->perturbation);
+	return hash & (q->divisor - 1);
 }
 
 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -206,8 +187,10 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	    TC_H_MIN(skb->priority) <= q->divisor)
 		return TC_H_MIN(skb->priority);
 
-	if (!q->filter_list)
+	if (!q->filter_list) {
+		skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
 		return sfq_hash(q, skb) + 1;
+	}
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 	result = tc_classify(skb, q->filter_list, &res);
@@ -228,18 +211,19 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 }
 
 /*
- * x : slot number [0 .. SFQ_SLOTS - 1]
+ * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
  */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int qlen = q->slots[x].qlen;
+	struct sfq_slot *slot = &q->slots[x];
+	int qlen = slot->qlen;
 
-	p = qlen + SFQ_SLOTS;
+	p = qlen + SFQ_MAX_FLOWS;
 	n = q->dep[qlen].next;
 
-	q->slots[x].dep.next = n;
-	q->slots[x].dep.prev = p;
+	slot->dep.next = n;
+	slot->dep.prev = p;
 
 	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
 	sfq_dep_head(q, n)->prev = x;
@@ -304,6 +288,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 
 static inline void slot_queue_init(struct sfq_slot *slot)
 {
+	memset(slot, 0, sizeof(*slot));
 	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
 }
 
@@ -334,7 +319,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 		x = q->dep[d].next;
 		slot = &q->slots[x];
 drop:
-		skb = slot_dequeue_tail(slot);
+		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
 		sfq_dec(q, x);
 		kfree_skb(skb);
@@ -378,16 +363,27 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	slot = &q->slots[x];
 	if (x == SFQ_EMPTY_SLOT) {
 		x = q->dep[0].next; /* get a free slot */
+		if (x >= SFQ_MAX_FLOWS)
+			return qdisc_drop(skb, sch);
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
 	}
 
-	/* If selected queue has length q->limit, do simple tail drop,
-	 * i.e. drop _this_ packet.
-	 */
-	if (slot->qlen >= q->limit)
-		return qdisc_drop(skb, sch);
+	if (slot->qlen >= q->maxdepth) {
+		struct sk_buff *head;
+
+		if (!q->headdrop)
+			return qdisc_drop(skb, sch);
+
+		head = slot_dequeue_head(slot);
+		sch->qstats.backlog -= qdisc_pkt_len(head);
+		qdisc_drop(head, sch);
+
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		slot_queue_add(slot, skb);
+		return NET_XMIT_CN;
+	}
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
@@ -395,11 +391,11 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (slot->qlen == 1) {		/* The flow is new */
 		if (q->tail == NULL) {	/* It is the first flow */
 			slot->next = x;
+			q->tail = slot;
 		} else {
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
-		q->tail = slot;
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit)
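The enqueue path now enforces a per-flow depth (q->maxdepth), and with the new headdrop mode a full flow sheds its oldest packet and accepts the arriving one, returning NET_XMIT_CN to signal congestion. For TCP this matters: the head of the queue is the segment whose loss the receiver can report soonest through duplicate ACKs, so the sender gets its congestion signal roughly one full queue earlier than with tail drop. Note the hunk also links a new flow right after the current tail instead of making it the new tail, so a freshly started flow is served on the next round rather than after every existing flow. With a matching iproute2 this is enabled by something like "tc qdisc add dev eth0 root sfq depth 30 headdrop"; the exact tc keywords are an assumption. A toy model of the two drop policies, independent of the kernel code:

	/* Toy model: a flow queue of depth 3 receiving packets 1..6.
	 * Tail drop keeps the oldest packets; head drop keeps the newest,
	 * so the receiver sees the gap (and can signal it) sooner. */
	#include <stdio.h>

	#define DEPTH 3

	static void run(int headdrop)
	{
		int q[DEPTH], head = 0, len = 0;

		for (int pkt = 1; pkt <= 6; pkt++) {
			if (len == DEPTH) {
				if (!headdrop)
					continue;		/* drop arriving packet */
				head = (head + 1) % DEPTH;	/* drop oldest packet */
				len--;
			}
			q[(head + len++) % DEPTH] = pkt;
		}
		printf("%s drop kept:", headdrop ? "head" : "tail");
		for (int i = 0; i < len; i++)
			printf(" %d", q[(head + i) % DEPTH]);
		printf("\n");
	}

	int main(void)
	{
		run(0);	/* tail drop keeps 1 2 3 */
		run(1);	/* head drop keeps 4 5 6 */
		return 0;
	}
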
@@ -468,12 +464,83 @@ sfq_reset(struct Qdisc *sch)
 		kfree_skb(skb);
 }
 
+/*
+ * When q->perturbation is changed, we rehash all queued skbs
+ * to avoid out-of-order (OOO) effects.
+ * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
+ * counters.
+ */
+static void sfq_rehash(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+	int i;
+	struct sfq_slot *slot;
+	struct sk_buff_head list;
+	int dropped = 0;
+
+	__skb_queue_head_init(&list);
+
+	for (i = 0; i < q->maxflows; i++) {
+		slot = &q->slots[i];
+		if (!slot->qlen)
+			continue;
+		while (slot->qlen) {
+			skb = slot_dequeue_head(slot);
+			sfq_dec(q, i);
+			__skb_queue_tail(&list, skb);
+		}
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+	}
+	q->tail = NULL;
+
+	while ((skb = __skb_dequeue(&list)) != NULL) {
+		unsigned int hash = sfq_hash(q, skb);
+		sfq_index x = q->ht[hash];
+
+		slot = &q->slots[x];
+		if (x == SFQ_EMPTY_SLOT) {
+			x = q->dep[0].next; /* get a free slot */
+			if (x >= SFQ_MAX_FLOWS) {
+drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				kfree_skb(skb);
+				dropped++;
+				continue;
+			}
+			q->ht[hash] = x;
+			slot = &q->slots[x];
+			slot->hash = hash;
+		}
+		if (slot->qlen >= q->maxdepth)
+			goto drop;
+		slot_queue_add(slot, skb);
+		sfq_inc(q, x);
+		if (slot->qlen == 1) {		/* The flow is new */
+			if (q->tail == NULL) {	/* It is the first flow */
+				slot->next = x;
+			} else {
+				slot->next = q->tail->next;
+				q->tail->next = x;
+			}
+			q->tail = slot;
+			slot->allot = q->scaled_quantum;
+		}
+	}
+	sch->q.qlen -= dropped;
+	qdisc_tree_decrease_qlen(sch, dropped);
+}
+
 static void sfq_perturbation(unsigned long arg)
 {
 	struct Qdisc *sch = (struct Qdisc *)arg;
 	struct sfq_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 
+	spin_lock(root_lock);
 	q->perturbation = net_random();
+	if (!q->filter_list && q->tail)
+		sfq_rehash(sch);
+	spin_unlock(root_lock);
 
 	if (q->perturb_period)
 		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
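sfq_perturbation() now takes the qdisc root lock and rehashes every queued skb when the seed changes. Previously, re-seeding moved a live flow to a new bucket while its already-queued packets remained in the old slot, so the flow could briefly be serviced from two slots and delivered out of order; sfq_rehash() drains all slots onto a private list and reinserts each skb under the new seed, dropping (and accounting via qdisc_tree_decrease_qlen()) whatever no longer fits. The effect of the seed on bucket choice, shown with the same toy mixer as in the earlier sketch:

	#include <stdint.h>
	#include <stdio.h>

	/* toy stand-in for jhash_3words(), as in the earlier sketch */
	static uint32_t hash32(uint32_t a, uint32_t b, uint32_t c, uint32_t seed)
	{
		uint32_t h = seed ^ a;

		h = h * 0x9e3779b1u ^ b;
		h = h * 0x9e3779b1u ^ c;
		return h ^ (h >> 16);
	}

	int main(void)
	{
		uint32_t dst = 0x0a000002, src_x_proto = 0x0a000001 ^ 6;
		uint32_t ports = (1234u << 16) | 80;
		unsigned int divisor = 1024;

		/* one flow, two perturbation seeds: almost surely two buckets */
		printf("seed A: bucket %u\n",
		       hash32(dst, src_x_proto, ports, 0x11111111) & (divisor - 1));
		printf("seed B: bucket %u\n",
		       hash32(dst, src_x_proto, ports, 0x22222222) & (divisor - 1));
		return 0;
	}
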
@@ -483,23 +550,39 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = nla_data(opt);
+	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
-
+	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
+		ctl_v1 = nla_data(opt);
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
 
 	sch_tree_lock(sch);
-	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
-	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	if (ctl->quantum) {
+		q->quantum = ctl->quantum;
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	}
 	q->perturb_period = ctl->perturb_period * HZ;
-	if (ctl->limit)
-		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-	if (ctl->divisor)
+	if (ctl->flows)
+		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+	if (ctl->divisor) {
 		q->divisor = ctl->divisor;
+		q->maxflows = min_t(u32, q->maxflows, q->divisor);
+	}
+	if (ctl_v1) {
+		if (ctl_v1->depth)
+			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		q->headdrop = ctl_v1->headdrop;
+	}
+	if (ctl->limit) {
+		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
+		q->maxflows = min_t(u32, q->maxflows, q->limit);
+	}
+
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);
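Option parsing stays backward compatible purely through attribute length: an old tc sends a TCA_OPTIONS payload of sizeof(struct tc_sfq_qopt), a new one sends the larger tc_sfq_qopt_v1 that embeds v0 as its first member, and the kernel reads the depth/headdrop fields only when nla_len says they are present. A user-space sketch (struct layouts paraphrased from linux/pkt_sched.h as of this series; later kernels append further fields to v1):

	#include <stdio.h>
	#include <string.h>

	/* paraphrased from linux/pkt_sched.h */
	struct tc_sfq_qopt {
		unsigned int	quantum;	/* bytes per round per flow */
		int		perturb_period;	/* seconds between rehashes */
		unsigned int	limit;		/* max packets in the qdisc */
		unsigned int	divisor;	/* hash table size */
		unsigned int	flows;		/* max concurrent flows */
	};

	struct tc_sfq_qopt_v1 {
		struct tc_sfq_qopt v0;		/* must stay first, for old kernels */
		unsigned int	depth;		/* max packets per flow */
		unsigned int	headdrop;	/* drop at head when a flow is full */
	};

	int main(void)
	{
		struct tc_sfq_qopt_v1 opt;

		memset(&opt, 0, sizeof(opt));
		opt.v0.divisor	= 1024;
		opt.v0.flows	= 512;
		opt.depth	= 30;
		opt.headdrop	= 1;

		/* an old kernel parses only the leading v0 bytes and still works */
		printf("v0: %zu bytes, v1: %zu bytes\n",
		       sizeof(struct tc_sfq_qopt), sizeof(struct tc_sfq_qopt_v1));
		return 0;
	}
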
@@ -514,46 +597,77 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
+static void *sfq_alloc(size_t sz)
+{
+	void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
+
+	if (!ptr)
+		ptr = vmalloc(sz);
+	return ptr;
+}
+
+static void sfq_free(void *addr)
+{
+	if (addr) {
+		if (is_vmalloc_addr(addr))
+			vfree(addr);
+		else
+			kfree(addr);
+	}
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	q->perturb_period = 0;
+	del_timer_sync(&q->perturb_timer);
+	sfq_free(q->ht);
+	sfq_free(q->slots);
+}
+
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	size_t sz;
 	int i;
 
 	q->perturb_timer.function = sfq_perturbation;
 	q->perturb_timer.data = (unsigned long)sch;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i = 0; i < SFQ_DEPTH; i++) {
-		q->dep[i].next = i + SFQ_SLOTS;
-		q->dep[i].prev = i + SFQ_SLOTS;
+	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
+		q->dep[i].next = i + SFQ_MAX_FLOWS;
+		q->dep[i].prev = i + SFQ_MAX_FLOWS;
 	}
 
-	q->limit = SFQ_DEPTH - 1;
+	q->limit = SFQ_MAX_DEPTH;
+	q->maxdepth = SFQ_MAX_DEPTH;
 	q->cur_depth = 0;
 	q->tail = NULL;
 	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
-	if (opt == NULL) {
-		q->quantum = psched_mtu(qdisc_dev(sch));
-		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
-		q->perturb_period = 0;
-		q->perturbation = net_random();
-	} else {
+	q->maxflows = SFQ_DEFAULT_FLOWS;
+	q->quantum = psched_mtu(qdisc_dev(sch));
+	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	q->perturb_period = 0;
+	q->perturbation = net_random();
+
+	if (opt) {
 		int err = sfq_change(sch, opt);
 		if (err)
 			return err;
 	}
 
-	sz = sizeof(q->ht[0]) * q->divisor;
-	q->ht = kmalloc(sz, GFP_KERNEL);
-	if (!q->ht && sz > PAGE_SIZE)
-		q->ht = vmalloc(sz);
-	if (!q->ht)
+	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
+	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
+	if (!q->ht || !q->slots) {
+		sfq_destroy(sch);
 		return -ENOMEM;
+	}
 	for (i = 0; i < q->divisor; i++)
 		q->ht[i] = SFQ_EMPTY_SLOT;
 
-	for (i = 0; i < SFQ_SLOTS; i++) {
+	for (i = 0; i < q->maxflows; i++) {
 		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
 	}
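The switch to sfq_alloc()/sfq_free() is what makes the larger limits practical. With divisor at its 65536 maximum the hash table alone is 65536 * sizeof(sfq_index) bytes, i.e. 128 KiB assuming the two-byte sfq_index this series uses, and q->slots at a few dozen bytes per slot (an estimate) can approach two megabytes at SFQ_MAX_FLOWS; a kmalloc() of that size can fail on a fragmented machine, so the helper quietly (__GFP_NOWARN) falls back to vmalloc(). This also explains why init now calls sfq_destroy() on its error path: either of the two buffers may have been allocated before the other failed, and sfq_free() picks vfree() or kfree() by inspecting the address.
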
@@ -564,31 +678,20 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
-static void sfq_destroy(struct Qdisc *sch)
-{
-	struct sfq_sched_data *q = qdisc_priv(sch);
-
-	tcf_destroy_chain(&q->filter_list);
-	q->perturb_period = 0;
-	del_timer_sync(&q->perturb_timer);
-	if (is_vmalloc_addr(q->ht))
-		vfree(q->ht);
-	else
-		kfree(q->ht);
-}
-
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_sfq_qopt opt;
-
-	opt.quantum = q->quantum;
-	opt.perturb_period = q->perturb_period / HZ;
-
-	opt.limit = q->limit;
-	opt.divisor = q->divisor;
-	opt.flows = q->limit;
+	struct tc_sfq_qopt_v1 opt;
+
+	memset(&opt, 0, sizeof(opt));
+	opt.v0.quantum	= q->quantum;
+	opt.v0.perturb_period = q->perturb_period / HZ;
+	opt.v0.limit	= q->limit;
+	opt.v0.divisor	= q->divisor;
+	opt.v0.flows	= q->maxflows;
+	opt.depth	= q->maxdepth;
+	opt.headdrop	= q->headdrop;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1dcfb5223a86..b8e156319d7b 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -346,6 +346,7 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct nlattr *nest;
 	struct tc_tbf_qopt opt;
 
+	sch->qstats.backlog = q->qdisc->qstats.backlog;
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 4f4c52c0eeb3..45326599fda3 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -277,7 +277,7 @@ static inline int teql_resolve(struct sk_buff *skb,
 		return 0;
 
 	rcu_read_lock();
-	mn = dst_get_neighbour(dst);
+	mn = dst_get_neighbour_noref(dst);
 	res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
 	rcu_read_unlock();
 
@@ -310,7 +310,7 @@ restart:
 
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
-		if (__netif_subqueue_stopped(slave, subq) ||
+		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 		    !netif_running(slave)) {
 			busy = 1;
 			continue;
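The teql changes are mechanical fallout from the netdev_queue state rework in this series: netif_xmit_stopped() tests a single queue's XOFF bits where the old code mixed per-device and per-subqueue helpers, and netif_xmit_frozen_or_stopped() additionally tests the frozen bit. dst_get_neighbour_noref() is likewise just a rename of dst_get_neighbour(), making explicit that the returned pointer is RCU-protected and carries no reference. A self-contained model of the stopped/frozen distinction (the QUEUE_STATE_* names mirror the kernel's, as an assumption):

	#include <stdbool.h>
	#include <stdio.h>

	#define QUEUE_STATE_DRV_XOFF	(1 << 0)	/* driver stopped the queue */
	#define QUEUE_STATE_STACK_XOFF	(1 << 1)	/* stack stopped the queue */
	#define QUEUE_STATE_FROZEN	(1 << 2)	/* queue frozen for config */

	#define QUEUE_STATE_ANY_XOFF	(QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF)
	#define QUEUE_STATE_ANY_XOFF_OR_FROZEN \
		(QUEUE_STATE_ANY_XOFF | QUEUE_STATE_FROZEN)

	struct txq { unsigned long state; };

	static bool xmit_stopped(const struct txq *q)
	{
		return q->state & QUEUE_STATE_ANY_XOFF;
	}

	static bool xmit_frozen_or_stopped(const struct txq *q)
	{
		return q->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
	}

	int main(void)
	{
		struct txq q = { QUEUE_STATE_FROZEN };

		/* frozen but not stopped: prints "stopped=0 frozen_or_stopped=1" */
		printf("stopped=%d frozen_or_stopped=%d\n",
		       xmit_stopped(&q), xmit_frozen_or_stopped(&q));
		return 0;
	}
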
@@ -321,7 +321,7 @@ restart:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
 
-				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
@@ -333,7 +333,7 @@ restart:
 				}
 				__netif_tx_unlock(slave_txq);
 			}
-			if (netif_queue_stopped(dev))
+			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 				busy = 1;
 			break;
 		case 1: