summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--include/net/ip6_fib.h1
-rw-r--r--include/net/ip6_route.h7
-rw-r--r--net/ipv6/ip6_fib.c8
-rw-r--r--net/ipv6/route.c96
4 files changed, 106 insertions, 6 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index ddf53dd1e948..97cd05d87780 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -149,6 +149,7 @@ struct rt6_info {
 	 */
 	struct list_head		rt6i_siblings;
 	unsigned int			rt6i_nsiblings;
+	atomic_t			rt6i_nh_upper_bound;
 
 	atomic_t			rt6i_ref;
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 34cd3b0c6ded..27d23a65f3cd 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt)
+{
+	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+	       RTF_GATEWAY;
+}
+
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct net_device *dev,
@@ -171,6 +177,7 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags);
 void rt6_disable_ip(struct net_device *dev, unsigned long event);
 void rt6_sync_down_dev(struct net_device *dev, unsigned long event);
+void rt6_multipath_rebalance(struct rt6_info *rt);
 
 static inline const struct rt6_info *skb_rt6_info(const struct sk_buff *skb)
 {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b5f19703fca6..e31118f417b4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -796,12 +796,6 @@ insert_above:
 	return ln;
 }
 
-static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
-{
-	return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
-	       RTF_GATEWAY;
-}
-
 static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
 {
 	int i;
@@ -991,6 +985,7 @@ next_iter:
 			rt6i_nsiblings++;
 		}
 		BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+		rt6_multipath_rebalance(temp_sibling);
 	}
 
 	/*
@@ -1672,6 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
 			sibling->rt6i_nsiblings--;
 		rt->rt6i_nsiblings = 0;
 		list_del_init(&rt->rt6i_siblings);
+		rt6_multipath_rebalance(next_sibling);
 	}
 
 	/* Adjust walkers */
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1054b059747f..ced2c9bed10b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3481,6 +3481,99 @@ struct arg_netdev_event {
 	};
 };
 
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+	struct fib6_node *fn;
+
+	fn = rcu_dereference_protected(rt->rt6i_node,
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	iter = rcu_dereference_protected(fn->leaf,
+			lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	while (iter) {
+		if (iter->rt6i_metric == rt->rt6i_metric &&
+		    rt6_qualify_for_ecmp(iter))
+			return iter;
+		iter = rcu_dereference_protected(iter->rt6_next,
+				lockdep_is_held(&rt->rt6i_table->tb6_lock));
+	}
+
+	return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+	if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+	    (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+		return true;
+
+	return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+	struct rt6_info *iter;
+	int total = 0;
+
+	if (!rt6_is_dead(rt))
+		total++;
+
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+		if (!rt6_is_dead(iter))
+			total++;
+	}
+
+	return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+	int upper_bound = -1;
+
+	if (!rt6_is_dead(rt)) {
+		(*weight)++;
+		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+						    total) - 1;
+	}
+	atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+	struct rt6_info *iter;
+	int weight = 0;
+
+	rt6_upper_bound_set(rt, &weight, total);
+
+	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+		rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+	struct rt6_info *first;
+	int total;
+
+	/* In case the entire multipath route was marked for flushing,
+	 * then there is no need to rebalance upon the removal of every
+	 * sibling route.
+	 */
+	if (!rt->rt6i_nsiblings || rt->should_flush)
+		return;
+
+	/* During lookup routes are evaluated in order, so we need to
+	 * make sure upper bounds are assigned from the first sibling
+	 * onwards.
+	 */
+	first = rt6_multipath_first_sibling(rt);
+	if (WARN_ON_ONCE(!first))
+		return;
+
+	total = rt6_multipath_total_weight(first);
+	rt6_multipath_upper_bound_set(first, total);
+}
+
 static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 {
 	const struct arg_netdev_event *arg = p_arg;
@@ -3489,6 +3582,7 @@ static int fib6_ifup(struct rt6_info *rt, void *p_arg)
 	if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
 		rt->rt6i_nh_flags &= ~arg->nh_flags;
 		fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+		rt6_multipath_rebalance(rt);
 	}
 
 	return 0;
@@ -3588,6 +3682,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
 						   RTNH_F_LINKDOWN);
 			fib6_update_sernum(rt);
+			rt6_multipath_rebalance(rt);
 		}
 		return -2;
 	case NETDEV_CHANGE:
@@ -3595,6 +3690,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
 		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
 			break;
 		rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+		rt6_multipath_rebalance(rt);
 		break;
 	}