summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c20
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c8
-rw-r--r--include/net/ip_fib.h3
-rw-r--r--net/ipv4/fib_trie.c148
4 files changed, 174 insertions, 5 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 14bed1d10b72..53126bf68ea9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2027,6 +2027,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
+{
+	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
+
+	/* Flush pending FIB notifications and then flush the device's
+	 * table before requesting another dump. The FIB notification
+	 * block is unregistered, so no need to take RTNL.
+	 */
+	mlxsw_core_flush_owq();
+	mlxsw_sp_router_fib_flush(mlxsw_sp);
+}
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
 	int err;
@@ -2047,9 +2059,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 		goto err_neigh_init;
 
 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
-	register_fib_notifier(&mlxsw_sp->fib_nb);
+	err = register_fib_notifier(&mlxsw_sp->fib_nb,
+				    mlxsw_sp_router_fib_dump_flush);
+	if (err)
+		goto err_register_fib_notifier;
+
 	return 0;
 
+err_register_fib_notifier:
+	mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
 	mlxsw_sp_vrs_fini(mlxsw_sp);
 err_vrs_init:
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 8c9c90ae8962..7c450b5a1138 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2804,8 +2804,13 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_alloc_ordered_workqueue;
 	}
 
+	/* Only FIBs pointing to our own netdevs are programmed into
+	 * the device, so no need to pass a callback.
+	 */
 	rocker->fib_nb.notifier_call = rocker_router_fib_event;
-	register_fib_notifier(&rocker->fib_nb);
+	err = register_fib_notifier(&rocker->fib_nb, NULL);
+	if (err)
+		goto err_register_fib_notifier;
 
 	rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
 
@@ -2822,6 +2827,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 err_probe_ports:
 	unregister_fib_notifier(&rocker->fib_nb);
+err_register_fib_notifier:
 	destroy_workqueue(rocker->rocker_owq);
 err_alloc_ordered_workqueue:
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6c67b9391fc0..5f376af377c7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -221,7 +221,8 @@ enum fib_event_type {
 	FIB_EVENT_RULE_DEL,
 };
 
-int register_fib_notifier(struct notifier_block *nb);
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb));
 int unregister_fib_notifier(struct notifier_block *nb);
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
 		       struct fib_notifier_info *info);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 28913563e7cd..73a62700b00a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -84,11 +84,99 @@
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
+static unsigned int fib_seq_sum(void)
+{
+	unsigned int fib_seq = 0;
+	struct net *net;
+
+	rtnl_lock();
+	for_each_net(net)
+		fib_seq += net->ipv4.fib_seq;
+	rtnl_unlock();
+
+	return fib_seq;
+}
+
 static ATOMIC_NOTIFIER_HEAD(fib_chain);
 
-int register_fib_notifier(struct notifier_block *nb)
+static int call_fib_notifier(struct notifier_block *nb, struct net *net,
+			     enum fib_event_type event_type,
+			     struct fib_notifier_info *info)
 {
-	return atomic_notifier_chain_register(&fib_chain, nb);
+	info->net = net;
+	return nb->notifier_call(nb, event_type, info);
+}
+
+static void fib_rules_notify(struct net *net, struct notifier_block *nb,
+			     enum fib_event_type event_type)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	struct fib_notifier_info info;
+
+	if (net->ipv4.fib_has_custom_rules)
+		call_fib_notifier(nb, net, event_type, &info);
+#endif
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+		       enum fib_event_type event_type);
+
+static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+				   enum fib_event_type event_type, u32 dst,
+				   int dst_len, struct fib_info *fi,
+				   u8 tos, u8 type, u32 tb_id, u32 nlflags)
+{
+	struct fib_entry_notifier_info info = {
+		.dst = dst,
+		.dst_len = dst_len,
+		.fi = fi,
+		.tos = tos,
+		.type = type,
+		.tb_id = tb_id,
+		.nlflags = nlflags,
+	};
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+				   void (*cb)(struct notifier_block *nb),
+				   unsigned int fib_seq)
+{
+	atomic_notifier_chain_register(&fib_chain, nb);
+	if (fib_seq == fib_seq_sum())
+		return true;
+	atomic_notifier_chain_unregister(&fib_chain, nb);
+	if (cb)
+		cb(nb);
+	return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb))
+{
+	int retries = 0;
+
+	do {
+		unsigned int fib_seq = fib_seq_sum();
+		struct net *net;
+
+		/* Mutex semantics guarantee that every change done to
+		 * FIB tries before we read the change sequence counter
+		 * is now visible to us.
+		 */
+		rcu_read_lock();
+		for_each_net_rcu(net) {
+			fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
+			fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
+		}
+		rcu_read_unlock();
+
+		if (fib_dump_is_consistent(nb, cb, fib_seq))
+			return 0;
+	} while (++retries < FIB_DUMP_MAX_RETRIES);
+
+	return -EBUSY;
 }
 EXPORT_SYMBOL(register_fib_notifier);
 
@@ -1902,6 +1990,62 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
 	return found;
 }
 
+static void fib_leaf_notify(struct net *net, struct key_vector *l,
+			    struct fib_table *tb, struct notifier_block *nb,
+			    enum fib_event_type event_type)
+{
+	struct fib_alias *fa;
+
+	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+
+		if (!fi)
+			continue;
+
+		/* local and main table can share the same trie,
+		 * so don't notify twice for the same entry.
+		 */
+		if (tb->tb_id != fa->tb_id)
+			continue;
+
+		call_fib_entry_notifier(nb, net, event_type, l->key,
+					KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
+					fa->fa_type, fa->tb_id, 0);
+	}
+}
+
+static void fib_table_notify(struct net *net, struct fib_table *tb,
+			     struct notifier_block *nb,
+			     enum fib_event_type event_type)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct key_vector *l, *tp = t->kv;
+	t_key key = 0;
+
+	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+		fib_leaf_notify(net, l, tb, nb, event_type);
+
+		key = l->key + 1;
+		/* stop in case of wrap around */
+		if (key < l->key)
+			break;
+	}
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+		       enum fib_event_type event_type)
+{
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct fib_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb_hlist)
+			fib_table_notify(net, tb, nb, event_type);
+	}
+}
+
 static void __trie_free_rcu(struct rcu_head *head)
 {
 	struct fib_table *tb = container_of(head, struct fib_table, rcu);