diff options
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 20 | ||||
-rw-r--r-- | drivers/net/ethernet/rocker/rocker_main.c | 8 | ||||
-rw-r--r-- | include/net/ip_fib.h | 3 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 148 |
4 files changed, 174 insertions, 5 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 14bed1d10b72..53126bf68ea9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2027,6 +2027,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
+{
+	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
+
+	/* Flush pending FIB notifications and then flush the device's
+	 * table before requesting another dump. The FIB notification
+	 * block is unregistered, so no need to take RTNL.
+	 */
+	mlxsw_core_flush_owq();
+	mlxsw_sp_router_fib_flush(mlxsw_sp);
+}
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
 	int err;
@@ -2047,9 +2059,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 		goto err_neigh_init;
 
 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
-	register_fib_notifier(&mlxsw_sp->fib_nb);
+	err = register_fib_notifier(&mlxsw_sp->fib_nb,
+				    mlxsw_sp_router_fib_dump_flush);
+	if (err)
+		goto err_register_fib_notifier;
+
 	return 0;
 
+err_register_fib_notifier:
+	mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
 	mlxsw_sp_vrs_fini(mlxsw_sp);
 err_vrs_init:
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 8c9c90ae8962..7c450b5a1138 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2804,8 +2804,13 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_alloc_ordered_workqueue;
 	}
 
+	/* Only FIBs pointing to our own netdevs are programmed into
+	 * the device, so no need to pass a callback.
+	 */
 	rocker->fib_nb.notifier_call = rocker_router_fib_event;
-	register_fib_notifier(&rocker->fib_nb);
+	err = register_fib_notifier(&rocker->fib_nb, NULL);
+	if (err)
+		goto err_register_fib_notifier;
 
 	rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
 
@@ -2822,6 +2827,7 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 err_probe_ports:
 	unregister_fib_notifier(&rocker->fib_nb);
+err_register_fib_notifier:
 	destroy_workqueue(rocker->rocker_owq);
 err_alloc_ordered_workqueue:
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6c67b9391fc0..5f376af377c7 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -221,7 +221,8 @@ enum fib_event_type {
 	FIB_EVENT_RULE_DEL,
 };
 
-int register_fib_notifier(struct notifier_block *nb);
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb));
 int unregister_fib_notifier(struct notifier_block *nb);
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
 		       struct fib_notifier_info *info);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 28913563e7cd..73a62700b00a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -84,11 +84,99 @@
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
+static unsigned int fib_seq_sum(void)
+{
+	unsigned int fib_seq = 0;
+	struct net *net;
+
+	rtnl_lock();
+	for_each_net(net)
+		fib_seq += net->ipv4.fib_seq;
+	rtnl_unlock();
+
+	return fib_seq;
+}
+
 static ATOMIC_NOTIFIER_HEAD(fib_chain);
 
-int register_fib_notifier(struct notifier_block *nb)
+static int call_fib_notifier(struct notifier_block *nb, struct net *net,
+			     enum fib_event_type event_type,
+			     struct fib_notifier_info *info)
 {
-	return atomic_notifier_chain_register(&fib_chain, nb);
+	info->net = net;
+	return nb->notifier_call(nb, event_type, info);
+}
+
+static void fib_rules_notify(struct net *net, struct notifier_block *nb,
+			     enum fib_event_type event_type)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	struct fib_notifier_info info;
+
+	if (net->ipv4.fib_has_custom_rules)
+		call_fib_notifier(nb, net, event_type, &info);
+#endif
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+		       enum fib_event_type event_type);
+
+static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+				   enum fib_event_type event_type, u32 dst,
+				   int dst_len, struct fib_info *fi,
+				   u8 tos, u8 type, u32 tb_id, u32 nlflags)
+{
+	struct fib_entry_notifier_info info = {
+		.dst = dst,
+		.dst_len = dst_len,
+		.fi = fi,
+		.tos = tos,
+		.type = type,
+		.tb_id = tb_id,
+		.nlflags = nlflags,
+	};
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+				   void (*cb)(struct notifier_block *nb),
+				   unsigned int fib_seq)
+{
+	atomic_notifier_chain_register(&fib_chain, nb);
+	if (fib_seq == fib_seq_sum())
+		return true;
+	atomic_notifier_chain_unregister(&fib_chain, nb);
+	if (cb)
+		cb(nb);
+	return false;
+}
+
+#define FIB_DUMP_MAX_RETRIES 5
+int register_fib_notifier(struct notifier_block *nb,
+			  void (*cb)(struct notifier_block *nb))
+{
+	int retries = 0;
+
+	do {
+		unsigned int fib_seq = fib_seq_sum();
+		struct net *net;
+
+		/* Mutex semantics guarantee that every change done to
+		 * FIB tries before we read the change sequence counter
+		 * is now visible to us.
+		 */
+		rcu_read_lock();
+		for_each_net_rcu(net) {
+			fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
+			fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
+		}
+		rcu_read_unlock();
+
+		if (fib_dump_is_consistent(nb, cb, fib_seq))
+			return 0;
+	} while (++retries < FIB_DUMP_MAX_RETRIES);
+
+	return -EBUSY;
 }
 EXPORT_SYMBOL(register_fib_notifier);
 
@@ -1902,6 +1990,62 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
 	return found;
 }
 
+static void fib_leaf_notify(struct net *net, struct key_vector *l,
+			    struct fib_table *tb, struct notifier_block *nb,
+			    enum fib_event_type event_type)
+{
+	struct fib_alias *fa;
+
+	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+
+		if (!fi)
+			continue;
+
+		/* local and main table can share the same trie,
+		 * so don't notify twice for the same entry.
+		 */
+		if (tb->tb_id != fa->tb_id)
+			continue;
+
+		call_fib_entry_notifier(nb, net, event_type, l->key,
+					KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
+					fa->fa_type, fa->tb_id, 0);
+	}
+}
+
+static void fib_table_notify(struct net *net, struct fib_table *tb,
+			     struct notifier_block *nb,
+			     enum fib_event_type event_type)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct key_vector *l, *tp = t->kv;
+	t_key key = 0;
+
+	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+		fib_leaf_notify(net, l, tb, nb, event_type);
+
+		key = l->key + 1;
+		/* stop in case of wrap around */
+		if (key < l->key)
+			break;
+	}
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+		       enum fib_event_type event_type)
+{
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct fib_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb_hlist)
+			fib_table_notify(net, tb, nb, event_type);
+	}
+}
+
 static void __trie_free_rcu(struct rcu_head *head)
 {
 	struct fib_table *tb = container_of(head, struct fib_table, rcu);