Diffstat (limited to 'net')
135 files changed, 4243 insertions, 1908 deletions
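Most of the batman-adv hunks below apply one recurring cleanup: the various *_put() helpers are made NULL-tolerant so that callers can drop the "if (x)" guard before putting a reference. A minimal sketch of the pattern, using a hypothetical struct foo_node rather than any real batman-adv type:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct foo_node {
	struct kref refcount;
	/* ... payload ... */
};

static void foo_node_release(struct kref *ref)
{
	struct foo_node *node = container_of(ref, struct foo_node, refcount);

	kfree(node);
}

/* NULL-tolerant put: callers no longer need "if (node)" before calling it. */
static inline void foo_node_put(struct foo_node *node)
{
	if (!node)
		return;

	kref_put(&node->refcount, foo_node_release);
}

With the check inside the helper, every error path that may reach an out: label with an unset pointer shrinks from "if (node) foo_node_put(node);" to a bare "foo_node_put(node);", which is the transformation repeated throughout bat_iv_ogm.c, bat_v.c, translation-table.c and the other batman-adv files in this diff.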
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c index 1c140af06d52..600b9563bfc5 100644 --- a/net/6lowpan/debugfs.c +++ b/net/6lowpan/debugfs.c @@ -170,7 +170,8 @@ static void lowpan_dev_debugfs_ctx_init(struct net_device *dev, struct dentry *root; char buf[32]; - WARN_ON_ONCE(id > LOWPAN_IPHC_CTX_TABLE_SIZE); + if (WARN_ON_ONCE(id >= LOWPAN_IPHC_CTX_TABLE_SIZE)) + return; sprintf(buf, "%d", id); diff --git a/net/802/Makefile b/net/802/Makefile index 19406a87bdaa..bfed80221b8b 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_LLC) += p8022.o psnap.o obj-$(CONFIG_NET_FC) += fc.o obj-$(CONFIG_FDDI) += fddi.o obj-$(CONFIG_HIPPI) += hippi.o -obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o obj-$(CONFIG_ATALK) += p8022.o psnap.o obj-$(CONFIG_STP) += stp.o obj-$(CONFIG_GARP) += garp.o diff --git a/net/802/p8023.c b/net/802/p8023.c deleted file mode 100644 index 19cd56990db2..000000000000 --- a/net/802/p8023.c +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * NET3: 802.3 data link hooks used for IPX 802.3 - * - * 802.3 isn't really a protocol data link layer. Some old IPX stuff - * uses it however. Note that there is only one 802.3 protocol layer - * in the system. We don't currently support different protocols - * running raw 802.3 on different devices. Thankfully nobody else - * has done anything like the old IPX. - */ - -#include <linux/in.h> -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/slab.h> - -#include <net/datalink.h> -#include <net/p8022.h> - -/* - * Place an 802.3 header on a packet. The driver will do the mac - * addresses, we just need to give it the buffer length. - */ -static int p8023_request(struct datalink_proto *dl, - struct sk_buff *skb, unsigned char *dest_node) -{ - struct net_device *dev = skb->dev; - - dev_hard_header(skb, dev, ETH_P_802_3, dest_node, NULL, skb->len); - return dev_queue_xmit(skb); -} - -/* - * Create an 802.3 client. Note there can be only one 802.3 client - */ -struct datalink_proto *make_8023_client(void) -{ - struct datalink_proto *proto = kmalloc(sizeof(*proto), GFP_ATOMIC); - - if (proto) { - proto->header_length = 0; - proto->request = p8023_request; - } - return proto; -} - -/* - * Destroy the 802.3 client. 
- */ -void destroy_8023_client(struct datalink_proto *dl) -{ - kfree(dl); -} - -EXPORT_SYMBOL(destroy_8023_client); -EXPORT_SYMBOL(make_8023_client); - -MODULE_LICENSE("GPL"); diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 12022378f892..f94f538fa382 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -519,8 +519,7 @@ batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, } out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); return res; } @@ -857,8 +856,7 @@ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) rcu_read_unlock(); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) @@ -1046,14 +1044,10 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, unlock: rcu_read_unlock(); out: - if (neigh_node) - batadv_neigh_node_put(neigh_node); - if (router) - batadv_neigh_node_put(router); - if (neigh_ifinfo) - batadv_neigh_ifinfo_put(neigh_ifinfo); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); + batadv_neigh_node_put(neigh_node); + batadv_neigh_node_put(router); + batadv_neigh_ifinfo_put(neigh_ifinfo); + batadv_neigh_ifinfo_put(router_ifinfo); } /** @@ -1194,8 +1188,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, ret = true; out: - if (neigh_node) - batadv_neigh_node_put(neigh_node); + batadv_neigh_node_put(neigh_node); return ret; } @@ -1496,16 +1489,11 @@ out_neigh: if (orig_neigh_node && !is_single_hop_neigh) batadv_orig_node_put(orig_neigh_node); out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - if (router_router) - batadv_neigh_node_put(router_router); - if (orig_neigh_router) - batadv_neigh_node_put(orig_neigh_router); - if (hardif_neigh) - batadv_hardif_neigh_put(hardif_neigh); + batadv_neigh_ifinfo_put(router_ifinfo); + batadv_neigh_node_put(router); + batadv_neigh_node_put(router_router); + batadv_neigh_node_put(orig_neigh_router); + batadv_hardif_neigh_put(hardif_neigh); consume_skb(skb_priv); } @@ -1926,8 +1914,7 @@ batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } out: - if (neigh_node_best) - batadv_neigh_node_put(neigh_node_best); + batadv_neigh_node_put(neigh_node_best); *sub_s = 0; return 0; @@ -2049,10 +2036,8 @@ static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1, *diff = (int)tq1 - (int)tq2; out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + batadv_neigh_ifinfo_put(neigh1_ifinfo); + batadv_neigh_ifinfo_put(neigh2_ifinfo); return ret; } @@ -2299,8 +2284,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (tmp_gw_factor > max_gw_factor || (tmp_gw_factor == max_gw_factor && tq_avg > max_tq)) { - if (curr_gw) - batadv_gw_node_put(curr_gw); + batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); } @@ -2314,8 +2298,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) * $routing_class more tq points) */ if (tq_avg > max_tq) { - if (curr_gw) - batadv_gw_node_put(curr_gw); + batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); } @@ -2332,8 +2315,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) next: batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); + 
batadv_neigh_ifinfo_put(router_ifinfo); } rcu_read_unlock(); @@ -2397,14 +2379,10 @@ static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv, ret = true; out: - if (router_gw_ifinfo) - batadv_neigh_ifinfo_put(router_gw_ifinfo); - if (router_orig_ifinfo) - batadv_neigh_ifinfo_put(router_orig_ifinfo); - if (router_gw) - batadv_neigh_node_put(router_gw); - if (router_orig) - batadv_neigh_node_put(router_orig); + batadv_neigh_ifinfo_put(router_gw_ifinfo); + batadv_neigh_ifinfo_put(router_orig_ifinfo); + batadv_neigh_node_put(router_gw); + batadv_neigh_node_put(router_orig); return ret; } @@ -2479,12 +2457,9 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, ret = 0; out: - if (curr_gw) - batadv_gw_node_put(curr_gw); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); + batadv_gw_node_put(curr_gw); + batadv_neigh_ifinfo_put(router_ifinfo); + batadv_neigh_node_put(router); return ret; } diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index b98aea958e3d..54e41fc709c3 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -106,8 +106,7 @@ static void batadv_v_iface_update_mac(struct batadv_hard_iface *hard_iface) batadv_v_primary_iface_set(hard_iface); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } static void @@ -366,8 +365,7 @@ batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, } out: - if (neigh_node_best) - batadv_neigh_node_put(neigh_node_best); + batadv_neigh_node_put(neigh_node_best); *sub_s = 0; return 0; @@ -568,10 +566,8 @@ static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) ret = 0; out: - if (router) - batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); + batadv_neigh_node_put(router); + batadv_neigh_ifinfo_put(router_ifinfo); return ret; } @@ -599,8 +595,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (curr_gw && bw <= max_bw) goto next; - if (curr_gw) - batadv_gw_node_put(curr_gw); + batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); @@ -662,10 +657,8 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, ret = true; out: - if (curr_gw) - batadv_gw_node_put(curr_gw); - if (orig_gw) - batadv_gw_node_put(orig_gw); + batadv_gw_node_put(curr_gw); + batadv_gw_node_put(orig_gw); return ret; } @@ -764,12 +757,9 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, ret = 0; out: - if (curr_gw) - batadv_gw_node_put(curr_gw); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); + batadv_gw_node_put(curr_gw); + batadv_neigh_ifinfo_put(router_ifinfo); + batadv_neigh_node_put(router); return ret; } diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 423c2d171703..71999e13f729 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -486,14 +486,11 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv, hardif_neigh->bat_v.elp_interval = ntohl(elp_packet->elp_interval); hardif_free: - if (hardif_neigh) - batadv_hardif_neigh_put(hardif_neigh); + batadv_hardif_neigh_put(hardif_neigh); neigh_free: - if (neigh) - batadv_neigh_node_put(neigh); + batadv_neigh_node_put(neigh); orig_free: - if (orig_neigh) - batadv_orig_node_put(orig_neigh); + batadv_orig_node_put(orig_neigh); } /** diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 
a0a9636d1740..1d750f3cb2e4 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -584,12 +584,9 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, batadv_v_ogm_queue_on_if(skb, if_outgoing); out: - if (orig_ifinfo) - batadv_orig_ifinfo_put(orig_ifinfo); - if (router) - batadv_neigh_node_put(router); - if (neigh_ifinfo) - batadv_neigh_ifinfo_put(neigh_ifinfo); + batadv_orig_ifinfo_put(orig_ifinfo); + batadv_neigh_node_put(router); + batadv_neigh_ifinfo_put(neigh_ifinfo); } /** @@ -669,10 +666,8 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv, else ret = 0; out: - if (orig_ifinfo) - batadv_orig_ifinfo_put(orig_ifinfo); - if (neigh_ifinfo) - batadv_neigh_ifinfo_put(neigh_ifinfo); + batadv_orig_ifinfo_put(orig_ifinfo); + batadv_neigh_ifinfo_put(neigh_ifinfo); return ret; } @@ -763,16 +758,11 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); out: - if (router) - batadv_neigh_node_put(router); - if (orig_neigh_router) - batadv_neigh_node_put(orig_neigh_router); - if (orig_neigh_node) - batadv_orig_node_put(orig_neigh_node); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (neigh_ifinfo) - batadv_neigh_ifinfo_put(neigh_ifinfo); + batadv_neigh_node_put(router); + batadv_neigh_node_put(orig_neigh_router); + batadv_orig_node_put(orig_neigh_node); + batadv_neigh_ifinfo_put(router_ifinfo); + batadv_neigh_ifinfo_put(neigh_ifinfo); return forward; } @@ -978,12 +968,9 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, } rcu_read_unlock(); out: - if (orig_node) - batadv_orig_node_put(orig_node); - if (neigh_node) - batadv_neigh_node_put(neigh_node); - if (hardif_neigh) - batadv_hardif_neigh_put(hardif_neigh); + batadv_orig_node_put(orig_node); + batadv_neigh_node_put(neigh_node); + batadv_hardif_neigh_put(hardif_neigh); } /** diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 2b639c8b0ded..1669744304c5 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -162,6 +162,9 @@ static void batadv_backbone_gw_release(struct kref *ref) */ static void batadv_backbone_gw_put(struct batadv_bla_backbone_gw *backbone_gw) { + if (!backbone_gw) + return; + kref_put(&backbone_gw->refcount, batadv_backbone_gw_release); } @@ -197,6 +200,9 @@ static void batadv_claim_release(struct kref *ref) */ static void batadv_claim_put(struct batadv_bla_claim *claim) { + if (!claim) + return; + kref_put(&claim->refcount, batadv_claim_release); } @@ -439,8 +445,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, u8 *mac, netif_rx_any_context(skb); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } /** @@ -1498,8 +1503,7 @@ static void batadv_bla_periodic_work(struct work_struct *work) rcu_read_unlock(); } out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); queue_delayed_work(batadv_event_workqueue, &bat_priv->bla.work, msecs_to_jiffies(BATADV_BLA_PERIOD_LENGTH)); @@ -1808,8 +1812,7 @@ void batadv_bla_free(struct batadv_priv *bat_priv) batadv_hash_destroy(bat_priv->bla.backbone_hash); bat_priv->bla.backbone_hash = NULL; } - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } /** @@ -1996,10 +1999,8 @@ handled: ret = true; out: - if (primary_if) - batadv_hardif_put(primary_if); - if (claim) - batadv_claim_put(claim); + 
batadv_hardif_put(primary_if); + batadv_claim_put(claim); return ret; } @@ -2103,10 +2104,8 @@ allow: handled: ret = true; out: - if (primary_if) - batadv_hardif_put(primary_if); - if (claim) - batadv_claim_put(claim); + batadv_hardif_put(primary_if); + batadv_claim_put(claim); return ret; } @@ -2271,8 +2270,7 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); @@ -2442,8 +2440,7 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 7976a0435662..2f008e329007 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -127,6 +127,9 @@ static void batadv_dat_entry_release(struct kref *ref) */ static void batadv_dat_entry_put(struct batadv_dat_entry *dat_entry) { + if (!dat_entry) + return; + kref_put(&dat_entry->refcount, batadv_dat_entry_release); } @@ -405,8 +408,7 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(vid)); out: - if (dat_entry) - batadv_dat_entry_put(dat_entry); + batadv_dat_entry_put(dat_entry); } #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -594,8 +596,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, continue; max = tmp_max; - if (max_orig_node) - batadv_orig_node_put(max_orig_node); + batadv_orig_node_put(max_orig_node); max_orig_node = orig_node; } rcu_read_unlock(); @@ -981,8 +982,7 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); @@ -1217,8 +1217,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, BATADV_P_DAT_DHT_GET); } out: - if (dat_entry) - batadv_dat_entry_put(dat_entry); + batadv_dat_entry_put(dat_entry); return ret; } @@ -1285,8 +1284,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, ret = true; } out: - if (dat_entry) - batadv_dat_entry_put(dat_entry); + batadv_dat_entry_put(dat_entry); if (ret) kfree_skb(skb); return ret; @@ -1419,8 +1417,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, out: if (dropped) kfree_skb(skb); - if (dat_entry) - batadv_dat_entry_put(dat_entry); + batadv_dat_entry_put(dat_entry); /* if dropped == false -> deliver to the interface */ return dropped; } @@ -1829,7 +1826,6 @@ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, ret = true; out: - if (dat_entry) - batadv_dat_entry_put(dat_entry); + batadv_dat_entry_put(dat_entry); return ret; } diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index a5d9d800082b..0899a729a23f 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -381,10 +381,8 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, } out: - if (orig_node_dst) - batadv_orig_node_put(orig_node_dst); - if (neigh_node) - batadv_neigh_node_put(neigh_node); + batadv_orig_node_put(orig_node_dst); + batadv_neigh_node_put(neigh_node); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 36a98d3cefe0..b7466136e292 100644 --- 
a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -59,7 +59,7 @@ * after rcu grace period * @ref: kref pointer of the gw_node */ -static void batadv_gw_node_release(struct kref *ref) +void batadv_gw_node_release(struct kref *ref) { struct batadv_gw_node *gw_node; @@ -70,16 +70,6 @@ static void batadv_gw_node_release(struct kref *ref) } /** - * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release - * it - * @gw_node: gateway node to free - */ -void batadv_gw_node_put(struct batadv_gw_node *gw_node) -{ - kref_put(&gw_node->refcount, batadv_gw_node_release); -} - -/** * batadv_gw_get_selected_gw_node() - Get currently selected gateway * @bat_priv: the bat priv with all the soft interface information * @@ -130,8 +120,7 @@ batadv_gw_get_selected_orig(struct batadv_priv *bat_priv) unlock: rcu_read_unlock(); out: - if (gw_node) - batadv_gw_node_put(gw_node); + batadv_gw_node_put(gw_node); return orig_node; } @@ -148,8 +137,7 @@ static void batadv_gw_select(struct batadv_priv *bat_priv, curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node, true); - if (curr_gw_node) - batadv_gw_node_put(curr_gw_node); + batadv_gw_node_put(curr_gw_node); spin_unlock_bh(&bat_priv->gw.list_lock); } @@ -284,14 +272,10 @@ void batadv_gw_election(struct batadv_priv *bat_priv) batadv_gw_select(bat_priv, next_gw); out: - if (curr_gw) - batadv_gw_node_put(curr_gw); - if (next_gw) - batadv_gw_node_put(next_gw); - if (router) - batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); + batadv_gw_node_put(curr_gw); + batadv_gw_node_put(next_gw); + batadv_neigh_node_put(router); + batadv_neigh_ifinfo_put(router_ifinfo); } /** @@ -325,8 +309,7 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, reselect: batadv_gw_reselect(bat_priv); out: - if (curr_gw_orig) - batadv_orig_node_put(curr_gw_orig); + batadv_orig_node_put(curr_gw_orig); } /** @@ -466,13 +449,11 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, if (gw_node == curr_gw) batadv_gw_reselect(bat_priv); - if (curr_gw) - batadv_gw_node_put(curr_gw); + batadv_gw_node_put(curr_gw); } out: - if (gw_node) - batadv_gw_node_put(gw_node); + batadv_gw_node_put(gw_node); } /** @@ -555,8 +536,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); return ret; @@ -779,15 +759,10 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, batadv_neigh_ifinfo_put(old_ifinfo); out: - if (orig_dst_node) - batadv_orig_node_put(orig_dst_node); - if (curr_gw) - batadv_gw_node_put(curr_gw); - if (gw_node) - batadv_gw_node_put(gw_node); - if (neigh_old) - batadv_neigh_node_put(neigh_old); - if (neigh_curr) - batadv_neigh_node_put(neigh_curr); + batadv_orig_node_put(orig_dst_node); + batadv_gw_node_put(curr_gw); + batadv_gw_node_put(gw_node); + batadv_neigh_node_put(neigh_old); + batadv_neigh_node_put(neigh_curr); return out_of_range; } diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 2ae5846ef958..95c2ccdaa554 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -9,6 +9,7 @@ #include "main.h" +#include <linux/kref.h> #include <linux/netlink.h> #include <linux/skbuff.h> #include <linux/types.h> @@ -27,7 +28,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); 
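The gateway_client changes show the second half of the same cleanup: when a put() helper becomes a static inline in the header, as batadv_gw_node_put() does here, the release callback it hands to kref_put() can no longer stay static in the .c file and needs a declaration with external linkage. A hedged illustration with hypothetical names, not the actual gateway_client interface:

/* foo.h */
#include <linux/kref.h>

struct foo_node {
	struct kref refcount;
};

void foo_node_release(struct kref *ref);	/* defined in foo.c, non-static */

static inline void foo_node_put(struct foo_node *node)
{
	if (!node)
		return;

	kref_put(&node->refcount, foo_node_release);
}

Within a single module such as batman-adv no EXPORT_SYMBOL() is needed for the release function; the Bluetooth hunks later in this diff do export hci_release_dev() because other modules call it.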
void batadv_gw_node_free(struct batadv_priv *bat_priv); -void batadv_gw_node_put(struct batadv_gw_node *gw_node); +void batadv_gw_node_release(struct kref *ref); struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); @@ -38,4 +39,17 @@ batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); +/** + * batadv_gw_node_put() - decrement the gw_node refcounter and possibly release + * it + * @gw_node: gateway node to free + */ +static inline void batadv_gw_node_put(struct batadv_gw_node *gw_node) +{ + if (!gw_node) + return; + + kref_put(&gw_node->refcount, batadv_gw_node_release); +} + #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index fdde305a198e..9349c76f30c5 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -10,7 +10,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> -#include <linux/kernel.h> +#include <linux/kstrtox.h> #include <linux/limits.h> #include <linux/math64.h> #include <linux/netdevice.h> diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 55d97e18aa4a..8a2b78f9c4b2 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -236,8 +236,7 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) real_netdev = dev_get_by_index(real_net, ifindex); out: - if (hard_iface) - batadv_hardif_put(hard_iface); + batadv_hardif_put(hard_iface); return real_netdev; } @@ -457,8 +456,7 @@ static void batadv_primary_if_update_addr(struct batadv_priv *bat_priv, batadv_dat_init_own_addr(bat_priv, primary_if); batadv_bla_update_orig_address(bat_priv, primary_if, oldif); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } static void batadv_primary_if_select(struct batadv_priv *bat_priv, @@ -481,8 +479,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv, batadv_primary_if_update_addr(bat_priv, curr_hard_iface); out: - if (curr_hard_iface) - batadv_hardif_put(curr_hard_iface); + batadv_hardif_put(curr_hard_iface); } static bool @@ -657,8 +654,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) bat_priv->algo_ops->iface.activate(hard_iface); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } static void @@ -811,8 +807,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) new_if = batadv_hardif_get_active(hard_iface->soft_iface); batadv_primary_if_select(bat_priv, new_if); - if (new_if) - batadv_hardif_put(new_if); + batadv_hardif_put(new_if); } bat_priv->algo_ops->iface.disable(hard_iface); @@ -834,8 +829,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) batadv_hardif_put(hard_iface); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } static struct batadv_hard_iface * @@ -990,8 +984,7 @@ static int batadv_hard_if_event(struct notifier_block *this, hardif_put: batadv_hardif_put(hard_iface); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); return NOTIFY_DONE; } diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 8cb2a1f10080..64f660dbbe54 100644 --- 
a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -89,6 +89,9 @@ int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing, */ static inline void batadv_hardif_put(struct batadv_hard_iface *hard_iface) { + if (!hard_iface) + return; + kref_put(&hard_iface->refcount, batadv_hardif_release); } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 014235fd4681..058b8f2eef65 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2021.2" +#define BATADV_SOURCE_VERSION "2021.3" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 0158f267c403..a3b6658ed789 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2241,7 +2241,7 @@ out: if (!ret && primary_if) *primary_if = hard_iface; - else if (hard_iface) + else batadv_hardif_put(hard_iface); return ret; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index b6cc746e01a6..29276284d281 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -359,15 +359,13 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg, atomic_read(&bat_priv->orig_interval))) goto nla_put_failure; - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); genlmsg_end(msg, hdr); return 0; nla_put_failure: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); genlmsg_cancel(msg, hdr); return -EMSGSIZE; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 4bb76b434d07..9f06132e007d 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -217,6 +217,9 @@ static void batadv_nc_node_release(struct kref *ref) */ static void batadv_nc_node_put(struct batadv_nc_node *nc_node) { + if (!nc_node) + return; + kref_put(&nc_node->refcount, batadv_nc_node_release); } @@ -241,6 +244,9 @@ static void batadv_nc_path_release(struct kref *ref) */ static void batadv_nc_path_put(struct batadv_nc_path *nc_path) { + if (!nc_path) + return; + kref_put(&nc_path->refcount, batadv_nc_path_release); } @@ -930,10 +936,8 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, out_nc_node->last_seen = jiffies; out: - if (in_nc_node) - batadv_nc_node_put(in_nc_node); - if (out_nc_node) - batadv_nc_node_put(out_nc_node); + batadv_nc_node_put(in_nc_node); + batadv_nc_node_put(out_nc_node); } /** @@ -1209,14 +1213,10 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, batadv_send_unicast_skb(skb_dest, first_dest); res = true; out: - if (router_neigh) - batadv_neigh_node_put(router_neigh); - if (router_coding) - batadv_neigh_node_put(router_coding); - if (router_neigh_ifinfo) - batadv_neigh_ifinfo_put(router_neigh_ifinfo); - if (router_coding_ifinfo) - batadv_neigh_ifinfo_put(router_coding_ifinfo); + batadv_neigh_node_put(router_neigh); + batadv_neigh_node_put(router_coding); + batadv_neigh_ifinfo_put(router_neigh_ifinfo); + batadv_neigh_ifinfo_put(router_coding_ifinfo); return res; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 6a4d3f437e00..aadc653ca1d8 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -177,7 +177,7 @@ out: * and queue for free after rcu grace period * @ref: kref pointer of the originator-vlan object */ -static void batadv_orig_node_vlan_release(struct kref *ref) +void batadv_orig_node_vlan_release(struct 
kref *ref) { struct batadv_orig_node_vlan *orig_vlan; @@ -187,16 +187,6 @@ static void batadv_orig_node_vlan_release(struct kref *ref) } /** - * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release - * the originator-vlan object - * @orig_vlan: the originator-vlan object to release - */ -void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan) -{ - kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); -} - -/** * batadv_originator_init() - Initialize all originator structures * @bat_priv: the bat priv with all the soft interface information * @@ -231,7 +221,7 @@ err: * free after rcu grace period * @ref: kref pointer of the neigh_ifinfo */ -static void batadv_neigh_ifinfo_release(struct kref *ref) +void batadv_neigh_ifinfo_release(struct kref *ref) { struct batadv_neigh_ifinfo *neigh_ifinfo; @@ -244,21 +234,11 @@ static void batadv_neigh_ifinfo_release(struct kref *ref) } /** - * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release - * the neigh_ifinfo - * @neigh_ifinfo: the neigh_ifinfo object to release - */ -void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo) -{ - kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release); -} - -/** * batadv_hardif_neigh_release() - release hardif neigh node from lists and * queue for free after rcu grace period * @ref: kref pointer of the neigh_node */ -static void batadv_hardif_neigh_release(struct kref *ref) +void batadv_hardif_neigh_release(struct kref *ref) { struct batadv_hardif_neigh_node *hardif_neigh; @@ -274,21 +254,11 @@ static void batadv_hardif_neigh_release(struct kref *ref) } /** - * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter - * and possibly release it - * @hardif_neigh: hardif neigh neighbor to free - */ -void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh) -{ - kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release); -} - -/** * batadv_neigh_node_release() - release neigh_node from lists and queue for * free after rcu grace period * @ref: kref pointer of the neigh_node */ -static void batadv_neigh_node_release(struct kref *ref) +void batadv_neigh_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; @@ -309,16 +279,6 @@ static void batadv_neigh_node_release(struct kref *ref) } /** - * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly - * release it - * @neigh_node: neigh neighbor to free - */ -void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) -{ - kref_put(&neigh_node->refcount, batadv_neigh_node_release); -} - -/** * batadv_orig_router_get() - router to the originator depending on iface * @orig_node: the orig node for the router * @if_outgoing: the interface where the payload packet has been received or @@ -704,8 +664,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node, out: spin_unlock_bh(&orig_node->neigh_list_lock); - if (hardif_neigh) - batadv_hardif_neigh_put(hardif_neigh); + batadv_hardif_neigh_put(hardif_neigh); return neigh_node; } @@ -797,11 +756,9 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (hardif) - batadv_hardif_put(hardif); + batadv_hardif_put(hardif); dev_put(hard_iface); - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); return ret; @@ -812,7 +769,7 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) * free after rcu grace period * 
@ref: kref pointer of the orig_ifinfo */ -static void batadv_orig_ifinfo_release(struct kref *ref) +void batadv_orig_ifinfo_release(struct kref *ref) { struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *router; @@ -824,23 +781,12 @@ static void batadv_orig_ifinfo_release(struct kref *ref) /* this is the last reference to this object */ router = rcu_dereference_protected(orig_ifinfo->router, true); - if (router) - batadv_neigh_node_put(router); + batadv_neigh_node_put(router); kfree_rcu(orig_ifinfo, rcu); } /** - * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release - * the orig_ifinfo - * @orig_ifinfo: the orig_ifinfo object to release - */ -void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo) -{ - kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release); -} - -/** * batadv_orig_node_free_rcu() - free the orig_node * @rcu: rcu pointer of the orig_node */ @@ -863,7 +809,7 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) * free after rcu grace period * @ref: kref pointer of the orig_node */ -static void batadv_orig_node_release(struct kref *ref) +void batadv_orig_node_release(struct kref *ref) { struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; @@ -893,8 +839,7 @@ static void batadv_orig_node_release(struct kref *ref) orig_node->last_bonding_candidate = NULL; spin_unlock_bh(&orig_node->neigh_list_lock); - if (last_candidate) - batadv_orig_ifinfo_put(last_candidate); + batadv_orig_ifinfo_put(last_candidate); spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_safe(vlan, node_tmp, &orig_node->vlan_list, list) { @@ -910,16 +855,6 @@ static void batadv_orig_node_release(struct kref *ref) } /** - * batadv_orig_node_put() - decrement the orig node refcounter and possibly - * release it - * @orig_node: the orig node to free - */ -void batadv_orig_node_put(struct batadv_orig_node *orig_node) -{ - kref_put(&orig_node->refcount, batadv_orig_node_release); -} - -/** * batadv_originator_free() - Free all originator structures * @bat_priv: the bat priv with all the soft interface information */ @@ -1211,8 +1146,7 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, if (!kref_get_unless_zero(&neigh->refcount)) continue; - if (best) - batadv_neigh_node_put(best); + batadv_neigh_node_put(best); best = neigh; } @@ -1257,8 +1191,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, BATADV_IF_DEFAULT); batadv_update_route(bat_priv, orig_node, BATADV_IF_DEFAULT, best_neigh_node); - if (best_neigh_node) - batadv_neigh_node_put(best_neigh_node); + batadv_neigh_node_put(best_neigh_node); /* ... then for all other interfaces. 
*/ rcu_read_lock(); @@ -1277,8 +1210,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, hard_iface); batadv_update_route(bat_priv, orig_node, hard_iface, best_neigh_node); - if (best_neigh_node) - batadv_neigh_node_put(best_neigh_node); + batadv_neigh_node_put(best_neigh_node); batadv_hardif_put(hard_iface); } @@ -1408,11 +1340,9 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (hardif) - batadv_hardif_put(hardif); + batadv_hardif_put(hardif); dev_put(hard_iface); - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); return ret; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 805be87d55b8..ea3d69e4e670 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -12,6 +12,7 @@ #include <linux/compiler.h> #include <linux/if_ether.h> #include <linux/jhash.h> +#include <linux/kref.h> #include <linux/netlink.h> #include <linux/skbuff.h> #include <linux/types.h> @@ -20,19 +21,18 @@ bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); void batadv_originator_free(struct batadv_priv *bat_priv); void batadv_purge_orig_ref(struct batadv_priv *bat_priv); -void batadv_orig_node_put(struct batadv_orig_node *orig_node); +void batadv_orig_node_release(struct kref *ref); struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, const u8 *addr); struct batadv_hardif_neigh_node * batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, const u8 *neigh_addr); -void -batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh); +void batadv_hardif_neigh_release(struct kref *ref); struct batadv_neigh_node * batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, struct batadv_hard_iface *hard_iface, const u8 *neigh_addr); -void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node); +void batadv_neigh_node_release(struct kref *ref); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, const struct batadv_hard_iface *if_outgoing); @@ -42,7 +42,7 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, struct batadv_neigh_ifinfo * batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); -void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo); +void batadv_neigh_ifinfo_release(struct kref *ref); int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); @@ -52,7 +52,7 @@ batadv_orig_ifinfo_get(struct batadv_orig_node *orig_node, struct batadv_orig_ifinfo * batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing); -void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo); +void batadv_orig_ifinfo_release(struct kref *ref); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); struct batadv_orig_node_vlan * @@ -61,7 +61,7 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan * batadv_orig_node_vlan_get(struct batadv_orig_node *orig_node, unsigned short vid); -void batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan); +void batadv_orig_node_vlan_release(struct kref *ref); /** * batadv_choose_orig() - Return the index of the orig entry in the hash table @@ -82,4 +82,86 @@ static inline u32 batadv_choose_orig(const void *data, u32 size) struct batadv_orig_node * 
batadv_orig_hash_find(struct batadv_priv *bat_priv, const void *data); +/** + * batadv_orig_node_vlan_put() - decrement the refcounter and possibly release + * the originator-vlan object + * @orig_vlan: the originator-vlan object to release + */ +static inline void +batadv_orig_node_vlan_put(struct batadv_orig_node_vlan *orig_vlan) +{ + if (!orig_vlan) + return; + + kref_put(&orig_vlan->refcount, batadv_orig_node_vlan_release); +} + +/** + * batadv_neigh_ifinfo_put() - decrement the refcounter and possibly release + * the neigh_ifinfo + * @neigh_ifinfo: the neigh_ifinfo object to release + */ +static inline void +batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo) +{ + if (!neigh_ifinfo) + return; + + kref_put(&neigh_ifinfo->refcount, batadv_neigh_ifinfo_release); +} + +/** + * batadv_hardif_neigh_put() - decrement the hardif neighbors refcounter + * and possibly release it + * @hardif_neigh: hardif neigh neighbor to free + */ +static inline void +batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh) +{ + if (!hardif_neigh) + return; + + kref_put(&hardif_neigh->refcount, batadv_hardif_neigh_release); +} + +/** + * batadv_neigh_node_put() - decrement the neighbors refcounter and possibly + * release it + * @neigh_node: neigh neighbor to free + */ +static inline void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node) +{ + if (!neigh_node) + return; + + kref_put(&neigh_node->refcount, batadv_neigh_node_release); +} + +/** + * batadv_orig_ifinfo_put() - decrement the refcounter and possibly release + * the orig_ifinfo + * @orig_ifinfo: the orig_ifinfo object to release + */ +static inline void +batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo) +{ + if (!orig_ifinfo) + return; + + kref_put(&orig_ifinfo->refcount, batadv_orig_ifinfo_release); +} + +/** + * batadv_orig_node_put() - decrement the orig node refcounter and possibly + * release it + * @orig_node: the orig node to free + */ +static inline void batadv_orig_node_put(struct batadv_orig_node *orig_node) +{ + if (!orig_node) + return; + + kref_put(&orig_node->refcount, batadv_orig_node_release); +} + #endif /* _NET_BATMAN_ADV_ORIGINATOR_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index bb9e93e3d98c..970d0d7ccc98 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -101,8 +101,7 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, } /* decrease refcount of previous best neighbor */ - if (curr_router) - batadv_neigh_node_put(curr_router); + batadv_neigh_node_put(curr_router); } /** @@ -128,8 +127,7 @@ void batadv_update_route(struct batadv_priv *bat_priv, _batadv_update_route(bat_priv, orig_node, recv_if, neigh_node); out: - if (router) - batadv_neigh_node_put(router); + batadv_neigh_node_put(router); } /** @@ -269,10 +267,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, goto out; } out: - if (primary_if) - batadv_hardif_put(primary_if); - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); kfree_skb(skb); @@ -324,10 +320,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, skb = NULL; out: - if (primary_if) - batadv_hardif_put(primary_if); - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); kfree_skb(skb); @@ -425,8 +419,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, skb = NULL; put_orig_node: - if (orig_node) - batadv_orig_node_put(orig_node); + 
batadv_orig_node_put(orig_node); free_skb: kfree_skb(skb); @@ -513,8 +506,7 @@ batadv_last_bonding_replace(struct batadv_orig_node *orig_node, orig_node->last_bonding_candidate = new_candidate; spin_unlock_bh(&orig_node->neigh_list_lock); - if (old_candidate) - batadv_orig_ifinfo_put(old_candidate); + batadv_orig_ifinfo_put(old_candidate); } /** @@ -656,8 +648,7 @@ next: batadv_orig_ifinfo_put(next_candidate); } - if (last_candidate) - batadv_orig_ifinfo_put(last_candidate); + batadv_orig_ifinfo_put(last_candidate); return router; } @@ -785,10 +776,8 @@ batadv_reroute_unicast_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, ret = true; out: - if (primary_if) - batadv_hardif_put(primary_if); - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); return ret; } @@ -1031,8 +1020,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, orig_node); rx_success: - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); return NET_RX_SUCCESS; } @@ -1279,7 +1267,6 @@ free_skb: kfree_skb(skb); ret = NET_RX_DROP; out: - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); return ret; } diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 0b9dd29d3b6a..477d85a3b558 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -152,8 +152,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb, if (hardif_neigh && ret != NET_XMIT_DROP) hardif_neigh->bat_v.last_unicast_tx = jiffies; - if (hardif_neigh) - batadv_hardif_neigh_put(hardif_neigh); + batadv_hardif_neigh_put(hardif_neigh); #endif return ret; @@ -309,8 +308,7 @@ bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv, ret = true; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); return ret; } @@ -425,8 +423,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, packet_subtype, orig_node, vid); - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); return ret; } @@ -452,8 +449,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, BATADV_P_DATA, orig_node, vid); - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); return ret; } @@ -474,10 +470,8 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet, else consume_skb(forw_packet->skb); - if (forw_packet->if_incoming) - batadv_hardif_put(forw_packet->if_incoming); - if (forw_packet->if_outgoing) - batadv_hardif_put(forw_packet->if_outgoing); + batadv_hardif_put(forw_packet->if_incoming); + batadv_hardif_put(forw_packet->if_outgoing); if (forw_packet->queue_left) atomic_inc(forw_packet->queue_left); kfree(forw_packet); @@ -748,6 +742,10 @@ void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv, * Adds a broadcast packet to the queue and sets up timers. Broadcast packets * are sent multiple times to increase probability for being received. * + * This call clones the given skb, hence the caller needs to take into + * account that the data segment of the original skb might not be + * modifiable anymore. + * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. 
*/ static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv, @@ -761,7 +759,7 @@ static int batadv_forw_bcast_packet_to_list(struct batadv_priv *bat_priv, unsigned long send_time = jiffies; struct sk_buff *newskb; - newskb = skb_copy(skb, GFP_ATOMIC); + newskb = skb_clone(skb, GFP_ATOMIC); if (!newskb) goto err; @@ -800,6 +798,10 @@ err: * or if a delay is given after that. Furthermore, queues additional * retransmissions if this interface is a wireless one. * + * This call clones the given skb, hence the caller needs to take into + * account that the data segment of the original skb might not be + * modifiable anymore. + * * Return: NETDEV_TX_OK on success and NETDEV_TX_BUSY on errors. */ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv, @@ -814,7 +816,7 @@ static int batadv_forw_bcast_packet_if(struct batadv_priv *bat_priv, int ret = NETDEV_TX_OK; if (!delay) { - newskb = skb_copy(skb, GFP_ATOMIC); + newskb = skb_clone(skb, GFP_ATOMIC); if (!newskb) return NETDEV_TX_BUSY; @@ -867,8 +869,7 @@ static bool batadv_send_no_broadcast(struct batadv_priv *bat_priv, ret = batadv_hardif_no_broadcast(if_out, bcast_packet->orig, orig_neigh); - if (neigh_node) - batadv_hardif_neigh_put(neigh_node); + batadv_hardif_neigh_put(neigh_node); /* ok, may broadcast */ if (!ret) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index ae368a42a4ad..0604b0279573 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -383,10 +383,8 @@ dropped: dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: - if (mcast_single_orig) - batadv_orig_node_put(mcast_single_orig); - if (primary_if) - batadv_hardif_put(primary_if); + batadv_orig_node_put(mcast_single_orig); + batadv_hardif_put(primary_if); return NETDEV_TX_OK; } @@ -501,7 +499,7 @@ out: * after rcu grace period * @ref: kref pointer of the vlan object */ -static void batadv_softif_vlan_release(struct kref *ref) +void batadv_softif_vlan_release(struct kref *ref) { struct batadv_softif_vlan *vlan; @@ -515,19 +513,6 @@ static void batadv_softif_vlan_release(struct kref *ref) } /** - * batadv_softif_vlan_put() - decrease the vlan object refcounter and - * possibly release it - * @vlan: the vlan object to release - */ -void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan) -{ - if (!vlan) - return; - - kref_put(&vlan->refcount, batadv_softif_vlan_release); -} - -/** * batadv_softif_vlan_get() - get the vlan object for a specific vid * @bat_priv: the bat priv with all the soft interface information * @vid: the identifier of the vlan object to retrieve @@ -851,8 +836,7 @@ static int batadv_softif_slave_add(struct net_device *dev, ret = batadv_hardif_enable_interface(hard_iface, dev); out: - if (hard_iface) - batadv_hardif_put(hard_iface); + batadv_hardif_put(hard_iface); return ret; } @@ -878,8 +862,7 @@ static int batadv_softif_slave_del(struct net_device *dev, ret = 0; out: - if (hard_iface) - batadv_hardif_put(hard_iface); + batadv_hardif_put(hard_iface); return ret; } diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 67a2ddd6832f..9f2003f1a497 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -9,6 +9,7 @@ #include "main.h" +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/types.h> @@ -21,8 +22,21 @@ void batadv_interface_rx(struct net_device *soft_iface, bool batadv_softif_is_valid(const struct net_device *net_dev); extern struct 
rtnl_link_ops batadv_link_ops; int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid); -void batadv_softif_vlan_put(struct batadv_softif_vlan *softif_vlan); +void batadv_softif_vlan_release(struct kref *ref); struct batadv_softif_vlan *batadv_softif_vlan_get(struct batadv_priv *bat_priv, unsigned short vid); +/** + * batadv_softif_vlan_put() - decrease the vlan object refcounter and + * possibly release it + * @vlan: the vlan object to release + */ +static inline void batadv_softif_vlan_put(struct batadv_softif_vlan *vlan) +{ + if (!vlan) + return; + + kref_put(&vlan->refcount, batadv_softif_vlan_release); +} + #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 789c851732b7..56b9fe97b3b4 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -358,6 +358,9 @@ static void batadv_tp_vars_release(struct kref *ref) */ static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) { + if (!tp_vars) + return; + kref_put(&tp_vars->refcount, batadv_tp_vars_release); } @@ -748,12 +751,9 @@ move_twnd: wake_up(&tp_vars->more_bytes); out: - if (likely(primary_if)) - batadv_hardif_put(primary_if); - if (likely(orig_node)) - batadv_orig_node_put(orig_node); - if (likely(tp_vars)) - batadv_tp_vars_put(tp_vars); + batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); + batadv_tp_vars_put(tp_vars); } /** @@ -882,10 +882,8 @@ static int batadv_tp_send(void *arg) } out: - if (likely(primary_if)) - batadv_hardif_put(primary_if); - if (likely(orig_node)) - batadv_orig_node_put(orig_node); + batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); batadv_tp_sender_end(bat_priv, tp_vars); batadv_tp_sender_cleanup(bat_priv, tp_vars); @@ -1205,10 +1203,8 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, ret = 0; out: - if (likely(orig_node)) - batadv_orig_node_put(orig_node); - if (likely(primary_if)) - batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); + batadv_hardif_put(primary_if); return ret; } @@ -1456,8 +1452,7 @@ send_ack: batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv, icmp->timestamp, icmp->session, icmp->uid); out: - if (likely(tp_vars)) - batadv_tp_vars_put(tp_vars); + batadv_tp_vars_put(tp_vars); } /** diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 711fe5a2cec4..e0b3dace2020 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -247,6 +247,9 @@ static void batadv_tt_local_entry_release(struct kref *ref) static void batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) { + if (!tt_local_entry) + return; + kref_put(&tt_local_entry->common.refcount, batadv_tt_local_entry_release); } @@ -270,7 +273,7 @@ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) * queue for free after rcu grace period * @ref: kref pointer of the nc_node */ -static void batadv_tt_global_entry_release(struct kref *ref) +void batadv_tt_global_entry_release(struct kref *ref) { struct batadv_tt_global_entry *tt_global_entry; @@ -283,17 +286,6 @@ static void batadv_tt_global_entry_release(struct kref *ref) } /** - * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and - * possibly release it - * @tt_global_entry: tt_global_entry to be free'd - */ -void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry) -{ - kref_put(&tt_global_entry->common.refcount, - batadv_tt_global_entry_release); -} - 
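A note on the send.c hunks above: switching the broadcast queue path from skb_copy() to skb_clone() avoids duplicating the packet data; the clone shares the data buffer with the original and only copies the sk_buff metadata, which is why the new kernel-doc text warns that the original skb's data segment may no longer be modifiable. A rough sketch of the resulting call pattern, not the actual batman-adv code:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Queue a broadcast frame for (re)transmission.  skb_clone() shares the
 * data buffer with @skb, so the caller must treat skb->data as read-only
 * afterwards; skb_copy() would also duplicate the data at a higher cost.
 */
static int example_queue_bcast(struct sk_buff *skb)
{
	struct sk_buff *newskb;

	newskb = skb_clone(skb, GFP_ATOMIC);
	if (!newskb)
		return NETDEV_TX_BUSY;

	/* ... hand newskb to the delayed-work machinery ... */
	return NETDEV_TX_OK;
}

Either primitive returns NULL on allocation failure, so the NETDEV_TX_BUSY fallback is unchanged.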
-/** * batadv_tt_global_hash_count() - count the number of orig entries * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to count entries for @@ -452,6 +444,9 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) static void batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry) { + if (!orig_entry) + return; + kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release); } @@ -818,13 +813,10 @@ check_roaming: ret = true; out: - if (in_hardif) - batadv_hardif_put(in_hardif); + batadv_hardif_put(in_hardif); dev_put(in_dev); - if (tt_local) - batadv_tt_local_entry_put(tt_local); - if (tt_global) - batadv_tt_global_entry_put(tt_global); + batadv_tt_local_entry_put(tt_local); + batadv_tt_global_entry_put(tt_global); return ret; } @@ -1214,8 +1206,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); cb->args[0] = bucket; @@ -1303,8 +1294,7 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, batadv_tt_local_entry_put(tt_removed_entry); out: - if (tt_local_entry) - batadv_tt_local_entry_put(tt_local_entry); + batadv_tt_local_entry_put(tt_local_entry); return curr_flags; } @@ -1574,8 +1564,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, sync_flags: batadv_tt_global_sync_flags(tt_global); out: - if (orig_entry) - batadv_tt_orig_list_entry_put(orig_entry); + batadv_tt_orig_list_entry_put(orig_entry); spin_unlock_bh(&tt_global->list_lock); } @@ -1748,10 +1737,8 @@ out_remove: tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM; out: - if (tt_global_entry) - batadv_tt_global_entry_put(tt_global_entry); - if (tt_local_entry) - batadv_tt_local_entry_put(tt_local_entry); + batadv_tt_global_entry_put(tt_global_entry); + batadv_tt_local_entry_put(tt_local_entry); return ret; } @@ -1787,15 +1774,13 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, } /* release the refcount for the "old" best */ - if (best_router) - batadv_neigh_node_put(best_router); + batadv_neigh_node_put(best_router); best_entry = orig_entry; best_router = router; } - if (best_router) - batadv_neigh_node_put(best_router); + batadv_neigh_node_put(best_router); return best_entry; } @@ -2001,8 +1986,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) ret = msg->len; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); dev_put(soft_iface); cb->args[0] = bucket; @@ -2193,10 +2177,8 @@ static void batadv_tt_global_del(struct batadv_priv *bat_priv, } out: - if (tt_global_entry) - batadv_tt_global_entry_put(tt_global_entry); - if (local_entry) - batadv_tt_local_entry_put(local_entry); + batadv_tt_global_entry_put(tt_global_entry); + batadv_tt_local_entry_put(local_entry); } /** @@ -2423,10 +2405,8 @@ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, rcu_read_unlock(); out: - if (tt_global_entry) - batadv_tt_global_entry_put(tt_global_entry); - if (tt_local_entry) - batadv_tt_local_entry_put(tt_local_entry); + batadv_tt_global_entry_put(tt_global_entry); + batadv_tt_local_entry_put(tt_local_entry); return orig_node; } @@ -2603,6 +2583,9 @@ static void batadv_tt_req_node_release(struct kref *ref) */ static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node) { + if (!tt_req_node) + return; + kref_put(&tt_req_node->refcount, 
batadv_tt_req_node_release); } @@ -2984,8 +2967,7 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, ret = true; out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); @@ -2996,8 +2978,7 @@ out: spin_unlock_bh(&bat_priv->tt.req_list_lock); } - if (tt_req_node) - batadv_tt_req_node_put(tt_req_node); + batadv_tt_req_node_put(tt_req_node); kfree(tvlv_tt_data); return ret; @@ -3128,10 +3109,8 @@ unlock: spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); out: - if (res_dst_orig_node) - batadv_orig_node_put(res_dst_orig_node); - if (req_dst_orig_node) - batadv_orig_node_put(req_dst_orig_node); + batadv_orig_node_put(res_dst_orig_node); + batadv_orig_node_put(req_dst_orig_node); kfree(tvlv_tt_data); return ret; } @@ -3245,10 +3224,8 @@ unlock: spin_unlock_bh(&bat_priv->tt.last_changeset_lock); out: spin_unlock_bh(&bat_priv->tt.commit_lock); - if (orig_node) - batadv_orig_node_put(orig_node); - if (primary_if) - batadv_hardif_put(primary_if); + batadv_orig_node_put(orig_node); + batadv_hardif_put(primary_if); kfree(tvlv_tt_data); /* The packet was for this host, so it doesn't need to be re-routed */ return true; @@ -3333,8 +3310,7 @@ static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, atomic_set(&orig_node->last_ttvn, ttvn); out: - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); } static void batadv_tt_update_changes(struct batadv_priv *bat_priv, @@ -3375,8 +3351,7 @@ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, goto out; ret = true; out: - if (tt_local_entry) - batadv_tt_local_entry_put(tt_local_entry); + batadv_tt_local_entry_put(tt_local_entry); return ret; } @@ -3439,8 +3414,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, spin_unlock_bh(&bat_priv->tt.req_list_lock); out: - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); } static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) @@ -3571,8 +3545,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, &tvlv_roam, sizeof(tvlv_roam)); out: - if (primary_if) - batadv_hardif_put(primary_if); + batadv_hardif_put(primary_if); } static void batadv_tt_purge(struct work_struct *work) @@ -4167,8 +4140,7 @@ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, atomic_read(&orig_node->last_ttvn) + 1); out: - if (orig_node) - batadv_orig_node_put(orig_node); + batadv_orig_node_put(orig_node); return NET_RX_SUCCESS; } diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index e1285904f885..d18740d9a22b 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -9,6 +9,7 @@ #include "main.h" +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/skbuff.h> @@ -28,7 +29,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry * batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); -void batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry); +void batadv_tt_global_entry_release(struct kref *ref); int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, @@ -55,4 +56,19 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, 
int batadv_tt_cache_init(void); void batadv_tt_cache_destroy(void); +/** + * batadv_tt_global_entry_put() - decrement the tt_global_entry refcounter and + * possibly release it + * @tt_global_entry: tt_global_entry to be free'd + */ +static inline void +batadv_tt_global_entry_put(struct batadv_tt_global_entry *tt_global_entry) +{ + if (!tt_global_entry) + return; + + kref_put(&tt_global_entry->common.refcount, + batadv_tt_global_entry_release); +} + #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 253f5a33a914..992773376e51 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -50,6 +50,9 @@ static void batadv_tvlv_handler_release(struct kref *ref) */ static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) { + if (!tvlv_handler) + return; + kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release); } @@ -106,6 +109,9 @@ static void batadv_tvlv_container_release(struct kref *ref) */ static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) { + if (!tvlv) + return; + kref_put(&tvlv->refcount, batadv_tvlv_container_release); } @@ -438,8 +444,7 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, ogm_source, orig_node, src, dst, tvlv_value, tvlv_value_cont_len); - if (tvlv_handler) - batadv_tvlv_handler_put(tvlv_handler); + batadv_tvlv_handler_put(tvlv_handler); tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; tvlv_value_len -= tvlv_value_cont_len; } diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index c32638dddbf9..f6b9dc4e408f 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -26,7 +26,7 @@ #include <linux/types.h> #include <net/bluetooth/bluetooth.h> -#define BTNAMSIZ 18 +#define BTNAMSIZ 21 /* CMTP ioctl defines */ #define CMTPCONNADD _IOW('C', 200, int) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e1a545c8a69f..8a47a3017d61 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1343,6 +1343,12 @@ int hci_inquiry(void __user *arg) goto done; } + /* Restrict maximum inquiry length to 60 seconds */ + if (ir.length > 60) { + err = -EINVAL; + goto done; + } + hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { @@ -1718,6 +1724,7 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) int hci_dev_do_close(struct hci_dev *hdev) { bool auto_off; + int err = 0; BT_DBG("%s %p", hdev->name, hdev); @@ -1727,10 +1734,18 @@ int hci_dev_do_close(struct hci_dev *hdev) hci_request_cancel_all(hdev); hci_req_sync_lock(hdev); + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + test_bit(HCI_UP, &hdev->flags)) { + /* Execute vendor specific shutdown routine */ + if (hdev->shutdown) + err = hdev->shutdown(hdev); + } + if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); hci_req_sync_unlock(hdev); - return 0; + return err; } hci_leds_update_powered(hdev, false); @@ -1798,14 +1813,6 @@ int hci_dev_do_close(struct hci_dev *hdev) clear_bit(HCI_INIT, &hdev->flags); } - if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - test_bit(HCI_UP, &hdev->flags)) { - /* Execute vendor specific shutdown routine */ - if (hdev->shutdown) - hdev->shutdown(hdev); - } - /* flush cmd work */ flush_work(&hdev->cmd_work); @@ -1845,7 +1852,7 @@ int hci_dev_do_close(struct hci_dev *hdev) 
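The batman-adv and tvlv cleanups above all rest on one convention: every *_put() helper now tolerates a NULL pointer, the way kfree(NULL) is a no-op, which is what lets callers drop their "if (obj)" guards and issue an unconditional put on every pointer in the error path. A minimal sketch of the pattern with illustrative names (foo_entry is not part of the patch):

#include <linux/kref.h>
#include <linux/slab.h>

struct foo_entry {
        struct kref refcount;
        /* payload ... */
};

static struct foo_entry *foo_entry_alloc(void)
{
        struct foo_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

        if (entry)
                kref_init(&entry->refcount);
        return entry;
}

static void foo_entry_release(struct kref *ref)
{
        struct foo_entry *entry = container_of(ref, struct foo_entry, refcount);

        kfree(entry);
}

/* Safe to call with NULL, mirroring kfree()/consume_skb() semantics, so
 * callers never need "if (entry) foo_entry_put(entry);".
 */
static void foo_entry_put(struct foo_entry *entry)
{
        if (!entry)
                return;

        kref_put(&entry->refcount, foo_entry_release);
}

With every put helper shaped like this, a function can initialise its local pointers to NULL and funnel all exits through a single out: label that puts each one unconditionally.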
hci_req_sync_unlock(hdev); hci_dev_put(hdev); - return 0; + return err; } int hci_dev_close(__u16 dev) @@ -3751,11 +3758,18 @@ done: } /* Alloc HCI device */ -struct hci_dev *hci_alloc_dev(void) +struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) { struct hci_dev *hdev; + unsigned int alloc_size; + + alloc_size = sizeof(*hdev); + if (sizeof_priv) { + /* Fixme: May need ALIGN-ment? */ + alloc_size += sizeof_priv; + } - hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + hdev = kzalloc(alloc_size, GFP_KERNEL); if (!hdev) return NULL; @@ -3869,7 +3883,7 @@ struct hci_dev *hci_alloc_dev(void) return hdev; } -EXPORT_SYMBOL(hci_alloc_dev); +EXPORT_SYMBOL(hci_alloc_dev_priv); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) @@ -4034,13 +4048,13 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); - /* Actual cleanup is deferred until hci_cleanup_dev(). */ + /* Actual cleanup is deferred until hci_release_dev(). */ hci_dev_put(hdev); } EXPORT_SYMBOL(hci_unregister_dev); -/* Cleanup HCI device */ -void hci_cleanup_dev(struct hci_dev *hdev) +/* Release HCI device */ +void hci_release_dev(struct hci_dev *hdev) { debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); @@ -4067,7 +4081,9 @@ void hci_cleanup_dev(struct hci_dev *hdev) hci_dev_unlock(hdev); ida_simple_remove(&hci_index_ida, hdev->id); + kfree(hdev); } +EXPORT_SYMBOL(hci_release_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1c3018202564..0bca035bf2dc 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -40,6 +40,8 @@ #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \ "\x00\x00\x00\x00\x00\x00\x00\x00" +#define secs_to_jiffies(_secs) msecs_to_jiffies((_secs) * 1000) + /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb, @@ -1171,6 +1173,12 @@ static void hci_cc_le_set_random_addr(struct hci_dev *hdev, struct sk_buff *skb) bacpy(&hdev->random_addr, sent); + if (!bacmp(&hdev->rpa, sent)) { + hci_dev_clear_flag(hdev, HCI_RPA_EXPIRED); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, + secs_to_jiffies(hdev->rpa_timeout)); + } + hci_dev_unlock(hdev); } @@ -1201,24 +1209,30 @@ static void hci_cc_le_set_adv_set_random_addr(struct hci_dev *hdev, { __u8 status = *((__u8 *) skb->data); struct hci_cp_le_set_adv_set_rand_addr *cp; - struct adv_info *adv_instance; + struct adv_info *adv; if (status) return; cp = hci_sent_cmd_data(hdev, HCI_OP_LE_SET_ADV_SET_RAND_ADDR); - if (!cp) + /* Update only in case the adv instance since handle 0x00 shall be using + * HCI_OP_LE_SET_RANDOM_ADDR since that allows both extended and + * non-extended adverting. 
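hci_alloc_dev_priv() above folds a driver-private area into the same allocation as the core structure, in the spirit of alloc_netdev(): the caller passes sizeof_priv and gets a single kzalloc() carrying both. A rough sketch of the idea with hypothetical names (example_dev and example_dev_priv are illustrative; the driver-side accessor is not part of this hunk), noting that alignment of the private area is left as an open question by the patch's own comment:

#include <linux/slab.h>

struct example_dev {
        char name[16];
        /* core state ... */
};

/* One allocation: the core struct followed by sizeof_priv bytes of
 * driver-private data.
 */
static struct example_dev *example_alloc_dev_priv(int sizeof_priv)
{
        return kzalloc(sizeof(struct example_dev) + sizeof_priv, GFP_KERNEL);
}

/* Hypothetical accessor: the private area starts right after the core
 * structure (may need explicit alignment, as the patch notes).
 */
static void *example_dev_priv(struct example_dev *dev)
{
        return (char *)dev + sizeof(*dev);
}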
+ */ + if (!cp || !cp->handle) return; hci_dev_lock(hdev); - if (!cp->handle) { - /* Store in hdev for instance 0 (Set adv and Directed advs) */ - bacpy(&hdev->random_addr, &cp->bdaddr); - } else { - adv_instance = hci_find_adv_instance(hdev, cp->handle); - if (adv_instance) - bacpy(&adv_instance->random_addr, &cp->bdaddr); + adv = hci_find_adv_instance(hdev, cp->handle); + if (adv) { + bacpy(&adv->random_addr, &cp->bdaddr); + if (!bacmp(&hdev->rpa, &cp->bdaddr)) { + adv->rpa_expired = false; + queue_delayed_work(hdev->workqueue, + &adv->rpa_expired_cb, + secs_to_jiffies(hdev->rpa_timeout)); + } } hci_dev_unlock(hdev); @@ -1277,7 +1291,9 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_cp_le_set_ext_adv_enable *cp; + struct hci_cp_ext_adv_set *set; __u8 status = *((__u8 *) skb->data); + struct adv_info *adv = NULL, *n; BT_DBG("%s status 0x%2.2x", hdev->name, status); @@ -1288,22 +1304,48 @@ static void hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, if (!cp) return; + set = (void *)cp->data; + hci_dev_lock(hdev); + if (cp->num_of_sets) + adv = hci_find_adv_instance(hdev, set->handle); + if (cp->enable) { struct hci_conn *conn; hci_dev_set_flag(hdev, HCI_LE_ADV); + if (adv) + adv->enabled = true; + conn = hci_lookup_le_connect(hdev); if (conn) queue_delayed_work(hdev->workqueue, &conn->le_conn_timeout, conn->conn_timeout); } else { + if (adv) { + adv->enabled = false; + /* If just one instance was disabled check if there are + * any other instance enabled before clearing HCI_LE_ADV + */ + list_for_each_entry_safe(adv, n, &hdev->adv_instances, + list) { + if (adv->enabled) + goto unlock; + } + } else { + /* All instances shall be considered disabled */ + list_for_each_entry_safe(adv, n, &hdev->adv_instances, + list) + adv->enabled = false; + } + hci_dev_clear_flag(hdev, HCI_LE_ADV); } +unlock: hci_dev_unlock(hdev); } @@ -2306,19 +2348,20 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); if (conn) { - u8 type = conn->type; - mgmt_disconnect_failed(hdev, &conn->dst, conn->type, conn->dst_type, status); + if (conn->type == LE_LINK) { + hdev->cur_adv_instance = conn->adv_instance; + hci_req_reenable_advertising(hdev); + } + /* If the disconnection failed for any reason, the upper layer * does not retry to disconnect in current implementation. * Hence, we need to do some basic cleanup here and re-enable * advertising if necessary. */ hci_conn_del(conn); - if (type == LE_LINK) - hci_req_reenable_advertising(hdev); } hci_dev_unlock(hdev); @@ -2844,7 +2887,6 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_conn_params *params; struct hci_conn *conn; bool mgmt_connected; - u8 type; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); @@ -2899,10 +2941,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } } - type = conn->type; - hci_disconn_cfm(conn, ev->reason); - hci_conn_del(conn); /* The suspend notifier is waiting for all devices to disconnect so * clear the bit from pending tasks and inform the wait queue. @@ -2922,8 +2961,12 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) * or until a connection is created or until the Advertising * is timed out due to Directed Advertising." 
*/ - if (type == LE_LINK) + if (conn->type == LE_LINK) { + hdev->cur_adv_instance = conn->adv_instance; hci_req_reenable_advertising(hdev); + } + + hci_conn_del(conn); unlock: hci_dev_unlock(hdev); @@ -3268,11 +3311,9 @@ unlock: hci_dev_unlock(hdev); } -static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, - u16 opcode, u8 ncmd) +static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) { - if (opcode != HCI_OP_NOP) - cancel_delayed_work(&hdev->cmd_timer); + cancel_delayed_work(&hdev->cmd_timer); if (!test_bit(HCI_RESET, &hdev->flags)) { if (ncmd) { @@ -3647,7 +3688,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb, break; } - handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd); + handle_cmd_cnt_and_timer(hdev, ev->ncmd); hci_req_cmd_complete(hdev, *opcode, *status, req_complete, req_complete_skb); @@ -3748,7 +3789,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, break; } - handle_cmd_cnt_and_timer(hdev, *opcode, ev->ncmd); + handle_cmd_cnt_and_timer(hdev, ev->ncmd); /* Indicate request completion if the command failed. Also, if * we're not waiting for a special event and we get a success @@ -4382,6 +4423,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, switch (ev->status) { case 0x00: + /* The synchronous connection complete event should only be + * sent once per new connection. Receiving a successful + * complete event when the connection status is already + * BT_CONNECTED means that the device is misbehaving and sent + * multiple complete event packets for the same new connection. + * + * Registering the device more than once can corrupt kernel + * memory, hence upon detecting this invalid event, we report + * an error and ignore the packet. + */ + if (conn->state == BT_CONNECTED) { + bt_dev_err(hdev, "Ignoring connect complete event for existing connection"); + goto unlock; + } + conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; conn->type = ev->link_type; @@ -5104,9 +5160,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev, } #endif +static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr, + u8 bdaddr_type, bdaddr_t *local_rpa) +{ + if (conn->out) { + conn->dst_type = bdaddr_type; + conn->resp_addr_type = bdaddr_type; + bacpy(&conn->resp_addr, bdaddr); + + /* Check if the controller has set a Local RPA then it must be + * used instead or hdev->rpa. + */ + if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) { + conn->init_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->init_addr, local_rpa); + } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) { + conn->init_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->init_addr, &conn->hdev->rpa); + } else { + hci_copy_identity_address(conn->hdev, &conn->init_addr, + &conn->init_addr_type); + } + } else { + conn->resp_addr_type = conn->hdev->adv_addr_type; + /* Check if the controller has set a Local RPA then it must be + * used instead or hdev->rpa. + */ + if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) { + conn->resp_addr_type = ADDR_LE_DEV_RANDOM; + bacpy(&conn->resp_addr, local_rpa); + } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) { + /* In case of ext adv, resp_addr will be updated in + * Adv Terminated event. 
+ */ + if (!ext_adv_capable(conn->hdev)) + bacpy(&conn->resp_addr, + &conn->hdev->random_addr); + } else { + bacpy(&conn->resp_addr, &conn->hdev->bdaddr); + } + + conn->init_addr_type = bdaddr_type; + bacpy(&conn->init_addr, bdaddr); + + /* For incoming connections, set the default minimum + * and maximum connection interval. They will be used + * to check if the parameters are in range and if not + * trigger the connection update procedure. + */ + conn->le_conn_min_interval = conn->hdev->le_conn_min_interval; + conn->le_conn_max_interval = conn->hdev->le_conn_max_interval; + } +} + static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, - bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle, - u16 interval, u16 latency, u16 supervision_timeout) + bdaddr_t *bdaddr, u8 bdaddr_type, + bdaddr_t *local_rpa, u8 role, u16 handle, + u16 interval, u16 latency, + u16 supervision_timeout) { struct hci_conn_params *params; struct hci_conn *conn; @@ -5154,32 +5265,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, cancel_delayed_work(&conn->le_conn_timeout); } - if (!conn->out) { - /* Set the responder (our side) address type based on - * the advertising address type. - */ - conn->resp_addr_type = hdev->adv_addr_type; - if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) { - /* In case of ext adv, resp_addr will be updated in - * Adv Terminated event. - */ - if (!ext_adv_capable(hdev)) - bacpy(&conn->resp_addr, &hdev->random_addr); - } else { - bacpy(&conn->resp_addr, &hdev->bdaddr); - } - - conn->init_addr_type = bdaddr_type; - bacpy(&conn->init_addr, bdaddr); - - /* For incoming connections, set the default minimum - * and maximum connection interval. They will be used - * to check if the parameters are in range and if not - * trigger the connection update procedure. - */ - conn->le_conn_min_interval = hdev->le_conn_min_interval; - conn->le_conn_max_interval = hdev->le_conn_max_interval; - } + le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa); /* Lookup the identity address from the stored connection * address and address type. @@ -5236,6 +5322,13 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status, conn->handle = handle; conn->state = BT_CONFIG; + /* Store current advertising instance as connection advertising instance + * when sotfware rotation is in use so it can be re-enabled when + * disconnected. 
+ */ + if (!ext_adv_capable(hdev)) + conn->adv_instance = hdev->cur_adv_instance; + conn->le_conn_interval = interval; conn->le_conn_latency = latency; conn->le_supv_timeout = supervision_timeout; @@ -5290,7 +5383,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, - ev->role, le16_to_cpu(ev->handle), + NULL, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); @@ -5304,7 +5397,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type, - ev->role, le16_to_cpu(ev->handle), + &ev->local_rpa, ev->role, le16_to_cpu(ev->handle), le16_to_cpu(ev->interval), le16_to_cpu(ev->latency), le16_to_cpu(ev->supervision_timeout)); @@ -5319,13 +5412,13 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_evt_le_ext_adv_set_term *ev = (void *) skb->data; struct hci_conn *conn; + struct adv_info *adv; BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); - if (ev->status) { - struct adv_info *adv; + adv = hci_find_adv_instance(hdev, ev->handle); - adv = hci_find_adv_instance(hdev, ev->handle); + if (ev->status) { if (!adv) return; @@ -5336,11 +5429,18 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } + if (adv) + adv->enabled = false; + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle)); if (conn) { - struct adv_info *adv_instance; + /* Store handle in the connection so the correct advertising + * instance can be re-enabled when disconnected. + */ + conn->adv_instance = ev->handle; - if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM) + if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM || + bacmp(&conn->resp_addr, BDADDR_ANY)) return; if (!ev->handle) { @@ -5348,9 +5448,8 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb) return; } - adv_instance = hci_find_adv_instance(hdev, ev->handle); - if (adv_instance) - bacpy(&conn->resp_addr, &adv_instance->random_addr); + if (adv) + bacpy(&conn->resp_addr, &adv->random_addr); } } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1d14adc023e9..f15626607b2d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2072,8 +2072,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, * current RPA has expired then generate a new one. 
*/ if (use_rpa) { - int to; - /* If Controller supports LL Privacy use own address type is * 0x03 */ @@ -2084,14 +2082,10 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, *own_addr_type = ADDR_LE_DEV_RANDOM; if (adv_instance) { - if (!adv_instance->rpa_expired && - !bacmp(&adv_instance->random_addr, &hdev->rpa)) + if (adv_rpa_valid(adv_instance)) return 0; - - adv_instance->rpa_expired = false; } else { - if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && - !bacmp(&hdev->random_addr, &hdev->rpa)) + if (rpa_valid(hdev)) return 0; } @@ -2103,14 +2097,6 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, bacpy(rand_addr, &hdev->rpa); - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - if (adv_instance) - queue_delayed_work(hdev->workqueue, - &adv_instance->rpa_expired_cb, to); - else - queue_delayed_work(hdev->workqueue, - &hdev->rpa_expired, to); - return 0; } @@ -2153,6 +2139,30 @@ void __hci_req_clear_ext_adv_sets(struct hci_request *req) hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL); } +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (hci_dev_test_flag(hdev, HCI_LE_ADV) || + hci_lookup_le_connect(hdev)) { + bt_dev_dbg(hdev, "Deferring random address update"); + hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) { struct hci_cp_le_set_ext_adv_params cp; @@ -2255,6 +2265,13 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) } else { if (!bacmp(&random_addr, &hdev->random_addr)) return 0; + /* Instance 0x00 doesn't have an adv_info, instead it + * uses hdev->random_addr to track its address so + * whenever it needs to be updated this also set the + * random address since hdev->random_addr is shared with + * scan state machine. + */ + set_random_addr(req, &random_addr); } memset(&cp, 0, sizeof(cp)); @@ -2512,30 +2529,6 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, false); } -static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. 
- */ - if (hci_dev_test_flag(hdev, HCI_LE_ADV) || - hci_lookup_le_connect(hdev)) { - bt_dev_dbg(hdev, "Deferring random address update"); - hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - int hci_update_random_address(struct hci_request *req, bool require_privacy, bool use_rpa, u8 *own_addr_type) { @@ -2547,8 +2540,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, * the current RPA in use, then generate a new one. */ if (use_rpa) { - int to; - /* If Controller supports LL Privacy use own address type is * 0x03 */ @@ -2558,8 +2549,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, else *own_addr_type = ADDR_LE_DEV_RANDOM; - if (!hci_dev_test_and_clear_flag(hdev, HCI_RPA_EXPIRED) && - !bacmp(&hdev->random_addr, &hdev->rpa)) + if (rpa_valid(hdev)) return 0; err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); @@ -2570,9 +2560,6 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, set_random_addr(req, &hdev->rpa); - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); - return 0; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index b69d88b88d2e..7827639ecf5c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -85,8 +85,7 @@ static void bt_host_release(struct device *dev) struct hci_dev *hdev = to_hci_dev(dev); if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) - hci_cleanup_dev(hdev); - kfree(hdev); + hci_release_dev(hdev); module_put(THIS_MODULE); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3663f880df11..cea01e275f1e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7204,7 +7204,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, if (!mgmt_rp) goto done; - if (status) + if (eir_len == 0) goto send_rsp; eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV, @@ -7725,7 +7725,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, * advertising. 
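set_random_addr() in the hci_request.c hunks is moved ahead of its new caller rather than rewritten; the pattern it implements is worth spelling out: if the controller is busy advertising or initiating a connection, applying a new random address now would leave the initiator address undefined, so the update is skipped and a flag is set so the next cycle regenerates it. A condensed, illustrative sketch of that defer-and-flag shape (addr_state and apply_or_defer are made-up names, not the patch):

#include <linux/string.h>
#include <linux/types.h>

struct addr_state {
        bool busy;              /* e.g. advertising or an LE connect in flight */
        bool expired;           /* set when the update had to be deferred */
        u8 addr[6];
};

static void apply_or_defer(struct addr_state *st, const u8 *new_addr)
{
        if (st->busy) {
                /* Leave the old value in place; the next update cycle sees
                 * the expired flag and tries again.
                 */
                st->expired = true;
                return;
        }

        memcpy(st->addr, new_addr, sizeof(st->addr));
        st->expired = false;
}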
*/ if (hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) - return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_ADVERTISING, + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_NOT_SUPPORTED); if (cp->instance < 1 || cp->instance > hdev->le_num_of_adv_sets) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ae6f80730561..2c95bb58f901 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -70,7 +70,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) BT_DBG("dlc %p state %ld err %d", d, d->state, err); - spin_lock_bh(&sk->sk_lock.slock); + lock_sock(sk); if (err) sk->sk_err = err; @@ -91,7 +91,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) sk->sk_state_change(sk); } - spin_unlock_bh(&sk->sk_lock.slock); + release_sock(sk); if (parent && sock_flag(sk, SOCK_ZAPPED)) { /* We have to drop DLC lock here, otherwise @@ -974,7 +974,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * if (!parent) return 0; - bh_lock_sock(parent); + lock_sock(parent); /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { @@ -1001,7 +1001,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * result = 1; done: - bh_unlock_sock(parent); + release_sock(parent); if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) parent->sk_state_change(parent); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d9a4e88dacbb..98a881586512 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,6 +48,8 @@ struct sco_conn { spinlock_t lock; struct sock *sk; + struct delayed_work timeout_work; + unsigned int mtu; }; @@ -74,31 +76,47 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2) -static void sco_sock_timeout(struct timer_list *t) +static void sco_sock_timeout(struct work_struct *work) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sco_conn *conn = container_of(work, struct sco_conn, + timeout_work.work); + struct sock *sk; + + sco_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + sco_conn_unlock(conn); + + if (!sk) + return; BT_DBG("sock %p state %d", sk, sk->sk_state); - bh_lock_sock(sk); + lock_sock(sk); sk->sk_err = ETIMEDOUT; sk->sk_state_change(sk); - bh_unlock_sock(sk); - - sco_sock_kill(sk); + release_sock(sk); sock_put(sk); } static void sco_sock_set_timer(struct sock *sk, long timeout) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); + schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout); } static void sco_sock_clear_timer(struct sock *sk) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); } /* ---- SCO connections ---- */ @@ -173,12 +191,14 @@ static void sco_conn_del(struct hci_conn *hcon, int err) if (sk) { sock_hold(sk); - bh_lock_sock(sk); + lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); - bh_unlock_sock(sk); - sco_sock_kill(sk); + release_sock(sk); sock_put(sk); + + /* Ensure no more work items will run before freeing conn. 
*/ + cancel_delayed_work_sync(&conn->timeout_work); } hcon->sco_data = NULL; @@ -193,6 +213,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, sco_pi(sk)->conn = conn; conn->sk = sk; + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); + if (parent) bt_accept_enqueue(parent, sk, true); } @@ -212,44 +234,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, return err; } -static int sco_connect(struct sock *sk) +static int sco_connect(struct hci_dev *hdev, struct sock *sk) { struct sco_conn *conn; struct hci_conn *hcon; - struct hci_dev *hdev; int err, type; BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst); - hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) - return -EHOSTUNREACH; - - hci_dev_lock(hdev); - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK; if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && - (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { - err = -EOPNOTSUPP; - goto done; - } + (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) + return -EOPNOTSUPP; hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting); - if (IS_ERR(hcon)) { - err = PTR_ERR(hcon); - goto done; - } + if (IS_ERR(hcon)) + return PTR_ERR(hcon); conn = sco_conn_add(hcon); if (!conn) { hci_conn_drop(hcon); - err = -ENOMEM; - goto done; + return -ENOMEM; } /* Update source addr of the socket */ @@ -257,7 +267,7 @@ static int sco_connect(struct sock *sk) err = sco_chan_add(conn, sk, NULL); if (err) - goto done; + return err; if (hcon->state == BT_CONNECTED) { sco_sock_clear_timer(sk); @@ -267,9 +277,6 @@ static int sco_connect(struct sock *sk) sco_sock_set_timer(sk, sk->sk_sndtimeo); } -done: - hci_dev_unlock(hdev); - hci_dev_put(hdev); return err; } @@ -394,8 +401,7 @@ static void sco_sock_cleanup_listen(struct sock *parent) */ static void sco_sock_kill(struct sock *sk) { - if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket || - sock_flag(sk, SOCK_DEAD)) + if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; BT_DBG("sk %p state %d", sk, sk->sk_state); @@ -443,11 +449,10 @@ static void __sco_sock_close(struct sock *sk) /* Must be called on unlocked socket. 
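The sco.c changes replace the per-socket timer_list, which fired in softirq context and had to use bh_lock_sock(), with a delayed_work hanging off the connection, so the timeout handler runs in process context and can take lock_sock() like the rest of the socket code. A stripped-down sketch of that shape with illustrative names (example_conn is not the patch itself):

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <net/sock.h>

struct example_conn {
        spinlock_t lock;
        struct sock *sk;
        struct delayed_work timeout_work;       /* INIT_DELAYED_WORK() at setup */
};

static void example_timeout(struct work_struct *work)
{
        struct example_conn *conn = container_of(work, struct example_conn,
                                                 timeout_work.work);
        struct sock *sk;

        /* Take a reference under the conn lock; the sock may already be
         * detached from the connection.
         */
        spin_lock(&conn->lock);
        sk = conn->sk;
        if (sk)
                sock_hold(sk);
        spin_unlock(&conn->lock);

        if (!sk)
                return;

        lock_sock(sk);          /* process context, sleeping lock is fine */
        sk->sk_err = ETIMEDOUT;
        sk->sk_state_change(sk);
        release_sock(sk);

        sock_put(sk);
}

static void example_set_timer(struct example_conn *conn, long timeout)
{
        cancel_delayed_work(&conn->timeout_work);
        schedule_delayed_work(&conn->timeout_work, timeout);
}

Teardown has to pair this with cancel_delayed_work_sync() before the connection is freed, which is what the patch does at the end of sco_conn_del().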
*/ static void sco_sock_close(struct sock *sk) { - sco_sock_clear_timer(sk); lock_sock(sk); + sco_sock_clear_timer(sk); __sco_sock_close(sk); release_sock(sk); - sco_sock_kill(sk); } static void sco_skb_put_cmsg(struct sk_buff *skb, struct msghdr *msg, @@ -500,8 +505,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT; - timer_setup(&sk->sk_timer, sco_sock_timeout, 0); - bt_sock_link(&sco_sk_list, sk); return sk; } @@ -566,6 +569,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; + struct hci_dev *hdev; int err; BT_DBG("sk %p", sk); @@ -580,12 +584,19 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen if (sk->sk_type != SOCK_SEQPACKET) return -EINVAL; + hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) + return -EHOSTUNREACH; + hci_dev_lock(hdev); + lock_sock(sk); /* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr); - err = sco_connect(sk); + err = sco_connect(hdev, sk); + hci_dev_unlock(hdev); + hci_dev_put(hdev); if (err) goto done; @@ -773,6 +784,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting) cp.max_latency = cpu_to_le16(0xffff); cp.retrans_effort = 0xff; break; + default: + /* use CVSD settings as fallback */ + cp.max_latency = cpu_to_le16(0xffff); + cp.retrans_effort = 0xff; + break; } hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, @@ -1083,11 +1099,11 @@ static void sco_conn_ready(struct sco_conn *conn) BT_DBG("conn %p", conn); if (sk) { + lock_sock(sk); sco_sock_clear_timer(sk); - bh_lock_sock(sk); sk->sk_state = BT_CONNECTED; sk->sk_state_change(sk); - bh_unlock_sock(sk); + release_sock(sk); } else { sco_conn_lock(conn); @@ -1102,12 +1118,12 @@ static void sco_conn_ready(struct sco_conn *conn) return; } - bh_lock_sock(parent); + lock_sock(parent); sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { - bh_unlock_sock(parent); + release_sock(parent); sco_conn_unlock(conn); return; } @@ -1128,7 +1144,7 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent); - bh_unlock_sock(parent); + release_sock(parent); sco_conn_unlock(conn); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 8a0c0cc55cb4..b50382f957c1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -294,11 +294,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); p = br_port_get_rcu(skb->dev); - if (p->flags & BR_VLAN_TUNNEL) { - if (br_handle_ingress_vlan_tunnel(skb, p, - nbp_vlan_group_rcu(p))) - goto drop; - } + if (p->flags & BR_VLAN_TUNNEL) + br_handle_ingress_vlan_tunnel(skb, p, nbp_vlan_group_rcu(p)); if (unlikely(is_link_local_ether_addr(dest))) { u16 fwd_mask = p->br->group_fwd_mask_required; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 389ff3c1e9d9..0281453f7766 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -37,6 +37,36 @@ br_ip6_rports_get_timer(struct net_bridge_mcast_port *pmctx, #endif } +static size_t __br_rports_one_size(void) +{ + return nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PORT */ + nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_TIMER */ + nla_total_size(sizeof(u8)) + /* MDBA_ROUTER_PATTR_TYPE */ + nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET_TIMER */ + 
nla_total_size(sizeof(u32)) + /* MDBA_ROUTER_PATTR_INET6_TIMER */ + nla_total_size(sizeof(u32)); /* MDBA_ROUTER_PATTR_VID */ +} + +size_t br_rports_size(const struct net_bridge_mcast *brmctx) +{ + struct net_bridge_mcast_port *pmctx; + size_t size = nla_total_size(0); /* MDBA_ROUTER */ + + rcu_read_lock(); + hlist_for_each_entry_rcu(pmctx, &brmctx->ip4_mc_router_list, + ip4_rlist) + size += __br_rports_one_size(); + +#if IS_ENABLED(CONFIG_IPV6) + hlist_for_each_entry_rcu(pmctx, &brmctx->ip6_mc_router_list, + ip6_rlist) + size += __br_rports_one_size(); +#endif + rcu_read_unlock(); + + return size; +} + int br_rports_fill_info(struct sk_buff *skb, const struct net_bridge_mcast *brmctx) { diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index df6bf6a237aa..2c437d4bf632 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -221,7 +221,7 @@ br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg) * can safely be used on return */ rcu_read_lock(); - vlan = br_vlan_find(nbp_vlan_group(pg->key.port), pg->key.addr.vid); + vlan = br_vlan_find(nbp_vlan_group_rcu(pg->key.port), pg->key.addr.vid); if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx)) pmctx = &vlan->port_mcast_ctx; else @@ -1659,11 +1659,38 @@ again_under_lmqt: } } +static void br_multicast_read_querier(const struct bridge_mcast_querier *querier, + struct bridge_mcast_querier *dest) +{ + unsigned int seq; + + memset(dest, 0, sizeof(*dest)); + do { + seq = read_seqcount_begin(&querier->seq); + dest->port_ifidx = querier->port_ifidx; + memcpy(&dest->addr, &querier->addr, sizeof(struct br_ip)); + } while (read_seqcount_retry(&querier->seq, seq)); +} + +static void br_multicast_update_querier(struct net_bridge_mcast *brmctx, + struct bridge_mcast_querier *querier, + int ifindex, + struct br_ip *saddr) +{ + lockdep_assert_held_once(&brmctx->br->multicast_lock); + + write_seqcount_begin(&querier->seq); + querier->port_ifidx = ifindex; + memcpy(&querier->addr, saddr, sizeof(*saddr)); + write_seqcount_end(&querier->seq); +} + static void br_multicast_send_query(struct net_bridge_mcast *brmctx, struct net_bridge_mcast_port *pmctx, struct bridge_mcast_own_query *own_query) { struct bridge_mcast_other_query *other_query = NULL; + struct bridge_mcast_querier *querier; struct br_ip br_group; unsigned long time; @@ -1676,10 +1703,12 @@ static void br_multicast_send_query(struct net_bridge_mcast *brmctx, if (pmctx ? 
(own_query == &pmctx->ip4_own_query) : (own_query == &brmctx->ip4_own_query)) { + querier = &brmctx->ip4_querier; other_query = &brmctx->ip4_other_query; br_group.proto = htons(ETH_P_IP); #if IS_ENABLED(CONFIG_IPV6) } else { + querier = &brmctx->ip6_querier; other_query = &brmctx->ip6_other_query; br_group.proto = htons(ETH_P_IPV6); #endif @@ -1688,6 +1717,13 @@ static void br_multicast_send_query(struct net_bridge_mcast *brmctx, if (!other_query || timer_pending(&other_query->timer)) return; + /* we're about to select ourselves as querier */ + if (!pmctx && querier->port_ifidx) { + struct br_ip zeroip = {}; + + br_multicast_update_querier(brmctx, querier, 0, &zeroip); + } + __br_multicast_send_query(brmctx, pmctx, NULL, NULL, &br_group, false, 0, NULL); @@ -2828,58 +2864,157 @@ unlock_continue: } #endif -static bool br_ip4_multicast_select_querier(struct net_bridge_mcast *brmctx, - struct net_bridge_mcast_port *pmctx, - __be32 saddr) +static bool br_multicast_select_querier(struct net_bridge_mcast *brmctx, + struct net_bridge_mcast_port *pmctx, + struct br_ip *saddr) { - struct net_bridge_port *port = pmctx ? pmctx->port : NULL; - - if (!timer_pending(&brmctx->ip4_own_query.timer) && - !timer_pending(&brmctx->ip4_other_query.timer)) - goto update; + int port_ifidx = pmctx ? pmctx->port->dev->ifindex : 0; + struct timer_list *own_timer, *other_timer; + struct bridge_mcast_querier *querier; - if (!brmctx->ip4_querier.addr.src.ip4) - goto update; + switch (saddr->proto) { + case htons(ETH_P_IP): + querier = &brmctx->ip4_querier; + own_timer = &brmctx->ip4_own_query.timer; + other_timer = &brmctx->ip4_other_query.timer; + if (!querier->addr.src.ip4 || + ntohl(saddr->src.ip4) <= ntohl(querier->addr.src.ip4)) + goto update; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + querier = &brmctx->ip6_querier; + own_timer = &brmctx->ip6_own_query.timer; + other_timer = &brmctx->ip6_other_query.timer; + if (ipv6_addr_cmp(&saddr->src.ip6, &querier->addr.src.ip6) <= 0) + goto update; + break; +#endif + default: + return false; + } - if (ntohl(saddr) <= ntohl(brmctx->ip4_querier.addr.src.ip4)) + if (!timer_pending(own_timer) && !timer_pending(other_timer)) goto update; return false; update: - brmctx->ip4_querier.addr.src.ip4 = saddr; - - /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(brmctx->ip4_querier.port, port); + br_multicast_update_querier(brmctx, querier, port_ifidx, saddr); return true; } +static struct net_bridge_port * +__br_multicast_get_querier_port(struct net_bridge *br, + const struct bridge_mcast_querier *querier) +{ + int port_ifidx = READ_ONCE(querier->port_ifidx); + struct net_bridge_port *p; + struct net_device *dev; + + if (port_ifidx == 0) + return NULL; + + dev = dev_get_by_index_rcu(dev_net(br->dev), port_ifidx); + if (!dev) + return NULL; + p = br_port_get_rtnl_rcu(dev); + if (!p || p->br != br) + return NULL; + + return p; +} + +size_t br_multicast_querier_state_size(void) +{ + return nla_total_size(0) + /* nest attribute */ + nla_total_size(sizeof(__be32)) + /* BRIDGE_QUERIER_IP_ADDRESS */ + nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IP_PORT */ + nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IP_OTHER_TIMER */ #if IS_ENABLED(CONFIG_IPV6) -static bool br_ip6_multicast_select_querier(struct net_bridge_mcast *brmctx, - struct net_bridge_mcast_port *pmctx, - struct in6_addr *saddr) + nla_total_size(sizeof(struct in6_addr)) + /* BRIDGE_QUERIER_IPV6_ADDRESS */ + nla_total_size(sizeof(int)) + /* BRIDGE_QUERIER_IPV6_PORT */ + 
nla_total_size_64bit(sizeof(u64)) + /* BRIDGE_QUERIER_IPV6_OTHER_TIMER */ +#endif + 0; +} + +/* protected by rtnl or rcu */ +int br_multicast_dump_querier_state(struct sk_buff *skb, + const struct net_bridge_mcast *brmctx, + int nest_attr) { - struct net_bridge_port *port = pmctx ? pmctx->port : NULL; + struct bridge_mcast_querier querier = {}; + struct net_bridge_port *p; + struct nlattr *nest; - if (!timer_pending(&brmctx->ip6_own_query.timer) && - !timer_pending(&brmctx->ip6_other_query.timer)) - goto update; + if (!br_opt_get(brmctx->br, BROPT_MULTICAST_ENABLED) || + br_multicast_ctx_vlan_global_disabled(brmctx)) + return 0; - if (ipv6_addr_cmp(saddr, &brmctx->ip6_querier.addr.src.ip6) <= 0) - goto update; + nest = nla_nest_start(skb, nest_attr); + if (!nest) + return -EMSGSIZE; - return false; + rcu_read_lock(); + if (!brmctx->multicast_querier && + !timer_pending(&brmctx->ip4_other_query.timer)) + goto out_v6; -update: - brmctx->ip6_querier.addr.src.ip6 = *saddr; + br_multicast_read_querier(&brmctx->ip4_querier, &querier); + if (nla_put_in_addr(skb, BRIDGE_QUERIER_IP_ADDRESS, + querier.addr.src.ip4)) { + rcu_read_unlock(); + goto out_err; + } - /* update protected by general multicast_lock by caller */ - rcu_assign_pointer(brmctx->ip6_querier.port, port); + p = __br_multicast_get_querier_port(brmctx->br, &querier); + if (timer_pending(&brmctx->ip4_other_query.timer) && + (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IP_OTHER_TIMER, + br_timer_value(&brmctx->ip4_other_query.timer), + BRIDGE_QUERIER_PAD) || + (p && nla_put_u32(skb, BRIDGE_QUERIER_IP_PORT, p->dev->ifindex)))) { + rcu_read_unlock(); + goto out_err; + } - return true; -} +out_v6: +#if IS_ENABLED(CONFIG_IPV6) + if (!brmctx->multicast_querier && + !timer_pending(&brmctx->ip6_other_query.timer)) + goto out; + + br_multicast_read_querier(&brmctx->ip6_querier, &querier); + if (nla_put_in6_addr(skb, BRIDGE_QUERIER_IPV6_ADDRESS, + &querier.addr.src.ip6)) { + rcu_read_unlock(); + goto out_err; + } + + p = __br_multicast_get_querier_port(brmctx->br, &querier); + if (timer_pending(&brmctx->ip6_other_query.timer) && + (nla_put_u64_64bit(skb, BRIDGE_QUERIER_IPV6_OTHER_TIMER, + br_timer_value(&brmctx->ip6_other_query.timer), + BRIDGE_QUERIER_PAD) || + (p && nla_put_u32(skb, BRIDGE_QUERIER_IPV6_PORT, + p->dev->ifindex)))) { + rcu_read_unlock(); + goto out_err; + } +out: #endif + rcu_read_unlock(); + nla_nest_end(skb, nest); + if (!nla_len(nest)) + nla_nest_cancel(skb, nest); + + return 0; + +out_err: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} static void br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, @@ -3082,7 +3217,7 @@ br_ip4_multicast_query_received(struct net_bridge_mcast *brmctx, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip4_multicast_select_querier(brmctx, pmctx, saddr->src.ip4)) + if (!br_multicast_select_querier(brmctx, pmctx, saddr)) return; br_multicast_update_query_timer(brmctx, query, max_delay); @@ -3097,7 +3232,7 @@ br_ip6_multicast_query_received(struct net_bridge_mcast *brmctx, struct br_ip *saddr, unsigned long max_delay) { - if (!br_ip6_multicast_select_querier(brmctx, pmctx, &saddr->src.ip6)) + if (!br_multicast_select_querier(brmctx, pmctx, saddr)) return; br_multicast_update_query_timer(brmctx, query, max_delay); @@ -3117,7 +3252,7 @@ static void br_ip4_multicast_query(struct net_bridge_mcast *brmctx, struct igmpv3_query *ih3; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct br_ip saddr; + struct br_ip saddr = {}; unsigned long max_delay; 
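Replacing the RCU-protected port pointer in struct bridge_mcast_querier with a plain ifindex plus a seqcount_t lets the netlink dump path, which runs under RCU without the bridge's multicast_lock, copy out a consistent (port_ifidx, addr) snapshot: the writer bumps the sequence count around its update and the reader retries if a write overlapped. A self-contained sketch of that read/update pairing (example_querier is an illustrative name, not the patch):

#include <linux/seqlock.h>
#include <linux/string.h>
#include <linux/types.h>

struct example_querier {
        int port_ifidx;
        u8 addr[16];
        seqcount_t seq;         /* seqcount_init() at context init time */
};

/* Writer side: callers are already serialized by an outer lock (the bridge
 * uses multicast_lock), so only the sequence count needs bumping.
 */
static void example_querier_update(struct example_querier *q, int ifindex,
                                   const u8 *addr)
{
        write_seqcount_begin(&q->seq);
        q->port_ifidx = ifindex;
        memcpy(q->addr, addr, sizeof(q->addr));
        write_seqcount_end(&q->seq);
}

/* Reader side: copy a snapshot and retry if an update raced with us. */
static void example_querier_read(const struct example_querier *q,
                                 struct example_querier *dest)
{
        unsigned int seq;

        do {
                seq = read_seqcount_begin(&q->seq);
                dest->port_ifidx = q->port_ifidx;
                memcpy(dest->addr, q->addr, sizeof(dest->addr));
        } while (read_seqcount_retry(&q->seq, seq));
}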
unsigned long now = jiffies; __be32 group; @@ -3197,7 +3332,7 @@ static int br_ip6_multicast_query(struct net_bridge_mcast *brmctx, struct mld2_query *mld2q; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct br_ip saddr; + struct br_ip saddr = {}; unsigned long max_delay; unsigned long now = jiffies; unsigned int offset = skb_transport_offset(skb); @@ -3675,7 +3810,6 @@ static void br_multicast_query_expired(struct net_bridge_mcast *brmctx, if (query->startup_sent < brmctx->multicast_startup_query_count) query->startup_sent++; - RCU_INIT_POINTER(querier->port, NULL); br_multicast_send_query(brmctx, NULL, query); out: spin_unlock(&brmctx->br->multicast_lock); @@ -3732,12 +3866,14 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_membership_interval = 260 * HZ; brmctx->ip4_other_query.delay_time = 0; - brmctx->ip4_querier.port = NULL; + brmctx->ip4_querier.port_ifidx = 0; + seqcount_init(&brmctx->ip4_querier.seq); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; brmctx->ip6_other_query.delay_time = 0; - brmctx->ip6_querier.port = NULL; + brmctx->ip6_querier.port_ifidx = 0; + seqcount_init(&brmctx->ip6_querier.seq); #endif timer_setup(&brmctx->ip4_mc_router_timer, @@ -3938,7 +4074,7 @@ void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on) } } -void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) +static void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) { struct net_bridge_port *p; @@ -3953,6 +4089,9 @@ void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on) continue; br_multicast_toggle_one_vlan(vport, on); } + + if (br_vlan_is_brentry(vlan)) + br_multicast_toggle_one_vlan(vlan, on); } int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, @@ -4107,15 +4246,16 @@ br_multicast_rport_del_notify(struct net_bridge_mcast_port *pmctx, bool deleted) pmctx->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; } -int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) +int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, + unsigned long val) { - struct net_bridge_mcast *brmctx = &p->br->multicast_ctx; - struct net_bridge_mcast_port *pmctx = &p->multicast_ctx; + struct net_bridge_mcast *brmctx; unsigned long now = jiffies; int err = -EINVAL; bool del = false; - spin_lock(&p->br->multicast_lock); + brmctx = br_multicast_port_ctx_get_global(pmctx); + spin_lock(&brmctx->br->multicast_lock); if (pmctx->multicast_router == val) { /* Refresh the temp router port timer */ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) { @@ -4165,7 +4305,20 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val) } err = 0; unlock: - spin_unlock(&p->br->multicast_lock); + spin_unlock(&brmctx->br->multicast_lock); + + return err; +} + +int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router) +{ + int err; + + if (br_vlan_is_master(v)) + err = br_multicast_set_router(&v->br_mcast_ctx, mcast_router); + else + err = br_multicast_set_port_router(&v->port_mcast_ctx, + mcast_router); return err; } @@ -4193,7 +4346,8 @@ static void br_multicast_start_querier(struct net_bridge_mcast *brmctx, if (br_multicast_ctx_is_vlan(brmctx)) { struct net_bridge_vlan *vlan; - vlan = br_vlan_find(nbp_vlan_group(port), brmctx->vlan->vid); + vlan = br_vlan_find(nbp_vlan_group_rcu(port), + brmctx->vlan->vid); if (!vlan || br_multicast_port_ctx_state_stopped(&vlan->port_mcast_ctx)) continue; @@ 
-4479,6 +4633,7 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) struct net_bridge *br; struct net_bridge_port *port; bool ret = false; + int port_ifidx; rcu_read_lock(); if (!netif_is_bridge_port(dev)) @@ -4493,14 +4648,16 @@ bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto) switch (proto) { case ETH_P_IP: + port_ifidx = brmctx->ip4_querier.port_ifidx; if (!timer_pending(&brmctx->ip4_other_query.timer) || - rcu_dereference(brmctx->ip4_querier.port) == port) + port_ifidx == port->dev->ifindex) goto unlock; break; #if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: + port_ifidx = brmctx->ip6_querier.port_ifidx; if (!timer_pending(&brmctx->ip6_other_query.timer) || - rcu_dereference(brmctx->ip6_querier.port) == port) + port_ifidx == port->dev->ifindex) goto unlock; break; #endif diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8ae026fa2ad7..6c58fc14d2cb 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -932,7 +932,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[], if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) { u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]); - err = br_multicast_set_port_router(p, mcast_router); + err = br_multicast_set_port_router(&p->multicast_ctx, + mcast_router); if (err) return err; } @@ -1501,6 +1502,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_IGMP_VERSION */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_MLD_VERSION */ + br_multicast_querier_state_size() + /* IFLA_BR_MCAST_QUERIER_STATE */ #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ @@ -1587,7 +1589,9 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, br->multicast_ctx.multicast_startup_query_count) || nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION, - br->multicast_ctx.multicast_igmp_version)) + br->multicast_ctx.multicast_igmp_version) || + br_multicast_dump_querier_state(skb, &br->multicast_ctx, + IFLA_BR_MCAST_QUERIER_STATE)) return -EMSGSIZE; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c4a8fee990c9..b4cef3a97f12 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -81,7 +81,8 @@ struct bridge_mcast_other_query { /* selected querier */ struct bridge_mcast_querier { struct br_ip addr; - struct net_bridge_port __rcu *port; + int port_ifidx; + seqcount_t seq; }; /* IGMP/MLD statistics */ @@ -675,6 +676,20 @@ static inline bool br_vlan_valid_range(const struct bridge_vlan_info *cur, return true; } +static inline u8 br_vlan_multicast_router(const struct net_bridge_vlan *v) +{ + u8 mcast_router = MDB_RTR_TYPE_DISABLED; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (!br_vlan_is_master(v)) + mcast_router = v->port_mcast_ctx.multicast_router; + else + mcast_router = v->br_mcast_ctx.multicast_router; +#endif + + return mcast_router; +} + static inline int br_afspec_cmd_to_rtm(int cmd) { switch (cmd) { @@ -878,7 +893,9 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig); int br_multicast_set_router(struct net_bridge_mcast *brmctx, unsigned long val); -int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val); 
+int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, + unsigned long val); +int br_multicast_set_vlan_router(struct net_bridge_vlan *v, u8 mcast_router); int br_multicast_toggle(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack); int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val); @@ -937,7 +954,6 @@ void br_multicast_port_ctx_init(struct net_bridge_port *port, struct net_bridge_mcast_port *pmctx); void br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx); void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, bool on); -void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, bool on); int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack); bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan, bool on); @@ -947,6 +963,11 @@ int br_mdb_replay(struct net_device *br_dev, struct net_device *dev, struct netlink_ext_ack *extack); int br_rports_fill_info(struct sk_buff *skb, const struct net_bridge_mcast *brmctx); +int br_multicast_dump_querier_state(struct sk_buff *skb, + const struct net_bridge_mcast *brmctx, + int nest_attr); +size_t br_multicast_querier_state_size(void); +size_t br_rports_size(const struct net_bridge_mcast *brmctx); static inline bool br_group_is_l2(const struct br_ip *group) { @@ -1364,11 +1385,6 @@ static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan, { } -static inline void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan, - bool on) -{ -} - static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br, bool on, struct netlink_ext_ack *extack) diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index c54cc26211d7..2b053289f016 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -38,9 +38,9 @@ int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid, void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan); -int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, - struct net_bridge_port *p, - struct net_bridge_vlan_group *vg); +void br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg); int br_handle_egress_vlan_tunnel(struct sk_buff *skb, struct net_bridge_vlan *vlan); bool vlan_tunid_inrange(const struct net_bridge_vlan *v_curr, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index e9e3aedd3178..07fa76080512 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -250,7 +250,7 @@ static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) static int store_multicast_router(struct net_bridge_port *p, unsigned long v) { - return br_multicast_set_port_router(p, v); + return br_multicast_set_port_router(&p->multicast_ctx, v); } static BRPORT_ATTR(multicast_router, 0644, show_multicast_router, store_multicast_router); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index cbc922681a76..19f65ab91a02 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -694,6 +694,7 @@ static int br_vlan_add_existing(struct net_bridge *br, vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans++; *changed = true; + br_multicast_toggle_one_vlan(vlan, true); } if (__vlan_add_flags(vlan, flags)) @@ -2135,6 +2136,7 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] = 
[BRIDGE_VLANDB_ENTRY_RANGE] = { .type = NLA_U16 }, [BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 }, [BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED }, + [BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 }, }; static int br_vlan_rtm_process_one(struct net_device *dev, diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index b4fd5fa441b7..8ffd4ed2563c 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -40,22 +40,38 @@ static bool __vlan_tun_can_enter_range(const struct net_bridge_vlan *v_curr, bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr, const struct net_bridge_vlan *range_end) { + u8 range_mc_rtr = br_vlan_multicast_router(range_end); + u8 curr_mc_rtr = br_vlan_multicast_router(v_curr); + return v_curr->state == range_end->state && - __vlan_tun_can_enter_range(v_curr, range_end); + __vlan_tun_can_enter_range(v_curr, range_end) && + curr_mc_rtr == range_mc_rtr; } bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v) { - return !nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, - br_vlan_get_state(v)) && - __vlan_tun_put(skb, v); + if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) || + !__vlan_tun_put(skb, v)) + return false; + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER, + br_vlan_multicast_router(v))) + return false; +#endif + + return true; } size_t br_vlan_opts_nl_size(void) { return nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_STATE */ + nla_total_size(0) /* BRIDGE_VLANDB_ENTRY_TUNNEL_INFO */ - + nla_total_size(sizeof(u32)); /* BRIDGE_VLANDB_TINFO_ID */ + + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */ +#endif + + 0; } static int br_vlan_modify_state(struct net_bridge_vlan_group *vg, @@ -181,6 +197,18 @@ static int br_vlan_process_one_opts(const struct net_bridge *br, return err; } +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]) { + u8 val; + + val = nla_get_u8(tb[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER]); + err = br_multicast_set_vlan_router(v, val); + if (err) + return err; + *changed = true; + } +#endif + return 0; } @@ -298,8 +326,8 @@ bool br_vlan_global_opts_fill(struct sk_buff *skb, u16 vid, u16 vid_range, v_opts->br_mcast_ctx.multicast_startup_query_count) || nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_QUERIER, v_opts->br_mcast_ctx.multicast_querier) || - nla_put_u8(skb, BRIDGE_VLANDB_GOPTS_MCAST_ROUTER, - v_opts->br_mcast_ctx.multicast_router)) + br_multicast_dump_querier_state(skb, &v_opts->br_mcast_ctx, + BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE)) goto out_err; clockval = jiffies_to_clock_t(v_opts->br_mcast_ctx.multicast_last_member_interval); @@ -360,7 +388,7 @@ out_err: return false; } -static size_t rtnl_vlan_global_opts_nlmsg_size(void) +static size_t rtnl_vlan_global_opts_nlmsg_size(const struct net_bridge_vlan *v) { return NLMSG_ALIGN(sizeof(struct br_vlan_msg)) + nla_total_size(0) /* BRIDGE_VLANDB_GLOBAL_OPTIONS */ @@ -378,7 +406,9 @@ static size_t rtnl_vlan_global_opts_nlmsg_size(void) + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL */ + nla_total_size(sizeof(u64)) /* BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL */ + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER */ - + nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER */ + + br_multicast_querier_state_size() /* BRIDGE_VLANDB_GOPTS_MCAST_QUERIER_STATE 
*/ + + nla_total_size(0) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */ + + br_rports_size(&v->br_mcast_ctx) /* BRIDGE_VLANDB_GOPTS_MCAST_ROUTER_PORTS */ #endif + nla_total_size(sizeof(u16)); /* BRIDGE_VLANDB_GOPTS_RANGE */ } @@ -395,7 +425,12 @@ static void br_vlan_global_opts_notify(const struct net_bridge *br, /* right now notifications are done only with rtnl held */ ASSERT_RTNL(); - skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(), GFP_KERNEL); + /* need to find the vlan due to flags/options */ + v = br_vlan_find(br_vlan_group(br), vid); + if (!v) + return; + + skb = nlmsg_new(rtnl_vlan_global_opts_nlmsg_size(v), GFP_KERNEL); if (!skb) goto out_err; @@ -408,11 +443,6 @@ static void br_vlan_global_opts_notify(const struct net_bridge *br, bvm->family = AF_BRIDGE; bvm->ifindex = br->dev->ifindex; - /* need to find the vlan due to flags/options */ - v = br_vlan_find(br_vlan_group(br), vid); - if (!v) - goto out_kfree; - if (!br_vlan_global_opts_fill(skb, vid, vid_range, v)) goto out_err; @@ -422,7 +452,6 @@ static void br_vlan_global_opts_notify(const struct net_bridge *br, out_err: rtnl_set_sk_err(dev_net(br->dev), RTNLGRP_BRVLAN, err); -out_kfree: kfree_skb(skb); } @@ -518,15 +547,6 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, return err; *changed = true; } - if (tb[BRIDGE_VLANDB_GOPTS_MCAST_ROUTER]) { - u8 val; - - val = nla_get_u8(tb[BRIDGE_VLANDB_GOPTS_MCAST_ROUTER]); - err = br_multicast_set_router(&v->br_mcast_ctx, val); - if (err) - return err; - *changed = true; - } #if IS_ENABLED(CONFIG_IPV6) if (tb[BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION]) { u8 ver; @@ -550,7 +570,6 @@ static const struct nla_policy br_vlan_db_gpol[BRIDGE_VLANDB_GOPTS_MAX + 1] = { [BRIDGE_VLANDB_GOPTS_MCAST_MLD_VERSION] = { .type = NLA_U8 }, [BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL] = { .type = NLA_U64 }, [BRIDGE_VLANDB_GOPTS_MCAST_QUERIER] = { .type = NLA_U8 }, - [BRIDGE_VLANDB_GOPTS_MCAST_ROUTER] = { .type = NLA_U8 }, [BRIDGE_VLANDB_GOPTS_MCAST_IGMP_VERSION] = { .type = NLA_U8 }, [BRIDGE_VLANDB_GOPTS_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 }, [BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 }, diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index 01017448ebde..6399a8a69d07 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -158,30 +158,28 @@ void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg) rhashtable_destroy(&vg->tunnel_hash); } -int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, - struct net_bridge_port *p, - struct net_bridge_vlan_group *vg) +void br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) { struct ip_tunnel_info *tinfo = skb_tunnel_info(skb); struct net_bridge_vlan *vlan; if (!vg || !tinfo) - return 0; + return; /* if already tagged, ignore */ if (skb_vlan_tagged(skb)) - return 0; + return; /* lookup vid, given tunnel id */ vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id); if (!vlan) - return 0; + return; skb_dst_drop(skb); __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid); - - return 0; } int br_handle_egress_vlan_tunnel(struct sk_buff *skb, diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 45ae6eeb2964..8c39283c26ae 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -16,10 +16,9 @@ * General list handling functions */ -static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, - const unsigned char *addr, int addr_len, - unsigned char addr_type, bool 
global, - bool sync) +static struct netdev_hw_addr* +__hw_addr_create(const unsigned char *addr, int addr_len, + unsigned char addr_type, bool global, bool sync) { struct netdev_hw_addr *ha; int alloc_size; @@ -29,32 +28,44 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, alloc_size = L1_CACHE_BYTES; ha = kmalloc(alloc_size, GFP_ATOMIC); if (!ha) - return -ENOMEM; + return NULL; memcpy(ha->addr, addr, addr_len); ha->type = addr_type; ha->refcount = 1; ha->global_use = global; ha->synced = sync ? 1 : 0; ha->sync_cnt = 0; - list_add_tail_rcu(&ha->list, &list->list); - list->count++; - return 0; + return ha; } static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync, - int sync_count) + int sync_count, bool exclusive) { + struct rb_node **ins_point = &list->tree.rb_node, *parent = NULL; struct netdev_hw_addr *ha; if (addr_len > MAX_ADDR_LEN) return -EINVAL; - list_for_each_entry(ha, &list->list, list) { - if (ha->type == addr_type && - !memcmp(ha->addr, addr, addr_len)) { + while (*ins_point) { + int diff; + + ha = rb_entry(*ins_point, struct netdev_hw_addr, node); + diff = memcmp(addr, ha->addr, addr_len); + if (diff == 0) + diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); + + parent = *ins_point; + if (diff < 0) { + ins_point = &parent->rb_left; + } else if (diff > 0) { + ins_point = &parent->rb_right; + } else { + if (exclusive) + return -EEXIST; if (global) { /* check if addr is already used as global */ if (ha->global_use) @@ -73,8 +84,25 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, } } - return __hw_addr_create_ex(list, addr, addr_len, addr_type, global, - sync); + ha = __hw_addr_create(addr, addr_len, addr_type, global, sync); + if (!ha) + return -ENOMEM; + + /* The first address in dev->dev_addrs is pointed to by dev->dev_addr + * and mutated freely by device drivers and netdev ops, so if we insert + * it into the tree we'll end up with an invalid rbtree. + */ + if (list->count > 0) { + rb_link_node(&ha->node, parent, ins_point); + rb_insert_color(&ha->node, &list->tree); + } else { + RB_CLEAR_NODE(&ha->node); + } + + list_add_tail_rcu(&ha->list, &list->list); + list->count++; + + return 0; } static int __hw_addr_add(struct netdev_hw_addr_list *list, @@ -82,7 +110,7 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char addr_type) { return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false, - 0); + 0, false); } static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, @@ -103,24 +131,61 @@ static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, if (--ha->refcount) return 0; + + if (!RB_EMPTY_NODE(&ha->node)) + rb_erase(&ha->node, &list->tree); + list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); list->count--; return 0; } +static struct netdev_hw_addr *__hw_addr_lookup(struct netdev_hw_addr_list *list, + const unsigned char *addr, int addr_len, + unsigned char addr_type) +{ + struct netdev_hw_addr *ha; + struct rb_node *node; + + /* The first address isn't inserted into the tree because in the dev->dev_addrs + * list it's the address pointed to by dev->dev_addr which is freely mutated + * in place, so we need to check it separately. 
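The net/core/dev_addr_lists.c changes in this diff replace the linear list walks in __hw_addr_add_ex() and __hw_addr_del_ex() with an rbtree keyed by memcmp() over the address bytes, using the address type as a tie-breaker, while the first entry stays outside the tree because dev->dev_addr is mutated in place. A minimal userspace sketch of that comparison order follows; struct ha_key and ADDR_LEN are illustrative stand-ins, not the kernel definitions.

#include <stdio.h>
#include <string.h>

#define ADDR_LEN 6

struct ha_key {
	unsigned char addr[ADDR_LEN];
	unsigned char type;
};

/* Same ordering as the rbtree hunks: address bytes first, then type,
 * so identical addresses with different types remain distinct entries.
 */
static int ha_cmp(const struct ha_key *a, const struct ha_key *b)
{
	int diff = memcmp(a->addr, b->addr, ADDR_LEN);

	if (diff == 0)
		diff = memcmp(&a->type, &b->type, sizeof(a->type));
	return diff;
}

int main(void)
{
	struct ha_key uc = { { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }, 1 };
	struct ha_key mc = { { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }, 2 };

	printf("cmp(uc, mc) = %d\n", ha_cmp(&uc, &mc));	/* negative: uc sorts first */
	printf("cmp(uc, uc) = %d\n", ha_cmp(&uc, &uc));	/* 0: same key */
	return 0;
}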
+ */ + ha = list_first_entry(&list->list, struct netdev_hw_addr, list); + if (ha && !memcmp(addr, ha->addr, addr_len) && + (!addr_type || addr_type == ha->type)) + return ha; + + node = list->tree.rb_node; + + while (node) { + struct netdev_hw_addr *ha = rb_entry(node, struct netdev_hw_addr, node); + int diff = memcmp(addr, ha->addr, addr_len); + + if (diff == 0 && addr_type) + diff = memcmp(&addr_type, &ha->type, sizeof(addr_type)); + + if (diff < 0) + node = node->rb_left; + else if (diff > 0) + node = node->rb_right; + else + return ha; + } + + return NULL; +} + static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, const unsigned char *addr, int addr_len, unsigned char addr_type, bool global, bool sync) { - struct netdev_hw_addr *ha; + struct netdev_hw_addr *ha = __hw_addr_lookup(list, addr, addr_len, addr_type); - list_for_each_entry(ha, &list->list, list) { - if (!memcmp(ha->addr, addr, addr_len) && - (ha->type == addr_type || !addr_type)) - return __hw_addr_del_entry(list, ha, global, sync); - } - return -ENOENT; + if (!ha) + return -ENOENT; + return __hw_addr_del_entry(list, ha, global, sync); } static int __hw_addr_del(struct netdev_hw_addr_list *list, @@ -137,7 +202,7 @@ static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, int err; err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, - false, true, ha->sync_cnt); + false, true, ha->sync_cnt, false); if (err && err != -EEXIST) return err; @@ -407,6 +472,7 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list) { struct netdev_hw_addr *ha, *tmp; + list->tree = RB_ROOT; list_for_each_entry_safe(ha, tmp, &list->list, list) { list_del_rcu(&ha->list); kfree_rcu(ha, rcu_head); @@ -418,6 +484,7 @@ void __hw_addr_init(struct netdev_hw_addr_list *list) { INIT_LIST_HEAD(&list->list); list->count = 0; + list->tree = RB_ROOT; } EXPORT_SYMBOL(__hw_addr_init); @@ -552,22 +619,14 @@ EXPORT_SYMBOL(dev_addr_del); */ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr) { - struct netdev_hw_addr *ha; int err; netif_addr_lock_bh(dev); - list_for_each_entry(ha, &dev->uc.list, list) { - if (!memcmp(ha->addr, addr, dev->addr_len) && - ha->type == NETDEV_HW_ADDR_T_UNICAST) { - err = -EEXIST; - goto out; - } - } - err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_UNICAST, true, false); + err = __hw_addr_add_ex(&dev->uc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_UNICAST, true, false, + 0, true); if (!err) __dev_set_rx_mode(dev); -out: netif_addr_unlock_bh(dev); return err; } @@ -745,22 +804,14 @@ EXPORT_SYMBOL(dev_uc_init); */ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr) { - struct netdev_hw_addr *ha; int err; netif_addr_lock_bh(dev); - list_for_each_entry(ha, &dev->mc.list, list) { - if (!memcmp(ha->addr, addr, dev->addr_len) && - ha->type == NETDEV_HW_ADDR_T_MULTICAST) { - err = -EEXIST; - goto out; - } - } - err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, true, false); + err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, + NETDEV_HW_ADDR_T_MULTICAST, true, false, + 0, true); if (!err) __dev_set_rx_mode(dev); -out: netif_addr_unlock_bh(dev); return err; } @@ -773,7 +824,8 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr, netif_addr_lock_bh(dev); err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, - NETDEV_HW_ADDR_T_MULTICAST, global, false, 0); + NETDEV_HW_ADDR_T_MULTICAST, global, false, + 0, false); if (!err) __dev_set_rx_mode(dev); netif_addr_unlock_bh(dev); diff --git 
a/net/core/devlink.c b/net/core/devlink.c index ee9787314cff..a856ae401ea5 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -92,7 +92,8 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ DEVLINK_PORT_FN_STATE_ACTIVE), }; -static LIST_HEAD(devlink_list); +static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +#define DEVLINK_REGISTERED XA_MARK_1 /* devlink_mutex * @@ -108,10 +109,23 @@ struct net *devlink_net(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_net); +static void devlink_put(struct devlink *devlink) +{ + if (refcount_dec_and_test(&devlink->refcount)) + complete(&devlink->comp); +} + +static bool __must_check devlink_try_get(struct devlink *devlink) +{ + return refcount_inc_not_zero(&devlink->refcount); +} + static struct devlink *devlink_get_from_attrs(struct net *net, struct nlattr **attrs) { struct devlink *devlink; + unsigned long index; + bool found = false; char *busname; char *devname; @@ -123,19 +137,19 @@ static struct devlink *devlink_get_from_attrs(struct net *net, lockdep_assert_held(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { if (strcmp(devlink->dev->bus->name, busname) == 0 && strcmp(dev_name(devlink->dev), devname) == 0 && - net_eq(devlink_net(devlink), net)) - return devlink; + net_eq(devlink_net(devlink), net)) { + found = true; + break; + } } - return ERR_PTR(-ENODEV); -} + if (!found || !devlink_try_get(devlink)) + devlink = ERR_PTR(-ENODEV); -static struct devlink *devlink_get_from_info(struct genl_info *info) -{ - return devlink_get_from_attrs(genl_info_net(info), info->attrs); + return devlink; } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, @@ -486,7 +500,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, int err; mutex_lock(&devlink_mutex); - devlink = devlink_get_from_info(info); + devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(devlink)) { mutex_unlock(&devlink_mutex); return PTR_ERR(devlink); @@ -529,6 +543,7 @@ static int devlink_nl_pre_doit(const struct genl_ops *ops, unlock: if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return err; } @@ -541,6 +556,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops, devlink = info->user_ptr[0]; if (~ops->internal_flags & DEVLINK_NL_FLAG_NO_LOCK) mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); } @@ -832,12 +848,11 @@ static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, } static int devlink_nl_rate_fill(struct sk_buff *msg, - struct devlink *devlink, struct devlink_rate *devlink_rate, - enum devlink_command cmd, u32 portid, - u32 seq, int flags, - struct netlink_ext_ack *extack) + enum devlink_command cmd, u32 portid, u32 seq, + int flags, struct netlink_ext_ack *extack) { + struct devlink *devlink = devlink_rate->devlink; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -959,12 +974,12 @@ out: return err; } -static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, +static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink_port *devlink_port, - enum devlink_command cmd, u32 portid, - u32 seq, int flags, - struct netlink_ext_ack *extack) + enum devlink_command cmd, u32 portid, u32 seq, + int flags, struct netlink_ext_ack *extack) { + struct devlink *devlink = 
devlink_port->devlink; void *hdr; hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); @@ -1025,53 +1040,47 @@ nla_put_failure: static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { - struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; - if (!devlink_port->devlink) - return; - WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_port_fill(msg, devlink, devlink_port, cmd, 0, 0, 0, - NULL); + err = devlink_nl_port_fill(msg, devlink_port, cmd, 0, 0, 0, NULL); if (err) { nlmsg_free(msg); return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(devlink_port->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static void devlink_rate_notify(struct devlink_rate *devlink_rate, enum devlink_command cmd) { - struct devlink *devlink = devlink_rate->devlink; struct sk_buff *msg; int err; - WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && - cmd != DEVLINK_CMD_RATE_DEL); + WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - err = devlink_nl_rate_fill(msg, devlink, devlink_rate, - cmd, 0, 0, 0, NULL); + err = devlink_nl_rate_fill(msg, devlink_rate, cmd, 0, 0, 0, NULL); if (err) { nlmsg_free(msg); return; } - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(devlink_rate->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, @@ -1080,13 +1089,18 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, struct devlink_rate *devlink_rate; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_rate, &devlink->rate_list, list) { enum devlink_command cmd = DEVLINK_CMD_RATE_NEW; @@ -1096,18 +1110,19 @@ static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_rate_fill(msg, devlink, - devlink_rate, - cmd, id, + err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id, cb->nlh->nlmsg_seq, NLM_F_MULTI, NULL); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -1122,7 +1137,6 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_rate *devlink_rate = info->user_ptr[1]; - struct devlink *devlink = devlink_rate->devlink; struct sk_buff *msg; int err; @@ -1130,8 +1144,7 @@ static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_rate_fill(msg, devlink, devlink_rate, - DEVLINK_CMD_RATE_NEW, + err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { @@ -1179,20 +1192,30 @@ static 
int devlink_nl_cmd_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) { + devlink_put(devlink); continue; + } + if (idx < start) { idx++; + devlink_put(devlink); continue; } + err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); + devlink_put(devlink); if (err) goto out; idx++; @@ -1208,7 +1231,6 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink *devlink = devlink_port->devlink; struct sk_buff *msg; int err; @@ -1216,8 +1238,7 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb, if (!msg) return -ENOMEM; - err = devlink_nl_port_fill(msg, devlink, devlink_port, - DEVLINK_CMD_PORT_NEW, + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_PORT_NEW, info->snd_portid, info->snd_seq, 0, info->extack); if (err) { @@ -1234,32 +1255,39 @@ static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_port *devlink_port; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { if (idx < start) { idx++; continue; } - err = devlink_nl_port_fill(msg, devlink, devlink_port, + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, - cb->extack); + NLM_F_MULTI, cb->extack); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -1488,9 +1516,8 @@ static int devlink_port_new_notifiy(struct devlink *devlink, goto out; } - err = devlink_nl_port_fill(msg, devlink, devlink_port, - DEVLINK_CMD_NEW, info->snd_portid, - info->snd_seq, 0, NULL); + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, + info->snd_portid, info->snd_seq, 0, NULL); if (err) goto out; @@ -1894,13 +1921,18 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { if (idx < start) { @@ -1914,11 +1946,14 @@ static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } 
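Every devlink dumpit converted in this diff follows the same shape: walk the xarray over instances marked as registered, take a temporary reference with devlink_try_get(), and drop it with devlink_put() on the retry and error paths alike. Below is a compact userspace analogue of that reference pattern, assuming a toy obj type; refcount_inc_not_zero() is approximated with a C11 compare-and-swap loop and the completion with a plain flag.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	atomic_int refcount;	/* 1 while the instance is alive */
	bool done;		/* stands in for the completion */
};

/* Like refcount_inc_not_zero(): only succeed while the object is live. */
static bool obj_try_get(struct obj *o)
{
	int old = atomic_load(&o->refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&o->refcount, &old, old + 1))
			return true;
	}
	return false;
}

/* Like devlink_put(): dropping the last reference unblocks teardown. */
static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		o->done = true;
}

int main(void)
{
	struct obj o = { .refcount = 1, .done = false };

	if (obj_try_get(&o)) {	/* dump loop takes a temporary reference */
		/* ... fill the netlink message here ... */
		obj_put(&o);	/* retry/error paths drop it again */
	}
	obj_put(&o);		/* unregister drops the initial reference */
	printf("teardown may proceed: %d\n", o.done);
	return 0;
}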
mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2038,14 +2073,19 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_pool_get) - continue; + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_pool_get_dumpit(msg, start, &idx, devlink, @@ -2056,10 +2096,13 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2251,14 +2294,19 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_port_pool_get) - continue; + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { err = __sb_port_pool_get_dumpit(msg, start, &idx, @@ -2269,10 +2317,13 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -2492,14 +2543,18 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, struct devlink *devlink; struct devlink_sb *devlink_sb; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + if (!net_eq(devlink_net(devlink), sock_net(msg->sk)) || !devlink->ops->sb_tc_pool_bind_get) - continue; + goto retry; mutex_lock(&devlink->lock); list_for_each_entry(devlink_sb, &devlink->sb_list, list) { @@ -2512,10 +2567,13 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -4111,7 +4169,7 @@ out_free_msg: static void devlink_flash_update_begin_notify(struct devlink *devlink) { - struct devlink_flash_notify params = { 0 }; + struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE, @@ -4120,7 +4178,7 @@ static void devlink_flash_update_begin_notify(struct devlink *devlink) static void devlink_flash_update_end_notify(struct devlink *devlink) { - struct devlink_flash_notify params = { 0 }; + struct devlink_flash_notify params = {}; __devlink_flash_update_notify(devlink, DEVLINK_CMD_FLASH_UPDATE_END, @@ -4562,13 +4620,18 @@ static int 
devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, struct devlink_param_item *param_item; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(param_item, &devlink->param_list, list) { if (idx < start) { @@ -4584,11 +4647,14 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -4830,13 +4896,18 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct devlink_port *devlink_port; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(devlink_port, &devlink->port_list, list) { list_for_each_entry(param_item, @@ -4856,12 +4927,15 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, err = 0; } else if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -5071,7 +5145,6 @@ static void devlink_nl_region_notify(struct devlink_region *region, struct devlink_snapshot *snapshot, enum devlink_command cmd) { - struct devlink *devlink = region->devlink; struct sk_buff *msg; WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL); @@ -5080,8 +5153,9 @@ static void devlink_nl_region_notify(struct devlink_region *region, if (IS_ERR(msg)) return; - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + genlmsg_multicast_netns(&devlink_nl_family, + devlink_net(region->devlink), msg, 0, + DEVLINK_MCGRP_CONFIG, GFP_KERNEL); } /** @@ -5399,15 +5473,22 @@ static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; - int err; + int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink, &idx, start); +retry: + devlink_put(devlink); if (err) goto out; } @@ -5770,6 +5851,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, nla_nest_end(skb, chunks_attr); genlmsg_end(skb, hdr); mutex_unlock(&devlink->lock); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return skb->len; @@ -5778,6 +5860,7 @@ nla_put_failure: genlmsg_cancel(skb, hdr); out_unlock: mutex_unlock(&devlink->lock); + 
devlink_put(devlink); out_dev: mutex_unlock(&devlink_mutex); return err; @@ -5924,22 +6007,20 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, { struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err = 0; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) - continue; - if (idx < start) { - idx++; + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; - } - if (!devlink->ops->info_get) { - idx++; - continue; - } + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + + if (idx < start || !devlink->ops->info_get) + goto inc; mutex_lock(&devlink->lock); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, @@ -5949,9 +6030,14 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, mutex_unlock(&devlink->lock); if (err == -EOPNOTSUPP) err = 0; - else if (err) + else if (err) { + devlink_put(devlink); break; + } +inc: idx++; +retry: + devlink_put(devlink); } mutex_unlock(&devlink_mutex); @@ -6765,11 +6851,11 @@ EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy); static int devlink_nl_health_reporter_fill(struct sk_buff *msg, - struct devlink *devlink, struct devlink_health_reporter *reporter, enum devlink_command cmd, u32 portid, u32 seq, int flags) { + struct devlink *devlink = reporter->devlink; struct nlattr *reporter_attr; void *hdr; @@ -6846,8 +6932,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, if (!msg) return; - err = devlink_nl_health_reporter_fill(msg, reporter->devlink, - reporter, cmd, 0, 0, 0); + err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; @@ -7037,6 +7122,7 @@ devlink_health_reporter_get_from_cb(struct netlink_callback *cb) goto unlock; reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); + devlink_put(devlink); mutex_unlock(&devlink_mutex); return reporter; unlock: @@ -7080,7 +7166,7 @@ static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, goto out; } - err = devlink_nl_health_reporter_fill(msg, devlink, reporter, + err = devlink_nl_health_reporter_fill(msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET, info->snd_portid, info->snd_seq, 0); @@ -7103,13 +7189,18 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, struct devlink_port *port; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry_rep; + mutex_lock(&devlink->reporters_lock); list_for_each_entry(reporter, &devlink->reporter_list, list) { @@ -7117,24 +7208,29 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_health_reporter_fill(msg, devlink, - reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); + err = devlink_nl_health_reporter_fill( + msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + NLM_F_MULTI); if (err) { mutex_unlock(&devlink->reporters_lock); + devlink_put(devlink); goto out; } idx++; } 
mutex_unlock(&devlink->reporters_lock); +retry_rep: + devlink_put(devlink); } - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry_port; + mutex_lock(&devlink->lock); list_for_each_entry(port, &devlink->port_list, list) { mutex_lock(&port->reporters_lock); @@ -7143,14 +7239,15 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, idx++; continue; } - err = devlink_nl_health_reporter_fill(msg, devlink, reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); + err = devlink_nl_health_reporter_fill( + msg, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); if (err) { mutex_unlock(&port->reporters_lock); mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; @@ -7158,6 +7255,8 @@ devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg, mutex_unlock(&port->reporters_lock); } mutex_unlock(&devlink->lock); +retry_port: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -7686,13 +7785,18 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, struct devlink_trap_item *trap_item; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(trap_item, &devlink->trap_list, list) { if (idx < start) { @@ -7706,11 +7810,14 @@ static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -7905,13 +8012,18 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, u32 portid = NETLINK_CB(cb->skb).portid; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(group_item, &devlink->trap_group_list, list) { @@ -7926,11 +8038,14 @@ static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -8211,13 +8326,18 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, u32 portid = NETLINK_CB(cb->skb).portid; struct devlink *devlink; int start = cb->args[0]; + unsigned long index; int idx = 0; int err; mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + 
xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) continue; + + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + goto retry; + mutex_lock(&devlink->lock); list_for_each_entry(policer_item, &devlink->trap_policer_list, list) { @@ -8232,11 +8352,14 @@ static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg, NLM_F_MULTI); if (err) { mutex_unlock(&devlink->lock); + devlink_put(devlink); goto out; } idx++; } mutex_unlock(&devlink->lock); +retry: + devlink_put(devlink); } out: mutex_unlock(&devlink_mutex); @@ -8793,6 +8916,8 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, struct device *dev) { struct devlink *devlink; + static u32 last_id; + int ret; WARN_ON(!ops || !dev); if (!devlink_reload_actions_valid(ops)) @@ -8802,6 +8927,13 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (!devlink) return NULL; + ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b, + &last_id, GFP_KERNEL); + if (ret < 0) { + kfree(devlink); + return NULL; + } + devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); @@ -8819,6 +8951,9 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->trap_policer_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); + refcount_set(&devlink->refcount, 1); + init_completion(&devlink->comp); + return devlink; } EXPORT_SYMBOL_GPL(devlink_alloc_ns); @@ -8831,7 +8966,7 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns); int devlink_register(struct devlink *devlink) { mutex_lock(&devlink_mutex); - list_add_tail(&devlink->list, &devlink_list); + xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify(devlink, DEVLINK_CMD_NEW); mutex_unlock(&devlink_mutex); return 0; @@ -8845,11 +8980,14 @@ EXPORT_SYMBOL_GPL(devlink_register); */ void devlink_unregister(struct devlink *devlink) { + devlink_put(devlink); + wait_for_completion(&devlink->comp); + mutex_lock(&devlink_mutex); WARN_ON(devlink_reload_supported(devlink->ops) && devlink->reload_enabled); devlink_notify(devlink, DEVLINK_CMD_DEL); - list_del(&devlink->list); + xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); mutex_unlock(&devlink_mutex); } EXPORT_SYMBOL_GPL(devlink_unregister); @@ -8911,6 +9049,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->port_list)); xa_destroy(&devlink->snapshot_ids); + xa_erase(&devlinks, devlink->index); kfree(devlink); } @@ -11385,23 +11524,29 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net) { struct devlink *devlink; u32 actions_performed; + unsigned long index; int err; /* In case network namespace is getting destroyed, reload * all devlink instances from this namespace into init_net. 
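The devlink_alloc_ns()/devlink_register()/devlink_unregister()/devlink_free() hunks above split instance lifetime from visibility: the index is allocated with xa_alloc_cyclic() at alloc time, the DEVLINK_REGISTERED mark only controls whether iterators see the instance, and unregister waits on the completion until the last reference is gone. A small userspace sketch of that ordering follows, with a fixed-size registry array standing in for the xarray; the names are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define MAX_SLOTS 8

struct slot {
	const char *instance;
	bool registered;	/* stands in for the DEVLINK_REGISTERED mark */
};

static struct slot registry[MAX_SLOTS];

/* The index is handed out at alloc time, before the instance is visible. */
static int registry_alloc(const char *instance)
{
	for (int i = 0; i < MAX_SLOTS; i++) {
		if (!registry[i].instance) {
			registry[i].instance = instance;
			registry[i].registered = false;
			return i;
		}
	}
	return -1;
}

int main(void)
{
	int idx = registry_alloc("devlink0");

	if (idx < 0)
		return 1;
	registry[idx].registered = true;	/* register: visible to dump loops */
	/* ... iterators only walk slots with registered == true ... */
	registry[idx].registered = false;	/* unregister: hidden again */
	registry[idx].instance = NULL;		/* free: slot erased */
	printf("slot %d released\n", idx);
	return 0;
}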
*/ mutex_lock(&devlink_mutex); - list_for_each_entry(devlink, &devlink_list, list) { - if (net_eq(devlink_net(devlink), net)) { - if (WARN_ON(!devlink_reload_supported(devlink->ops))) - continue; - err = devlink_reload(devlink, &init_net, - DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_LIMIT_UNSPEC, - &actions_performed, NULL); - if (err && err != -EOPNOTSUPP) - pr_warn("Failed to reload devlink instance into init_net\n"); - } + xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) { + if (!devlink_try_get(devlink)) + continue; + + if (!net_eq(devlink_net(devlink), net)) + goto retry; + + WARN_ON(!devlink_reload_supported(devlink->ops)); + err = devlink_reload(devlink, &init_net, + DEVLINK_RELOAD_ACTION_DRIVER_REINIT, + DEVLINK_RELOAD_LIMIT_UNSPEC, + &actions_performed, NULL); + if (err && err != -EOPNOTSUPP) + pr_warn("Failed to reload devlink instance into init_net\n"); +retry: + devlink_put(devlink); } mutex_unlock(&devlink_mutex); } diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 715b67f6c62f..6beaea13564a 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -321,13 +321,13 @@ EXPORT_SYMBOL(flow_block_cb_setup_simple); static DEFINE_MUTEX(flow_indr_block_lock); static LIST_HEAD(flow_block_indr_list); static LIST_HEAD(flow_block_indr_dev_list); +static LIST_HEAD(flow_indir_dev_list); struct flow_indr_dev { struct list_head list; flow_indr_block_bind_cb_t *cb; void *cb_priv; refcount_t refcnt; - struct rcu_head rcu; }; static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb, @@ -346,6 +346,33 @@ static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb, return indr_dev; } +struct flow_indir_dev_info { + void *data; + struct net_device *dev; + struct Qdisc *sch; + enum tc_setup_type type; + void (*cleanup)(struct flow_block_cb *block_cb); + struct list_head list; + enum flow_block_command command; + enum flow_block_binder_type binder_type; + struct list_head *cb_list; +}; + +static void existing_qdiscs_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) +{ + struct flow_block_offload bo; + struct flow_indir_dev_info *cur; + + list_for_each_entry(cur, &flow_indir_dev_list, list) { + memset(&bo, 0, sizeof(bo)); + bo.command = cur->command; + bo.binder_type = cur->binder_type; + INIT_LIST_HEAD(&bo.cb_list); + cb(cur->dev, cur->sch, cb_priv, cur->type, &bo, cur->data, cur->cleanup); + list_splice(&bo.cb_list, cur->cb_list); + } +} + int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_indr_dev *indr_dev; @@ -367,6 +394,7 @@ int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv) } list_add(&indr_dev->list, &flow_block_indr_dev_list); + existing_qdiscs_register(cb, cb_priv); mutex_unlock(&flow_indr_block_lock); return 0; @@ -463,7 +491,59 @@ out: } EXPORT_SYMBOL(flow_indr_block_cb_alloc); -int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, +static struct flow_indir_dev_info *find_indir_dev(void *data) +{ + struct flow_indir_dev_info *cur; + + list_for_each_entry(cur, &flow_indir_dev_list, list) { + if (cur->data == data) + return cur; + } + return NULL; +} + +static int indir_dev_add(void *data, struct net_device *dev, struct Qdisc *sch, + enum tc_setup_type type, void (*cleanup)(struct flow_block_cb *block_cb), + struct flow_block_offload *bo) +{ + struct flow_indir_dev_info *info; + + info = find_indir_dev(data); + if (info) + return -EEXIST; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return 
-ENOMEM; + + info->data = data; + info->dev = dev; + info->sch = sch; + info->type = type; + info->cleanup = cleanup; + info->command = bo->command; + info->binder_type = bo->binder_type; + info->cb_list = bo->cb_list_head; + + list_add(&info->list, &flow_indir_dev_list); + return 0; +} + +static int indir_dev_remove(void *data) +{ + struct flow_indir_dev_info *info; + + info = find_indir_dev(data); + if (!info) + return -ENOENT; + + list_del(&info->list); + + kfree(info); + return 0; +} + +int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)) @@ -471,6 +551,12 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, struct flow_indr_dev *this; mutex_lock(&flow_indr_block_lock); + + if (bo->command == FLOW_BLOCK_BIND) + indir_dev_add(data, dev, sch, type, cleanup, bo); + else if (bo->command == FLOW_BLOCK_UNBIND) + indir_dev_remove(data); + list_for_each_entry(this, &flow_block_indr_dev_list, list) this->cb(dev, sch, this->cb_priv, type, bo, data, cleanup); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index d0ae987d2de9..2820aca2173a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -23,6 +23,9 @@ #include <net/ip6_fib.h> #include <net/rtnh.h> +DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled); +EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled); + #ifdef CONFIG_MODULES static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a4..eab5fc88a002 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu " - "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n", + seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu " + "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n", dev->name, stats->rx_bytes, stats->rx_packets, stats->rx_errors, stats->rx_dropped + stats->rx_missed_errors, @@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) static int dev_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) - seq_puts(seq, "Inter-| Receive " - " | Transmit\n" - " face |bytes packets errs drop fifo frame " - "compressed multicast|bytes packets errs " - "drop fifo colls carrier compressed\n"); + seq_puts(seq, "Interface| Receive " + " | Transmit\n" + " | bytes packets errs drop fifo frame " + "compressed multicast| bytes packets errs " + " drop fifo colls carrier compressed\n"); else dev_seq_printf_stats(seq, v); return 0; @@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v) struct packet_type *pt = v; if (v == SEQ_START_TOKEN) - seq_puts(seq, "Type Device Function\n"); + seq_puts(seq, "Type Device Function\n"); else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s %ps\n", + seq_printf(seq, " %-9s %ps\n", pt->dev ? 
pt->dev->name : "", pt->func); } @@ -327,12 +327,14 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) struct netdev_hw_addr *ha; struct net_device *dev = v; - if (v == SEQ_START_TOKEN) + if (v == SEQ_START_TOKEN) { + seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n"); return 0; + } netif_addr_lock_bh(dev); netdev_for_each_mc_addr(ha, dev) { - seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n", + seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n", dev->ifindex, dev->name, ha->refcount, ha->global_use, (int)dev->addr_len, ha->addr); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9b5a767eddd5..a448a9b5bb2d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -98,7 +98,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data) } ng = net_alloc_generic(); - if (ng == NULL) + if (!ng) return -ENOMEM; /* @@ -148,13 +148,6 @@ out: return err; } -static void ops_free(const struct pernet_operations *ops, struct net *net) -{ - if (ops->id && ops->size) { - kfree(net_generic(net, *ops->id)); - } -} - static void ops_pre_exit_list(const struct pernet_operations *ops, struct list_head *net_exit_list) { @@ -184,7 +177,7 @@ static void ops_free_list(const struct pernet_operations *ops, struct net *net; if (ops->size && ops->id) { list_for_each_entry(net, net_exit_list, exit_list) - ops_free(ops, net); + kfree(net_generic(net, *ops->id)); } } @@ -433,15 +426,18 @@ out_free: static void net_free(struct net *net) { - kfree(rcu_access_pointer(net->gen)); - kmem_cache_free(net_cachep, net); + if (refcount_dec_and_test(&net->passive)) { + kfree(rcu_access_pointer(net->gen)); + kmem_cache_free(net_cachep, net); + } } void net_drop_ns(void *p) { - struct net *ns = p; - if (ns && refcount_dec_and_test(&ns->passive)) - net_free(ns); + struct net *net = (struct net *)p; + + if (net) + net_free(net); } struct net *copy_net_ns(unsigned long flags, @@ -479,7 +475,7 @@ struct net *copy_net_ns(unsigned long flags, put_userns: key_remove_domain(net->key_domain); put_user_ns(user_ns); - net_drop_ns(net); + net_free(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -611,7 +607,7 @@ static void cleanup_net(struct work_struct *work) dec_net_namespaces(net->ucounts); key_remove_domain(net->key_domain); put_user_ns(net->user_ns); - net_drop_ns(net); + net_free(net); } } @@ -1120,6 +1116,14 @@ static int __init net_ns_init(void) pure_initcall(net_ns_init); +static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list) +{ + ops_pre_exit_list(ops, net_exit_list); + synchronize_rcu(); + ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); +} + #ifdef CONFIG_NET_NS static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) @@ -1145,10 +1149,7 @@ static int __register_pernet_operations(struct list_head *list, out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + free_exit_list(ops, &net_exit_list); return error; } @@ -1161,10 +1162,8 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + + 
free_exit_list(ops, &net_exit_list); } #else @@ -1187,10 +1186,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) } else { LIST_HEAD(net_exit_list); list_add(&init_net.exit_list, &net_exit_list); - ops_pre_exit_list(ops, &net_exit_list); - synchronize_rcu(); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + free_exit_list(ops, &net_exit_list); } } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e1409056965a..1a6978427d6c 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -370,7 +370,7 @@ skip_dma_unmap: /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. */ - count = atomic_inc_return(&pool->pages_state_release_cnt); + count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); trace_page_pool_state_release(pool, page, count); } EXPORT_SYMBOL(page_pool_release_page); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 94008536a9d6..9e5a3249373c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2601,6 +2601,32 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].count++; } +static void fill_imix_distribution(struct pktgen_dev *pkt_dev) +{ + int cumulative_probabilites[MAX_IMIX_ENTRIES]; + int j = 0; + __u64 cumulative_prob = 0; + __u64 total_weight = 0; + int i = 0; + + for (i = 0; i < pkt_dev->n_imix_entries; i++) + total_weight += pkt_dev->imix_entries[i].weight; + + /* Fill cumulative_probabilites with sum of normalized probabilities */ + for (i = 0; i < pkt_dev->n_imix_entries - 1; i++) { + cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight * + IMIX_PRECISION, + total_weight); + cumulative_probabilites[i] = cumulative_prob; + } + cumulative_probabilites[pkt_dev->n_imix_entries - 1] = 100; + + for (i = 0; i < IMIX_PRECISION; i++) { + if (i == cumulative_probabilites[j]) + j++; + pkt_dev->imix_distribution[i] = j; + } +} #ifdef CONFIG_XFRM static u32 pktgen_dst_metrics[RTAX_MAX + 1] = { @@ -2662,33 +2688,6 @@ static void free_SAs(struct pktgen_dev *pkt_dev) } } -static void fill_imix_distribution(struct pktgen_dev *pkt_dev) -{ - int cumulative_probabilites[MAX_IMIX_ENTRIES]; - int j = 0; - __u64 cumulative_prob = 0; - __u64 total_weight = 0; - int i = 0; - - for (i = 0; i < pkt_dev->n_imix_entries; i++) - total_weight += pkt_dev->imix_entries[i].weight; - - /* Fill cumulative_probabilites with sum of normalized probabilities */ - for (i = 0; i < pkt_dev->n_imix_entries - 1; i++) { - cumulative_prob += div64_u64(pkt_dev->imix_entries[i].weight * - IMIX_PRECISION, - total_weight); - cumulative_probabilites[i] = cumulative_prob; - } - cumulative_probabilites[pkt_dev->n_imix_entries - 1] = 100; - - for (i = 0; i < IMIX_PRECISION; i++) { - if (i == cumulative_probabilites[j]) - j++; - pkt_dev->imix_distribution[i] = j; - } -} - static int process_ipsec(struct pktgen_dev *pkt_dev, struct sk_buff *skb, __be16 protocol) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2dcf1c084b20..972c8cb303a5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2605,6 +2605,7 @@ static int do_setlink(const struct sk_buff *skb, return err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { + const char *pat = ifname && ifname[0] ? 
ifname : NULL; struct net *net; int new_ifindex; @@ -2620,7 +2621,7 @@ static int do_setlink(const struct sk_buff *skb, else new_ifindex = 0; - err = __dev_change_net_namespace(dev, net, ifname, new_ifindex); + err = __dev_change_net_namespace(dev, net, pat, new_ifindex); put_net(net); if (err) goto errout; diff --git a/net/core/sock.c b/net/core/sock.c index aada649e07e8..62627e868e03 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2574,7 +2574,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -#define SKB_FRAG_PAGE_ORDER get_order(32768) DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); /** @@ -2728,10 +2727,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { struct proto *prot = sk->sk_prot; long allocated = sk_memory_allocated_add(sk, amt); + bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; bool charged = true; - if (mem_cgroup_sockets_enabled && sk->sk_memcg && - !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt))) + if (memcg_charge && + !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge()))) goto suppress_allocation; /* Under limit. */ @@ -2785,8 +2786,14 @@ suppress_allocation: /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. */ - if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) + if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { + /* Force charge with __GFP_NOFAIL */ + if (memcg_charge && !charged) { + mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge() | __GFP_NOFAIL); + } return 1; + } } if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) @@ -2794,7 +2801,7 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (mem_cgroup_sockets_enabled && sk->sk_memcg) + if (memcg_charge && charged) mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); return 0; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 970906eb5b2c..548285539752 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -138,7 +138,7 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" - depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP + depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105 select PACKING help Say Y or M if you want to enable support for tagging frames with the diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index dcd67801eca4..1b2b25d7bd02 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,6 +21,9 @@ static DEFINE_MUTEX(dsa2_mutex); LIST_HEAD(dsa_tree_list); +/* Track the bridges with forwarding offload enabled */ +static unsigned long dsa_fwd_offloading_bridges; + /** * dsa_tree_notify - Execute code for all switches in a DSA switch tree. * @dst: collection of struct dsa_switch devices to notify. @@ -126,6 +129,51 @@ void dsa_lag_unmap(struct dsa_switch_tree *dst, struct net_device *lag) } } +static int dsa_bridge_num_find(const struct net_device *bridge_dev) +{ + struct dsa_switch_tree *dst; + struct dsa_port *dp; + + /* When preparing the offload for a port, it will have a valid + * dp->bridge_dev pointer but a not yet valid dp->bridge_num. + * However there might be other ports having the same dp->bridge_dev + * and a valid dp->bridge_num, so just ignore this port. 
+ */ + list_for_each_entry(dst, &dsa_tree_list, list) + list_for_each_entry(dp, &dst->ports, list) + if (dp->bridge_dev == bridge_dev && + dp->bridge_num != -1) + return dp->bridge_num; + + return -1; +} + +int dsa_bridge_num_get(const struct net_device *bridge_dev, int max) +{ + int bridge_num = dsa_bridge_num_find(bridge_dev); + + if (bridge_num < 0) { + /* First port that offloads TX forwarding for this bridge */ + bridge_num = find_first_zero_bit(&dsa_fwd_offloading_bridges, + DSA_MAX_NUM_OFFLOADING_BRIDGES); + if (bridge_num >= max) + return -1; + + set_bit(bridge_num, &dsa_fwd_offloading_bridges); + } + + return bridge_num; +} + +void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num) +{ + /* Check if the bridge is still in use, otherwise it is time + * to clean it up so we can reuse this bridge_num later. + */ + if (!dsa_bridge_num_find(bridge_dev)) + clear_bit(bridge_num, &dsa_fwd_offloading_bridges); +} + struct dsa_switch *dsa_switch_find(int tree_index, int sw_index) { struct dsa_switch_tree *dst; diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index b7a269e0513f..33ab7d7af9eb 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -320,6 +320,8 @@ int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); void dsa_slave_setup_tagger(struct net_device *slave); int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); +int dsa_slave_manage_vlan_filtering(struct net_device *dev, + bool vlan_filtering); static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) { @@ -543,6 +545,8 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, struct net_device *master, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); +int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); +void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num); /* tag_8021q.c */ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, diff --git a/net/dsa/port.c b/net/dsa/port.c index 979042a64d1a..616330a16d31 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -270,27 +270,9 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) */ } -static int dsa_tree_find_bridge_num(struct dsa_switch_tree *dst, - struct net_device *bridge_dev) -{ - struct dsa_port *dp; - - /* When preparing the offload for a port, it will have a valid - * dp->bridge_dev pointer but a not yet valid dp->bridge_num. - * However there might be other ports having the same dp->bridge_dev - * and a valid dp->bridge_num, so just ignore this port. - */ - list_for_each_entry(dp, &dst->ports, list) - if (dp->bridge_dev == bridge_dev && dp->bridge_num != -1) - return dp->bridge_num; - - return -1; -} - static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, struct net_device *bridge_dev) { - struct dsa_switch_tree *dst = dp->ds->dst; int bridge_num = dp->bridge_num; struct dsa_switch *ds = dp->ds; @@ -300,11 +282,7 @@ static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, dp->bridge_num = -1; - /* Check if the bridge is still in use, otherwise it is time - * to clean it up so we can reuse this bridge_num later. - */ - if (!dsa_tree_find_bridge_num(dst, bridge_dev)) - clear_bit(bridge_num, &dst->fwd_offloading_bridges); + dsa_bridge_num_put(bridge_dev, bridge_num); /* Notify the chips only once the offload has been deactivated, so * that they can update their configuration accordingly. 
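dsa_bridge_num_get() and dsa_bridge_num_put() above move bridge numbering for TX forwarding offload from the per-tree bitmap to a single global one: the first port to offload a bridge claims the first free bit below the driver's limit, and the bit is released only once no port references that bridge any more. A standalone sketch of that allocation scheme, using plain bit operations in place of find_first_zero_bit()/set_bit(); MAX_BRIDGES here is illustrative, not the kernel limit.

#include <stdio.h>

#define MAX_BRIDGES 32

static unsigned long fwd_offloading_bridges;	/* one bit per bridge_num */

static int bridge_num_get(int max)
{
	for (int n = 0; n < MAX_BRIDGES && n < max; n++) {
		if (!(fwd_offloading_bridges & (1UL << n))) {
			fwd_offloading_bridges |= 1UL << n;
			return n;
		}
	}
	return -1;	/* no spare number below the driver's limit */
}

static void bridge_num_put(int n)
{
	/* Only called once the last port using this bridge is gone. */
	fwd_offloading_bridges &= ~(1UL << n);
}

int main(void)
{
	int a = bridge_num_get(4);
	int b = bridge_num_get(4);

	printf("first=%d second=%d\n", a, b);		/* 0 and 1 */
	bridge_num_put(a);
	printf("reused=%d\n", bridge_num_get(4));	/* 0 again */
	return 0;
}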
@@ -316,23 +294,16 @@ static void dsa_port_bridge_tx_fwd_unoffload(struct dsa_port *dp, static bool dsa_port_bridge_tx_fwd_offload(struct dsa_port *dp, struct net_device *bridge_dev) { - struct dsa_switch_tree *dst = dp->ds->dst; struct dsa_switch *ds = dp->ds; int bridge_num, err; if (!ds->ops->port_bridge_tx_fwd_offload) return false; - bridge_num = dsa_tree_find_bridge_num(dst, bridge_dev); - if (bridge_num < 0) { - /* First port that offloads TX forwarding for this bridge */ - bridge_num = find_first_zero_bit(&dst->fwd_offloading_bridges, - DSA_MAX_NUM_OFFLOADING_BRIDGES); - if (bridge_num >= ds->num_fwd_offloading_bridges) - return false; - - set_bit(bridge_num, &dst->fwd_offloading_bridges); - } + bridge_num = dsa_bridge_num_get(bridge_dev, + ds->num_fwd_offloading_bridges); + if (bridge_num < 0) + return false; dp->bridge_num = bridge_num; @@ -402,6 +373,10 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) { struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + /* Don't try to unoffload something that is not offloaded */ + if (!brport_dev) + return; + switchdev_bridge_port_unoffload(brport_dev, dp, &dsa_slave_switchdev_notifier, &dsa_slave_switchdev_blocking_notifier); @@ -605,6 +580,7 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct netlink_ext_ack *extack) { + bool old_vlan_filtering = dsa_port_is_vlan_filtering(dp); struct dsa_switch *ds = dp->ds; bool apply; int err; @@ -630,12 +606,49 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, if (err) return err; - if (ds->vlan_filtering_is_global) + if (ds->vlan_filtering_is_global) { + int port; + ds->vlan_filtering = vlan_filtering; - else + + for (port = 0; port < ds->num_ports; port++) { + struct net_device *slave; + + if (!dsa_is_user_port(ds, port)) + continue; + + /* We might be called in the unbind path, so not + * all slave devices might still be registered. + */ + slave = dsa_to_port(ds, port)->slave; + if (!slave) + continue; + + err = dsa_slave_manage_vlan_filtering(slave, + vlan_filtering); + if (err) + goto restore; + } + } else { dp->vlan_filtering = vlan_filtering; + err = dsa_slave_manage_vlan_filtering(dp->slave, + vlan_filtering); + if (err) + goto restore; + } + return 0; + +restore: + ds->ops->port_vlan_filtering(ds, dp->index, old_vlan_filtering, NULL); + + if (ds->vlan_filtering_is_global) + ds->vlan_filtering = old_vlan_filtering; + else + dp->vlan_filtering = old_vlan_filtering; + + return err; } /* This enforces legacy behavior for switch drivers which assume they can't diff --git a/net/dsa/slave.c b/net/dsa/slave.c index eb9d9e53c536..662ff531d4e2 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1409,6 +1409,76 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, return 0; } +static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); + + return dsa_slave_vlan_rx_add_vid(arg, proto, vid); +} + +static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg) +{ + __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); + + return dsa_slave_vlan_rx_kill_vid(arg, proto, vid); +} + +/* Keep the VLAN RX filtering list in sync with the hardware only if VLAN + * filtering is enabled. 
The baseline is that only ports that offload a + * VLAN-aware bridge are VLAN-aware, and standalone ports are VLAN-unaware, + * but there are exceptions for quirky hardware. + * + * If ds->vlan_filtering_is_global = true, then standalone ports which share + * the same switch with other ports that offload a VLAN-aware bridge are also + * inevitably VLAN-aware. + * + * To summarize, a DSA switch port offloads: + * + * - If standalone (this includes software bridge, software LAG): + * - if ds->needs_standalone_vlan_filtering = true, OR if + * (ds->vlan_filtering_is_global = true AND there are bridges spanning + * this switch chip which have vlan_filtering=1) + * - the 8021q upper VLANs + * - else (standalone VLAN filtering is not needed, VLAN filtering is not + * global, or it is, but no port is under a VLAN-aware bridge): + * - no VLAN (any 8021q upper is a software VLAN) + * + * - If under a vlan_filtering=0 bridge which it offload: + * - if ds->configure_vlan_while_not_filtering = true (default): + * - the bridge VLANs. These VLANs are committed to hardware but inactive. + * - else (deprecated): + * - no VLAN. The bridge VLANs are not restored when VLAN awareness is + * enabled, so this behavior is broken and discouraged. + * + * - If under a vlan_filtering=1 bridge which it offload: + * - the bridge VLANs + * - the 8021q upper VLANs + */ +int dsa_slave_manage_vlan_filtering(struct net_device *slave, + bool vlan_filtering) +{ + int err; + + if (vlan_filtering) { + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + + err = vlan_for_each(slave, dsa_slave_restore_vlan, slave); + if (err) { + vlan_for_each(slave, dsa_slave_clear_vlan, slave); + slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + return err; + } + } else { + err = vlan_for_each(slave, dsa_slave_clear_vlan, slave); + if (err) + return err; + + slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + } + + return 0; +} + struct dsa_hw_port { struct list_head list; struct net_device *dev; @@ -1816,12 +1886,12 @@ void dsa_slave_setup_tagger(struct net_device *slave) p->xmit = cpu_dp->tag_ops->xmit; slave->features = master->vlan_features | NETIF_F_HW_TC; - if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) - slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; slave->hw_features |= NETIF_F_HW_TC; slave->features |= NETIF_F_LLTX; if (slave->needed_tailroom) slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); + if (ds->needs_standalone_vlan_filtering) + slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } static struct lock_class_key dsa_slave_netdev_xmit_lock_key; @@ -2009,6 +2079,11 @@ static int dsa_slave_changeupper(struct net_device *dev, err = dsa_port_bridge_join(dp, info->upper_dev, extack); if (!err) dsa_bridge_mtu_normalization(dp); + if (err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(extack, + "Offloading not supported"); + err = 0; + } err = notifier_from_errno(err); } else { dsa_port_bridge_leave(dp, info->upper_dev); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index fd1a1c6bf9cf..1c797ec8e2c2 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -92,8 +92,10 @@ static int dsa_switch_bridge_join(struct dsa_switch *ds, struct dsa_switch_tree *dst = ds->dst; int err; - if (dst->index == info->tree_index && ds->index == info->sw_index && - ds->ops->port_bridge_join) { + if (dst->index == info->tree_index && ds->index == info->sw_index) { + if (!ds->ops->port_bridge_join) + return -EOPNOTSUPP; + err = ds->ops->port_bridge_join(ds, info->port, info->br); if (err) return err; @@ -114,9 +116,10 @@ static int 
dsa_switch_bridge_join(struct dsa_switch *ds, static int dsa_switch_bridge_leave(struct dsa_switch *ds, struct dsa_notifier_bridge_info *info) { - bool unset_vlan_filtering = br_vlan_enabled(info->br); struct dsa_switch_tree *dst = ds->dst; struct netlink_ext_ack extack = {0}; + bool change_vlan_filtering = false; + bool vlan_filtering; int err, port; if (dst->index == info->tree_index && ds->index == info->sw_index && @@ -129,6 +132,15 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, info->sw_index, info->port, info->br); + if (ds->needs_standalone_vlan_filtering && !br_vlan_enabled(info->br)) { + change_vlan_filtering = true; + vlan_filtering = true; + } else if (!ds->needs_standalone_vlan_filtering && + br_vlan_enabled(info->br)) { + change_vlan_filtering = true; + vlan_filtering = false; + } + /* If the bridge was vlan_filtering, the bridge core doesn't trigger an * event for changing vlan_filtering setting upon slave ports leaving * it. That is a good thing, because that lets us handle it and also @@ -137,21 +149,22 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, * vlan_filtering callback is only when the last port leaves the last * VLAN-aware bridge. */ - if (unset_vlan_filtering && ds->vlan_filtering_is_global) { + if (change_vlan_filtering && ds->vlan_filtering_is_global) { for (port = 0; port < ds->num_ports; port++) { struct net_device *bridge_dev; bridge_dev = dsa_to_port(ds, port)->bridge_dev; if (bridge_dev && br_vlan_enabled(bridge_dev)) { - unset_vlan_filtering = false; + change_vlan_filtering = false; break; } } } - if (unset_vlan_filtering) { + + if (change_vlan_filtering) { err = dsa_port_vlan_filtering(dsa_to_port(ds, info->port), - false, &extack); + vlan_filtering, &extack); if (extack._msg) dev_err(ds->dev, "port %d: %s\n", info->port, extack._msg); diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index e6d5f3b4fd89..f8f7b7c34e7d 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -362,12 +362,12 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, continue; /* Install the RX VID of the targeted port in our VLAN table */ - err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, false); + err = dsa_port_tag_8021q_vlan_add(dp, targeted_rx_vid, true); if (err) return err; /* Install our RX VID into the targeted port's VLAN table */ - err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, false); + err = dsa_port_tag_8021q_vlan_add(targeted_dp, rx_vid, true); if (err) return err; } @@ -451,7 +451,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) * L2 forwarding rules still take precedence when there are no VLAN * restrictions, so there are no concerns about leaking traffic. 
*/ - err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, true); + err = dsa_port_tag_8021q_vlan_add(dp, rx_vid, false); if (err) { dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %pe\n", @@ -463,7 +463,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) vlan_vid_add(master, ctx->proto, rx_vid); /* Finally apply the TX VID on this port and on the CPU port */ - err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, true); + err = dsa_port_tag_8021q_vlan_add(dp, tx_vid, false); if (err) { dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %pe\n", diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 1406bc41d345..c054f48541c8 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -116,9 +116,14 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb) } /* Calls sja1105_port_deferred_xmit in sja1105_main.c */ -static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, +static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) { + struct sja1105_port *sp = dp->priv; + + if (!dsa_port_is_sja1105(dp)) + return skb; + /* Increase refcount so the kfree_skb in dsa_slave_xmit * won't really free the packet. */ @@ -128,9 +133,44 @@ static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp, return NULL; } -static u16 sja1105_xmit_tpid(struct sja1105_port *sp) +/* Send VLAN tags with a TPID that blends in with whatever VLAN protocol a + * bridge spanning ports of this switch might have. + */ +static u16 sja1105_xmit_tpid(struct dsa_port *dp) { - return sp->xmit_tpid; + struct dsa_switch *ds = dp->ds; + struct dsa_port *other_dp; + u16 proto; + + /* Since VLAN awareness is global, then if this port is VLAN-unaware, + * all ports are. Use the VLAN-unaware TPID used for tag_8021q. + */ + if (!dsa_port_is_vlan_filtering(dp)) + return ETH_P_SJA1105; + + /* Port is VLAN-aware, so there is a bridge somewhere (a single one, + * we're sure about that). It may not be on this port though, so we + * need to find it. + */ + list_for_each_entry(other_dp, &ds->dst->ports, list) { + if (other_dp->ds != ds) + continue; + + if (!other_dp->bridge_dev) + continue; + + /* Error is returned only if CONFIG_BRIDGE_VLAN_FILTERING, + * which seems pointless to handle, as our port cannot become + * VLAN-aware in that case. + */ + br_vlan_get_proto(other_dp->bridge_dev, &proto); + + return proto; + } + + WARN_ONCE(1, "Port is VLAN-aware but cannot find associated bridge!\n"); + + return ETH_P_SJA1105; } static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, @@ -155,7 +195,37 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, */ tx_vid = dsa_8021q_bridge_tx_fwd_offload_vid(dp->bridge_num); - return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), tx_vid); + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), tx_vid); +} + +/* Transform untagged control packets into pvid-tagged control packets so that + * all packets sent by this tagger are VLAN-tagged and we can configure the + * switch to drop untagged packets coming from the DSA master. + */ +static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp, + struct sk_buff *skb, u8 pcp) +{ + __be16 xmit_tpid = htons(sja1105_xmit_tpid(dp)); + struct vlan_ethhdr *hdr; + + /* If VLAN tag is in hwaccel area, move it to the payload + * to deal with both cases uniformly and to ensure that + * the VLANs are added in the right order. 
+ */ + if (unlikely(skb_vlan_tag_present(skb))) { + skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + return NULL; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + + /* If skb is already VLAN-tagged, leave that VLAN ID in place */ + if (hdr->h_vlan_proto == xmit_tpid) + return skb; + + return vlan_insert_tag(skb, xmit_tpid, (pcp << VLAN_PRIO_SHIFT) | + SJA1105_DEFAULT_VLAN); } static struct sk_buff *sja1105_xmit(struct sk_buff *skb, @@ -173,10 +243,15 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, * but instead SPI-installed management routes. Part 2 of this * is the .port_deferred_xmit driver callback. */ - if (unlikely(sja1105_is_link_local(skb))) - return sja1105_defer_xmit(dp->priv, skb); + if (unlikely(sja1105_is_link_local(skb))) { + skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp); + if (!skb) + return NULL; - return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), + return sja1105_defer_xmit(dp, skb); + } + + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); } @@ -200,9 +275,13 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, * tag_8021q TX VLANs. */ if (likely(!sja1105_is_link_local(skb))) - return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv), + return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp), ((pcp << VLAN_PRIO_SHIFT) | tx_vid)); + skb = sja1105_pvid_tag_control_pkt(dp, skb, pcp); + if (!skb) + return NULL; + skb_push(skb, SJA1110_HEADER_LEN); dsa_alloc_etype_header(skb, SJA1110_HEADER_LEN); @@ -265,16 +344,16 @@ static struct sk_buff bool is_link_local, bool is_meta) { - struct sja1105_port *sp; - struct dsa_port *dp; - - dp = dsa_slave_to_port(skb->dev); - sp = dp->priv; - /* Step 1: A timestampable frame was received. * Buffer it until we get its meta frame. */ if (is_link_local) { + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_port *sp = dp->priv; + + if (unlikely(!dsa_port_is_sja1105(dp))) + return skb; + if (!test_bit(SJA1105_HWTS_RX_EN, &sp->data->state)) /* Do normal processing. */ return skb; @@ -307,8 +386,13 @@ static struct sk_buff * frame, which serves no further purpose). */ } else if (is_meta) { + struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct sja1105_port *sp = dp->priv; struct sk_buff *stampable_skb; + if (unlikely(!dsa_port_is_sja1105(dp))) + return skb; + /* Drop the meta frame if we're not in the right state * to process it. */ diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 1d6bc132aa4d..46776ea42a92 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -10,6 +10,7 @@ struct coalesce_req_info { struct coalesce_reply_data { struct ethnl_reply_data base; struct ethtool_coalesce coalesce; + struct kernel_ethtool_coalesce kernel_coalesce; u32 supported_params; }; @@ -61,6 +62,7 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base, struct genl_info *info) { struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); + struct netlink_ext_ack *extack = info ? 
info->extack : NULL; struct net_device *dev = reply_base->dev; int ret; @@ -70,7 +72,8 @@ static int coalesce_prepare_data(const struct ethnl_req_info *req_base, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; - ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce); + ret = dev->ethtool_ops->get_coalesce(dev, &data->coalesce, + &data->kernel_coalesce, extack); ethnl_ops_complete(dev); return ret; @@ -100,7 +103,9 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RX_MAX_FRAMES_HIGH */ nla_total_size(sizeof(u32)) + /* _TX_USECS_HIGH */ nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_HIGH */ - nla_total_size(sizeof(u32)); /* _RATE_SAMPLE_INTERVAL */ + nla_total_size(sizeof(u32)) + /* _RATE_SAMPLE_INTERVAL */ + nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_TX */ + nla_total_size(sizeof(u8)); /* _USE_CQE_MODE_RX */ } static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val, @@ -124,6 +129,7 @@ static int coalesce_fill_reply(struct sk_buff *skb, const struct ethnl_reply_data *reply_base) { const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); + const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce; const struct ethtool_coalesce *coal = &data->coalesce; u32 supported = data->supported_params; @@ -170,7 +176,11 @@ static int coalesce_fill_reply(struct sk_buff *skb, coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, coal->tx_max_coalesced_frames_high, supported) || coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, - coal->rate_sample_interval, supported)) + coal->rate_sample_interval, supported) || + coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, + kcoal->use_cqe_mode_tx, supported) || + coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, + kcoal->use_cqe_mode_rx, supported)) return -EMSGSIZE; return 0; @@ -215,10 +225,13 @@ const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_TX_USECS_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = NLA_POLICY_MAX(NLA_U8, 1), + [ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = NLA_POLICY_MAX(NLA_U8, 1), }; int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info) { + struct kernel_ethtool_coalesce kernel_coalesce = {}; struct ethtool_coalesce coalesce = {}; struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; @@ -255,7 +268,8 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info) ret = ethnl_ops_begin(dev); if (ret < 0) goto out_rtnl; - ret = ops->get_coalesce(dev, &coalesce); + ret = ops->get_coalesce(dev, &coalesce, &kernel_coalesce, + info->extack); if (ret < 0) goto out_ops; @@ -303,11 +317,16 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info) tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH], &mod); ethnl_update_u32(&coalesce.rate_sample_interval, tb[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL], &mod); + ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx, + tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod); + ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx, + tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod); ret = 0; if (!mod) goto out_ops; - ret = dev->ethtool_ops->set_coalesce(dev, &coalesce); + ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce, + info->extack); if (ret < 0) goto out_ops; ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL); diff --git a/net/ethtool/ioctl.c 
b/net/ethtool/ioctl.c index 81fa36a4c9c4..f2abc3152888 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1619,12 +1619,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; + struct kernel_ethtool_coalesce kernel_coalesce = {}; int ret; if (!dev->ethtool_ops->get_coalesce) return -EOPNOTSUPP; - ret = dev->ethtool_ops->get_coalesce(dev, &coalesce); + ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce, + NULL); if (ret) return ret; @@ -1691,19 +1693,26 @@ ethtool_set_coalesce_supported(struct net_device *dev, static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { + struct kernel_ethtool_coalesce kernel_coalesce = {}; struct ethtool_coalesce coalesce; int ret; - if (!dev->ethtool_ops->set_coalesce) + if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce) return -EOPNOTSUPP; + ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce, + NULL); + if (ret) + return ret; + if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) return -EFAULT; if (!ethtool_set_coalesce_supported(dev, &coalesce)) return -EOPNOTSUPP; - ret = dev->ethtool_ops->set_coalesce(dev, &coalesce); + ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce, + NULL); if (!ret) ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL); return ret; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 077aac3929a8..e8987e28036f 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -359,7 +359,7 @@ extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; -extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL + 1]; +extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1]; extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_HEADER + 1]; extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1]; extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1]; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 099259fc826a..7fbd0b532f52 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -465,14 +465,16 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) if (!doi_def) return; - switch (doi_def->type) { - case CIPSO_V4_MAP_TRANS: - kfree(doi_def->map.std->lvl.cipso); - kfree(doi_def->map.std->lvl.local); - kfree(doi_def->map.std->cat.cipso); - kfree(doi_def->map.std->cat.local); - kfree(doi_def->map.std); - break; + if (doi_def->map.std) { + switch (doi_def->type) { + case CIPSO_V4_MAP_TRANS: + kfree(doi_def->map.std->lvl.cipso); + kfree(doi_def->map.std->lvl.local); + kfree(doi_def->map.std->cat.cipso); + kfree(doi_def->map.std->cat.local); + kfree(doi_def->map.std); + break; + } } kfree(doi_def); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index a09e36c4a413..851f542928a3 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -97,7 +97,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, static void esp_ssg_unref(struct xfrm_state *x, void *tmp) { - struct esp_output_extra *extra = esp_tmp_extra(tmp); struct 
crypto_aead *aead = x->data; int extralen = 0; u8 *iv; @@ -105,9 +104,8 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp) struct scatterlist *sg; if (x->props.flags & XFRM_STATE_ESN) - extralen += sizeof(*extra); + extralen += sizeof(struct esp_output_extra); - extra = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 754013fa393b..f25d02ad4a8a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -534,7 +534,8 @@ out: atomic_read(&newsk->sk_rmem_alloc)); mem_cgroup_sk_alloc(newsk); if (newsk->sk_memcg && amt) - mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + mem_cgroup_charge_skmem(newsk->sk_memcg, amt, + GFP_KERNEL | __GFP_NOFAIL); release_sock(newsk); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 6ebf05859acb..177d26d8fb9c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -473,6 +473,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static int gre_handle_offloads(struct sk_buff *skb, bool csum) { + if (csum && skb_checksum_start(skb) < skb->data) + return -EINVAL; return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6b04a88466b2..9bca57ef8b83 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -606,18 +606,6 @@ void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, } EXPORT_SYMBOL(ip_fraglist_init); -static void ip_fraglist_ipcb_prepare(struct sk_buff *skb, - struct ip_fraglist_iter *iter) -{ - struct sk_buff *to = iter->frag; - - /* Copy the flags to each fragment. */ - IPCB(to)->flags = IPCB(skb)->flags; - - if (iter->offset == 0) - ip_options_fragment(to); -} - void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter) { unsigned int hlen = iter->hlen; @@ -663,7 +651,7 @@ void ip_frag_init(struct sk_buff *skb, unsigned int hlen, EXPORT_SYMBOL(ip_frag_init); static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to, - bool first_frag, struct ip_frag_state *state) + bool first_frag) { /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; @@ -838,11 +826,14 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, /* Everything is OK. Generate! */ ip_fraglist_init(skb, iph, hlen, &iter); + if (iter.frag) + ip_options_fragment(iter.frag); + for (;;) { /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) { - ip_fraglist_ipcb_prepare(skb, &iter); + IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); } @@ -897,7 +888,7 @@ slow_path: err = PTR_ERR(skb2); goto fail; } - ip_frag_ipcb(skb, skb2, first_frag, &state); + ip_frag_ipcb(skb, skb2, first_frag); /* * Put this fragment into the sending queue. 
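
Editorial aside on the net/ipv4/route.c hunk above: fnhe_hashfun() switches from jhash with a random seed to a SipHash-keyed hash (siphash_1u32() folded through hash_64()), and the exception-insertion path now reclaims old entries down to a randomized maximum depth, both of which make it harder for an off-path sender to predict or overload a single hash chain. Below is a small user-space sketch of the keyed-bucket idea only; demo_keyed_hash() is a stand-in mixer rather than real SipHash, DEMO_HASH_SHIFT is an arbitrary value for the sketch, and the lazy key setup merely imitates net_get_random_once().

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define DEMO_HASH_SHIFT 5	/* 32 buckets, arbitrary for this sketch */

static uint64_t demo_hash_key;
static int demo_hash_key_ready;

/* Stand-in keyed mixer; the kernel uses the SipHash PRF here. */
static uint64_t demo_keyed_hash(uint32_t daddr, uint64_t key)
{
	uint64_t v = daddr ^ key;

	v *= 0x9e3779b97f4a7c15ULL;	/* 64-bit golden-ratio multiplier */
	v ^= v >> 32;
	return v;
}

/* Pick a bucket from the top bits of the keyed hash, as hash_64() does. */
static unsigned int demo_fnhe_hashfun(uint32_t daddr)
{
	if (!demo_hash_key_ready) {	/* imitates net_get_random_once() */
		srand((unsigned int)time(NULL));
		demo_hash_key = ((uint64_t)rand() << 32) | (unsigned int)rand();
		demo_hash_key_ready = 1;
	}

	return (unsigned int)(demo_keyed_hash(daddr, demo_hash_key) >>
			      (64 - DEMO_HASH_SHIFT));
}

int main(void)
{
	/* Without knowing the key, a sender cannot aim exceptions at one bucket. */
	printf("10.0.0.1 -> bucket %u\n", demo_fnhe_hashfun(0x0a000001));
	return 0;
}
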
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index b52a4c8a14fc..40417a3f930b 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -112,6 +112,8 @@ static int __init iptable_mangle_init(void) { int ret = xt_register_template(&packet_mangler, iptable_mangle_table_init); + if (ret < 0) + return ret; mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); if (IS_ERR(mangle_ops)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b181773d7ad3..1b6c8fad6277 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -587,28 +587,35 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe) } } -static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) +static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) { - struct fib_nh_exception *fnhe, *oldest; + struct fib_nh_exception __rcu **fnhe_p, **oldest_p; + struct fib_nh_exception *fnhe, *oldest = NULL; - oldest = rcu_dereference(hash->chain); - for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; - fnhe = rcu_dereference(fnhe->fnhe_next)) { - if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) + for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { + fnhe = rcu_dereference_protected(*fnhe_p, + lockdep_is_held(&fnhe_lock)); + if (!fnhe) + break; + if (!oldest || + time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { oldest = fnhe; + oldest_p = fnhe_p; + } } fnhe_flush_routes(oldest); - return oldest; + *oldest_p = oldest->fnhe_next; + kfree_rcu(oldest, rcu); } -static inline u32 fnhe_hashfun(__be32 daddr) +static u32 fnhe_hashfun(__be32 daddr) { - static u32 fnhe_hashrnd __read_mostly; - u32 hval; + static siphash_key_t fnhe_hash_key __read_mostly; + u64 hval; - net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); - hval = jhash_1word((__force u32)daddr, fnhe_hashrnd); - return hash_32(hval, FNHE_HASH_SHIFT); + net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); + hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); + return hash_64(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) @@ -677,16 +684,21 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, if (rt) fill_route_from_fnhe(rt, fnhe); } else { - if (depth > FNHE_RECLAIM_DEPTH) - fnhe = fnhe_oldest(hash); - else { - fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); - if (!fnhe) - goto out_unlock; - - fnhe->fnhe_next = hash->chain; - rcu_assign_pointer(hash->chain, fnhe); + /* Randomize max depth to avoid some side channels attacks. */ + int max_depth = FNHE_RECLAIM_DEPTH + + prandom_u32_max(FNHE_RECLAIM_DEPTH); + + while (depth > max_depth) { + fnhe_remove_oldest(hash); + depth--; } + + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + goto out_unlock; + + fnhe->fnhe_next = hash->chain; + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; @@ -694,6 +706,8 @@ static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = max(1UL, expires); + rcu_assign_pointer(hash->chain, fnhe); + /* Exception created; mark the cached routes for the nexthop * stale, so anyone caching it rechecks if this exception * applies to them. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f931def6302e..e8b48df73c85 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3338,6 +3338,7 @@ int tcp_set_window_clamp(struct sock *sk, int val) } else { tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ? 
SOCK_MIN_RCVBUF / 2 : val; + tp->rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp); } return 0; } diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 62ba8d0f2c60..59412d6354a0 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -368,8 +368,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, return NULL; } - if (syn_data && - tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) + if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len == 0) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 29553fce8502..6d72f3ea48c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size) sk_memory_allocated_add(sk, amt); if (mem_cgroup_sockets_enabled && sk->sk_memcg) - mem_cgroup_charge_skmem(sk->sk_memcg, amt); + mem_cgroup_charge_skmem(sk->sk_memcg, amt, + gfp_memcg_charge() | __GFP_NOFAIL); } /* Send a FIN. The caller locks the socket for us. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8381288a0d6e..17756f3ed33b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -394,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; ndev->cnf.mtu6 = dev->mtu; + ndev->ra_mtu = 0; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); if (!ndev->nd_parms) { kfree(ndev); @@ -3849,6 +3850,7 @@ restart: } idev->tstamp = jiffies; + idev->ra_mtu = 0; /* Last: Shot the device (if unregistered) */ if (unregister) { @@ -5543,6 +5545,7 @@ static inline size_t inet6_ifla6_size(void) + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ + nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */ + nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */ + + nla_total_size(4) /* IFLA_INET6_RA_MTU */ + 0; } @@ -5651,6 +5654,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode)) goto nla_put_failure; + if (idev->ra_mtu && + nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -5767,6 +5774,9 @@ update_lft: static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = { [IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 }, [IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) }, + [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT, + .reject_message = + "IFLA_INET6_RA_MTU can not be set" }, }; static int check_addr_gen_mode(int mode) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index a8f118e469b7..1bec5b22f80d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1341,7 +1341,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, lockdep_is_held(&rt->fib6_table->tb6_lock)); - /* paired with smp_rmb() in rt6_get_cookie_safe() */ + /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { fn->fn_sernum = sernum; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3ad201d372d8..7baf41d160f5 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -629,6 +629,8 @@ drop: static int gre_handle_offloads(struct sk_buff *skb, bool csum) { + if (csum && skb_checksum_start(skb) < skb->data) + return -EINVAL; return iptunnel_handle_offloads(skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c467c6419893..4b098521a44c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1391,12 +1391,6 @@ skip_defrtr: } } - /* - * Send a notify if RA changed managed/otherconf flags or timer settings - */ - if (send_ifinfo_notify) - inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); - skip_linkparms: /* @@ -1496,6 +1490,11 @@ skip_routeinfo: memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); + if (in6_dev->ra_mtu != mtu) { + in6_dev->ra_mtu = mtu; + send_ifinfo_notify = true; + } + if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { @@ -1519,6 +1518,12 @@ skip_routeinfo: ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: + /* Send a notify if RA changed managed/otherconf flags or + * timer settings or ra_mtu value + */ + if (send_ifinfo_notify) + inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); + fib6_info_release(rt); if (neigh) neigh_release(neigh); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6cf4bb89ca69..dbc224023977 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -41,6 +41,7 @@ #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/jhash.h> +#include <linux/siphash.h> #include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> @@ -1484,17 +1485,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) static u32 rt6_exception_hash(const struct in6_addr *dst, const struct in6_addr *src) { - static u32 seed __read_mostly; - u32 val; + static siphash_key_t rt6_exception_key __read_mostly; + struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + }; + u64 val; - net_get_random_once(&seed, sizeof(seed)); - val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed); + net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key)); #ifdef CONFIG_IPV6_SUBTREES if (src) - val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val); + combined.src = *src; #endif - return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); + val = siphash(&combined, sizeof(combined), &rt6_exception_key); + + return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); } /* Helper function to find the cached rt in the hash table @@ -1649,6 +1657,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; struct fib6_nh *nh = res->nh; + int max_depth; int err = 0; spin_lock_bh(&rt6_exception_lock); @@ -1703,7 +1712,9 @@ static int rt6_insert_exception(struct rt6_info *nrt, bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; - if (bucket->depth > FIB6_MAX_DEPTH) + /* Randomize max depth to avoid some side channels attacks. 
*/ + max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH); + while (bucket->depth > max_depth) rt6_exception_remove_oldest(bucket); out: diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 897fa59c47de..6ebc7aa24466 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -26,6 +26,8 @@ #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif +#include <net/lwtunnel.h> +#include <linux/netfilter.h> static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) { @@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); return 0; } -static int seg6_input(struct sk_buff *skb) +static int seg6_input_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dst_input(skb); +} + +static int seg6_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb) if (unlikely(err)) return err; - return dst_input(skb); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, seg6_input_finish); + + return seg6_input_finish(dev_net(skb->dev), NULL, skb); } -static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int seg6_input_nf(struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(skb->dev); + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL, + skb, NULL, dev, seg6_input_core); + } + + return -EINVAL; +} + +static int seg6_input(struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_input_nf(skb); + + return seg6_input_core(dev_net(skb->dev), NULL, skb); +} + +static int seg6_output_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; @@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (unlikely(err)) goto drop; + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, skb_dst(skb)->dev, dst_output); + return dst_output(net, sk, skb); drop: kfree_skb(skb); return err; } +static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_device *dev = skb_dst(skb)->dev; + + switch (skb->protocol) { + case htons(ETH_P_IP): + return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + case htons(ETH_P_IPV6): + return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, + NULL, dev, seg6_output_core); + } + + return -EINVAL; +} + +static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return seg6_output_nf(net, sk, skb); + + return seg6_output_core(net, sk, skb); +} + static int seg6_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 
60bf3b877957..ddc8dfcd4e2b 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -30,6 +30,8 @@ #include <net/seg6_local.h> #include <linux/etherdevice.h> #include <linux/bpf.h> +#include <net/lwtunnel.h> +#include <linux/netfilter.h> #define SEG6_F_ATTR(i) BIT(i) @@ -413,12 +415,33 @@ drop: return -EINVAL; } +static int input_action_end_dx6_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct in6_addr *nhaddr = NULL; + struct seg6_local_lwt *slwt; + + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); + + /* The inner packet is not associated to any local interface, + * so we do not call netif_rx(). + * + * If slwt->nh6 is set to ::, then lookup the nexthop for the + * inner packet's DA. Otherwise, use the specified nexthop. + */ + if (!ipv6_addr_any(&slwt->nh6)) + nhaddr = &slwt->nh6; + + seg6_lookup_nexthop(skb, nhaddr, 0); + + return dst_input(skb); +} + /* decapsulate and forward to specified nexthop */ static int input_action_end_dx6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { - struct in6_addr *nhaddr = NULL; - /* this function accepts IPv6 encapsulated packets, with either * an SRH with SL=0, or no SRH. */ @@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; - /* The inner packet is not associated to any local interface, - * so we do not call netif_rx(). - * - * If slwt->nh6 is set to ::, then lookup the nexthop for the - * inner packet's DA. Otherwise, use the specified nexthop. - */ - - if (!ipv6_addr_any(&slwt->nh6)) - nhaddr = &slwt->nh6; - skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + nf_reset_ct(skb); - seg6_lookup_nexthop(skb, nhaddr, 0); + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx6_finish); - return dst_input(skb); + return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; } -static int input_action_end_dx4(struct sk_buff *skb, - struct seg6_local_lwt *slwt) +static int input_action_end_dx4_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { + struct dst_entry *orig_dst = skb_dst(skb); + struct seg6_local_lwt *slwt; struct iphdr *iph; __be32 nhaddr; int err; - if (!decap_and_validate(skb, IPPROTO_IPIP)) - goto drop; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto drop; - - skb->protocol = htons(ETH_P_IP); + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); iph = ip_hdr(skb); @@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb, skb_dst_drop(skb); - skb_set_transport_header(skb, sizeof(struct iphdr)); - err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); - if (err) - goto drop; + if (err) { + kfree_skb(skb); + return -EINVAL; + } return dst_input(skb); +} + +static int input_action_end_dx4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb->protocol = htons(ETH_P_IP); + skb_set_transport_header(skb, sizeof(struct iphdr)); + nf_reset_ct(skb); + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, NULL, + skb_dst(skb)->dev, input_action_end_dx4_finish); + return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); 
return -EINVAL; @@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb, skb_dst_drop(skb); skb_set_transport_header(skb, hdrlen); + nf_reset_ct(skb); return end_dt_vrf_rcv(skb, family, vrf); @@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt, u64_stats_update_end(&pcounters->syncp); } -static int seg6_local_input(struct sk_buff *skb) +static int seg6_local_input_core(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct seg6_action_desc *desc; @@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb) unsigned int len = skb->len; int rc; - if (skb->protocol != htons(ETH_P_IPV6)) { - kfree_skb(skb); - return -EINVAL; - } - slwt = seg6_local_lwtunnel(orig_dst->lwtstate); desc = slwt->desc; @@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb) return rc; } +static int seg6_local_input(struct sk_buff *skb) +{ + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return -EINVAL; + } + + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, + seg6_local_input_core); + + return seg6_local_input_core(dev_net(skb->dev), NULL, skb); +} + static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4e6f11e63df3..d69b31c20fe2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -828,9 +828,11 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, return ret; } -static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, - const u8 *resp, size_t resp_len, - const struct ieee80211_csa_settings *csa) +static int +ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, + const u8 *resp, size_t resp_len, + const struct ieee80211_csa_settings *csa, + const struct ieee80211_color_change_settings *cca) { struct probe_resp *new, *old; @@ -850,6 +852,8 @@ static int ieee80211_set_probe_resp(struct ieee80211_sub_if_data *sdata, memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_presp, csa->n_counter_offsets_presp * sizeof(new->cntdwn_counter_offsets[0])); + else if (cca) + new->cntdwn_counter_offsets[0] = cca->counter_offset_presp; rcu_assign_pointer(sdata->u.ap.probe_resp, new); if (old) @@ -955,7 +959,8 @@ static int ieee80211_set_ftm_responder_params( static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, struct cfg80211_beacon_data *params, - const struct ieee80211_csa_settings *csa) + const struct ieee80211_csa_settings *csa, + const struct ieee80211_color_change_settings *cca) { struct beacon_data *new, *old; int new_head_len, new_tail_len; @@ -1004,6 +1009,9 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, memcpy(new->cntdwn_counter_offsets, csa->counter_offsets_beacon, csa->n_counter_offsets_beacon * sizeof(new->cntdwn_counter_offsets[0])); + } else if (cca) { + new->cntdwn_current_counter = cca->count; + new->cntdwn_counter_offsets[0] = cca->counter_offset_beacon; } /* copy in head */ @@ -1020,7 +1028,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata, memcpy(new->tail, old->tail, new_tail_len); err = ieee80211_set_probe_resp(sdata, params->probe_resp, - params->probe_resp_len, csa); + params->probe_resp_len, csa, cca); if (err < 0) { kfree(new); return err; @@ -1175,7 +1183,7 @@ static 
int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) sdata->vif.bss_conf.beacon_tx_rate = params->beacon_rate; - err = ieee80211_assign_beacon(sdata, &params->beacon, NULL); + err = ieee80211_assign_beacon(sdata, &params->beacon, NULL, NULL); if (err < 0) goto error; changed |= err; @@ -1230,17 +1238,17 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); sdata_assert_lock(sdata); - /* don't allow changing the beacon while CSA is in place - offset + /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ - if (sdata->vif.csa_active) + if (sdata->vif.csa_active || sdata->vif.color_change_active) return -EBUSY; old = sdata_dereference(sdata->u.ap.beacon, sdata); if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, params, NULL); + err = ieee80211_assign_beacon(sdata, params, NULL, NULL); if (err < 0) return err; ieee80211_bss_info_change_notify(sdata, err); @@ -3156,7 +3164,7 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, switch (sdata->vif.type) { case NL80211_IFTYPE_AP: err = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, - NULL); + NULL, NULL); kfree(sdata->u.ap.next_beacon); sdata->u.ap.next_beacon = NULL; @@ -3322,7 +3330,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, csa.n_counter_offsets_presp = params->n_counter_offsets_presp; csa.count = params->count; - err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa); + err = ieee80211_assign_beacon(sdata, &params->beacon_csa, &csa, NULL); if (err < 0) { kfree(sdata->u.ap.next_beacon); return err; @@ -3411,6 +3419,15 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } +static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata) +{ + sdata->vif.color_change_active = false; + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + + cfg80211_color_change_aborted_notify(sdata->dev); +} + static int __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params) @@ -3479,6 +3496,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, goto out; } + /* if there is a color change in progress, abort it */ + if (sdata->vif.color_change_active) + ieee80211_color_change_abort(sdata); + err = ieee80211_set_csa_beacon(sdata, params, &changed); if (err) { ieee80211_vif_unreserve_chanctx(sdata); @@ -4130,6 +4151,196 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy, return local->ops->set_sar_specs(&local->hw, sar); } +static int +ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata, + u32 *changed) +{ + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: { + int ret; + + ret = ieee80211_assign_beacon(sdata, sdata->u.ap.next_beacon, + NULL, NULL); + kfree(sdata->u.ap.next_beacon); + sdata->u.ap.next_beacon = NULL; + + if (ret < 0) + return ret; + + *changed |= ret; + break; + } + default: + WARN_ON_ONCE(1); + return -EINVAL; + } + + return 0; +} + +static int +ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata, + struct cfg80211_color_change_settings *params, + u32 *changed) +{ + struct ieee80211_color_change_settings color_change = {}; + int err; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + sdata->u.ap.next_beacon = + cfg80211_beacon_dup(&params->beacon_next); + if (!sdata->u.ap.next_beacon)
+ return -ENOMEM; + + if (params->count <= 1) + break; + + color_change.counter_offset_beacon = + params->counter_offset_beacon; + color_change.counter_offset_presp = + params->counter_offset_presp; + color_change.count = params->count; + + err = ieee80211_assign_beacon(sdata, &params->beacon_color_change, + NULL, &color_change); + if (err < 0) { + kfree(sdata->u.ap.next_beacon); + return err; + } + *changed |= err; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void +ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, + u8 color, int enable, u32 changed) +{ + sdata->vif.bss_conf.he_bss_color.color = color; + sdata->vif.bss_conf.he_bss_color.enabled = enable; + changed |= BSS_CHANGED_HE_BSS_COLOR; + + ieee80211_bss_info_change_notify(sdata, changed); +} + +static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + lockdep_assert_held(&local->mtx); + + sdata->vif.color_change_active = false; + + err = ieee80211_set_after_color_change_beacon(sdata, &changed); + if (err) { + cfg80211_color_change_aborted_notify(sdata->dev); + return err; + } + + ieee80211_color_change_bss_config_notify(sdata, + sdata->vif.color_change_color, + 1, changed); + cfg80211_color_change_notify(sdata->dev); + + return 0; +} + +void ieee80211_color_change_finalize_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + color_change_finalize_work); + struct ieee80211_local *local = sdata->local; + + sdata_lock(sdata); + mutex_lock(&local->mtx); + + /* AP might have been stopped while waiting for the lock. */ + if (!sdata->vif.color_change_active) + goto unlock; + + if (!ieee80211_sdata_running(sdata)) + goto unlock; + + ieee80211_color_change_finalize(sdata); + +unlock: + mutex_unlock(&local->mtx); + sdata_unlock(sdata); +} + +void ieee80211_color_change_finish(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + ieee80211_queue_work(&sdata->local->hw, + &sdata->color_change_finalize_work); +} +EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); + +void +ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, + u64 color_bitmap) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (sdata->vif.color_change_active || sdata->vif.csa_active) + return; + + cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap); +} +EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify); + +static int +ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_color_change_settings *params) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + u32 changed = 0; + int err; + + sdata_assert_lock(sdata); + + mutex_lock(&local->mtx); + + /* don't allow another color change if one is already active or if csa + * is active + */ + if (sdata->vif.color_change_active || sdata->vif.csa_active) { + err = -EBUSY; + goto out; + } + + err = ieee80211_set_color_change_beacon(sdata, params, &changed); + if (err) + goto out; + + sdata->vif.color_change_active = true; + sdata->vif.color_change_color = params->color; + + cfg80211_color_change_started_notify(sdata->dev, params->count); + + if (changed) + ieee80211_color_change_bss_config_notify(sdata, 0, 0, changed); + else + /* if the beacon didn't change, we can finalize immediately */ +
ieee80211_color_change_finalize(sdata); + +out: + mutex_unlock(&local->mtx); + + return err; +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -4233,4 +4444,5 @@ const struct cfg80211_ops mac80211_config_ops = { .set_tid_config = ieee80211_set_tid_config, .reset_tid_config = ieee80211_reset_tid_config, .set_sar_specs = ieee80211_set_sar_specs, + .color_change = ieee80211_color_change, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index bcb7cc06db3d..cd3731cbf6c6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1447,4 +1447,40 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_add_twt_setup(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_twt_setup *twt) +{ + struct ieee80211_twt_params *twt_agrt; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + twt_agrt = (void *)twt->params; + + trace_drv_add_twt_setup(local, sta, twt, twt_agrt); + local->ops->add_twt_setup(&local->hw, sta, twt); + trace_drv_return_void(local); +} + +static inline void drv_twt_teardown_request(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + u8 flowid) +{ + might_sleep(); + if (!check_sdata_in_driver(sdata)) + return; + + if (!local->ops->twt_teardown_request) + return; + + trace_drv_twt_teardown_request(local, sta, flowid); + local->ops->twt_teardown_request(&local->hw, sta, flowid); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a7ac53a2f00d..5d6ca4c3e698 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -489,7 +489,6 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *ies; u16 capability = WLAN_CAPABILITY_IBSS; u64 tsf; - int ret = 0; sdata_assert_lock(sdata); @@ -501,10 +500,8 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY(ifibss->privacy)); - if (WARN_ON(!cbss)) { - ret = -EINVAL; - goto out; - } + if (WARN_ON(!cbss)) + return -EINVAL; rcu_read_lock(); ies = rcu_dereference(cbss->ies); @@ -520,18 +517,14 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.basic_rates, capability, tsf, &ifibss->chandef, NULL, csa_settings); - if (!presp) { - ret = -ENOMEM; - goto out; - } + if (!presp) + return -ENOMEM; rcu_assign_pointer(ifibss->presp, presp); if (old_presp) kfree_rcu(old_presp, rcu_head); return BSS_CHANGED_BEACON; - out: - return ret; } int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 30ce6d2ec7ce..159af6c3ffb0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -25,6 +25,7 @@ #include <linux/leds.h> #include <linux/idr.h> #include <linux/rhashtable.h> +#include <linux/rbtree.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> @@ -244,6 +245,12 @@ struct ieee80211_csa_settings { u8 count; }; +struct ieee80211_color_change_settings { + u16 counter_offset_beacon; + u16 counter_offset_presp; + u8 count; +}; + struct beacon_data { u8 *head, *tail; int head_len, tail_len; @@ -923,6 +930,8 @@ struct ieee80211_sub_if_data { bool csa_block_tx; 
/* write-protected by sdata_lock and local->mtx */ struct cfg80211_chan_def csa_chandef; + struct work_struct color_change_finalize_work; + struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */ @@ -937,6 +946,7 @@ struct ieee80211_sub_if_data { struct work_struct work; struct sk_buff_head skb_queue; + struct sk_buff_head status_queue; u8 needed_rx_chains; enum ieee80211_smps_mode smps_mode; @@ -1524,6 +1534,7 @@ struct ieee802_11_elems { const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; + const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; const u8 *uora_element; const u8 *mesh_id; const u8 *peering; @@ -1574,6 +1585,8 @@ struct ieee802_11_elems { u8 perr_len; u8 country_elem_len; u8 bssid_index_len; + u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT]; + u8 tx_pwr_env_num; /* whether a parse error occurred while retrieving these elements */ bool parse_error; @@ -1887,6 +1900,9 @@ void ieee80211_csa_finalize_work(struct work_struct *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); +/* color change handling */ +void ieee80211_color_change_finalize_work(struct work_struct *work); + /* interface handling */ #define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \ NETIF_F_HW_CSUM | NETIF_F_SG | \ @@ -2068,6 +2084,11 @@ ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, /* S1G */ void ieee80211_s1g_sta_rate_init(struct sta_info *sta); +bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb); +void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1e5e9fc45523..62c95597704b 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -462,6 +462,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do sdata_unlock(sdata); cancel_work_sync(&sdata->csa_finalize_work); + cancel_work_sync(&sdata->color_change_finalize_work); cancel_delayed_work_sync(&sdata->dfs_cac_timer_work); @@ -551,6 +552,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do */ ieee80211_free_keys(sdata, true); skb_queue_purge(&sdata->skb_queue); + skb_queue_purge(&sdata->status_queue); } spin_lock_irqsave(&local->queue_stop_reason_lock, flags); @@ -983,6 +985,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) } skb_queue_head_init(&sdata->skb_queue); + skb_queue_head_init(&sdata->status_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); return 0; @@ -1381,6 +1384,16 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, WARN_ON(1); break; } + } else if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_S1G) { + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_TEARDOWN: + case WLAN_S1G_TWT_SETUP: + ieee80211_s1g_rx_twt_action(sdata, skb); + break; + default: + break; + } } else if (ieee80211_is_ext(mgmt->frame_control)) { if (sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_queued_ext(sdata, skb); @@ -1436,6 +1449,24 @@ static void ieee80211_iface_process_skb(struct 
ieee80211_local *local, } } +static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_S1G) { + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_TEARDOWN: + case WLAN_S1G_TWT_SETUP: + ieee80211_s1g_status_twt_action(sdata, skb); + break; + default: + break; + } + } +} + static void ieee80211_iface_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = @@ -1465,6 +1496,16 @@ static void ieee80211_iface_work(struct work_struct *work) kcov_remote_stop(); } + /* process status queue */ + while ((skb = skb_dequeue(&sdata->status_queue))) { + kcov_remote_start_common(skb_get_kcov_handle(skb)); + + ieee80211_iface_process_status(sdata, skb); + kfree_skb(skb); + + kcov_remote_stop(); + } + /* then other type-dependent work */ switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -1528,9 +1569,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, } skb_queue_head_init(&sdata->skb_queue); + skb_queue_head_init(&sdata->status_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); INIT_WORK(&sdata->csa_finalize_work, ieee80211_csa_finalize_work); + INIT_WORK(&sdata->color_change_finalize_work, ieee80211_color_change_finalize_work); INIT_LIST_HEAD(&sdata->assigned_chanctx_list); INIT_LIST_HEAD(&sdata->reserved_chanctx_list); @@ -2001,9 +2044,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, netdev_set_default_ethtool_ops(ndev, &ieee80211_ethtool_ops); - /* MTU range: 256 - 2304 */ + /* MTU range is normally 256 - 2304, where the upper limit is + * the maximum MSDU size. Monitor interfaces send and receive + * MPDU and A-MSDU frames which may be much larger so we do + * not impose an upper limit in that case. + */ ndev->min_mtu = 256; - ndev->max_mtu = local->hw.max_mtu; + if (type == NL80211_IFTYPE_MONITOR) + ndev->max_mtu = 0; + else + ndev->max_mtu = local->hw.max_mtu; ret = cfg80211_register_netdevice(ndev); if (ret) { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 05f4c3c72619..45fb517591ee 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -260,6 +260,8 @@ static void ieee80211_restart_work(struct work_struct *work) flush_work(&local->radar_detected_work); rtnl_lock(); + /* we might do interface manipulations, so need both */ + wiphy_lock(local->hw.wiphy); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with hardware scan in progress\n", __func__); @@ -1018,7 +1020,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) iftd = &sband->iftype_data[i]; - supp_he = supp_he || (iftd && iftd->he_cap.has_he); + supp_he = supp_he || iftd->he_cap.has_he; } /* HT, VHT, HE require QoS, thus >= 4 queues */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2563473b5cf1..99ed68f7dc36 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -359,7 +359,12 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, put_unaligned_le32(it_present_val, it_present); - pos = (void *)(it_present + 1); + /* This references through an offset into it_optional[] rather + * than via it_present otherwise later uses of pos will cause + * the compiler to think we have walked past the end of the + * struct member. 
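The rx.c hunk above swaps the open-coded "1 << FIELD" bitmap updates for BIT() and, as the comment right before this point explains, derives the radiotap write cursor by indexing the flexible it_optional[] array rather than stepping a pointer past it_present, so the compiler's object-size tracking still covers the later writes. The standalone sketch below is plain userspace C, not mac80211 code; struct fake_radiotap_hdr and build_fields() are invented for illustration and only mirror the shape of that pattern.

/*
 * Standalone sketch: cursor derived from a trailing flexible array,
 * plus BIT() as a drop-in for the open-coded shift.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BIT(nr) (1UL << (nr))	/* same value as the open-coded 1 << nr */

struct fake_radiotap_hdr {
	uint8_t  version;
	uint8_t  pad;
	uint16_t len;
	uint32_t present;	/* always-present bitmap word */
	uint32_t optional[];	/* extended bitmap words, then field data */
};

static size_t build_fields(struct fake_radiotap_hdr *hdr, unsigned int n_extra)
{
	/* cursor comes from the flexible array, not (void *)(&hdr->present + 1) */
	uint8_t *pos = (uint8_t *)&hdr->optional[n_extra];

	hdr->present |= BIT(0);		/* mark one field as present */
	memset(pos, 0, 8);		/* 8 bytes of payload for that field */
	pos += 8;

	return pos - (uint8_t *)hdr;
}

int main(void)
{
	unsigned char buf[64] = { 0 };
	struct fake_radiotap_hdr *hdr = (void *)buf;

	printf("used %zu bytes\n", build_fields(hdr, 1));
	return 0;
}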
+ */ + pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional]; /* the order of the following fields is important */ @@ -372,7 +377,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, ieee80211_calculate_rx_timestamp(local, status, mpdulen, 0), pos); - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_TSFT); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT)); pos += 8; } @@ -396,7 +401,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos = 0; } else { int shift = 0; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE)); if (status->bw == RATE_INFO_BW_10) shift = 1; else if (status->bw == RATE_INFO_BW_5) @@ -433,7 +438,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, !(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { *pos = status->signal; rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTSIGNAL); + cpu_to_le32(BIT(IEEE80211_RADIOTAP_DBM_ANTSIGNAL)); pos++; } @@ -459,7 +464,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, if (status->encoding == RX_ENC_HT) { unsigned int stbc; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); *pos++ = local->hw.radiotap_mcs_details; *pos = 0; if (status->enc_flags & RX_ENC_FLAG_SHORT_GI) @@ -483,7 +488,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, while ((pos - (u8 *)rthdr) & 3) pos++; rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS); + cpu_to_le32(BIT(IEEE80211_RADIOTAP_AMPDU_STATUS)); put_unaligned_le32(status->ampdu_reference, pos); pos += 4; if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN) @@ -510,7 +515,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, if (status->encoding == RX_ENC_VHT) { u16 known = local->hw.radiotap_vht_details; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT)); put_unaligned_le16(known, pos); pos += 2; /* flags */ @@ -554,7 +559,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT; rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP); + cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP)); /* ensure 8 byte alignment */ while ((pos - (u8 *)rthdr) & 7) @@ -642,7 +647,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* ensure 2 byte alignment */ while ((pos - (u8 *)rthdr) & 1) pos++; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE)); memcpy(pos, &he, sizeof(he)); pos += sizeof(he); } @@ -652,14 +657,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* ensure 2 byte alignment */ while ((pos - (u8 *)rthdr) & 1) pos++; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE_MU); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE_MU)); memcpy(pos, &he_mu, sizeof(he_mu)); pos += sizeof(he_mu); } if (status->flag & RX_FLAG_NO_PSDU) { rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_ZERO_LEN_PSDU); + cpu_to_le32(BIT(IEEE80211_RADIOTAP_ZERO_LEN_PSDU)); *pos++ = status->zero_length_psdu_type; } @@ -667,7 +672,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* ensure 2 byte alignment */ while ((pos - (u8 *)rthdr) & 1) pos++; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_LSIG); + 
rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_LSIG)); memcpy(pos, &lsig, sizeof(lsig)); pos += sizeof(lsig); } @@ -3207,6 +3212,68 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static bool +ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct ieee80211_sub_if_data *sdata = rx->sdata; + const struct ieee80211_sta_he_cap *hecap; + struct ieee80211_supported_band *sband; + + /* TWT actions are only supported in AP for the moment */ + if (sdata->vif.type != NL80211_IFTYPE_AP) + return false; + + if (!rx->local->ops->add_twt_setup) + return false; + + sband = rx->local->hw.wiphy->bands[status->band]; + hecap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); + if (!hecap) + return false; + + if (!(hecap->he_cap_elem.mac_cap_info[0] & + IEEE80211_HE_MAC_CAP0_TWT_RES)) + return false; + + if (!rx->sta) + return false; + + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: { + struct ieee80211_twt_setup *twt; + + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + + 1 + /* action code */ + sizeof(struct ieee80211_twt_setup) + + 2 /* TWT req_type agrt */) + break; + + twt = (void *)mgmt->u.action.u.s1g.variable; + if (twt->element_id != WLAN_EID_S1G_TWT) + break; + + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + + 4 + /* action code + token + tlv */ + twt->length) + break; + + return true; /* queue the frame */ + } + case WLAN_S1G_TWT_TEARDOWN: + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2) + break; + + return true; /* queue the frame */ + default: + break; + } + + return false; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { @@ -3486,6 +3553,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) !mesh_path_sel_is_hwmp(sdata)) break; goto queue; + case WLAN_CATEGORY_S1G: + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: + case WLAN_S1G_TWT_TEARDOWN: + if (ieee80211_process_rx_twt_action(rx)) + goto queue; + break; + default: + break; + } + break; } return RX_CONTINUE; diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index c33f332b049a..7e35ab5b6166 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -6,6 +6,7 @@ #include <linux/ieee80211.h> #include <net/mac80211.h> #include "ieee80211_i.h" +#include "driver-ops.h" void ieee80211_s1g_sta_rate_init(struct sta_info *sta) { @@ -14,3 +15,182 @@ void ieee80211_s1g_sta_rate_init(struct sta_info *sta) sta->rx_stats.last_rate = STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_S1G); } + +bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + + if (likely(!ieee80211_is_action(mgmt->frame_control))) + return false; + + if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G)) + return false; + + return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP; +} + +static void +ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da, + const u8 *bssid, struct ieee80211_twt_setup *twt) +{ + int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length; + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + len); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = skb_put_zero(skb, len); + mgmt->frame_control = 
cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_S1G; + mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP; + memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + twt->length); + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_INTFL_MLME_CONN_TX | + IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); +} + +static void +ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata, + const u8 *da, const u8 *bssid, u8 flowid) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + struct sk_buff *skb; + u8 *id; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + IEEE80211_MIN_ACTION_SIZE + 2); + if (!skb) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + + mgmt->u.action.category = WLAN_CATEGORY_S1G; + mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN; + id = (u8 *)mgmt->u.action.u.s1g.variable; + *id = flowid; + + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | + IEEE80211_TX_CTL_REQ_TX_STATUS; + ieee80211_tx_skb(sdata, skb); +} + +static void +ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_params *twt_agrt = (void *)twt->params; + + twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST); + + /* broadcast TWT not supported yet */ + if (twt->control & IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST) { + le16p_replace_bits(&twt_agrt->req_type, + TWT_SETUP_CMD_REJECT, + IEEE80211_TWT_REQTYPE_SETUP_CMD); + goto out; + } + + drv_add_twt_setup(sdata->local, sdata, &sta->sta, twt); +out: + ieee80211_s1g_send_twt_setup(sdata, mgmt->sa, sdata->vif.addr, twt); +} + +static void +ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + + drv_twt_teardown_request(sdata->local, sdata, &sta->sta, + mgmt->u.action.u.s1g.variable[0]); +} + +static void +ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_params *twt_agrt = (void *)twt->params; + u8 flowid = le16_get_bits(twt_agrt->req_type, + IEEE80211_TWT_REQTYPE_FLOWID); + + drv_twt_teardown_request(sdata->local, sdata, &sta->sta, flowid); + + ieee80211_s1g_send_twt_teardown(sdata, mgmt->sa, sdata->vif.addr, + flowid); +} + +void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + mutex_lock(&local->sta_mtx); + + sta = sta_info_get_bss(sdata, mgmt->sa); + if (!sta) + goto out; + + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: + 
ieee80211_s1g_rx_twt_setup(sdata, sta, skb); + break; + case WLAN_S1G_TWT_TEARDOWN: + ieee80211_s1g_rx_twt_teardown(sdata, sta, skb); + break; + default: + break; + } + +out: + mutex_unlock(&local->sta_mtx); +} + +void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + mutex_lock(&local->sta_mtx); + + sta = sta_info_get_bss(sdata, mgmt->da); + if (!sta) + goto out; + + switch (mgmt->u.action.u.s1g.action_code) { + case WLAN_S1G_TWT_SETUP: + /* process failed twt setup frames */ + ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb); + break; + default: + break; + } + +out: + mutex_unlock(&local->sta_mtx); +} diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a5505ee51229..2b5acb37587f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -543,7 +543,7 @@ static int sta_info_insert_check(struct sta_info *sta) return -ENETDOWN; if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) || - is_multicast_ether_addr(sta->sta.addr))) + !is_valid_ether_addr(sta->sta.addr))) return -EINVAL; /* The RCU read lock is required by rhashtable due to diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bae321ff77f6..f6f63a0b1b72 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -305,8 +305,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, memset(rthdr, 0, rtap_len); rthdr->it_len = cpu_to_le16(rtap_len); rthdr->it_present = - cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | - (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + cpu_to_le32(BIT(IEEE80211_RADIOTAP_TX_FLAGS) | + BIT(IEEE80211_RADIOTAP_DATA_RETRIES)); pos = (unsigned char *)(rthdr + 1); /* @@ -331,7 +331,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, sband->bitrates[info->status.rates[0].idx].bitrate; if (legacy_rate) { - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE)); *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift)); /* padding for tx flags */ pos += 2; @@ -358,7 +358,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, if (status && status->rate && (status->rate->flags & RATE_INFO_FLAGS_MCS)) { - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | IEEE80211_RADIOTAP_MCS_HAVE_GI | IEEE80211_RADIOTAP_MCS_HAVE_BW; @@ -374,7 +374,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, (IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH); - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT)); /* required alignment from rthdr */ pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); @@ -419,7 +419,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) { struct ieee80211_radiotap_he *he; - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_HE)); /* required alignment from rthdr */ pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); @@ -495,7 +495,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_MCS * IEEE80211_RADIOTAP_VHT */ if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) { - 
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_MCS)); pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | IEEE80211_RADIOTAP_MCS_HAVE_GI | IEEE80211_RADIOTAP_MCS_HAVE_BW; @@ -512,7 +512,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, (IEEE80211_RADIOTAP_VHT_KNOWN_GI | IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH); - rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); + rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_VHT)); /* required alignment from rthdr */ pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); @@ -705,13 +705,26 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, /* Check to see if packet is a TDLS teardown packet */ if (ieee80211_is_data(hdr->frame_control) && (ieee80211_get_tdls_action(skb, hdr_size) == - WLAN_TDLS_TEARDOWN)) + WLAN_TDLS_TEARDOWN)) { ieee80211_tdls_td_tx_handle(local, sdata, skb, info->flags); - else + } else if (ieee80211_s1g_is_twt_setup(skb)) { + if (!acked) { + struct sk_buff *qskb; + + qskb = skb_clone(skb, GFP_ATOMIC); + if (qskb) { + skb_queue_tail(&sdata->status_queue, + qskb); + ieee80211_queue_work(&local->hw, + &sdata->work); + } + } + } else { ieee80211_mgd_conn_tx_status(sdata, hdr->frame_control, acked); + } } rcu_read_unlock(); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index f6ef15366938..9e8381bef7ed 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2825,6 +2825,73 @@ DEFINE_EVENT(sta_flag_evt, drv_sta_set_decap_offload, TP_ARGS(local, sdata, sta, enabled) ); +TRACE_EVENT(drv_add_twt_setup, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, + struct ieee80211_twt_setup *twt, + struct ieee80211_twt_params *twt_agrt), + + TP_ARGS(local, sta, twt, twt_agrt), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, dialog_token) + __field(u8, control) + __field(__le16, req_type) + __field(__le64, twt) + __field(u8, duration) + __field(__le16, mantissa) + __field(u8, channel) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->dialog_token = twt->dialog_token; + __entry->control = twt->control; + __entry->req_type = twt_agrt->req_type; + __entry->twt = twt_agrt->twt; + __entry->duration = twt_agrt->min_twt_dur; + __entry->mantissa = twt_agrt->mantissa; + __entry->channel = twt_agrt->channel; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT + " token:%d control:0x%02x req_type:0x%04x" + " twt:%llu duration:%d mantissa:%d channel:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->dialog_token, + __entry->control, le16_to_cpu(__entry->req_type), + le64_to_cpu(__entry->twt), __entry->duration, + le16_to_cpu(__entry->mantissa), __entry->channel + ) +); + +TRACE_EVENT(drv_twt_teardown_request, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sta *sta, u8 flowid), + + TP_ARGS(local, sta, flowid), + + TP_STRUCT__entry( + LOCAL_ENTRY + STA_ENTRY + __field(u8, flowid) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + STA_ASSIGN; + __entry->flowid = flowid; + ), + + TP_printk( + LOCAL_PR_FMT STA_PR_FMT " flowid:%d", + LOCAL_PR_ARG, STA_PR_ARG, __entry->flowid + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8509778ff31f..2d1193ed3eb5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3242,7 +3242,9 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata, if (info->control.flags & IEEE80211_TX_CTRL_AMSDU) return true; - if 
(!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr))) + if (!ieee80211_amsdu_realloc_pad(local, skb, + sizeof(*amsdu_hdr) + + local->hw.extra_tx_headroom)) return false; data = skb_push(skb, sizeof(*amsdu_hdr)); @@ -4782,11 +4784,11 @@ static int ieee80211_beacon_add_tim(struct ieee80211_sub_if_data *sdata, static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, struct beacon_data *beacon) { + u8 *beacon_data, count, max_count = 1; struct probe_resp *resp; - u8 *beacon_data; size_t beacon_data_len; + u16 *bcn_offsets; int i; - u8 count = beacon->cntdwn_current_counter; switch (sdata->vif.type) { case NL80211_IFTYPE_AP: @@ -4806,21 +4808,27 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, } rcu_read_lock(); - for (i = 0; i < IEEE80211_MAX_CNTDWN_COUNTERS_NUM; ++i) { - resp = rcu_dereference(sdata->u.ap.probe_resp); + resp = rcu_dereference(sdata->u.ap.probe_resp); + + bcn_offsets = beacon->cntdwn_counter_offsets; + count = beacon->cntdwn_current_counter; + if (sdata->vif.csa_active) + max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM; - if (beacon->cntdwn_counter_offsets[i]) { - if (WARN_ON_ONCE(beacon->cntdwn_counter_offsets[i] >= - beacon_data_len)) { + for (i = 0; i < max_count; ++i) { + if (bcn_offsets[i]) { + if (WARN_ON_ONCE(bcn_offsets[i] >= beacon_data_len)) { rcu_read_unlock(); return; } - - beacon_data[beacon->cntdwn_counter_offsets[i]] = count; + beacon_data[bcn_offsets[i]] = count; } - if (sdata->vif.type == NL80211_IFTYPE_AP && resp) - resp->data[resp->cntdwn_counter_offsets[i]] = count; + if (sdata->vif.type == NL80211_IFTYPE_AP && resp) { + u16 *resp_offsets = resp->cntdwn_counter_offsets; + + resp->data[resp_offsets[i]] = count; + } } rcu_read_unlock(); } @@ -5030,6 +5038,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, if (offs) { offs->tim_offset = beacon->head_len; offs->tim_length = skb->len - beacon->head_len; + offs->cntdwn_counter_offs[0] = beacon->cntdwn_counter_offsets[0]; /* for AP the csa offsets are from tail */ csa_off_base = skb->len; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 05e96212b104..49cb96d25169 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1336,6 +1336,18 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->rsnx = pos; elems->rsnx_len = elen; break; + case WLAN_EID_TX_POWER_ENVELOPE: + if (elen < 1 || + elen > sizeof(struct ieee80211_tx_pwr_env)) + break; + + if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env)) + break; + + elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos; + elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen; + elems->tx_pwr_env_num++; + break; case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? 
&crc : NULL, diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 7d738bd06f2c..8b235468c88f 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -21,43 +21,50 @@ struct mptcp_pernet { struct ctl_table_header *ctl_table_hdr; #endif - u8 mptcp_enabled; unsigned int add_addr_timeout; + unsigned int stale_loss_cnt; + u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; }; -static struct mptcp_pernet *mptcp_get_pernet(struct net *net) +static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) { return net_generic(net, mptcp_pernet_id); } -int mptcp_is_enabled(struct net *net) +int mptcp_is_enabled(const struct net *net) { return mptcp_get_pernet(net)->mptcp_enabled; } -unsigned int mptcp_get_add_addr_timeout(struct net *net) +unsigned int mptcp_get_add_addr_timeout(const struct net *net) { return mptcp_get_pernet(net)->add_addr_timeout; } -int mptcp_is_checksum_enabled(struct net *net) +int mptcp_is_checksum_enabled(const struct net *net) { return mptcp_get_pernet(net)->checksum_enabled; } -int mptcp_allow_join_id0(struct net *net) +int mptcp_allow_join_id0(const struct net *net) { return mptcp_get_pernet(net)->allow_join_initial_addr_port; } +unsigned int mptcp_stale_loss_cnt(const struct net *net) +{ + return mptcp_get_pernet(net)->stale_loss_cnt; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; + pernet->stale_loss_cnt = 4; } #ifdef CONFIG_SYSCTL @@ -95,6 +102,12 @@ static struct ctl_table mptcp_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, + { + .procname = "stale_loss_cnt", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + }, {} }; @@ -114,6 +127,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[1].data = &pernet->add_addr_timeout; table[2].data = &pernet->checksum_enabled; table[3].data = &pernet->allow_join_initial_addr_port; + table[4].data = &pernet->stale_loss_cnt; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index ff2cc0e3273d..b21ff9be04c6 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -44,7 +44,11 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), + SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX), + SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), + SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), + SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 0663cb12b448..ecd3d8b117e0 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -37,7 +37,11 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */ MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */ MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */ + MPTCP_MIB_MPFAILTX, /* Transmit a MP_FAIL */ + MPTCP_MIB_MPFAILRX, /* Received a MP_FAIL */ MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */ + MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */ + MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4452455aef7f..c41273cefc51 100644 --- 
a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -81,12 +81,11 @@ static void mptcp_parse_option(const struct sk_buff *skb, * is if both hosts in their SYNs set A=0." */ if (flags & MPTCP_CAP_CHECKSUM_REQD) - mp_opt->csum_reqd = 1; + mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; - if (flags & MPTCP_CAP_DENY_JOIN_ID0) - mp_opt->deny_join_id0 = 1; + mp_opt->deny_join_id0 = !!(flags & MPTCP_CAP_DENY_JOIN_ID0); - mp_opt->mp_capable = 1; + mp_opt->suboptions |= OPTIONS_MPTCP_MPC; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { mp_opt->sndr_key = get_unaligned_be64(ptr); ptr += 8; @@ -101,7 +100,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, * equivalent to those in a DSS option and can be used * interchangeably." */ - mp_opt->dss = 1; + mp_opt->suboptions |= OPTION_MPTCP_DSS; mp_opt->use_map = 1; mp_opt->mpc_map = 1; mp_opt->data_len = get_unaligned_be16(ptr); @@ -109,7 +108,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, } if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); - mp_opt->csum_reqd = 1; + mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", @@ -118,7 +117,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, break; case MPTCPOPT_MP_JOIN: - mp_opt->mp_join = 1; + mp_opt->suboptions |= OPTIONS_MPTCP_MPJ; if (opsize == TCPOLEN_MPTCP_MPJ_SYN) { mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; @@ -144,7 +143,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); pr_debug("MP_JOIN hmac"); } else { - mp_opt->mp_join = 0; + mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ; } break; @@ -192,8 +191,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) break; - mp_opt->dss = 1; - + mp_opt->suboptions |= OPTION_MPTCP_DSS; if (mp_opt->use_ack) { if (mp_opt->ack64) { mp_opt->data_ack = get_unaligned_be64(ptr); @@ -222,14 +220,15 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { - mp_opt->csum_reqd = 1; + mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); ptr += 2; } pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); + mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD), + mp_opt->csum); } break; @@ -260,8 +259,10 @@ static void mptcp_parse_option(const struct sk_buff *skb, break; } - mp_opt->add_addr = 1; + mp_opt->suboptions |= OPTION_MPTCP_ADD_ADDR; mp_opt->addr.id = *ptr++; + mp_opt->addr.port = 0; + mp_opt->ahmac = 0; if (mp_opt->addr.family == AF_INET) { memcpy((u8 *)&mp_opt->addr.addr.s_addr, (u8 *)ptr, 4); ptr += 4; @@ -298,7 +299,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr++; - mp_opt->rm_addr = 1; + mp_opt->suboptions |= OPTION_MPTCP_RM_ADDR; mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; for (i = 0; i < mp_opt->rm_list.nr; i++) mp_opt->rm_list.ids[i] = *ptr++; @@ -309,7 +310,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (opsize != TCPOLEN_MPTCP_PRIO) break; - mp_opt->mp_prio = 1; + mp_opt->suboptions |= OPTION_MPTCP_PRIO; mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; pr_debug("MP_PRIO: prio=%d", mp_opt->backup); break; @@ -321,7 +322,7 @@ static void mptcp_parse_option(const 
struct sk_buff *skb, ptr += 2; mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; - mp_opt->fastclose = 1; + mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE; break; case MPTCPOPT_RST: @@ -330,12 +331,23 @@ static void mptcp_parse_option(const struct sk_buff *skb, if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) break; - mp_opt->reset = 1; + + mp_opt->suboptions |= OPTION_MPTCP_RST; flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; break; + case MPTCPOPT_MP_FAIL: + if (opsize != TCPOLEN_MPTCP_FAIL) + break; + + ptr += 2; + mp_opt->suboptions |= OPTION_MPTCP_FAIL; + mp_opt->fail_seq = get_unaligned_be64(ptr); + pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq); + break; + default: break; } @@ -345,25 +357,12 @@ void mptcp_get_options(const struct sock *sk, const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; /* initialize option status */ - mp_opt->mp_capable = 0; - mp_opt->mp_join = 0; - mp_opt->add_addr = 0; - mp_opt->ahmac = 0; - mp_opt->fastclose = 0; - mp_opt->addr.port = 0; - mp_opt->rm_addr = 0; - mp_opt->dss = 0; - mp_opt->mp_prio = 0; - mp_opt->reset = 0; - mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled); - mp_opt->deny_join_id0 = 0; + mp_opt->suboptions = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -592,6 +591,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); + opts->suboptions = OPTION_MPTCP_DSS; ret = true; } @@ -615,6 +615,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; + opts->suboptions = OPTION_MPTCP_DSS; WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ @@ -667,29 +668,34 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * bool port; int len; - if ((mptcp_pm_should_add_signal_ipv6(msk) || - mptcp_pm_should_add_signal_port(msk) || - mptcp_pm_should_add_signal_echo(msk)) && - skb && skb_is_tcp_pure_ack(skb)) { - pr_debug("drop other suboptions"); - opts->suboptions = 0; - opts->ext_copy.use_ack = 0; - opts->ext_copy.use_map = 0; - remaining += opt_size; - drop_other_suboptions = true; - } - + /* add addr will strip the existing options, be sure to avoid breaking + * MPC/MPJ handshakes + */ if (!mptcp_pm_should_add_signal(msk) || - !(mptcp_pm_add_addr_signal(msk, remaining, &opts->addr, &echo, &port))) + (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || + !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr, + &echo, &port, &drop_other_suboptions)) return false; + if (drop_other_suboptions) + remaining += opt_size; len = mptcp_add_addr_len(opts->addr.family, echo, port); if (remaining < len) return false; *size = len; - if (drop_other_suboptions) + if (drop_other_suboptions) { + pr_debug("drop other suboptions"); + opts->suboptions = 0; + + /* note that e.g. 
DSS could have written into the memory + * aliased by ahmac, we must reset the field here + * to avoid appending the hmac even for ADD_ADDR echo + * options + */ + opts->ahmac = 0; *size -= opt_size; + } opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { opts->ahmac = add_addr_generate_hmac(msk->local_key, @@ -739,7 +745,12 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - if (!subflow->send_mp_prio) + /* can't send MP_PRIO with MPC, as they share the same option space: + * 'backup'. Also it makes no sense at all + */ + if (!subflow->send_mp_prio || + ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | + OPTION_MPTCP_MPC_ACK) & opts->suboptions)) return false; /* account for the trailing 'nop' option */ @@ -755,7 +766,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, return true; } -static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, +static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -763,12 +774,36 @@ static noinline void mptcp_established_options_rst(struct sock *sk, struct sk_bu const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); if (remaining < TCPOLEN_MPTCP_RST) - return; + return false; *size = TCPOLEN_MPTCP_RST; opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; + + return true; +} + +static bool mptcp_established_options_mp_fail(struct sock *sk, + unsigned int *size, + unsigned int remaining, + struct mptcp_out_options *opts) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + + if (likely(!subflow->send_mp_fail)) + return false; + + if (remaining < TCPOLEN_MPTCP_FAIL) + return false; + + *size = TCPOLEN_MPTCP_FAIL; + opts->suboptions |= OPTION_MPTCP_FAIL; + opts->fail_seq = subflow->map_seq; + + pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + + return true; } bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, @@ -787,15 +822,28 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, return false; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { - mptcp_established_options_rst(sk, skb, size, remaining, opts); + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + } + if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + } return true; } snd_data_fin = mptcp_data_fin_enabled(msk); if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; - else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) + else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) { ret = true; + if (mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { + *size += opt_size; + remaining -= opt_size; + return true; + } + } /* we reserved enough space for the above options, and exceeding the * TCP option space would be fatal @@ -868,7 +916,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, */ if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && - subflow->mp_join && mp_opt->mp_join && + subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && 
READ_ONCE(msk->pm.server_side)) tcp_send_ack(ssk); goto fully_established; @@ -885,25 +933,21 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, return subflow->mp_capable; } - if (mp_opt->dss && mp_opt->use_ack) { + if (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo)) { /* subflows are fully established as soon as we get any - * additional ack. + * additional ack, including ADD_ADDR. */ subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); goto fully_established; } - if (mp_opt->add_addr) { - WRITE_ONCE(msk->fully_established, true); - return true; - } - /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows. */ - if (!mp_opt->mp_capable) { + if (!(mp_opt->suboptions & OPTIONS_MPTCP_MPC)) { if (subflow->mp_join) goto reset; subflow->mp_capable = 0; @@ -975,9 +1019,11 @@ static void ack_update_msk(struct mptcp_sock *msk, old_snd_una = msk->snd_una; new_snd_una = mptcp_expand_seq(old_snd_una, mp_opt->data_ack, mp_opt->ack64); - /* ACK for data not even sent yet? Ignore. */ - if (after64(new_snd_una, snd_nxt)) - new_snd_una = old_snd_una; + /* ACK for data not even sent yet and even above recovery bound? Ignore.*/ + if (unlikely(after64(new_snd_una, snd_nxt))) { + if (!msk->recovery || after64(new_snd_una, msk->recovery_snd_nxt)) + new_snd_una = old_snd_una; + } new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; @@ -1065,48 +1111,51 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return sk->sk_state != TCP_CLOSE; - if (mp_opt.fastclose && - msk->local_key == mp_opt.rcvr_key) { - WRITE_ONCE(msk->rcv_fastclose, true); - mptcp_schedule_work((struct sock *)msk); - } + if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { + if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && + msk->local_key == mp_opt.rcvr_key) { + WRITE_ONCE(msk->rcv_fastclose, true); + mptcp_schedule_work((struct sock *)msk); + } - if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { - if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &mp_opt.addr); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); - } else { - mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); - mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); + if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && + add_addr_hmac_valid(msk, &mp_opt)) { + if (!mp_opt.echo) { + mptcp_pm_add_addr_received(msk, &mp_opt.addr); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); + } else { + mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); + mptcp_pm_del_add_timer(msk, &mp_opt.addr, true); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD); + } + + if (mp_opt.addr.port) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); } - if (mp_opt.addr.port) - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_PORTADD); + if (mp_opt.suboptions & OPTION_MPTCP_RM_ADDR) + mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); - mp_opt.add_addr = 0; - } + if (mp_opt.suboptions & OPTION_MPTCP_PRIO) { + mptcp_pm_mp_prio_received(sk, mp_opt.backup); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); + } - if (mp_opt.rm_addr) { - mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); - mp_opt.rm_addr = 0; - } + if (mp_opt.suboptions & OPTION_MPTCP_FAIL) { + mptcp_pm_mp_fail_received(sk, mp_opt.fail_seq); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILRX); + } - 
if (mp_opt.mp_prio) { - mptcp_pm_mp_prio_received(sk, mp_opt.backup); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIORX); - mp_opt.mp_prio = 0; - } + if (mp_opt.suboptions & OPTION_MPTCP_RST) { + subflow->reset_seen = 1; + subflow->reset_reason = mp_opt.reset_reason; + subflow->reset_transient = mp_opt.reset_transient; + } - if (mp_opt.reset) { - subflow->reset_seen = 1; - subflow->reset_reason = mp_opt.reset_reason; - subflow->reset_transient = mp_opt.reset_transient; + if (!(mp_opt.suboptions & OPTION_MPTCP_DSS)) + return true; } - if (!mp_opt.dss) - return true; - /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ @@ -1133,7 +1182,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) memset(mpext, 0, sizeof(*mpext)); - if (mp_opt.use_map) { + if (likely(mp_opt.use_map)) { if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data @@ -1153,7 +1202,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; - mpext->csum_reqd = mp_opt.csum_reqd; + mpext->csum_reqd = !!(mp_opt.suboptions & OPTION_MPTCP_CSUMREQD); if (mpext->csum_reqd) mpext->csum = mp_opt.csum; @@ -1200,8 +1249,88 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext) void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { - if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | - OPTION_MPTCP_MPC_ACK) & opts->suboptions) { + if (unlikely(OPTION_MPTCP_FAIL & opts->suboptions)) { + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_fail = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_FAIL, + TCPOLEN_MPTCP_FAIL, + 0, 0); + put_unaligned_be64(opts->fail_seq, ptr); + ptr += 2; + } + + /* RST is mutually exclusive with everything else */ + if (unlikely(OPTION_MPTCP_RST & opts->suboptions)) { + *ptr++ = mptcp_option(MPTCPOPT_RST, + TCPOLEN_MPTCP_RST, + opts->reset_transient, + opts->reset_reason); + return; + } + + /* DSS, MPC, MPJ and ADD_ADDR are mutually exclusive, see + * mptcp_established_options*() + */ + if (likely(OPTION_MPTCP_DSS & opts->suboptions)) { + struct mptcp_ext *mpext = &opts->ext_copy; + u8 len = TCPOLEN_MPTCP_DSS_BASE; + u8 flags = 0; + + if (mpext->use_ack) { + flags = MPTCP_DSS_HAS_ACK; + if (mpext->ack64) { + len += TCPOLEN_MPTCP_DSS_ACK64; + flags |= MPTCP_DSS_ACK64; + } else { + len += TCPOLEN_MPTCP_DSS_ACK32; + } + } + + if (mpext->use_map) { + len += TCPOLEN_MPTCP_DSS_MAP64; + + /* Use only 64-bit mapping flags for now, add + * support for optional 32-bit mappings later. 
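The mptcp/options.c changes above replace the per-suboption u8 fields in mptcp_options_received with a single suboptions bitmask: the parser sets one bit per received suboption, the receive path can fast-path a pure-DSS packet with a single compare, and the writer branches on mutually exclusive groups (MP_FAIL and RST first, then DSS, MPC, MPJ or ADD_ADDR). The toy program below models only that bookkeeping; the OPT_* values, struct parsed_opts and the helper names are invented for the example and are not the kernel definitions.

/* Toy model of the "one bitmask instead of many flags" bookkeeping. */
#include <stdint.h>
#include <stdio.h>

#define OPT_MPC   (1u << 0)
#define OPT_MPJ   (1u << 1)
#define OPT_DSS   (1u << 2)
#define OPT_ADDR  (1u << 3)
#define OPT_RST   (1u << 4)
#define OPT_FAIL  (1u << 5)

struct parsed_opts {
	uint16_t suboptions;	/* one bit per received suboption */
	uint64_t fail_seq;
};

static void parse_reset(struct parsed_opts *o)
{
	o->suboptions = 0;	/* replaces a dozen individual "xxx = 0" stores */
}

static void parse_one(struct parsed_opts *o, int kind, uint64_t val)
{
	switch (kind) {
	case 5:			/* stands in for MP_FAIL */
		o->suboptions |= OPT_FAIL;
		o->fail_seq = val;
		break;
	case 2:			/* stands in for DSS */
		o->suboptions |= OPT_DSS;
		break;
	}
}

int main(void)
{
	struct parsed_opts o;

	parse_reset(&o);
	parse_one(&o, 2, 0);
	parse_one(&o, 5, 12345);

	/* fast path: a pure-DSS packet skips the rarer option handling */
	if (o.suboptions != OPT_DSS)
		printf("slow path, fail_seq=%llu\n",
		       (unsigned long long)(o.suboptions & OPT_FAIL ? o.fail_seq : 0));
	return 0;
}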
+ */ + flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; + if (mpext->data_fin) + flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + + *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); + + if (mpext->use_ack) { + if (mpext->ack64) { + put_unaligned_be64(mpext->data_ack, ptr); + ptr += 2; + } else { + put_unaligned_be32(mpext->data_ack32, ptr); + ptr += 1; + } + } + + if (mpext->use_map) { + put_unaligned_be64(mpext->data_seq, ptr); + ptr += 2; + put_unaligned_be32(mpext->subflow_seq, ptr); + ptr += 1; + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } + } + } else if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | + OPTION_MPTCP_MPC_ACK) & opts->suboptions) { u8 len, flag = MPTCP_CAP_HMAC_SHA256; if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { @@ -1248,10 +1377,31 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); } ptr += 1; - } -mp_capable_done: - if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { + /* MPC is additionally mutually exclusive with MP_PRIO */ + goto mp_capable_done; + } else if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { + *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, + TCPOLEN_MPTCP_MPJ_SYN, + opts->backup, opts->join_id); + put_unaligned_be32(opts->token, ptr); + ptr += 1; + put_unaligned_be32(opts->nonce, ptr); + ptr += 1; + } else if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) { + *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, + TCPOLEN_MPTCP_MPJ_SYNACK, + opts->backup, opts->join_id); + put_unaligned_be64(opts->thmac, ptr); + ptr += 2; + put_unaligned_be32(opts->nonce, ptr); + ptr += 1; + } else if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) { + *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, + TCPOLEN_MPTCP_MPJ_ACK, 0, 0); + memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN); + ptr += 5; + } else if (OPTION_MPTCP_ADD_ADDR & opts->suboptions) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; u8 echo = MPTCP_ADDR_ECHO; @@ -1309,6 +1459,19 @@ mp_capable_done: } } + if (OPTION_MPTCP_PRIO & opts->suboptions) { + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + + subflow = mptcp_subflow_ctx(ssk); + subflow->send_mp_prio = 0; + + *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO, + TCPOLEN_MPTCP_PRIO, + opts->backup, TCPOPT_NOP); + } + +mp_capable_done: if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { u8 i = 1; @@ -1329,107 +1492,6 @@ mp_capable_done: } } - if (OPTION_MPTCP_PRIO & opts->suboptions) { - const struct sock *ssk = (const struct sock *)tp; - struct mptcp_subflow_context *subflow; - - subflow = mptcp_subflow_ctx(ssk); - subflow->send_mp_prio = 0; - - *ptr++ = mptcp_option(MPTCPOPT_MP_PRIO, - TCPOLEN_MPTCP_PRIO, - opts->backup, TCPOPT_NOP); - } - - if (OPTION_MPTCP_MPJ_SYN & opts->suboptions) { - *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, - TCPOLEN_MPTCP_MPJ_SYN, - opts->backup, opts->join_id); - put_unaligned_be32(opts->token, ptr); - ptr += 1; - put_unaligned_be32(opts->nonce, ptr); - ptr += 1; - } - - if (OPTION_MPTCP_MPJ_SYNACK & opts->suboptions) { - *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, - TCPOLEN_MPTCP_MPJ_SYNACK, - opts->backup, opts->join_id); - put_unaligned_be64(opts->thmac, ptr); - ptr += 2; - put_unaligned_be32(opts->nonce, ptr); - ptr += 1; - } - - if (OPTION_MPTCP_MPJ_ACK & opts->suboptions) { - *ptr++ = mptcp_option(MPTCPOPT_MP_JOIN, - TCPOLEN_MPTCP_MPJ_ACK, 0, 0); - memcpy(ptr, opts->hmac, MPTCPOPT_HMAC_LEN); - ptr += 
5; - } - - if (OPTION_MPTCP_RST & opts->suboptions) - *ptr++ = mptcp_option(MPTCPOPT_RST, - TCPOLEN_MPTCP_RST, - opts->reset_transient, - opts->reset_reason); - - if (opts->ext_copy.use_ack || opts->ext_copy.use_map) { - struct mptcp_ext *mpext = &opts->ext_copy; - u8 len = TCPOLEN_MPTCP_DSS_BASE; - u8 flags = 0; - - if (mpext->use_ack) { - flags = MPTCP_DSS_HAS_ACK; - if (mpext->ack64) { - len += TCPOLEN_MPTCP_DSS_ACK64; - flags |= MPTCP_DSS_ACK64; - } else { - len += TCPOLEN_MPTCP_DSS_ACK32; - } - } - - if (mpext->use_map) { - len += TCPOLEN_MPTCP_DSS_MAP64; - - /* Use only 64-bit mapping flags for now, add - * support for optional 32-bit mappings later. - */ - flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; - if (mpext->data_fin) - flags |= MPTCP_DSS_DATA_FIN; - - if (opts->csum_reqd) - len += TCPOLEN_MPTCP_DSS_CHECKSUM; - } - - *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); - - if (mpext->use_ack) { - if (mpext->ack64) { - put_unaligned_be64(mpext->data_ack, ptr); - ptr += 2; - } else { - put_unaligned_be32(mpext->data_ack32, ptr); - ptr += 1; - } - } - - if (mpext->use_map) { - put_unaligned_be64(mpext->data_seq, ptr); - ptr += 2; - put_unaligned_be32(mpext->subflow_seq, ptr); - ptr += 1; - if (opts->csum_reqd) { - put_unaligned_be32(mpext->data_len << 16 | - mptcp_make_csum(mpext), ptr); - } else { - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); - } - } - } - if (tp) mptcp_set_rwin(tp); } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 639271e09604..6ab386ff3294 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -10,6 +10,8 @@ #include <net/mptcp.h> #include "protocol.h" +#include "mib.h" + /* path manager command handlers */ int mptcp_pm_announce_addr(struct mptcp_sock *msk, @@ -18,23 +20,23 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, { u8 add_addr = READ_ONCE(msk->pm.addr_signal); - pr_debug("msk=%p, local_id=%d", msk, addr->id); + pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); lockdep_assert_held(&msk->pm.lock); - if (add_addr) { - pr_warn("addr_signal error, add_addr=%d", add_addr); + if (add_addr & + (echo ? 
BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { + pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); return -EINVAL; } - msk->pm.local = *addr; - add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); - if (echo) + if (echo) { + msk->pm.remote = *addr; add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); - if (addr->family == AF_INET6) - add_addr |= BIT(MPTCP_ADD_ADDR_IPV6); - if (addr->port) - add_addr |= BIT(MPTCP_ADD_ADDR_PORT); + } else { + msk->pm.local = *addr; + add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); + } WRITE_ONCE(msk->pm.addr_signal, add_addr); return 0; } @@ -247,12 +249,21 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); } +void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) +{ + pr_debug("fail_seq=%llu", fail_seq); +} + /* path manager helpers */ -bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr, bool *echo, bool *port) +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, + unsigned int opt_size, unsigned int remaining, + struct mptcp_addr_info *addr, bool *echo, + bool *port, bool *drop_other_suboptions) { int ret = false; + u8 add_addr; + u8 family; spin_lock_bh(&msk->pm.lock); @@ -260,14 +271,30 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, if (!mptcp_pm_should_add_signal(msk)) goto out_unlock; + /* always drop every other options for pure ack ADD_ADDR; this is a + * plain dup-ack from TCP perspective. The other MPTCP-relevant info, + * if any, will be carried by the 'original' TCP ack + */ + if (skb && skb_is_tcp_pure_ack(skb)) { + remaining += opt_size; + *drop_other_suboptions = true; + } + *echo = mptcp_pm_should_add_signal_echo(msk); - *port = mptcp_pm_should_add_signal_port(msk); + *port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); - if (remaining < mptcp_add_addr_len(msk->pm.local.family, *echo, *port)) + family = *echo ? 
msk->pm.remote.family : msk->pm.local.family; + if (remaining < mptcp_add_addr_len(family, *echo, *port)) goto out_unlock; - *saddr = msk->pm.local; - WRITE_ONCE(msk->pm.addr_signal, 0); + if (*echo) { + *addr = msk->pm.remote; + add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); + } else { + *addr = msk->pm.local; + add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); + } + WRITE_ONCE(msk->pm.addr_signal, add_addr); ret = true; out_unlock: @@ -279,6 +306,7 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list) { int ret = false, len; + u8 rm_addr; spin_lock_bh(&msk->pm.lock); @@ -286,16 +314,17 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, if (!mptcp_pm_should_rm_signal(msk)) goto out_unlock; + rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); if (len < 0) { - WRITE_ONCE(msk->pm.addr_signal, 0); + WRITE_ONCE(msk->pm.addr_signal, rm_addr); goto out_unlock; } if (remaining < len) goto out_unlock; *rm_list = msk->pm.rm_list_tx; - WRITE_ONCE(msk->pm.addr_signal, 0); + WRITE_ONCE(msk->pm.addr_signal, rm_addr); ret = true; out_unlock: @@ -308,6 +337,25 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, skc); } +void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); + + /* keep track of rtx periods with no progress */ + if (!subflow->stale_count) { + subflow->stale_rcv_tstamp = rcv_tstamp; + subflow->stale_count++; + } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { + if (subflow->stale_count < U8_MAX) + subflow->stale_count++; + mptcp_pm_nl_subflow_chk_stale(msk, ssk); + } else { + subflow->stale_count = 0; + mptcp_subflow_set_active(subflow); + } +} + void mptcp_pm_data_init(struct mptcp_sock *msk) { msk->pm.add_addr_signaled = 0; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56263c2c4014..1e4289c507ff 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -46,6 +46,7 @@ struct pm_nl_pernet { spinlock_t lock; struct list_head local_addr_list; unsigned int addrs; + unsigned int stale_loss_cnt; unsigned int add_addr_signal_max; unsigned int add_addr_accept_max; unsigned int local_addr_max; @@ -316,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer) if (!entry->addr.id) return; - if (mptcp_pm_should_add_signal(msk)) { + if (mptcp_pm_should_add_signal_addr(msk)) { sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); goto out; } spin_lock_bh(&msk->pm.lock); - if (!mptcp_pm_should_add_signal(msk)) { + if (!mptcp_pm_should_add_signal_addr(msk)) { pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); mptcp_pm_announce_addr(msk, &entry->addr, false); mptcp_pm_add_addr_send_ack(msk); @@ -409,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } +static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr, + struct mptcp_addr_info *addr) +{ + int i; + + for (i = 0; i < nr; i++) { + if (addresses_equal(&addrs[i], addr, addr->port)) + return true; + } + + return false; +} + +/* Fill all the remote addresses into the array addrs[], + * and return the array size. 
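mptcp_pm_subflow_chk_stale(), added in pm.c above, counts retransmission periods during which the subflow's rcv_tstamp does not advance and hands off to the netlink path manager once the count passes the new stale_loss_cnt sysctl; the pm_netlink side additionally requires an alternative healthy subflow before marking the path stale and re-injecting pending data. A minimal userspace model of just the counting part, with invented struct and function names, could look like this:

/* Minimal model of the staleness counter, not the mptcp implementation. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_subflow {
	uint32_t stale_rcv_tstamp;	/* rcv_tstamp snapshot at first rtx */
	uint8_t  stale_count;		/* rtx rounds without progress */
	bool     stale;
};

static void chk_stale(struct toy_subflow *sf, uint32_t rcv_tstamp,
		      unsigned int stale_loss_cnt)
{
	if (!sf->stale_count) {			/* first rtx round: snapshot */
		sf->stale_rcv_tstamp = rcv_tstamp;
		sf->stale_count = 1;
	} else if (sf->stale_rcv_tstamp == rcv_tstamp) {
		if (sf->stale_count < UINT8_MAX)
			sf->stale_count++;	/* still no progress */
		if (stale_loss_cnt && sf->stale_count > stale_loss_cnt)
			sf->stale = true;	/* give up on this path for now */
	} else {
		sf->stale_count = 0;		/* peer made progress: recover */
		sf->stale = false;
	}
}

int main(void)
{
	struct toy_subflow sf = { 0 };
	unsigned int i;

	for (i = 0; i < 6; i++)
		chk_stale(&sf, 1000, 4);	/* six rtx rounds, tstamp frozen */

	printf("stale=%d count=%u\n", sf.stale, sf.stale_count);
	return 0;
}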
+ */ +static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, + struct mptcp_addr_info *addrs) +{ + struct sock *sk = (struct sock *)msk, *ssk; + struct mptcp_subflow_context *subflow; + struct mptcp_addr_info remote = { 0 }; + unsigned int subflows_max; + int i = 0; + + subflows_max = mptcp_pm_get_subflows_max(msk); + + /* Non-fullmesh endpoint, fill in the single entry + * corresponding to the primary MPC subflow remote address + */ + if (!fullmesh) { + remote_address((struct sock_common *)sk, &remote); + msk->pm.subflows++; + addrs[i++] = remote; + } else { + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + remote_address((struct sock_common *)ssk, &remote); + if (!lookup_address_in_vec(addrs, i, &remote) && + msk->pm.subflows < subflows_max) { + msk->pm.subflows++; + addrs[i++] = remote; + } + } + } + + return i; +} + static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -454,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) !READ_ONCE(msk->pm.remote_deny_join_id0)) { local = select_local_address(pernet, msk); if (local) { - struct mptcp_addr_info remote = { 0 }; + bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; + int i, nr; msk->pm.local_addr_used++; - msk->pm.subflows++; check_work_pending(msk); - remote_address((struct sock_common *)sk, &remote); + nr = fill_remote_addresses_vec(msk, fullmesh, addrs); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local->addr, &remote, - local->flags, local->ifindex); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); return; } @@ -483,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) mptcp_pm_create_subflow_or_signal_addr(msk); } +/* Fill all the local addresses into the array addrs[], + * and return the array size. 
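fill_remote_addresses_vec() above and fill_local_addresses_vec() below build a bounded, duplicate-free array of addresses so a fullmesh endpoint can be connected toward every known peer address up to subflows_max. The short userspace sketch below shows that fill-and-dedup pattern; struct toy_addr, addr_eq() and the sample data are made up and stand in for mptcp_addr_info and addresses_equal().

/* Userspace sketch of the bounded, duplicate-free address vector fill. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_ADDRS 8

struct toy_addr { unsigned int ip; unsigned short port; };

static bool addr_eq(const struct toy_addr *a, const struct toy_addr *b)
{
	return a->ip == b->ip && a->port == b->port;
}

static unsigned int fill_vec(const struct toy_addr *peers, unsigned int n_peers,
			     unsigned int limit, struct toy_addr *out)
{
	unsigned int i, j, n = 0;

	for (i = 0; i < n_peers && n < limit && n < MAX_ADDRS; i++) {
		bool seen = false;

		for (j = 0; j < n; j++)
			if (addr_eq(&out[j], &peers[i]))
				seen = true;
		if (!seen)
			out[n++] = peers[i];	/* new remote address */
	}
	return n;
}

int main(void)
{
	struct toy_addr peers[] = {
		{ 0x0a000001, 80 }, { 0x0a000002, 80 }, { 0x0a000001, 80 },
	};
	struct toy_addr vec[MAX_ADDRS];
	unsigned int n = fill_vec(peers, 3, 4, vec);

	printf("%u unique remote addresses\n", n);	/* prints 2 */
	return 0;
}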
+ */ +static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, + struct mptcp_addr_info *addrs) +{ + struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info local; + struct pm_nl_pernet *pernet; + unsigned int subflows_max; + int i = 0; + + pernet = net_generic(sock_net(sk), pm_nl_pernet_id); + subflows_max = mptcp_pm_get_subflows_max(msk); + + rcu_read_lock(); + __mptcp_flush_join_list(msk); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) + continue; + + if (entry->addr.family != sk->sk_family) { +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if ((entry->addr.family == AF_INET && + !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || + (sk->sk_family == AF_INET && + !ipv6_addr_v4mapped(&entry->addr.addr6))) +#endif + continue; + } + + if (msk->pm.subflows < subflows_max) { + msk->pm.subflows++; + addrs[i++] = entry->addr; + } + } + rcu_read_unlock(); + + /* If the array is empty, fill in the single + * 'IPADDRANY' local address + */ + if (!i) { + memset(&local, 0, sizeof(local)); + local.family = msk->pm.remote.family; + + msk->pm.subflows++; + addrs[i++] = local; + } + + return i; +} + static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; struct mptcp_addr_info remote; - struct mptcp_addr_info local; unsigned int subflows_max; + int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk); @@ -501,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote)) goto add_addr_echo; - msk->pm.add_addr_accepted++; - msk->pm.subflows++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - /* connect to the specified remote address, using whatever * local address the routing configuration will pick. */ remote = msk->pm.remote; if (!remote.port) remote.port = sk->sk_dport; - memset(&local, 0, sizeof(local)); - local.family = remote.family; + nr = fill_local_addresses_vec(msk, addrs); + + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); spin_unlock_bh(&msk->pm.lock); - __mptcp_subflow_connect(sk, &local, &remote, 0, 0); + for (i = 0; i < nr; i++) + __mptcp_subflow_connect(sk, &addrs[i], &remote); spin_lock_bh(&msk->pm.lock); add_addr_echo: @@ -543,10 +647,8 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) bool slow; spin_unlock_bh(&msk->pm.lock); - pr_debug("send ack for %s%s%s", - mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr", - mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", - mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); + pr_debug("send ack for %s", + mptcp_pm_should_add_signal(msk) ? 
"add_addr" : "rm_addr"); slow = lock_sock_fast(ssk); tcp_send_ack(ssk); @@ -899,6 +1001,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, }; +void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = (struct sock *)msk; + unsigned int active_max_loss_cnt; + struct net *net = sock_net(sk); + unsigned int stale_loss_cnt; + bool slow; + + stale_loss_cnt = mptcp_stale_loss_cnt(net); + if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) + return; + + /* look for another available subflow not in loss state */ + active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); + mptcp_for_each_subflow(msk, iter) { + if (iter != subflow && mptcp_subflow_active(iter) && + iter->stale_count < active_max_loss_cnt) { + /* we have some alternatives, try to mark this subflow as idle ...*/ + slow = lock_sock_fast(ssk); + if (!tcp_rtx_and_write_queues_empty(ssk)) { + subflow->stale = 1; + __mptcp_retransmit_pending_data(sk); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE); + } + unlock_sock_fast(ssk, slow); + + /* always try to push the pending data regarless of re-injections: + * we can possibly use backup subflows now, and subflow selection + * is cheap under the msk socket lock + */ + __mptcp_push_pending(sk, 0); + return; + } + } +} + static int mptcp_pm_family_to_addr(int family) { #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1067,6 +1206,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, + u8 *flags, int *ifindex) +{ + struct mptcp_pm_addr_entry *entry; + + *flags = 0; + *ifindex = 0; + + if (id) { + rcu_read_lock(); + entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id); + if (entry) { + *flags = entry->flags; + *ifindex = entry->ifindex; + } + rcu_read_unlock(); + } + + return 0; +} + static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr) { @@ -1135,36 +1295,12 @@ next: return 0; } -struct addr_entry_release_work { - struct rcu_work rwork; - struct mptcp_pm_addr_entry *entry; -}; - -static void mptcp_pm_release_addr_entry(struct work_struct *work) -{ - struct addr_entry_release_work *w; - struct mptcp_pm_addr_entry *entry; - - w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork); - entry = w->entry; - if (entry) { - if (entry->lsk) - sock_release(entry->lsk); - kfree(entry); - } - kfree(w); -} - -static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) +/* caller must ensure the RCU grace period is already elapsed */ +static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) { - struct addr_entry_release_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry); - w->entry = entry; - queue_rcu_work(system_wq, &w->rwork); - } + if (entry->lsk) + sock_release(entry->lsk); + kfree(entry); } static int mptcp_nl_remove_id_zero_address(struct net *net, @@ -1244,7 +1380,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); - mptcp_pm_free_addr_entry(entry); + synchronize_rcu(); + __mptcp_pm_release_addr_entry(entry); return ret; } @@ -1297,6 +1434,7 @@ static void 
mptcp_nl_remove_addrs_list(struct net *net, } } +/* caller must ensure the RCU grace period is already elapsed */ static void __flush_addrs(struct list_head *list) { while (!list_empty(list)) { @@ -1305,7 +1443,7 @@ static void __flush_addrs(struct list_head *list) cur = list_entry(list->next, struct mptcp_pm_addr_entry, list); list_del_rcu(&cur->list); - mptcp_pm_free_addr_entry(cur); + __mptcp_pm_release_addr_entry(cur); } } @@ -1329,6 +1467,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); + synchronize_rcu(); __flush_addrs(&free_list); return 0; } @@ -1922,6 +2061,7 @@ static int __net_init pm_nl_init_net(struct net *net) INIT_LIST_HEAD_RCU(&pernet->local_addr_list); pernet->next_id = 1; + pernet->stale_loss_cnt = 4; spin_lock_init(&pernet->lock); /* No need to initialize other pernet fields, the struct is zeroed at @@ -1939,7 +2079,8 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list) struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); /* net is removed from namespace list, can't race with - * other modifiers + * other modifiers, also netns core already waited for a + * RCU grace period. */ __flush_addrs(&pernet->local_addr_list); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a88924947815..ade648c3512b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -411,16 +411,29 @@ static void mptcp_set_datafin_timeout(const struct sock *sk) TCP_RTO_MIN << icsk->icsk_retransmits); } -static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk) +static void __mptcp_set_timeout(struct sock *sk, long tout) { - long tout = ssk && inet_csk(ssk)->icsk_pending ? - inet_csk(ssk)->icsk_timeout - jiffies : 0; - - if (tout <= 0) - tout = mptcp_sk(sk)->timer_ival; mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; } +static long mptcp_timeout_from_subflow(const struct mptcp_subflow_context *subflow) +{ + const struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + return inet_csk(ssk)->icsk_pending && !subflow->stale_count ? 
+ inet_csk(ssk)->icsk_timeout - jiffies : 0; +} + +static void mptcp_set_timeout(struct sock *sk) +{ + struct mptcp_subflow_context *subflow; + long tout = 0; + + mptcp_for_each_subflow(mptcp_sk(sk), subflow) + tout = max(tout, mptcp_timeout_from_subflow(subflow)); + __mptcp_set_timeout(sk, tout); +} + static bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & @@ -531,7 +544,6 @@ static bool mptcp_check_data_fin(struct sock *sk) } ret = true; - mptcp_set_timeout(sk, NULL); mptcp_send_ack(msk); mptcp_close_wake_up(sk); } @@ -791,10 +803,7 @@ static void mptcp_reset_timer(struct sock *sk) if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE)) return; - /* should never be called with mptcp level timer cleared */ - tout = READ_ONCE(mptcp_sk(sk)->timer_ival); - if (WARN_ON_ONCE(!tout)) - tout = TCP_RTO_MIN; + tout = mptcp_sk(sk)->timer_ival; sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout); } @@ -1046,8 +1055,14 @@ static void __mptcp_clean_una(struct sock *sk) if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) break; - if (WARN_ON_ONCE(dfrag == msk->first_pending)) - break; + if (unlikely(dfrag == msk->first_pending)) { + /* in recovery mode can see ack after the current snd head */ + if (WARN_ON_ONCE(!msk->recovery)) + break; + + WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + } + dfrag_clear(sk, dfrag); cleaned = true; } @@ -1056,8 +1071,14 @@ static void __mptcp_clean_una(struct sock *sk) if (dfrag && after64(snd_una, dfrag->data_seq)) { u64 delta = snd_una - dfrag->data_seq; - if (WARN_ON_ONCE(delta > dfrag->already_sent)) - goto out; + /* prevent wrap around in recovery mode */ + if (unlikely(delta > dfrag->already_sent)) { + if (WARN_ON_ONCE(!msk->recovery)) + goto out; + if (WARN_ON_ONCE(delta > dfrag->data_len)) + goto out; + dfrag->already_sent += delta - dfrag->already_sent; + } dfrag->data_seq += delta; dfrag->offset += delta; @@ -1068,6 +1089,10 @@ static void __mptcp_clean_una(struct sock *sk) cleaned = true; } + /* all retransmitted data acked, recovery completed */ + if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt)) + msk->recovery = false; + out: if (cleaned) { if (tcp_under_memory_pressure(sk)) { @@ -1076,8 +1101,8 @@ out: } } - if (snd_una == READ_ONCE(msk->snd_nxt)) { - if (msk->timer_ival && !mptcp_data_fin_enabled(msk)) + if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) { + if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) mptcp_stop_timer(sk); } else { mptcp_reset_timer(sk); @@ -1366,16 +1391,44 @@ struct subflow_send_info { u64 ratio; }; +void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow) +{ + if (!subflow->stale) + return; + + subflow->stale = 0; + MPTCP_INC_STATS(sock_net(mptcp_subflow_tcp_sock(subflow)), MPTCP_MIB_SUBFLOWRECOVER); +} + +bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) +{ + if (unlikely(subflow->stale)) { + u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp); + + if (subflow->stale_rcv_tstamp == rcv_tstamp) + return false; + + mptcp_subflow_set_active(subflow); + } + return __mptcp_subflow_active(subflow); +} + +/* implement the mptcp packet scheduler; + * returns the subflow that will transmit the next DSS + * additionally updates the rtx timeout + */ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct subflow_send_info send_info[2]; struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; int i, nr_active = 0; struct sock *ssk; + long 
tout = 0; u64 ratio; u32 pace; - sock_owned_by_me((struct sock *)msk); + sock_owned_by_me(sk); if (__mptcp_check_fallback(msk)) { if (!msk->first) @@ -1386,8 +1439,10 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) /* re-use last subflow, if the burst allow that */ if (msk->last_snd && msk->snd_burst > 0 && sk_stream_memory_free(msk->last_snd) && - mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) + mptcp_subflow_active(mptcp_subflow_ctx(msk->last_snd))) { + mptcp_set_timeout(sk); return msk->last_snd; + } /* pick the subflow with the lower wmem/wspace ratio */ for (i = 0; i < 2; ++i) { @@ -1400,6 +1455,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) if (!mptcp_subflow_active(subflow)) continue; + tout = max(tout, mptcp_timeout_from_subflow(subflow)); nr_active += !subflow->backup; if (!sk_stream_memory_free(subflow->tcp_sock) || !tcp_sk(ssk)->snd_wnd) continue; @@ -1415,6 +1471,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) send_info[subflow->backup].ratio = ratio; } } + __mptcp_set_timeout(sk, tout); /* pick the best backup if no other subflow is active */ if (!nr_active) @@ -1433,12 +1490,11 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) static void mptcp_push_release(struct sock *sk, struct sock *ssk, struct mptcp_sendmsg_info *info) { - mptcp_set_timeout(sk, ssk); tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal); release_sock(ssk); } -static void __mptcp_push_pending(struct sock *sk, unsigned int flags) +void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); @@ -1459,15 +1515,19 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags) mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); - /* try to keep the subflow socket lock across - * consecutive xmit on the same socket + /* First check. If the ssk has changed since + * the last round, release prev_ssk */ if (ssk != prev_ssk && prev_ssk) mptcp_push_release(sk, prev_ssk, &info); if (!ssk) goto out; - if (ssk != prev_ssk || !prev_ssk) + /* Need to lock the new subflow only if different + * from the previous one, otherwise we are still + * helding the relevant lock + */ + if (ssk != prev_ssk) lock_sock(ssk); /* keep it simple and always provide a new skb for the @@ -1501,12 +1561,11 @@ static void __mptcp_push_pending(struct sock *sk, unsigned int flags) mptcp_push_release(sk, ssk, &info); out: - if (copied) { - /* start the timer, if it's not pending */ - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + /* ensure the rtx timer is running */ + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + if (copied) __mptcp_check_send_data_fin(sk); - } } static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) @@ -1567,7 +1626,6 @@ out: */ __mptcp_update_wmem(sk); if (copied) { - mptcp_set_timeout(sk, ssk); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); if (!mptcp_timer_pending(sk)) @@ -2083,10 +2141,11 @@ static void mptcp_timeout_timer(struct timer_list *t) * * A backup subflow is returned only if that is the only kind available. 
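With this series the MPTCP-level retransmit timeout is no longer copied from a single subflow: mptcp_set_timeout() takes the maximum of the per-subflow timeouts, ignoring subflows whose stale counter is non-zero, and __mptcp_set_timeout() clamps the result so it never drops below TCP_RTO_MIN. A small self-contained model of that aggregation (milliseconds stand in for jiffies, names are illustrative) is:

#include <stdio.h>

#define RTO_MIN_MS 200  /* stand-in for TCP_RTO_MIN expressed in ms */

struct subflow {
    int timer_pending;  /* is an rtx timer armed on this subflow? */
    int stale_count;    /* >0 means the subflow made no recent progress */
    long timeout_ms;    /* absolute expiry of the armed timer */
};

/* Remaining rtx time contributed by one subflow, 0 if it should be ignored. */
static long subflow_timeout(const struct subflow *sf, long now_ms)
{
    return (sf->timer_pending && !sf->stale_count) ?
           sf->timeout_ms - now_ms : 0;
}

/* MPTCP-level timeout: the largest pending subflow timeout, never below the
 * minimum RTO so the retransmit timer always stays armed.
 */
static long msk_timeout(const struct subflow *sfs, int n, long now_ms)
{
    long tout = 0;

    for (int i = 0; i < n; i++) {
        long t = subflow_timeout(&sfs[i], now_ms);

        if (t > tout)
            tout = t;
    }
    return tout > 0 ? tout : RTO_MIN_MS;
}

int main(void)
{
    struct subflow sfs[] = {
        { .timer_pending = 1, .stale_count = 0, .timeout_ms = 1300 },
        { .timer_pending = 1, .stale_count = 2, .timeout_ms = 1900 },
        { .timer_pending = 0 },
    };

    printf("msk timeout: %ld ms\n", msk_timeout(sfs, 3, 1000)); /* 300 */
    return 0;
}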
*/ -static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) +static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) { + struct sock *backup = NULL, *pick = NULL; struct mptcp_subflow_context *subflow; - struct sock *backup = NULL; + int min_stale_count = INT_MAX; sock_owned_by_me((const struct sock *)msk); @@ -2096,14 +2155,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - if (!mptcp_subflow_active(subflow)) + if (!__mptcp_subflow_active(subflow)) continue; - /* still data outstanding at TCP level? Don't retransmit. */ - if (!tcp_write_queue_empty(ssk)) { - if (inet_csk(ssk)->icsk_ca_state >= TCP_CA_Loss) - continue; - return NULL; + /* still data outstanding at TCP level? skip this */ + if (!tcp_rtx_and_write_queues_empty(ssk)) { + mptcp_pm_subflow_chk_stale(msk, ssk); + min_stale_count = min_t(int, min_stale_count, subflow->stale_count); + continue; } if (subflow->backup) { @@ -2112,10 +2171,15 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) continue; } - return ssk; + if (!pick) + pick = ssk; } - return backup; + if (pick) + return pick; + + /* use backup only if there are no progresses anywhere */ + return min_stale_count > 1 ? backup : NULL; } static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) @@ -2126,6 +2190,50 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) } } +bool __mptcp_retransmit_pending_data(struct sock *sk) +{ + struct mptcp_data_frag *cur, *rtx_head; + struct mptcp_sock *msk = mptcp_sk(sk); + + if (__mptcp_check_fallback(mptcp_sk(sk))) + return false; + + if (tcp_rtx_and_write_queues_empty(sk)) + return false; + + /* the closing socket has some data untransmitted and/or unacked: + * some data in the mptcp rtx queue has not really xmitted yet. + * keep it simple and re-inject the whole mptcp level rtx queue + */ + mptcp_data_lock(sk); + __mptcp_clean_una_wakeup(sk); + rtx_head = mptcp_rtx_head(sk); + if (!rtx_head) { + mptcp_data_unlock(sk); + return false; + } + + /* will accept ack for reijected data before re-sending them */ + if (!msk->recovery || after64(msk->snd_nxt, msk->recovery_snd_nxt)) + msk->recovery_snd_nxt = msk->snd_nxt; + msk->recovery = true; + mptcp_data_unlock(sk); + + msk->first_pending = rtx_head; + msk->tx_pending_data += msk->snd_nxt - rtx_head->data_seq; + msk->snd_nxt = rtx_head->data_seq; + msk->snd_burst = 0; + + /* be sure to clear the "sent status" on all re-injected fragments */ + list_for_each_entry(cur, &msk->rtx_queue, list) { + if (!cur->already_sent) + break; + cur->already_sent = 0; + } + + return true; +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). 
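__mptcp_retransmit_pending_data() above switches the connection into recovery: it snapshots snd_nxt into recovery_snd_nxt, rewinds snd_nxt and first_pending to the head of the rtx queue, and clears already_sent on the queued fragments so the data gets re-injected, possibly over different subflows; recovery ends once snd_una moves past the snapshot. A toy model of that rewind and completion check (plain 64-bit compares instead of after64(), no locking, illustrative names) could be:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the msk rtx queue and sequence state. */
struct frag {
    uint64_t data_seq;      /* first byte of the fragment */
    uint32_t data_len;      /* total bytes in the fragment */
    uint32_t already_sent;  /* bytes already pushed to some subflow */
};

struct conn {
    uint64_t snd_nxt;           /* next new byte to send */
    uint64_t snd_una;           /* oldest unacked byte */
    uint64_t recovery_snd_nxt;  /* snd_nxt snapshot taken at rewind time */
    int recovery;               /* re-injection in progress */
};

/* Rewind transmission to the head of the rtx queue: every fragment that was
 * (partially) sent is marked unsent again and will be re-injected.
 */
static void reinject_pending(struct conn *c, struct frag *q, int n)
{
    if (!n)
        return;

    c->recovery_snd_nxt = c->snd_nxt;   /* acks beyond here end recovery */
    c->recovery = 1;
    c->snd_nxt = q[0].data_seq;

    for (int i = 0; i < n && q[i].already_sent; i++)
        q[i].already_sent = 0;
}

/* Ack processing: recovery completes once everything that had been sent
 * before the rewind is acknowledged.
 */
static void ack_update(struct conn *c, uint64_t snd_una)
{
    c->snd_una = snd_una;
    if (c->recovery && c->snd_una > c->recovery_snd_nxt)
        c->recovery = 0;
}

int main(void)
{
    struct frag q[] = { { 1000, 500, 500 }, { 1500, 500, 200 }, { 2000, 500, 0 } };
    struct conn c = { .snd_nxt = 1700, .snd_una = 1000 };

    reinject_pending(&c, q, 3);
    printf("rewound snd_nxt=%llu, recovery=%d\n",
           (unsigned long long)c.snd_nxt, c.recovery);  /* 1000, 1 */
    ack_update(&c, 1600);
    printf("acked 1600: recovery=%d\n", c.recovery);    /* 1: old data not fully acked */
    ack_update(&c, 1800);
    printf("acked 1800: recovery=%d\n", c.recovery);    /* 0: recovery complete */
    return 0;
}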
* @@ -2138,6 +2246,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { struct mptcp_sock *msk = mptcp_sk(sk); + bool need_push; list_del(&subflow->node); @@ -2149,6 +2258,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (ssk->sk_socket) sock_orphan(ssk); + need_push = __mptcp_retransmit_pending_data(sk); subflow->disposable = 1; /* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops @@ -2176,6 +2286,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (msk->subflow && ssk == msk->subflow->sk) mptcp_dispose_initial_subflow(msk); + + if (need_push) + __mptcp_push_pending(sk, 0); } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2313,7 +2426,6 @@ static void __mptcp_retrans(struct sock *sk) info.size_goal); } - mptcp_set_timeout(sk, ssk); release_sock(ssk); reset_timer: @@ -2384,10 +2496,12 @@ static int __mptcp_init_sock(struct sock *sk) msk->wmem_reserved = 0; WRITE_ONCE(msk->rmem_released, 0); msk->tx_pending_data = 0; + msk->timer_ival = TCP_RTO_MIN; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); + msk->recovery = false; mptcp_pm_data_init(msk); @@ -2472,7 +2586,6 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) tcp_shutdown(ssk, how); } else { pr_debug("Sending DATA_FIN on subflow %p", ssk); - mptcp_set_timeout(sk, ssk); tcp_send_ack(ssk); if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); @@ -2723,7 +2836,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->token = subflow_req->token; msk->subflow = NULL; WRITE_ONCE(msk->fully_established, false); - if (mp_opt->csum_reqd) + if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(msk->csum_enabled, true); msk->write_seq = subflow_req->idsn + 1; @@ -2732,7 +2845,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd; msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; - if (mp_opt->mp_capable) { + if (mp_opt->suboptions & OPTIONS_MPTCP_MPC) { msk->can_ack = true; msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0f0c026c5f8b..d7aba1c4dc48 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -26,6 +26,15 @@ #define OPTION_MPTCP_FASTCLOSE BIT(8) #define OPTION_MPTCP_PRIO BIT(9) #define OPTION_MPTCP_RST BIT(10) +#define OPTION_MPTCP_DSS BIT(11) +#define OPTION_MPTCP_FAIL BIT(12) + +#define OPTION_MPTCP_CSUMREQD BIT(13) + +#define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \ + OPTION_MPTCP_MPC_ACK) +#define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \ + OPTION_MPTCP_MPJ_SYNACK) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -67,6 +76,7 @@ #define TCPOLEN_MPTCP_PRIO_ALIGN 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4 +#define TCPOLEN_MPTCP_FAIL 12 #define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) @@ -129,35 +139,28 @@ struct mptcp_options_received { u32 subflow_seq; u16 data_len; __sum16 csum; - u16 mp_capable : 1, - mp_join : 1, - fastclose : 1, - reset : 1, - dss : 1, - add_addr : 1, - rm_addr : 1, - mp_prio : 1, - echo : 1, - csum_reqd : 1, - backup : 1, - deny_join_id0 : 1; + u16 suboptions; u32 token; u32 nonce; - u64 thmac; - u8 hmac[MPTCPOPT_HMAC_LEN]; - u8 join_id; - u8 use_map:1, + u16 
use_map:1, dsn64:1, data_fin:1, use_ack:1, ack64:1, mpc_map:1, + reset_reason:4, + reset_transient:1, + echo:1, + backup:1, + deny_join_id0:1, __unused:2; + u8 join_id; + u64 thmac; + u8 hmac[MPTCPOPT_HMAC_LEN]; struct mptcp_addr_info addr; struct mptcp_rm_list rm_list; u64 ahmac; - u8 reset_reason:4; - u8 reset_transient:1; + u64 fail_seq; }; static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) @@ -178,8 +181,6 @@ enum mptcp_pm_status { enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, - MPTCP_ADD_ADDR_IPV6, - MPTCP_ADD_ADDR_PORT, MPTCP_RM_ADDR_SIGNAL, }; @@ -230,12 +231,17 @@ struct mptcp_sock { struct sock *last_snd; int snd_burst; int old_wspace; + u64 recovery_snd_nxt; /* in recovery mode accept up to this seq; + * recovery related fields are under data_lock + * protection + */ u64 snd_una; u64 wnd_end; unsigned long timer_ival; u32 token; int rmem_released; unsigned long flags; + bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; bool rcv_data_fin; @@ -425,9 +431,11 @@ struct mptcp_subflow_context { mpc_map : 1, backup : 1, send_mp_prio : 1, + send_mp_fail : 1, rx_eof : 1, can_ack : 1, /* only after processing the remote a key */ - disposable : 1; /* ctx can be free at ulp release time */ + disposable : 1, /* ctx can be free at ulp release time */ + stale : 1; /* unable to snd/rcv data, do not use for xmit */ enum mptcp_data_avail data_avail; u32 remote_nonce; u64 thmac; @@ -439,11 +447,13 @@ struct mptcp_subflow_context { u8 reset_seen:1; u8 reset_transient:1; u8 reset_reason:4; + u8 stale_count; long delegated_status; struct list_head delegated_node; /* link into delegated_action, protected by local BH */ - u32 setsockopt_seq; + u32 setsockopt_seq; + u32 stale_rcv_tstamp; struct sock *tcp_sock; /* tcp sk backpointer */ struct sock *conn; /* parent mptcp_sock */ @@ -549,12 +559,15 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status); } -int mptcp_is_enabled(struct net *net); -unsigned int mptcp_get_add_addr_timeout(struct net *net); -int mptcp_is_checksum_enabled(struct net *net); -int mptcp_allow_join_id0(struct net *net); +int mptcp_is_enabled(const struct net *net); +unsigned int mptcp_get_add_addr_timeout(const struct net *net); +int mptcp_is_checksum_enabled(const struct net *net); +int mptcp_allow_join_id0(const struct net *net); +unsigned int mptcp_stale_loss_cnt(const struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); +bool __mptcp_retransmit_pending_data(struct sock *sk); +void __mptcp_push_pending(struct sock *sk, unsigned int flags); bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); @@ -566,14 +579,13 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote, - u8 flags, int ifindex); + const struct mptcp_addr_info *remote); int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); -static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) +static inline bool 
__mptcp_subflow_active(struct mptcp_subflow_context *subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -585,6 +597,10 @@ static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); } +void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); + +bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); + static inline void mptcp_subflow_tcp_fallback(struct sock *sk, struct mptcp_subflow_context *ctx) { @@ -596,6 +612,19 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk, inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops; } +static inline bool mptcp_has_another_subflow(struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk), *tmp; + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + mptcp_for_each_subflow(msk, tmp) { + if (tmp != subflow) + return true; + } + + return false; +} + void __init mptcp_proto_init(void); #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcp_proto_v6_init(void); @@ -690,6 +719,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); +void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); +void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); @@ -708,6 +739,7 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct mptcp_addr_info *addr, u8 bkup); +void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq); void mptcp_pm_free_anno_list(struct mptcp_sock *msk); bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); struct mptcp_pm_add_entry * @@ -716,6 +748,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk, struct mptcp_addr_info *addr); +int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, + u8 *flags, int *ifindex); int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, @@ -730,22 +764,18 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); + return READ_ONCE(msk->pm.addr_signal) & + (BIT(MPTCP_ADD_ADDR_SIGNAL) | BIT(MPTCP_ADD_ADDR_ECHO)); } -static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) -{ - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); -} - -static inline bool mptcp_pm_should_add_signal_ipv6(struct mptcp_sock *msk) +static inline bool mptcp_pm_should_add_signal_addr(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_IPV6); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL); } -static inline bool mptcp_pm_should_add_signal_port(struct mptcp_sock *msk) +static inline bool mptcp_pm_should_add_signal_echo(struct mptcp_sock *msk) { - return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_PORT); + return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_ECHO); } static inline bool mptcp_pm_should_rm_signal(struct 
mptcp_sock *msk) @@ -776,8 +806,10 @@ static inline int mptcp_rm_addr_len(const struct mptcp_rm_list *rm_list) return TCPOLEN_MPTCP_RM_ADDR_BASE + roundup(rm_list->nr - 1, 4) + 1; } -bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, unsigned int remaining, - struct mptcp_addr_info *saddr, bool *echo, bool *port); +bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, struct sk_buff *skb, + unsigned int opt_size, unsigned int remaining, + struct mptcp_addr_info *addr, bool *echo, + bool *port, bool *drop_other_suboptions); bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, struct mptcp_rm_list *rm_list); int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 966f777d35ce..1de7ce883c37 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -141,6 +141,7 @@ static int subflow_check_req(struct request_sock *req, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_options_received mp_opt; + bool opt_mp_capable, opt_mp_join; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); @@ -154,16 +155,18 @@ static int subflow_check_req(struct request_sock *req, mptcp_get_options(sk_listener, skb, &mp_opt); - if (mp_opt.mp_capable) { + opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC); + opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ); + if (opt_mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); - if (mp_opt.mp_join) + if (opt_mp_join) return 0; - } else if (mp_opt.mp_join) { + } else if (opt_mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } - if (mp_opt.mp_capable && listener->request_mptcp) { + if (opt_mp_capable && listener->request_mptcp) { int err, retries = MPTCP_TOKEN_MAX_RETRIES; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -194,7 +197,7 @@ again: else SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_TOKENFALLBACKINIT); - } else if (mp_opt.mp_join && listener->request_mptcp) { + } else if (opt_mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; subflow_req->backup = mp_opt.backup; @@ -243,15 +246,18 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct mptcp_options_received mp_opt; + bool opt_mp_capable, opt_mp_join; int err; subflow_init_req(req, sk_listener); mptcp_get_options(sk_listener, skb, &mp_opt); - if (mp_opt.mp_capable && mp_opt.mp_join) + opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC); + opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ); + if (opt_mp_capable && opt_mp_join) return -EINVAL; - if (mp_opt.mp_capable && listener->request_mptcp) { + if (opt_mp_capable && listener->request_mptcp) { if (mp_opt.sndr_key == 0) return -EINVAL; @@ -262,7 +268,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; - } else if (mp_opt.mp_join && listener->request_mptcp) { + } else if (opt_mp_join && listener->request_mptcp) { if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) return -EINVAL; @@ -394,7 +400,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - /* be sure no special action on any packet other than 
syn-ack */ if (subflow->conn_finished) return; @@ -407,7 +412,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { - if (!mp_opt.mp_capable) { + if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); mptcp_do_fallback(sk); @@ -415,7 +420,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto fallback; } - if (mp_opt.csum_reqd) + if (mp_opt.suboptions & OPTION_MPTCP_CSUMREQD) WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); if (mp_opt.deny_join_id0) WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); @@ -430,15 +435,17 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; - if (!mp_opt.mp_join) { + if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) { subflow->reset_reason = MPTCP_RST_EMPTCP; goto do_reset; } + subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, - subflow->thmac, subflow->remote_nonce); + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", + subflow, subflow->thmac, subflow->remote_nonce, + subflow->backup); if (!subflow_thmac_valid(subflow)) { MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC); @@ -634,10 +641,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); - /* After child creation we must look for 'mp_capable' even when options + /* After child creation we must look for MPC even when options * are not parsed */ - mp_opt.mp_capable = 0; + mp_opt.suboptions = 0; /* hopefully temporary handling for MP_JOIN+syncookie */ subflow_req = mptcp_subflow_rsk(req); @@ -657,7 +664,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, * options. */ mptcp_get_options(sk, skb, &mp_opt); - if (!mp_opt.mp_capable) { + if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) { fallback = true; goto create_child; } @@ -667,7 +674,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(sk, skb, &mp_opt); - if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || + if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) || + !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); fallback = true; @@ -724,7 +732,7 @@ create_child: /* with OoO packets we can reach here without ingress * mpc option */ - if (mp_opt.mp_capable) + if (mp_opt.suboptions & OPTIONS_MPTCP_MPC) mptcp_subflow_fully_established(ctx, &mp_opt); } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -908,6 +916,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); if (unlikely(csum_fold(csum))) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + subflow->send_mp_fail = 1; + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; } @@ -1155,6 +1165,20 @@ no_data: fallback: /* RFC 8684 section 3.7. 
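The option-parsing paths now test mp_opt.suboptions instead of one boolean per option, with OPTIONS_MPTCP_MPC and OPTIONS_MPTCP_MPJ grouping the handshake variants of MP_CAPABLE and MP_JOIN, so callers ask "is any variant present" with a single mask test. A minimal illustration of that flag-word style (the bit values below are made up for the sketch; the real ones live in protocol.h) is:

#include <stdio.h>

#define OPT_MPC_SYN     (1u << 0)   /* hypothetical bit layout, sketch only */
#define OPT_MPC_SYNACK  (1u << 1)
#define OPT_MPC_ACK     (1u << 2)
#define OPT_MPJ_SYN     (1u << 3)
#define OPT_CSUMREQD    (1u << 4)

#define OPTS_MPC        (OPT_MPC_SYN | OPT_MPC_SYNACK | OPT_MPC_ACK)

/* Parsed options carry one 'suboptions' mask instead of a bool per option. */
struct parsed_opts {
    unsigned int suboptions;
};

static int opt_mp_capable(const struct parsed_opts *o)
{
    return !!(o->suboptions & OPTS_MPC);
}

static int opt_csum_reqd(const struct parsed_opts *o)
{
    return !!(o->suboptions & OPT_CSUMREQD);
}

int main(void)
{
    struct parsed_opts o = { .suboptions = OPT_MPC_SYN | OPT_CSUMREQD };

    printf("mp_capable=%d csum_reqd=%d\n",
           opt_mp_capable(&o), opt_csum_reqd(&o));
    return 0;
}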
*/ + if (subflow->send_mp_fail) { + if (mptcp_has_another_subflow(ssk)) { + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + } + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; + tcp_send_active_reset(ssk, GFP_ATOMIC); + WRITE_ONCE(subflow->data_avail, 0); + return true; + } + if (subflow->mp_join || subflow->fully_established) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers @@ -1353,8 +1377,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, } int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, - const struct mptcp_addr_info *remote, - u8 flags, int ifindex) + const struct mptcp_addr_info *remote) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; @@ -1365,6 +1388,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, struct sock *ssk; u32 remote_token; int addrlen; + int ifindex; + u8 flags; int err; if (!mptcp_is_fully_established(sk)) @@ -1388,6 +1413,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, local_id = err; } + mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id, + &flags, &ifindex); subflow->remote_key = msk->remote_key; subflow->local_key = msk->local_key; subflow->token = msk->token; diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 049890e00a3d..aab20e575ecd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/ # IPVS obj-$(CONFIG_IP_VS) += ipvs/ + +# lwtunnel +obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 296e4a171bd1..41768ff19464 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -130,58 +130,77 @@ static void ecache_work(struct work_struct *work) schedule_delayed_work(&cnet->ecache_dwork, delay); } -int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 portid, int report) +static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e, + const unsigned int events, + const unsigned long missed, + const struct nf_ct_event *item) { - int ret = 0; - struct net *net = nf_ct_net(ct); + struct nf_conn *ct = item->ct; + struct net *net = nf_ct_net(item->ct); struct nf_ct_event_notifier *notify; - struct nf_conntrack_ecache *e; + int ret; + + if (!((events | missed) & e->ctmask)) + return 0; rcu_read_lock(); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); - if (!notify) - goto out_unlock; + if (!notify) { + rcu_read_unlock(); + return 0; + } + + ret = notify->ct_event(events | missed, item); + rcu_read_unlock(); + + if (likely(ret >= 0 && missed == 0)) + return 0; + + spin_lock_bh(&ct->lock); + if (ret < 0) + e->missed |= events; + else + e->missed &= ~missed; + spin_unlock_bh(&ct->lock); + + return ret; +} + +int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, + u32 portid, int report) +{ + struct nf_conntrack_ecache *e; + struct nf_ct_event item; + unsigned long missed; + int ret; + + if (!nf_ct_is_confirmed(ct)) + return 0; e = nf_ct_ecache_find(ct); if (!e) - goto out_unlock; + return 0; - if (nf_ct_is_confirmed(ct)) { - struct nf_ct_event item = { - .ct = ct, - .portid = e->portid ? e->portid : portid, - .report = report - }; - /* This is a resent of a destroy event? 
If so, skip missed */ - unsigned long missed = e->portid ? 0 : e->missed; - - if (!((eventmask | missed) & e->ctmask)) - goto out_unlock; - - ret = notify->fcn(eventmask | missed, &item); - if (unlikely(ret < 0 || missed)) { - spin_lock_bh(&ct->lock); - if (ret < 0) { - /* This is a destroy event that has been - * triggered by a process, we store the PORTID - * to include it in the retransmission. - */ - if (eventmask & (1 << IPCT_DESTROY)) { - if (e->portid == 0 && portid != 0) - e->portid = portid; - e->state = NFCT_ECACHE_DESTROY_FAIL; - } else { - e->missed |= eventmask; - } - } else { - e->missed &= ~missed; - } - spin_unlock_bh(&ct->lock); - } + memset(&item, 0, sizeof(item)); + + item.ct = ct; + item.portid = e->portid ? e->portid : portid; + item.report = report; + + /* This is a resent of a destroy event? If so, skip missed */ + missed = e->portid ? 0 : e->missed; + + ret = __nf_conntrack_eventmask_report(e, events, missed, &item); + if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) { + /* This is a destroy event that has been triggered by a process, + * we store the PORTID to include it in the retransmission. + */ + if (e->portid == 0 && portid != 0) + e->portid = portid; + e->state = NFCT_ECACHE_DESTROY_FAIL; } -out_unlock: - rcu_read_unlock(); + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); @@ -190,53 +209,28 @@ EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); * disabled softirqs */ void nf_ct_deliver_cached_events(struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - unsigned long events, missed; - struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; struct nf_ct_event item; - int ret; - - rcu_read_lock(); - notify = rcu_dereference(net->ct.nf_conntrack_event_cb); - if (notify == NULL) - goto out_unlock; + unsigned long events; if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) - goto out_unlock; + return; e = nf_ct_ecache_find(ct); if (e == NULL) - goto out_unlock; + return; events = xchg(&e->cache, 0); - /* We make a copy of the missed event cache without taking - * the lock, thus we may send missed events twice. However, - * this does not harm and it happens very rarely. */ - missed = e->missed; - - if (!((events | missed) & e->ctmask)) - goto out_unlock; - item.ct = ct; item.portid = 0; item.report = 0; - ret = notify->fcn(events | missed, &item); - - if (likely(ret == 0 && !missed)) - goto out_unlock; - - spin_lock_bh(&ct->lock); - if (ret < 0) - e->missed |= events; - else - e->missed &= ~missed; - spin_unlock_bh(&ct->lock); - -out_unlock: - rcu_read_unlock(); + /* We make a copy of the missed event cache without taking + * the lock, thus we may send missed events twice. However, + * this does not harm and it happens very rarely. 
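Both report paths now funnel through __nf_conntrack_eventmask_report(): previously missed events are folded into the delivery, a failed delivery ORs the new events into e->missed, and a successful one clears the missed bits that were just resent. A minimal userspace sketch of that accumulate/clear pattern (single-threaded, so the ct->lock protection is omitted, and the notifier is faked) is:

#include <stdio.h>

/* Hypothetical event cache: 'missed' accumulates events that a listener
 * failed to receive so they can be folded into the next delivery.
 */
struct ecache {
    unsigned long missed;
};

/* Stand-in for the netlink notifier; returns <0 to simulate delivery failure. */
static int deliver(unsigned long events, int fail)
{
    (void)events;   /* a real notifier would build a message from 'events' */
    return fail ? -1 : 0;
}

/* Report 'events' together with whatever was missed earlier.  On failure the
 * new events are remembered; on success the previously missed bits that were
 * just resent are cleared.
 */
static int report_events(struct ecache *e, unsigned long events,
                         unsigned long missed, int fail)
{
    int ret = deliver(events | missed, fail);

    if (ret >= 0 && missed == 0)
        return 0;   /* fast path: nothing to update */

    if (ret < 0)
        e->missed |= events;
    else
        e->missed &= ~missed;
    return ret;
}

int main(void)
{
    struct ecache e = { 0 };

    report_events(&e, 0x1, e.missed, 1);    /* fails: 0x1 recorded as missed */
    printf("missed after failure: 0x%lx\n", e.missed);
    report_events(&e, 0x2, e.missed, 0);    /* succeeds: resends and clears 0x1 */
    printf("missed after success: 0x%lx\n", e.missed);
    return 0;
}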
+ */ + __nf_conntrack_eventmask_report(e, events, e->missed, &item); } EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); @@ -246,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, { struct net *net = nf_ct_exp_net(exp); - struct nf_exp_event_notifier *notify; + struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; rcu_read_lock(); - notify = rcu_dereference(net->ct.nf_expect_event_cb); + notify = rcu_dereference(net->ct.nf_conntrack_event_cb); if (!notify) goto out_unlock; @@ -264,86 +258,35 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, .portid = portid, .report = report }; - notify->fcn(1 << event, &item); + notify->exp_event(1 << event, &item); } out_unlock: rcu_read_unlock(); } -int nf_conntrack_register_notifier(struct net *net, - struct nf_ct_event_notifier *new) +void nf_conntrack_register_notifier(struct net *net, + const struct nf_ct_event_notifier *new) { - int ret; struct nf_ct_event_notifier *notify; mutex_lock(&nf_ct_ecache_mutex); notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, lockdep_is_held(&nf_ct_ecache_mutex)); - if (notify != NULL) { - ret = -EBUSY; - goto out_unlock; - } + WARN_ON_ONCE(notify); rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); - ret = 0; - -out_unlock: mutex_unlock(&nf_ct_ecache_mutex); - return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -void nf_conntrack_unregister_notifier(struct net *net, - struct nf_ct_event_notifier *new) +void nf_conntrack_unregister_notifier(struct net *net) { - struct nf_ct_event_notifier *notify; - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - BUG_ON(notify != new); RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL); mutex_unlock(&nf_ct_ecache_mutex); - /* synchronize_rcu() is called from ctnetlink_exit. */ + /* synchronize_rcu() is called after netns pre_exit */ } EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct net *net, - struct nf_exp_event_notifier *new) -{ - int ret; - struct nf_exp_event_notifier *notify; - - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - if (notify != NULL) { - ret = -EBUSY; - goto out_unlock; - } - rcu_assign_pointer(net->ct.nf_expect_event_cb, new); - ret = 0; - -out_unlock: - mutex_unlock(&nf_ct_ecache_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); - -void nf_ct_expect_unregister_notifier(struct net *net, - struct nf_exp_event_notifier *new) -{ - struct nf_exp_event_notifier *notify; - - mutex_lock(&nf_ct_ecache_mutex); - notify = rcu_dereference_protected(net->ct.nf_expect_event_cb, - lockdep_is_held(&nf_ct_ecache_mutex)); - BUG_ON(notify != new); - RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL); - mutex_unlock(&nf_ct_ecache_mutex); - /* synchronize_rcu() is called from ctnetlink_exit. 
*/ -} -EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); - void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) { struct nf_conntrack_net *cnet = nf_ct_pernet(net); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index eb35c6151fb0..5f9fc6b94855 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -706,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct) } static int -ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) +ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) { const struct nf_conntrack_zone *zone; struct net *net; @@ -2669,6 +2669,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct) + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + ctnetlink_secctx_size(ct) + + ctnetlink_acct_size(ct) + + ctnetlink_timestamp_size(ct) #if IS_ENABLED(CONFIG_NF_NAT) + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ @@ -2726,6 +2728,10 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) if (ctnetlink_dump_protoinfo(skb, ct, false) < 0) goto nla_put_failure; + if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 || + ctnetlink_dump_timestamp(skb, ct) < 0) + goto nla_put_failure; + if (ctnetlink_dump_helpinfo(skb, ct) < 0) goto nla_put_failure; @@ -3104,7 +3110,7 @@ nla_put_failure: #ifdef CONFIG_NF_CONNTRACK_EVENTS static int -ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) +ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item) { struct nf_conntrack_expect *exp = item->exp; struct net *net = nf_ct_exp_net(exp); @@ -3755,11 +3761,8 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb, #ifdef CONFIG_NF_CONNTRACK_EVENTS static struct nf_ct_event_notifier ctnl_notifier = { - .fcn = ctnetlink_conntrack_event, -}; - -static struct nf_exp_event_notifier ctnl_notifier_exp = { - .fcn = ctnetlink_expect_event, + .ct_event = ctnetlink_conntrack_event, + .exp_event = ctnetlink_expect_event, }; #endif @@ -3852,52 +3855,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); static int __net_init ctnetlink_net_init(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - int ret; - - ret = nf_conntrack_register_notifier(net, &ctnl_notifier); - if (ret < 0) { - pr_err("ctnetlink_init: cannot register notifier.\n"); - goto err_out; - } - - ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); - if (ret < 0) { - pr_err("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; - } + nf_conntrack_register_notifier(net, &ctnl_notifier); #endif return 0; - -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: - nf_conntrack_unregister_notifier(net, &ctnl_notifier); -err_out: - return ret; -#endif } -static void ctnetlink_net_exit(struct net *net) +static void ctnetlink_net_pre_exit(struct net *net) { #ifdef CONFIG_NF_CONNTRACK_EVENTS - nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); - nf_conntrack_unregister_notifier(net, &ctnl_notifier); + nf_conntrack_unregister_notifier(net); #endif } -static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) -{ - struct net *net; - - list_for_each_entry(net, net_exit_list, exit_list) - ctnetlink_net_exit(net); - - /* wait for other cpus until they are done with ctnl_notifiers */ - synchronize_rcu(); -} - static struct pernet_operations 
ctnetlink_net_ops = { .init = ctnetlink_net_init, - .exit_batch = ctnetlink_net_exit_batch, + .pre_exit = ctnetlink_net_pre_exit, }; static int __init ctnetlink_init(void) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e84b499b7bfa..7e0d956da51d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -22,6 +22,9 @@ #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> +#ifdef CONFIG_LWTUNNEL +#include <net/netfilter/nf_hooks_lwtunnel.h> +#endif #include <linux/rculist_nulls.h> static bool enable_hooks __read_mostly; @@ -612,6 +615,9 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_GRE, NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM, #endif +#ifdef CONFIG_LWTUNNEL + NF_SYSCTL_CT_LWTUNNEL, +#endif __NF_SYSCTL_CT_LAST_SYSCTL, }; @@ -959,6 +965,15 @@ static struct ctl_table nf_ct_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, #endif +#ifdef CONFIG_LWTUNNEL + [NF_SYSCTL_CT_LWTUNNEL] = { + .procname = "nf_hooks_lwtunnel", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = nf_hooks_lwtunnel_sysctl_handler, + }, +#endif {} }; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2bfd9f1b8f11..d6bf1b2cd541 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1096,6 +1096,7 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo, bo->command = cmd; bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; + bo->cb_list_head = &flowtable->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/netfilter/nf_hooks_lwtunnel.c b/net/netfilter/nf_hooks_lwtunnel.c new file mode 100644 index 000000000000..00e89ffd78f6 --- /dev/null +++ b/net/netfilter/nf_hooks_lwtunnel.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/sysctl.h> +#include <net/lwtunnel.h> +#include <net/netfilter/nf_hooks_lwtunnel.h> + +static inline int nf_hooks_lwtunnel_get(void) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) + return 1; + else + return 0; +} + +static inline int nf_hooks_lwtunnel_set(int enable) +{ + if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) { + if (!enable) + return -EBUSY; + } else if (enable) { + static_branch_enable(&nf_hooks_lwtunnel_enabled); + } + + return 0; +} + +#ifdef CONFIG_SYSCTL +int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int proc_nf_hooks_lwtunnel_enabled = 0; + struct ctl_table tmp = { + .procname = table->procname, + .data = &proc_nf_hooks_lwtunnel_enabled, + .maxlen = sizeof(int), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + int ret; + + if (!write) + proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get(); + + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) + ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled); + + return ret; +} +EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler); +#endif /* CONFIG_SYSCTL */ diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index b58d73a96523..9656c1646222 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -353,6 +353,7 @@ static void nft_flow_block_offload_init(struct flow_block_offload *bo, bo->command = cmd; bo->binder_type 
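The new nf_hooks_lwtunnel sysctl above is a one-way switch: the handler copies the value through a temporary table bounded to 0/1, enabling flips the static key, and a later attempt to disable it is rejected with -EBUSY. A small userspace model of that semantic (a plain int stands in for the static branch) might look like:

#include <errno.h>
#include <stdio.h>

/* Model of a one-way switch: once user space turns it on it cannot be turned
 * off again, mirroring the static-branch-backed nf_hooks_lwtunnel toggle.
 */
static int hooks_enabled;   /* stands in for the static key */

static int hooks_get(void)
{
    return hooks_enabled ? 1 : 0;
}

static int hooks_set(int enable)
{
    if (hooks_enabled) {
        if (!enable)
            return -EBUSY;      /* already on: refuse to disable */
    } else if (enable) {
        hooks_enabled = 1;      /* the kernel would flip the static key here */
    }
    return 0;
}

int main(void)
{
    printf("initial: %d\n", hooks_get());
    printf("enable:  %d\n", hooks_set(1));
    printf("disable: %d (expect -EBUSY=%d)\n", hooks_set(0), -EBUSY);
    printf("final:   %d\n", hooks_get());
    return 0;
}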
= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; bo->extack = extack; + bo->cb_list_head = &basechain->flow_block.cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 000bb3da4f77..894e6b8f1a86 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -144,8 +144,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info, return -ENOMEM; doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); if (doi_def->map.std == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; + kfree(doi_def); + return -ENOMEM; } doi_def->type = CIPSO_V4_MAP_TRANS; diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 88deb5b41429..cf2ce5812489 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -507,6 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; + skb->tstamp = 0; vport->ops->send(skb); return; diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index 1dc955ca57d3..fa611678af05 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -15,7 +15,6 @@ struct qrtr_mhi_dev { struct qrtr_endpoint ep; struct mhi_device *mhi_dev; struct device *dev; - struct completion ready; }; /* From MHI to QRTR */ @@ -51,10 +50,6 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; - rc = wait_for_completion_interruptible(&qdev->ready); - if (rc) - goto free_skb; - if (skb->sk) sock_hold(skb->sk); @@ -84,7 +79,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev, 0); + rc = mhi_prepare_for_transfer(mhi_dev); if (rc) return rc; @@ -101,15 +96,6 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; - /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); - if (rc) { - qrtr_endpoint_unregister(&qdev->ep); - dev_set_drvdata(&mhi_dev->dev, NULL); - return rc; - } - - complete_all(&qdev->ready); dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 6c61b7b1838f..525e3ea063b1 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) goto err; } - if (len != ALIGN(size, 4) + hdrlen) + if (!size || size & 3 || len != size + hdrlen) goto err; if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && @@ -506,8 +506,12 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (cb->type == QRTR_TYPE_NEW_SERVER) { /* Remote node endpoint can bridge other distant nodes */ - const struct qrtr_ctrl_pkt *pkt = data + hdrlen; + const struct qrtr_ctrl_pkt *pkt; + if (size < sizeof(*pkt)) + goto err; + + pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 9b6ffff72f2d..28c1b0022178 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? 
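The qrtr_endpoint_post() change above tightens header validation: the advertised payload size must be non-zero, 4-byte aligned and consistent with the received length, and NEW_SERVER control messages must be large enough for qrtr_ctrl_pkt before the payload is dereferenced. A compact sketch of those checks on an untrusted length field (with a made-up stand-in for the control struct) is:

#include <stddef.h>
#include <stdio.h>

struct ctrl_pkt {       /* minimal stand-in for qrtr_ctrl_pkt */
    unsigned int cmd;
    unsigned int node;
};

/* Validate an untrusted payload size taken from a packet header:
 *  - it must be non-zero,
 *  - 32-bit aligned,
 *  - and together with the header length it must match the received length.
 * Control messages additionally need enough payload for the control struct
 * before it may be dereferenced.
 */
static int payload_ok(size_t len, size_t hdrlen, size_t size, int is_ctrl)
{
    if (!size || (size & 3) || len != size + hdrlen)
        return 0;
    if (is_ctrl && size < sizeof(struct ctrl_pkt))
        return 0;
    return 1;
}

int main(void)
{
    printf("%d\n", payload_ok(32, 24, 8, 1));   /* 1: consistent and big enough */
    printf("%d\n", payload_ok(32, 24, 6, 0));   /* 0: not 4-byte aligned */
    printf("%d\n", payload_ok(28, 24, 4, 1));   /* 0: too small for the ctrl struct */
    return 0;
}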
ret : -EINVAL; if (cmpxchg(&frmr->fr_state, diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 0885b22e5c0e..accd35c05577 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -21,6 +21,8 @@ config AF_RXRPC See Documentation/networking/rxrpc.rst. +if AF_RXRPC + config AF_RXRPC_IPV6 bool "IPv6 support for RxRPC" depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) @@ -30,7 +32,6 @@ config AF_RXRPC_IPV6 config AF_RXRPC_INJECT_LOSS bool "Inject packet loss into RxRPC packet stream" - depends on AF_RXRPC help Say Y here to inject packet loss by discarding some received and some transmitted packets. @@ -38,7 +39,6 @@ config AF_RXRPC_INJECT_LOSS config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" - depends on AF_RXRPC help Say Y here to make runtime controllable debugging messages appear. @@ -47,7 +47,6 @@ config AF_RXRPC_DEBUG config RXKAD bool "RxRPC Kerberos security" - depends on AF_RXRPC select CRYPTO select CRYPTO_MANAGER select CRYPTO_SKCIPHER @@ -58,3 +57,5 @@ config RXKAD through the use of the key retention service. See Documentation/networking/rxrpc.rst. + +endif diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4a7043a4e5d6..2ef8f5a6205a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -634,6 +634,7 @@ static void tcf_block_offload_init(struct flow_block_offload *bo, bo->block_shared = shared; bo->extack = extack; bo->sch = sch; + bo->cb_list_head = &flow_block->cb_list; INIT_LIST_HEAD(&bo->cb_list); } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index ecc5c4d93779..3c2300d14468 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -720,7 +720,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, skip_hash: if (flow_override) flow_hash = flow_override - 1; - else if (use_skbhash) + else if (use_skbhash && (flow_mode & CAKE_FLOW_FLOWS)) flow_hash = skb->hash; if (host_override) { dsthost_hash = host_override - 1; diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index 925924fab1ab..1f857ffd1ac2 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -660,6 +660,13 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); q->nbands = nbands; + for (i = nstrict; i < q->nstrict; i++) { + INIT_LIST_HEAD(&q->classes[i].alist); + if (q->classes[i].qdisc->q.qlen) { + list_add_tail(&q->classes[i].alist, &q->active); + q->classes[i].deficit = quanta[i]; + } + } q->nstrict = nstrict; memcpy(q->prio2band, priomap, sizeof(priomap)); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 81ea8332547a..5067a6e5d4fd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -125,6 +125,7 @@ struct htb_class { struct htb_class_leaf { int deficit[TC_HTB_MAXDEPTH]; struct Qdisc *q; + struct netdev_queue *offload_queue; } leaf; struct htb_class_inner { struct htb_prio clprio[TC_HTB_NUMPRIO]; @@ -1411,24 +1412,47 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) return old_q; } -static void htb_offload_move_qdisc(struct Qdisc *sch, u16 qid_old, u16 qid_new) +static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl) +{ + struct netdev_queue *queue; + + queue = cl->leaf.offload_queue; + if (!(cl->leaf.q->flags & TCQ_F_BUILTIN)) + WARN_ON(cl->leaf.q->dev_queue != queue); + + return queue; +} + +static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, + struct htb_class *cl_new, bool destroying) { struct netdev_queue *queue_old, *queue_new; struct net_device *dev = qdisc_dev(sch); - struct Qdisc *qdisc; - 
queue_old = netdev_get_tx_queue(dev, qid_old); - queue_new = netdev_get_tx_queue(dev, qid_new); + queue_old = htb_offload_get_queue(cl_old); + queue_new = htb_offload_get_queue(cl_new); - if (dev->flags & IFF_UP) - dev_deactivate(dev); - qdisc = dev_graft_qdisc(queue_old, NULL); - qdisc->dev_queue = queue_new; - qdisc = dev_graft_qdisc(queue_new, qdisc); - if (dev->flags & IFF_UP) - dev_activate(dev); + if (!destroying) { + struct Qdisc *qdisc; - WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); + if (dev->flags & IFF_UP) + dev_deactivate(dev); + qdisc = dev_graft_qdisc(queue_old, NULL); + WARN_ON(qdisc != cl_old->leaf.q); + } + + if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN)) + cl_old->leaf.q->dev_queue = queue_new; + cl_old->leaf.offload_queue = queue_new; + + if (!destroying) { + struct Qdisc *qdisc; + + qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q); + if (dev->flags & IFF_UP) + dev_activate(dev); + WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); + } } static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, @@ -1442,10 +1466,8 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level) return -EINVAL; - if (q->offload) { - dev_queue = new->dev_queue; - WARN_ON(dev_queue != cl->leaf.q->dev_queue); - } + if (q->offload) + dev_queue = htb_offload_get_queue(cl); if (!new) { new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, @@ -1514,6 +1536,8 @@ static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, parent->ctokens = parent->cbuffer; parent->t_c = ktime_get_ns(); parent->cmode = HTB_CAN_SEND; + if (q->offload) + parent->leaf.offload_queue = cl->leaf.offload_queue; } static void htb_parent_to_leaf_offload(struct Qdisc *sch, @@ -1534,6 +1558,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, struct netlink_ext_ack *extack) { struct tc_htb_qopt_offload offload_opt; + struct netdev_queue *dev_queue; struct Qdisc *q = cl->leaf.q; struct Qdisc *old = NULL; int err; @@ -1542,16 +1567,15 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, return -EINVAL; WARN_ON(!q); - if (!destroying) { - /* On destroy of HTB, two cases are possible: - * 1. q is a normal qdisc, but q->dev_queue has noop qdisc. - * 2. q is a noop qdisc (for nodes that were inner), - * q->dev_queue is noop_netdev_queue. + dev_queue = htb_offload_get_queue(cl); + old = htb_graft_helper(dev_queue, NULL); + if (destroying) + /* Before HTB is destroyed, the kernel grafts noop_qdisc to + * all queues. 
*/ - old = htb_graft_helper(q->dev_queue, NULL); - WARN_ON(!old); + WARN_ON(!(old->flags & TCQ_F_BUILTIN)); + else WARN_ON(old != q); - } if (cl->parent) { cl->parent->bstats_bias.bytes += q->bstats.bytes; @@ -1570,18 +1594,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, if (!err || destroying) qdisc_put(old); else - htb_graft_helper(q->dev_queue, old); + htb_graft_helper(dev_queue, old); if (last_child) return err; - if (!err && offload_opt.moved_qid != 0) { - if (destroying) - q->dev_queue = netdev_get_tx_queue(qdisc_dev(sch), - offload_opt.qid); - else - htb_offload_move_qdisc(sch, offload_opt.moved_qid, - offload_opt.qid); + if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) { + u32 classid = TC_H_MAJ(sch->handle) | + TC_H_MIN(offload_opt.classid); + struct htb_class *moved_cl = htb_find(classid, sch); + + htb_offload_move_qdisc(sch, moved_cl, cl, destroying); } return err; @@ -1704,9 +1727,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg, } if (last_child) { - struct netdev_queue *dev_queue; + struct netdev_queue *dev_queue = sch->dev_queue; + + if (q->offload) + dev_queue = htb_offload_get_queue(cl); - dev_queue = q->offload ? cl->leaf.q->dev_queue : sch->dev_queue; new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid, NULL); @@ -1878,7 +1903,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); } else { /* First child. */ - dev_queue = parent->leaf.q->dev_queue; + dev_queue = htb_offload_get_queue(parent); old_q = htb_graft_helper(dev_queue, NULL); WARN_ON(old_q != parent->leaf.q); offload_opt = (struct tc_htb_qopt_offload) { @@ -1935,6 +1960,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* leaf (we) needs elementary qdisc */ cl->leaf.q = new_q ? 
new_q : &noop_qdisc; + if (q->offload) + cl->leaf.offload_queue = dev_queue; cl->parent = parent; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 9c79374457a0..1ab2fc933a21 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1513,7 +1513,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, taprio_set_picos_per_byte(dev, q); if (mqprio) { - netdev_set_num_tc(dev, mqprio->num_tc); + err = netdev_set_num_tc(dev, mqprio->num_tc); + if (err) + goto free_sched; for (i = 0; i < mqprio->num_tc; i++) netdev_set_tc_queue(dev, i, mqprio->count[i], diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d66a8e44a1ae..dbb41821b1b8 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -835,7 +835,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) rqstp->rq_stime = ktime_get(); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); - } + } else + svc_xprt_received(xprt); out: trace_svc_handle_xprt(xprt, len); return len; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b15b2b1b2f38..e3105ba407c7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1518,7 +1518,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); - if (timeout) { + if (dlen && timeout) { timeout = msecs_to_jiffies(timeout); tipc_wait_for_connect(sock, &timeout); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 15c1e4e4012d..7cad52ba9cde 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1934,7 +1934,6 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other return err; skb_put(skb, 1); - skb->len = 1; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); if (err) { @@ -1943,11 +1942,19 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other } unix_state_lock(other); + + if (sock_flag(other, SOCK_DEAD) || + (other->sk_shutdown & RCV_SHUTDOWN)) { + unix_state_unlock(other); + kfree_skb(skb); + return -EPIPE; + } + maybe_add_creds(skb, sock, other); skb_get(skb); if (ousk->oob_skb) - kfree_skb(ousk->oob_skb); + consume_skb(ousk->oob_skb); ousk->oob_skb = skb; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dceed5b5b226..bf7cd4752547 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -759,6 +759,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT }, [NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy), [NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG }, + [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 }, + [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 }, + [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 }, + [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy), }; /* policy for the key attributes */ @@ -14800,6 +14804,106 @@ bad_tid_conf: return ret; } +static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_color_change_settings params = {}; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct nlattr **tb; + u16 offset; + int err; + + if (!rdev->ops->color_change) + return -EOPNOTSUPP; + + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BSS_COLOR)) + return -EOPNOTSUPP; + + if (wdev->iftype 
!= NL80211_IFTYPE_AP) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] || + !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] || + !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS]) + return -EINVAL; + + params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]); + params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]); + + err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next); + if (err) + return err; + + tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + + err = nla_parse_nested(tb, NL80211_ATTR_MAX, + info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS], + nl80211_policy, info->extack); + if (err) + goto out; + + err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change); + if (err) + goto out; + + if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { + err = -EINVAL; + goto out; + } + + if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) { + err = -EINVAL; + goto out; + } + + offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]); + if (offset >= params.beacon_color_change.tail_len) { + err = -EINVAL; + goto out; + } + + if (params.beacon_color_change.tail[offset] != params.count) { + err = -EINVAL; + goto out; + } + + params.counter_offset_beacon = offset; + + if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { + if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) != + sizeof(u16)) { + err = -EINVAL; + goto out; + } + + offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]); + if (offset >= params.beacon_color_change.probe_resp_len) { + err = -EINVAL; + goto out; + } + + if (params.beacon_color_change.probe_resp[offset] != + params.count) { + err = -EINVAL; + goto out; + } + + params.counter_offset_presp = offset; + } + + wdev_lock(wdev); + err = rdev_color_change(rdev, dev, &params); + wdev_unlock(wdev); + +out: + kfree(tb); + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -15795,6 +15899,14 @@ static const struct genl_small_ops nl80211_small_ops[] = { .internal_flags = NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, + .doit = nl80211_color_change, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -17424,6 +17536,51 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_ch_switch_started_notify); +int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp, + enum nl80211_commands cmd, u8 count, + u64 color_bitmap) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + ASSERT_WDEV_LOCK(wdev); + + trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED && + nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count)) + goto nla_put_failure; + + if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION && + nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP, + color_bitmap, NL80211_ATTR_PAD)) + goto nla_put_failure; + + 
genlmsg_end(msg, hdr); + + return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_MLME, gfp); + +nla_put_failure: + nlmsg_free(msg); + return -EINVAL; +} +EXPORT_SYMBOL(cfg80211_bss_color_notify); + void nl80211_radar_notify(struct cfg80211_registered_device *rdev, const struct cfg80211_chan_def *chandef, diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 36f1b59a78bf..ae2e1a896461 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -115,23 +115,22 @@ int ieee80211_radiotap_iterator_init( iterator->_max_length = get_unaligned_le16(&radiotap_header->it_len); iterator->_arg_index = 0; iterator->_bitmap_shifter = get_unaligned_le32(&radiotap_header->it_present); - iterator->_arg = (uint8_t *)radiotap_header + sizeof(*radiotap_header); + iterator->_arg = (uint8_t *)radiotap_header->it_optional; iterator->_reset_on_ext = 0; - iterator->_next_bitmap = &radiotap_header->it_present; - iterator->_next_bitmap++; + iterator->_next_bitmap = radiotap_header->it_optional; iterator->_vns = vns; iterator->current_namespace = &radiotap_ns; iterator->is_radiotap_ns = 1; /* find payload start allowing for extended bitmap(s) */ - if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) { + if (iterator->_bitmap_shifter & (BIT(IEEE80211_RADIOTAP_EXT))) { if ((unsigned long)iterator->_arg - (unsigned long)iterator->_rtheader + sizeof(uint32_t) > (unsigned long)iterator->_max_length) return -EINVAL; while (get_unaligned_le32(iterator->_arg) & - (1 << IEEE80211_RADIOTAP_EXT)) { + (BIT(IEEE80211_RADIOTAP_EXT))) { iterator->_arg += sizeof(uint32_t); /* diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index b1d37f582dc6..ce6bf218a1a3 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1368,4 +1368,17 @@ static inline int rdev_set_sar_specs(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_color_change(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_color_change_settings *params) +{ + int ret; + + trace_rdev_color_change(&rdev->wiphy, dev, params); + ret = rdev->ops->color_change(&rdev->wiphy, dev, params); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c2d0ff7f089f..df87c7f3a049 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -171,9 +171,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) { const struct ieee80211_regdomain *regd = NULL; const struct ieee80211_regdomain *wiphy_regd = NULL; + enum nl80211_dfs_regions dfs_region; rcu_read_lock(); regd = get_cfg80211_regdom(); + dfs_region = regd->dfs_region; if (!wiphy) goto out; @@ -182,6 +184,11 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) if (!wiphy_regd) goto out; + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { + dfs_region = wiphy_regd->dfs_region; + goto out; + } + if (wiphy_regd->dfs_region == regd->dfs_region) goto out; @@ -193,7 +200,7 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy) out: rcu_read_unlock(); - return regd->dfs_region; + return dfs_region; } static void rcu_free_regdom(const struct ieee80211_regdomain *r) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 440bce5f0274..19b78d472283 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3597,6 +3597,52 @@ TRACE_EVENT(rdev_set_sar_specs, WIPHY_PR_ARG, __entry->type, __entry->num) ); 
+TRACE_EVENT(rdev_color_change, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_color_change_settings *params), + TP_ARGS(wiphy, netdev, params), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u8, count) + __field(u16, bcn_ofs) + __field(u16, pres_ofs) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->count = params->count; + __entry->bcn_ofs = params->counter_offset_beacon; + __entry->pres_ofs = params->counter_offset_presp; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT + ", count: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, + __entry->count) +); + +TRACE_EVENT(cfg80211_bss_color_notify, + TP_PROTO(struct net_device *netdev, + enum nl80211_commands cmd, + u8 count, u64 color_bitmap), + TP_ARGS(netdev, cmd, count, color_bitmap), + TP_STRUCT__entry( + NETDEV_ENTRY + __field(u32, cmd) + __field(u8, count) + __field(u64, color_bitmap) + ), + TP_fast_assign( + NETDEV_ASSIGN; + __entry->cmd = cmd; + __entry->count = count; + __entry->color_bitmap = color_bitmap; + ), + TP_printk(NETDEV_PR_FMT ", cmd: %x, count: %u, bitmap: %llx", + NETDEV_PR_ARG, __entry->cmd, __entry->count, + __entry->color_bitmap) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7f881f5a5897..37d17a79617c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3157,6 +3157,11 @@ ok: return dst; nopol: + if (!(dst_orig->dev->flags & IFF_LOOPBACK) && + !xfrm_default_allow(net, dir)) { + err = -EPERM; + goto error; + } if (!(flags & XFRM_LOOKUP_ICMP)) { dst = dst_orig; goto ok; @@ -3545,6 +3550,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) { + if (!xfrm_default_allow(net, dir)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); + return 0; + } + if (sp && secpath_has_nontransport(sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); @@ -3599,6 +3609,12 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; + + if (!xfrm_default_allow(net, dir) && !xfrm_nr) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); + goto reject; + } + if (npols > 1) { xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); tpp = stp; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7aff641c717d..03b66d154b2b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1961,6 +1961,59 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, return skb; } +static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct net *net = sock_net(skb->sk); + struct xfrm_userpolicy_default *up = nlmsg_data(nlh); + u8 dirmask; + u8 old_default = net->xfrm.policy_default; + + if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX) + return -EINVAL; + + dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK; + + net->xfrm.policy_default = (old_default & (0xff ^ dirmask)) + | (up->action << up->dirmask); + + rt_genid_bump_all(net); + + return 0; +} + +static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, + struct nlattr **attrs) +{ + struct sk_buff *r_skb; + struct nlmsghdr *r_nlh; + struct net *net = sock_net(skb->sk); + struct xfrm_userpolicy_default *r_up, *up; + int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default)); + u32 portid = NETLINK_CB(skb).portid; + u32 seq = nlh->nlmsg_seq; + + up = nlmsg_data(nlh); + + r_skb = nlmsg_new(len, GFP_ATOMIC); + if 
(!r_skb) + return -ENOMEM; + + r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0); + if (!r_nlh) { + kfree_skb(r_skb); + return -EMSGSIZE; + } + + r_up = nlmsg_data(r_nlh); + + r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask); + r_up->dirmask = up->dirmask; + nlmsg_end(r_skb, r_nlh); + + return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); +} + static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { @@ -2664,6 +2717,8 @@ const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), + [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), + [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; EXPORT_SYMBOL_GPL(xfrm_msg_min); @@ -2743,6 +2798,8 @@ static const struct xfrm_link { .nla_pol = xfrma_spd_policy, .nla_max = XFRMA_SPD_MAX }, [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, + [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_set_default }, + [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default }, }; static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, |