diff options
Diffstat (limited to 'net')
72 files changed, 1174 insertions, 1087 deletions
diff --git a/net/can/Kconfig b/net/can/Kconfig index a15c0e0d1fc7..a4399be54ff4 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -11,7 +11,7 @@ menuconfig CAN 1991, mainly for automotive, but now widely used in marine (NMEA2000), industrial, and medical applications. More information on the CAN network protocol family PF_CAN - is contained in <Documentation/networking/can.txt>. + is contained in <Documentation/networking/can.rst>. If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. diff --git a/net/core/dev.c b/net/core/dev.c index 94435cd09072..4670ccabe23a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1694,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); /** * call_netdevice_notifiers_info - call all network notifier blocks * @val: value passed unmodified to notifier function - * @dev: net_device pointer passed unmodified to notifier function * @info: notifier information data * * Call all network notifier blocks. Parameters and return value @@ -3167,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len = skb_transport_header(skb) - skb_mac_header(skb); /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; + struct tcphdr _tcphdr; + + th = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_tcphdr), &_tcphdr); + if (likely(th)) + hdr_len += __tcp_hdrlen(th); + } else { + struct udphdr _udphdr; + + if (skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_udphdr), &_udphdr)) + hdr_len += sizeof(struct udphdr); + } if (shinfo->gso_type & SKB_GSO_DODGY) gso_segs = DIV_ROUND_UP(skb->len - hdr_len, @@ -6425,6 +6435,7 @@ rollback: * netdev_upper_dev_link - Add a link to the upper device * @dev: device * @upper_dev: new upper device + * @extack: netlink extended ack * * Adds a link to device which is upper to this one. The caller must hold * the RTNL lock. On a failure a negative errno code is returned. @@ -6446,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link); * @upper_dev: new upper device * @upper_priv: upper device private * @upper_info: upper info to be passed down via notifier + * @extack: netlink extended ack * * Adds a link to device which is upper to this one. In this case, only * one master upper device can be linked, although other non-master devices diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 7e690d0ccd05..0ab1af04296c 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -18,26 +18,10 @@ * match. --pb */ -static int dev_ifname(struct net *net, struct ifreq __user *arg) +static int dev_ifname(struct net *net, struct ifreq *ifr) { - struct ifreq ifr; - int error; - - /* - * Fetch the caller's info block. - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - ifr.ifr_name[IFNAMSIZ-1] = 0; - - error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex); - if (error) - return error; - - if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) - return -EFAULT; - return 0; + ifr->ifr_name[IFNAMSIZ-1] = 0; + return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex); } static gifconf_func_t *gifconf_list[NPROTO]; @@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf); * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(struct net *net, char __user *arg) +int dev_ifconf(struct net *net, struct ifconf *ifc, int size) { - struct ifconf ifc; struct net_device *dev; char __user *pos; int len; @@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg) * Fetch the caller's info block. */ - if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) - return -EFAULT; - - pos = ifc.ifc_buf; - len = ifc.ifc_len; + pos = ifc->ifc_buf; + len = ifc->ifc_len; /* * Loop over the interfaces, and write an info block for each. @@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg) if (gifconf_list[i]) { int done; if (!pos) - done = gifconf_list[i](dev, NULL, 0); + done = gifconf_list[i](dev, NULL, 0, size); else done = gifconf_list[i](dev, pos + total, - len - total); + len - total, size); if (done < 0) return -EFAULT; total += done; @@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg) /* * All done. Write the updated control block back to the caller. */ - ifc.ifc_len = total; + ifc->ifc_len = total; /* * Both BSD and Solaris return 0 here, so we do too. */ - return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; + return 0; } /* @@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load); * positive or a negative errno code on error. */ -int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout) { - struct ifreq ifr; int ret; char *colon; - /* One special case: SIOCGIFCONF takes ifconf argument - and requires shared lock, because it sleeps writing - to user space. - */ - - if (cmd == SIOCGIFCONF) { - rtnl_lock(); - ret = dev_ifconf(net, (char __user *) arg); - rtnl_unlock(); - return ret; - } + if (need_copyout) + *need_copyout = true; if (cmd == SIOCGIFNAME) - return dev_ifname(net, (struct ifreq __user *)arg); - - /* - * Take care of Wireless Extensions. Unfortunately struct iwreq - * isn't a proper subset of struct ifreq (it's 8 byte shorter) - * so we need to treat it specially, otherwise applications may - * fault if the struct they're passing happens to land at the - * end of a mapped page. - */ - if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - struct iwreq iwr; - - if (copy_from_user(&iwr, arg, sizeof(iwr))) - return -EFAULT; - - iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + return dev_ifname(net, ifr); - return wext_handle_ioctl(net, &iwr, cmd, arg); - } - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - return -EFAULT; - - ifr.ifr_name[IFNAMSIZ-1] = 0; + ifr->ifr_name[IFNAMSIZ-1] = 0; - colon = strchr(ifr.ifr_name, ':'); + colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; + dev_load(net, ifr->ifr_name); + /* * See which interface the caller is talking about. */ @@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: - dev_load(net, ifr.ifr_name); rcu_read_lock(); - ret = dev_ifsioc_locked(net, &ifr, cmd); + ret = dev_ifsioc_locked(net, ifr, cmd); rcu_read_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; case SIOCETHTOOL: - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ethtool(net, &ifr); + ret = dev_ethtool(net, ifr); rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; /* @@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSIFNAME: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); - if (!ret) { - if (colon) - *colon = ':'; - if (copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; - } + if (colon) + *colon = ':'; return ret; /* @@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) /* fall through */ case SIOCBONDSLAVEINFOQUERY: case SIOCBONDINFOQUERY: - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); + if (need_copyout) + *need_copyout = false; return ret; case SIOCGIFMEM: @@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) cmd == SIOCGHWTSTAMP || (cmd >= SIOCDEVPRIVATE && cmd <= SIOCDEVPRIVATE + 15)) { - dev_load(net, ifr.ifr_name); rtnl_lock(); - ret = dev_ifsioc(net, &ifr, cmd); + ret = dev_ifsioc(net, ifr, cmd); rtnl_unlock(); - if (!ret && copy_to_user(arg, &ifr, - sizeof(struct ifreq))) - ret = -EFAULT; return ret; } return -ENOTTY; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 982861607f88..e38e641e98d5 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev_get_iflink(dev)) return true; - if (dev->priv_flags & IFF_TEAM_PORT) + if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 1ccb953b3b09..3cad5f51afd3 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -439,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags, return net; } +static void unhash_nsid(struct net *net, struct net *last) +{ + struct net *tmp; + /* This function is only called from cleanup_net() work, + * and this work is the only process, that may delete + * a net from net_namespace_list. So, when the below + * is executing, the list may only grow. Thus, we do not + * use for_each_net_rcu() or rtnl_lock(). + */ + for_each_net(tmp) { + int id; + + spin_lock_bh(&tmp->nsid_lock); + id = __peernet2id(tmp, net); + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + spin_unlock_bh(&tmp->nsid_lock); + if (id >= 0) + rtnl_net_notifyid(tmp, RTM_DELNSID, id); + if (tmp == last) + break; + } + spin_lock_bh(&net->nsid_lock); + idr_destroy(&net->netns_ids); + spin_unlock_bh(&net->nsid_lock); +} + static DEFINE_SPINLOCK(cleanup_list_lock); static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ static void cleanup_net(struct work_struct *work) { const struct pernet_operations *ops; - struct net *net, *tmp; + struct net *net, *tmp, *last; struct list_head net_kill_list; LIST_HEAD(net_exit_list); @@ -458,26 +485,25 @@ static void cleanup_net(struct work_struct *work) /* Don't let anyone else find us. */ rtnl_lock(); - list_for_each_entry(net, &net_kill_list, cleanup_list) { + list_for_each_entry(net, &net_kill_list, cleanup_list) list_del_rcu(&net->list); - list_add_tail(&net->exit_list, &net_exit_list); - for_each_net(tmp) { - int id; - - spin_lock_bh(&tmp->nsid_lock); - id = __peernet2id(tmp, net); - if (id >= 0) - idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); - if (id >= 0) - rtnl_net_notifyid(tmp, RTM_DELNSID, id); - } - spin_lock_bh(&net->nsid_lock); - idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + /* Cache last net. After we unlock rtnl, no one new net + * added to net_namespace_list can assign nsid pointer + * to a net from net_kill_list (see peernet2id_alloc()). + * So, we skip them in unhash_nsid(). + * + * Note, that unhash_nsid() does not delete nsid links + * between net_kill_list's nets, as they've already + * deleted from net_namespace_list. But, this would be + * useless anyway, as netns_ids are destroyed there. + */ + last = list_last_entry(&net_namespace_list, struct net, list); + rtnl_unlock(); + list_for_each_entry(net, &net_kill_list, cleanup_list) { + unhash_nsid(net, last); + list_add_tail(&net->exit_list, &net_exit_list); } - rtnl_unlock(); /* * Another CPU might be rcu-iterating the list, wait for it. diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4fcfcb14e7c6..b8ab5c829511 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -184,25 +184,44 @@ #define func_enter() pr_debug("entering %s\n", __func__); +#define PKT_FLAGS \ + pf(IPV6) /* Interface in IPV6 Mode */ \ + pf(IPSRC_RND) /* IP-Src Random */ \ + pf(IPDST_RND) /* IP-Dst Random */ \ + pf(TXSIZE_RND) /* Transmit size is random */ \ + pf(UDPSRC_RND) /* UDP-Src Random */ \ + pf(UDPDST_RND) /* UDP-Dst Random */ \ + pf(UDPCSUM) /* Include UDP checksum */ \ + pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \ + pf(MPLS_RND) /* Random MPLS labels */ \ + pf(QUEUE_MAP_RND) /* queue map Random */ \ + pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \ + pf(FLOW_SEQ) /* Sequential flows */ \ + pf(IPSEC) /* ipsec on for flows */ \ + pf(MACSRC_RND) /* MAC-Src Random */ \ + pf(MACDST_RND) /* MAC-Dst Random */ \ + pf(VID_RND) /* Random VLAN ID */ \ + pf(SVID_RND) /* Random SVLAN ID */ \ + pf(NODE) /* Node memory alloc*/ \ + +#define pf(flag) flag##_SHIFT, +enum pkt_flags { + PKT_FLAGS +}; +#undef pf + /* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ -#define F_VID_RND (1<<9) /* Random VLAN ID */ -#define F_SVID_RND (1<<10) /* Random SVLAN ID */ -#define F_FLOW_SEQ (1<<11) /* Sequential flows */ -#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ -#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ -#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ -#define F_NODE (1<<15) /* Node memory alloc*/ -#define F_UDPCSUM (1<<16) /* Include UDP checksum */ -#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */ +#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT); +PKT_FLAGS +#undef pf + +#define pf(flag) __stringify(flag), +static char *pkt_flag_names[] = { + PKT_FLAGS +}; +#undef pf + +#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names) /* Thread control flag bits */ #define T_STOP (1<<0) /* Stop run */ @@ -534,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) { const struct pktgen_dev *pkt_dev = seq->private; ktime_t stopped; + unsigned int i; u64 idle; seq_printf(seq, @@ -595,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->src_mac_count, pkt_dev->dst_mac_count); if (pkt_dev->nr_labels) { - unsigned int i; seq_puts(seq, " mpls: "); for (i = 0; i < pkt_dev->nr_labels; i++) seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), @@ -631,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, " Flags: "); - if (pkt_dev->flags & F_IPV6) - seq_puts(seq, "IPV6 "); - - if (pkt_dev->flags & F_IPSRC_RND) - seq_puts(seq, "IPSRC_RND "); - - if (pkt_dev->flags & F_IPDST_RND) - seq_puts(seq, "IPDST_RND "); - - if (pkt_dev->flags & F_TXSIZE_RND) - seq_puts(seq, "TXSIZE_RND "); - - if (pkt_dev->flags & F_UDPSRC_RND) - seq_puts(seq, "UDPSRC_RND "); - - if (pkt_dev->flags & F_UDPDST_RND) - seq_puts(seq, "UDPDST_RND "); - - if (pkt_dev->flags & F_UDPCSUM) - seq_puts(seq, "UDPCSUM "); - - if (pkt_dev->flags & F_NO_TIMESTAMP) - seq_puts(seq, "NO_TIMESTAMP "); - - if (pkt_dev->flags & F_MPLS_RND) - seq_puts(seq, "MPLS_RND "); - - if (pkt_dev->flags & F_QUEUE_MAP_RND) - seq_puts(seq, "QUEUE_MAP_RND "); + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (i == F_FLOW_SEQ) + if (!pkt_dev->cflows) + continue; - if (pkt_dev->flags & F_QUEUE_MAP_CPU) - seq_puts(seq, "QUEUE_MAP_CPU "); - - if (pkt_dev->cflows) { - if (pkt_dev->flags & F_FLOW_SEQ) - seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/ - else - seq_puts(seq, "FLOW_RND "); - } + if (pkt_dev->flags & (1 << i)) + seq_printf(seq, "%s ", pkt_flag_names[i]); + else if (i == F_FLOW_SEQ) + seq_puts(seq, "FLOW_RND "); #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) { - seq_puts(seq, "IPSEC "); - if (pkt_dev->spi) + if (i == F_IPSEC && pkt_dev->spi) seq_printf(seq, "spi:%u", pkt_dev->spi); - } #endif - - if (pkt_dev->flags & F_MACSRC_RND) - seq_puts(seq, "MACSRC_RND "); - - if (pkt_dev->flags & F_MACDST_RND) - seq_puts(seq, "MACDST_RND "); - - if (pkt_dev->flags & F_VID_RND) - seq_puts(seq, "VID_RND "); - - if (pkt_dev->flags & F_SVID_RND) - seq_puts(seq, "SVID_RND "); - - if (pkt_dev->flags & F_NODE) - seq_puts(seq, "NODE_ALLOC "); + } seq_puts(seq, "\n"); @@ -858,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) return i; } +static __u32 pktgen_read_flag(const char *f, bool *disable) +{ + __u32 i; + + if (f[0] == '!') { + *disable = true; + f++; + } + + for (i = 0; i < NR_PKT_FLAGS; i++) { + if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT) + continue; + + /* allow only disabling ipv6 flag */ + if (!*disable && i == IPV6_SHIFT) + continue; + + if (strcmp(f, pkt_flag_names[i]) == 0) + return 1 << i; + } + + if (strcmp(f, "FLOW_RND") == 0) { + *disable = !*disable; + return F_FLOW_SEQ; + } + + return 0; +} + static ssize_t pktgen_if_write(struct file *file, const char __user * user_buffer, size_t count, loff_t * offset) @@ -1215,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "flag")) { + __u32 flag; char f[32]; + bool disable = false; + memset(f, 0, 32); len = strn_len(&user_buffer[i], sizeof(f) - 1); if (len < 0) @@ -1224,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file, if (copy_from_user(f, &user_buffer[i], len)) return -EFAULT; i += len; - if (strcmp(f, "IPSRC_RND") == 0) - pkt_dev->flags |= F_IPSRC_RND; - - else if (strcmp(f, "!IPSRC_RND") == 0) - pkt_dev->flags &= ~F_IPSRC_RND; - - else if (strcmp(f, "TXSIZE_RND") == 0) - pkt_dev->flags |= F_TXSIZE_RND; - - else if (strcmp(f, "!TXSIZE_RND") == 0) - pkt_dev->flags &= ~F_TXSIZE_RND; - - else if (strcmp(f, "IPDST_RND") == 0) - pkt_dev->flags |= F_IPDST_RND; - - else if (strcmp(f, "!IPDST_RND") == 0) - pkt_dev->flags &= ~F_IPDST_RND; - - else if (strcmp(f, "UDPSRC_RND") == 0) - pkt_dev->flags |= F_UDPSRC_RND; - - else if (strcmp(f, "!UDPSRC_RND") == 0) - pkt_dev->flags &= ~F_UDPSRC_RND; - - else if (strcmp(f, "UDPDST_RND") == 0) - pkt_dev->flags |= F_UDPDST_RND; - - else if (strcmp(f, "!UDPDST_RND") == 0) - pkt_dev->flags &= ~F_UDPDST_RND; - - else if (strcmp(f, "MACSRC_RND") == 0) - pkt_dev->flags |= F_MACSRC_RND; - - else if (strcmp(f, "!MACSRC_RND") == 0) - pkt_dev->flags &= ~F_MACSRC_RND; - - else if (strcmp(f, "MACDST_RND") == 0) - pkt_dev->flags |= F_MACDST_RND; - - else if (strcmp(f, "!MACDST_RND") == 0) - pkt_dev->flags &= ~F_MACDST_RND; - - else if (strcmp(f, "MPLS_RND") == 0) - pkt_dev->flags |= F_MPLS_RND; - - else if (strcmp(f, "!MPLS_RND") == 0) - pkt_dev->flags &= ~F_MPLS_RND; - else if (strcmp(f, "VID_RND") == 0) - pkt_dev->flags |= F_VID_RND; + flag = pktgen_read_flag(f, &disable); - else if (strcmp(f, "!VID_RND") == 0) - pkt_dev->flags &= ~F_VID_RND; - - else if (strcmp(f, "SVID_RND") == 0) - pkt_dev->flags |= F_SVID_RND; - - else if (strcmp(f, "!SVID_RND") == 0) - pkt_dev->flags &= ~F_SVID_RND; - - else if (strcmp(f, "FLOW_SEQ") == 0) - pkt_dev->flags |= F_FLOW_SEQ; - - else if (strcmp(f, "QUEUE_MAP_RND") == 0) - pkt_dev->flags |= F_QUEUE_MAP_RND; - - else if (strcmp(f, "!QUEUE_MAP_RND") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_RND; - - else if (strcmp(f, "QUEUE_MAP_CPU") == 0) - pkt_dev->flags |= F_QUEUE_MAP_CPU; - - else if (strcmp(f, "!QUEUE_MAP_CPU") == 0) - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; -#ifdef CONFIG_XFRM - else if (strcmp(f, "IPSEC") == 0) - pkt_dev->flags |= F_IPSEC_ON; -#endif - - else if (strcmp(f, "!IPV6") == 0) - pkt_dev->flags &= ~F_IPV6; - - else if (strcmp(f, "NODE_ALLOC") == 0) - pkt_dev->flags |= F_NODE; - - else if (strcmp(f, "!NODE_ALLOC") == 0) - pkt_dev->flags &= ~F_NODE; - - else if (strcmp(f, "UDPCSUM") == 0) - pkt_dev->flags |= F_UDPCSUM; - - else if (strcmp(f, "!UDPCSUM") == 0) - pkt_dev->flags &= ~F_UDPCSUM; - - else if (strcmp(f, "NO_TIMESTAMP") == 0) - pkt_dev->flags |= F_NO_TIMESTAMP; - - else if (strcmp(f, "!NO_TIMESTAMP") == 0) - pkt_dev->flags &= ~F_NO_TIMESTAMP; - - else { + if (flag) { + if (disable) + pkt_dev->flags &= ~flag; + else + pkt_dev->flags |= flag; + } else { sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, @@ -2541,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; #ifdef CONFIG_XFRM - if (pkt_dev->flags & F_IPSEC_ON) + if (pkt_dev->flags & F_IPSEC) get_ipsec_sa(pkt_dev, flow); #endif pkt_dev->nflows++; @@ -2646,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev) static int process_ipsec(struct pktgen_dev *pkt_dev, struct sk_buff *skb, __be16 protocol) { - if (pkt_dev->flags & F_IPSEC_ON) { + if (pkt_dev->flags & F_IPSEC) { struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x; int nhead = 0; if (x) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 21f9bed11988..adf50fbc4c13 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: case DSA_PORT_TYPE_DSA: - err = dsa_port_fixed_link_register_of(dp); + err = dsa_port_link_register_of(dp); if (err) { - dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", + dev_err(ds->dev, "failed to setup link for port %d.%d\n", ds->index, dp->index); return err; } - break; case DSA_PORT_TYPE_USER: err = dsa_slave_create(dp); @@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp) break; case DSA_PORT_TYPE_CPU: case DSA_PORT_TYPE_DSA: - dsa_port_fixed_link_unregister_of(dp); + dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: if (dp->slave) { diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index cefb0c3c6d51..70de7895e5b8 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -166,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); -int dsa_port_fixed_link_register_of(struct dsa_port *dp); -void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); +int dsa_port_link_register_of(struct dsa_port *dp); +void dsa_port_link_unregister_of(struct dsa_port *dp); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index aa56d3fb5da4..cb54b81d0bd9 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds) if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - ret = dsa_port_fixed_link_register_of(&ds->ports[port]); + ret = dsa_port_link_register_of(&ds->ports[port]); if (ret) return ret; } @@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds) for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dsa_port_fixed_link_unregister_of(&ds->ports[port]); + dsa_port_link_unregister_of(&ds->ports[port]); } if (ds->slave_mii_bus && ds->ops->phy_read) diff --git a/net/dsa/port.c b/net/dsa/port.c index bb4be2679904..7acc1169d75e 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp, return 0; } -int dsa_port_fixed_link_register_of(struct dsa_port *dp) +static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) +{ + struct device_node *port_dn = dp->dn; + struct device_node *phy_dn; + struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int err = 0; + + phy_dn = of_parse_phandle(port_dn, "phy-handle", 0); + if (!phy_dn) + return 0; + + phydev = of_phy_find_device(phy_dn); + if (!phydev) { + err = -EPROBE_DEFER; + goto err_put_of; + } + + if (enable) { + err = genphy_config_init(phydev); + if (err < 0) + goto err_put_dev; + + err = genphy_resume(phydev); + if (err < 0) + goto err_put_dev; + + err = genphy_read_status(phydev); + if (err < 0) + goto err_put_dev; + } else { + err = genphy_suspend(phydev); + if (err < 0) + goto err_put_dev; + } + + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); + + dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev)); + +err_put_dev: + put_device(&phydev->mdio.dev); +err_put_of: + of_node_put(phy_dn); + return err; +} + +static int dsa_port_fixed_link_register_of(struct dsa_port *dp) { struct device_node *dn = dp->dn; struct dsa_switch *ds = dp->ds; @@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp) int mode; int err; - if (of_phy_is_fixed_link(dn)) { - err = of_phy_register_fixed_link(dn); - if (err) { - dev_err(ds->dev, - "failed to register the fixed PHY of port %d\n", - port); - return err; - } + err = of_phy_register_fixed_link(dn); + if (err) { + dev_err(ds->dev, + "failed to register the fixed PHY of port %d\n", + port); + return err; + } - phydev = of_phy_find_device(dn); + phydev = of_phy_find_device(dn); - mode = of_get_phy_mode(dn); - if (mode < 0) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; + mode = of_get_phy_mode(dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + phydev->interface = mode; - genphy_config_init(phydev); - genphy_read_status(phydev); + genphy_config_init(phydev); + genphy_read_status(phydev); - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); - put_device(&phydev->mdio.dev); - } + put_device(&phydev->mdio.dev); return 0; } -void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) +int dsa_port_link_register_of(struct dsa_port *dp) { - struct device_node *dn = dp->dn; + if (of_phy_is_fixed_link(dp->dn)) + return dsa_port_fixed_link_register_of(dp); + else + return dsa_port_setup_phy_of(dp, true); +} - if (of_phy_is_fixed_link(dn)) - of_phy_deregister_fixed_link(dn); +void dsa_port_link_unregister_of(struct dsa_port *dp) +{ + if (of_phy_is_fixed_link(dp->dn)) + of_phy_deregister_fixed_link(dp->dn); + else + dsa_port_setup_phy_of(dp, false); } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 54cccdd8b1e3..c24008daa3d8 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -872,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; int err = 0; struct net *net = sock_net(sk); + void __user *p = (void __user *)arg; + struct ifreq ifr; + struct rtentry rt; switch (cmd) { case SIOCGSTAMP: @@ -882,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; case SIOCADDRT: case SIOCDELRT: + if (copy_from_user(&rt, p, sizeof(struct rtentry))) + return -EFAULT; + err = ip_rt_ioctl(net, cmd, &rt); + break; case SIOCRTMSG: - err = ip_rt_ioctl(net, cmd, (void __user *)arg); + err = -EINVAL; break; case SIOCDARP: case SIOCGARP: @@ -891,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = arp_ioctl(net, cmd, (void __user *)arg); break; case SIOCGIFADDR: - case SIOCSIFADDR: case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: case SIOCGIFNETMASK: - case SIOCSIFNETMASK: case SIOCGIFDSTADDR: + case SIOCGIFPFLAGS: + if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + return -EFAULT; + err = devinet_ioctl(net, cmd, &ifr); + if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq))) + err = -EFAULT; + break; + + case SIOCSIFADDR: + case SIOCSIFBRDADDR: + case SIOCSIFNETMASK: case SIOCSIFDSTADDR: case SIOCSIFPFLAGS: - case SIOCGIFPFLAGS: case SIOCSIFFLAGS: - err = devinet_ioctl(net, cmd, (void __user *)arg); + if (copy_from_user(&ifr, p, sizeof(struct ifreq))) + return -EFAULT; + err = devinet_ioctl(net, cmd, &ifr); break; default: if (sk->sk_prot->ioctl) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7a93359fbc72..e056c0067f2c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr) } -int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr) { - struct ifreq ifr; struct sockaddr_in sin_orig; - struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr; + struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr; struct in_device *in_dev; struct in_ifaddr **ifap = NULL; struct in_ifaddr *ifa = NULL; @@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) int ret = -EFAULT; int tryaddrmatch = 0; - /* - * Fetch the caller's info block into kernel space - */ - - if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) - goto out; - ifr.ifr_name[IFNAMSIZ - 1] = 0; + ifr->ifr_name[IFNAMSIZ - 1] = 0; /* save original address for comparison */ memcpy(&sin_orig, sin, sizeof(*sin)); - colon = strchr(ifr.ifr_name, ':'); + colon = strchr(ifr->ifr_name, ':'); if (colon) *colon = 0; - dev_load(net, ifr.ifr_name); + dev_load(net, ifr->ifr_name); switch (cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); ret = -ENODEV; - dev = __dev_get_by_name(net, ifr.ifr_name); + dev = __dev_get_by_name(net, ifr->ifr_name); if (!dev) goto done; @@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) This is checked above. */ for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if (!strcmp(ifr.ifr_name, ifa->ifa_label) && + if (!strcmp(ifr->ifr_name, ifa->ifa_label) && sin_orig.sin_addr.s_addr == ifa->ifa_local) { break; /* found */ @@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) { for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) - if (!strcmp(ifr.ifr_name, ifa->ifa_label)) + if (!strcmp(ifr->ifr_name, ifa->ifa_label)) break; } } @@ -1056,19 +1049,19 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) switch (cmd) { case SIOCGIFADDR: /* Get interface address */ sin->sin_addr.s_addr = ifa->ifa_local; - goto rarok; + break; case SIOCGIFBRDADDR: /* Get the broadcast address */ sin->sin_addr.s_addr = ifa->ifa_broadcast; - goto rarok; + break; case SIOCGIFDSTADDR: /* Get the destination address */ sin->sin_addr.s_addr = ifa->ifa_address; - goto rarok; + break; case SIOCGIFNETMASK: /* Get the netmask for the interface */ sin->sin_addr.s_addr = ifa->ifa_mask; - goto rarok; + break; case SIOCSIFFLAGS: if (colon) { @@ -1076,11 +1069,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ifa) break; ret = 0; - if (!(ifr.ifr_flags & IFF_UP)) + if (!(ifr->ifr_flags & IFF_UP)) inet_del_ifa(in_dev, ifap, 1); break; } - ret = dev_change_flags(dev, ifr.ifr_flags); + ret = dev_change_flags(dev, ifr->ifr_flags); break; case SIOCSIFADDR: /* Set interface address (and family) */ @@ -1095,7 +1088,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; INIT_HLIST_NODE(&ifa->hash); if (colon) - memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ); + memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); } else { @@ -1182,28 +1175,27 @@ done: rtnl_unlock(); out: return ret; -rarok: - rtnl_unlock(); - ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0; - goto out; } -static int inet_gifconf(struct net_device *dev, char __user *buf, int len) +static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size) { struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_ifaddr *ifa; struct ifreq ifr; int done = 0; + if (WARN_ON(size > sizeof(struct ifreq))) + goto out; + if (!in_dev) goto out; for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { if (!buf) { - done += sizeof(ifr); + done += size; continue; } - if (len < (int) sizeof(ifr)) + if (len < size) break; memset(&ifr, 0, sizeof(struct ifreq)); strcpy(ifr.ifr_name, ifa->ifa_label); @@ -1212,13 +1204,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len) (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr = ifa->ifa_local; - if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) { + if (copy_to_user(buf + done, &ifr, size)) { done = -EFAULT; break; } - buf += sizeof(struct ifreq); - len -= sizeof(struct ifreq); - done += sizeof(struct ifreq); + len -= size; + done += size; } out: return done; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 32fbd9ba3609..da5635fc52c2 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -118,6 +118,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!xo) return ERR_PTR(-EINVAL); + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + return ERR_PTR(-EINVAL); + x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 08259d078b1c..f05afaf3235c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, * Handle IP routing ioctl calls. * These are used to manipulate the routing tables */ -int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt) { struct fib_config cfg; - struct rtentry rt; int err; switch (cmd) { @@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&rt, arg, sizeof(rt))) - return -EFAULT; - rtnl_lock(); - err = rtentry_to_fib_config(net, cmd, &rt, &cfg); + err = rtentry_to_fib_config(net, cmd, rt, &cfg); if (err == 0) { struct fib_table *tb; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 02f00be12bb0..10f7f74a0831 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev, return htonl(INADDR_ANY); for_ifa(in_dev) { - if (inet_ifa_match(fl4->saddr, ifa)) + if (fl4->saddr == ifa->ifa_local) return fl4->saddr; } endfor_ifa(in_dev); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index b61f2285816d..6ec670fbbbdd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -114,7 +114,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void erspan_build_header(struct sk_buff *skb, - __be32 id, u32 index, + u32 id, u32 index, bool truncate, bool is_ipv4); static unsigned int ipgre_net_id __read_mostly; @@ -273,12 +273,12 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, iph = ip_hdr(skb); ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len); - ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET; + ver = ershdr->ver; /* The original GRE header does not have key field, * Use ERSPAN 10-bit session ID as key. */ - tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); + tpi->key = cpu_to_be32(get_session_id(ershdr)); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags | TUNNEL_KEY, iph->saddr, iph->daddr, tpi->key); @@ -324,14 +324,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, if (ver == 1) { tunnel->index = ntohl(pkt_md->u.index); } else { - u16 md2_flags; - u16 dir, hwid; - - md2_flags = ntohs(pkt_md->u.md2.flags); - dir = (md2_flags & DIR_MASK) >> DIR_OFFSET; - hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET; - tunnel->dir = dir; - tunnel->hwid = hwid; + tunnel->dir = pkt_md->u.md2.dir; + tunnel->hwid = get_hwid(&pkt_md->u.md2); } } @@ -615,19 +609,14 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, } if (version == 1) { - erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), + erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)), ntohl(md->u.index), truncate, true); } else if (version == 2) { - u16 md2_flags; - u8 direction; - u16 hwid; - - md2_flags = ntohs(md->u.md2.flags); - direction = (md2_flags & DIR_MASK) >> DIR_OFFSET; - hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET; - - erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id), - direction, hwid, truncate, true); + erspan_build_header_v2(skb, + ntohl(tunnel_id_to_key32(key->tun_id)), + md->u.md2.dir, + get_hwid(&md->u.md2), + truncate, true); } else { goto err_free_rt; } @@ -733,10 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, /* Push ERSPAN header */ if (tunnel->erspan_ver == 1) - erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, + erspan_build_header(skb, ntohl(tunnel->parms.o_key), + tunnel->index, truncate, true); else - erspan_build_header_v2(skb, tunnel->parms.o_key, + erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 60fb1eb7d7d8..6cc70fa488cb 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, { struct net_device *dev = NULL; int ifindex; + int midx; if (optlen != sizeof(int)) goto e_inval; @@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EADDRNOTAVAIL; if (!dev) break; + + midx = l3mdev_master_ifindex(dev); dev_put(dev); err = -EINVAL; - if (sk->sk_bound_dev_if) + if (sk->sk_bound_dev_if && + (!midx || midx != sk->sk_bound_dev_if)) break; inet->uc_index = ifindex; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5ddb1cb52bd4..141f5e865731 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -711,9 +711,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + if (tunnel->fwmark) { + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + tunnel->fwmark); + } + else { + init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, + skb->mark); + } if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index e9e488e72900..f75802ad960f 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) sin->sin_port = port; } -static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); - set_fs(oldfs); - return res; -} - -static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg); - set_fs(oldfs); - return res; -} - -static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg) -{ - int res; - - mm_segment_t oldfs = get_fs(); - set_fs(get_ds()); - res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg); - set_fs(oldfs); - return res; -} - /* * Set up interface addresses and routes. */ @@ -375,19 +342,19 @@ static int __init ic_setup_if(void) memset(&ir, 0, sizeof(ir)); strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); - if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", err); return -1; } set_sockaddr(sin, ic_netmask, 0); - if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) { pr_err("IP-Config: Unable to set interface netmask (%d)\n", err); return -1; } set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0); - if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) { + if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface broadcast address (%d)\n", err); return -1; @@ -397,11 +364,11 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->dev->name); - ir.ifr_mtu = ic_dev_mtu; - if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) + rtnl_lock(); + if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", ic_dev_mtu, err); + rtnl_unlock(); } return 0; } @@ -423,7 +390,7 @@ static int __init ic_setup_routes(void) set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0); set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0); rm.rt_flags = RTF_UP | RTF_GATEWAY; - if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) { + if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) { pr_err("IP-Config: Cannot add default route (%d)\n", err); return -1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 136544b36a46..7c509697ebc7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -617,8 +617,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; - } else if (!ipc.oif) + } else if (!ipc.oif) { ipc.oif = inet->uc_index; + } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + /* oif is set, packet is to local broadcast and + * and uc_index is set. oif is most likely set + * by sk_bound_dev_if. If uc_index != oif check if the + * oif is an L3 master and uc_index is an L3 slave. + * If so, we want to allow the send using the uc_index. + */ + if (ipc.oif != inet->uc_index && + ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), + inet->uc_index)) { + ipc.oif = inet->uc_index; + } + } flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index b6a2aa1dcf56..4d58e2ce0b5b 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 853321555a4e..3f018f34cf56 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -977,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!saddr) saddr = inet->mc_addr; connected = 0; - } else if (!ipc.oif) + } else if (!ipc.oif) { ipc.oif = inet->uc_index; + } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + /* oif is set, packet is to local broadcast and + * and uc_index is set. oif is most likely set + * by sk_bound_dev_if. If uc_index != oif check if the + * oif is an L3 master and uc_index is an L3 slave. + * If so, we want to allow the send using the uc_index. + */ + if (ipc.oif != inet->uc_index && + ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), + inet->uc_index)) { + ipc.oif = inet->uc_index; + } + } if (connected) rt = (struct rtable *)sk_dst_check(sk, 0); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 01801b77bd0d..ea6e6e7df0ee 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 8affc6d83d58..63faeee989a9 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 44d109c435bc..3fd1ec775dc2 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -145,6 +145,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, if (!xo) return ERR_PTR(-EINVAL); + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) + return ERR_PTR(-EINVAL); + x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a88480193d77..05f070e123e4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -513,8 +513,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, ipv6h = ipv6_hdr(skb); ershdr = (struct erspan_base_hdr *)skb->data; - ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET; - tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); + ver = ershdr->ver; + tpi->key = cpu_to_be32(get_session_id(ershdr)); tunnel = ip6gre_tunnel_lookup(skb->dev, &ipv6h->saddr, &ipv6h->daddr, tpi->key, @@ -565,14 +565,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, if (ver == 1) { tunnel->parms.index = ntohl(pkt_md->u.index); } else { - u16 md2_flags; - u16 dir, hwid; - - md2_flags = ntohs(pkt_md->u.md2.flags); - dir = (md2_flags & DIR_MASK) >> DIR_OFFSET; - hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET; - tunnel->parms.dir = dir; - tunnel->parms.hwid = hwid; + tunnel->parms.dir = pkt_md->u.md2.dir; + tunnel->parms.hwid = get_hwid(&pkt_md->u.md2); } ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); @@ -925,6 +919,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; + __be32 tun_id; tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || @@ -944,23 +939,18 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, if (!md) goto tx_err; + tun_id = tunnel_id_to_key32(key->tun_id); if (md->version == 1) { erspan_build_header(skb, - tunnel_id_to_key32(key->tun_id), + ntohl(tun_id), ntohl(md->u.index), truncate, false); } else if (md->version == 2) { - u16 md2_flags; - u16 dir, hwid; - - md2_flags = ntohs(md->u.md2.flags); - dir = (md2_flags & DIR_MASK) >> DIR_OFFSET; - hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET; - erspan_build_header_v2(skb, - tunnel_id_to_key32(key->tun_id), - dir, hwid, truncate, - false); + ntohl(tun_id), + md->u.md2.dir, + get_hwid(&md->u.md2), + truncate, false); } } else { switch (skb->protocol) { @@ -982,11 +972,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, } if (t->parms.erspan_ver == 1) - erspan_build_header(skb, t->parms.o_key, + erspan_build_header(skb, ntohl(t->parms.o_key), t->parms.index, truncate, false); else - erspan_build_header_v2(skb, t->parms.o_key, + erspan_build_header_v2(skb, ntohl(t->parms.o_key), t->parms.dir, t->parms.hwid, truncate, false); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a4a94452132b..997c7f19ad62 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -174,7 +174,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } -static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) { if (!np->autoflowlabel_set) return ip6_default_np_autolabel(net); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2d4680e0376f..e8ffb5b5d84e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1336,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_AUTOFLOWLABEL: - val = np->autoflowlabel; + val = ip6_autoflowlabel(sock_net(sk), np); break; case IPV6_RECVFRAGSIZE: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f85da2f1e729..fe3966a9c999 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2440,7 +2440,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc, static struct rt6_info *ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg, - const struct in6_addr *gw_addr) + const struct in6_addr *gw_addr, + u32 tbid, int flags) { struct flowi6 fl6 = { .flowi6_oif = cfg->fc_ifindex, @@ -2449,15 +2450,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; - table = fib6_get_table(net, cfg->fc_table); + table = fib6_get_table(net, tbid); if (!table) return NULL; if (!ipv6_addr_any(&cfg->fc_prefsrc)) flags |= RT6_LOOKUP_F_HAS_SADDR; + flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); /* if table lookup failed, fall back to full lookup */ @@ -2469,6 +2470,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, return rt; } +static int ip6_route_check_nh_onlink(struct net *net, + struct fib6_config *cfg, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; + const struct in6_addr *gw_addr = &cfg->fc_gateway; + u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; + struct rt6_info *grt; + int err; + + err = 0; + grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); + if (grt) { + if (grt->rt6i_flags & flags || dev != grt->dst.dev) { + NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); + err = -EINVAL; + } + + ip6_rt_put(grt); + } + + return err; +} + +static int ip6_route_check_nh(struct net *net, + struct fib6_config *cfg, + struct net_device **_dev, + struct inet6_dev **idev) +{ + const struct in6_addr *gw_addr = &cfg->fc_gateway; + struct net_device *dev = _dev ? *_dev : NULL; + struct rt6_info *grt = NULL; + int err = -EHOSTUNREACH; + + if (cfg->fc_table) { + int flags = RT6_LOOKUP_F_IFACE; + + grt = ip6_nh_lookup_table(net, cfg, gw_addr, + cfg->fc_table, flags); + if (grt) { + if (grt->rt6i_flags & RTF_GATEWAY || + (dev && dev != grt->dst.dev)) { + ip6_rt_put(grt); + grt = NULL; + } + } + } + + if (!grt) + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + + if (!grt) + goto out; + + if (dev) { + if (dev != grt->dst.dev) { + ip6_rt_put(grt); + goto out; + } + } else { + *_dev = dev = grt->dst.dev; + *idev = grt->rt6i_idev; + dev_hold(dev); + in6_dev_hold(grt->rt6i_idev); + } + + if (!(grt->rt6i_flags & RTF_GATEWAY)) + err = 0; + + ip6_rt_put(grt); + +out: + return err; +} + static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -2520,6 +2597,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (cfg->fc_metric == 0) cfg->fc_metric = IP6_RT_PRIO_USER; + if (cfg->fc_flags & RTNH_F_ONLINK) { + if (!dev) { + NL_SET_ERR_MSG(extack, + "Nexthop device required for onlink"); + err = -ENODEV; + goto out; + } + + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + } + err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { @@ -2664,8 +2756,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, rt->rt6i_gateway = *gw_addr; if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { - struct rt6_info *grt = NULL; - /* IPv6 strictly inhibits using not link-local addresses as nexthop address. Otherwise, router will not able to send redirects. @@ -2682,40 +2772,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } - if (cfg->fc_table) { - grt = ip6_nh_lookup_table(net, cfg, gw_addr); - - if (grt) { - if (grt->rt6i_flags & RTF_GATEWAY || - (dev && dev != grt->dst.dev)) { - ip6_rt_put(grt); - grt = NULL; - } - } - } - - if (!grt) - grt = rt6_lookup(net, gw_addr, NULL, - cfg->fc_ifindex, 1); - - err = -EHOSTUNREACH; - if (!grt) - goto out; - if (dev) { - if (dev != grt->dst.dev) { - ip6_rt_put(grt); - goto out; - } + if (cfg->fc_flags & RTNH_F_ONLINK) { + err = ip6_route_check_nh_onlink(net, cfg, dev, + extack); } else { - dev = grt->dst.dev; - idev = grt->rt6i_idev; - dev_hold(dev); - in6_dev_hold(grt->rt6i_idev); + err = ip6_route_check_nh(net, cfg, &dev, &idev); } - if (!(grt->rt6i_flags & RTF_GATEWAY)) - err = 0; - ip6_rt_put(grt); - if (err) goto out; } @@ -2734,6 +2796,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!dev) goto out; + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + if (!ipv6_addr_any(&cfg->fc_prefsrc)) { if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); @@ -2751,6 +2819,7 @@ install_route: if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -3820,6 +3889,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_CLONED) cfg->fc_flags |= RTF_CACHE; + cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -4225,6 +4296,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } + *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index d883c9204c01..278e49cd67d4 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, { struct tcphdr *th; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) + return ERR_PTR(-EINVAL); + if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a0f89ad76f9d..2a04dc9c781b 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, const struct ipv6hdr *ipv6h; struct udphdr *uh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) + goto out; + if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4e12859bc2ee..bb935a3b7fea 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) skb_reset_network_header(skb); skb_mac_header_rebuild(skb); + eth_hdr(skb)->h_proto = skb->protocol; err = 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d4e98f20fc2a..4a8d407f8902 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; - /* We must prevent loops or risk deadlock ! */ - if (csk->sk_family == PF_KCM) + /* Only allow TCP sockets to be attached for now */ + if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) || + csk->sk_protocol != IPPROTO_TCP) + return -EOPNOTSUPP; + + /* Don't allow listeners or closed sockets */ + if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) return -EOPNOTSUPP; psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); @@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock, return err; } - sock_hold(csk); - write_lock_bh(&csk->sk_callback_lock); + + /* Check if sk_user_data is aready by KCM or someone else. + * Must be done under lock to prevent race conditions. + */ + if (csk->sk_user_data) { + write_unlock_bh(&csk->sk_callback_lock); + strp_done(&psock->strp); + kmem_cache_free(kcm_psockp, psock); + return -EALREADY; + } + psock->save_data_ready = csk->sk_data_ready; psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; @@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock, csk->sk_data_ready = psock_data_ready; csk->sk_write_space = psock_write_space; csk->sk_state_change = psock_state_change; + write_unlock_bh(&csk->sk_callback_lock); + sock_hold(csk); + /* Finished initialization, now add the psock to the MUX. */ spin_lock_bh(&mux->lock); head = &mux->psocks; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index eb55f1b3d047..7322aa1e382e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,7 @@ #include <net/mpls.h> #include <net/vxlan.h> #include <net/tun_proto.h> +#include <net/erspan.h> #include "flow_netlink.h" @@ -329,7 +330,8 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ - /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and + * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ @@ -400,6 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE }, }; static const struct ovs_len_tbl @@ -631,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } +static int erspan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + + BUILD_BUG_ON(sizeof(struct erspan_metadata) > + sizeof(match->key->tun_opts)); + + if (nla_len(a) > sizeof(match->key->tun_opts)) { + OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).", + nla_len(a), sizeof(match->key->tun_opts)); + return -EINVAL; + } + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, + sizeof(struct erspan_metadata), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), + nla_len(a), is_mask); + return 0; +} + static int ip_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -738,6 +768,20 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_PAD: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = erspan_tun_opt_from_nlattr(a, match, is_mask, + log); + if (err) + return err; + + tun_flags |= TUNNEL_ERSPAN_OPT; + opts_type = type; + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -862,6 +906,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, else if (output->tun_flags & TUNNEL_VXLAN_OPT && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; } return 0; @@ -2486,6 +2534,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + break; } } diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 2e554ef6d75f..9920d2f84eff 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock) sizeof(val)); } -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc) { - return tcp_sk(tc->t_sock->sk)->snd_nxt; + /* seq# of the last byte of data in tcp send buffer */ + return tcp_sk(tc->t_sock->sk)->write_seq; } u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp.h b/net/rds/tcp.h index e7858ee8ed8b..c6fa080e9b6d 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -55,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); -u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); +u32 rds_tcp_write_seq(struct rds_tcp_connection *tc); u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); extern struct rds_transport rds_tcp_transport; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 73c74763ca72..16f65744d984 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, * m_ack_seq is set to the sequence number of the last byte of * header and data. see rds_tcp_is_acked(). */ - tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); + tc->t_last_sent_nxt = rds_tcp_write_seq(tc); rm->m_ack_seq = tc->t_last_sent_nxt + sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; @@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", - rm, rds_tcp_snd_nxt(tc), + rm, rds_tcp_write_seq(tc), (unsigned long long)rm->m_ack_seq); } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index af4b8ec60d9a..b7ba9b06b147 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, int bind) { struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tcf_csum_params *params_old, *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tc_csum *parm; struct tcf_csum *p; @@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (!tcf_idr_check(tn, parm->index, a, bind)) { ret = tcf_idr_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + &act_csum_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; @@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } p = to_tcf_csum(*a); - spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; - p->update_flags = parm->update_flags; - spin_unlock_bh(&p->tcf_lock); + ASSERT_RTNL(); + + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_idr_release(*a, bind); + return -ENOMEM; + } + params_old = rtnl_dereference(p->params); + + params_new->action = parm->action; + params_new->update_flags = parm->update_flags; + rcu_assign_pointer(p->params, params_new); + if (params_old) + kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); @@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); - int action; + struct tcf_csum_params *params; u32 update_flags; + int action; + + rcu_read_lock(); + params = rcu_dereference(p->params); - spin_lock(&p->tcf_lock); tcf_lastuse_update(&p->tcf_tm); - bstats_update(&p->tcf_bstats, skb); - action = p->tcf_action; - update_flags = p->update_flags; - spin_unlock(&p->tcf_lock); + bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb); + action = params->action; if (unlikely(action == TC_ACT_SHOT)) - goto drop; + goto drop_stats; + update_flags = params->update_flags; switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) @@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, break; } +unlock: + rcu_read_unlock(); return action; drop: - spin_lock(&p->tcf_lock); - p->tcf_qstats.drops++; - spin_unlock(&p->tcf_lock); - return TC_ACT_SHOT; + action = TC_ACT_SHOT; + +drop_stats: + qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats)); + goto unlock; } static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, @@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); + struct tcf_csum_params *params; struct tc_csum opt = { - .update_flags = p->update_flags, .index = p->tcf_index, - .action = p->tcf_action, .refcnt = p->tcf_refcnt - ref, .bindcnt = p->tcf_bindcnt - bind, }; struct tcf_t t; + params = rtnl_dereference(p->params); + opt.action = params->action; + opt.update_flags = params->update_flags; + if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -600,6 +620,15 @@ nla_put_failure: return -1; } +static void tcf_csum_cleanup(struct tc_action *a) +{ + struct tcf_csum *p = to_tcf_csum(a); + struct tcf_csum_params *params; + + params = rcu_dereference_protected(p->params, 1); + kfree_rcu(params, rcu); +} + static int tcf_csum_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) @@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .init = tcf_csum_init, + .cleanup = tcf_csum_cleanup, .walk = tcf_csum_walker, .lookup = tcf_csum_search, .size = sizeof(struct tcf_csum), diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f5d293416f46..bcb4ccb5f894 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -172,9 +172,10 @@ errout: return ERR_PTR(err); } -static void tcf_proto_destroy(struct tcf_proto *tp) +static void tcf_proto_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { - tp->ops->destroy(tp); + tp->ops->destroy(tp, extack); module_put(tp->ops->owner); kfree_rcu(tp, rcu); } @@ -223,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain) tcf_chain_head_change(chain, NULL); while (tp) { RCU_INIT_POINTER(chain->filter_chain, tp->next); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); tp = rtnl_dereference(chain->filter_chain); tcf_chain_put(chain); } @@ -1182,7 +1183,7 @@ replay: tcf_chain_tp_remove(chain, &chain_info, tp); tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_DELTFILTER, false); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, extack); err = 0; goto errout; } @@ -1200,7 +1201,7 @@ replay: case RTM_NEWTFILTER: if (n->nlmsg_flags & NLM_F_EXCL) { if (tp_created) - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); NL_SET_ERR_MSG(extack, "Filter already exists"); err = -EEXIST; goto errout; @@ -1214,7 +1215,7 @@ replay: goto errout; if (last) { tcf_chain_tp_remove(chain, &chain_info, tp); - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, extack); } goto errout; case RTM_GETTFILTER: @@ -1240,7 +1241,7 @@ replay: RTM_NEWTFILTER, false); } else { if (tp_created) - tcf_proto_destroy(tp); + tcf_proto_destroy(tp, NULL); } errout: diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 6088be65d167..d333f5c5101d 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void basic_destroy(struct tcf_proto *tp) +static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c11e0fe23a17..8e5326bc6440 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -159,14 +159,14 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, skip_sw = prog && tc_skip_sw(prog->gen_flags); obj = prog ?: oldprog; - tc_cls_common_offload_init(&cls_bpf.common, tp, extack); + tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags, + extack); cls_bpf.command = TC_CLSBPF_OFFLOAD; cls_bpf.exts = &obj->exts; cls_bpf.prog = prog ? prog->filter : NULL; cls_bpf.oldprog = oldprog ? oldprog->filter : NULL; cls_bpf.name = obj->bpf_name; cls_bpf.exts_integrated = obj->exts_integrated; - cls_bpf.gen_flags = obj->gen_flags; if (oldprog) tcf_block_offload_dec(block, &oldprog->gen_flags); @@ -212,11 +212,12 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, } static void cls_bpf_stop_offload(struct tcf_proto *tp, - struct cls_bpf_prog *prog) + struct cls_bpf_prog *prog, + struct netlink_ext_ack *extack) { int err; - err = cls_bpf_offload_cmd(tp, NULL, prog, NULL); + err = cls_bpf_offload_cmd(tp, NULL, prog, extack); if (err) pr_err("Stopping hardware offload failed: %d\n", err); } @@ -227,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, struct tcf_block *block = tp->chain->block; struct tc_cls_bpf_offload cls_bpf = {}; - tc_cls_common_offload_init(&cls_bpf.common, tp, NULL); + tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL); cls_bpf.command = TC_CLSBPF_STATS; cls_bpf.exts = &prog->exts; cls_bpf.prog = prog->filter; cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - cls_bpf.gen_flags = prog->gen_flags; tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false); } @@ -290,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) tcf_queue_work(&prog->work); } -static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) +static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); idr_remove_ext(&head->handle_idr, prog->handle); - cls_bpf_stop_offload(tp, prog); + cls_bpf_stop_offload(tp, prog, extack); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); if (tcf_exts_get_net(&prog->exts)) @@ -309,18 +310,19 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last, { struct cls_bpf_head *head = rtnl_dereference(tp->root); - __cls_bpf_delete(tp, arg); + __cls_bpf_delete(tp, arg, extack); *last = list_empty(&head->plist); return 0; } -static void cls_bpf_destroy(struct tcf_proto *tp) +static void cls_bpf_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; list_for_each_entry_safe(prog, tmp, &head->plist, link) - __cls_bpf_delete(tp, prog); + __cls_bpf_delete(tp, prog, extack); idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 1b54fbfca414..762da5c0cf5e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -143,7 +143,8 @@ errout: return err; } -static void cls_cgroup_destroy(struct tcf_proto *tp) +static void cls_cgroup_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 64c24b488058..cd5fe383afdd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -600,7 +600,7 @@ static int flow_init(struct tcf_proto *tp) return 0; } -static void flow_destroy(struct tcf_proto *tp) +static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 727c10378f37..dc9acaafc0a8 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -218,12 +218,13 @@ static void fl_destroy_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_flower.common, tp, NULL); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long) f; @@ -243,7 +244,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(f->flags); int err; - tc_cls_common_offload_init(&cls_flower.common, tp, extack); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; cls_flower.dissector = dissector; @@ -255,7 +256,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); if (err < 0) { - fl_hw_destroy_filter(tp, f); + fl_hw_destroy_filter(tp, f, NULL); return err; } else if (err > 0) { tcf_block_offload_inc(block, &f->flags); @@ -272,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_flower.common, tp, NULL); + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.exts = &f->exts; @@ -282,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) &cls_flower, false); } -static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) +static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, + struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) - fl_hw_destroy_filter(tp, f); + fl_hw_destroy_filter(tp, f, extack); tcf_unbind_filter(tp, &f->res); if (tcf_exts_get_net(&f->exts)) call_rcu(&f->rcu, fl_destroy_filter); @@ -315,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu) schedule_work(&head->work); } -static void fl_destroy(struct tcf_proto *tp) +static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_fl_head *head = rtnl_dereference(tp->root); struct cls_fl_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, list) - __fl_delete(tp, f); + __fl_delete(tp, f, extack); idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); @@ -958,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, rhashtable_remove_fast(&head->ht, &fold->ht_node, head->ht_params); if (!tc_skip_hw(fold->flags)) - fl_hw_destroy_filter(tp, fold); + fl_hw_destroy_filter(tp, fold, NULL); } *arg = fnew; @@ -997,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last, if (!tc_skip_sw(f->flags)) rhashtable_remove_fast(&head->ht, &f->ht_node, head->ht_params); - __fl_delete(tp, f); + __fl_delete(tp, f, extack); *last = list_empty(&head->filters); return 0; } diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 94d159a8869a..8b207723fbc2 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void fw_destroy(struct tcf_proto *tp) +static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d990d2a52c6d..2ba721a590a7 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -71,12 +71,13 @@ static void mall_destroy_rcu(struct rcu_head *rcu) static void mall_destroy_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, - unsigned long cookie) + unsigned long cookie, + struct netlink_ext_ack *extack) { struct tc_cls_matchall_offload cls_mall = {}; struct tcf_block *block = tp->chain->block; - tc_cls_common_offload_init(&cls_mall.common, tp, NULL); + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; @@ -94,7 +95,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, bool skip_sw = tc_skip_sw(head->flags); int err; - tc_cls_common_offload_init(&cls_mall.common, tp, extack); + tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack); cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.exts = &head->exts; cls_mall.cookie = cookie; @@ -102,7 +103,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); if (err < 0) { - mall_destroy_hw_filter(tp, head, cookie); + mall_destroy_hw_filter(tp, head, cookie, NULL); return err; } else if (err > 0) { tcf_block_offload_inc(block, &head->flags); @@ -114,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return 0; } -static void mall_destroy(struct tcf_proto *tp) +static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); @@ -122,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp) return; if (!tc_skip_hw(head->flags)) - mall_destroy_hw_filter(tp, head, (unsigned long) head); + mall_destroy_hw_filter(tp, head, (unsigned long) head, extack); if (tcf_exts_get_net(&head->exts)) call_rcu(&head->rcu, mall_destroy_rcu); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 55467c30d524..21a03a8ee029 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head) tcf_queue_work(&f->work); } -static void route4_destroy(struct tcf_proto *tp) +static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 5cc0df690cff..4f1297657c27 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) __rsvp_delete_filter(f); } -static void rsvp_destroy(struct tcf_proto *tp) +static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 01a163e0b6aa..b49cc990a000 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -581,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } -static void tcindex_destroy(struct tcf_proto *tp) +static void tcindex_destroy(struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 7030240f8826..60c892c36a60 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -87,6 +87,7 @@ struct tc_u_hnode { unsigned int divisor; struct idr handle_idr; struct rcu_head rcu; + u32 flags; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. */ @@ -486,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp, NULL); + tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack); cls_u32.command = TC_CLSU32_DELETE_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -509,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, bool offloaded = false; int err; - tc_cls_common_offload_init(&cls_u32.common, tp, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_NEW_HNODE; cls_u32.hnode.divisor = h->divisor; cls_u32.hnode.handle = h->handle; @@ -517,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { - u32_clear_hw_hnode(tp, h); + u32_clear_hw_hnode(tp, h, NULL); return err; } else if (err > 0) { offloaded = true; @@ -529,12 +531,13 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, return 0; } -static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n) +static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + struct netlink_ext_ack *extack) { struct tcf_block *block = tp->chain->block; struct tc_cls_u32_offload cls_u32 = {}; - tc_cls_common_offload_init(&cls_u32.common, tp, NULL); + tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack); cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; @@ -550,7 +553,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, bool skip_sw = tc_skip_sw(flags); int err; - tc_cls_common_offload_init(&cls_u32.common, tp, extack); + tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack); cls_u32.command = TC_CLSU32_REPLACE_KNODE; cls_u32.knode.handle = n->handle; cls_u32.knode.fshift = n->fshift; @@ -568,7 +571,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); if (err < 0) { - u32_remove_hw_knode(tp, n); + u32_remove_hw_knode(tp, n, NULL); return err; } else if (err > 0) { tcf_block_offload_inc(block, &n->flags); @@ -580,7 +583,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, return 0; } -static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + struct netlink_ext_ack *extack) { struct tc_u_knode *n; unsigned int h; @@ -590,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) RCU_INIT_POINTER(ht->ht[h], rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); - u32_remove_hw_knode(tp, n); + u32_remove_hw_knode(tp, n, extack); idr_remove_ext(&ht->handle_idr, n->handle); if (tcf_exts_get_net(&n->exts)) call_rcu(&n->rcu, u32_delete_key_freepf_rcu); @@ -600,7 +604,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) } } -static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht, + struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode __rcu **hn; @@ -608,14 +613,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) WARN_ON(ht->refcnt); - u32_clear_hnode(tp, ht); + u32_clear_hnode(tp, ht, extack); hn = &tp_c->hlist; for (phn = rtnl_dereference(*hn); phn; hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { - u32_clear_hw_hnode(tp, ht); + u32_clear_hw_hnode(tp, ht, extack); idr_destroy(&ht->handle_idr); idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); @@ -638,7 +643,7 @@ static bool ht_empty(struct tc_u_hnode *ht) return true; } -static void u32_destroy(struct tcf_proto *tp) +static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); @@ -646,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp) WARN_ON(root_ht == NULL); if (root_ht && --root_ht->refcnt == 0) - u32_destroy_hnode(tp, root_ht); + u32_destroy_hnode(tp, root_ht, extack); if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; @@ -657,7 +662,7 @@ static void u32_destroy(struct tcf_proto *tp) ht; ht = rtnl_dereference(ht->next)) { ht->refcnt--; - u32_clear_hnode(tp, ht); + u32_clear_hnode(tp, ht, extack); } while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { @@ -684,7 +689,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, goto out; if (TC_U32_KEY(ht->handle)) { - u32_remove_hw_knode(tp, (struct tc_u_knode *)ht); + u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack); ret = u32_delete_key(tp, (struct tc_u_knode *)ht); goto out; } @@ -696,7 +701,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last, if (ht->refcnt == 1) { ht->refcnt--; - u32_destroy_hnode(tp, ht); + u32_destroy_hnode(tp, ht, extack); } else { NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter"); return -EBUSY; @@ -1015,6 +1020,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); + ht->flags = flags; err = u32_replace_hw_hnode(tp, ht, flags, extack); if (err) { diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index df3110d69585..07c10bac06a0 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em, if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len)) return 0; - return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len); + return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len); } static struct tcf_ematch_ops em_nbyte_ops = { diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 275925b93b29..35bc7106d182 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, struct sk_buff *segs = ERR_PTR(-EINVAL); struct sctphdr *sh; + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) + goto out; + sh = sctp_hdr(skb); if (!pskb_may_pull(skb, sizeof(*sh))) goto out; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7ff444ecee75..a40fa53c93ef 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4860,9 +4860,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), struct net *net, int *pos, void *p) { struct rhashtable_iter hti; struct sctp_transport *tsp; - int ret = 0; + int ret; again: + ret = 0; sctp_transport_walk_start(&hti); tsp = sctp_transport_get_idx(net, &hti, *pos + 1); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index daf8075f5a4c..267e68379110 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -115,7 +115,6 @@ static int smc_release(struct socket *sock) goto out; smc = smc_sk(sk); - sock_hold(sk); if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires * sock lock for child sockets again @@ -124,10 +123,7 @@ static int smc_release(struct socket *sock) else lock_sock(sk); - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); - } else { + if (!smc->use_fallback) { rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@ -136,20 +132,21 @@ static int smc_release(struct socket *sock) sock_release(smc->clcsock); smc->clcsock = NULL; } + if (smc->use_fallback) { + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); + } /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { + if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ out: return rc; } @@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_LIST_HEAD(&smc->accept_q); spin_lock_init(&smc->accept_q_lock); - INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work); sk->sk_prot->hash(sk); sk_refcnt_debug_inc(sk); @@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } +static void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { @@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc) int rc = 0; u8 ibport; + sock_hold(&smc->sk); /* sock put in passive closing */ + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { /* peer has not signalled SMC-capability */ smc->use_fallback = true; @@ -513,6 +520,8 @@ out_connected: return rc ? rc : local_contact; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); decline_rdma: @@ -526,9 +535,13 @@ decline_rdma: goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); smc_conn_free(&smc->conn); out_err: + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ return rc; } @@ -581,40 +594,33 @@ out_err: static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) { - struct sock *sk = &lsmc->sk; - struct socket *new_clcsock; + struct socket *new_clcsock = NULL; + struct sock *lsk = &lsmc->sk; struct sock *new_sk; int rc; - release_sock(&lsmc->sk); - new_sk = smc_sock_alloc(sock_net(sk), NULL); + release_sock(lsk); + new_sk = smc_sock_alloc(sock_net(lsk), NULL); if (!new_sk) { rc = -ENOMEM; - lsmc->sk.sk_err = ENOMEM; + lsk->sk_err = ENOMEM; *new_smc = NULL; - lock_sock(&lsmc->sk); + lock_sock(lsk); goto out; } *new_smc = smc_sk(new_sk); rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); - lock_sock(&lsmc->sk); - if (rc < 0) { - lsmc->sk.sk_err = -rc; - new_sk->sk_state = SMC_CLOSED; - sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); - *new_smc = NULL; - goto out; - } - if (lsmc->sk.sk_state == SMC_CLOSED) { + lock_sock(lsk); + if (rc < 0) + lsk->sk_err = -rc; + if (rc < 0 || lsk->sk_state == SMC_CLOSED) { if (new_clcsock) sock_release(new_clcsock); new_sk->sk_state = SMC_CLOSED; sock_set_flag(new_sk, SOCK_DEAD); - sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + new_sk->sk_prot->unhash(new_sk); + sock_put(new_sk); /* final */ *new_smc = NULL; goto out; } @@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk) { struct smc_sock *par = smc_sk(parent); - sock_hold(sk); + sock_hold(sk); /* sock_put in smc_accept_unlink () */ spin_lock(&par->accept_q_lock); list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); spin_unlock(&par->accept_q_lock); @@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk) list_del_init(&smc_sk(sk)->accept_q); spin_unlock(&par->accept_q_lock); sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); - sock_put(sk); + sock_put(sk); /* sock_hold in smc_accept_enqueue */ } /* remove a sock from the accept queue to bind it to a new socket created @@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent, smc_accept_unlink(new_sk); if (new_sk->sk_state == SMC_CLOSED) { + if (isk->clcsock) { + sock_release(isk->clcsock); + isk->clcsock = NULL; + } new_sk->sk_prot->unhash(new_sk); - sock_put(new_sk); + sock_put(new_sk); /* final */ continue; } if (new_sock) @@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk) { struct smc_sock *smc = smc_sk(sk); - sock_hold(sk); lock_sock(sk); if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; - if (smc->use_fallback) { - sk->sk_state = SMC_CLOSED; - } else { + if (!smc->use_fallback) { smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; @@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk) sock_release(tcp); } if (smc->use_fallback) { - schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN); - } else if (sk->sk_state == SMC_CLOSED) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); + sock_put(sk); /* passive closing */ + sk->sk_state = SMC_CLOSED; + } else { + if (sk->sk_state == SMC_CLOSED) + smc_conn_free(&smc->conn); } release_sock(sk); - sock_put(sk); + sk->sk_prot->unhash(sk); + sock_put(sk); /* final sock_put */ } static int smc_serv_conf_first_link(struct smc_sock *smc) @@ -913,6 +921,8 @@ enqueue: return; decline_rdma_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ @@ -925,8 +935,12 @@ decline_rdma: goto out_connected; out_err_unlock: + if (local_contact == SMC_FIRST_CONTACT) + smc_lgr_forget(new_smc->conn.lgr); mutex_unlock(&smc_create_lgr_pending); out_err: + if (newsmcsk->sk_state == SMC_INIT) + sock_put(&new_smc->sk); /* passive closing */ newsmcsk->sk_state = SMC_CLOSED; smc_conn_free(&new_smc->conn); goto enqueue; /* queue new sock with sk_err set */ @@ -936,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work) { struct smc_sock *lsmc = container_of(work, struct smc_sock, tcp_listen_work); + struct sock *lsk = &lsmc->sk; struct smc_sock *new_smc; int rc = 0; - lock_sock(&lsmc->sk); - while (lsmc->sk.sk_state == SMC_LISTEN) { + lock_sock(lsk); + while (lsk->sk_state == SMC_LISTEN) { rc = smc_clcsock_accept(lsmc, &new_smc); if (rc) goto out; @@ -949,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work) new_smc->listen_smc = lsmc; new_smc->use_fallback = false; /* assume rdma capability first*/ - sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ + sock_hold(lsk); /* sock_put in smc_listen_work */ INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); smc_copy_sock_settings_to_smc(new_smc); - schedule_work(&new_smc->smc_listen_work); + sock_hold(&new_smc->sk); /* sock_put in passive closing */ + if (!schedule_work(&new_smc->smc_listen_work)) + sock_put(&new_smc->sk); } out: - release_sock(&lsmc->sk); - lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ + if (lsmc->clcsock) { + sock_release(lsmc->clcsock); + lsmc->clcsock = NULL; + } + release_sock(lsk); + /* no more listening, wake up smc_close_wait_listen_clcsock and + * accept + */ + lsk->sk_state_change(lsk); + sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } static int smc_listen(struct socket *sock, int backlog) @@ -991,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog) sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); - schedule_work(&smc->tcp_listen_work); + sock_hold(sk); /* sock_hold in tcp_listen_worker */ + if (!schedule_work(&smc->tcp_listen_work)) + sock_put(sk); out: release_sock(sk); @@ -1008,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, int rc = 0; lsmc = smc_sk(sk); + sock_hold(sk); /* sock_put below */ lock_sock(sk); if (lsmc->sk.sk_state != SMC_LISTEN) { @@ -1042,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, out: release_sock(sk); + sock_put(sk); /* sock_hold above */ return rc; } @@ -1111,21 +1140,15 @@ out: static unsigned int smc_accept_poll(struct sock *parent) { - struct smc_sock *isk; - struct sock *sk; + struct smc_sock *isk = smc_sk(parent); + int mask = 0; - lock_sock(parent); - list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) { - sk = (struct sock *)isk; - - if (sk->sk_state == SMC_ACTIVE) { - release_sock(parent); - return POLLIN | POLLRDNORM; - } - } - release_sock(parent); + spin_lock(&isk->accept_q_lock); + if (!list_empty(&isk->accept_q)) + mask = POLLIN | POLLRDNORM; + spin_unlock(&isk->accept_q_lock); - return 0; + return mask; } static unsigned int smc_poll(struct file *file, struct socket *sock, @@ -1136,9 +1159,15 @@ static unsigned int smc_poll(struct file *file, struct socket *sock, struct smc_sock *smc; int rc; + if (!sk) + return POLLNVAL; + smc = smc_sk(sock->sk); + sock_hold(sk); + lock_sock(sk); if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ + release_sock(sk); mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); /* if non-blocking connect finished ... */ lock_sock(sk); @@ -1150,37 +1179,43 @@ static unsigned int smc_poll(struct file *file, struct socket *sock, rc = smc_connect_rdma(smc); if (rc < 0) mask |= POLLERR; - else - /* success cases including fallback */ - mask |= POLLOUT | POLLWRNORM; + /* success cases including fallback */ + mask |= POLLOUT | POLLWRNORM; } } - release_sock(sk); } else { - sock_poll_wait(file, sk_sleep(sk), wait); - if (sk->sk_state == SMC_LISTEN) - /* woken up by sk_data_ready in smc_listen_work() */ - mask |= smc_accept_poll(sk); + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= POLLERR; - if (atomic_read(&smc->conn.sndbuf_space) || - (sk->sk_shutdown & SEND_SHUTDOWN)) { - mask |= POLLOUT | POLLWRNORM; - } else { - sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - } - if (atomic_read(&smc->conn.bytes_to_rcv)) - mask |= POLLIN | POLLRDNORM; if ((sk->sk_shutdown == SHUTDOWN_MASK) || (sk->sk_state == SMC_CLOSED)) mask |= POLLHUP; - if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLIN | POLLRDNORM | POLLRDHUP; - if (sk->sk_state == SMC_APPCLOSEWAIT1) - mask |= POLLIN; + if (sk->sk_state == SMC_LISTEN) { + /* woken up by sk_data_ready in smc_listen_work() */ + mask = smc_accept_poll(sk); + } else { + if (atomic_read(&smc->conn.sndbuf_space) || + sk->sk_shutdown & SEND_SHUTDOWN) { + mask |= POLLOUT | POLLWRNORM; + } else { + sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + } + if (atomic_read(&smc->conn.bytes_to_rcv)) + mask |= POLLIN | POLLRDNORM; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLIN | POLLRDNORM | POLLRDHUP; + if (sk->sk_state == SMC_APPCLOSEWAIT1) + mask |= POLLIN; + } } + release_sock(sk); + sock_put(sk); return mask; } diff --git a/net/smc/smc.h b/net/smc/smc.h index 0bee9d16cf29..9518986c97b1 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ - struct delayed_work sock_put_work; /* final socket freeing */ bool use_fallback; /* fallback to tcp */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close @@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed) static inline bool using_ipsec(struct smc_sock *smc) { return (smc->clcsock->sk->sk_policy[0] || - smc->clcsock->sk->sk_policy[1]) ? 1 : 0; + smc->clcsock->sk->sk_policy[1]) ? true : false; } #else static inline bool using_ipsec(struct smc_sock *smc) { - return 0; + return false; } #endif diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index d4155ff6acde..3cd086e5bd28 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, cdcpend->conn); } smc_tx_sndbuf_nonfull(smc); - if (smc->sk.sk_state != SMC_ACTIVE) - /* wake up smc_close_wait_tx_pends() */ - smc->sk.sk_state_change(&smc->sk); bh_unlock_sock(&smc->sk); } @@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_cdc_tx_pend **pend) { struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + int rc; - return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, - (struct smc_wr_tx_pend_priv **)pend); + rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, + (struct smc_wr_tx_pend_priv **)pend); + if (!conn->alert_token_local) + /* abnormal termination */ + rc = -EPIPE; + return rc; } static inline void smc_cdc_add_pending_send(struct smc_connection *conn, @@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) (unsigned long)conn); } -bool smc_cdc_tx_has_pending(struct smc_connection *conn) -{ - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; - - return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, (unsigned long)conn); -} - /********************************* receive ***********************************/ static inline bool smc_cdc_before(u16 seq1, u16 seq2) @@ -218,6 +212,14 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, smc->sk.sk_data_ready(&smc->sk); } + /* piggy backed tx info */ + /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ + if (diff_cons && smc_tx_prepared_sends(conn)) { + smc_tx_sndbuf_nonempty(conn); + /* trigger socket release if connection closed */ + smc_close_wake_tx_prepared(smc); + } + if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { smc->sk.sk_err = ECONNRESET; conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; @@ -227,15 +229,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc, if (smc->clcsock && smc->clcsock->sk) smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(&smc->sk, SOCK_DONE); - schedule_work(&conn->close_work); - } - - /* piggy backed tx info */ - /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */ - if (diff_cons && smc_tx_prepared_sends(conn)) { - smc_tx_sndbuf_nonempty(conn); - /* trigger socket release if connection closed */ - smc_close_wake_tx_prepared(smc); + sock_hold(&smc->sk); /* sock_put in close_work */ + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); } } diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 149ceda1b088..ab240b37ad11 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); -bool smc_cdc_tx_has_pending(struct smc_connection *conn); int smc_cdc_init(void) __init; #endif /* SMC_CDC_H */ diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index e194c6cc308a..e339c0186dcf 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -19,7 +19,7 @@ #include "smc_cdc.h" #include "smc_close.h" -#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ) +#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ) static void smc_close_cleanup_listen(struct sock *parent) { @@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent) smc_close_non_accepted(sk); } -static void smc_close_wait_tx_pends(struct smc_sock *smc) +static void smc_close_wait_listen_clcsock(struct smc_sock *smc) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = &smc->sk; signed long timeout; - timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; + timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME; add_wait_queue(sk_sleep(sk), &wait); - while (!signal_pending(current) && timeout) { - int rc; - - rc = sk_wait_event(sk, &timeout, - !smc_cdc_tx_has_pending(&smc->conn), - &wait); - if (rc) + do { + release_sock(sk); + if (smc->clcsock) + timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE, + timeout); + sched_annotate_sleep(); + lock_sock(sk); + if (!smc->clcsock) break; - } + } while (timeout); remove_wait_queue(sk_sleep(sk), &wait); } @@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn) } /* terminate smc socket abnormally - active abort - * RDMA communication no longer possible + * link group is terminated, i.e. RDMA communication no longer possible */ static void smc_close_active_abort(struct smc_sock *smc) { + struct sock *sk = &smc->sk; + struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - smc->sk.sk_err = ECONNABORTED; + sk->sk_err = ECONNABORTED; if (smc->clcsock && smc->clcsock->sk) { smc->clcsock->sk->sk_err = ECONNABORTED; smc->clcsock->sk->sk_state_change(smc->clcsock->sk); } - switch (smc->sk.sk_state) { + switch (sk->sk_state) { case SMC_INIT: case SMC_ACTIVE: - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); + sock_put(sk); /* passive closing */ break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - txflags->peer_conn_abort = 1; - sock_release(smc->clcsock); if (!smc_cdc_rxed_any_close(&smc->conn)) - smc->sk.sk_state = SMC_PEERABORTWAIT; + sk->sk_state = SMC_PEERABORTWAIT; else - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; + release_sock(sk); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (!txflags->peer_conn_closed) { - smc->sk.sk_state = SMC_PEERABORTWAIT; - txflags->peer_conn_abort = 1; - sock_release(smc->clcsock); + /* just SHUTDOWN_SEND done */ + sk->sk_state = SMC_PEERABORTWAIT; } else { - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; } + sock_put(sk); /* passive closing */ break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: - if (!txflags->peer_conn_closed) { - txflags->peer_conn_abort = 1; - sock_release(smc->clcsock); - } - smc->sk.sk_state = SMC_CLOSED; + sk->sk_state = SMC_CLOSED; break; case SMC_PEERFINCLOSEWAIT: + sock_put(sk); /* passive closing */ + break; case SMC_PEERABORTWAIT: case SMC_CLOSED: break; } - sock_set_flag(&smc->sk, SOCK_DEAD); - smc->sk.sk_state_change(&smc->sk); + sock_set_flag(sk, SOCK_DEAD); + sk->sk_state_change(sk); } static inline bool smc_close_sent_any_close(struct smc_connection *conn) @@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_INIT: sk->sk_state = SMC_CLOSED; - if (smc->smc_listen_work.func) - cancel_work_sync(&smc->smc_listen_work); break; case SMC_LISTEN: sk->sk_state = SMC_CLOSED; @@ -200,11 +204,9 @@ again: rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); /* wake up kernel_accept of smc_tcp_listen_worker */ smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); + smc_close_wait_listen_clcsock(smc); } - release_sock(sk); smc_close_cleanup_listen(sk); - cancel_work_sync(&smc->smc_listen_work); - lock_sock(sk); break; case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); @@ -214,6 +216,8 @@ again: if (sk->sk_state == SMC_ACTIVE) { /* send close request */ rc = smc_close_final(conn); + if (rc) + break; sk->sk_state = SMC_PEERCLOSEWAIT1; } else { /* peer event has changed the state */ @@ -226,9 +230,10 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: @@ -237,19 +242,21 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); - if (sk->sk_err != ECONNABORTED) { - /* confirm close from peer */ - rc = smc_close_final(conn); - if (rc) - break; - } - if (smc_cdc_rxed_any_close(conn)) + if (sk->sk_state != SMC_APPCLOSEWAIT1 && + sk->sk_state != SMC_APPCLOSEWAIT2) + goto again; + /* confirm close from peer */ + rc = smc_close_final(conn); + if (rc) + break; + if (smc_cdc_rxed_any_close(conn)) { /* peer has closed the socket already */ sk->sk_state = SMC_CLOSED; - else + sock_put(sk); /* postponed passive closing */ + } else { /* peer has just issued a shutdown write */ sk->sk_state = SMC_PEERFINCLOSEWAIT; - smc_close_wait_tx_pends(smc); + } break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: @@ -257,6 +264,8 @@ again: !smc_close_sent_any_close(conn)) { /* just shutdown wr done, send close request */ rc = smc_close_final(conn); + if (rc) + break; } /* peer sending PeerConnectionClosed will cause transition */ break; @@ -264,12 +273,8 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - release_sock(sk); - cancel_delayed_work_sync(&conn->tx_work); - lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; - smc_close_wait_tx_pends(smc); break; case SMC_PEERABORTWAIT: case SMC_CLOSED: @@ -278,7 +283,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } @@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc) struct sock *sk = &smc->sk; switch (sk->sk_state) { + case SMC_INIT: case SMC_ACTIVE: - case SMC_APPFINCLOSEWAIT: case SMC_APPCLOSEWAIT1: - case SMC_APPCLOSEWAIT2: - smc_close_abort(&smc->conn); + sk->sk_state = SMC_PROCESSABORT; + sock_put(sk); /* passive closing */ + break; + case SMC_APPFINCLOSEWAIT: sk->sk_state = SMC_PROCESSABORT; break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: if (txflags->peer_done_writing && - !smc_close_sent_any_close(&smc->conn)) { + !smc_close_sent_any_close(&smc->conn)) /* just shutdown, but not yet closed locally */ - smc_close_abort(&smc->conn); sk->sk_state = SMC_PROCESSABORT; - } else { + else sk->sk_state = SMC_CLOSED; - } + sock_put(sk); /* passive closing */ break; + case SMC_APPCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: + sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + break; case SMC_PEERABORTWAIT: sk->sk_state = SMC_CLOSED; break; - case SMC_INIT: case SMC_PROCESSABORT: /* nothing to do, add tracing in future patch */ break; } } -/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, - * or peer_done_writing. +/* Either some kind of closing has been received: peer_conn_closed, + * peer_conn_abort, or peer_done_writing + * or the link group of the connection terminates abnormally. */ static void smc_close_passive_work(struct work_struct *work) { @@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work) struct sock *sk = &smc->sk; int old_state; - lock_sock(&smc->sk); + lock_sock(sk); old_state = sk->sk_state; if (!conn->alert_token_local) { @@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work) goto wakeup; } - rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; + rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { + /* peer has not received all data */ smc_close_passive_abort_received(smc); + release_sock(&smc->sk); + cancel_delayed_work_sync(&conn->tx_work); + lock_sock(&smc->sk); goto wakeup; } switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&smc->conn.bytes_to_rcv) || + if (atomic_read(&conn->bytes_to_rcv) || (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) + !smc_cdc_rxed_any_close(conn))) { sk->sk_state = SMC_APPCLOSEWAIT1; - else + } else { sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + } break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; + /* postpone sock_put() for passive closing to cover + * received SEND_SHUTDOWN as well + */ break; case SMC_PEERCLOSEWAIT1: if (rxflags->peer_done_writing) @@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work) /* fall through */ /* to check for closing */ case SMC_PEERCLOSEWAIT2: - case SMC_PEERFINCLOSEWAIT: - if (!smc_cdc_rxed_any_close(&smc->conn)) + if (!smc_cdc_rxed_any_close(conn)) break; if (sock_flag(sk, SOCK_DEAD) && smc_close_sent_any_close(conn)) { @@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work) /* just shutdown, but not yet closed locally */ sk->sk_state = SMC_APPFINCLOSEWAIT; } + sock_put(sk); /* passive closing */ + break; + case SMC_PEERFINCLOSEWAIT: + if (smc_cdc_rxed_any_close(conn)) { + sk->sk_state = SMC_CLOSED; + sock_put(sk); /* passive closing */ + } break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: + /* postpone sock_put() for passive closing to cover + * received SEND_SHUTDOWN as well + */ + break; case SMC_APPFINCLOSEWAIT: case SMC_PEERABORTWAIT: case SMC_PROCESSABORT: @@ -393,23 +422,11 @@ wakeup: if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { - smc_conn_free(&smc->conn); - schedule_delayed_work(&smc->sock_put_work, - SMC_CLOSE_SOCK_PUT_DELAY); - } + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + smc_conn_free(conn); } - release_sock(&smc->sk); -} - -void smc_close_sock_put_work(struct work_struct *work) -{ - struct smc_sock *smc = container_of(to_delayed_work(work), - struct smc_sock, - sock_put_work); - - smc->sk.sk_prot->unhash(&smc->sk); - sock_put(&smc->sk); + release_sock(sk); + sock_put(sk); /* sock_hold done by schedulers of close_work */ } int smc_close_shutdown_write(struct smc_sock *smc) @@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc) 0 : sock_flag(sk, SOCK_LINGER) ? sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; -again: old_state = sk->sk_state; - switch (old_state) { +again: + switch (sk->sk_state) { case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_ACTIVE) + goto again; /* send close wr request */ rc = smc_close_wr(conn); - if (sk->sk_state == SMC_ACTIVE) - sk->sk_state = SMC_PEERCLOSEWAIT1; - else - goto again; + if (rc) + break; + sk->sk_state = SMC_PEERCLOSEWAIT1; break; case SMC_APPCLOSEWAIT1: /* passive close */ @@ -446,8 +464,12 @@ again: release_sock(sk); cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); + if (sk->sk_state != SMC_APPCLOSEWAIT1) + goto again; /* confirm close from peer */ rc = smc_close_wr(conn); + if (rc) + break; sk->sk_state = SMC_APPCLOSEWAIT2; break; case SMC_APPCLOSEWAIT2: @@ -462,7 +484,7 @@ again: } if (old_state != sk->sk_state) - sk->sk_state_change(&smc->sk); + sk->sk_state_change(sk); return rc; } diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 8c498885d758..19eb6a211c23 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -21,7 +21,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc); int smc_close_active(struct smc_sock *smc); -void smc_close_sock_put_work(struct work_struct *work); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 94f21116dac5..2424c7100aaf 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work) bool conns; spin_lock_bh(&smc_lgr_list.lock); + if (list_empty(&lgr->list)) + goto free; read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work) return; } list_del_init(&lgr->list); /* remove from smc_lgr_list */ +free: spin_unlock_bh(&smc_lgr_list.lock); smc_lgr_free(lgr); } @@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn) /* remove a finished connection from its link group */ void smc_conn_free(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; - - if (!lgr) + if (!conn->lgr) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); @@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr) while (node) { conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); - sock_hold(&smc->sk); + sock_hold(&smc->sk); /* sock_put in close work */ + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); - schedule_work(&conn->close_work); - sock_put(&smc->sk); + write_unlock_bh(&lgr->conns_lock); + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); + write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); } write_unlock_bh(&lgr->conns_lock); + wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); } /* Determine vlan of internal TCP socket. diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..427b91c1c964 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) goto errout; - if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && + smc->conn.alert_token_local) { struct smc_connection *conn = &smc->conn; struct smc_diag_conninfo cinfo = { .token = conn->alert_token_local, @@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, goto errout; } - if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) { + if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && + !list_empty(&smc->conn.lgr->list)) { struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = smc->conn.lgr->lnk[0].ibport, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 90f1a7f9085c..2a8957bd6d38 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -141,6 +141,17 @@ out: return rc; } +static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) +{ + struct smc_link_group *lgr, *l; + + list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { + if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && + lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) + smc_lgr_terminate(lgr); + } +} + /* process context wrapper for might_sleep smc_ib_remember_port_attr */ static void smc_ib_port_event_work(struct work_struct *work) { @@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work) for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) { smc_ib_remember_port_attr(smcibdev, port_idx + 1); clear_bit(port_idx, &smcibdev->port_event_mask); + if (!smc_ib_port_active(smcibdev, port_idx + 1)) + smc_ib_port_terminate(smcibdev, port_idx + 1); } } @@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, switch (ibevent->event) { case IB_EVENT_PORT_ERR: - port_idx = ibevent->element.port_num - 1; - set_bit(port_idx, &smcibdev->port_event_mask); - schedule_work(&smcibdev->port_event_work); - /* fall through */ case IB_EVENT_DEVICE_FATAL: - /* tbd in follow-on patch: - * abnormal close of corresponding connections - */ - break; case IB_EVENT_PORT_ACTIVE: port_idx = ibevent->element.port_num - 1; set_bit(port_idx, &smcibdev->port_event_mask); @@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler, void smc_ib_dealloc_protection_domain(struct smc_link *lnk) { - ib_dealloc_pd(lnk->roce_pd); + if (lnk->roce_pd) + ib_dealloc_pd(lnk->roce_pd); lnk->roce_pd = NULL; } @@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) { + struct smc_ib_device *smcibdev = + (struct smc_ib_device *)ibevent->device; + u8 port_idx; + switch (ibevent->event) { case IB_EVENT_DEVICE_FATAL: case IB_EVENT_GID_CHANGE: case IB_EVENT_PORT_ERR: case IB_EVENT_QP_ACCESS_ERR: - /* tbd in follow-on patch: - * abnormal close of corresponding connections - */ + port_idx = ibevent->element.port_num - 1; + set_bit(port_idx, &smcibdev->port_event_mask); + schedule_work(&smcibdev->port_event_work); break; default: break; @@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv) void smc_ib_destroy_queue_pair(struct smc_link *lnk) { - ib_destroy_qp(lnk->roce_qp); + if (lnk->roce_qp) + ib_destroy_qp(lnk->roce_qp); lnk->roce_qp = NULL; } @@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { if (!smcibdev->initialized) return; + smcibdev->initialized = 0; smc_wr_remove_dev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); ib_destroy_cq(smcibdev->roce_cq_recv); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 2e50fddf8ce9..838bce20c361 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) rc = -EPIPE; break; } - if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) { + if (smc_cdc_rxed_any_close(conn)) { rc = -ECONNRESET; break; } @@ -107,7 +107,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags) sk_wait_event(sk, &timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || - smc_cdc_rxed_any_close_or_senddone(conn) || + smc_cdc_rxed_any_close(conn) || atomic_read(&conn->sndbuf_space), &wait); } @@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, peer_rmbe_offset; rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr); - if (rc) + if (rc) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + smc_lgr_terminate(lgr); + } return rc; } @@ -406,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) goto out_unlock; } rc = 0; - schedule_delayed_work(&conn->tx_work, - SMC_TX_WORK_DELAY); + if (conn->alert_token_local) /* connection healthy */ + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } @@ -438,10 +441,17 @@ static void smc_tx_work(struct work_struct *work) int rc; lock_sock(&smc->sk); + if (smc->sk.sk_err || + !conn->alert_token_local || + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + goto out; + rc = smc_tx_sndbuf_nonempty(conn); if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked && !atomic_read(&conn->bytes_to_rcv)) conn->local_rx_ctrl.prod_flags.write_blocked = 0; + +out: release_sock(&smc->sk); } @@ -462,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - if (smc_cdc_get_slot_and_msg_send(conn) < 0) { + if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && + conn->alert_token_local) { /* connection healthy */ schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..1b8af23e6e2b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data) again: polled++; do { + memset(&wc, 0, sizeof(wc)); rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc); if (polled == 1) { ib_req_notify_cq(dev->roce_cq_send, @@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, struct smc_wr_tx_pend_priv **wr_pend_priv) { struct smc_wr_tx_pend *wr_pend; + u32 idx = link->wr_tx_cnt; struct ib_send_wr *wr_ib; u64 wr_id; - u32 idx; int rc; *wr_buf = NULL; @@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, if (rc) return rc; } else { - rc = wait_event_interruptible_timeout( + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + rc = wait_event_timeout( link->wr_tx_wait, + list_empty(&lgr->list) || /* lgr terminated */ (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - struct smc_link_group *lgr; - - lgr = container_of(link, struct smc_link_group, - lnk[SMC_SINGLE_LINK]); smc_lgr_terminate(lgr); return -EPIPE; } - if (rc == -ERESTARTSYS) - return -EINTR; if (idx == link->wr_tx_cnt) return -EPIPE; } @@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); - if (rc) + if (rc) { + struct smc_link_group *lgr = + container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_wr_tx_put_slot(link, priv); + smc_lgr_terminate(lgr); + } return rc; } @@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, +void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data) { struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; + struct smc_wr_rx_hdr *wr_tx; int i; for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) + wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; + if (wr_tx->type != wr_tx_hdr_type) continue; tx_pend = &link->wr_tx_pends[i].priv; if (filter(tx_pend, data)) @@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, } } -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_rx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i]; - if (wr_rx->type != wr_rx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - return true; - } - return false; -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 2acf12b06063..ef0c3494c9cb 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link, int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); -bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type, - smc_wr_tx_filter filter, unsigned long data); void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, diff --git a/net/socket.c b/net/socket.c index 1536515b6437..11cc2cd0f37b 100644 --- a/net/socket.c +++ b/net/socket.c @@ -961,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * If this ioctl is unknown try to hand it down * to the NIC driver. */ - if (err == -ENOIOCTLCMD) - err = dev_ioctl(net, cmd, argp); + if (err != -ENOIOCTLCMD) + return err; + if (cmd == SIOCGIFCONF) { + struct ifconf ifc; + if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) + return -EFAULT; + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); + rtnl_unlock(); + if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) + err = -EFAULT; + } else { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } return err; } @@ -988,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) sock = file->private_data; sk = sock->sk; net = sock_net(sk); - if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { - err = dev_ioctl(net, cmd, argp); + if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { + struct ifreq ifr; + bool need_copyout; + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; } else #ifdef CONFIG_WEXT_CORE if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { - err = dev_ioctl(net, cmd, argp); + err = wext_handle_ioctl(net, cmd, argp); } else #endif switch (cmd) { @@ -2654,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock, return err; } -static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(struct ifreq)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - err = dev_ioctl(net, SIOCGIFNAME, uifr); - if (err) - return err; - - if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq))) - return -EFAULT; - - return 0; -} - -static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) +static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) { struct compat_ifconf ifc32; struct ifconf ifc; - struct ifconf __user *uifc; - struct compat_ifreq __user *ifr32; - struct ifreq __user *ifr; - unsigned int i, j; int err; if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; - memset(&ifc, 0, sizeof(ifc)); - if (ifc32.ifcbuf == 0) { - ifc32.ifc_len = 0; - ifc.ifc_len = 0; - ifc.ifc_req = NULL; - uifc = compat_alloc_user_space(sizeof(struct ifconf)); - } else { - size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * - sizeof(struct ifreq); - uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); - ifc.ifc_len = len; - ifr = ifc.ifc_req = (void __user *)(uifc + 1); - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { - if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr++; - ifr32++; - } - } - if (copy_to_user(uifc, &ifc, sizeof(struct ifconf))) - return -EFAULT; + ifc.ifc_len = ifc32.ifc_len; + ifc.ifc_req = compat_ptr(ifc32.ifcbuf); - err = dev_ioctl(net, SIOCGIFCONF, uifc); + rtnl_lock(); + err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); + rtnl_unlock(); if (err) return err; - if (copy_from_user(&ifc, uifc, sizeof(struct ifconf))) - return -EFAULT; - - ifr = ifc.ifc_req; - ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0, j = 0; - i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) - return -EFAULT; - ifr32++; - ifr++; - } - - if (ifc32.ifcbuf == 0) { - /* Translate from 64-bit structure multiple to - * a 32-bit one. - */ - i = ifc.ifc_len; - i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq)); - ifc32.ifc_len = i; - } else { - ifc32.ifc_len = i; - } + ifc32.ifc_len = ifc.ifc_len; if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) return -EFAULT; @@ -2747,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { struct compat_ethtool_rxnfc __user *compat_rxnfc; bool convert_in = false, convert_out = false; - size_t buf_size = ALIGN(sizeof(struct ifreq), 8); - struct ethtool_rxnfc __user *rxnfc; - struct ifreq __user *ifr; + size_t buf_size = 0; + struct ethtool_rxnfc __user *rxnfc = NULL; + struct ifreq ifr; u32 rule_cnt = 0, actual_rule_cnt; u32 ethcmd; u32 data; @@ -2786,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) case ETHTOOL_SRXCLSRLDEL: buf_size += sizeof(struct ethtool_rxnfc); convert_in = true; + rxnfc = compat_alloc_user_space(buf_size); break; } - ifr = compat_alloc_user_space(buf_size); - rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); - - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (put_user(convert_in ? rxnfc : compat_ptr(data), - &ifr->ifr_ifru.ifru_data)) - return -EFAULT; + ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; if (convert_in) { /* We expect there to be holes between fs.m_ext and @@ -2825,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) return -EFAULT; } - ret = dev_ioctl(net, SIOCETHTOOL, ifr); + ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); if (ret) return ret; @@ -2866,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) { - void __user *uptr; compat_uptr_t uptr32; - struct ifreq __user *uifr; + struct ifreq ifr; + void __user *saved; + int err; - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) + if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) return -EFAULT; - uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc)) - return -EFAULT; - - return dev_ioctl(net, SIOCWANDEV, uifr); -} - -static int bond_ioctl(struct net *net, unsigned int cmd, - struct compat_ifreq __user *ifr32) -{ - struct ifreq kifr; - mm_segment_t old_fs; - int err; - - switch (cmd) { - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, - (struct ifreq __user __force *) &kifr); - set_fs(old_fs); + saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; + ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); - return err; - default: - return -ENOIOCTLCMD; + err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); + if (!err) { + ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; + if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) + err = -EFAULT; } + return err; } /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { - struct ifreq __user *u_ifreq64; - char tmp_buf[IFNAMSIZ]; - void __user *data64; + struct ifreq ifreq; u32 data32; - if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), - IFNAMSIZ)) - return -EFAULT; - if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) - return -EFAULT; - data64 = compat_ptr(data32); - - u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - - if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], - IFNAMSIZ)) + if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; - if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_data)) return -EFAULT; + ifreq.ifr_data = compat_ptr(data32); - return dev_ioctl(net, cmd, u_ifreq64); -} - -static int dev_ifsioc(struct net *net, struct socket *sock, - unsigned int cmd, struct compat_ifreq __user *uifr32) -{ - struct ifreq __user *uifr; - int err; - - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) - return -EFAULT; - - err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); - - if (!err) { - switch (cmd) { - case SIOCGIFFLAGS: - case SIOCGIFMETRIC: - case SIOCGIFMTU: - case SIOCGIFMEM: - case SIOCGIFHWADDR: - case SIOCGIFINDEX: - case SIOCGIFADDR: - case SIOCGIFBRDADDR: - case SIOCGIFDSTADDR: - case SIOCGIFNETMASK: - case SIOCGIFPFLAGS: - case SIOCGIFTXQLEN: - case SIOCGMIIPHY: - case SIOCGMIIREG: - if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) - err = -EFAULT; - break; - } - } - return err; + return dev_ioctl(net, cmd, &ifreq, NULL); } static int compat_sioc_ifmap(struct net *net, unsigned int cmd, @@ -2980,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, { struct ifreq ifr; struct compat_ifmap __user *uifmap32; - mm_segment_t old_fs; int err; uifmap32 = &uifr32->ifr_ifru.ifru_map; @@ -2994,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, if (err) return -EFAULT; - old_fs = get_fs(); - set_fs(KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user __force *)&ifr); - set_fs(old_fs); + err = dev_ioctl(net, cmd, &ifr, NULL); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); @@ -3130,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSIFBR: case SIOCGIFBR: return old_bridge_ioctl(argp); - case SIOCGIFNAME: - return dev_ifname32(net, argp); case SIOCGIFCONF: - return dev_ifconf(net, argp); + return compat_dev_ifconf(net, argp); case SIOCETHTOOL: return ethtool_ioctl(net, argp); case SIOCWANDEV: @@ -3141,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCBONDENSLAVE: - case SIOCBONDRELEASE: - case SIOCBONDSETHWADDR: - case SIOCBONDCHANGEACTIVE: - return bond_ioctl(net, cmd, argp); case SIOCADDRT: case SIOCDELRT: return routing_ioctl(net, sock, cmd, argp); @@ -3205,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return dev_ifsioc(net, sock, cmd, argp); - case SIOCSARP: case SIOCGARP: case SIOCDARP: case SIOCATMARK: + case SIOCBONDENSLAVE: + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCGIFNAME: return sock_do_ioctl(net, sock, cmd, arg); } @@ -3365,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); -int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, cmd, arg); - set_fs(oldfs); - - return err; -} -EXPORT_SYMBOL(kernel_sock_ioctl); - int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 61f394d369bf..0a9b72fbd761 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -577,6 +577,8 @@ alloc_payload: get_page(page); sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem; sg_set_page(sg, page, copy, offset); + sg_unmark_end(sg); + ctx->sg_plaintext_num_elem++; sk_mem_charge(sk, copy); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6cdb054484d6..9efbfc753347 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev, } -int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd, - void __user *arg) +int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg) { struct iw_request_info info = { .cmd = cmd, .flags = 0 }; + struct iwreq iwr; int ret; - ret = wext_ioctl_dispatch(net, iwr, cmd, &info, + if (copy_from_user(&iwr, arg, sizeof(iwr))) + return -EFAULT; + + iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0; + + ret = wext_ioctl_dispatch(net, &iwr, cmd, &info, ioctl_standard_call, ioctl_private_call); if (ret >= 0 && IW_IS_GET(cmd) && - copy_to_user(arg, iwr, sizeof(struct iwreq))) + copy_to_user(arg, &iwr, sizeof(struct iwreq))) return -EFAULT; return ret; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 75982506617b..8e70291e586a 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -147,8 +147,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (!x->type_offload) return -EINVAL; - /* We don't yet support UDP encapsulation, TFC padding and ESN. */ - if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) + /* We don't yet support UDP encapsulation and TFC padding. */ + if (x->encap || x->tfcpad) return -EINVAL; dev = dev_get_by_index(net, xuo->ifindex); @@ -178,12 +178,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return 0; } + if (x->props.flags & XFRM_STATE_ESN && + !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { + xso->dev = NULL; + dev_put(dev); + return -EINVAL; + } + xso->dev = dev; xso->num_exthdrs = 1; xso->flags = xuo->flags; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { + xso->dev = NULL; dev_put(dev); return err; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 02501817227b..1d38c6acf8af 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) bitnr = replay_esn->replay_window - (diff - pos); } + xfrm_dev_state_advance_esn(x); + nr = bitnr >> 5; bitnr = bitnr & 0x1F; replay_esn->bmp[nr] |= (1U << bitnr); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 20b1e414dbee..54e21f19d722 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -317,7 +317,7 @@ retry: if (!type && try_load) { request_module("xfrm-offload-%d-%d", family, proto); - try_load = 0; + try_load = false; goto retry; } @@ -2279,8 +2279,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) goto error; } - x->km.state = XFRM_STATE_VALID; - error: return err; } @@ -2289,7 +2287,13 @@ EXPORT_SYMBOL(__xfrm_init_state); int xfrm_init_state(struct xfrm_state *x) { - return __xfrm_init_state(x, true, false); + int err; + + err = __xfrm_init_state(x, true, false); + if (!err) + x->km.state = XFRM_STATE_VALID; + + return err; } EXPORT_SYMBOL(xfrm_init_state); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index bdb48e5dba04..7f52b8eb177d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } - if (attrs[XFRMA_OFFLOAD_DEV]) { - err = xfrm_dev_state_add(net, x, - nla_data(attrs[XFRMA_OFFLOAD_DEV])); - if (err) - goto error; - } - if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; @@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* override default values from above */ xfrm_update_ae_params(x, attrs, 0); + /* configure the hardware if offload is requested */ + if (attrs[XFRMA_OFFLOAD_DEV]) { + err = xfrm_dev_state_add(net, x, + nla_data(attrs[XFRMA_OFFLOAD_DEV])); + if (err) + goto error; + } + return x; error: @@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } + if (x->km.state == XFRM_STATE_VOID) + x->km.state = XFRM_STATE_VALID; + c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; |