summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 18:42:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-15 18:42:13 -0700
commit9ff9b0d392ea08090cd1780fb196f36dbb586529 (patch)
tree276a3a5c4525b84dee64eda30b423fc31bf94850 /lib
parent840e5bb326bbcb16ce82dd2416d2769de4839aea (diff)
parent105faa8742437c28815b2a3eb8314ebc5fd9288c (diff)
downloadlinux-9ff9b0d392ea08090cd1780fb196f36dbb586529.tar.gz
Merge tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:

 - Add redirect_neigh() BPF packet redirect helper, allowing to limit
   stack traversal in common container configs and improving TCP
   back-pressure.

   Daniel reports ~10Gbps => ~15Gbps single stream TCP performance gain.

 - Expand netlink policy support and improve policy export to user
   space. (Ge)netlink core performs request validation according to
   declared policies. Expand the expressiveness of those policies
   (min/max length and bitmasks). Allow dumping policies for particular
   commands. This is used for feature discovery by user space (instead
   of kernel version parsing or trial and error).

 - Support IGMPv3/MLDv2 multicast listener discovery protocols in
   bridge.

 - Allow more than 255 IPv4 multicast interfaces.

 - Add support for Type of Service (ToS) reflection in SYN/SYN-ACK
   packets of TCPv6.

 - In Multi-patch TCP (MPTCP) support concurrent transmission of data on
   multiple subflows in a load balancing scenario. Enhance advertising
   addresses via the RM_ADDR/ADD_ADDR options.

 - Support SMC-Dv2 version of SMC, which enables multi-subnet
   deployments.

 - Allow more calls to same peer in RxRPC.

 - Support two new Controller Area Network (CAN) protocols - CAN-FD and
   ISO 15765-2:2016.

 - Add xfrm/IPsec compat layer, solving the 32bit user space on 64bit
   kernel problem.

 - Add TC actions for implementing MPLS L2 VPNs.

 - Improve nexthop code - e.g. handle various corner cases when nexthop
   objects are removed from groups better, skip unnecessary
   notifications and make it easier to offload nexthops into HW by
   converting to a blocking notifier.

 - Support adding and consuming TCP header options by BPF programs,
   opening the doors for easy experimental and deployment-specific TCP
   option use.

 - Reorganize TCP congestion control (CC) initialization to simplify
   life of TCP CC implemented in BPF.

 - Add support for shipping BPF programs with the kernel and loading
   them early on boot via the User Mode Driver mechanism, hence reusing
   all the user space infra we have.

 - Support sleepable BPF programs, initially targeting LSM and tracing.

 - Add bpf_d_path() helper for returning full path for given 'struct
   path'.

 - Make bpf_tail_call compatible with bpf-to-bpf calls.

 - Allow BPF programs to call map_update_elem on sockmaps.

 - Add BPF Type Format (BTF) support for type and enum discovery, as
   well as support for using BTF within the kernel itself (current use
   is for pretty printing structures).

 - Support listing and getting information about bpf_links via the bpf
   syscall.

 - Enhance kernel interfaces around NIC firmware update. Allow
   specifying overwrite mask to control if settings etc. are reset
   during update; report expected max time operation may take to users;
   support firmware activation without machine reboot incl. limits of
   how much impact reset may have (e.g. dropping link or not).

 - Extend ethtool configuration interface to report IEEE-standard
   counters, to limit the need for per-vendor logic in user space.

 - Adopt or extend devlink use for debug, monitoring, fw update in many
   drivers (dsa loop, ice, ionic, sja1105, qed, mlxsw, mv88e6xxx,
   dpaa2-eth).

 - In mlxsw expose critical and emergency SFP module temperature alarms.
   Refactor port buffer handling to make the defaults more suitable and
   support setting these values explicitly via the DCBNL interface.

 - Add XDP support for Intel's igb driver.

 - Support offloading TC flower classification and filtering rules to
   mscc_ocelot switches.

 - Add PTP support for Marvell Octeontx2 and PP2.2 hardware, as well as
   fixed interval period pulse generator and one-step timestamping in
   dpaa-eth.

 - Add support for various auth offloads in WiFi APs, e.g. SAE (WPA3)
   offload.

 - Add Lynx PHY/PCS MDIO module, and convert various drivers which have
   this HW to use it. Convert mvpp2 to split PCS.

 - Support Marvell Prestera 98DX3255 24-port switch ASICs, as well as
   7-port Mediatek MT7531 IP.

 - Add initial support for QCA6390 and IPQ6018 in ath11k WiFi driver,
   and wcn3680 support in wcn36xx.

 - Improve performance for packets which don't require much offloads on
   recent Mellanox NICs by 20% by making multiple packets share a
   descriptor entry.

 - Move chelsio inline crypto drivers (for TLS and IPsec) from the
   crypto subtree to drivers/net. Move MDIO drivers out of the phy
   directory.

 - Clean up a lot of W=1 warnings, reportedly the actively developed
   subsections of networking drivers should now build W=1 warning free.

 - Make sure drivers don't use in_interrupt() to dynamically adapt their
   code. Convert tasklets to use new tasklet_setup API (sadly this
   conversion is not yet complete).

* tag 'net-next-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2583 commits)
  Revert "bpfilter: Fix build error with CONFIG_BPFILTER_UMH"
  net, sockmap: Don't call bpf_prog_put() on NULL pointer
  bpf, selftest: Fix flaky tcp_hdr_options test when adding addr to lo
  bpf, sockmap: Add locking annotations to iterator
  netfilter: nftables: allow re-computing sctp CRC-32C in 'payload' statements
  net: fix pos incrementment in ipv6_route_seq_next
  net/smc: fix invalid return code in smcd_new_buf_create()
  net/smc: fix valid DMBE buffer sizes
  net/smc: fix use-after-free of delayed events
  bpfilter: Fix build error with CONFIG_BPFILTER_UMH
  cxgb4/ch_ipsec: Replace the module name to ch_ipsec from chcr
  net: sched: Fix suspicious RCU usage while accessing tcf_tunnel_info
  bpf: Fix register equivalence tracking.
  rxrpc: Fix loss of final ack on shutdown
  rxrpc: Fix bundle counting for exclusive connections
  netfilter: restore NF_INET_NUMHOOKS
  ibmveth: Identify ingress large send packets.
  ibmveth: Switch order of ibmveth_helper calls.
  cxgb4: handle 4-tuple PEDIT to NAT mode translation
  selftests: Add VRF route leaking tests
  ...
Diffstat (limited to 'lib')
-rw-r--r--lib/nlattr.c122
1 files changed, 88 insertions, 34 deletions
diff --git a/lib/nlattr.c b/lib/nlattr.c
index bc5b5cf608c4..74019c8ebf6b 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -96,8 +96,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			continue;
 
 		if (nla_len(entry) < NLA_HDRLEN) {
-			NL_SET_ERR_MSG_ATTR(extack, entry,
-					    "Array element too short");
+			NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy,
+						"Array element too short");
 			return -ERANGE;
 		}
 
@@ -124,6 +124,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 		range->max = U8_MAX;
 		break;
 	case NLA_U16:
+	case NLA_BINARY:
 		range->max = U16_MAX;
 		break;
 	case NLA_U32:
@@ -140,6 +141,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 
 	switch (pt->validation_type) {
 	case NLA_VALIDATE_RANGE:
+	case NLA_VALIDATE_RANGE_WARN_TOO_LONG:
 		range->min = pt->min;
 		range->max = pt->max;
 		break;
@@ -157,9 +159,10 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 	}
 }
 
-static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
-					   const struct nlattr *nla,
-					   struct netlink_ext_ack *extack)
+static int nla_validate_range_unsigned(const struct nla_policy *pt,
+				       const struct nlattr *nla,
+				       struct netlink_ext_ack *extack,
+				       unsigned int validate)
 {
 	struct netlink_range_validation range;
 	u64 value;
@@ -178,15 +181,39 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
 	case NLA_MSECS:
 		value = nla_get_u64(nla);
 		break;
+	case NLA_BINARY:
+		value = nla_len(nla);
+		break;
 	default:
 		return -EINVAL;
 	}
 
 	nla_get_range_unsigned(pt, &range);
 
+	if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG &&
+	    pt->type == NLA_BINARY && value > range.max) {
+		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
+				    current->comm, pt->type);
+		if (validate & NL_VALIDATE_STRICT_ATTRS) {
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"invalid attribute length");
+			return -EINVAL;
+		}
+
+		/* this assumes min <= max (don't validate against min) */
+		return 0;
+	}
+
 	if (value < range.min || value > range.max) {
-		NL_SET_ERR_MSG_ATTR(extack, nla,
-				    "integer out of range");
+		bool binary = pt->type == NLA_BINARY;
+
+		if (binary)
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"binary attribute size out of range");
+		else
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"integer out of range");
+
 		return -ERANGE;
 	}
 
@@ -264,8 +291,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 	nla_get_range_signed(pt, &range);
 
 	if (value < range.min || value > range.max) {
-		NL_SET_ERR_MSG_ATTR(extack, nla,
-				    "integer out of range");
+		NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+					"integer out of range");
 		return -ERANGE;
 	}
 
@@ -274,7 +301,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 
 static int nla_validate_int_range(const struct nla_policy *pt,
 				  const struct nlattr *nla,
-				  struct netlink_ext_ack *extack)
+				  struct netlink_ext_ack *extack,
+				  unsigned int validate)
 {
 	switch (pt->type) {
 	case NLA_U8:
@@ -282,7 +310,8 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	case NLA_U32:
 	case NLA_U64:
 	case NLA_MSECS:
-		return nla_validate_int_range_unsigned(pt, nla, extack);
+	case NLA_BINARY:
+		return nla_validate_range_unsigned(pt, nla, extack, validate);
 	case NLA_S8:
 	case NLA_S16:
 	case NLA_S32:
@@ -294,6 +323,37 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	}
 }
 
+static int nla_validate_mask(const struct nla_policy *pt,
+			     const struct nlattr *nla,
+			     struct netlink_ext_ack *extack)
+{
+	u64 value;
+
+	switch (pt->type) {
+	case NLA_U8:
+		value = nla_get_u8(nla);
+		break;
+	case NLA_U16:
+		value = nla_get_u16(nla);
+		break;
+	case NLA_U32:
+		value = nla_get_u32(nla);
+		break;
+	case NLA_U64:
+		value = nla_get_u64(nla);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (value & ~(u64)pt->mask) {
+		NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy, unsigned int validate,
 			struct netlink_ext_ack *extack, unsigned int depth)
@@ -313,15 +373,12 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
-	if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) ||
-	    (pt->type == NLA_EXACT_LEN &&
-	     pt->validation_type == NLA_VALIDATE_WARN_TOO_LONG &&
-	     attrlen != pt->len)) {
+	if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) {
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "invalid attribute length");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"invalid attribute length");
 			return -EINVAL;
 		}
 	}
@@ -329,14 +386,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	if (validate & NL_VALIDATE_NESTED) {
 		if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) &&
 		    !(nla->nla_type & NLA_F_NESTED)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "NLA_F_NESTED is missing");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"NLA_F_NESTED is missing");
 			return -EINVAL;
 		}
 		if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY &&
 		    pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "NLA_F_NESTED not expected");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"NLA_F_NESTED not expected");
 			return -EINVAL;
 		}
 	}
@@ -449,19 +506,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 					    "Unsupported attribute");
 			return -EINVAL;
 		}
-		/* fall through */
-	case NLA_MIN_LEN:
 		if (attrlen < pt->len)
 			goto out_err;
 		break;
 
-	case NLA_EXACT_LEN:
-		if (pt->validation_type != NLA_VALIDATE_WARN_TOO_LONG) {
-			if (attrlen != pt->len)
-				goto out_err;
-			break;
-		}
-		/* fall through */
 	default:
 		if (pt->len)
 			minlen = pt->len;
@@ -479,9 +527,15 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		break;
 	case NLA_VALIDATE_RANGE_PTR:
 	case NLA_VALIDATE_RANGE:
+	case NLA_VALIDATE_RANGE_WARN_TOO_LONG:
 	case NLA_VALIDATE_MIN:
 	case NLA_VALIDATE_MAX:
-		err = nla_validate_int_range(pt, nla, extack);
+		err = nla_validate_int_range(pt, nla, extack, validate);
+		if (err)
+			return err;
+		break;
+	case NLA_VALIDATE_MASK:
+		err = nla_validate_mask(pt, nla, extack);
 		if (err)
 			return err;
 		break;
@@ -496,7 +550,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 	return 0;
 out_err:
-	NL_SET_ERR_MSG_ATTR(extack, nla, "Attribute failed policy validation");
+	NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+				"Attribute failed policy validation");
 	return err;
 }
 
@@ -816,8 +871,7 @@ EXPORT_SYMBOL(__nla_reserve);
 struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype,
 				   int attrlen, int padattr)
 {
-	if (nla_need_padding_for_64bit(skb))
-		nla_align_64bit(skb, padattr);
+	nla_align_64bit(skb, padattr);
 
 	return __nla_reserve(skb, attrtype, attrlen);
 }