summary refs log tree commit diff
path: root/lib/scatterlist.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-17 11:18:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-17 11:18:18 -0700
commita1e16bc7d5f7ca3599d8a7f061841c93a563665e (patch)
tree73e557536e5dc52894e0e24439406db3510c5434 /lib/scatterlist.c
parent2a934b38c066ff221b08a9c703314a2a1c885dbd (diff)
parentc7a198c700763ac89abbb166378f546aeb9afb33 (diff)
downloadlinux-a1e16bc7d5f7ca3599d8a7f061841c93a563665e.tar.gz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "A usual cycle for RDMA with a typical mix of driver and core subsystem
  updates:

   - Driver minor changes and bug fixes for mlx5, efa, rxe, vmw_pvrdma,
     hns, usnic, qib, qedr, cxgb4, hns, bnxt_re

   - Various rtrs fixes and updates

   - Bug fix for mlx4 CM emulation for virtualization scenarios where
     MRA wasn't working right

   - Use tracepoints instead of pr_debug in the CM code

   - Scrub the locking in ucma and cma to close more syzkaller bugs

   - Use tasklet_setup in the subsystem

   - Revert the idea that 'destroy' operations are not allowed to fail
     at the driver level. This proved unworkable from a HW perspective.

   - Revise how the umem API works so drivers make fewer mistakes using
     it

   - XRC support for qedr

   - Convert uverbs objects RWQ and MW to new the allocation scheme

   - Large queue entry sizes for hns

   - Use hmm_range_fault() for mlx5 On Demand Paging

   - uverbs APIs to inspect the GID table instead of sysfs

   - Move some of the RDMA code for building large page SGLs into
     lib/scatterlist"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (191 commits)
  RDMA/ucma: Fix use after free in destroy id flow
  RDMA/rxe: Handle skb_clone() failure in rxe_recv.c
  RDMA/rxe: Move the definitions for rxe_av.network_type to uAPI
  RDMA: Explicitly pass in the dma_device to ib_register_device
  lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values
  IB/mlx4: Convert rej_tmout radix-tree to XArray
  RDMA/rxe: Fix bug rejecting all multicast packets
  RDMA/rxe: Fix skb lifetime in rxe_rcv_mcast_pkt()
  RDMA/rxe: Remove duplicate entries in struct rxe_mr
  IB/hfi,rdmavt,qib,opa_vnic: Update MAINTAINERS
  IB/rdmavt: Fix sizeof mismatch
  MAINTAINERS: CISCO VIC LOW LATENCY NIC DRIVER
  RDMA/bnxt_re: Fix sizeof mismatch for allocation of pbl_tbl.
  RDMA/bnxt_re: Use rdma_umem_for_each_dma_block()
  RDMA/umem: Move to allocate SG table from pages
  lib/scatterlist: Add support in dynamic allocation of SG table from pages
  tools/testing/scatterlist: Show errors in human readable form
  tools/testing/scatterlist: Rejuvenate bit-rotten test
  RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces
  RDMA/uverbs: Expose the new GID query API to user space
  ...
Diffstat (limited to 'lib/scatterlist.c')
-rw-r--r--lib/scatterlist.c133
1 files changed, 107 insertions, 26 deletions
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index d94628fa3349..9a4992dc8e8c 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(sg_alloc_table);
 
+static struct scatterlist *get_next_sg(struct sg_table *table,
+				       struct scatterlist *cur,
+				       unsigned long needed_sges,
+				       gfp_t gfp_mask)
+{
+	struct scatterlist *new_sg, *next_sg;
+	unsigned int alloc_size;
+
+	if (cur) {
+		next_sg = sg_next(cur);
+		/* Check if last entry should be keeped for chainning */
+		if (!sg_is_last(next_sg) || needed_sges == 1)
+			return next_sg;
+	}
+
+	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+	new_sg = sg_kmalloc(alloc_size, gfp_mask);
+	if (!new_sg)
+		return ERR_PTR(-ENOMEM);
+	sg_init_table(new_sg, alloc_size);
+	if (cur) {
+		__sg_chain(next_sg, new_sg);
+		table->orig_nents += alloc_size - 1;
+	} else {
+		table->sgl = new_sg;
+		table->orig_nents = alloc_size;
+		table->nents = 0;
+	}
+	return new_sg;
+}
+
 /**
  * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
  *			         an array of pages
@@ -373,30 +404,69 @@ EXPORT_SYMBOL(sg_alloc_table);
  * @n_pages:	 Number of pages in the pages array
  * @offset:      Offset from start of the first page to the start of a buffer
  * @size:        Number of valid bytes in the buffer (after offset)
- * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @max_segment: Maximum size of a scatterlist element in bytes
+ * @prv:	 Last populated sge in sgt
+ * @left_pages:  Left pages caller have to set after this call
  * @gfp_mask:	 GFP allocation mask
  *
- *  Description:
- *    Allocate and initialize an sg table from a list of pages. Contiguous
- *    ranges of the pages are squashed into a single scatterlist node up to the
- *    maximum size specified in @max_segment. An user may provide an offset at a
- *    start and a size of valid data in a buffer specified by the page array.
- *    The returned sg table is released by sg_free_table.
+ * Description:
+ *    If @prv is NULL, allocate and initialize an sg table from a list of pages,
+ *    else reuse the scatterlist passed in at @prv.
+ *    Contiguous ranges of the pages are squashed into a single scatterlist
+ *    entry up to the maximum size specified in @max_segment.  A user may
+ *    provide an offset at a start and a size of valid data in a buffer
+ *    specified by the page array.
  *
  * Returns:
- *   0 on success, negative error on failure
+ *   Last SGE in sgt on success, PTR_ERR on otherwise.
+ *   The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ *   If this function returns non-0 (eg failure), the caller must call
+ *   sg_free_table() to cleanup any leftover allocations.
  */
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-				unsigned int n_pages, unsigned int offset,
-				unsigned long size, unsigned int max_segment,
-				gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+		struct page **pages, unsigned int n_pages, unsigned int offset,
+		unsigned long size, unsigned int max_segment,
+		struct scatterlist *prv, unsigned int left_pages,
+		gfp_t gfp_mask)
 {
-	unsigned int chunks, cur_page, seg_len, i;
-	int ret;
-	struct scatterlist *s;
+	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+	unsigned int added_nents = 0;
+	struct scatterlist *s = prv;
 
-	if (WARN_ON(!max_segment || offset_in_page(max_segment)))
-		return -EINVAL;
+	/*
+	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
+	 * otherwise it can overshoot.
+	 */
+	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
+	if (WARN_ON(max_segment < PAGE_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (prv) {
+		unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
+				       prv->offset + prv->length) /
+				      PAGE_SIZE;
+
+		if (WARN_ON(offset))
+			return ERR_PTR(-EINVAL);
+
+		/* Merge contiguous pages into the last SG */
+		prv_len = prv->length;
+		while (n_pages && page_to_pfn(pages[0]) == paddr) {
+			if (prv->length + PAGE_SIZE > max_segment)
+				break;
+			prv->length += PAGE_SIZE;
+			paddr++;
+			pages++;
+			n_pages--;
+		}
+		if (!n_pages)
+			goto out;
+	}
 
 	/* compute number of contiguous chunks */
 	chunks = 1;
@@ -410,13 +480,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 		}
 	}
 
-	ret = sg_alloc_table(sgt, chunks, gfp_mask);
-	if (unlikely(ret))
-		return ret;
-
 	/* merging chunks and putting them into the scatterlist */
 	cur_page = 0;
-	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+	for (i = 0; i < chunks; i++) {
 		unsigned int j, chunk_size;
 
 		/* look for the end of the current chunk */
@@ -429,15 +495,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 				break;
 		}
 
+		/* Pass how many chunks might be left */
+		s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+		if (IS_ERR(s)) {
+			/*
+			 * Adjust entry length to be as before function was
+			 * called.
+			 */
+			if (prv)
+				prv->length = prv_len;
+			return s;
+		}
 		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
 		sg_set_page(s, pages[cur_page],
 			    min_t(unsigned long, size, chunk_size), offset);
+		added_nents++;
 		size -= chunk_size;
 		offset = 0;
 		cur_page = j;
 	}
-
-	return 0;
+	sgt->nents += added_nents;
+out:
+	if (!left_pages)
+		sg_mark_end(s);
+	return s;
 }
 EXPORT_SYMBOL(__sg_alloc_table_from_pages);
 
@@ -465,8 +546,8 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned int n_pages, unsigned int offset,
 			      unsigned long size, gfp_t gfp_mask)
 {
-	return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size,
-					   SCATTERLIST_MAX_SEGMENT, gfp_mask);
+	return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
+			offset, size, UINT_MAX, NULL, 0, gfp_mask));
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages);