summary refs log tree commit diff
path: root/fs/xfs/xfs_buf.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-12-14 21:35:31 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-12-14 21:35:31 -0800
commit 5cc60aeedf315a7513f92e98314e86d515b986d1 (patch)
tree c83e252b8e2ad2cd42cbbb7974348290831dbb2a /fs/xfs/xfs_buf.c
parent 5c2992ee7fd8a29d04125dc0aa3522784c5fa5eb (diff)
parent 9807b773dad4555665931d57065d7f8adab58e0f (diff)
download linux-5cc60aeedf315a7513f92e98314e86d515b986d1.tar.gz
Merge tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner:
 "There is quite a varied bunch of stuff in this update, and some of it
  you will have already merged through the ext4 tree which imported the
  dax-4.10-iomap-pmd topic branch from the XFS tree.

  There is also a new direct IO implementation that uses the iomap
  infrastructure. It's much simpler, faster, and has lower IO latency
  than the existing direct IO infrastructure.

  Summary:
   - DAX PMD faults via iomap infrastructure
   - Direct-io support in iomap infrastructure
   - removal of now-redundant XFS inode iolock, replaced with VFS
     i_rwsem
   - synchronisation with fixes and changes in userspace libxfs code
   - extent tree lookup helpers
   - lots of little corruption detection improvements to verifiers
   - optimised CRC calculations
   - faster buffer cache lookups
   - deprecation of barrier/nobarrier mount options - we always use
     REQ_FUA/REQ_FLUSH where appropriate for data integrity now
   - cleanups to speculative preallocation
   - miscellaneous minor bug fixes and cleanups"

* tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (63 commits)
  xfs: nuke unused tracepoint definitions
  xfs: use GPF_NOFS when allocating btree cursors
  xfs: use xfs_vn_setattr_size to check on new size
  xfs: deprecate barrier/nobarrier mount option
  xfs: Always flush caches when integrity is required
  xfs: ignore leaf attr ichdr.count in verifier during log replay
  xfs: use rhashtable to track buffer cache
  xfs: optimise CRC updates
  xfs: make xfs btree stats less huge
  xfs: don't cap maximum dedupe request length
  xfs: don't allow di_size with high bit set
  xfs: error out if trying to add attrs and anextents > 0
  xfs: don't crash if reading a directory results in an unexpected hole
  xfs: complain if we don't get nextents bmap records
  xfs: check for bogus values in btree block headers
  xfs: forbid AG btrees with level == 0
  xfs: several xattr functions can be void
  xfs: handle cow fork in xfs_bmap_trace_exlist
  xfs: pass state not whichfork to trace_xfs_extlist
  xfs: Move AGI buffer type setting to xfs_read_agi
  ...
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r-- fs/xfs/xfs_buf.c 123
1 files changed, 74 insertions, 49 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 33c435f3316c..7f0a01f7b592 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -219,7 +219,6 @@ _xfs_buf_alloc(
 	init_completion(&bp->b_iowait);
 	INIT_LIST_HEAD(&bp->b_lru);
 	INIT_LIST_HEAD(&bp->b_list);
-	RB_CLEAR_NODE(&bp->b_rbnode);
 	sema_init(&bp->b_sema, 0); /* held, no waiters */
 	spin_lock_init(&bp->b_lock);
 	XB_SET_OWNER(bp);
@@ -473,6 +472,62 @@ _xfs_buf_map_pages(
 /*
  *	Finding and Reading Buffers
  */
+static int
+_xfs_buf_obj_cmp(
+	struct rhashtable_compare_arg	*arg,
+	const void			*obj)
+{
+	const struct xfs_buf_map	*map = arg->key;
+	const struct xfs_buf		*bp = obj;
+
+	/*
+	 * The key hashing in the lookup path depends on the key being the
+	 * first element of the compare_arg, make sure to assert this.
+	 */
+	BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
+
+	if (bp->b_bn != map->bm_bn)
+		return 1;
+
+	if (unlikely(bp->b_length != map->bm_len)) {
+		/*
+		 * found a block number match. If the range doesn't
+		 * match, the only way this is allowed is if the buffer
+		 * in the cache is stale and the transaction that made
+		 * it stale has not yet committed. i.e. we are
+		 * reallocating a busy extent. Skip this buffer and
+		 * continue searching for an exact match.
+		 */
+		ASSERT(bp->b_flags & XBF_STALE);
+		return 1;
+	}
+	return 0;
+}
+
+static const struct rhashtable_params xfs_buf_hash_params = {
+	.min_size		= 32,	/* empty AGs have minimal footprint */
+	.nelem_hint		= 16,
+	.key_len		= sizeof(xfs_daddr_t),
+	.key_offset		= offsetof(struct xfs_buf, b_bn),
+	.head_offset		= offsetof(struct xfs_buf, b_rhash_head),
+	.automatic_shrinking	= true,
+	.obj_cmpfn		= _xfs_buf_obj_cmp,
+};
+
+int
+xfs_buf_hash_init(
+	struct xfs_perag	*pag)
+{
+	spin_lock_init(&pag->pag_buf_lock);
+	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+}
+
+void
+xfs_buf_hash_destroy(
+	struct xfs_perag	*pag)
+{
+	rhashtable_destroy(&pag->pag_buf_hash);
+}
 
 /*
  *	Look up, and creates if absent, a lockable buffer for
@@ -488,27 +543,24 @@ _xfs_buf_find(
 	xfs_buf_t		*new_bp)
 {
 	struct xfs_perag	*pag;
-	struct rb_node		**rbp;
-	struct rb_node		*parent;
 	xfs_buf_t		*bp;
-	xfs_daddr_t		blkno = map[0].bm_bn;
+	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
 	xfs_daddr_t		eofs;
-	int			numblks = 0;
 	int			i;
 
 	for (i = 0; i < nmaps; i++)
-		numblks += map[i].bm_len;
+		cmap.bm_len += map[i].bm_len;
 
 	/* Check for IOs smaller than the sector size / not sector aligned */
-	ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize));
-	ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
+	ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
+	ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
 
 	/*
 	 * Corrupted block numbers can get through to here, unfortunately, so we
 	 * have to check that the buffer falls within the filesystem bounds.
 	 */
 	eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
-	if (blkno < 0 || blkno >= eofs) {
+	if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
 		/*
 		 * XXX (dgc): we should really be returning -EFSCORRUPTED here,
 		 * but none of the higher level infrastructure supports
@@ -516,53 +568,29 @@ _xfs_buf_find(
 		 */
 		xfs_alert(btp->bt_mount,
 			  "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
-			  __func__, blkno, eofs);
+			  __func__, cmap.bm_bn, eofs);
 		WARN_ON(1);
 		return NULL;
 	}
 
-	/* get tree root */
 	pag = xfs_perag_get(btp->bt_mount,
-				xfs_daddr_to_agno(btp->bt_mount, blkno));
+			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
 
-	/* walk tree */
 	spin_lock(&pag->pag_buf_lock);
-	rbp = &pag->pag_buf_tree.rb_node;
-	parent = NULL;
-	bp = NULL;
-	while (*rbp) {
-		parent = *rbp;
-		bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
-		if (blkno < bp->b_bn)
-			rbp = &(*rbp)->rb_left;
-		else if (blkno > bp->b_bn)
-			rbp = &(*rbp)->rb_right;
-		else {
-			/*
-			 * found a block number match. If the range doesn't
-			 * match, the only way this is allowed is if the buffer
-			 * in the cache is stale and the transaction that made
-			 * it stale has not yet committed. i.e. we are
-			 * reallocating a busy extent. Skip this buffer and
-			 * continue searching to the right for an exact match.
-			 */
-			if (bp->b_length != numblks) {
-				ASSERT(bp->b_flags & XBF_STALE);
-				rbp = &(*rbp)->rb_right;
-				continue;
-			}
-			atomic_inc(&bp->b_hold);
-			goto found;
-		}
+	bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
+				    xfs_buf_hash_params);
+	if (bp) {
+		atomic_inc(&bp->b_hold);
+		goto found;
 	}
 
 	/* No match found */
 	if (new_bp) {
-		rb_link_node(&new_bp->b_rbnode, parent, rbp);
-		rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
 		/* the buffer keeps the perag reference until it is freed */
 		new_bp->b_pag = pag;
+		rhashtable_insert_fast(&pag->pag_buf_hash,
+				       &new_bp->b_rhash_head,
+				       xfs_buf_hash_params);
 		spin_unlock(&pag->pag_buf_lock);
 	} else {
 		XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
@@ -930,7 +958,6 @@ xfs_buf_rele(
 
 	if (!pag) {
 		ASSERT(list_empty(&bp->b_lru));
-		ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
 		if (atomic_dec_and_test(&bp->b_hold)) {
 			xfs_buf_ioacct_dec(bp);
 			xfs_buf_free(bp);
@@ -938,8 +965,6 @@ xfs_buf_rele(
 		return;
 	}
 
-	ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 
 	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
@@ -983,7 +1008,8 @@ xfs_buf_rele(
 		}
 
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
-		rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
+				       xfs_buf_hash_params);
 		spin_unlock(&pag->pag_buf_lock);
 		xfs_perag_put(pag);
 		freebuf = true;
@@ -1711,8 +1737,7 @@ xfs_free_buftarg(
 	percpu_counter_destroy(&btp->bt_io_count);
 	list_lru_destroy(&btp->bt_lru);
 
-	if (mp->m_flags & XFS_MOUNT_BARRIER)
-		xfs_blkdev_issue_flush(btp);
+	xfs_blkdev_issue_flush(btp);
 
 	kmem_free(btp);
 }