summary refs log tree commit diff
path: root/fs/xfs/xfs_symlink.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2015-02-23 22:38:08 +1100
committerDave Chinner <david@fromorbit.com>2015-02-23 22:38:08 +1100
commit58c904734cd0917cd0953067dd68003572407c7b (patch)
treef23f98ce11dcf36c1b582b5a04357ab9eb97df19 /fs/xfs/xfs_symlink.c
parentc517d838eb7d07bbe9507871fab3931deccff539 (diff)
downloadlinux-58c904734cd0917cd0953067dd68003572407c7b.tar.gz
xfs: inodes are new until the dentry cache is set up
Al Viro noticed a generic set of issues to do with filehandle lookup
racing with dentry cache setup. They involve a filehandle lookup
occurring while an inode is being created and the filehandle lookup
racing with the dentry creation for the real file. This can lead to
multiple dentries for the one path being instantiated. There are a
host of other issues around this same set of paths.

The underlying cause is that file handle lookup only waits on inode
cache instantiation rather than full dentry cache instantiation. XFS
is mostly immune to the problems discovered due to it's own internal
inode cache, but there are a couple of corner cases where races can
happen.

We currently clear the XFS_INEW flag when the inode is fully set up
after insertion into the cache. Newly allocated inodes are inserted
locked and so aren't usable until the allocation transaction
commits. This, however, occurs before the dentry and security
information is fully initialised and hence the inode is unlocked and
available for lookups to find too early.

To solve the problem, only clear the XFS_INEW flag for newly created
inodes once the dentry is fully instantiated. This means lookups
will retry until the XFS_INEW flag is removed from the inode and
hence avoids the race conditions in questions.

THis also means that xfs_create(), xfs_create_tmpfile() and
xfs_symlink() need to finish the setup of the inode in their error
paths if we had allocated the inode but failed later in the creation
process. xfs_symlink(), in particular, needed a lot of help to make
it's error handling match that of xfs_create().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>

Diffstat (limited to 'fs/xfs/xfs_symlink.c')
-rw-r--r--fs/xfs/xfs_symlink.c58
1 files changed, 34 insertions, 24 deletions
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 25791df6f638..3df411eadb86 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -177,7 +177,7 @@ xfs_symlink(
 	int			pathlen;
 	struct xfs_bmap_free	free_list;
 	xfs_fsblock_t		first_block;
-	bool			unlock_dp_on_error = false;
+	bool                    unlock_dp_on_error = false;
 	uint			cancel_flags;
 	int			committed;
 	xfs_fileoff_t		first_fsb;
@@ -221,7 +221,7 @@ xfs_symlink(
 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
 			&udqp, &gdqp, &pdqp);
 	if (error)
-		goto std_return;
+		return error;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -241,7 +241,7 @@ xfs_symlink(
 	}
 	if (error) {
 		cancel_flags = 0;
-		goto error_return;
+		goto out_trans_cancel;
 	}
 
 	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -252,7 +252,7 @@ xfs_symlink(
 	 */
 	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
 		error = -EPERM;
-		goto error_return;
+		goto out_trans_cancel;
 	}
 
 	/*
@@ -261,7 +261,7 @@ xfs_symlink(
 	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
 						pdqp, resblks, 1, 0);
 	if (error)
-		goto error_return;
+		goto out_trans_cancel;
 
 	/*
 	 * Check for ability to enter directory entry, if no space reserved.
@@ -269,7 +269,7 @@ xfs_symlink(
 	if (!resblks) {
 		error = xfs_dir_canenter(tp, dp, link_name);
 		if (error)
-			goto error_return;
+			goto out_trans_cancel;
 	}
 	/*
 	 * Initialize the bmap freelist prior to calling either
@@ -282,15 +282,14 @@ xfs_symlink(
 	 */
 	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
 			       prid, resblks > 0, &ip, NULL);
-	if (error) {
-		if (error == -ENOSPC)
-			goto error_return;
-		goto error1;
-	}
+	if (error)
+		goto out_trans_cancel;
 
 	/*
-	 * An error after we've joined dp to the transaction will result in the
-	 * transaction cancel unlocking dp so don't do it explicitly in the
+	 * Now we join the directory inode to the transaction.  We do not do it
+	 * earlier because xfs_dir_ialloc might commit the previous transaction
+	 * (and release all the locks).  An error from here on will result in
+	 * the transaction cancel unlocking dp so don't do it explicitly in the
 	 * error path.
 	 */
 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
@@ -330,7 +329,7 @@ xfs_symlink(
 				  XFS_BMAPI_METADATA, &first_block, resblks,
 				  mval, &nmaps, &free_list);
 		if (error)
-			goto error2;
+			goto out_bmap_cancel;
 
 		if (resblks)
 			resblks -= fs_blocks;
@@ -348,7 +347,7 @@ xfs_symlink(
 					       BTOBB(byte_cnt), 0);
 			if (!bp) {
 				error = -ENOMEM;
-				goto error2;
+				goto out_bmap_cancel;
 			}
 			bp->b_ops = &xfs_symlink_buf_ops;
 
@@ -378,7 +377,7 @@ xfs_symlink(
 	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
 					&first_block, &free_list, resblks);
 	if (error)
-		goto error2;
+		goto out_bmap_cancel;
 	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
@@ -392,10 +391,13 @@ xfs_symlink(
 	}
 
 	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error) {
-		goto error2;
-	}
+	if (error)
+		goto out_bmap_cancel;
+
 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+	if (error)
+		goto out_release_inode;
+
 	xfs_qm_dqrele(udqp);
 	xfs_qm_dqrele(gdqp);
 	xfs_qm_dqrele(pdqp);
@@ -403,20 +405,28 @@ xfs_symlink(
 	*ipp = ip;
 	return 0;
 
- error2:
-	IRELE(ip);
- error1:
+out_bmap_cancel:
 	xfs_bmap_cancel(&free_list);
 	cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+out_trans_cancel:
 	xfs_trans_cancel(tp, cancel_flags);
+out_release_inode:
+	/*
+	 * Wait until after the current transaction is aborted to finish the
+	 * setup of the inode and release the inode.  This prevents recursive
+	 * transactions and deadlocks from xfs_inactive.
+	 */
+	if (ip) {
+		xfs_finish_inode_setup(ip);
+		IRELE(ip);
+	}
+
 	xfs_qm_dqrele(udqp);
 	xfs_qm_dqrele(gdqp);
 	xfs_qm_dqrele(pdqp);
 
 	if (unlock_dp_on_error)
 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
- std_return:
 	return error;
 }