From 5e11934d13c9a3bcb0cadad6c7a7de5c32660422 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Jul 2007 12:06:17 -0400 Subject: NFS: Fix put_nfs_open_context We need to grab the inode->i_lock atomically with the last reference put in order to remove the open context that is being freed from the nfsi->open_files list. Fix by converting the kref to a standard atomic counter and then using atomic_dec_and_lock()... Thanks to Arnd Bergmann for pointing out the problem. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bca6cdcb9f0d..71a49c3acabd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -468,7 +468,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; - kref_init(&ctx->kref); + atomic_set(&ctx->count, 1); } return ctx; } @@ -476,21 +476,18 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - kref_get(&ctx->kref); + atomic_inc(&ctx->count); return ctx; } -static void nfs_free_open_context(struct kref *kref) +void put_nfs_open_context(struct nfs_open_context *ctx) { - struct nfs_open_context *ctx = container_of(kref, - struct nfs_open_context, kref); + struct inode *inode = ctx->path.dentry->d_inode; - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->path.dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } + if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) + return; + list_del(&ctx->list); + spin_unlock(&inode->i_lock); if (ctx->state != NULL) nfs4_close_state(&ctx->path, ctx->state, ctx->mode); if (ctx->cred != NULL) @@ -500,11 +497,6 @@ static void nfs_free_open_context(struct kref *kref) kfree(ctx); } -void put_nfs_open_context(struct nfs_open_context *ctx) -{ - kref_put(&ctx->kref, nfs_free_open_context); -} - /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages -- cgit 1.4.1 From ba683031fae115d61c6b5f4c675cc27f6e9576d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 27 Jul 2007 10:23:05 -0400 Subject: NFSv4: Fix a locking regression in nfs4_set_mode_locked() We don't really need to clear &state->inode_states inside nfs4_set_mode_locked, and doing so without holding the inode->i_lock would in any case be a bug... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e9662ba81d86..3e4adf8c8312 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -341,8 +341,6 @@ nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode) else list_move_tail(&state->open_states, &state->owner->so_states); } - if (mode == 0) - list_del_init(&state->inode_states); state->state = mode; } @@ -415,8 +413,7 @@ void nfs4_put_open_state(struct nfs4_state *state) if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) return; spin_lock(&inode->i_lock); - if (!list_empty(&state->inode_states)) - list_del(&state->inode_states); + list_del(&state->inode_states); list_del(&state->open_states); spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); -- cgit 1.4.1 From 45328c354e8ae16b67cb3adb72ab57459f9e5fd6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Jul 2007 17:47:34 -0400 Subject: NFS: Fix NFSv4 open stateid regressions Do not allow cached open for O_RDONLY or O_WRONLY unless the file has been previously opened in these modes. Also Fix the calculation of the mode in nfs4_close_prepare. We should only issue an OPEN_DOWNGRADE if we're sure that we will still be holding the correct open modes. This may not be the case if we've been doing delegated opens. Finally, there is no need to adjust the open mode bit flags in nfs4_close_done(): that has already been done in nfs4_close_prepare(). Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6ca2795ccd9c..62b3ae280310 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -332,11 +332,9 @@ static int can_open_cached(struct nfs4_state *state, int mode) switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) { case FMODE_READ: ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; - ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; break; case FMODE_WRITE: ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0; - ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; break; case FMODE_READ|FMODE_WRITE: ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; @@ -1260,7 +1258,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: - nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags); + nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); break; case -NFS4ERR_STALE_STATEID: @@ -1286,23 +1284,19 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) .rpc_cred = state->owner->so_cred, }; int clear_rd, clear_wr, clear_rdwr; - int mode; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - mode = FMODE_READ|FMODE_WRITE; clear_rd = clear_wr = clear_rdwr = 0; spin_lock(&state->owner->so_lock); /* Calculate the change in open mode */ if (state->n_rdwr == 0) { if (state->n_rdonly == 0) { - mode &= ~FMODE_READ; clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags); clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); } if (state->n_wronly == 0) { - mode &= ~FMODE_WRITE; clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags); clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); } @@ -1314,9 +1308,13 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) return; } nfs_fattr_init(calldata->res.fattr); - if (mode != 0) + if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.open_flags = mode; + calldata->arg.open_flags = FMODE_READ; + } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + calldata->arg.open_flags = FMODE_WRITE; + } calldata->timestamp = jiffies; rpc_call_setup(task, &msg, 0); } -- cgit 1.4.1 From 905f8d16e32fd48499e3f8b9a2d9f746af3e0949 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Aug 2007 12:18:34 -0400 Subject: NFSv4: Don't call put_rpccred() from an rcu callback Doing so would require us to introduce bh-safe locks into put_rpccred(). This patch fixes the lockdep complaint reported by Marc Dietrich: inconsistent {softirq-on-W} -> {in-softirq-W} usage. swapper/0 [HC0[0]:SC1[1]:HE1:SE0] takes: (rpc_credcache_lock){-+..}, at: [] _atomic_dec_and_lock+0x17/0x60 {softirq-on-W} state was registered at: [] __lock_acquire+0x650/0x1030 [] lock_acquire+0x61/0x80 [] _spin_lock+0x2c/0x40 [] _atomic_dec_and_lock+0x17/0x60 [] put_rpccred+0x5d/0x100 [sunrpc] [] rpcauth_unbindcred+0x21/0x60 [sunrpc] [] a0 [sunrpc] [] rpc_call_sync+0x30/0x40 [sunrpc] [] rpcb_register+0xdb/0x180 [sunrpc] [] svc_register+0x93/0x160 [sunrpc] [] __svc_create+0x1ee/0x220 [sunrpc] [] svc_create+0x13/0x20 [sunrpc] [] nfs_callback_up+0x82/0x120 [nfs] [] nfs_get_client+0x176/0x390 [nfs] [] nfs4_set_client+0x31/0x190 [nfs] [] nfs4_create_server+0x63/0x3b0 [nfs] [] nfs4_get_sb+0x346/0x5b0 [nfs] [] vfs_kern_mount+0x94/0x110 [] do_mount+0x1f2/0x7d0 [] sys_mount+0x66/0xa0 [] syscall_call+0x7/0xb [] 0xffffffff irq event stamp: 5277830 hardirqs last enabled at (5277830): [] kmem_cache_free+0x8a/0xc0 hardirqs last disabled at (5277829): [] kmem_cache_free+0x52/0xc0 softirqs last enabled at (5277798): [] __do_softirq+0xa3/0xc0 softirqs last disabled at (5277817): [] do_softirq+0x47/0x50 other info that might help us debug this: no locks held by swapper/0. stack backtrace: [] show_trace_log_lvl+0x1a/0x30 [] show_trace+0x12/0x20 [] dump_stack+0x15/0x20 [] print_usage_bug+0x153/0x160 [] mark_lock+0x449/0x620 [] __lock_acquire+0x604/0x1030 [] lock_acquire+0x61/0x80 [] _spin_lock+0x2c/0x40 [] _atomic_dec_and_lock+0x17/0x60 [] put_rpccred+0x5d/0x100 [sunrpc] [] nfs_free_delegation_callback+0x13/0x20 [nfs] [] __rcu_process_callbacks+0x6a/0x1c0 [] rcu_process_callbacks+0x12/0x30 [] tasklet_action+0x38/0x80 [] __do_softirq+0x55/0xc0 [] do_softirq+0x47/0x50 [] irq_exit+0x35/0x40 [] smp_apic_timer_interrupt+0x43/0x80 [] apic_timer_interrupt+0x33/0x38 [] cpuidle_idle_call+0x6f/0x90 [] cpu_idle+0x43/0x70 [] rest_init+0x47/0x50 [] start_kernel+0x22a/0x2b0 [<00000000>] 0x0 ======================= Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 20ac403469a0..c55a761c22bb 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -20,10 +20,8 @@ #include "delegation.h" #include "internal.h" -static void nfs_free_delegation(struct nfs_delegation *delegation) +static void nfs_do_free_delegation(struct nfs_delegation *delegation) { - if (delegation->cred) - put_rpccred(delegation->cred); kfree(delegation); } @@ -31,7 +29,18 @@ static void nfs_free_delegation_callback(struct rcu_head *head) { struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); - nfs_free_delegation(delegation); + nfs_do_free_delegation(delegation); +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + struct rpc_cred *cred; + + cred = rcu_dereference(delegation->cred); + rcu_assign_pointer(delegation->cred, NULL); + call_rcu(&delegation->rcu, nfs_free_delegation_callback); + if (cred) + put_rpccred(cred); } static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state) @@ -166,7 +175,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * int res = 0; res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); - call_rcu(&delegation->rcu, nfs_free_delegation_callback); + nfs_free_delegation(delegation); return res; } @@ -448,7 +457,7 @@ restart: spin_unlock(&clp->cl_lock); rcu_read_unlock(); if (delegation != NULL) - call_rcu(&delegation->rcu, nfs_free_delegation_callback); + nfs_free_delegation(delegation); goto restart; } rcu_read_unlock(); -- cgit 1.4.1 From 3d39c691ff486142dd9aaeac12f553f4476b7a62 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Aug 2007 15:28:33 -0400 Subject: NFS: Replace flush_scheduled_work with cancel_work_sync() and friends This will avoid deadlocks of the form: stack backtrace: [] show_trace_log_lvl+0x1a/0x30 [] show_trace+0x12/0x20 [] dump_stack+0x15/0x20 [] __lock_acquire+0xc22/0x1030 [] lock_acquire+0x61/0x80 [] flush_workqueue+0x49/0x70 [] flush_scheduled_work+0xd/0x10 [] nfs_release_automount_timer+0x2c/0x30 [nfs] [] nfs_free_server+0x9e/0xd0 [nfs] [] nfs_kill_super+0x16/0x20 [nfs] [] deactivate_super+0x7d/0xa0 [] mntput_no_expire+0x4b/0x80 [] expire_mount_list+0xe4/0x140 [] mark_mounts_for_expiry+0x99/0xb0 [] nfs_expire_automounts+0xd/0x40 [nfs] [] run_workqueue+0x12b/0x1e0 [] worker_thread+0x9b/0x100 [] kthread+0x42/0x70 [] kernel_thread_helper+0x7/0x18 ======================= Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 6 ++---- fs/nfs/nfs4renewd.c | 5 ++--- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 7f86e65182e4..aea76d0e5fbd 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -175,10 +175,8 @@ static void nfs_expire_automounts(struct work_struct *work) void nfs_release_automount_timer(void) { - if (list_empty(&nfs_automount_list)) { - cancel_delayed_work(&nfs_automount_task); - flush_scheduled_work(); - } + if (list_empty(&nfs_automount_list)) + cancel_delayed_work_sync(&nfs_automount_task); } /* diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 0505ca124034..3ea352d82eba 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -127,16 +127,15 @@ nfs4_schedule_state_renewal(struct nfs_client *clp) void nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - flush_scheduled_work(); + cancel_delayed_work(&server->nfs_client->cl_renewd); } void nfs4_kill_renewd(struct nfs_client *clp) { down_read(&clp->cl_sem); - cancel_delayed_work(&clp->cl_renewd); + cancel_delayed_work_sync(&clp->cl_renewd); up_read(&clp->cl_sem); - flush_scheduled_work(); } /* -- cgit 1.4.1