From faa9560ae76ef50a3cbfb1a6afc0343fd8172374 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:49 -0400 Subject: fanotify: do not dereference inode_mark when it is unset The fanotify code is supposed to get the group from the mark. It accidentally only used the inode_mark. If the vfsmount_mark was set but not the inode_mark it would deref the NULL inode_mark. Get the group from the correct place. Reported-by: Tvrtko Ursulin Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 3970392b2722..f3e3b355ba7f 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -148,13 +148,14 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, const unsigned char *file_name, struct fsnotify_event **event) { - struct fsnotify_group *group = inode_mark->group; + struct fsnotify_group *group = NULL; __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); - pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" - " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, - mnt, inode_mark, mask, data, data_is, cookie, *event); + if (unlikely(!inode_mark && !vfsmount_mark)) { + BUG(); + return 0; + } /* clear ignored on inode modification */ if (mask & FS_MODIFY) { @@ -168,18 +169,24 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, /* does the inode mark tell us to do something? */ if (inode_mark) { + group = inode_mark->group; inode_test_mask &= inode_mark->mask; inode_test_mask &= ~inode_mark->ignored_mask; } /* does the vfsmount_mark tell us to do something? */ if (vfsmount_mark) { + group = vfsmount_mark->group; vfsmount_test_mask &= vfsmount_mark->mask; vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; if (inode_mark) vfsmount_test_mask &= ~inode_mark->ignored_mask; } + pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" + " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, + mnt, inode_mark, mask, data, data_is, cookie, *event); + if (!inode_test_mask && !vfsmount_test_mask) return 0; -- cgit 1.4.1 From 5f3f259fa8f1d7969360acfad5307d03c2f53d63 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:49 -0400 Subject: fsnotify: reset used_inode and used_vfsmount on each pass The fsnotify main loop has 2 booleans which tell if a particular mark was sent to the listeners or if it should be processed in the next pass. The problem is that the booleans were not reset on each traversal of the loop. So marks could get skipped even when they were not sent to the notifiers. Reported-by: Tvrtko Ursulin Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index f3e3b355ba7f..59dc7a02bd0c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -220,7 +220,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, struct fsnotify_event *event = NULL; struct vfsmount *mnt; int idx, ret = 0; - bool used_inode = false, used_vfsmount = false; + bool used_inode, used_vfsmount; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); @@ -261,6 +261,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, } while (inode_node || vfsmount_node) { + used_inode = used_vfsmount = false; + if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), struct fsnotify_mark, i.i_list); -- cgit 1.4.1 From 84e1ab4d875922c034db7f4f814ac445a20a14bd Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:50 -0400 Subject: fsnotify: fix ignored mask handling between inode and vfsmount marks The interesting 2 list lockstep walking didn't quite work out if the inode marks only had ignores and the vfsmount list requested events. The code to shortcut list traversal would not run the inode list since it didn't have real event requests. This code forces inode list traversal when a vfsmount mark matches the event type. Maybe we could add an i_fsnotify_ignored_mask field to struct inode to get the shortcut back, but it doesn't seem worth it to grow struct inode again. I bet with the recent changes to lock the way we do now it would actually not be a major perf hit to just drop i_fsnotify_mark_mask altogether. But that is for another day. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 59dc7a02bd0c..6f2777ce87a1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -149,8 +149,8 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, struct fsnotify_event **event) { struct fsnotify_group *group = NULL; - __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); - __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); + __u32 inode_test_mask = 0; + __u32 vfsmount_test_mask = 0; if (unlikely(!inode_mark && !vfsmount_mark)) { BUG(); @@ -170,12 +170,14 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, /* does the inode mark tell us to do something? */ if (inode_mark) { group = inode_mark->group; + inode_test_mask = (mask & ~FS_EVENT_ON_CHILD); inode_test_mask &= inode_mark->mask; inode_test_mask &= ~inode_mark->ignored_mask; } /* does the vfsmount_mark tell us to do something? */ if (vfsmount_mark) { + vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD); group = vfsmount_mark->group; vfsmount_test_mask &= vfsmount_mark->mask; vfsmount_test_mask &= ~vfsmount_mark->ignored_mask; @@ -183,9 +185,12 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, vfsmount_test_mask &= ~inode_mark->ignored_mask; } - pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p" - " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell, - mnt, inode_mark, mask, data, data_is, cookie, *event); + pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" + " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" + " data=%p data_is=%d cookie=%d event=%p\n", + __func__, group, to_tell, mnt, mask, inode_mark, + inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, + data_is, cookie, *event); if (!inode_test_mask && !vfsmount_test_mask) return 0; @@ -214,7 +219,7 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const unsigned char *file_name, u32 cookie) { - struct hlist_node *inode_node, *vfsmount_node; + struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; struct fsnotify_group *inode_group, *vfsmount_group; struct fsnotify_event *event = NULL; @@ -245,19 +250,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, (test_mask & to_tell->i_fsnotify_mask)) inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, &fsnotify_mark_srcu); - else - inode_node = NULL; - if (mnt) { - if ((mask & FS_MODIFY) || - (test_mask & mnt->mnt_fsnotify_mask)) - vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, - &fsnotify_mark_srcu); - else - vfsmount_node = NULL; - } else { - mnt = NULL; - vfsmount_node = NULL; + if (mnt && ((mask & FS_MODIFY) || + (test_mask & mnt->mnt_fsnotify_mask))) { + vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, + &fsnotify_mark_srcu); + inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + &fsnotify_mark_srcu); } while (inode_node || vfsmount_node) { -- cgit 1.4.1 From 2eebf582c9b3106abb9c33f4fc0a347fb9391037 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 18 Aug 2010 12:25:50 -0400 Subject: fanotify: flush outstanding perm requests on group destroy When an fanotify listener is closing it may cause a deadlock between the listener and the original task doing an fs operation. If the original task is waiting for a permissions response it will be holding the srcu lock. The listener cannot clean up and exit until after that srcu lock is syncronized. Thus deadlock. The fix introduced here is to stop accepting new permissions events when a listener is shutting down and to grant permission for all outstanding events. Thus the original task will eventually release the srcu lock and the listener can complete shutdown. Reported-by: Andreas Gruenbacher Cc: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 27 +++++++++++++++++++++++++++ include/linux/fanotify.h | 7 ------- include/linux/fsnotify_backend.h | 1 + 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 032b837fcd11..b966b7230f47 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -195,6 +195,14 @@ static int prepare_for_access_response(struct fsnotify_group *group, re->fd = fd; mutex_lock(&group->fanotify_data.access_mutex); + + if (group->fanotify_data.bypass_perm) { + mutex_unlock(&group->fanotify_data.access_mutex); + kmem_cache_free(fanotify_response_event_cache, re); + event->response = FAN_ALLOW; + return 0; + } + list_add_tail(&re->list, &group->fanotify_data.access_list); mutex_unlock(&group->fanotify_data.access_mutex); @@ -364,9 +372,28 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t static int fanotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct fanotify_response_event *re, *lre; pr_debug("%s: file=%p group=%p\n", __func__, file, group); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + mutex_lock(&group->fanotify_data.access_mutex); + + group->fanotify_data.bypass_perm = true; + + list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { + pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, + re, re->event); + + list_del_init(&re->list); + re->event->response = FAN_ALLOW; + + kmem_cache_free(fanotify_response_event_cache, re); + } + mutex_unlock(&group->fanotify_data.access_mutex); + + wake_up(&group->fanotify_data.access_waitq); +#endif /* matches the fanotify_init->fsnotify_alloc_group */ fsnotify_put_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index f0949a57ca9d..985435622ecd 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -95,11 +95,4 @@ struct fanotify_response { (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ (long)(meta)->event_len <= (long)(len)) -#ifdef __KERNEL__ - -struct fanotify_wait { - struct fsnotify_event *event; - __s32 fd; -}; -#endif /* __KERNEL__ */ #endif /* _LINUX_FANOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index ed36fb57c426..e40190d16878 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -156,6 +156,7 @@ struct fsnotify_group { struct mutex access_mutex; struct list_head access_list; wait_queue_head_t access_waitq; + bool bypass_perm; /* protected by access_mutex */ #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; } fanotify_data; -- cgit 1.4.1 From ff8d6e983185ce19fa92bb836eb52b589957be65 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 20 Aug 2010 10:24:18 +0100 Subject: fanotify: drop duplicate pr_debug statement This reminded me... you have two pr_debugs in fanotify_should_send_event which output redundant information. Maybe you intended it like that so it is selectable how much log spam you want, or if not you may want to apply this patch. Signed-off-by: Tvrtko Ursulin Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 756566fe8449..85366c78cc37 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -165,9 +165,6 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, inode_mark, vfsmnt_mark, event_mask, data, data_type); - pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n", - __func__, group, vfsmnt_mark, inode_mark, event_mask); - /* sorry, fanotify only gives a damn about files and dirs */ if (!S_ISREG(to_tell->i_mode) && !S_ISDIR(to_tell->i_mode)) -- cgit 1.4.1 From a2f13ad0ba5d94b9768c28469b45ca1e81a2b895 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 24 Aug 2010 12:58:54 +0200 Subject: fanotify: Return EPERM when a process is not privileged The appropriate error code when privileged operations are denied is EPERM, not EACCES. Signed-off-by: Andreas Gruenbacher Signed-off-by: Eric Paris --- fs/notify/fanotify/fanotify_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b966b7230f47..5ed8e58d7bfc 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -641,7 +641,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) __func__, flags, event_f_flags); if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; if (flags & ~FAN_ALL_INIT_FLAGS) return -EINVAL; -- cgit 1.4.1 From f72adfd540bacc4f6ff57a7d708b1a6c8906bdb4 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 27 Aug 2010 21:24:24 -0400 Subject: fsnotify: fix list walk order Marks were stored on the inode and vfsmonut mark list in order from highest memory address to lowest memory address. The code to walk those lists thought they were in order from lowest to highest with unpredictable results when trying to match up marks from each. It was possible that extra events would be sent to userspace when inode marks ignoring events wouldn't get matched with the vfsmount marks. This problem only affected fanotify when using both vfsmount and inode marks simultaneously. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 6f2777ce87a1..2169aa593d5f 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -261,27 +261,26 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, while (inode_node || vfsmount_node) { used_inode = used_vfsmount = false; + inode_group = vfsmount_group = NULL; if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), struct fsnotify_mark, i.i_list); inode_group = inode_mark->group; - } else - inode_group = (void *)-1; + } if (vfsmount_node) { vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu), struct fsnotify_mark, m.m_list); vfsmount_group = vfsmount_mark->group; - } else - vfsmount_group = (void *)-1; + } - if (inode_group < vfsmount_group) { + if (inode_group > vfsmount_group) { /* handle inode */ send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, data_is, cookie, file_name, &event); used_inode = true; - } else if (vfsmount_group < inode_group) { + } else if (vfsmount_group > inode_group) { send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); used_vfsmount = true; -- cgit 1.4.1 From 92b4678efa8ce0de9b1e01a74e3d13c4002a4136 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 27 Aug 2010 21:42:11 -0400 Subject: fsnotify: drop two useless bools in the fnsotify main loop The fsnotify main loop has 2 bools which indicated if we processed the inode or vfsmount mark in that particular pass through the loop. These bool can we replaced with the inode_group and vfsmount_group variables and actually make the code a little easier to understand. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2169aa593d5f..36802420d69a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -225,7 +225,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, struct fsnotify_event *event = NULL; struct vfsmount *mnt; int idx, ret = 0; - bool used_inode, used_vfsmount; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); @@ -260,7 +259,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, } while (inode_node || vfsmount_node) { - used_inode = used_vfsmount = false; inode_group = vfsmount_group = NULL; if (inode_node) { @@ -279,23 +277,22 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, /* handle inode */ send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, data_is, cookie, file_name, &event); - used_inode = true; + /* we didn't use the vfsmount_mark */ + vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); - used_vfsmount = true; + inode_group = NULL; } else { send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); - used_vfsmount = true; - used_inode = true; } - if (used_inode) + if (inode_group) inode_node = srcu_dereference(inode_node->next, &fsnotify_mark_srcu); - if (used_vfsmount) + if (vfsmount_group) vfsmount_node = srcu_dereference(vfsmount_node->next, &fsnotify_mark_srcu); } -- cgit 1.4.1