summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-05-08 08:41:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-05-08 08:41:09 -0700
commitaf38553c661207f96464e15f3506bf788daee474 (patch)
tree5b58816c3cd8126736065c831dc3bc5a61cbea12 /fs
parent79dede78c0573618e3137d3d8cbf78c84e25fabd (diff)
parent14f69140ff9c92a0928547ceefb153a842e8492c (diff)
downloadlinux-af38553c661207f96464e15f3506bf788daee474.tar.gz
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "14 fixes and one selftest to verify the ipc fixes herein"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: limit boost_watermark on small zones
  ubsan: disable UBSAN_ALIGNMENT under COMPILE_TEST
  mm/vmscan: remove unnecessary argument description of isolate_lru_pages()
  epoll: atomically remove wait entry on wake up
  kselftests: introduce new epoll60 testcase for catching lost wakeups
  percpu: make pcpu_alloc() aware of current gfp context
  mm/slub: fix incorrect interpretation of s->offset
  scripts/gdb: repair rb_first() and rb_last()
  eventpoll: fix missing wakeup for ovflist in ep_poll_callback
  arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory()
  scripts/decodecode: fix trapping instruction formatting
  kernel/kcov.c: fix typos in kcov_remote_start documentation
  mm/page_alloc: fix watchdog soft lockups during set_zone_contiguous()
  mm, memcg: fix error return value of mem_cgroup_css_alloc()
  ipc/mqueue.c: change __do_notify() to bypass check_kill_permission()
Diffstat (limited to 'fs')
-rw-r--r--fs/eventpoll.c61
1 files changed, 33 insertions, 28 deletions
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8c596641a72b..aba03ee749f8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
 {
 	struct eventpoll *ep = epi->ep;
 
+	/* Fast preliminary check */
+	if (epi->next != EP_UNACTIVE_PTR)
+		return false;
+
 	/* Check that the same epi has not been just chained from another CPU */
 	if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
 		return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
 	 * chained in ep->ovflist and requeued later on.
 	 */
 	if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
-		if (epi->next == EP_UNACTIVE_PTR &&
-		    chain_epi_lockless(epi))
+		if (chain_epi_lockless(epi))
+			ep_pm_stay_awake_rcu(epi);
+	} else if (!ep_is_linked(epi)) {
+		/* In the usual case, add event to ready list. */
+		if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
 			ep_pm_stay_awake_rcu(epi);
-		goto out_unlock;
-	}
-
-	/* If this file is already in the ready list we exit soon */
-	if (!ep_is_linked(epi) &&
-	    list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
-		ep_pm_stay_awake_rcu(epi);
 	}
 
 	/*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
 	int res = 0, eavail, timed_out = 0;
 	u64 slack = 0;
-	bool waiter = false;
 	wait_queue_entry_t wait;
 	ktime_t expires, *to = NULL;
 
@@ -1867,21 +1866,23 @@ fetch_events:
 	 */
 	ep_reset_busy_poll_napi_id(ep);
 
-	/*
-	 * We don't have any available event to return to the caller.  We need
-	 * to sleep here, and we will be woken by ep_poll_callback() when events
-	 * become available.
-	 */
-	if (!waiter) {
-		waiter = true;
-		init_waitqueue_entry(&wait, current);
-
+	do {
+		/*
+		 * Internally init_wait() uses autoremove_wake_function(),
+		 * thus wait entry is removed from the wait queue on each
+		 * wakeup. Why it is important? In case of several waiters
+		 * each new wakeup will hit the next waiter, giving it the
+		 * chance to harvest new event. Otherwise wakeup can be
+		 * lost. This is also good performance-wise, because on
+		 * normal wakeup path no need to call __remove_wait_queue()
+		 * explicitly, thus ep->lock is not taken, which halts the
+		 * event delivery.
+		 */
+		init_wait(&wait);
 		write_lock_irq(&ep->lock);
 		__add_wait_queue_exclusive(&ep->wq, &wait);
 		write_unlock_irq(&ep->lock);
-	}
 
-	for (;;) {
 		/*
 		 * We don't want to sleep if the ep_poll_callback() sends us
 		 * a wakeup in between. That's why we set the task state
@@ -1911,10 +1912,20 @@ fetch_events:
 			timed_out = 1;
 			break;
 		}
-	}
+
+		/* We were woken up, thus go and try to harvest some events */
+		eavail = 1;
+
+	} while (0);
 
 	__set_current_state(TASK_RUNNING);
 
+	if (!list_empty_careful(&wait.entry)) {
+		write_lock_irq(&ep->lock);
+		__remove_wait_queue(&ep->wq, &wait);
+		write_unlock_irq(&ep->lock);
+	}
+
 send_events:
 	/*
 	 * Try to transfer events to user space. In case we get 0 events and
@@ -1925,12 +1936,6 @@ send_events:
 	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
 		goto fetch_events;
 
-	if (waiter) {
-		write_lock_irq(&ep->lock);
-		__remove_wait_queue(&ep->wq, &wait);
-		write_unlock_irq(&ep->lock);
-	}
-
 	return res;
 }