author     Linus Torvalds <torvalds@linux-foundation.org>  2018-04-11 16:36:47 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-04-11 16:36:47 -0700
commit     375479c38689fbc403cf57b2999278615a4163f5 (patch)
tree       da9565f449ab769802bce0f69cb41e74c4dfb605 /arch/um/kernel
parent     45df60cd2cbe2a8c32fd34e474b62b2b41bacf69 (diff)
parent     e40238dedb484c8a19f8257e4ef5d77d038f9ad8 (diff)
download   linux-375479c38689fbc403cf57b2999278615a4163f5.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger:

 - a new and faster epoll-based IRQ controller and NIC driver (see the
   dispatch sketch after the shortlog)

 - misc fixes and janitorial updates

* git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  Fix vector raw initialization logic
  Migrate vector timers to new timer API
  um: Compile with modern headers
  um: vector: Fix an error handling path in 'vector_parse()'
  um: vector: Fix a memory allocation check
  um: vector: fix missing unlock on error in vector_net_open()
  um: Add missing EXPORT for free_irq_by_fd()
  High Performance UML Vector Network Driver
  Epoll based IRQ controller
  um: Use POSIX ucontext_t instead of struct ucontext
  um: time: Use timespec64 for persistent clock
  um: Restore symbol versions for __memcpy and memcpy
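
The core idea behind the epoll-based controller is visible in the irq.c
diff below: epoll's per-fd data pointer carries a back-reference to the
kernel-side irq_entry, so the SIGIO handler can dispatch without walking
a locked list. A minimal userspace sketch of that dispatch pattern
(illustrative names only, not the UML code):

#include <sys/epoll.h>

struct entry {
	int fd;
	void (*handler)(struct entry *);
};

static void add_entry(int epfd, struct entry *e)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.ptr = e };

	/* data.ptr is the back-reference epoll hands back on wakeup */
	epoll_ctl(epfd, EPOLL_CTL_ADD, e->fd, &ev);
}

static void dispatch(int epfd)
{
	struct epoll_event evs[16];
	int i, n;

	n = epoll_wait(epfd, evs, 16, -1);
	for (i = 0; i < n; i++) {
		struct entry *e = evs[i].data.ptr;

		e->handler(e);	/* no list walk, no lock */
	}
}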
Diffstat (limited to 'arch/um/kernel')
-rw-r--r--  arch/um/kernel/irq.c    461
-rw-r--r--  arch/um/kernel/time.c     6
2 files changed, 301 insertions, 166 deletions
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb9350d47e..6b7f3827d6e4 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -16,243 +18,362 @@
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
+#include <irq_user.h>
 
-/*
- * This list is accessed under irq_lock, except in sigio_handler,
- * where it is safe from being modified.  IRQ handlers won't change it -
- * if an IRQ source has vanished, it will be freed by free_irqs just
- * before returning from sigio_handler.  That will process a separate
- * list of irqs to free, with its own locking, coming back here to
- * remove list elements, taking the irq_lock to do so.
+
+/* When epoll triggers we do not know why it did so;
+ * we can also have different IRQs for read and write.
+ * This is why we keep a small irq_fd array for each fd -
+ * one entry per IRQ type
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
 
-extern void free_irqs(void);
+struct irq_entry {
+	struct irq_entry *next;
+	int fd;
+	struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
+};
+
+static struct irq_entry *active_fds;
+
+static DEFINE_SPINLOCK(irq_lock);
+
+static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+{
+/*
+ * irq->active guards against reentry
+ * irq->pending accumulates pending requests
+ * if pending is raised the irq_handler is re-run
+ * until pending is cleared
+ */
+	if (irq->active) {
+		irq->active = false;
+		do {
+			irq->pending = false;
+			do_IRQ(irq->irq, regs);
+		} while (irq->pending && (!irq->purge));
+		if (!irq->purge)
+			irq->active = true;
+	} else {
+		irq->pending = true;
+	}
+}
 
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
-	struct irq_fd *irq_fd;
-	int n;
+	struct irq_entry *irq_entry;
+	struct irq_fd *irq;
+
+	int n, i, j;
 
 	while (1) {
-		n = os_waiting_for_events(active_fds);
+		/* This is now lockless - epoll keeps back-references to the irqs
+		 * which have triggered it, so there is no need to walk the irq
+		 * list and lock it every time. We avoid locking by turning off
+		 * IO for a specific fd by executing os_del_epoll_fd(fd) before
+		 * we do any changes to the actual data structures
+		 */
+		n = os_waiting_for_events_epoll();
+
 		if (n <= 0) {
 			if (n == -EINTR)
 				continue;
-			else break;
+			else
+				break;
 		}
 
-		for (irq_fd = active_fds; irq_fd != NULL;
-		     irq_fd = irq_fd->next) {
-			if (irq_fd->current_events != 0) {
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
+		for (i = 0; i < n ; i++) {
+			/* Epoll back reference is the entry with 3 irq_fd
+			 * leaves - one for each irq type.
+			 */
+			irq_entry = (struct irq_entry *)
+				os_epoll_get_data_pointer(i);
+			for (j = 0; j < MAX_IRQ_TYPE ; j++) {
+				irq = irq_entry->irq_array[j];
+				if (irq == NULL)
+					continue;
+				if (os_epoll_triggered(i, irq->events) > 0)
+					irq_io_loop(irq, regs);
+				if (irq->purge) {
+					irq_entry->irq_array[j] = NULL;
+					kfree(irq);
+				}
 			}
 		}
 	}
+}
+
+static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+{
+	int i;
+	int events = 0;
+	struct irq_fd *irq;
 
-	free_irqs();
+	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+		irq = irq_entry->irq_array[i];
+		if (irq != NULL)
+			events = irq->events | events;
+	}
+	if (events > 0) {
+	/* os_add_epoll will call os_mod_epoll if this already exists */
+		return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+	}
+	/* No events - delete */
+	return os_del_epoll_fd(irq_entry->fd);
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
+	struct irq_fd *new_fd;
+	struct irq_entry *irq_entry;
+	int i, err, events;
 	unsigned long flags;
-	int events, err, n;
 
 	err = os_set_fd_async(fd);
 	if (err < 0)
 		goto out;
 
-	err = -ENOMEM;
-	new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
-	if (new_fd == NULL)
-		goto out;
+	spin_lock_irqsave(&irq_lock, flags);
 
-	if (type == IRQ_READ)
-		events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .events 		= events,
-				     .current_events 	= 0 } );
+	/* Check if we have an entry for this fd */
 
 	err = -EBUSY;
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk(KERN_ERR "Registering fd %d twice\n", fd);
-			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-			       dev_id);
+	for (irq_entry = active_fds;
+		irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd)
+			break;
+	}
+
+	if (irq_entry == NULL) {
+		/* This needs to be atomic as it may be called from an
+		 * IRQ context.
+		 */
+		irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
+		if (irq_entry == NULL) {
+			printk(KERN_ERR
+				"Failed to allocate new IRQ entry\n");
 			goto out_unlock;
 		}
+		irq_entry->fd = fd;
+		for (i = 0; i < MAX_IRQ_TYPE; i++)
+			irq_entry->irq_array[i] = NULL;
+		irq_entry->next = active_fds;
+		active_fds = irq_entry;
 	}
 
-	if (type == IRQ_WRITE)
-		fd = -1;
-
-	tmp_pfd = NULL;
-	n = 0;
+	/* Check if we are trying to re-register an interrupt for a
+	 * particular fd
+	 */
 
-	while (1) {
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
+	if (irq_entry->irq_array[type] != NULL) {
+		printk(KERN_ERR
+			"Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+			irq, fd, type, dev_id
+		);
+		goto out_unlock;
+	} else {
+		/* New entry for this fd */
+
+		err = -ENOMEM;
+		new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
+		if (new_fd == NULL)
+			goto out_unlock;
 
-		/*
-		 * n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
+		events = os_event_mask(type);
+
+		*new_fd = ((struct irq_fd) {
+			.id		= dev_id,
+			.irq		= irq,
+			.type		= type,
+			.events		= events,
+			.active		= true,
+			.pending	= false,
+			.purge		= false
+		});
+		/* Turn off any IO on this fd - allows us to
+		 * avoid locking the IRQ loop
 		 */
-		spin_unlock_irqrestore(&irq_lock, flags);
-		kfree(tmp_pfd);
-
-		tmp_pfd = kmalloc(n, GFP_KERNEL);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
-
-		spin_lock_irqsave(&irq_lock, flags);
+		os_del_epoll_fd(irq_entry->fd);
+		irq_entry->irq_array[type] = new_fd;
 	}
 
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
-
+	/* Turn back IO on with the correct (new) IO event mask */
+	assign_epoll_events_to_irq(irq_entry);
 	spin_unlock_irqrestore(&irq_lock, flags);
-
-	/*
-	 * This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, (type == IRQ_READ));
+	maybe_sigio_broken(fd, (type != IRQ_NONE));
 
 	return 0;
-
- out_unlock:
+out_unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
-	kfree(new_fd);
- out:
+out:
 	return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+/*
+ * Walk the IRQ list and dispose of any unused entries.
+ * Should be done under irq_lock.
+ */
+
+static void garbage_collect_irq_entries(void)
 {
-	unsigned long flags;
+	int i;
+	bool reap;
+	struct irq_entry *walk;
+	struct irq_entry *previous = NULL;
+	struct irq_entry *to_free;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	spin_unlock_irqrestore(&irq_lock, flags);
+	if (active_fds == NULL)
+		return;
+	walk = active_fds;
+	while (walk != NULL) {
+		reap = true;
+		for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+			if (walk->irq_array[i] != NULL) {
+				reap = false;
+				break;
+			}
+		}
+		if (reap) {
+			if (previous == NULL)
+				active_fds = walk->next;
+			else
+				previous->next = walk->next;
+			to_free = walk;
+		} else {
+			to_free = NULL;
+		}
+		walk = walk->next;
+		if (to_free != NULL)
+			kfree(to_free);
+	}
 }
 
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
+/*
+ * Walk the IRQ list and get the descriptor for our FD
+ */
 
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
-	struct irq_and_dev *data = d;
+	struct irq_entry *walk = active_fds;
 
-	return ((irq->irq == data->irq) && (irq->id == data->dev));
+	while (walk != NULL) {
+		if (walk->fd == fd)
+			return walk;
+		walk = walk->next;
+	}
+	return NULL;
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
 
-	free_irq_by_cb(same_irq_and_dev, &data);
-}
+/*
+ * Walk the IRQ list and dispose of an entry for a specific
+ * device, fd and number. Note - if sharing an IRQ for read
+ * and write for the same FD it will be disposed of in either case.
+ * If this behaviour is undesirable use different IRQ ids.
+ */
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-	return (irq->fd == *((int *)fd));
-}
+#define IGNORE_IRQ 1
+#define IGNORE_DEV (1<<1)
 
-void free_irq_by_fd(int fd)
+static void do_free_by_irq_and_dev(
+	struct irq_entry *irq_entry,
+	unsigned int irq,
+	void *dev,
+	int flags
+)
 {
-	free_irq_by_cb(same_fd, &fd);
+	int i;
+	struct irq_fd *to_free;
+
+	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+		if (irq_entry->irq_array[i] != NULL) {
+			if (
+			((flags & IGNORE_IRQ) ||
+				(irq_entry->irq_array[i]->irq == irq)) &&
+			((flags & IGNORE_DEV) ||
+				(irq_entry->irq_array[i]->id == dev))
+			) {
+				/* Turn off any IO on this fd - allows us to
+				 * avoid locking the IRQ loop
+				 */
+				os_del_epoll_fd(irq_entry->fd);
+				to_free = irq_entry->irq_array[i];
+				irq_entry->irq_array[i] = NULL;
+				assign_epoll_events_to_irq(irq_entry);
+				if (to_free->active)
+					to_free->purge = true;
+				else
+					kfree(to_free);
+			}
+		}
+	}
 }
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+void free_irq_by_fd(int fd)
 {
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
+	struct irq_entry *to_free;
+	unsigned long flags;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		if ((irq->fd == fd) && (irq->irq == irqnum))
-			break;
-		i++;
-	}
-	if (irq == NULL) {
-		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-		       fd);
-		goto out;
-	}
-	fdi = os_get_pollfd(i);
-	if ((fdi != -1) && (fdi != fd)) {
-		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
+	spin_lock_irqsave(&irq_lock, flags);
+	to_free = get_irq_entry_by_fd(fd);
+	if (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			-1,
+			NULL,
+			IGNORE_IRQ | IGNORE_DEV
+		);
 	}
-	*index_out = i;
- out:
-	return irq;
+	garbage_collect_irq_entries();
+	spin_unlock_irqrestore(&irq_lock, flags);
 }
+EXPORT_SYMBOL(free_irq_by_fd);
 
-void reactivate_fd(int fd, int irqnum)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-	struct irq_fd *irq;
+	struct irq_entry *to_free;
 	unsigned long flags;
-	int i;
 
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	to_free = active_fds;
+	while (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			irq,
+			dev,
+			0
+		);
+		to_free = to_free->next;
 	}
-	os_set_pollfd(i, irq->fd);
+	garbage_collect_irq_entries();
 	spin_unlock_irqrestore(&irq_lock, flags);
+}
 
-	add_sigio_fd(fd);
+
+void reactivate_fd(int fd, int irqnum)
+{
+	/* NOP - we do auto-EOI now */
 }
 
 void deactivate_fd(int fd, int irqnum)
 {
-	struct irq_fd *irq;
+	struct irq_entry *to_free;
 	unsigned long flags;
-	int i;
 
+	os_del_epoll_fd(fd);
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	to_free = get_irq_entry_by_fd(fd);
+	if (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			irqnum,
+			NULL,
+			IGNORE_DEV
+		);
 	}
-
-	os_set_pollfd(i, -1);
+	garbage_collect_irq_entries();
 	spin_unlock_irqrestore(&irq_lock, flags);
-
 	ignore_sigio_fd(fd);
 }
 EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-	struct irq_fd *irq;
-	int err;
+	unsigned long flags;
+	struct irq_entry *to_free;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_clear_fd_async(irq->fd);
-		if (err)
-			return err;
-	}
-	/* If there is a signal already queued, after unblocking ignore it */
+	spin_lock_irqsave(&irq_lock, flags);
+	/* Stop IO. The IRQ loop has no lock so this is our
+	 * only way of making sure we are safe to dispose
+	 * of all IRQ handlers
+	 */
 	os_set_ioignore();
-
+	to_free = active_fds;
+	while (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			-1,
+			NULL,
+			IGNORE_IRQ | IGNORE_DEV
+		);
+		to_free = to_free->next;
+	}
+	garbage_collect_irq_entries();
+	spin_unlock_irqrestore(&irq_lock, flags);
+	os_close_epoll_fd();
 	return 0;
 }
 
@@ -353,8 +485,11 @@ void __init init_IRQ(void)
 
 	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
 
+
 	for (i = 1; i < NR_IRQS; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+	/* Initialize EPOLL Loop */
+	os_setup_epoll();
 }
 
 /*
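
The os_* helpers the new irq.c relies on (os_add_epoll_fd,
os_waiting_for_events_epoll, os_epoll_get_data_pointer,
os_epoll_triggered) live on the host side of UML and are outside this
diffstat. A minimal sketch, assuming one global epoll fd and a static
event buffer, of how they could map onto the host epoll API
(illustrative, not the actual arch/um/os-Linux code):

#include <errno.h>
#include <sys/epoll.h>

#define MAX_EPOLL_EVENTS 64

static int epollfd = -1;
static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];

int os_add_epoll_fd(int events, int fd, void *data)
{
	struct epoll_event event = { .events = events, .data.ptr = data };
	int result;

	result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
	if ((result < 0) && (errno == EEXIST))
		/* already registered - modify the event mask in place */
		result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
	return result;
}

int os_waiting_for_events_epoll(void)
{
	/* fill the static buffer; sigio_handler indexes into it */
	return epoll_wait(epollfd, epoll_events, MAX_EPOLL_EVENTS, 0);
}

void *os_epoll_get_data_pointer(int index)
{
	return epoll_events[index].data.ptr;
}

int os_epoll_triggered(int index, int events)
{
	return epoll_events[index].events & events;
}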
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 7f69d17de354..052de4c8acb2 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -121,12 +121,12 @@ static void __init um_timer_setup(void)
 	clockevents_register_device(&timer_clockevent);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	long long nsecs = os_persistent_clock_emulation();
 
-	set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
-				nsecs % NSEC_PER_SEC);
+	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
+				  nsecs % NSEC_PER_SEC);
 }
 
 void __init time_init(void)
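
The time.c hunk above switches the persistent clock to the
year-2038-safe timespec64. The normalization helper it now calls
reduces to keeping tv_nsec within [0, NSEC_PER_SEC) while preserving
the total time; a sketch of those semantics (not the kernel's exact
implementation):

#define NSEC_PER_SEC 1000000000L

struct timespec64 {
	long long tv_sec;	/* 64-bit seconds, unlike the legacy timespec */
	long tv_nsec;
};

static void set_normalized_timespec64(struct timespec64 *ts,
				      long long sec, long long nsec)
{
	while (nsec >= NSEC_PER_SEC) {
		nsec -= NSEC_PER_SEC;
		++sec;
	}
	while (nsec < 0) {
		nsec += NSEC_PER_SEC;
		--sec;
	}
	ts->tv_sec = sec;
	ts->tv_nsec = nsec;
}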