author     Linus Torvalds <torvalds@linux-foundation.org>	2008-11-03 10:15:40 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>	2008-11-03 10:15:40 -0800
commit     da4a22cba7cb2d922691214aed6b1977f04efaff (patch)
tree       89d3f02b13cd1eb280a33240878880f91066bac2 /drivers/gpu/drm
parent     20ebc0073b0fb63ce4a27ca761418ecfdecaadb7 (diff)
parent     e5beae16901795223d677f15aa2fe192976278ee (diff)
download   linux-da4a22cba7cb2d922691214aed6b1977f04efaff.tar.gz
Merge branch 'io-mappings-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'io-mappings-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  io mapping: clean up #ifdefs
  io mapping: improve documentation
  i915: use io-mapping interfaces instead of a variety of mapping kludges
  resources: add io-mapping functions to dynamically map large device apertures
  x86: add iomap_atomic*()/iounmap_atomic() on 32-bit using fixmaps
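For readers of the diff below, the new interface boils down to four calls: create one io_mapping object for the whole device aperture, take short-lived per-page mappings from it (atomic when page faults must not sleep, plain when they may), and free the object on teardown. A minimal, hypothetical sketch of that usage pattern follows; the example_dev structure and function names are invented for illustration, while the io_mapping_* calls are the ones the i915 changes below rely on:

#include <linux/io-mapping.h>
#include <linux/io.h>

/* Invented driver state; only the io_mapping pointer matters here. */
struct example_dev {
	struct io_mapping *aperture;
};

/* Set up one write-combining mapping object for the whole aperture. */
static int example_init(struct example_dev *ed, resource_size_t base,
			unsigned long size)
{
	ed->aperture = io_mapping_create_wc(base, size);
	return ed->aperture ? 0 : -ENOMEM;
}

/* Per access: take a short-lived atomic mapping of one aperture page. */
static void example_write32(struct example_dev *ed, unsigned long offset,
			    u32 val)
{
	void __iomem *vaddr;

	vaddr = io_mapping_map_atomic_wc(ed->aperture,
					 offset & ~(PAGE_SIZE - 1));
	writel(val, vaddr + (offset & (PAGE_SIZE - 1)));
	io_mapping_unmap_atomic(vaddr);
}

static void example_fini(struct example_dev *ed)
{
	io_mapping_free(ed->aperture);
}

On 32-bit HIGHMEM kernels the atomic variant is backed by the new iomap_atomic*() fixmap helpers, so no cross-CPU TLB shootdowns are needed; on 64-bit kernels the aperture can simply stay mapped and the per-page calls reduce to address arithmetic.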
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h    3
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  174
2 files changed, 83 insertions(+), 94 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc8a9f3f7a60..572dcd0e3e0d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -31,6 +31,7 @@
 #define _I915_DRV_H_
 
 #include "i915_reg.h"
+#include <linux/io-mapping.h>
 
 /* General customization:
  */
@@ -246,6 +247,8 @@ typedef struct drm_i915_private {
 	struct {
 		struct drm_mm gtt_space;
 
+		struct io_mapping *gtt_mapping;
+
 		/**
 		 * List of objects currently involved in rendering from the
 		 * ringbuffer.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c1733ac4a7f5..b0ec73fa6a93 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -193,35 +193,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-/*
- * Try to write quickly with an atomic kmap. Return true on success.
- *
- * If this fails (which includes a partial write), we'll redo the whole
- * thing with the slow version.
- *
- * This is a workaround for the low performance of iounmap (approximate
- * 10% cpu cost on normal 3D workloads).  kmap_atomic on HIGHMEM kernels
- * happens to let us map card memory without taking IPIs.  When the vmap
- * rework lands we should be able to dump this hack.
+/* This is the fast write path which cannot handle
+ * page faults in the source data
  */
-static inline int fast_user_write(unsigned long pfn, char __user *user_data,
-				  int l, int o)
+
+static inline int
+fast_user_write(struct io_mapping *mapping,
+		loff_t page_base, int page_offset,
+		char __user *user_data,
+		int length)
 {
-#ifdef CONFIG_HIGHMEM
-	unsigned long unwritten;
 	char *vaddr_atomic;
+	unsigned long unwritten;
 
-	vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-	DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-		 i, o, l, pfn, vaddr_atomic);
-#endif
-	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
-	kunmap_atomic(vaddr_atomic, KM_USER0);
-	return !unwritten;
-#else
+	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
+	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
+						      user_data, length);
+	io_mapping_unmap_atomic(vaddr_atomic);
+	if (unwritten)
+		return -EFAULT;
+	return 0;
+}
+
+/* Here's the write path which can sleep for
+ * page faults
+ */
+
+static inline int
+slow_user_write(struct io_mapping *mapping,
+		loff_t page_base, int page_offset,
+		char __user *user_data,
+		int length)
+{
+	char __iomem *vaddr;
+	unsigned long unwritten;
+
+	vaddr = io_mapping_map_wc(mapping, page_base);
+	if (vaddr == NULL)
+		return -EFAULT;
+	unwritten = __copy_from_user(vaddr + page_offset,
+				     user_data, length);
+	io_mapping_unmap(vaddr);
+	if (unwritten)
+		return -EFAULT;
 	return 0;
-#endif
 }
 
 static int
@@ -230,10 +245,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		    struct drm_file *file_priv)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
-	loff_t offset;
+	loff_t offset, page_base;
 	char __user *user_data;
-	int ret = 0;
+	int page_offset, page_length;
+	int ret;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -257,57 +274,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	obj_priv->dirty = 1;
 
 	while (remain > 0) {
-		unsigned long pfn;
-		int i, o, l;
-
 		/* Operation in this page
 		 *
-		 * i = page number
-		 * o = offset within page
-		 * l = bytes to copy
+		 * page_base = page offset within aperture
+		 * page_offset = offset within page
+		 * page_length = bytes to copy for this page
 		 */
-		i = offset >> PAGE_SHIFT;
-		o = offset & (PAGE_SIZE-1);
-		l = remain;
-		if ((o + l) > PAGE_SIZE)
-			l = PAGE_SIZE - o;
-
-		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
-
-		if (!fast_user_write(pfn, user_data, l, o)) {
-			unsigned long unwritten;
-			char __iomem *vaddr;
-
-			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
-#if WATCH_PWRITE
-			DRM_INFO("pwrite slow i %d o %d l %d "
-				 "pfn %ld vaddr %p\n",
-				 i, o, l, pfn, vaddr);
-#endif
-			if (vaddr == NULL) {
-				ret = -EFAULT;
-				goto fail;
-			}
-			unwritten = __copy_from_user(vaddr + o, user_data, l);
-#if WATCH_PWRITE
-			DRM_INFO("unwritten %ld\n", unwritten);
-#endif
-			iounmap(vaddr);
-			if (unwritten) {
-				ret = -EFAULT;
+		page_base = (offset & ~(PAGE_SIZE-1));
+		page_offset = offset & (PAGE_SIZE-1);
+		page_length = remain;
+		if ((page_offset + remain) > PAGE_SIZE)
+			page_length = PAGE_SIZE - page_offset;
+
+		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
+				       page_offset, user_data, page_length);
+
+		/* If we get a fault while copying data, then (presumably) our
+		 * source page isn't available. In this case, use the
+		 * non-atomic function
+		 */
+		if (ret) {
+			ret = slow_user_write (dev_priv->mm.gtt_mapping,
+					       page_base, page_offset,
+					       user_data, page_length);
+			if (ret)
 				goto fail;
-			}
 		}
 
-		remain -= l;
-		user_data += l;
-		offset += l;
+		remain -= page_length;
+		user_data += page_length;
+		offset += page_length;
 	}
-#if WATCH_PWRITE && 1
-	i915_gem_clflush_object(obj);
-	i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
-	i915_gem_clflush_object(obj);
-#endif
 
 fail:
 	i915_gem_object_unpin(obj);
@@ -1525,12 +1522,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 				 struct drm_i915_gem_exec_object *entry)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_relocation_entry reloc;
 	struct drm_i915_gem_relocation_entry __user *relocs;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int i, ret;
-	uint32_t last_reloc_offset = -1;
-	void __iomem *reloc_page = NULL;
+	void __iomem *reloc_page;
 
 	/* Choose the GTT offset for our buffer and put it there. */
 	ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
@@ -1653,26 +1650,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		 * perform.
 		 */
 		reloc_offset = obj_priv->gtt_offset + reloc.offset;
-		if (reloc_page == NULL ||
-		    (last_reloc_offset & ~(PAGE_SIZE - 1)) !=
-		    (reloc_offset & ~(PAGE_SIZE - 1))) {
-			if (reloc_page != NULL)
-				iounmap(reloc_page);
-
-			reloc_page = ioremap_wc(dev->agp->base +
-						(reloc_offset &
-						 ~(PAGE_SIZE - 1)),
-						PAGE_SIZE);
-			last_reloc_offset = reloc_offset;
-			if (reloc_page == NULL) {
-				drm_gem_object_unreference(target_obj);
-				i915_gem_object_unpin(obj);
-				return -ENOMEM;
-			}
-		}
-
+		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+						      (reloc_offset &
+						       ~(PAGE_SIZE - 1)));
 		reloc_entry = (uint32_t __iomem *)(reloc_page +
-					   (reloc_offset & (PAGE_SIZE - 1)));
+						   (reloc_offset & (PAGE_SIZE - 1)));
 		reloc_val = target_obj_priv->gtt_offset + reloc.delta;
 
 #if WATCH_BUF
@@ -1681,6 +1663,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 			  readl(reloc_entry), reloc_val);
 #endif
 		writel(reloc_val, reloc_entry);
+		io_mapping_unmap_atomic(reloc_page);
 
 		/* Write the updated presumed offset for this entry back out
 		 * to the user.
@@ -1696,9 +1679,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		drm_gem_object_unreference(target_obj);
 	}
 
-	if (reloc_page != NULL)
-		iounmap(reloc_page);
-
 #if WATCH_BUF
 	if (0)
 		i915_gem_dump_object(obj, 128, __func__, ~0);
@@ -2540,6 +2520,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	if (ret != 0)
 		return ret;
 
+	dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
+							dev->agp->agp_info.aper_size
+							* 1024 * 1024);
+
 	mutex_lock(&dev->struct_mutex);
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
 	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
@@ -2557,11 +2541,13 @@ int
 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
 	ret = i915_gem_idle(dev);
 	drm_irq_uninstall(dev);
 
+	io_mapping_free(dev_priv->mm.gtt_mapping);
 	return ret;
 }
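Taken together, the i915_gem_gtt_pwrite() change above follows a simple pattern: attempt a non-sleeping atomic copy first, and only fall back to a mapping that may sleep when the source user page faults. A condensed, hypothetical sketch of that loop, reusing the fast_user_write()/slow_user_write() helpers introduced in the diff (error handling and locking omitted):

/* Invented condensed loop modelled on i915_gem_gtt_pwrite() above. */
static int gtt_pwrite_loop(struct io_mapping *gtt, loff_t offset,
			   char __user *user_data, ssize_t remain)
{
	while (remain > 0) {
		loff_t page_base = offset & ~(PAGE_SIZE - 1);
		int page_offset = offset & (PAGE_SIZE - 1);
		int page_length = min_t(ssize_t, remain,
					PAGE_SIZE - page_offset);
		int ret;

		/* Fast path: atomic WC mapping, no page faults allowed. */
		ret = fast_user_write(gtt, page_base, page_offset,
				      user_data, page_length);
		if (ret)
			/* Source page faulted in: retry with the sleeping path. */
			ret = slow_user_write(gtt, page_base, page_offset,
					      user_data, page_length);
		if (ret)
			return ret;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}
	return 0;
}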