Diffstat (limited to 'drivers/vfio')
 drivers/vfio/pci/Kconfig            |   4
 drivers/vfio/pci/Makefile           |   1
 drivers/vfio/pci/vfio_pci.c         | 175
 drivers/vfio/pci/vfio_pci_config.c  |  45
 drivers/vfio/pci/vfio_pci_igd.c     | 280
 drivers/vfio/pci/vfio_pci_intrs.c   |  17
 drivers/vfio/pci/vfio_pci_private.h |  39
 drivers/vfio/pci/vfio_pci_rdwr.c    |   9
 drivers/vfio/vfio.c                 |  70
 9 files changed, 605 insertions(+), 35 deletions(-)
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 02912f180c6d..24ee2605b9f0 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -26,3 +26,7 @@ config VFIO_PCI_MMAP
 config VFIO_PCI_INTX
 	depends on VFIO_PCI
 	def_bool y if !S390
+
+config VFIO_PCI_IGD
+	depends on VFIO_PCI
+	def_bool y if X86
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 131079255fd9..76d8ec058edd 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,4 +1,5 @@
 
 vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
+vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8c80a48e3233..712a84978e97 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -111,6 +111,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
 }
 
 static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
+static void vfio_pci_disable(struct vfio_pci_device *vdev);
 
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
@@ -169,13 +170,26 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 	if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
 		vdev->has_vga = true;
 
+
+	if (vfio_pci_is_vga(pdev) &&
+	    pdev->vendor == PCI_VENDOR_ID_INTEL &&
+	    IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
+		ret = vfio_pci_igd_init(vdev);
+		if (ret) {
+			dev_warn(&vdev->pdev->dev,
+				 "Failed to setup Intel IGD regions\n");
+			vfio_pci_disable(vdev);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
 static void vfio_pci_disable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	int bar;
+	int i, bar;
 
 	/* Stop the device from further DMA */
 	pci_clear_master(pdev);
@@ -186,6 +200,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 
 	vdev->virq_disabled = false;
 
+	for (i = 0; i < vdev->num_regions; i++)
+		vdev->region[i].ops->release(vdev, &vdev->region[i]);
+
+	vdev->num_regions = 0;
+	kfree(vdev->region);
+	vdev->region = NULL; /* don't krealloc a freed pointer */
+
 	vfio_config_free(vdev);
 
 	for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
@@ -421,6 +442,93 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
 	return walk.ret;
 }
 
+static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
+				struct vfio_info_cap *caps)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_sparse_mmap *sparse;
+	size_t end, size;
+	int nr_areas = 2, i = 0;
+
+	end = pci_resource_len(vdev->pdev, vdev->msix_bar);
+
+	/* If MSI-X table is aligned to the start or end, only one area */
+	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
+	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
+		nr_areas = 1;
+
+	size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
+
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	sparse = container_of(header,
+			      struct vfio_region_info_cap_sparse_mmap, header);
+	sparse->nr_areas = nr_areas;
+
+	if (vdev->msix_offset & PAGE_MASK) {
+		sparse->areas[i].offset = 0;
+		sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
+		i++;
+	}
+
+	if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) {
+		sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset +
+						     vdev->msix_size);
+		sparse->areas[i].size = end - sparse->areas[i].offset;
+		i++;
+	}
+
+	return 0;
+}
+
+static int region_type_cap(struct vfio_pci_device *vdev,
+			   struct vfio_info_cap *caps,
+			   unsigned int type, unsigned int subtype)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_region_info_cap_type *cap;
+
+	header = vfio_info_cap_add(caps, sizeof(*cap),
+				   VFIO_REGION_INFO_CAP_TYPE, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	cap = container_of(header, struct vfio_region_info_cap_type, header);
+	cap->type = type;
+	cap->subtype = subtype;
+
+	return 0;
+}
+
+int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
+				 unsigned int type, unsigned int subtype,
+				 const struct vfio_pci_regops *ops,
+				 size_t size, u32 flags, void *data)
+{
+	struct vfio_pci_region *region;
+
+	region = krealloc(vdev->region,
+			  (vdev->num_regions + 1) * sizeof(*region),
+			  GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	vdev->region = region;
+	vdev->region[vdev->num_regions].type = type;
+	vdev->region[vdev->num_regions].subtype = subtype;
+	vdev->region[vdev->num_regions].ops = ops;
+	vdev->region[vdev->num_regions].size = size;
+	vdev->region[vdev->num_regions].flags = flags;
+	vdev->region[vdev->num_regions].data = data;
+
+	vdev->num_regions++;
+
+	return 0;
+}
+
 static long vfio_pci_ioctl(void *device_data,
 			   unsigned int cmd, unsigned long arg)
 {
@@ -443,7 +551,7 @@ static long vfio_pci_ioctl(void *device_data,
 		if (vdev->reset_works)
 			info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
-		info.num_regions = VFIO_PCI_NUM_REGIONS;
+		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
 		info.num_irqs = VFIO_PCI_NUM_IRQS;
 
 		return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -452,6 +560,8 @@ static long vfio_pci_ioctl(void *device_data,
 	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
 		struct pci_dev *pdev = vdev->pdev;
 		struct vfio_region_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		int i, ret;
 
 		minsz = offsetofend(struct vfio_region_info, offset);
 
@@ -480,8 +590,15 @@ static long vfio_pci_ioctl(void *device_data,
 				     VFIO_REGION_INFO_FLAG_WRITE;
 			if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
 			    pci_resource_flags(pdev, info.index) &
-			    IORESOURCE_MEM && info.size >= PAGE_SIZE)
+			    IORESOURCE_MEM && info.size >= PAGE_SIZE) {
 				info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
+				if (info.index == vdev->msix_bar) {
+					ret = msix_sparse_mmap_cap(vdev, &caps);
+					if (ret)
+						return ret;
+				}
+			}
+
 			break;
 		case VFIO_PCI_ROM_REGION_INDEX:
 		{
@@ -493,8 +610,14 @@ static long vfio_pci_ioctl(void *device_data,
 
 			/* Report the BAR size, not the ROM size */
 			info.size = pci_resource_len(pdev, info.index);
-			if (!info.size)
-				break;
+			if (!info.size) {
+				/* Shadow ROMs appear as PCI option ROMs */
+				if (pdev->resource[PCI_ROM_RESOURCE].flags &
+							IORESOURCE_ROM_SHADOW)
+					info.size = 0x20000;
+				else
+					break;
+			}
 
 			/* Is it really there? */
 			io = pci_map_rom(pdev, &size);
@@ -518,7 +641,40 @@ static long vfio_pci_ioctl(void *device_data,
 
 			break;
 		default:
-			return -EINVAL;
+			if (info.index >=
+			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+				return -EINVAL;
+
+			i = info.index - VFIO_PCI_NUM_REGIONS;
+
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = vdev->region[i].size;
+			info.flags = vdev->region[i].flags;
+
+			ret = region_type_cap(vdev, &caps,
+					      vdev->region[i].type,
+					      vdev->region[i].subtype);
+			if (ret)
+				return ret;
+		}
+
+		if (caps.size) {
+			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						  sizeof(info), caps.buf,
+						  caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
 		}
 
 		return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -798,7 +954,7 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
 	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 	struct vfio_pci_device *vdev = device_data;
 
-	if (index >= VFIO_PCI_NUM_REGIONS)
+	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
 		return -EINVAL;
 
 	switch (index) {
@@ -815,6 +971,10 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
 
 	case VFIO_PCI_VGA_REGION_INDEX:
 		return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+	default:
+		index -= VFIO_PCI_NUM_REGIONS;
+		return vdev->region[index].ops->rw(vdev, buf,
+						   count, ppos, iswrite);
 	}
 
 	return -EINVAL;
@@ -997,6 +1157,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 		return;
 
 	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
+	kfree(vdev->region);
 	kfree(vdev);
 
 	if (vfio_pci_is_vga(pdev)) {
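
With the capability-chain support added above, VFIO_DEVICE_GET_REGION_INFO can return more data than fits in the fixed-size struct vfio_region_info. As a rough illustration only (not part of this patch; hypothetical helper names, error handling mostly omitted), userspace might fetch and walk the chain like this:

#include <linux/vfio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

/*
 * Hypothetical helper: query region info for 'index', re-issuing the ioctl
 * with a larger buffer when the kernel reports a capability chain (argsz
 * grows and cap_offset is left at 0 on the first pass, as in the hunk above).
 */
static struct vfio_region_info *get_region_info(int device_fd, __u32 index)
{
	struct vfio_region_info *info = calloc(1, sizeof(*info));

	info->argsz = sizeof(*info);
	info->index = index;

	if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info))
		return NULL;

	if (info->argsz > sizeof(*info)) {
		__u32 argsz = info->argsz;

		info = realloc(info, argsz);
		memset(info, 0, argsz);
		info->argsz = argsz;
		info->index = index;
		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info))
			return NULL;
	}

	return info;
}

/*
 * Walk the chain for a capability ID, e.g. VFIO_REGION_INFO_CAP_SPARSE_MMAP
 * or VFIO_REGION_INFO_CAP_TYPE.  The 'next' offsets are relative to the
 * start of the info struct, exactly as vfio_info_cap_shift() arranges.
 */
static struct vfio_info_cap_header *find_cap(struct vfio_region_info *info,
					     __u16 id)
{
	struct vfio_info_cap_header *hdr = NULL;
	__u32 next;

	if (!(info->flags & VFIO_REGION_INFO_FLAG_CAPS) || !info->cap_offset)
		return NULL;

	for (next = info->cap_offset; next; next = hdr->next) {
		hdr = (void *)info + next;
		if (hdr->id == id)
			return hdr;
	}

	return NULL;
}
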
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index fe2b470d7ec6..142c533efec7 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -33,9 +33,8 @@
 
 #define PCI_CFG_SPACE_SIZE	256
 
-/* Useful "pseudo" capabilities */
+/* Fake capability ID for standard config space */
 #define PCI_CAP_ID_BASIC	0
-#define PCI_CAP_ID_INVALID	0xFF
 
 #define is_bar(offset)	\
 	((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \
@@ -301,6 +300,23 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
 	return count;
 }
 
+/* Virt access uses only virtualization */
+static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,
+				  int count, struct perm_bits *perm,
+				  int offset, __le32 val)
+{
+	memcpy(vdev->vconfig + pos, &val, count);
+	return count;
+}
+
+static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos,
+				 int count, struct perm_bits *perm,
+				 int offset, __le32 *val)
+{
+	memcpy(val, vdev->vconfig + pos, count);
+	return count;
+}
+
 /* Default capability regions to read-only, no-virtualization */
 static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = {
 	[0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
@@ -319,6 +335,11 @@ static struct perm_bits unassigned_perms = {
 	.writefn = vfio_raw_config_write
 };
 
+static struct perm_bits virt_perms = {
+	.readfn = vfio_virt_config_read,
+	.writefn = vfio_virt_config_write
+};
+
 static void free_perm_bits(struct perm_bits *perm)
 {
 	kfree(perm->virt);
@@ -454,14 +475,19 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
 	bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS];
 
 	/*
-	 * NB. we expose the actual BAR size here, regardless of whether
-	 * we can read it.  When we report the REGION_INFO for the ROM
-	 * we report what PCI tells us is the actual ROM size.
+	 * NB. REGION_INFO will have reported zero size if we weren't able
+	 * to read the ROM, but we still return the actual BAR size here if
+	 * it exists (or the shadow ROM space).
 	 */
 	if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
 		mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1);
 		mask |= PCI_ROM_ADDRESS_ENABLE;
 		*bar &= cpu_to_le32((u32)mask);
+	} else if (pdev->resource[PCI_ROM_RESOURCE].flags &
+					IORESOURCE_ROM_SHADOW) {
+		mask = ~(0x20000 - 1);
+		mask |= PCI_ROM_ADDRESS_ENABLE;
+		*bar &= cpu_to_le32((u32)mask);
 	} else
 		*bar = 0;
 
@@ -1332,6 +1358,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
 				pos + i, map[pos + i], cap);
 		}
 
+		BUILD_BUG_ON(PCI_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
+
 		memset(map + pos, cap, len);
 		ret = vfio_fill_vconfig_bytes(vdev, pos, len);
 		if (ret)
@@ -1419,9 +1447,9 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
 		/*
 		 * Even though ecap is 2 bytes, we're currently a long way
 		 * from exceeding 1 byte capabilities.  If we ever make it
-		 * up to 0xFF we'll need to up this to a two-byte, byte map.
+		 * up to 0xFE we'll need to up this to a two-byte, byte map.
 		 */
-		BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID);
+		BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
 
 		memset(map + epos, ecap, len);
 		ret = vfio_fill_vconfig_bytes(vdev, epos, len);
@@ -1597,6 +1625,9 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
 	if (cap_id == PCI_CAP_ID_INVALID) {
 		perm = &unassigned_perms;
 		cap_start = *ppos;
+	} else if (cap_id == PCI_CAP_ID_INVALID_VIRT) {
+		perm = &virt_perms;
+		cap_start = *ppos;
 	} else {
 		if (*ppos >= PCI_CFG_SPACE_SIZE) {
 			WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX);
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
new file mode 100644
index 000000000000..6394b168ef29
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -0,0 +1,280 @@
+/*
+ * VFIO PCI Intel Graphics support
+ *
+ * Copyright (C) 2016 Red Hat, Inc.  All rights reserved.
+ *	Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Register a device specific region through which to provide read-only
+ * access to the Intel IGD opregion.  The register defining the opregion
+ * address is also virtualized to prevent user modification.
+ */
+
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+#include "vfio_pci_private.h"
+
+#define OPREGION_SIGNATURE	"IntelGraphicsMem"
+#define OPREGION_SIZE		(8 * 1024)
+#define OPREGION_PCI_ADDR	0xfc
+
+static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
+			      size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	void *base = vdev->region[i].data;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+	if (pos >= vdev->region[i].size || iswrite)
+		return -EINVAL;
+
+	count = min(count, (size_t)(vdev->region[i].size - pos));
+
+	if (copy_to_user(buf, base + pos, count))
+		return -EFAULT;
+
+	*ppos += count;
+
+	return count;
+}
+
+static void vfio_pci_igd_release(struct vfio_pci_device *vdev,
+				 struct vfio_pci_region *region)
+{
+	memunmap(region->data);
+}
+
+static const struct vfio_pci_regops vfio_pci_igd_regops = {
+	.rw		= vfio_pci_igd_rw,
+	.release	= vfio_pci_igd_release,
+};
+
+static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
+{
+	__le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
+	u32 addr, size;
+	void *base;
+	int ret;
+
+	ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
+	if (ret)
+		return ret;
+
+	if (!addr || !(~addr))
+		return -ENODEV;
+
+	base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
+	if (!base)
+		return -ENOMEM;
+
+	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
+		memunmap(base);
+		return -EINVAL;
+	}
+
+	size = le32_to_cpu(*(__le32 *)(base + 16));
+	if (!size) {
+		memunmap(base);
+		return -EINVAL;
+	}
+
+	size *= 1024; /* In KB */
+
+	if (size != OPREGION_SIZE) {
+		memunmap(base);
+		base = memremap(addr, size, MEMREMAP_WB);
+		if (!base)
+			return -ENOMEM;
+	}
+
+	ret = vfio_pci_register_dev_region(vdev,
+		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+		VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
+		&vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base);
+	if (ret) {
+		memunmap(base);
+		return ret;
+	}
+
+	/* Fill vconfig with the hw value and virtualize register */
+	*dwordp = cpu_to_le32(addr);
+	memset(vdev->pci_config_map + OPREGION_PCI_ADDR,
+	       PCI_CAP_ID_INVALID_VIRT, 4);
+
+	return ret;
+}
+
+static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
+				  char __user *buf, size_t count, loff_t *ppos,
+				  bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+	struct pci_dev *pdev = vdev->region[i].data;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	size_t size;
+	int ret;
+
+	if (pos >= vdev->region[i].size || iswrite)
+		return -EINVAL;
+
+	size = count = min(count, (size_t)(vdev->region[i].size - pos));
+
+	if ((pos & 1) && size) {
+		u8 val;
+
+		ret = pci_user_read_config_byte(pdev, pos, &val);
+		if (ret)
+			return pcibios_err_to_errno(ret);
+
+		if (copy_to_user(buf + count - size, &val, 1))
+			return -EFAULT;
+
+		pos++;
+		size--;
+	}
+
+	if ((pos & 3) && size > 2) {
+		u16 val;
+
+		ret = pci_user_read_config_word(pdev, pos, &val);
+		if (ret)
+			return pcibios_err_to_errno(ret);
+
+		val = cpu_to_le16(val);
+		if (copy_to_user(buf + count - size, &val, 2))
+			return -EFAULT;
+
+		pos += 2;
+		size -= 2;
+	}
+
+	while (size > 3) {
+		u32 val;
+
+		ret = pci_user_read_config_dword(pdev, pos, &val);
+		if (ret)
+			return pcibios_err_to_errno(ret);
+
+		val = cpu_to_le32(val);
+		if (copy_to_user(buf + count - size, &val, 4))
+			return -EFAULT;
+
+		pos += 4;
+		size -= 4;
+	}
+
+	while (size >= 2) {
+		u16 val;
+
+		ret = pci_user_read_config_word(pdev, pos, &val);
+		if (ret)
+			return pcibios_err_to_errno(ret);
+
+		val = cpu_to_le16(val);
+		if (copy_to_user(buf + count - size, &val, 2))
+			return -EFAULT;
+
+		pos += 2;
+		size -= 2;
+	}
+
+	while (size) {
+		u8 val;
+
+		ret = pci_user_read_config_byte(pdev, pos, &val);
+		if (ret)
+			return pcibios_err_to_errno(ret);
+
+		if (copy_to_user(buf + count - size, &val, 1))
+			return -EFAULT;
+
+		pos++;
+		size--;
+	}
+
+	*ppos += count;
+
+	return count;
+}
+
+static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev,
+				     struct vfio_pci_region *region)
+{
+	struct pci_dev *pdev = region->data;
+
+	pci_dev_put(pdev);
+}
+
+static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = {
+	.rw		= vfio_pci_igd_cfg_rw,
+	.release	= vfio_pci_igd_cfg_release,
+};
+
+static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev)
+{
+	struct pci_dev *host_bridge, *lpc_bridge;
+	int ret;
+
+	host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
+	if (!host_bridge)
+		return -ENODEV;
+
+	if (host_bridge->vendor != PCI_VENDOR_ID_INTEL ||
+	    host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) {
+		pci_dev_put(host_bridge);
+		return -EINVAL;
+	}
+
+	ret = vfio_pci_register_dev_region(vdev,
+		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+		VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG,
+		&vfio_pci_igd_cfg_regops, host_bridge->cfg_size,
+		VFIO_REGION_INFO_FLAG_READ, host_bridge);
+	if (ret) {
+		pci_dev_put(host_bridge);
+		return ret;
+	}
+
+	lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0));
+	if (!lpc_bridge)
+		return -ENODEV;
+
+	if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL ||
+	    lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) {
+		pci_dev_put(lpc_bridge);
+		return -EINVAL;
+	}
+
+	ret = vfio_pci_register_dev_region(vdev,
+		PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+		VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG,
+		&vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size,
+		VFIO_REGION_INFO_FLAG_READ, lpc_bridge);
+	if (ret) {
+		pci_dev_put(lpc_bridge);
+		return ret;
+	}
+
+	return 0;
+}
+
+int vfio_pci_igd_init(struct vfio_pci_device *vdev)
+{
+	int ret;
+
+	ret = vfio_pci_igd_opregion_init(vdev);
+	if (ret)
+		return ret;
+
+	ret = vfio_pci_igd_cfg_init(vdev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
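
The three IGD regions registered here (OpRegion, host bridge config, LPC bridge config) land at indexes past VFIO_PCI_NUM_REGIONS and are identified by the vendor type capability. A hedged userspace sketch, reusing the hypothetical get_region_info()/find_cap() helpers and includes from the earlier example (plus <unistd.h> for pread()), of locating and reading the OpRegion:

/*
 * Hypothetical: find and read the Intel IGD OpRegion region.  num_regions
 * comes from VFIO_DEVICE_GET_INFO and includes the device-specific regions
 * reported by the kernel changes above.
 */
static void *read_igd_opregion(int device_fd, __u32 num_regions, size_t *len)
{
	__u32 i;

	for (i = VFIO_PCI_NUM_REGIONS; i < num_regions; i++) {
		struct vfio_region_info *info = get_region_info(device_fd, i);
		struct vfio_region_info_cap_type *type;
		void *buf;

		if (!info)
			continue;

		/* 0x8086 == PCI_VENDOR_ID_INTEL */
		type = (void *)find_cap(info, VFIO_REGION_INFO_CAP_TYPE);
		if (!type ||
		    type->type != (VFIO_REGION_TYPE_PCI_VENDOR_TYPE | 0x8086) ||
		    type->subtype != VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION) {
			free(info);
			continue;
		}

		/* Region is read-only; reads funnel into vfio_pci_igd_rw() */
		buf = malloc(info->size);
		if (pread(device_fd, buf, info->size, info->offset) < 0) {
			free(buf);
			buf = NULL;
		} else {
			*len = info->size;
		}

		free(info);
		return buf;
	}

	return NULL;
}
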
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3b3ba15558b7..e9ea3fef144a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -309,14 +309,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 				      int vector, int fd, bool msix)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
-	char *name = msix ? "vfio-msix" : "vfio-msi";
 	struct eventfd_ctx *trigger;
-	int ret;
+	int irq, ret;
 
-	if (vector >= vdev->num_ctx)
+	if (vector < 0 || vector >= vdev->num_ctx)
 		return -EINVAL;
 
+	irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
+
 	if (vdev->ctx[vector].trigger) {
 		free_irq(irq, vdev->ctx[vector].trigger);
 		irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
@@ -328,8 +328,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	if (fd < 0)
 		return 0;
 
-	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)",
-					   name, vector, pci_name(pdev));
+	vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
+					   msix ? "x" : "", vector,
+					   pci_name(pdev));
 	if (!vdev->ctx[vector].name)
 		return -ENOMEM;
 
@@ -379,7 +380,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
 {
 	int i, j, ret = 0;
 
-	if (start + count > vdev->num_ctx)
+	if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
 		return -EINVAL;
 
 	for (i = 0, j = start; i < count && !ret; i++, j++) {
@@ -388,7 +389,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
 	}
 
 	if (ret) {
-		for (--j; j >= start; j--)
+		for (--j; j >= (int)start; j--)
 			vfio_msi_set_vector_signal(vdev, j, -1, msix);
 	}
 
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 0e7394f8f69b..8a7d546d18a0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/irqbypass.h>
+#include <linux/types.h>
 
 #ifndef VFIO_PCI_PRIVATE_H
 #define VFIO_PCI_PRIVATE_H
@@ -24,6 +25,10 @@
 #define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
 #define VFIO_PCI_OFFSET_MASK	(((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
+/* Special capability IDs predefined access */
+#define PCI_CAP_ID_INVALID		0xFF	/* default raw access */
+#define PCI_CAP_ID_INVALID_VIRT		0xFE	/* default virt access */
+
 struct vfio_pci_irq_ctx {
 	struct eventfd_ctx	*trigger;
 	struct virqfd		*unmask;
@@ -33,6 +38,25 @@ struct vfio_pci_irq_ctx {
 	struct irq_bypass_producer	producer;
 };
 
+struct vfio_pci_device;
+struct vfio_pci_region;
+
+struct vfio_pci_regops {
+	size_t	(*rw)(struct vfio_pci_device *vdev, char __user *buf,
+		      size_t count, loff_t *ppos, bool iswrite);
+	void	(*release)(struct vfio_pci_device *vdev,
+			   struct vfio_pci_region *region);
+};
+
+struct vfio_pci_region {
+	u32				type;
+	u32				subtype;
+	const struct vfio_pci_regops	*ops;
+	void				*data;
+	size_t				size;
+	u32				flags;
+};
+
 struct vfio_pci_device {
 	struct pci_dev		*pdev;
 	void __iomem		*barmap[PCI_STD_RESOURCE_END + 1];
@@ -45,6 +69,8 @@ struct vfio_pci_device {
 	struct vfio_pci_irq_ctx	*ctx;
 	int			num_ctx;
 	int			irq_type;
+	int			num_regions;
+	struct vfio_pci_region	*region;
 	u8			msi_qmax;
 	u8			msix_bar;
 	u16			msix_size;
@@ -91,4 +117,17 @@ extern void vfio_pci_uninit_perm_bits(void);
 
 extern int vfio_config_init(struct vfio_pci_device *vdev);
 extern void vfio_config_free(struct vfio_pci_device *vdev);
+
+extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
+					unsigned int type, unsigned int subtype,
+					const struct vfio_pci_regops *ops,
+					size_t size, u32 flags, void *data);
+#ifdef CONFIG_VFIO_PCI_IGD
+extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
+#else
+static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
+{
+	return -ENODEV;
+}
+#endif
 #endif /* VFIO_PCI_PRIVATE_H */
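
vfio_pci_igd.c above is the first consumer of this interface; for reference, a minimal hypothetical sub-module (invented "foo" names, buffer-backed region, not part of this patch) only has to provide the two ops and one registration call:

/*
 * Hypothetical sub-module sketch: a read-only region backed by a kernel
 * buffer stored in region->data, following the same pattern as
 * vfio_pci_igd.c above.
 */
static size_t vfio_pci_foo_rw(struct vfio_pci_device *vdev, char __user *buf,
			      size_t count, loff_t *ppos, bool iswrite)
{
	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (pos >= vdev->region[i].size || iswrite)
		return -EINVAL;

	count = min(count, (size_t)(vdev->region[i].size - pos));

	if (copy_to_user(buf, vdev->region[i].data + pos, count))
		return -EFAULT;

	*ppos += count;

	return count;
}

static void vfio_pci_foo_release(struct vfio_pci_device *vdev,
				 struct vfio_pci_region *region)
{
	kfree(region->data);	/* invoked from vfio_pci_disable() */
}

static const struct vfio_pci_regops vfio_pci_foo_regops = {
	.rw		= vfio_pci_foo_rw,
	.release	= vfio_pci_foo_release,
};

/* The subtype value below is invented for the example. */
static int vfio_pci_foo_init(struct vfio_pci_device *vdev, void *data,
			     size_t size)
{
	return vfio_pci_register_dev_region(vdev,
			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
			0xf0 /* hypothetical subtype */,
			&vfio_pci_foo_regops, size,
			VFIO_REGION_INFO_FLAG_READ, data);
}
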
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 210db24d2204..5ffd1d9ad4bd 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -124,11 +124,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
 	void __iomem *io;
 	ssize_t done;
 
-	if (!pci_resource_start(pdev, bar))
+	if (pci_resource_start(pdev, bar))
+		end = pci_resource_len(pdev, bar);
+	else if (bar == PCI_ROM_RESOURCE &&
+		 pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
+		end = 0x20000;
+	else
 		return -EINVAL;
 
-	end = pci_resource_len(pdev, bar);
-
 	if (pos >= end)
 		return -EINVAL;
 
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index ecca316386f5..6fd6fa5469de 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1080,30 +1080,26 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
 			continue;
 		}
 
-		/* module reference holds the driver we're working on */
-		mutex_unlock(&vfio.iommu_drivers_lock);
-
 		data = driver->ops->open(arg);
 		if (IS_ERR(data)) {
 			ret = PTR_ERR(data);
 			module_put(driver->ops->owner);
-			goto skip_drivers_unlock;
+			continue;
 		}
 
 		ret = __vfio_container_attach_groups(container, driver, data);
-		if (!ret) {
-			container->iommu_driver = driver;
-			container->iommu_data = data;
-		} else {
+		if (ret) {
 			driver->ops->release(data);
 			module_put(driver->ops->owner);
+			continue;
 		}
 
-		goto skip_drivers_unlock;
+		container->iommu_driver = driver;
+		container->iommu_data = data;
+		break;
 	}
 
 	mutex_unlock(&vfio.iommu_drivers_lock);
-skip_drivers_unlock:
 	up_write(&container->group_lock);
 
 	return ret;
@@ -1733,6 +1729,60 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
 
 /**
+ * Sub-module support
+ */
+/*
+ * Helper for managing a buffer of info chain capabilities, allocate or
+ * reallocate a buffer with additional @size, filling in @id and @version
+ * of the capability.  A pointer to the new capability is returned.
+ *
+ * NB. The chain is based at the head of the buffer, so new entries are
+ * added to the tail, vfio_info_cap_shift() should be called to fixup the
+ * next offsets prior to copying to the user buffer.
+ */
+struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
+					       size_t size, u16 id, u16 version)
+{
+	void *buf;
+	struct vfio_info_cap_header *header, *tmp;
+
+	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
+	if (!buf) {
+		kfree(caps->buf);
+		caps->size = 0;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	caps->buf = buf;
+	header = buf + caps->size;
+
+	/* Eventually copied to user buffer, zero */
+	memset(header, 0, size);
+
+	header->id = id;
+	header->version = version;
+
+	/* Add to the end of the capability chain */
+	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next)
+		; /* nothing */
+
+	tmp->next = caps->size;
+	caps->size += size;
+
+	return header;
+}
+EXPORT_SYMBOL_GPL(vfio_info_cap_add);
+
+void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
+{
+	struct vfio_info_cap_header *tmp;
+
+	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
+		tmp->next += offset;
+}
+EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
+
+/**
  * Module/class support
  */
 static char *vfio_devnode(struct device *dev, umode_t *mode)
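
These helpers are consumed by vfio_pci_ioctl() earlier in this series; condensed into one hypothetical fragment (not from the patch), the intended calling sequence is: build the chain 0-based with vfio_info_cap_add(), shift it to sit just past the fixed struct with vfio_info_cap_shift(), copy it out, then free the buffer.

/* Hypothetical ioctl-handler fragment showing the expected call order. */
static int example_fill_caps(struct vfio_region_info *info,
			     void __user *arg, u32 type, u32 subtype)
{
	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
	struct vfio_info_cap_header *header;
	struct vfio_region_info_cap_type *cap;
	int ret = 0;

	header = vfio_info_cap_add(&caps, sizeof(*cap),
				   VFIO_REGION_INFO_CAP_TYPE, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	cap = container_of(header, struct vfio_region_info_cap_type, header);
	cap->type = type;
	cap->subtype = subtype;

	info->flags |= VFIO_REGION_INFO_FLAG_CAPS;

	if (info->argsz < sizeof(*info) + caps.size) {
		/* Not enough room: report the required size, no cap data */
		info->argsz = sizeof(*info) + caps.size;
		info->cap_offset = 0;
	} else {
		/* Chain was built 0-based; make 'next' relative to arg */
		vfio_info_cap_shift(&caps, sizeof(*info));
		if (copy_to_user(arg + sizeof(*info), caps.buf, caps.size))
			ret = -EFAULT;
		else
			info->cap_offset = sizeof(*info);
	}

	kfree(caps.buf);
	return ret;
}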