summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-bus-xen-backend75
-rw-r--r--Documentation/ABI/stable/sysfs-devices-system-xen_memory77
-rw-r--r--arch/ia64/include/asm/xen/interface.h2
-rw-r--r--arch/x86/xen/Kconfig4
-rw-r--r--arch/x86/xen/grant-table.c44
-rw-r--r--drivers/block/xen-blkback/xenbus.c9
-rw-r--r--drivers/block/xen-blkfront.c11
-rw-r--r--drivers/input/misc/xen-kbdfront.c7
-rw-r--r--drivers/net/xen-netback/netback.c2
-rw-r--r--drivers/net/xen-netback/xenbus.c9
-rw-r--r--drivers/net/xen-netfront.c9
-rw-r--r--drivers/pci/xen-pcifront.c11
-rw-r--r--drivers/video/xen-fbfront.c9
-rw-r--r--drivers/xen/Kconfig7
-rw-r--r--drivers/xen/Makefile2
-rw-r--r--drivers/xen/events.c77
-rw-r--r--drivers/xen/evtchn.c2
-rw-r--r--drivers/xen/gntalloc.c121
-rw-r--r--drivers/xen/gntdev.c34
-rw-r--r--drivers/xen/grant-table.c518
-rw-r--r--drivers/xen/privcmd.c (renamed from drivers/xen/xenfs/privcmd.c)41
-rw-r--r--drivers/xen/privcmd.h3
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c4
-rw-r--r--drivers/xen/xen-pciback/xenbus.c19
-rw-r--r--drivers/xen/xenbus/Makefile2
-rw-r--r--drivers/xen/xenbus/xenbus_client.c193
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h4
-rw-r--r--drivers/xen/xenbus/xenbus_dev_backend.c90
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c (renamed from drivers/xen/xenfs/xenbus.c)40
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c9
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h6
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c8
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c8
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c23
-rw-r--r--drivers/xen/xenfs/Makefile2
-rw-r--r--drivers/xen/xenfs/super.c6
-rw-r--r--drivers/xen/xenfs/xenfs.h2
-rw-r--r--include/xen/events.h7
-rw-r--r--include/xen/grant_table.h37
-rw-r--r--include/xen/interface/grant_table.h167
-rw-r--r--include/xen/interface/io/xs_wire.h3
-rw-r--r--include/xen/interface/xen.h2
-rw-r--r--include/xen/xenbus.h31
-rw-r--r--include/xen/xenbus_dev.h41
44 files changed, 1513 insertions, 265 deletions
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
new file mode 100644
index 000000000000..3d5951c8bf5f
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -0,0 +1,75 @@
+What:		/sys/bus/xen-backend/devices/*/devtype
+Date:		Feb 2009
+KernelVersion:	2.6.38
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                The type of the device.  e.g., one of: 'vbd' (block),
+                'vif' (network), or 'vfb' (framebuffer).
+
+What:		/sys/bus/xen-backend/devices/*/nodename
+Date:		Feb 2009
+KernelVersion:	2.6.38
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                XenStore node (under /local/domain/NNN/) for this
+                backend device.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/physical_device
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                The major:minor number (in hexidecimal) of the
+                physical device providing the storage for this backend
+                block device.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/mode
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Whether the block device is read-only ('r') or
+                read-write ('w').
+
+What:		/sys/bus/xen-backend/devices/vbd-*/statistics/f_req
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Number of flush requests from the frontend.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/statistics/oo_req
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Number of requests delayed because the backend was too
+                busy processing previous requests.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/statistics/rd_req
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Number of read requests from the frontend.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/statistics/rd_sect
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Number of sectors read by the frontend.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/statistics/wr_req
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Number of write requests from the frontend.
+
+What:		/sys/bus/xen-backend/devices/vbd-*/statistics/wr_sect
+Date:		April 2011
+KernelVersion:	3.0
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Number of sectors written by the frontend.
diff --git a/Documentation/ABI/stable/sysfs-devices-system-xen_memory b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
new file mode 100644
index 000000000000..caa311d59ac1
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
@@ -0,0 +1,77 @@
+What:		/sys/devices/system/xen_memory/xen_memory0/max_retry_count
+Date:		May 2011
+KernelVersion:	2.6.39
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		The maximum number of times the balloon driver will
+		attempt to increase the balloon before giving up.  See
+		also 'retry_count' below.
+		A value of zero means retry forever and is the default one.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/max_schedule_delay
+Date:		May 2011
+KernelVersion:	2.6.39
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		The limit that 'schedule_delay' (see below) will be
+		increased to. The default value is 32 seconds.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/retry_count
+Date:		May 2011
+KernelVersion:	2.6.39
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		The current number of times that the balloon driver
+		has attempted to increase the size of the balloon.
+		The default value is one. With max_retry_count being
+		zero (unlimited), this means that the driver will attempt
+		to retry with a 'schedule_delay' delay.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/schedule_delay
+Date:		May 2011
+KernelVersion:	2.6.39
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		The time (in seconds) to wait between attempts to
+		increase the balloon.  Each time the balloon cannot be
+		increased, 'schedule_delay' is increased (until
+		'max_schedule_delay' is reached at which point it
+		will use the max value).
+
+What:		/sys/devices/system/xen_memory/xen_memory0/target
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		The target number of pages to adjust this domain's
+		memory reservation to.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/target_kb
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		As target above, except the value is in KiB.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/info/current_kb
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		Current size (in KiB) of this domain's memory
+		reservation.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/info/high_kb
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		Amount (in KiB) of high memory in the balloon.
+
+What:		/sys/devices/system/xen_memory/xen_memory0/info/low_kb
+Date:		April 2008
+KernelVersion:	2.6.26
+Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+		Amount (in KiB) of low (or normal) memory in the
+		balloon.
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index 1d2427d116e3..fbb519828aa1 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -71,7 +71,7 @@
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint, unsigned int);
 __DEFINE_GUEST_HANDLE(ulong, unsigned long);
-__DEFINE_GUEST_HANDLE(u64, unsigned long);
+
 DEFINE_GUEST_HANDLE(char);
 DEFINE_GUEST_HANDLE(int);
 DEFINE_GUEST_HANDLE(long);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 26c731a106af..fdce49c7aff6 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -29,7 +29,8 @@ config XEN_PVHVM
 
 config XEN_MAX_DOMAIN_MEMORY
        int
-       default 128
+       default 500 if X86_64
+       default 64 if X86_32
        depends on XEN
        help
          This only affects the sizing of some bss arrays, the unused
@@ -48,3 +49,4 @@ config XEN_DEBUG_FS
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
+
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 5a40d24ba331..3a5f55d51907 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -54,6 +54,20 @@ static int map_pte_fn(pte_t *pte, struct page *pmd_page,
 	return 0;
 }
 
+/*
+ * This function is used to map shared frames to store grant status. It is
+ * different from map_pte_fn above, the frames type here is uint64_t.
+ */
+static int map_pte_fn_status(pte_t *pte, struct page *pmd_page,
+			     unsigned long addr, void *data)
+{
+	uint64_t **frames = (uint64_t **)data;
+
+	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
+
 static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
 			unsigned long addr, void *data)
 {
@@ -64,10 +78,10 @@ static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 			   unsigned long max_nr_gframes,
-			   struct grant_entry **__shared)
+			   void **__shared)
 {
 	int rc;
-	struct grant_entry *shared = *__shared;
+	void *shared = *__shared;
 
 	if (shared == NULL) {
 		struct vm_struct *area =
@@ -83,8 +97,30 @@ int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 	return rc;
 }
 
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
-			      unsigned long nr_gframes)
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   grant_status_t **__shared)
+{
+	int rc;
+	grant_status_t *shared = *__shared;
+
+	if (shared == NULL) {
+		/* No need to pass in PTE as we are going to do it
+		 * in apply_to_page_range anyhow. */
+		struct vm_struct *area =
+			alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL);
+		BUG_ON(area == NULL);
+		shared = area->addr;
+		*__shared = shared;
+	}
+
+	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn_status, &frames);
+	return rc;
+}
+
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes)
 {
 	apply_to_page_range(&init_mm, (unsigned long)shared,
 			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8069322e4c9e..37c794d31264 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -787,17 +787,14 @@ static const struct xenbus_device_id xen_blkbk_ids[] = {
 };
 
 
-static struct xenbus_driver xen_blkbk = {
-	.name = "vbd",
-	.owner = THIS_MODULE,
-	.ids = xen_blkbk_ids,
+static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
 	.probe = xen_blkbk_probe,
 	.remove = xen_blkbk_remove,
 	.otherend_changed = frontend_changed
-};
+);
 
 
 int xen_blkif_xenbus_init(void)
 {
-	return xenbus_register_backend(&xen_blkbk);
+	return xenbus_register_backend(&xen_blkbk_driver);
 }
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 7b2ec5908413..9fd3ee203b1e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1437,16 +1437,13 @@ static const struct xenbus_device_id blkfront_ids[] = {
 	{ "" }
 };
 
-static struct xenbus_driver blkfront = {
-	.name = "vbd",
-	.owner = THIS_MODULE,
-	.ids = blkfront_ids,
+static DEFINE_XENBUS_DRIVER(blkfront, ,
 	.probe = blkfront_probe,
 	.remove = blkfront_remove,
 	.resume = blkfront_resume,
 	.otherend_changed = blkback_changed,
 	.is_ready = blkfront_is_ready,
-};
+);
 
 static int __init xlblk_init(void)
 {
@@ -1461,7 +1458,7 @@ static int __init xlblk_init(void)
 		return -ENODEV;
 	}
 
-	ret = xenbus_register_frontend(&blkfront);
+	ret = xenbus_register_frontend(&blkfront_driver);
 	if (ret) {
 		unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
 		return ret;
@@ -1474,7 +1471,7 @@ module_init(xlblk_init);
 
 static void __exit xlblk_exit(void)
 {
-	return xenbus_unregister_driver(&blkfront);
+	return xenbus_unregister_driver(&blkfront_driver);
 }
 module_exit(xlblk_exit);
 
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index ad2e51c04db8..02ca8680ea5b 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -361,15 +361,12 @@ static const struct xenbus_device_id xenkbd_ids[] = {
 	{ "" }
 };
 
-static struct xenbus_driver xenkbd_driver = {
-	.name = "vkbd",
-	.owner = THIS_MODULE,
-	.ids = xenkbd_ids,
+static DEFINE_XENBUS_DRIVER(xenkbd, ,
 	.probe = xenkbd_probe,
 	.remove = xenkbd_remove,
 	.resume = xenkbd_resume,
 	.otherend_changed = xenkbd_backend_changed,
-};
+);
 
 static int __init xenkbd_init(void)
 {
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 639cf8ab62ba..59effac15f36 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1634,7 +1634,7 @@ static int __init netback_init(void)
 	int rc = 0;
 	int group;
 
-	if (!xen_pv_domain())
+	if (!xen_domain())
 		return -ENODEV;
 
 	xen_netbk_group_nr = num_online_cpus();
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 1ce729d6af75..410018c4c528 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -474,17 +474,14 @@ static const struct xenbus_device_id netback_ids[] = {
 };
 
 
-static struct xenbus_driver netback = {
-	.name = "vif",
-	.owner = THIS_MODULE,
-	.ids = netback_ids,
+static DEFINE_XENBUS_DRIVER(netback, ,
 	.probe = netback_probe,
 	.remove = netback_remove,
 	.uevent = netback_uevent,
 	.otherend_changed = frontend_changed,
-};
+);
 
 int xenvif_xenbus_init(void)
 {
-	return xenbus_register_backend(&netback);
+	return xenbus_register_backend(&netback_driver);
 }
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 0a59c57864f5..fa679057630f 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1914,7 +1914,7 @@ static void xennet_sysfs_delif(struct net_device *netdev)
 
 #endif /* CONFIG_SYSFS */
 
-static struct xenbus_device_id netfront_ids[] = {
+static const struct xenbus_device_id netfront_ids[] = {
 	{ "vif" },
 	{ "" }
 };
@@ -1941,15 +1941,12 @@ static int __devexit xennet_remove(struct xenbus_device *dev)
 	return 0;
 }
 
-static struct xenbus_driver netfront_driver = {
-	.name = "vif",
-	.owner = THIS_MODULE,
-	.ids = netfront_ids,
+static DEFINE_XENBUS_DRIVER(netfront, ,
 	.probe = netfront_probe,
 	.remove = __devexit_p(xennet_remove),
 	.resume = netfront_resume,
 	.otherend_changed = netback_changed,
-};
+);
 
 static int __init netif_init(void)
 {
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 90832a955991..7cf3d2fcf56a 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -1126,14 +1126,11 @@ static const struct xenbus_device_id xenpci_ids[] = {
 	{""},
 };
 
-static struct xenbus_driver xenbus_pcifront_driver = {
-	.name			= "pcifront",
-	.owner			= THIS_MODULE,
-	.ids			= xenpci_ids,
+static DEFINE_XENBUS_DRIVER(xenpci, "pcifront",
 	.probe			= pcifront_xenbus_probe,
 	.remove			= pcifront_xenbus_remove,
 	.otherend_changed	= pcifront_backend_changed,
-};
+);
 
 static int __init pcifront_init(void)
 {
@@ -1142,12 +1139,12 @@ static int __init pcifront_init(void)
 
 	pci_frontend_registrar(1 /* enable */);
 
-	return xenbus_register_frontend(&xenbus_pcifront_driver);
+	return xenbus_register_frontend(&xenpci_driver);
 }
 
 static void __exit pcifront_cleanup(void)
 {
-	xenbus_unregister_driver(&xenbus_pcifront_driver);
+	xenbus_unregister_driver(&xenpci_driver);
 	pci_frontend_registrar(0 /* disable */);
 }
 module_init(pcifront_init);
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index beac52fc1c0e..cb4529c40d74 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -671,20 +671,17 @@ InitWait:
 	}
 }
 
-static struct xenbus_device_id xenfb_ids[] = {
+static const struct xenbus_device_id xenfb_ids[] = {
 	{ "vfb" },
 	{ "" }
 };
 
-static struct xenbus_driver xenfb_driver = {
-	.name = "vfb",
-	.owner = THIS_MODULE,
-	.ids = xenfb_ids,
+static DEFINE_XENBUS_DRIVER(xenfb, ,
 	.probe = xenfb_probe,
 	.remove = xenfb_remove,
 	.resume = xenfb_resume,
 	.otherend_changed = xenfb_backend_changed,
-};
+);
 
 static int __init xenfb_init(void)
 {
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8795480c2350..a1ced521cf74 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -86,6 +86,7 @@ config XEN_BACKEND
 
 config XENFS
 	tristate "Xen filesystem"
+	select XEN_PRIVCMD
 	default y
 	help
 	  The xen filesystem provides a way for domains to share
@@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND
 	  xen-pciback.hide=(03:00.0)(04:00.0)
 
 	  If in doubt, say m.
+
+config XEN_PRIVCMD
+	tristate
+	depends on XEN
+	default m
+
 endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 974fffdf22b2..aa31337192cc 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
 obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
+obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
 
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
+xen-privcmd-y				:= privcmd.o
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6e075cdd0c6b..e5e5812a1014 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -87,6 +87,7 @@ enum xen_irq_type {
  */
 struct irq_info {
 	struct list_head list;
+	int refcnt;
 	enum xen_irq_type type;	/* type */
 	unsigned irq;
 	unsigned short evtchn;	/* event channel */
@@ -406,6 +407,7 @@ static void xen_irq_init(unsigned irq)
 		panic("Unable to allocate metadata for IRQ%d\n", irq);
 
 	info->type = IRQT_UNBOUND;
+	info->refcnt = -1;
 
 	irq_set_handler_data(irq, info);
 
@@ -469,6 +471,8 @@ static void xen_free_irq(unsigned irq)
 
 	irq_set_handler_data(irq, NULL);
 
+	WARN_ON(info->refcnt > 0);
+
 	kfree(info);
 
 	/* Legacy IRQ descriptors are managed by the arch. */
@@ -637,7 +641,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
 	if (irq != -1) {
 		printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
 		       irq, gsi);
-		goto out;	/* XXX need refcount? */
+		goto out;
 	}
 
 	irq = xen_allocate_irq_gsi(gsi);
@@ -939,9 +943,16 @@ static void unbind_from_irq(unsigned int irq)
 {
 	struct evtchn_close close;
 	int evtchn = evtchn_from_irq(irq);
+	struct irq_info *info = irq_get_handler_data(irq);
 
 	mutex_lock(&irq_mapping_update_lock);
 
+	if (info->refcnt > 0) {
+		info->refcnt--;
+		if (info->refcnt != 0)
+			goto done;
+	}
+
 	if (VALID_EVTCHN(evtchn)) {
 		close.port = evtchn;
 		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
@@ -970,6 +981,7 @@ static void unbind_from_irq(unsigned int irq)
 
 	xen_free_irq(irq);
 
+ done:
 	mutex_unlock(&irq_mapping_update_lock);
 }
 
@@ -1065,6 +1077,69 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
 }
 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
 
+int evtchn_make_refcounted(unsigned int evtchn)
+{
+	int irq = evtchn_to_irq[evtchn];
+	struct irq_info *info;
+
+	if (irq == -1)
+		return -ENOENT;
+
+	info = irq_get_handler_data(irq);
+
+	if (!info)
+		return -ENOENT;
+
+	WARN_ON(info->refcnt != -1);
+
+	info->refcnt = 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
+
+int evtchn_get(unsigned int evtchn)
+{
+	int irq;
+	struct irq_info *info;
+	int err = -ENOENT;
+
+	if (evtchn >= NR_EVENT_CHANNELS)
+		return -EINVAL;
+
+	mutex_lock(&irq_mapping_update_lock);
+
+	irq = evtchn_to_irq[evtchn];
+	if (irq == -1)
+		goto done;
+
+	info = irq_get_handler_data(irq);
+
+	if (!info)
+		goto done;
+
+	err = -EINVAL;
+	if (info->refcnt <= 0)
+		goto done;
+
+	info->refcnt++;
+	err = 0;
+ done:
+	mutex_unlock(&irq_mapping_update_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(evtchn_get);
+
+void evtchn_put(unsigned int evtchn)
+{
+	int irq = evtchn_to_irq[evtchn];
+	if (WARN_ON(irq == -1))
+		return;
+	unbind_from_irq(irq);
+}
+EXPORT_SYMBOL_GPL(evtchn_put);
+
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 {
 	int irq = per_cpu(ipi_to_irq, cpu)[vector];
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index dbc13e94b612..b1f60a0c0bea 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -268,7 +268,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
 				       u->name, (void *)(unsigned long)port);
 	if (rc >= 0)
-		rc = 0;
+		rc = evtchn_make_refcounted(port);
 
 	return rc;
 }
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index e1c4c6e5b469..934985d14c24 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -74,7 +74,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by "
 		"the gntalloc device");
 
 static LIST_HEAD(gref_list);
-static DEFINE_SPINLOCK(gref_lock);
+static DEFINE_MUTEX(gref_mutex);
 static int gref_size;
 
 struct notify_info {
@@ -99,6 +99,12 @@ struct gntalloc_file_private_data {
 	uint64_t index;
 };
 
+struct gntalloc_vma_private_data {
+	struct gntalloc_gref *gref;
+	int users;
+	int count;
+};
+
 static void __del_gref(struct gntalloc_gref *gref);
 
 static void do_cleanup(void)
@@ -143,15 +149,15 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op,
 	}
 
 	/* Add to gref lists. */
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	list_splice_tail(&queue_gref, &gref_list);
 	list_splice_tail(&queue_file, &priv->list);
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 
 	return 0;
 
 undo:
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	gref_size -= (op->count - i);
 
 	list_for_each_entry(gref, &queue_file, next_file) {
@@ -167,7 +173,7 @@ undo:
 	 */
 	if (unlikely(!list_empty(&queue_gref)))
 		list_splice_tail(&queue_gref, &gref_list);
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 	return rc;
 }
 
@@ -178,8 +184,10 @@ static void __del_gref(struct gntalloc_gref *gref)
 		tmp[gref->notify.pgoff] = 0;
 		kunmap(gref->page);
 	}
-	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(gref->notify.event);
+		evtchn_put(gref->notify.event);
+	}
 
 	gref->notify.flags = 0;
 
@@ -189,6 +197,8 @@ static void __del_gref(struct gntalloc_gref *gref)
 
 		if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
 			return;
+
+		gnttab_free_grant_reference(gref->gref_id);
 	}
 
 	gref_size--;
@@ -251,7 +261,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
 
 	pr_debug("%s: priv %p\n", __func__, priv);
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	while (!list_empty(&priv->list)) {
 		gref = list_entry(priv->list.next,
 			struct gntalloc_gref, next_file);
@@ -261,7 +271,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp)
 			__del_gref(gref);
 	}
 	kfree(priv);
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 
 	return 0;
 }
@@ -286,21 +296,21 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
 		goto out;
 	}
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	/* Clean up pages that were at zero (local) users but were still mapped
 	 * by remote domains. Since those pages count towards the limit that we
 	 * are about to enforce, removing them here is a good idea.
 	 */
 	do_cleanup();
 	if (gref_size + op.count > limit) {
-		spin_unlock(&gref_lock);
+		mutex_unlock(&gref_mutex);
 		rc = -ENOSPC;
 		goto out_free;
 	}
 	gref_size += op.count;
 	op.index = priv->index;
 	priv->index += op.count * PAGE_SIZE;
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 
 	rc = add_grefs(&op, gref_ids, priv);
 	if (rc < 0)
@@ -343,7 +353,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
 		goto dealloc_grant_out;
 	}
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 	gref = find_grefs(priv, op.index, op.count);
 	if (gref) {
 		/* Remove from the file list only, and decrease reference count.
@@ -363,7 +373,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
 
 	do_cleanup();
 
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 dealloc_grant_out:
 	return rc;
 }
@@ -383,7 +393,7 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
 	index = op.index & ~(PAGE_SIZE - 1);
 	pgoff = op.index & (PAGE_SIZE - 1);
 
-	spin_lock(&gref_lock);
+	mutex_lock(&gref_mutex);
 
 	gref = find_grefs(priv, index, 1);
 	if (!gref) {
@@ -396,12 +406,30 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv,
 		goto unlock_out;
 	}
 
+	/* We need to grab a reference to the event channel we are going to use
+	 * to send the notify before releasing the reference we may already have
+	 * (if someone has called this ioctl twice). This is required so that
+	 * it is possible to change the clear_byte part of the notification
+	 * without disturbing the event channel part, which may now be the last
+	 * reference to that event channel.
+	 */
+	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
+		if (evtchn_get(op.event_channel_port)) {
+			rc = -EINVAL;
+			goto unlock_out;
+		}
+	}
+
+	if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+		evtchn_put(gref->notify.event);
+
 	gref->notify.flags = op.action;
 	gref->notify.pgoff = pgoff;
 	gref->notify.event = op.event_channel_port;
 	rc = 0;
+
  unlock_out:
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 	return rc;
 }
 
@@ -429,26 +457,40 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
 
 static void gntalloc_vma_open(struct vm_area_struct *vma)
 {
-	struct gntalloc_gref *gref = vma->vm_private_data;
-	if (!gref)
+	struct gntalloc_vma_private_data *priv = vma->vm_private_data;
+
+	if (!priv)
 		return;
 
-	spin_lock(&gref_lock);
-	gref->users++;
-	spin_unlock(&gref_lock);
+	mutex_lock(&gref_mutex);
+	priv->users++;
+	mutex_unlock(&gref_mutex);
 }
 
 static void gntalloc_vma_close(struct vm_area_struct *vma)
 {
-	struct gntalloc_gref *gref = vma->vm_private_data;
-	if (!gref)
+	struct gntalloc_vma_private_data *priv = vma->vm_private_data;
+	struct gntalloc_gref *gref, *next;
+	int i;
+
+	if (!priv)
 		return;
 
-	spin_lock(&gref_lock);
-	gref->users--;
-	if (gref->users == 0)
-		__del_gref(gref);
-	spin_unlock(&gref_lock);
+	mutex_lock(&gref_mutex);
+	priv->users--;
+	if (priv->users == 0) {
+		gref = priv->gref;
+		for (i = 0; i < priv->count; i++) {
+			gref->users--;
+			next = list_entry(gref->next_gref.next,
+					  struct gntalloc_gref, next_gref);
+			if (gref->users == 0)
+				__del_gref(gref);
+			gref = next;
+		}
+		kfree(priv);
+	}
+	mutex_unlock(&gref_mutex);
 }
 
 static struct vm_operations_struct gntalloc_vmops = {
@@ -459,30 +501,41 @@ static struct vm_operations_struct gntalloc_vmops = {
 static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct gntalloc_file_private_data *priv = filp->private_data;
+	struct gntalloc_vma_private_data *vm_priv;
 	struct gntalloc_gref *gref;
 	int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 	int rv, i;
 
-	pr_debug("%s: priv %p, page %lu+%d\n", __func__,
-		       priv, vma->vm_pgoff, count);
-
 	if (!(vma->vm_flags & VM_SHARED)) {
 		printk(KERN_ERR "%s: Mapping must be shared.\n", __func__);
 		return -EINVAL;
 	}
 
-	spin_lock(&gref_lock);
+	vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL);
+	if (!vm_priv)
+		return -ENOMEM;
+
+	mutex_lock(&gref_mutex);
+
+	pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__,
+		       priv, vm_priv, vma->vm_pgoff, count);
+
 	gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count);
 	if (gref == NULL) {
 		rv = -ENOENT;
 		pr_debug("%s: Could not find grant reference",
 				__func__);
+		kfree(vm_priv);
 		goto out_unlock;
 	}
 
-	vma->vm_private_data = gref;
+	vm_priv->gref = gref;
+	vm_priv->users = 1;
+	vm_priv->count = count;
+
+	vma->vm_private_data = vm_priv;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
 
 	vma->vm_ops = &gntalloc_vmops;
 
@@ -499,7 +552,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 	rv = 0;
 
 out_unlock:
-	spin_unlock(&gref_lock);
+	mutex_unlock(&gref_mutex);
 	return rv;
 }
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index afca14d9042e..99d8151c824a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -193,8 +193,10 @@ static void gntdev_put_map(struct grant_map *map)
 
 	atomic_sub(map->count, &pages_mapped);
 
-	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT)
+	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(map->notify.event);
+		evtchn_put(map->notify.event);
+	}
 
 	if (map->pages) {
 		if (!use_ptemod)
@@ -312,7 +314,8 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages);
+	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
+				pages, true);
 	if (err)
 		return err;
 
@@ -599,6 +602,8 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	struct ioctl_gntdev_unmap_notify op;
 	struct grant_map *map;
 	int rc;
+	int out_flags;
+	unsigned int out_event;
 
 	if (copy_from_user(&op, u, sizeof(op)))
 		return -EFAULT;
@@ -606,6 +611,21 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
 		return -EINVAL;
 
+	/* We need to grab a reference to the event channel we are going to use
+	 * to send the notify before releasing the reference we may already have
+	 * (if someone has called this ioctl twice). This is required so that
+	 * it is possible to change the clear_byte part of the notification
+	 * without disturbing the event channel part, which may now be the last
+	 * reference to that event channel.
+	 */
+	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
+		if (evtchn_get(op.event_channel_port))
+			return -EINVAL;
+	}
+
+	out_flags = op.action;
+	out_event = op.event_channel_port;
+
 	spin_lock(&priv->lock);
 
 	list_for_each_entry(map, &priv->maps, next) {
@@ -624,12 +644,22 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
 		goto unlock_out;
 	}
 
+	out_flags = map->notify.flags;
+	out_event = map->notify.event;
+
 	map->notify.flags = op.action;
 	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
 	map->notify.event = op.event_channel_port;
+
 	rc = 0;
+
  unlock_out:
 	spin_unlock(&priv->lock);
+
+	/* Drop the reference to the event channel we did not save in the map */
+	if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
+		evtchn_put(out_event);
+
 	return rc;
 }
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index bf1c094f4ebf..1cd94daa71db 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -44,16 +44,19 @@
 #include <xen/page.h>
 #include <xen/grant_table.h>
 #include <xen/interface/memory.h>
+#include <xen/hvc-console.h>
 #include <asm/xen/hypercall.h>
 
 #include <asm/pgtable.h>
 #include <asm/sync_bitops.h>
 
-
 /* External tools reserve first few grant table entries. */
 #define NR_RESERVED_ENTRIES 8
 #define GNTTAB_LIST_END 0xffffffff
-#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+#define GREFS_PER_GRANT_FRAME \
+(grant_table_version == 1 ?                      \
+(PAGE_SIZE / sizeof(struct grant_entry_v1)) :   \
+(PAGE_SIZE / sizeof(union grant_entry_v2)))
 
 static grant_ref_t **gnttab_list;
 static unsigned int nr_grant_frames;
@@ -64,13 +67,97 @@ static DEFINE_SPINLOCK(gnttab_list_lock);
 unsigned long xen_hvm_resume_frames;
 EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
 
-static struct grant_entry *shared;
+static union {
+	struct grant_entry_v1 *v1;
+	union grant_entry_v2 *v2;
+	void *addr;
+} gnttab_shared;
+
+/*This is a structure of function pointers for grant table*/
+struct gnttab_ops {
+	/*
+	 * Mapping a list of frames for storing grant entries. Frames parameter
+	 * is used to store grant table address when grant table being setup,
+	 * nr_gframes is the number of frames to map grant table. Returning
+	 * GNTST_okay means success and negative value means failure.
+	 */
+	int (*map_frames)(unsigned long *frames, unsigned int nr_gframes);
+	/*
+	 * Release a list of frames which are mapped in map_frames for grant
+	 * entry status.
+	 */
+	void (*unmap_frames)(void);
+	/*
+	 * Introducing a valid entry into the grant table, granting the frame of
+	 * this grant entry to domain for accessing or transfering. Ref
+	 * parameter is reference of this introduced grant entry, domid is id of
+	 * granted domain, frame is the page frame to be granted, and flags is
+	 * status of the grant entry to be updated.
+	 */
+	void (*update_entry)(grant_ref_t ref, domid_t domid,
+			     unsigned long frame, unsigned flags);
+	/*
+	 * Stop granting a grant entry to domain for accessing. Ref parameter is
+	 * reference of a grant entry whose grant access will be stopped,
+	 * readonly is not in use in this function. If the grant entry is
+	 * currently mapped for reading or writing, just return failure(==0)
+	 * directly and don't tear down the grant access. Otherwise, stop grant
+	 * access for this entry and return success(==1).
+	 */
+	int (*end_foreign_access_ref)(grant_ref_t ref, int readonly);
+	/*
+	 * Stop granting a grant entry to domain for transfer. Ref parameter is
+	 * reference of a grant entry whose grant transfer will be stopped. If
+	 * tranfer has not started, just reclaim the grant entry and return
+	 * failure(==0). Otherwise, wait for the transfer to complete and then
+	 * return the frame.
+	 */
+	unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref);
+	/*
+	 * Query the status of a grant entry. Ref parameter is reference of
+	 * queried grant entry, return value is the status of queried entry.
+	 * Detailed status(writing/reading) can be gotten from the return value
+	 * by bit operations.
+	 */
+	int (*query_foreign_access)(grant_ref_t ref);
+	/*
+	 * Grant a domain to access a range of bytes within the page referred by
+	 * an available grant entry. Ref parameter is reference of a grant entry
+	 * which will be sub-page accessed, domid is id of grantee domain, frame
+	 * is frame address of subpage grant, flags is grant type and flag
+	 * information, page_off is offset of the range of bytes, and length is
+	 * length of bytes to be accessed.
+	 */
+	void (*update_subpage_entry)(grant_ref_t ref, domid_t domid,
+				     unsigned long frame, int flags,
+				     unsigned page_off, unsigned length);
+	/*
+	 * Redirect an available grant entry on domain A to another grant
+	 * reference of domain B, then allow domain C to use grant reference
+	 * of domain B transitively. Ref parameter is an available grant entry
+	 * reference on domain A, domid is id of domain C which accesses grant
+	 * entry transitively, flags is grant type and flag information,
+	 * trans_domid is id of domain B whose grant entry is finally accessed
+	 * transitively, trans_gref is grant entry transitive reference of
+	 * domain B.
+	 */
+	void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags,
+				   domid_t trans_domid, grant_ref_t trans_gref);
+};
+
+static struct gnttab_ops *gnttab_interface;
+
+/*This reflects status of grant entries, so act as a global value*/
+static grant_status_t *grstatus;
+
+static int grant_table_version;
 
 static struct gnttab_free_callback *gnttab_free_callback_list;
 
 static int gnttab_expand(unsigned int req_entries);
 
 #define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define SPP (PAGE_SIZE / sizeof(grant_status_t))
 
 static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
 {
@@ -142,23 +229,33 @@ static void put_free_entry(grant_ref_t ref)
 	spin_unlock_irqrestore(&gnttab_list_lock, flags);
 }
 
-static void update_grant_entry(grant_ref_t ref, domid_t domid,
-			       unsigned long frame, unsigned flags)
+/*
+ * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
+ * Introducing a valid entry into the grant table:
+ *  1. Write ent->domid.
+ *  2. Write ent->frame:
+ *      GTF_permit_access:   Frame to which access is permitted.
+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ *                           frame, or zero if none.
+ *  3. Write memory barrier (WMB).
+ *  4. Write ent->flags, inc. valid type.
+ */
+static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
+				   unsigned long frame, unsigned flags)
+{
+	gnttab_shared.v1[ref].domid = domid;
+	gnttab_shared.v1[ref].frame = frame;
+	wmb();
+	gnttab_shared.v1[ref].flags = flags;
+}
+
+static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
+				   unsigned long frame, unsigned flags)
 {
-	/*
-	 * Introducing a valid entry into the grant table:
-	 *  1. Write ent->domid.
-	 *  2. Write ent->frame:
-	 *      GTF_permit_access:   Frame to which access is permitted.
-	 *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
-	 *                           frame, or zero if none.
-	 *  3. Write memory barrier (WMB).
-	 *  4. Write ent->flags, inc. valid type.
-	 */
-	shared[ref].frame = frame;
-	shared[ref].domid = domid;
+	gnttab_shared.v2[ref].hdr.domid = domid;
+	gnttab_shared.v2[ref].full_page.frame = frame;
 	wmb();
-	shared[ref].flags = flags;
+	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
 }
 
 /*
@@ -167,7 +264,7 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid,
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
 				     unsigned long frame, int readonly)
 {
-	update_grant_entry(ref, domid, frame,
+	gnttab_interface->update_entry(ref, domid, frame,
 			   GTF_permit_access | (readonly ? GTF_readonly : 0));
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
@@ -187,31 +284,184 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
-int gnttab_query_foreign_access(grant_ref_t ref)
+void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
+				    unsigned long frame, int flags,
+				    unsigned page_off,
+				    unsigned length)
+{
+	gnttab_shared.v2[ref].sub_page.frame = frame;
+	gnttab_shared.v2[ref].sub_page.page_off = page_off;
+	gnttab_shared.v2[ref].sub_page.length = length;
+	gnttab_shared.v2[ref].hdr.domid = domid;
+	wmb();
+	gnttab_shared.v2[ref].hdr.flags =
+				GTF_permit_access | GTF_sub_page | flags;
+}
+
+int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
+					    unsigned long frame, int flags,
+					    unsigned page_off,
+					    unsigned length)
 {
-	u16 nflags;
+	if (flags & (GTF_accept_transfer | GTF_reading |
+		     GTF_writing | GTF_transitive))
+		return -EPERM;
 
-	nflags = shared[ref].flags;
+	if (gnttab_interface->update_subpage_entry == NULL)
+		return -ENOSYS;
 
-	return nflags & (GTF_reading|GTF_writing);
+	gnttab_interface->update_subpage_entry(ref, domid, frame, flags,
+					       page_off, length);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref);
+
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+					int flags, unsigned page_off,
+					unsigned length)
+{
+	int ref, rc;
+
+	ref = get_free_entries(1);
+	if (unlikely(ref < 0))
+		return -ENOSPC;
+
+	rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags,
+						     page_off, length);
+	if (rc < 0) {
+		put_free_entry(ref);
+		return rc;
+	}
+
+	return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage);
+
+bool gnttab_subpage_grants_available(void)
+{
+	return gnttab_interface->update_subpage_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
+
+void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
+				  int flags, domid_t trans_domid,
+				  grant_ref_t trans_gref)
+{
+	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
+	gnttab_shared.v2[ref].transitive.gref = trans_gref;
+	gnttab_shared.v2[ref].hdr.domid = domid;
+	wmb();
+	gnttab_shared.v2[ref].hdr.flags =
+				GTF_permit_access | GTF_transitive | flags;
+}
+
+int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
+					  int flags, domid_t trans_domid,
+					  grant_ref_t trans_gref)
+{
+	if (flags & (GTF_accept_transfer | GTF_reading |
+		     GTF_writing | GTF_sub_page))
+		return -EPERM;
+
+	if (gnttab_interface->update_trans_entry == NULL)
+		return -ENOSYS;
+
+	gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid,
+					     trans_gref);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref);
+
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+				      domid_t trans_domid,
+				      grant_ref_t trans_gref)
+{
+	int ref, rc;
+
+	ref = get_free_entries(1);
+	if (unlikely(ref < 0))
+		return -ENOSPC;
+
+	rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags,
+						   trans_domid, trans_gref);
+	if (rc < 0) {
+		put_free_entry(ref);
+		return rc;
+	}
+
+	return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans);
+
+bool gnttab_trans_grants_available(void)
+{
+	return gnttab_interface->update_trans_entry != NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_trans_grants_available);
+
+static int gnttab_query_foreign_access_v1(grant_ref_t ref)
+{
+	return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing);
+}
+
+static int gnttab_query_foreign_access_v2(grant_ref_t ref)
+{
+	return grstatus[ref] & (GTF_reading|GTF_writing);
+}
+
+int gnttab_query_foreign_access(grant_ref_t ref)
+{
+	return gnttab_interface->query_foreign_access(ref);
 }
 EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
 
-int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly)
 {
 	u16 flags, nflags;
+	u16 *pflags;
 
-	nflags = shared[ref].flags;
+	pflags = &gnttab_shared.v1[ref].flags;
+	nflags = *pflags;
 	do {
 		flags = nflags;
 		if (flags & (GTF_reading|GTF_writing)) {
 			printk(KERN_ALERT "WARNING: g.e. still in use!\n");
 			return 0;
 		}
-	} while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+	} while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags);
+
+	return 1;
+}
+
+static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly)
+{
+	gnttab_shared.v2[ref].hdr.flags = 0;
+	mb();
+	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
+		return 0;
+	} else {
+		/* The read of grstatus needs to have acquire
+		semantics.  On x86, reads already have
+		that, and we just need to protect against
+		compiler reorderings.  On other
+		architectures we may need a full
+		barrier. */
+#ifdef CONFIG_X86
+		barrier();
+#else
+		mb();
+#endif
+	}
 
 	return 1;
 }
+
+int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+{
+	return gnttab_interface->end_foreign_access_ref(ref, readonly);
+}
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
 
 void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
@@ -246,37 +496,76 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
 void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
 				       unsigned long pfn)
 {
-	update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
+	gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer);
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
 
-unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref)
 {
 	unsigned long frame;
 	u16           flags;
+	u16          *pflags;
+
+	pflags = &gnttab_shared.v1[ref].flags;
 
 	/*
 	 * If a transfer is not even yet started, try to reclaim the grant
 	 * reference and return failure (== 0).
 	 */
-	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
-		if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+	while (!((flags = *pflags) & GTF_transfer_committed)) {
+		if (sync_cmpxchg(pflags, flags, 0) == flags)
 			return 0;
 		cpu_relax();
 	}
 
 	/* If a transfer is in progress then wait until it is completed. */
 	while (!(flags & GTF_transfer_completed)) {
-		flags = shared[ref].flags;
+		flags = *pflags;
 		cpu_relax();
 	}
 
 	rmb();	/* Read the frame number /after/ reading completion status. */
-	frame = shared[ref].frame;
+	frame = gnttab_shared.v1[ref].frame;
+	BUG_ON(frame == 0);
+
+	return frame;
+}
+
+static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref)
+{
+	unsigned long frame;
+	u16           flags;
+	u16          *pflags;
+
+	pflags = &gnttab_shared.v2[ref].hdr.flags;
+
+	/*
+	 * If a transfer is not even yet started, try to reclaim the grant
+	 * reference and return failure (== 0).
+	 */
+	while (!((flags = *pflags) & GTF_transfer_committed)) {
+		if (sync_cmpxchg(pflags, flags, 0) == flags)
+			return 0;
+		cpu_relax();
+	}
+
+	/* If a transfer is in progress then wait until it is completed. */
+	while (!(flags & GTF_transfer_completed)) {
+		flags = *pflags;
+		cpu_relax();
+	}
+
+	rmb();  /* Read the frame number /after/ reading completion status. */
+	frame = gnttab_shared.v2[ref].full_page.frame;
 	BUG_ON(frame == 0);
 
 	return frame;
 }
+
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+	return gnttab_interface->end_foreign_transfer_ref(ref);
+}
 EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
 
 unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
@@ -448,8 +737,8 @@ unsigned int gnttab_max_grant_frames(void)
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
-			struct gnttab_map_grant_ref *kmap_ops,
-			struct page **pages, unsigned int count)
+		    struct gnttab_map_grant_ref *kmap_ops,
+		    struct page **pages, unsigned int count)
 {
 	int i, ret;
 	pte_t *pte;
@@ -472,24 +761,10 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 				(map_ops[i].host_addr & ~PAGE_MASK));
 			mfn = pte_mfn(*pte);
 		} else {
-			/* If you really wanted to do this:
-			 * mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
-			 *
-			 * The reason we do not implement it is b/c on the
-			 * unmap path (gnttab_unmap_refs) we have no means of
-			 * checking whether the page is !GNTMAP_contains_pte.
-			 *
-			 * That is without some extra data-structure to carry
-			 * the struct page, bool clear_pte, and list_head next
-			 * tuples and deal with allocation/delallocation, etc.
-			 *
-			 * The users of this API set the GNTMAP_contains_pte
-			 * flag so lets just return not supported until it
-			 * becomes neccessary to implement.
-			 */
-			return -EOPNOTSUPP;
+			mfn = PFN_DOWN(map_ops[i].dev_bus_addr);
 		}
-		ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
+		ret = m2p_add_override(mfn, pages[i], kmap_ops ?
+				       &kmap_ops[i] : NULL);
 		if (ret)
 			return ret;
 	}
@@ -499,7 +774,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		struct page **pages, unsigned int count)
+		      struct page **pages, unsigned int count, bool clear_pte)
 {
 	int i, ret;
 
@@ -511,7 +786,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		return ret;
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i], true /* clear the PTE */);
+		ret = m2p_remove_override(pages[i], clear_pte);
 		if (ret)
 			return ret;
 	}
@@ -520,6 +795,77 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);
 
+static unsigned nr_status_frames(unsigned nr_grant_frames)
+{
+	return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP;
+}
+
+static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes)
+{
+	int rc;
+
+	rc = arch_gnttab_map_shared(frames, nr_gframes,
+				    gnttab_max_grant_frames(),
+				    &gnttab_shared.addr);
+	BUG_ON(rc);
+
+	return 0;
+}
+
+static void gnttab_unmap_frames_v1(void)
+{
+	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+}
+
+static int gnttab_map_frames_v2(unsigned long *frames, unsigned int nr_gframes)
+{
+	uint64_t *sframes;
+	unsigned int nr_sframes;
+	struct gnttab_get_status_frames getframes;
+	int rc;
+
+	nr_sframes = nr_status_frames(nr_gframes);
+
+	/* No need for kzalloc as it is initialized in following hypercall
+	 * GNTTABOP_get_status_frames.
+	 */
+	sframes = kmalloc(nr_sframes  * sizeof(uint64_t), GFP_ATOMIC);
+	if (!sframes)
+		return -ENOMEM;
+
+	getframes.dom        = DOMID_SELF;
+	getframes.nr_frames  = nr_sframes;
+	set_xen_guest_handle(getframes.frame_list, sframes);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
+				       &getframes, 1);
+	if (rc == -ENOSYS) {
+		kfree(sframes);
+		return -ENOSYS;
+	}
+
+	BUG_ON(rc || getframes.status);
+
+	rc = arch_gnttab_map_status(sframes, nr_sframes,
+				    nr_status_frames(gnttab_max_grant_frames()),
+				    &grstatus);
+	BUG_ON(rc);
+	kfree(sframes);
+
+	rc = arch_gnttab_map_shared(frames, nr_gframes,
+				    gnttab_max_grant_frames(),
+				    &gnttab_shared.addr);
+	BUG_ON(rc);
+
+	return 0;
+}
+
+static void gnttab_unmap_frames_v2(void)
+{
+	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
+	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
+}
+
 static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 {
 	struct gnttab_setup_table setup;
@@ -551,6 +897,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 		return rc;
 	}
 
+	/* No need for kzalloc as it is initialized in following hypercall
+	 * GNTTABOP_setup_table.
+	 */
 	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
 	if (!frames)
 		return -ENOMEM;
@@ -567,19 +916,65 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
 
 	BUG_ON(rc || setup.status);
 
-	rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(),
-				    &shared);
-	BUG_ON(rc);
+	rc = gnttab_interface->map_frames(frames, nr_gframes);
 
 	kfree(frames);
 
-	return 0;
+	return rc;
+}
+
+static struct gnttab_ops gnttab_v1_ops = {
+	.map_frames			= gnttab_map_frames_v1,
+	.unmap_frames			= gnttab_unmap_frames_v1,
+	.update_entry			= gnttab_update_entry_v1,
+	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v1,
+	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v1,
+	.query_foreign_access		= gnttab_query_foreign_access_v1,
+};
+
+static struct gnttab_ops gnttab_v2_ops = {
+	.map_frames			= gnttab_map_frames_v2,
+	.unmap_frames			= gnttab_unmap_frames_v2,
+	.update_entry			= gnttab_update_entry_v2,
+	.end_foreign_access_ref		= gnttab_end_foreign_access_ref_v2,
+	.end_foreign_transfer_ref	= gnttab_end_foreign_transfer_ref_v2,
+	.query_foreign_access		= gnttab_query_foreign_access_v2,
+	.update_subpage_entry		= gnttab_update_subpage_entry_v2,
+	.update_trans_entry		= gnttab_update_trans_entry_v2,
+};
+
+static void gnttab_request_version(void)
+{
+	int rc;
+	struct gnttab_set_version gsv;
+
+	gsv.version = 2;
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
+	if (rc == 0) {
+		grant_table_version = 2;
+		gnttab_interface = &gnttab_v2_ops;
+	} else if (grant_table_version == 2) {
+		/*
+		 * If we've already used version 2 features,
+		 * but then suddenly discover that they're not
+		 * available (e.g. migrating to an older
+		 * version of Xen), almost unbounded badness
+		 * can happen.
+		 */
+		panic("we need grant tables version 2, but only version 1 is available");
+	} else {
+		grant_table_version = 1;
+		gnttab_interface = &gnttab_v1_ops;
+	}
+	printk(KERN_INFO "Grant tables using version %d layout.\n",
+		grant_table_version);
 }
 
 int gnttab_resume(void)
 {
 	unsigned int max_nr_gframes;
 
+	gnttab_request_version();
 	max_nr_gframes = gnttab_max_grant_frames();
 	if (max_nr_gframes < nr_grant_frames)
 		return -ENOSYS;
@@ -587,9 +982,10 @@ int gnttab_resume(void)
 	if (xen_pv_domain())
 		return gnttab_map(0, nr_grant_frames - 1);
 
-	if (!shared) {
-		shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes);
-		if (shared == NULL) {
+	if (gnttab_shared.addr == NULL) {
+		gnttab_shared.addr = ioremap(xen_hvm_resume_frames,
+						PAGE_SIZE * max_nr_gframes);
+		if (gnttab_shared.addr == NULL) {
 			printk(KERN_WARNING
 					"Failed to ioremap gnttab share frames!");
 			return -ENOMEM;
@@ -603,7 +999,7 @@ int gnttab_resume(void)
 
 int gnttab_suspend(void)
 {
-	arch_gnttab_unmap_shared(shared, nr_grant_frames);
+	gnttab_interface->unmap_frames();
 	return 0;
 }
 
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/privcmd.c
index dbd3b16fd131..ccee0f16bcf8 100644
--- a/drivers/xen/xenfs/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -18,6 +19,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
+#include <linux/miscdevice.h>
 
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -32,6 +34,10 @@
 #include <xen/page.h>
 #include <xen/xen-ops.h>
 
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
 #ifndef HAVE_ARCH_PRIVCMD_MMAP
 static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
 #endif
@@ -359,7 +365,6 @@ static long privcmd_ioctl(struct file *file,
 	return ret;
 }
 
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
 static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
@@ -392,9 +397,39 @@ static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
 {
 	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
 }
-#endif
 
-const struct file_operations privcmd_file_ops = {
+const struct file_operations xen_privcmd_fops = {
+	.owner = THIS_MODULE,
 	.unlocked_ioctl = privcmd_ioctl,
 	.mmap = privcmd_mmap,
 };
+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
+
+static struct miscdevice privcmd_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/privcmd",
+	.fops = &xen_privcmd_fops,
+};
+
+static int __init privcmd_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&privcmd_dev);
+	if (err != 0) {
+		printk(KERN_ERR "Could not register Xen privcmd device\n");
+		return err;
+	}
+	return 0;
+}
+
+static void __exit privcmd_exit(void)
+{
+	misc_deregister(&privcmd_dev);
+}
+
+module_init(privcmd_init);
+module_exit(privcmd_exit);
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
new file mode 100644
index 000000000000..14facaeed36f
--- /dev/null
+++ b/drivers/xen/privcmd.h
@@ -0,0 +1,3 @@
+#include <linux/fs.h>
+
+extern const struct file_operations xen_privcmd_fops;
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 8f06e1ed028c..7944a17f5cbf 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -99,6 +99,7 @@ static void pcistub_device_release(struct kref *kref)
 	kfree(pci_get_drvdata(psdev->dev));
 	pci_set_drvdata(psdev->dev, NULL);
 
+	psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 	pci_dev_put(psdev->dev);
 
 	kfree(psdev);
@@ -234,6 +235,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
 	xen_pcibk_config_free_dyn_fields(found_psdev->dev);
 	xen_pcibk_config_reset_dev(found_psdev->dev);
 
+	xen_unregister_device_domain_owner(found_psdev->dev);
+
 	spin_lock_irqsave(&found_psdev->lock, flags);
 	found_psdev->pdev = NULL;
 	spin_unlock_irqrestore(&found_psdev->lock, flags);
@@ -331,6 +334,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev)
 	dev_dbg(&dev->dev, "reset device\n");
 	xen_pcibk_reset_device(dev);
 
+	dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 	return 0;
 
 config_release:
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 075525945e36..8e1c44d8ab46 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -241,11 +241,10 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
 		goto out;
 
 	dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
-	dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
 	if (xen_register_device_domain_owner(dev,
 					     pdev->xdev->otherend_id) != 0) {
-		dev_err(&dev->dev, "device has been assigned to another " \
-			"domain! Over-writting the ownership, but beware.\n");
+		dev_err(&dev->dev, "Stealing ownership from dom%d.\n",
+			xen_find_device_domain_owner(dev));
 		xen_unregister_device_domain_owner(dev);
 		xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
 	}
@@ -281,7 +280,6 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
 	}
 
 	dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
-	dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
 	xen_unregister_device_domain_owner(dev);
 
 	xen_pcibk_release_pci_dev(pdev, dev);
@@ -707,19 +705,16 @@ static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
 	return 0;
 }
 
-static const struct xenbus_device_id xenpci_ids[] = {
+static const struct xenbus_device_id xen_pcibk_ids[] = {
 	{"pci"},
 	{""},
 };
 
-static struct xenbus_driver xenbus_xen_pcibk_driver = {
-	.name			= DRV_NAME,
-	.owner			= THIS_MODULE,
-	.ids			= xenpci_ids,
+static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME,
 	.probe			= xen_pcibk_xenbus_probe,
 	.remove			= xen_pcibk_xenbus_remove,
 	.otherend_changed	= xen_pcibk_frontend_changed,
-};
+);
 
 const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
 
@@ -735,11 +730,11 @@ int __init xen_pcibk_xenbus_register(void)
 	if (passthrough)
 		xen_pcibk_backend = &xen_pcibk_passthrough_backend;
 	pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
-	return xenbus_register_backend(&xenbus_xen_pcibk_driver);
+	return xenbus_register_backend(&xen_pcibk_driver);
 }
 
 void __exit xen_pcibk_xenbus_unregister(void)
 {
 	destroy_workqueue(xen_pcibk_wq);
-	xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
+	xenbus_unregister_driver(&xen_pcibk_driver);
 }
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
index 8dca685358b4..31e2e9050c7a 100644
--- a/drivers/xen/xenbus/Makefile
+++ b/drivers/xen/xenbus/Makefile
@@ -1,4 +1,5 @@
 obj-y	+= xenbus.o
+obj-y	+= xenbus_dev_frontend.o
 
 xenbus-objs =
 xenbus-objs += xenbus_client.o
@@ -9,4 +10,5 @@ xenbus-objs += xenbus_probe.o
 xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
 xenbus-objs += $(xenbus-be-objs-y)
 
+obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o
 obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 1906125eab49..566d2adbd6ea 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -32,15 +32,39 @@
 
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/event_channel.h>
+#include <xen/balloon.h>
 #include <xen/events.h>
 #include <xen/grant_table.h>
 #include <xen/xenbus.h>
+#include <xen/xen.h>
+
+#include "xenbus_probe.h"
+
+struct xenbus_map_node {
+	struct list_head next;
+	union {
+		struct vm_struct *area; /* PV */
+		struct page *page;     /* HVM */
+	};
+	grant_handle_t handle;
+};
+
+static DEFINE_SPINLOCK(xenbus_valloc_lock);
+static LIST_HEAD(xenbus_valloc_pages);
+
+struct xenbus_ring_ops {
+	int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
+	int (*unmap)(struct xenbus_device *dev, void *vaddr);
+};
+
+static const struct xenbus_ring_ops *ring_ops __read_mostly;
 
 const char *xenbus_strstate(enum xenbus_state state)
 {
@@ -436,19 +460,33 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
  */
 int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
 {
+	return ring_ops->map(dev, gnt_ref, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
+				     int gnt_ref, void **vaddr)
+{
 	struct gnttab_map_grant_ref op = {
 		.flags = GNTMAP_host_map | GNTMAP_contains_pte,
 		.ref   = gnt_ref,
 		.dom   = dev->otherend_id,
 	};
+	struct xenbus_map_node *node;
 	struct vm_struct *area;
 	pte_t *pte;
 
 	*vaddr = NULL;
 
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
 	area = alloc_vm_area(PAGE_SIZE, &pte);
-	if (!area)
+	if (!area) {
+		kfree(node);
 		return -ENOMEM;
+	}
 
 	op.host_addr = arbitrary_virt_to_machine(pte).maddr;
 
@@ -457,19 +495,59 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
 
 	if (op.status != GNTST_okay) {
 		free_vm_area(area);
+		kfree(node);
 		xenbus_dev_fatal(dev, op.status,
 				 "mapping in shared page %d from domain %d",
 				 gnt_ref, dev->otherend_id);
 		return op.status;
 	}
 
-	/* Stuff the handle in an unused field */
-	area->phys_addr = (unsigned long)op.handle;
+	node->handle = op.handle;
+	node->area = area;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
 
 	*vaddr = area->addr;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
+				      int gnt_ref, void **vaddr)
+{
+	struct xenbus_map_node *node;
+	int err;
+	void *addr;
+
+	*vaddr = NULL;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+
+	err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
+	if (err)
+		goto out_err;
+
+	addr = pfn_to_kaddr(page_to_pfn(node->page));
+
+	err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
+	if (err)
+		goto out_err;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_add(&node->next, &xenbus_valloc_pages);
+	spin_unlock(&xenbus_valloc_lock);
+
+	*vaddr = addr;
+	return 0;
+
+ out_err:
+	free_xenballooned_pages(1, &node->page);
+	kfree(node);
+	return err;
+}
 
 
 /**
@@ -489,12 +567,10 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
 int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
 		    grant_handle_t *handle, void *vaddr)
 {
-	struct gnttab_map_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.flags     = GNTMAP_host_map,
-		.ref       = gnt_ref,
-		.dom       = dev->otherend_id,
-	};
+	struct gnttab_map_grant_ref op;
+
+	gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref,
+			  dev->otherend_id);
 
 	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
 		BUG();
@@ -525,32 +601,36 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring);
  */
 int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 {
-	struct vm_struct *area;
+	return ring_ops->unmap(dev, vaddr);
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr)
+{
+	struct xenbus_map_node *node;
 	struct gnttab_unmap_grant_ref op = {
 		.host_addr = (unsigned long)vaddr,
 	};
 	unsigned int level;
 
-	/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
-	 * method so that we don't have to muck with vmalloc internals here.
-	 * We could force the user to hang on to their struct vm_struct from
-	 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
-	 * this API.
-	 */
-	read_lock(&vmlist_lock);
-	for (area = vmlist; area != NULL; area = area->next) {
-		if (area->addr == vaddr)
-			break;
+	spin_lock(&xenbus_valloc_lock);
+	list_for_each_entry(node, &xenbus_valloc_pages, next) {
+		if (node->area->addr == vaddr) {
+			list_del(&node->next);
+			goto found;
+		}
 	}
-	read_unlock(&vmlist_lock);
+	node = NULL;
+ found:
+	spin_unlock(&xenbus_valloc_lock);
 
-	if (!area) {
+	if (!node) {
 		xenbus_dev_error(dev, -ENOENT,
 				 "can't find mapped virtual address %p", vaddr);
 		return GNTST_bad_virt_addr;
 	}
 
-	op.handle = (grant_handle_t)area->phys_addr;
+	op.handle = node->handle;
 	op.host_addr = arbitrary_virt_to_machine(
 		lookup_address((unsigned long)vaddr, &level)).maddr;
 
@@ -558,16 +638,50 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
 		BUG();
 
 	if (op.status == GNTST_okay)
-		free_vm_area(area);
+		free_vm_area(node->area);
 	else
 		xenbus_dev_error(dev, op.status,
 				 "unmapping page at handle %d error %d",
-				 (int16_t)area->phys_addr, op.status);
+				 node->handle, op.status);
 
+	kfree(node);
 	return op.status;
 }
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 
+static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr)
+{
+	int rv;
+	struct xenbus_map_node *node;
+	void *addr;
+
+	spin_lock(&xenbus_valloc_lock);
+	list_for_each_entry(node, &xenbus_valloc_pages, next) {
+		addr = pfn_to_kaddr(page_to_pfn(node->page));
+		if (addr == vaddr) {
+			list_del(&node->next);
+			goto found;
+		}
+	}
+	node = NULL;
+ found:
+	spin_unlock(&xenbus_valloc_lock);
+
+	if (!node) {
+		xenbus_dev_error(dev, -ENOENT,
+				 "can't find mapped virtual address %p", vaddr);
+		return GNTST_bad_virt_addr;
+	}
+
+	rv = xenbus_unmap_ring(dev, node->handle, addr);
+
+	if (!rv)
+		free_xenballooned_pages(1, &node->page);
+	else
+		WARN(1, "Leaking %p\n", vaddr);
+
+	kfree(node);
+	return rv;
+}
 
 /**
  * xenbus_unmap_ring
@@ -582,10 +696,9 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
 int xenbus_unmap_ring(struct xenbus_device *dev,
 		      grant_handle_t handle, void *vaddr)
 {
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.handle    = handle,
-	};
+	struct gnttab_unmap_grant_ref op;
+
+	gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle);
 
 	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
 		BUG();
@@ -617,3 +730,21 @@ enum xenbus_state xenbus_read_driver_state(const char *path)
 	return result;
 }
 EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
+
+static const struct xenbus_ring_ops ring_ops_pv = {
+	.map = xenbus_map_ring_valloc_pv,
+	.unmap = xenbus_unmap_ring_vfree_pv,
+};
+
+static const struct xenbus_ring_ops ring_ops_hvm = {
+	.map = xenbus_map_ring_valloc_hvm,
+	.unmap = xenbus_unmap_ring_vfree_hvm,
+};
+
+void __init xenbus_ring_ops_init(void)
+{
+	if (xen_pv_domain())
+		ring_ops = &ring_ops_pv;
+	else
+		ring_ops = &ring_ops_hvm;
+}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
index c21db7513736..6e42800fa499 100644
--- a/drivers/xen/xenbus/xenbus_comms.h
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -31,6 +31,8 @@
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
 
+#include <linux/fs.h>
+
 int xs_init(void);
 int xb_init_comms(void);
 
@@ -43,4 +45,6 @@ int xs_input_avail(void);
 extern struct xenstore_domain_interface *xen_store_interface;
 extern int xen_store_evtchn;
 
+extern const struct file_operations xen_xenbus_fops;
+
 #endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
new file mode 100644
index 000000000000..3d3be78c1093
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -0,0 +1,90 @@
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/capability.h>
+
+#include <xen/xen.h>
+#include <xen/page.h>
+#include <xen/xenbus_dev.h>
+
+#include "xenbus_comms.h"
+
+MODULE_LICENSE("GPL");
+
+static int xenbus_backend_open(struct inode *inode, struct file *filp)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return nonseekable_open(inode, filp);
+}
+
+static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+		case IOCTL_XENBUS_BACKEND_EVTCHN:
+			if (xen_store_evtchn > 0)
+				return xen_store_evtchn;
+			return -ENODEV;
+
+		default:
+			return -ENOTTY;
+	}
+}
+
+static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
+		return -EINVAL;
+
+	if (remap_pfn_range(vma, vma->vm_start,
+			    virt_to_pfn(xen_store_interface),
+			    size, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+const struct file_operations xenbus_backend_fops = {
+	.open = xenbus_backend_open,
+	.mmap = xenbus_backend_mmap,
+	.unlocked_ioctl = xenbus_backend_ioctl,
+};
+
+static struct miscdevice xenbus_backend_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/xenbus_backend",
+	.fops = &xenbus_backend_fops,
+};
+
+static int __init xenbus_backend_init(void)
+{
+	int err;
+
+	if (!xen_initial_domain())
+		return -ENODEV;
+
+	err = misc_register(&xenbus_backend_dev);
+	if (err)
+		printk(KERN_ERR "Could not register xenbus backend device\n");
+	return err;
+}
+
+static void __exit xenbus_backend_exit(void)
+{
+	misc_deregister(&xenbus_backend_dev);
+}
+
+module_init(xenbus_backend_init);
+module_exit(xenbus_backend_exit);
diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index bbd000f88af7..527dc2a3b89f 100644
--- a/drivers/xen/xenfs/xenbus.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -52,13 +52,17 @@
 #include <linux/namei.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
 
-#include "xenfs.h"
-#include "../xenbus/xenbus_comms.h"
+#include "xenbus_comms.h"
 
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 #include <asm/xen/hypervisor.h>
 
+MODULE_LICENSE("GPL");
+
 /*
  * An element of a list of outstanding transactions, for which we're
  * still waiting a reply.
@@ -101,7 +105,7 @@ struct xenbus_file_priv {
 	unsigned int len;
 	union {
 		struct xsd_sockmsg msg;
-		char buffer[PAGE_SIZE];
+		char buffer[XENSTORE_PAYLOAD_MAX];
 	} u;
 
 	/* Response queue. */
@@ -583,7 +587,7 @@ static unsigned int xenbus_file_poll(struct file *file, poll_table *wait)
 	return 0;
 }
 
-const struct file_operations xenbus_file_ops = {
+const struct file_operations xen_xenbus_fops = {
 	.read = xenbus_file_read,
 	.write = xenbus_file_write,
 	.open = xenbus_file_open,
@@ -591,3 +595,31 @@ const struct file_operations xenbus_file_ops = {
 	.poll = xenbus_file_poll,
 	.llseek = no_llseek,
 };
+EXPORT_SYMBOL_GPL(xen_xenbus_fops);
+
+static struct miscdevice xenbus_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/xenbus",
+	.fops = &xen_xenbus_fops,
+};
+
+static int __init xenbus_init(void)
+{
+	int err;
+
+	if (!xen_domain())
+		return -ENODEV;
+
+	err = misc_register(&xenbus_dev);
+	if (err)
+		printk(KERN_ERR "Could not register xenbus frontend device\n");
+	return err;
+}
+
+static void __exit xenbus_exit(void)
+{
+	misc_deregister(&xenbus_dev);
+}
+
+module_init(xenbus_init);
+module_exit(xenbus_exit);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 1b178c6e8937..3864967202b5 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -291,14 +291,9 @@ void xenbus_dev_shutdown(struct device *_dev)
 EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
 
 int xenbus_register_driver_common(struct xenbus_driver *drv,
-				  struct xen_bus_type *bus,
-				  struct module *owner,
-				  const char *mod_name)
+				  struct xen_bus_type *bus)
 {
-	drv->driver.name = drv->name;
 	drv->driver.bus = &bus->bus;
-	drv->driver.owner = owner;
-	drv->driver.mod_name = mod_name;
 
 	return driver_register(&drv->driver);
 }
@@ -730,6 +725,8 @@ static int __init xenbus_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
+	xenbus_ring_ops_init();
+
 	if (xen_hvm_domain()) {
 		uint64_t v = 0;
 		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
index 9b1de4e34c64..bb4f92ed8730 100644
--- a/drivers/xen/xenbus/xenbus_probe.h
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -53,9 +53,7 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
 extern int xenbus_dev_probe(struct device *_dev);
 extern int xenbus_dev_remove(struct device *_dev);
 extern int xenbus_register_driver_common(struct xenbus_driver *drv,
-					 struct xen_bus_type *bus,
-					 struct module *owner,
-					 const char *mod_name);
+					 struct xen_bus_type *bus);
 extern int xenbus_probe_node(struct xen_bus_type *bus,
 			     const char *type,
 			     const char *nodename);
@@ -76,4 +74,6 @@ extern void xenbus_otherend_changed(struct xenbus_watch *watch,
 extern int xenbus_read_otherend_details(struct xenbus_device *xendev,
 					char *id_node, char *path_node);
 
+void xenbus_ring_ops_init(void);
+
 #endif
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index c3c7cd195c11..257be37d9091 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -232,15 +232,13 @@ int xenbus_dev_is_online(struct xenbus_device *dev)
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
 
-int __xenbus_register_backend(struct xenbus_driver *drv,
-			      struct module *owner, const char *mod_name)
+int xenbus_register_backend(struct xenbus_driver *drv)
 {
 	drv->read_otherend_details = read_frontend_details;
 
-	return xenbus_register_driver_common(drv, &xenbus_backend,
-					     owner, mod_name);
+	return xenbus_register_driver_common(drv, &xenbus_backend);
 }
-EXPORT_SYMBOL_GPL(__xenbus_register_backend);
+EXPORT_SYMBOL_GPL(xenbus_register_backend);
 
 static int backend_probe_and_watch(struct notifier_block *notifier,
 				   unsigned long event,
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 2f73195512b4..9c57819df51a 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -230,15 +230,13 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
 			 print_device_status);
 }
 
-int __xenbus_register_frontend(struct xenbus_driver *drv,
-			       struct module *owner, const char *mod_name)
+int xenbus_register_frontend(struct xenbus_driver *drv)
 {
 	int ret;
 
 	drv->read_otherend_details = read_backend_details;
 
-	ret = xenbus_register_driver_common(drv, &xenbus_frontend,
-					    owner, mod_name);
+	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
 	if (ret)
 		return ret;
 
@@ -247,7 +245,7 @@ int __xenbus_register_frontend(struct xenbus_driver *drv,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+EXPORT_SYMBOL_GPL(xenbus_register_frontend);
 
 static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
 static int backend_state;
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index ede860f921df..d1c217b23a42 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -532,21 +532,18 @@ int xenbus_printf(struct xenbus_transaction t,
 {
 	va_list ap;
 	int ret;
-#define PRINTF_BUFFER_SIZE 4096
-	char *printf_buffer;
-
-	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
-	if (printf_buffer == NULL)
-		return -ENOMEM;
+	char *buf;
 
 	va_start(ap, fmt);
-	ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
+	buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
 	va_end(ap);
 
-	BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
-	ret = xenbus_write(t, dir, node, printf_buffer);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = xenbus_write(t, dir, node, buf);
 
-	kfree(printf_buffer);
+	kfree(buf);
 
 	return ret;
 }
@@ -801,6 +798,12 @@ static int process_msg(void)
 		goto out;
 	}
 
+	if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) {
+		kfree(msg);
+		err = -EINVAL;
+		goto out;
+	}
+
 	body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH);
 	if (body == NULL) {
 		kfree(msg);
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index 4fde9440fe1f..b019865fcc56 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,4 +1,4 @@
 obj-$(CONFIG_XENFS) += xenfs.o
 
-xenfs-y			  = super.o xenbus.o privcmd.o
+xenfs-y			  = super.o
 xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 1aa389719846..a84b53c01436 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -16,6 +16,8 @@
 #include <xen/xen.h>
 
 #include "xenfs.h"
+#include "../privcmd.h"
+#include "../xenbus/xenbus_comms.h"
 
 #include <asm/xen/hypervisor.h>
 
@@ -82,9 +84,9 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr xenfs_files[] = {
 		[1] = {},
-		{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+		{ "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR },
 		{ "capabilities", &capabilities_file_ops, S_IRUGO },
-		{ "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
+		{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
 		{""},
 	};
 	int rc;
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index b68aa6200003..6b80c7779c02 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -1,8 +1,6 @@
 #ifndef _XENFS_XENBUS_H
 #define _XENFS_XENBUS_H
 
-extern const struct file_operations xenbus_file_ops;
-extern const struct file_operations privcmd_file_ops;
 extern const struct file_operations xsd_kva_file_ops;
 extern const struct file_operations xsd_port_file_ops;
 
diff --git a/include/xen/events.h b/include/xen/events.h
index d287997d3eab..0f773708e02c 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -37,6 +37,13 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
  */
 void unbind_from_irqhandler(unsigned int irq, void *dev_id);
 
+/*
+ * Allow extra references to event channels exposed to userspace by evtchn
+ */
+int evtchn_make_refcounted(unsigned int evtchn);
+int evtchn_get(unsigned int evtchn);
+void evtchn_put(unsigned int evtchn);
+
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
 int resend_irq_on_evtchn(unsigned int irq);
 void rebind_evtchn_irq(int evtchn, int irq);
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 11e2dfce42f8..15f8a00ff003 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -62,6 +62,24 @@ int gnttab_resume(void);
 
 int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 				int readonly);
+int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame,
+					int flags, unsigned page_off,
+					unsigned length);
+int gnttab_grant_foreign_access_trans(domid_t domid, int flags,
+				      domid_t trans_domid,
+				      grant_ref_t trans_gref);
+
+/*
+ * Are sub-page grants available on this version of Xen?  Returns true if they
+ * are, and false if they're not.
+ */
+bool gnttab_subpage_grants_available(void);
+
+/*
+ * Are transitive grants available on this version of Xen?  Returns true if they
+ * are, and false if they're not.
+ */
+bool gnttab_trans_grants_available(void);
 
 /*
  * End access through the given grant reference, iff the grant entry is no
@@ -108,6 +126,13 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
 
 void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
 				     unsigned long frame, int readonly);
+int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid,
+					    unsigned long frame, int flags,
+					    unsigned page_off,
+					    unsigned length);
+int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid,
+					  int flags, domid_t trans_domid,
+					  grant_ref_t trans_gref);
 
 void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
 				       unsigned long pfn);
@@ -145,9 +170,11 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr,
 
 int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
 			   unsigned long max_nr_gframes,
-			   struct grant_entry **__shared);
-void arch_gnttab_unmap_shared(struct grant_entry *shared,
-			      unsigned long nr_gframes);
+			   void **__shared);
+int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   grant_status_t **__shared);
+void arch_gnttab_unmap(void *shared, unsigned long nr_gframes);
 
 extern unsigned long xen_hvm_resume_frames;
 unsigned int gnttab_max_grant_frames(void);
@@ -155,9 +182,9 @@ unsigned int gnttab_max_grant_frames(void);
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
-			struct gnttab_map_grant_ref *kmap_ops,
+		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count);
+		      struct page **pages, unsigned int count, bool clear_pte);
 
 #endif /* __ASM_GNTTAB_H__ */
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index 39e571796e32..a17d84433e6a 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -85,12 +85,22 @@
  */
 
 /*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
  * A grant table comprises a packed array of grant entries in one or more
  * page frames shared between Xen and a guest.
  * [XEN]: This field is written by Xen and read by the sharing guest.
  * [GST]: This field is written by the guest and read by Xen.
  */
-struct grant_entry {
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility.  New guests should use version 2.
+ */
+struct grant_entry_v1 {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
     uint16_t flags;
     /* The domain being granted foreign privileges. [GST] */
@@ -108,10 +118,13 @@ struct grant_entry {
  *  GTF_permit_access: Allow @domid to map/access @frame.
  *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
  *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
  */
 #define GTF_invalid         (0U<<0)
 #define GTF_permit_access   (1U<<0)
 #define GTF_accept_transfer (2U<<0)
+#define GTF_transitive      (3U<<0)
 #define GTF_type_mask       (3U<<0)
 
 /*
@@ -119,6 +132,9 @@ struct grant_entry {
  *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
  *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
  *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
  */
 #define _GTF_readonly       (2)
 #define GTF_readonly        (1U<<_GTF_readonly)
@@ -126,6 +142,8 @@ struct grant_entry {
 #define GTF_reading         (1U<<_GTF_reading)
 #define _GTF_writing        (4)
 #define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U<<_GTF_sub_page)
 
 /*
  * Subflags for GTF_accept_transfer:
@@ -142,15 +160,81 @@ struct grant_entry {
 #define _GTF_transfer_completed (3)
 #define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
 
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
 
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
  */
+struct grant_entry_header {
+    uint16_t flags;
+    domid_t  domid;
+};
 
 /*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure, here is an union because three
+ * different types are suppotted: full_page, sub_page and transitive.
+ */
+union grant_entry_v2 {
+    struct grant_entry_header hdr;
+
+    /*
+     * This member is used for V1-style full page grants, where either:
+     *
+     * -- hdr.type is GTF_accept_transfer, or
+     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+     *
+     * In that case, the frame field has the same semantics as the
+     * field of the same name in the V1 entry structure.
+     */
+    struct {
+	struct grant_entry_header hdr;
+	uint32_t pad0;
+	uint64_t frame;
+    } full_page;
+
+    /*
+     * If the grant type is GTF_grant_access and GTF_sub_page is set,
+     * @domid is allowed to access bytes [@page_off,@page_off+@length)
+     * in frame @frame.
+     */
+    struct {
+	struct grant_entry_header hdr;
+	uint16_t page_off;
+	uint16_t length;
+	uint64_t frame;
+    } sub_page;
+
+    /*
+     * If the grant is GTF_transitive, @domid is allowed to use the
+     * grant @gref in domain @trans_domid, as if it was the local
+     * domain.  Obviously, the transitive access must be compatible
+     * with the original grant.
+     */
+    struct {
+	struct grant_entry_header hdr;
+	domid_t trans_domid;
+	uint16_t pad0;
+	grant_ref_t gref;
+    } transitive;
+
+    uint32_t __spacer[4]; /* Pad to a power of two */
+};
+
+typedef uint16_t grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
  */
-typedef uint32_t grant_ref_t;
 
 /*
  * Handle to track a mapping created via a grant reference.
@@ -322,6 +406,79 @@ struct gnttab_query_size {
 DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
 
 /*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace    7
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
+
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version          8
+struct gnttab_set_version {
+    /* IN parameters */
+    uint32_t version;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specify the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames     9
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+    GUEST_HANDLE(uint64_t) frame_list;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+#define GNTTABOP_get_version          10
+struct gnttab_get_version {
+    /* IN parameters */
+    domid_t dom;
+    uint16_t pad;
+    /* OUT parameters */
+    uint32_t version;
+};
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
+
+/*
  * Bitfield values for update_pin_status.flags.
  */
  /* Map the grant entry for access by I/O devices. */
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
index f6f07aa35af5..7cdfca24eafb 100644
--- a/include/xen/interface/io/xs_wire.h
+++ b/include/xen/interface/io/xs_wire.h
@@ -87,4 +87,7 @@ struct xenstore_domain_interface {
     XENSTORE_RING_IDX rsp_cons, rsp_prod;
 };
 
+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
 #endif /* _XS_WIRE_H */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 6a6e91449347..a890804945e3 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -523,6 +523,8 @@ struct tmem_op {
 	} u;
 };
 
+DEFINE_GUEST_HANDLE(u64);
+
 #else /* __ASSEMBLY__ */
 
 /* In assembly code we cannot use C numeric constant suffixes. */
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index b1b6676c1c43..e8c599b237c2 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -85,8 +85,6 @@ struct xenbus_device_id
 
 /* A xenbus driver. */
 struct xenbus_driver {
-	char *name;
-	struct module *owner;
 	const struct xenbus_device_id *ids;
 	int (*probe)(struct xenbus_device *dev,
 		     const struct xenbus_device_id *id);
@@ -101,31 +99,20 @@ struct xenbus_driver {
 	int (*is_ready)(struct xenbus_device *dev);
 };
 
-static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
-{
-	return container_of(drv, struct xenbus_driver, driver);
+#define DEFINE_XENBUS_DRIVER(var, drvname, methods...)		\
+struct xenbus_driver var ## _driver = {				\
+	.driver.name = drvname + 0 ?: var ## _ids->devicetype,	\
+	.driver.owner = THIS_MODULE,				\
+	.ids = var ## _ids, ## methods				\
 }
 
-int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
-					    struct module *owner,
-					    const char *mod_name);
-
-static inline int __must_check
-xenbus_register_frontend(struct xenbus_driver *drv)
+static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)
 {
-	WARN_ON(drv->owner != THIS_MODULE);
-	return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
+	return container_of(drv, struct xenbus_driver, driver);
 }
 
-int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
-					   struct module *owner,
-					   const char *mod_name);
-static inline int __must_check
-xenbus_register_backend(struct xenbus_driver *drv)
-{
-	WARN_ON(drv->owner != THIS_MODULE);
-	return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
-}
+int __must_check xenbus_register_frontend(struct xenbus_driver *);
+int __must_check xenbus_register_backend(struct xenbus_driver *);
 
 void xenbus_unregister_driver(struct xenbus_driver *drv);
 
diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h
new file mode 100644
index 000000000000..ac5f0fe47ed9
--- /dev/null
+++ b/include/xen/xenbus_dev.h
@@ -0,0 +1,41 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/xenbus_backend.
+ *
+ * Copyright (c) 2011 Bastian Blank <waldi@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_XEN_XENBUS_DEV_H__
+#define __LINUX_XEN_XENBUS_DEV_H__
+
+#include <linux/ioctl.h>
+
+#define IOCTL_XENBUS_BACKEND_EVTCHN			\
+	_IOC(_IOC_NONE, 'B', 0, 0)
+
+#endif /* __LINUX_XEN_XENBUS_DEV_H__ */