From 35c8ab4c5c31bbe9e90ee63ba1b199cb979f6520 Mon Sep 17 00:00:00 2001
From: Chuck Tuffli
Date: Sun, 21 Dec 2014 12:30:58 -0800
Subject: arm64: Relax licensing of arm64 Xen DMA operations

With Xen configured into the arm64 kernel, any driver allocating
DMA'able memory for PCI operations must be GPL-compatible, regardless
of its interaction with Xen. This patch relaxes the GPL requirement of
xen_dma_ops and its dependencies to allow open-source drivers to be
compiled for the arm64 architecture.

Signed-off-by: Chuck Tuffli
---
 arch/arm/xen/enlighten.c | 4 ++--
 arch/arm/xen/mm.c        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index c7ca936ebd99..263a2044c65b 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -29,10 +29,10 @@
 struct start_info _xen_start_info;
 struct start_info *xen_start_info = &_xen_start_info;
-EXPORT_SYMBOL_GPL(xen_start_info);
+EXPORT_SYMBOL(xen_start_info);

 enum xen_domain_type xen_domain_type = XEN_NATIVE;
-EXPORT_SYMBOL_GPL(xen_domain_type);
+EXPORT_SYMBOL(xen_domain_type);

 struct shared_info xen_dummy_shared_info;
 struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 351b24a979d4..793551d15f1d 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -149,7 +149,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);

 struct dma_map_ops *xen_dma_ops;
-EXPORT_SYMBOL_GPL(xen_dma_ops);
+EXPORT_SYMBOL(xen_dma_ops);

 static struct dma_map_ops xen_swiotlb_dma_ops = {
 	.mapping_error = xen_swiotlb_dma_mapping_error,
--
cgit 1.4.1


From 94dd85f6a0688245dbb5a452a86a2a545dee96c1 Mon Sep 17 00:00:00 2001
From: "Palik, Imre"
Date: Tue, 13 Jan 2015 09:14:22 +0100
Subject: x86/xen: prefer TSC over xen clocksource for dom0

In Dom0, the use of the TSC clocksource (whenever it is stable enough
to be used) instead of the Xen clocksource should not cause any issues,
as Dom0 VMs are never live-migrated. The TSC clocksource is somewhat
more efficient than the Xen paravirtualised clocksource, thus it should
have a higher rating.

This patch decreases the rating of the Xen clocksource in Dom0s to 275,
which is half-way between the rating of the TSC clocksource (300) and
the hpet clocksource (250).

Cc: Anthony Liguori
Signed-off-by: Imre Palik
Signed-off-by: David Vrabel
---
 arch/x86/xen/time.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 69087341d9ae..55da33b1d51c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -479,6 +479,10 @@ static void __init xen_time_init(void)
 	int cpu = smp_processor_id();
 	struct timespec tp;

+	/* As Dom0 is never moved, no penalty on using TSC there */
+	if (xen_initial_domain())
+		xen_clocksource.rating = 275;
+
 	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
--
cgit 1.4.1


From 144c61ad355a4f31a45a2cd91c808020c4322374 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Fri, 23 Jan 2015 08:37:01 +0000
Subject: xen/tmem: mark xen_tmem_init() __init

As a module_init() function, xen_tmem_init() should have been annotated
this way from the beginning.
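For illustration, a minimal sketch of the pattern (the example_* name is
hypothetical, not part of the patch): a module_init() function runs exactly
once, so marking it __init places it in .init.text, which the kernel frees
after initialisation when the code is built in:

    #include <linux/init.h>
    #include <linux/module.h>

    /* Runs once at boot (built-in) or at module load; __init lets the
     * kernel discard the function's text after initialisation.
     */
    static int __init example_init(void)
    {
            return 0;
    }
    module_init(example_init);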
Signed-off-by: Jan Beulich Signed-off-by: David Vrabel --- drivers/xen/tmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 83b5c53bec6b..8a65423bc696 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -374,7 +374,7 @@ static struct frontswap_ops tmem_frontswap_ops = { }; #endif -static int xen_tmem_init(void) +static int __init xen_tmem_init(void) { if (!xen_domain()) return 0; -- cgit 1.4.1 From 57b6b99bac045bece3b3892377e863b571314950 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 26 Jan 2015 02:10:08 -0800 Subject: x86,xen: use current->state helpers Call __set_current_state() instead of assigning the new state directly. These interfaces also aid CONFIG_DEBUG_ATOMIC_SLEEP environments, keeping track of who changed the state. Signed-off-by: Davidlohr Bueso Signed-off-by: David Vrabel --- arch/x86/xen/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 4c071aeb8417..08e8489c47f1 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -507,7 +507,7 @@ static int xen_cpu_disable(void) static void xen_cpu_die(unsigned int cpu) { while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { - current->state = TASK_UNINTERRUPTIBLE; + __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } -- cgit 1.4.1 From f0feed10aa3ef6b6f6254bf9a66abd58c0011d90 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 28 Jan 2015 07:44:21 +0100 Subject: x86/xen: cleanup arch/x86/xen/setup.c Remove extern declarations in arch/x86/xen/setup.c which are either not used or redundant. Move needed other extern declarations to xen-ops.h Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/setup.c | 10 ---------- arch/x86/xen/xen-ops.h | 6 ++++++ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 865e56cea7a0..d2520c30c2f2 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -32,16 +32,6 @@ #include "p2m.h" #include "mmu.h" -/* These are code, but not functions. Defined in entry.S */ -extern const char xen_hypervisor_callback[]; -extern const char xen_failsafe_callback[]; -#ifdef CONFIG_X86_64 -extern asmlinkage void nmi(void); -#endif -extern void xen_sysenter_target(void); -extern void xen_syscall_target(void); -extern void xen_syscall32_target(void); - /* Amount of extra memory space we add to the e820 ranges */ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 5686bd9d58cc..9e195c683549 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -10,6 +10,12 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +void xen_sysenter_target(void); +#ifdef CONFIG_X86_64 +void xen_syscall_target(void); +void xen_syscall32_target(void); +#endif + extern void *xen_initial_gdt; struct trap_info; -- cgit 1.4.1 From 3ba5c867ca504a9fbdadbbd1b36b690c7e2718df Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 28 Jan 2015 07:44:22 +0100 Subject: x86/xen: use correct types for addresses in arch/x86/xen/setup.c In many places in arch/x86/xen/setup.c wrong types are used for physical addresses (u64 or unsigned long long). Use phys_addr_t instead. Use macros already defined instead of open coding them. Correct some other type mismatches. 
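As a sketch of the substitutions the patch applies (the macros come from
<linux/pfn.h>; the wrapper function below is hypothetical):

    #include <linux/kernel.h>
    #include <linux/pfn.h>

    /* Open-coded shifts become the standard helpers:
     *   (u64)pages * PAGE_SIZE  ->  PFN_PHYS(pages)
     *   size / PAGE_SIZE        ->  PFN_DOWN(size)
     * and u64 / unsigned long long addresses become phys_addr_t.
     */
    static phys_addr_t example_clamp(phys_addr_t size, unsigned long extra_pages)
    {
            return min(size, PFN_PHYS(extra_pages));
    }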
Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/setup.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d2520c30c2f2..4dcc60819eda 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -64,7 +64,7 @@ static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; */ #define EXTRA_MEM_RATIO (10) -static void __init xen_add_extra_mem(u64 start, u64 size) +static void __init xen_add_extra_mem(phys_addr_t start, phys_addr_t size) { int i; @@ -87,10 +87,10 @@ static void __init xen_add_extra_mem(u64 start, u64 size) memblock_reserve(start, size); } -static void __init xen_del_extra_mem(u64 start, u64 size) +static void __init xen_del_extra_mem(phys_addr_t start, phys_addr_t size) { int i; - u64 start_r, size_r; + phys_addr_t start_r, size_r; for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) { start_r = xen_extra_mem[i].start; @@ -257,7 +257,7 @@ static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn, static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn) { struct mmu_update update = { - .ptr = ((unsigned long long)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, + .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, .val = pfn }; @@ -535,14 +535,15 @@ static unsigned long __init xen_get_max_pages(void) return min(max_pages, MAX_DOMAIN_PAGES); } -static void xen_align_and_add_e820_region(u64 start, u64 size, int type) +static void xen_align_and_add_e820_region(phys_addr_t start, phys_addr_t size, + int type) { - u64 end = start + size; + phys_addr_t end = start + size; /* Align RAM regions to page boundaries. */ if (type == E820_RAM) { start = PAGE_ALIGN(start); - end &= ~((u64)PAGE_SIZE - 1); + end &= ~((phys_addr_t)PAGE_SIZE - 1); } e820_add_region(start, end - start, type); @@ -567,7 +568,7 @@ char * __init xen_memory_setup(void) static struct e820entry map[E820MAX] __initdata; unsigned long max_pfn = xen_start_info->nr_pages; - unsigned long long mem_end; + phys_addr_t mem_end; int rc; struct xen_memory_map memmap; unsigned long max_pages; @@ -642,16 +643,16 @@ char * __init xen_memory_setup(void) extra_pages); i = 0; while (i < memmap.nr_entries) { - u64 addr = map[i].addr; - u64 size = map[i].size; + phys_addr_t addr = map[i].addr; + phys_addr_t size = map[i].size; u32 type = map[i].type; if (type == E820_RAM) { if (addr < mem_end) { size = min(size, mem_end - addr); } else if (extra_pages) { - size = min(size, (u64)extra_pages * PAGE_SIZE); - extra_pages -= size / PAGE_SIZE; + size = min(size, PFN_PHYS(extra_pages)); + extra_pages -= PFN_DOWN(size); xen_add_extra_mem(addr, size); xen_max_p2m_pfn = PFN_DOWN(addr + size); } else -- cgit 1.4.1 From a3f5239650a9c08df0473261aedd6f50f7775410 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 28 Jan 2015 07:44:23 +0100 Subject: x86/xen: add some __init and static annotations in arch/x86/xen/setup.c Some more functions in arch/x86/xen/setup.c can be made "__init". xen_ignore_unusable() can be made "static". 
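One of the functions gaining __init, xen_align_and_add_e820_region(), uses
this alignment idiom (restated standalone for reference; start and end are
phys_addr_t as in the previous patch):

    /* Only whole pages may be reported as RAM: round the start up and
     * the end down, so a region can shrink but never grow.
     */
    start = PAGE_ALIGN(start);                 /* round up to a page   */
    end  &= ~((phys_addr_t)PAGE_SIZE - 1);     /* round down to a page */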
Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 4dcc60819eda..55f388ef481a 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -535,8 +535,8 @@ static unsigned long __init xen_get_max_pages(void) return min(max_pages, MAX_DOMAIN_PAGES); } -static void xen_align_and_add_e820_region(phys_addr_t start, phys_addr_t size, - int type) +static void __init xen_align_and_add_e820_region(phys_addr_t start, + phys_addr_t size, int type) { phys_addr_t end = start + size; @@ -549,7 +549,7 @@ static void xen_align_and_add_e820_region(phys_addr_t start, phys_addr_t size, e820_add_region(start, end - start, type); } -void xen_ignore_unusable(struct e820entry *list, size_t map_size) +static void __init xen_ignore_unusable(struct e820entry *list, size_t map_size) { struct e820entry *entry; unsigned int i; -- cgit 1.4.1 From bf9d834a9bc54477f3745ba0bf926c8917c45680 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 28 Jan 2015 07:44:24 +0100 Subject: x86/xen: add some __init annotations in arch/x86/xen/mmu.c The file arch/x86/xen/mmu.c has some functions that can be annotated with "__init". Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/mmu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5c1f9ace7ae7..6a8bbf43e617 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1489,7 +1489,7 @@ static void __init xen_set_pte_init(pte_t *ptep, pte_t pte) native_set_pte(ptep, pte); } -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn) { struct mmuext_op op; op.cmd = cmd; @@ -1657,7 +1657,7 @@ void __init xen_reserve_top(void) * Like __va(), but returns address in the kernel mapping (which is * all we have until the physical memory mapping has been set up. 
*/ -static void *__ka(phys_addr_t paddr) +static void * __init __ka(phys_addr_t paddr) { #ifdef CONFIG_X86_64 return (void *)(paddr + __START_KERNEL_map); @@ -1667,7 +1667,7 @@ static void *__ka(phys_addr_t paddr) } /* Convert a machine address to physical address */ -static unsigned long m2p(phys_addr_t maddr) +static unsigned long __init m2p(phys_addr_t maddr) { phys_addr_t paddr; @@ -1678,13 +1678,14 @@ static unsigned long m2p(phys_addr_t maddr) } /* Convert a machine address to kernel virtual */ -static void *m2v(phys_addr_t maddr) +static void * __init m2v(phys_addr_t maddr) { return __ka(m2p(maddr)); } /* Set the page permissions on an identity-mapped pages */ -static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) +static void __init set_page_prot_flags(void *addr, pgprot_t prot, + unsigned long flags) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); @@ -1696,7 +1697,7 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } -static void set_page_prot(void *addr, pgprot_t prot) +static void __init set_page_prot(void *addr, pgprot_t prot) { return set_page_prot_flags(addr, prot, UVMF_NONE); } @@ -1769,7 +1770,7 @@ void __init xen_setup_machphys_mapping(void) } #ifdef CONFIG_X86_64 -static void convert_pfn_mfn(void *v) +static void __init convert_pfn_mfn(void *v) { pte_t *pte = v; int i; -- cgit 1.4.1 From 270b79338eb1bd1eb28e62994ffa7b9ecd9975d8 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 28 Jan 2015 07:44:25 +0100 Subject: x86/xen: cleanup arch/x86/xen/mmu.c Remove a nested ifdef. Signed-off-by: Juergen Gross Signed-off-by: David Vrabel --- arch/x86/xen/mmu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6a8bbf43e617..adca9e2b6553 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1734,10 +1734,8 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { pte_t pte; -#ifdef CONFIG_X86_32 if (pfn > max_pfn_mapped) max_pfn_mapped = pfn; -#endif if (!pte_none(pte_page[pteidx])) continue; -- cgit 1.4.1 From 667a0a06c99d5291433b869ed35dabdd95ba1453 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 18 Dec 2014 14:48:15 +0000 Subject: mm: provide a find_special_page vma operation The optional find_special_page VMA operation is used to lookup the pages backing a VMA. This is useful in cases where the normal mechanisms for finding the page don't work. This is only called if the PTE is special. One use case is a Xen PV guest mapping foreign pages into userspace. In a Xen PV guest, the PTEs contain MFNs so get_user_pages() (for example) must do an MFN to PFN (M2P) lookup before it can get the page. For foreign pages (those owned by another guest) the M2P lookup returns the PFN as seen by the foreign guest (which would be completely the wrong page for the local guest). This cannot be fixed up improving the M2P lookup since one MFN may be mapped onto two or more pages so getting the right page is impossible given just the MFN. 
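A sketch of how a driver could wire up the new hook (the example_* names and
struct example_priv are hypothetical; the hook itself is defined in the hunk
below):

    /* Called by vm_normal_page() only when the PTE is special, so the
     * driver can resolve the backing page from its own bookkeeping.
     */
    static struct page *example_find_special_page(struct vm_area_struct *vma,
                                                  unsigned long addr)
    {
            struct example_priv *priv = vma->vm_private_data;

            return priv->pages[(addr - vma->vm_start) >> PAGE_SHIFT];
    }

    static const struct vm_operations_struct example_vm_ops = {
            .find_special_page = example_find_special_page,
    };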
Signed-off-by: David Vrabel Acked-by: Andrew Morton --- include/linux/mm.h | 8 ++++++++ mm/memory.c | 2 ++ 2 files changed, 10 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 80fc92a49649..9269af7349fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -290,6 +290,14 @@ struct vm_operations_struct { /* called by sys_remap_file_pages() to populate non-linear mapping */ int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr, unsigned long size, pgoff_t pgoff); + + /* + * Called by vm_normal_page() for special PTEs to find the + * page for @addr. This is useful if the default behavior + * (using pte_page()) would not find the correct page. + */ + struct page *(*find_special_page)(struct vm_area_struct *vma, + unsigned long addr); }; struct mmu_gather; diff --git a/mm/memory.c b/mm/memory.c index 54f3a9b00956..dc2e01a315e2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -754,6 +754,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, if (HAVE_PTE_SPECIAL) { if (likely(!pte_special(pte))) goto check_pfn; + if (vma->vm_ops && vma->vm_ops->find_special_page) + return vma->vm_ops->find_special_page(vma, addr); if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) return NULL; if (!is_zero_pfn(pfn)) -- cgit 1.4.1 From d8ac3dd41aea245f65465449efc35dd3ac71e91d Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 13:24:09 +0000 Subject: mm: add 'foreign' alias for the 'pinned' page flag The foreign page flag will be used by Xen guests to mark pages that have grant mappings of frames from other (foreign) guests. The foreign flag is an alias for the existing (Xen-specific) pinned flag. This is safe because pinned is only used on pages used for page tables and these cannot also be foreign. Signed-off-by: Jennifer Herbert Acked-by: Andrew Morton Signed-off-by: David Vrabel --- include/linux/page-flags.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e1f5fcd79792..5ed7bdaf22d5 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -121,8 +121,12 @@ enum pageflags { PG_fscache = PG_private_2, /* page backed by cache */ /* XEN */ + /* Pinned in Xen as a read-only pagetable page. */ PG_pinned = PG_owner_priv_1, + /* Pinned as part of domain save (see xen_mm_pin_all()). */ PG_savepinned = PG_dirty, + /* Has a grant mapping of another (foreign) domain's page. */ + PG_foreign = PG_owner_priv_1, /* SLOB */ PG_slob_free = PG_private, @@ -215,6 +219,7 @@ __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, checked) /* Used by some filesystems */ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ +PAGEFLAG(Foreign, foreign); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) __SETPAGEFLAG(SwapBacked, swapbacked) -- cgit 1.4.1 From 853d0289340026b30f93fd0e768340221d4e605c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 5 Jan 2015 14:13:41 +0000 Subject: xen/grant-table: pre-populate kernel unmap ops for xen_gnttab_unmap_refs() When unmapping grants, instead of converting the kernel map ops to unmap ops on the fly, pre-populate the set of unmap ops. This allows the grant unmap for the kernel mappings to be trivially batched in the future. 
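The shape of the change, sketched in isolation (it mirrors the gntdev hunks
below; -1 is the invalid handle used before the map completes):

    /* Build the kernel unmap op next to its map op at setup time... */
    gnttab_set_map_op(&map->kmap_ops[i], address,
                      map->flags | GNTMAP_host_map,
                      map->grants[i].ref, map->grants[i].domid);
    gnttab_set_unmap_op(&map->kunmap_ops[i], address,
                        map->flags | GNTMAP_host_map, -1);

    /* ...and fill in the real handle once the map has succeeded. */
    map->kunmap_ops[i].handle = map->kmap_ops[i].handle;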
Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- arch/arm/include/asm/xen/page.h | 2 +- arch/arm/xen/p2m.c | 2 +- arch/x86/include/asm/xen/page.h | 2 +- arch/x86/xen/p2m.c | 21 ++++++++++----------- drivers/xen/gntdev.c | 20 ++++++++++++++------ drivers/xen/grant-table.c | 4 ++-- include/xen/grant_table.h | 2 +- 7 files changed, 30 insertions(+), 23 deletions(-) diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 68c739b3fdf4..2f7e6ff67d51 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -92,7 +92,7 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct page **pages, unsigned int count); extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count); bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index 054857776254..cb7a14c5cd69 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -102,7 +102,7 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count) { int i; diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 5eea09915a15..e9f52fe2d56a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -55,7 +55,7 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 70fb5075c901..df40b2888eae 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -816,7 +816,7 @@ static struct page *m2p_find_override(unsigned long mfn) } static int m2p_remove_override(struct page *page, - struct gnttab_map_grant_ref *kmap_op, + struct gnttab_unmap_grant_ref *kunmap_op, unsigned long mfn) { unsigned long flags; @@ -840,7 +840,7 @@ static int m2p_remove_override(struct page *page, list_del(&page->lru); spin_unlock_irqrestore(&m2p_override_lock, flags); - if (kmap_op != NULL) { + if (kunmap_op != NULL) { if (!PageHighMem(page)) { struct multicall_space mcs; struct gnttab_unmap_and_replace *unmap_op; @@ -855,13 +855,13 @@ static int m2p_remove_override(struct page *page, * issued. In this case handle is going to -1 because * it hasn't been modified yet. */ - if (kmap_op->handle == -1) + if (kunmap_op->handle == -1) xen_mc_flush(); /* * Now if kmap_op->handle is negative it means that the * hypercall actually returned an error. 
*/ - if (kmap_op->handle == GNTST_general_error) { + if (kunmap_op->handle == GNTST_general_error) { pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", pfn, mfn); put_balloon_scratch_page(); @@ -873,9 +873,9 @@ static int m2p_remove_override(struct page *page, mcs = __xen_mc_entry( sizeof(struct gnttab_unmap_and_replace)); unmap_op = mcs.args; - unmap_op->host_addr = kmap_op->host_addr; + unmap_op->host_addr = kunmap_op->host_addr; unmap_op->new_addr = scratch_page_address; - unmap_op->handle = kmap_op->handle; + unmap_op->handle = kunmap_op->handle; MULTI_grant_table_op(mcs.mc, GNTTABOP_unmap_and_replace, unmap_op, 1); @@ -887,7 +887,6 @@ static int m2p_remove_override(struct page *page, xen_mc_issue(PARAVIRT_LAZY_MMU); - kmap_op->host_addr = 0; put_balloon_scratch_page(); } } @@ -912,7 +911,7 @@ static int m2p_remove_override(struct page *page, } int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count) { int i, ret = 0; @@ -921,7 +920,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; - if (kmap_ops && + if (kunmap_ops && !in_interrupt() && paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { arch_enter_lazy_mmu_mode(); @@ -942,8 +941,8 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, ClearPagePrivate(pages[i]); set_phys_to_machine(pfn, pages[i]->index); - if (kmap_ops) - ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); + if (kunmap_ops) + ret = m2p_remove_override(pages[i], &kunmap_ops[i], mfn); if (ret) goto out; } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 073b4a19a8b0..6444172f2842 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -91,6 +91,7 @@ struct grant_map { struct gnttab_map_grant_ref *map_ops; struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; + struct gnttab_unmap_grant_ref *kunmap_ops; struct page **pages; }; @@ -124,6 +125,7 @@ static void gntdev_free_map(struct grant_map *map) kfree(map->map_ops); kfree(map->unmap_ops); kfree(map->kmap_ops); + kfree(map->kunmap_ops); kfree(map); } @@ -140,11 +142,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); + add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || NULL == add->kmap_ops || + NULL == add->kunmap_ops || NULL == add->pages) goto err; @@ -155,6 +159,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; add->kmap_ops[i].handle = -1; + add->kunmap_ops[i].handle = -1; } add->index = 0; @@ -280,6 +285,8 @@ static int map_grant_pages(struct grant_map *map) map->flags | GNTMAP_host_map, map->grants[i].ref, map->grants[i].domid); + gnttab_set_unmap_op(&map->kunmap_ops[i], address, + map->flags | GNTMAP_host_map, -1); } } @@ -290,13 +297,14 @@ static int map_grant_pages(struct grant_map *map) return err; for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status) + if (map->map_ops[i].status) { 
 			err = -EINVAL;
-		else {
-			BUG_ON(map->map_ops[i].handle == -1);
-			map->unmap_ops[i].handle = map->map_ops[i].handle;
-			pr_debug("map handle=%d\n", map->map_ops[i].handle);
+			continue;
 		}
+
+		map->unmap_ops[i].handle = map->map_ops[i].handle;
+		if (use_ptemod)
+			map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
 	}
 	return err;
 }
@@ -316,7 +324,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 	}

 	err = gnttab_unmap_refs(map->unmap_ops + offset,
-		use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
+		use_ptemod ? map->kunmap_ops + offset : NULL, map->pages + offset,
 		pages);
 	if (err)
 		return err;

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7786291ba229..999d7abdbcec 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -738,7 +738,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);

 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct gnttab_map_grant_ref *kmap_ops,
+		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count)
 {
 	int ret;
@@ -747,7 +747,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	if (ret)
 		return ret;

-	return clear_foreign_p2m_mapping(unmap_ops, kmap_ops, pages, count);
+	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);

diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 3387465b9caa..7235d8f35459 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -167,7 +167,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct gnttab_map_grant_ref *kunmap_ops,
+		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count);

 /* Perform a batch of grant map/copy operations. Retry every batch slot
--
cgit 1.4.1


From 0bb599fd30108883b00c7d4a226eeb49111e6932 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Mon, 5 Jan 2015 17:06:01 +0000
Subject: xen: remove scratch frames for ballooned pages and m2p override

The scratch frame mappings for ballooned pages and the m2p override are
broken. Remove them in preparation for replacing them with simpler
mechanisms that work.

The scratch pages did not ensure that the page was not in use. In
particular, the foreign page could still be in use by hardware. If the
guest reused the frame the hardware could read or write that frame.

The m2p override did not handle the same frame being granted by two
different grant references. Trying an M2P override lookup in this case
is impossible.

With the m2p override removed, the grant map/unmap for the kernel
mappings (for x86 PV) can be easily batched in
set_foreign_p2m_mapping() and clear_foreign_p2m_mapping().
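The resulting batch is a single hypercall per operation set, roughly (as in
the set_foreign_p2m_mapping() hunk below; error handling elided):

    /* One GNTTABOP call maps all @count kernel mappings at once,
     * replacing the per-page multicalls the m2p override required.
     */
    if (kmap_ops) {
            ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                                            kmap_ops, count);
            if (ret)
                    goto out;
    }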
Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- arch/x86/include/asm/xen/page.h | 18 +-- arch/x86/xen/p2m.c | 254 ++-------------------------------------- drivers/xen/balloon.c | 86 +------------- 3 files changed, 14 insertions(+), 344 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index e9f52fe2d56a..358dcd338915 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -57,7 +57,6 @@ extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count); -extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); /* * Helper functions to write or read unsigned long values to/from @@ -154,21 +153,12 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) return mfn; pfn = mfn_to_pfn_no_overrides(mfn); - if (__pfn_to_mfn(pfn) != mfn) { - /* - * If this appears to be a foreign mfn (because the pfn - * doesn't map back to the mfn), then check the local override - * table to see if there's a better pfn to use. - * - * m2p_find_override_pfn returns ~0 if it doesn't find anything. - */ - pfn = m2p_find_override_pfn(mfn, ~0); - } + if (__pfn_to_mfn(pfn) != mfn) + pfn = ~0; /* - * pfn is ~0 if there are no entries in the m2p for mfn or if the - * entry doesn't map back to the mfn and m2p_override doesn't have a - * valid entry for it. + * pfn is ~0 if there are no entries in the m2p for mfn or the + * entry doesn't map back to the mfn. */ if (pfn == ~0 && __pfn_to_mfn(mfn) == IDENTITY_FRAME(mfn)) pfn = mfn; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index df40b2888eae..c9bc53f64359 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -84,8 +84,6 @@ #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) -static void __init m2p_override_init(void); - unsigned long *xen_p2m_addr __read_mostly; EXPORT_SYMBOL_GPL(xen_p2m_addr); unsigned long xen_p2m_size __read_mostly; @@ -402,8 +400,6 @@ void __init xen_vmalloc_p2m_tree(void) xen_p2m_size = xen_max_p2m_pfn; xen_inv_extra_mem(); - - m2p_override_init(); } unsigned long get_phys_to_machine(unsigned long pfn) @@ -652,100 +648,21 @@ bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) return true; } -#define M2P_OVERRIDE_HASH_SHIFT 10 -#define M2P_OVERRIDE_HASH (1 << M2P_OVERRIDE_HASH_SHIFT) - -static struct list_head *m2p_overrides; -static DEFINE_SPINLOCK(m2p_override_lock); - -static void __init m2p_override_init(void) -{ - unsigned i; - - m2p_overrides = alloc_bootmem_align( - sizeof(*m2p_overrides) * M2P_OVERRIDE_HASH, - sizeof(unsigned long)); - - for (i = 0; i < M2P_OVERRIDE_HASH; i++) - INIT_LIST_HEAD(&m2p_overrides[i]); -} - -static unsigned long mfn_hash(unsigned long mfn) -{ - return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); -} - -/* Add an MFN override for a particular page */ -static int m2p_add_override(unsigned long mfn, struct page *page, - struct gnttab_map_grant_ref *kmap_op) -{ - unsigned long flags; - unsigned long pfn; - unsigned long uninitialized_var(address); - unsigned level; - pte_t *ptep = NULL; - - pfn = page_to_pfn(page); - if (!PageHighMem(page)) { - address = (unsigned long)__va(pfn << PAGE_SHIFT); - ptep = lookup_address(address, &level); - if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_add_override: pfn %lx not mapped", pfn)) - return -EINVAL; - } - - if (kmap_op != NULL) { - if (!PageHighMem(page)) { - 
struct multicall_space mcs = - xen_mc_entry(sizeof(*kmap_op)); - - MULTI_grant_table_op(mcs.mc, - GNTTABOP_map_grant_ref, kmap_op, 1); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - } - } - spin_lock_irqsave(&m2p_override_lock, flags); - list_add(&page->lru, &m2p_overrides[mfn_hash(mfn)]); - spin_unlock_irqrestore(&m2p_override_lock, flags); - - /* p2m(m2p(mfn)) == mfn: the mfn is already present somewhere in - * this domain. Set the FOREIGN_FRAME_BIT in the p2m for the other - * pfn so that the following mfn_to_pfn(mfn) calls will return the - * pfn from the m2p_override (the backend pfn) instead. - * We need to do this because the pages shared by the frontend - * (xen-blkfront) can be already locked (lock_page, called by - * do_read_cache_page); when the userspace backend tries to use them - * with direct_IO, mfn_to_pfn returns the pfn of the frontend, so - * do_blockdev_direct_IO is going to try to lock the same pages - * again resulting in a deadlock. - * As a side effect get_user_pages_fast might not be safe on the - * frontend pages while they are being shared with the backend, - * because mfn_to_pfn (that ends up being called by GUPF) will - * return the backend pfn rather than the frontend pfn. */ - pfn = mfn_to_pfn_no_overrides(mfn); - if (__pfn_to_mfn(pfn) == mfn) - set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)); - - return 0; -} - int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) { int i, ret = 0; - bool lazy = false; pte_t *pte; if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; - if (kmap_ops && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { - arch_enter_lazy_mmu_mode(); - lazy = true; + if (kmap_ops) { + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + kmap_ops, count); + if (ret) + goto out; } for (i = 0; i < count; i++) { @@ -773,160 +690,22 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, ret = -ENOMEM; goto out; } - - if (kmap_ops) { - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); - if (ret) - goto out; - } } out: - if (lazy) - arch_leave_lazy_mmu_mode(); - return ret; } EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); -static struct page *m2p_find_override(unsigned long mfn) -{ - unsigned long flags; - struct list_head *bucket; - struct page *p, *ret; - - if (unlikely(!m2p_overrides)) - return NULL; - - ret = NULL; - bucket = &m2p_overrides[mfn_hash(mfn)]; - - spin_lock_irqsave(&m2p_override_lock, flags); - - list_for_each_entry(p, bucket, lru) { - if (page_private(p) == mfn) { - ret = p; - break; - } - } - - spin_unlock_irqrestore(&m2p_override_lock, flags); - - return ret; -} - -static int m2p_remove_override(struct page *page, - struct gnttab_unmap_grant_ref *kunmap_op, - unsigned long mfn) -{ - unsigned long flags; - unsigned long pfn; - unsigned long uninitialized_var(address); - unsigned level; - pte_t *ptep = NULL; - - pfn = page_to_pfn(page); - - if (!PageHighMem(page)) { - address = (unsigned long)__va(pfn << PAGE_SHIFT); - ptep = lookup_address(address, &level); - - if (WARN(ptep == NULL || level != PG_LEVEL_4K, - "m2p_remove_override: pfn %lx not mapped", pfn)) - return -EINVAL; - } - - spin_lock_irqsave(&m2p_override_lock, flags); - list_del(&page->lru); - spin_unlock_irqrestore(&m2p_override_lock, flags); - - if (kunmap_op != NULL) { - if (!PageHighMem(page)) { - struct multicall_space mcs; - struct gnttab_unmap_and_replace *unmap_op; - struct page *scratch_page = get_balloon_scratch_page(); - unsigned long 
scratch_page_address = (unsigned long) - __va(page_to_pfn(scratch_page) << PAGE_SHIFT); - - /* - * It might be that we queued all the m2p grant table - * hypercalls in a multicall, then m2p_remove_override - * get called before the multicall has actually been - * issued. In this case handle is going to -1 because - * it hasn't been modified yet. - */ - if (kunmap_op->handle == -1) - xen_mc_flush(); - /* - * Now if kmap_op->handle is negative it means that the - * hypercall actually returned an error. - */ - if (kunmap_op->handle == GNTST_general_error) { - pr_warn("m2p_remove_override: pfn %lx mfn %lx, failed to modify kernel mappings", - pfn, mfn); - put_balloon_scratch_page(); - return -1; - } - - xen_mc_batch(); - - mcs = __xen_mc_entry( - sizeof(struct gnttab_unmap_and_replace)); - unmap_op = mcs.args; - unmap_op->host_addr = kunmap_op->host_addr; - unmap_op->new_addr = scratch_page_address; - unmap_op->handle = kunmap_op->handle; - - MULTI_grant_table_op(mcs.mc, - GNTTABOP_unmap_and_replace, unmap_op, 1); - - mcs = __xen_mc_entry(0); - MULTI_update_va_mapping(mcs.mc, scratch_page_address, - pfn_pte(page_to_pfn(scratch_page), - PAGE_KERNEL_RO), 0); - - xen_mc_issue(PARAVIRT_LAZY_MMU); - - put_balloon_scratch_page(); - } - } - - /* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present - * somewhere in this domain, even before being added to the - * m2p_override (see comment above in m2p_add_override). - * If there are no other entries in the m2p_override corresponding - * to this mfn, then remove the FOREIGN_FRAME_BIT from the p2m for - * the original pfn (the one shared by the frontend): the backend - * cannot do any IO on this page anymore because it has been - * unshared. Removing the FOREIGN_FRAME_BIT from the p2m entry of - * the original pfn causes mfn_to_pfn(mfn) to return the frontend - * pfn again. 
*/ - mfn &= ~FOREIGN_FRAME_BIT; - pfn = mfn_to_pfn_no_overrides(mfn); - if (__pfn_to_mfn(pfn) == FOREIGN_FRAME(mfn) && - m2p_find_override(mfn) == NULL) - set_phys_to_machine(pfn, mfn); - - return 0; -} - int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count) { int i, ret = 0; - bool lazy = false; if (xen_feature(XENFEAT_auto_translated_physmap)) return 0; - if (kunmap_ops && - !in_interrupt() && - paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { - arch_enter_lazy_mmu_mode(); - lazy = true; - } - for (i = 0; i < count; i++) { unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsigned long pfn = page_to_pfn(pages[i]); @@ -940,32 +719,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, WARN_ON(!PagePrivate(pages[i])); ClearPagePrivate(pages[i]); set_phys_to_machine(pfn, pages[i]->index); - - if (kunmap_ops) - ret = m2p_remove_override(pages[i], &kunmap_ops[i], mfn); - if (ret) - goto out; } - + if (kunmap_ops) + ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + kunmap_ops, count); out: - if (lazy) - arch_leave_lazy_mmu_mode(); return ret; } EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); -unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) -{ - struct page *p = m2p_find_override(mfn); - unsigned long ret = pfn; - - if (p) - ret = page_to_pfn(p); - - return ret; -} -EXPORT_SYMBOL_GPL(m2p_find_override_pfn); - #ifdef CONFIG_XEN_DEBUG_FS #include #include "debugfs.h" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 3860d02729dc..0b52d92cb2e5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -92,7 +92,6 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; -static DEFINE_PER_CPU(struct page *, balloon_scratch_page); /* List of ballooned pages, threaded through the mem_map array. */ @@ -423,22 +422,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) page = pfn_to_page(pfn); #ifdef CONFIG_XEN_HAVE_PVMMU - /* - * Ballooned out frames are effectively replaced with - * a scratch frame. Ensure direct mappings and the - * p2m are consistent. - */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { if (!PageHighMem(page)) { - struct page *scratch_page = get_balloon_scratch_page(); - ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte(page_to_pfn(scratch_page), - PAGE_KERNEL_RO), 0); + __pte_ma(0), 0); BUG_ON(ret); - - put_balloon_scratch_page(); } __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } @@ -500,18 +489,6 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); } -struct page *get_balloon_scratch_page(void) -{ - struct page *ret = get_cpu_var(balloon_scratch_page); - BUG_ON(ret == NULL); - return ret; -} - -void put_balloon_scratch_page(void) -{ - put_cpu_var(balloon_scratch_page); -} - /* Resets the Xen limit, sets new target, and kicks off processing. 
*/ void balloon_set_new_target(unsigned long target) { @@ -605,61 +582,13 @@ static void __init balloon_add_region(unsigned long start_pfn, } } -static int alloc_balloon_scratch_page(int cpu) -{ - if (per_cpu(balloon_scratch_page, cpu) != NULL) - return 0; - - per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); - if (per_cpu(balloon_scratch_page, cpu) == NULL) { - pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); - return -ENOMEM; - } - - return 0; -} - - -static int balloon_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - switch (action) { - case CPU_UP_PREPARE: - if (alloc_balloon_scratch_page(cpu)) - return NOTIFY_BAD; - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block balloon_cpu_notifier = { - .notifier_call = balloon_cpu_notify, -}; - static int __init balloon_init(void) { - int i, cpu; + int i; if (!xen_domain()) return -ENODEV; - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - register_cpu_notifier(&balloon_cpu_notifier); - - get_online_cpus(); - for_each_online_cpu(cpu) { - if (alloc_balloon_scratch_page(cpu)) { - put_online_cpus(); - unregister_cpu_notifier(&balloon_cpu_notifier); - return -ENOMEM; - } - } - put_online_cpus(); - } - pr_info("Initialising balloon driver\n"); balloon_stats.current_pages = xen_pv_domain() @@ -696,15 +625,4 @@ static int __init balloon_init(void) subsys_initcall(balloon_init); -static int __init balloon_clear(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(balloon_scratch_page, cpu) = NULL; - - return 0; -} -early_initcall(balloon_clear); - MODULE_LICENSE("GPL"); -- cgit 1.4.1 From 0ae65f49af64d68f0daca37b83383115cae5e690 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Wed, 24 Dec 2014 14:03:16 +0000 Subject: x86/xen: require ballooned pages for grant maps Ballooned pages are always used for grant maps which means the original frame does not need to be saved in page->index nor restored after the grant unmap. This allows the workaround in netback for the conflicting use of the (unionized) page->index and page->pfmemalloc to be removed. 
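The invariant this relies on, restated from the p2m.c hunk below: a ballooned
page has no p2m entry, so there is no original frame to save in page->index
and restore at unmap:

    /* A page handed to a grant map must come from the balloon, i.e.
     * its p2m slot is empty until the foreign mfn is installed.
     */
    WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY,
         "page must be ballooned");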
Signed-off-by: Jennifer Herbert
Reviewed-by: Stefano Stabellini
Signed-off-by: David Vrabel
---
 arch/x86/xen/p2m.c                | 5 +++--
 drivers/net/xen-netback/netback.c | 6 ------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index c9bc53f64359..a8691cb08420 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -682,9 +682,10 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 		pfn = page_to_pfn(pages[i]);

 		WARN_ON(PagePrivate(pages[i]));
+		WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
+
 		SetPagePrivate(pages[i]);
 		set_page_private(pages[i], mfn);
-		pages[i]->index = pfn_to_mfn(pfn);

 		if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
 			ret = -ENOMEM;
@@ -718,7 +719,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 		set_page_private(pages[i], INVALID_P2M_ENTRY);
 		WARN_ON(!PagePrivate(pages[i]));
 		ClearPagePrivate(pages[i]);
-		set_phys_to_machine(pfn, pages[i]->index);
+		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 	}
 	if (kunmap_ops)
 		ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 908e65e9b821..64413189ad06 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1241,12 +1241,6 @@ static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb)
 		/* Take an extra reference to offset network stack's put_page */
 		get_page(queue->mmap_pages[pending_idx]);
 	}
-	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
-	 * overlaps with "index", and "mapping" is not set. I think mapping
-	 * should be set. If delivered to local stack, it would drop this
-	 * skb in sk_filter unless the socket has the right to use it.
-	 */
-	skb->pfmemalloc = false;
 }

 static int xenvif_get_extras(struct xenvif_queue *queue,
--
cgit 1.4.1


From ff4b156f166b3931894d2a8b5cdba6cdf4da0618 Mon Sep 17 00:00:00 2001
From: David Vrabel
Date: Thu, 8 Jan 2015 18:06:01 +0000
Subject: xen/grant-table: add helpers for allocating pages

Add gnttab_alloc_pages() and gnttab_free_pages() to allocate/free pages
suitable for grant mapping.
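Typical backend usage of the new pair, sketched (the count of 16 and the
surrounding error handling are illustrative):

    struct page *pages[16];
    int err;

    err = gnttab_alloc_pages(16, pages);    /* balloon-backed pages */
    if (err)
            return err;
    /* ... set up grant maps into pages[] ... */
    gnttab_free_pages(16, pages);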
Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/block/xen-blkback/blkback.c | 8 ++++---- drivers/net/xen-netback/interface.c | 7 +++---- drivers/xen/gntdev.c | 4 ++-- drivers/xen/grant-table.c | 29 +++++++++++++++++++++++++++++ drivers/xen/xen-scsiback.c | 6 +++--- include/xen/grant_table.h | 3 +++ 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 63fc7f06a014..908e630240bd 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -100,7 +100,7 @@ module_param(log_stats, int, 0644); #define BLKBACK_INVALID_HANDLE (~0) -/* Number of free pages to remove on each call to free_xenballooned_pages */ +/* Number of free pages to remove on each call to gnttab_free_pages */ #define NUM_BATCH_FREE_PAGES 10 static inline int get_free_page(struct xen_blkif *blkif, struct page **page) @@ -111,7 +111,7 @@ static inline int get_free_page(struct xen_blkif *blkif, struct page **page) if (list_empty(&blkif->free_pages)) { BUG_ON(blkif->free_pages_num != 0); spin_unlock_irqrestore(&blkif->free_pages_lock, flags); - return alloc_xenballooned_pages(1, page, false); + return gnttab_alloc_pages(1, page); } BUG_ON(blkif->free_pages_num == 0); page[0] = list_first_entry(&blkif->free_pages, struct page, lru); @@ -151,14 +151,14 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) blkif->free_pages_num--; if (++num_pages == NUM_BATCH_FREE_PAGES) { spin_unlock_irqrestore(&blkif->free_pages_lock, flags); - free_xenballooned_pages(num_pages, page); + gnttab_free_pages(num_pages, page); spin_lock_irqsave(&blkif->free_pages_lock, flags); num_pages = 0; } } spin_unlock_irqrestore(&blkif->free_pages_lock, flags); if (num_pages != 0) - free_xenballooned_pages(num_pages, page); + gnttab_free_pages(num_pages, page); } #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 9259a732e8a4..2e07f8433412 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -483,9 +483,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) * better enable it. 
The long term solution would be to use just a * bunch of valid page descriptors, without dependency on ballooning */ - err = alloc_xenballooned_pages(MAX_PENDING_REQS, - queue->mmap_pages, - false); + err = gnttab_alloc_pages(MAX_PENDING_REQS, + queue->mmap_pages); if (err) { netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n"); return -ENOMEM; @@ -662,7 +661,7 @@ void xenvif_disconnect(struct xenvif *vif) */ void xenvif_deinit_queue(struct xenvif_queue *queue) { - free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages); + gnttab_free_pages(MAX_PENDING_REQS, queue->mmap_pages); } void xenvif_free(struct xenvif *vif) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 6444172f2842..8cc3f069a10f 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -119,7 +119,7 @@ static void gntdev_free_map(struct grant_map *map) return; if (map->pages) - free_xenballooned_pages(map->count, map->pages); + gnttab_free_pages(map->count, map->pages); kfree(map->pages); kfree(map->grants); kfree(map->map_ops); @@ -152,7 +152,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) NULL == add->pages) goto err; - if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */)) + if (gnttab_alloc_pages(count, add->pages)) goto err; for (i = 0; i < count; i++) { diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 999d7abdbcec..b4f93c490f83 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -671,6 +672,34 @@ void gnttab_free_auto_xlat_frames(void) } EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames); +/** + * gnttab_alloc_pages - alloc pages suitable for grant mapping into + * @nr_pages: number of pages to alloc + * @pages: returns the pages + */ +int gnttab_alloc_pages(int nr_pages, struct page **pages) +{ + int ret; + + ret = alloc_xenballooned_pages(nr_pages, pages, false); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(gnttab_alloc_pages); + +/** + * gnttab_free_pages - free pages allocated by gnttab_alloc_pages() + * @nr_pages; number of pages to free + * @pages: the pages + */ +void gnttab_free_pages(int nr_pages, struct page **pages) +{ + free_xenballooned_pages(nr_pages, pages); +} +EXPORT_SYMBOL(gnttab_free_pages); + /* Handling of paged out grant targets (GNTST_eagain) */ #define MAX_DELAY 256 static inline void diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index e999496eda3e..ecd540a7a562 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -227,7 +227,7 @@ static void put_free_pages(struct page **page, int num) return; if (i > scsiback_max_buffer_pages) { n = min(num, i - scsiback_max_buffer_pages); - free_xenballooned_pages(n, page + num - n); + gnttab_free_pages(n, page + num - n); n = num - n; } spin_lock_irqsave(&free_pages_lock, flags); @@ -244,7 +244,7 @@ static int get_free_page(struct page **page) spin_lock_irqsave(&free_pages_lock, flags); if (list_empty(&scsiback_free_pages)) { spin_unlock_irqrestore(&free_pages_lock, flags); - return alloc_xenballooned_pages(1, page, false); + return gnttab_alloc_pages(1, page); } page[0] = list_first_entry(&scsiback_free_pages, struct page, lru); list_del(&page[0]->lru); @@ -2106,7 +2106,7 @@ static void __exit scsiback_exit(void) while (free_pages_num) { if (get_free_page(&page)) BUG(); - free_xenballooned_pages(1, &page); + gnttab_free_pages(1, &page); } scsiback_deregister_configfs(); 
xenbus_unregister_driver(&scsiback_driver); diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 7235d8f35459..949803e20872 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -163,6 +163,9 @@ void gnttab_free_auto_xlat_frames(void); #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) +int gnttab_alloc_pages(int nr_pages, struct page **pages); +void gnttab_free_pages(int nr_pages, struct page **pages); + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); -- cgit 1.4.1 From 8da7633f168b5428e2cfb7342408b2c44088f5df Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Wed, 24 Dec 2014 14:17:06 +0000 Subject: xen: mark grant mapped pages as foreign Use the "foreign" page flag to mark pages that have a grant map. Use page->private to store information of the grant (the granting domain and the grant reference). Signed-off-by: Jennifer Herbert Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/x86/xen/p2m.c | 7 ------- drivers/xen/grant-table.c | 43 +++++++++++++++++++++++++++++++++++++++++-- include/xen/grant_table.h | 20 ++++++++++++++++++++ 3 files changed, 61 insertions(+), 9 deletions(-) diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index a8691cb08420..f18fd1d411f6 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -681,12 +681,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, } pfn = page_to_pfn(pages[i]); - WARN_ON(PagePrivate(pages[i])); WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); - SetPagePrivate(pages[i]); - set_page_private(pages[i], mfn); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { ret = -ENOMEM; goto out; @@ -716,9 +712,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, goto out; } - set_page_private(pages[i], INVALID_P2M_ENTRY); - WARN_ON(!PagePrivate(pages[i])); - ClearPagePrivate(pages[i]); set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } if (kunmap_ops) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index b4f93c490f83..89dcca448bb6 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -679,12 +679,27 @@ EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames); */ int gnttab_alloc_pages(int nr_pages, struct page **pages) { + int i; int ret; ret = alloc_xenballooned_pages(nr_pages, pages, false); if (ret < 0) return ret; + for (i = 0; i < nr_pages; i++) { +#if BITS_PER_LONG < 64 + struct xen_page_foreign *foreign; + + foreign = kzalloc(sizeof(*foreign), GFP_KERNEL); + if (!foreign) { + gnttab_free_pages(nr_pages, pages); + return -ENOMEM; + } + set_page_private(pages[i], (unsigned long)foreign); +#endif + SetPagePrivate(pages[i]); + } + return 0; } EXPORT_SYMBOL(gnttab_alloc_pages); @@ -696,6 +711,16 @@ EXPORT_SYMBOL(gnttab_alloc_pages); */ void gnttab_free_pages(int nr_pages, struct page **pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + if (PagePrivate(pages[i])) { +#if BITS_PER_LONG < 64 + kfree((void *)page_private(pages[i])); +#endif + ClearPagePrivate(pages[i]); + } + } free_xenballooned_pages(nr_pages, pages); } EXPORT_SYMBOL(gnttab_free_pages); @@ -756,12 +781,22 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (ret) return ret; - /* Retry eagain maps */ - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + /* Retry eagain maps */ if (map_ops[i].status == GNTST_eagain) gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, &map_ops[i].status, 
						__func__);

+		if (map_ops[i].status == GNTST_okay) {
+			struct xen_page_foreign *foreign;
+
+			SetPageForeign(pages[i]);
+			foreign = xen_page_foreign(pages[i]);
+			foreign->domid = map_ops[i].dom;
+			foreign->gref = map_ops[i].ref;
+		}
+	}
+
 	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
 }
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
@@ -770,12 +805,16 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct gnttab_unmap_grant_ref *kunmap_ops,
 		      struct page **pages, unsigned int count)
 {
+	unsigned int i;
 	int ret;

 	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
 	if (ret)
 		return ret;

+	for (i = 0; i < count; i++)
+		ClearPageForeign(pages[i]);
+
 	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
 }
 EXPORT_SYMBOL_GPL(gnttab_unmap_refs);

diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 949803e20872..d3bef563e8da 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -45,6 +45,8 @@
 #include
 #include
+#include
+#include

 #define GNTTAB_RESERVED_XENSTORE 1

@@ -185,4 +187,22 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count);
 void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count);

+
+struct xen_page_foreign {
+	domid_t domid;
+	grant_ref_t gref;
+};
+
+static inline struct xen_page_foreign *xen_page_foreign(struct page *page)
+{
+	if (!PageForeign(page))
+		return NULL;
+#if BITS_PER_LONG < 64
+	return (struct xen_page_foreign *)page->private;
+#else
+	BUILD_BUG_ON(sizeof(struct xen_page_foreign) > BITS_PER_LONG);
+	return (struct xen_page_foreign *)&page->private;
+#endif
+}
+
 #endif /* __ASM_GNTTAB_H__ */
--
cgit 1.4.1


From c2677a6fc4dee765fff8f7ac3d61f657dc295650 Mon Sep 17 00:00:00 2001
From: Jennifer Herbert
Date: Mon, 5 Jan 2015 14:45:10 +0000
Subject: xen-netback: use foreign page information from the pages themselves

Use the foreign page flag in netback to get the domid and grant ref
needed for the grant copy.

This significantly simplifies the netback code and makes netback work
with foreign pages from other backends (e.g., blkback). This allows
blkback to use iSCSI disks provided by domUs running on the same host.

Signed-off-by: Jennifer Herbert
Acked-by: Ian Campbell
Acked-by: David S.
Miller Signed-off-by: David Vrabel --- drivers/net/xen-netback/netback.c | 100 ++++---------------------------------- 1 file changed, 9 insertions(+), 91 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 64413189ad06..ae3ab3752ea8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -314,9 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue, static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb, struct netrx_pending_operations *npo, struct page *page, unsigned long size, - unsigned long offset, int *head, - struct xenvif_queue *foreign_queue, - grant_ref_t foreign_gref) + unsigned long offset, int *head) { struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; @@ -333,6 +331,8 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb offset &= ~PAGE_MASK; while (size > 0) { + struct xen_page_foreign *foreign; + BUG_ON(offset >= PAGE_SIZE); BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); @@ -361,9 +361,10 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb copy_gop->flags = GNTCOPY_dest_gref; copy_gop->len = bytes; - if (foreign_queue) { - copy_gop->source.domid = foreign_queue->vif->domid; - copy_gop->source.u.ref = foreign_gref; + foreign = xen_page_foreign(page); + if (foreign) { + copy_gop->source.domid = foreign->domid; + copy_gop->source.u.ref = foreign->gref; copy_gop->flags |= GNTCOPY_source_gref; } else { copy_gop->source.domid = DOMID_SELF; @@ -405,35 +406,6 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb } } -/* - * Find the grant ref for a given frag in a chain of struct ubuf_info's - * skb: the skb itself - * i: the frag's number - * ubuf: a pointer to an element in the chain. It should not be NULL - * - * Returns a pointer to the element in the chain where the page were found. If - * not found, returns NULL. - * See the definition of callback_struct in common.h for more details about - * the chain. - */ -static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb, - const int i, - const struct ubuf_info *ubuf) -{ - struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf); - - do { - u16 pending_idx = ubuf->desc; - - if (skb_shinfo(skb)->frags[i].page.p == - foreign_queue->mmap_pages[pending_idx]) - break; - ubuf = (struct ubuf_info *) ubuf->ctx; - } while (ubuf); - - return ubuf; -} - /* * Prepare an SKB to be transmitted to the frontend. * @@ -459,8 +431,6 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; - const struct ubuf_info *const head_ubuf = ubuf; old_meta_prod = npo->meta_prod; @@ -507,68 +477,16 @@ static int xenvif_gop_skb(struct sk_buff *skb, len = skb_tail_pointer(skb) - data; xenvif_gop_frag_copy(queue, skb, npo, - virt_to_page(data), len, offset, &head, - NULL, - 0); + virt_to_page(data), len, offset, &head); data += len; } for (i = 0; i < nr_frags; i++) { - /* This variable also signals whether foreign_gref has a real - * value or not. - */ - struct xenvif_queue *foreign_queue = NULL; - grant_ref_t foreign_gref; - - if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && - (ubuf->callback == &xenvif_zerocopy_callback)) { - const struct ubuf_info *const startpoint = ubuf; - - /* Ideally ubuf points to the chain element which - * belongs to this frag. 
Or if frags were removed from - * the beginning, then shortly before it. - */ - ubuf = xenvif_find_gref(skb, i, ubuf); - - /* Try again from the beginning of the list, if we - * haven't tried from there. This only makes sense in - * the unlikely event of reordering the original frags. - * For injected local pages it's an unnecessary second - * run. - */ - if (unlikely(!ubuf) && startpoint != head_ubuf) - ubuf = xenvif_find_gref(skb, i, head_ubuf); - - if (likely(ubuf)) { - u16 pending_idx = ubuf->desc; - - foreign_queue = ubuf_to_queue(ubuf); - foreign_gref = - foreign_queue->pending_tx_info[pending_idx].req.gref; - /* Just a safety measure. If this was the last - * element on the list, the for loop will - * iterate again if a local page were added to - * the end. Using head_ubuf here prevents the - * second search on the chain. Or the original - * frags changed order, but that's less likely. - * In any way, ubuf shouldn't be NULL. - */ - ubuf = ubuf->ctx ? - (struct ubuf_info *) ubuf->ctx : - head_ubuf; - } else - /* This frag was a local page, added to the - * array after the skb left netback. - */ - ubuf = head_ubuf; - } xenvif_gop_frag_copy(queue, skb, npo, skb_frag_page(&skb_shinfo(skb)->frags[i]), skb_frag_size(&skb_shinfo(skb)->frags[i]), skb_shinfo(skb)->frags[i].page_offset, - &head, - foreign_queue, - foreign_queue ? foreign_gref : UINT_MAX); + &head); } return npo->meta_prod - old_meta_prod; -- cgit 1.4.1 From 3f9f1c67572f5e5e6dc84216d48d1480f3c4fcf6 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Tue, 9 Dec 2014 18:28:37 +0000 Subject: xen/grant-table: add a mechanism to safely unmap pages that are in use Introduce gnttab_unmap_refs_async() that can be used to safely unmap pages that may be in use (ref count > 1). If the pages are in use the unmap is deferred and retried later. This polling is not very clever but it should be good enough if the cases where the delay is necessary are rare. The initial delay is 5 ms and is increased linearly on each subsequent retry (to reduce load if the page is in use for a long time). This is needed to allow block backends using grant mapping to safely use network storage (block or filesystem based such as iSCSI or NFS). The network storage driver may complete a block request whilst there is a queued network packet retry (because the ack from the remote end races with deciding to queue the retry). The pages for the retried packet would be grant unmapped and the network driver (or hardware) would access the unmapped page. 
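[Editor's note: a minimal sketch, not part of this patch, of how a caller can turn the asynchronous API into a synchronous unmap by parking on a completion. The struct gntab_unmap_queue_data fields and the done callback signature are those introduced below; the sync_unmap_* names are invented for illustration. The gntdev and blkback patches later in this series use exactly this pattern.]

#include <linux/completion.h>
#include <xen/grant_table.h>

struct sync_unmap {
	struct completion completion;
	int result;
};

/* Matches the gnttab_unmap_refs_done callback type. */
static void sync_unmap_done(int result, struct gntab_unmap_queue_data *data)
{
	struct sync_unmap *s = data->data;

	s->result = result;
	complete(&s->completion);
}

/* Unmap @count grants, sleeping until the pages are really unmapped. */
static int sync_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
			   struct gnttab_unmap_grant_ref *kunmap_ops,
			   struct page **pages, unsigned int count)
{
	struct gntab_unmap_queue_data item;
	struct sync_unmap s;

	init_completion(&s.completion);
	item.data = &s;
	item.done = sync_unmap_done;
	item.unmap_ops = unmap_ops;
	item.kunmap_ops = kunmap_ops;
	item.pages = pages;
	item.count = count;

	gnttab_unmap_refs_async(&item);
	wait_for_completion(&s.completion);	/* may sleep */

	return s.result;
}

Because the wait can sleep, this wrapper must only be used from process context; callers that cannot sleep should instead supply a done callback that finishes the work asynchronously.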
Signed-off-by: Jennifer Herbert Acked-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/xen/grant-table.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/xen/grant_table.h | 18 ++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 89dcca448bb6..17972fbacddc 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -819,6 +820,49 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +#define GNTTAB_UNMAP_REFS_DELAY 5 + +static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item); + +static void gnttab_unmap_work(struct work_struct *work) +{ + struct gntab_unmap_queue_data + *unmap_data = container_of(work, + struct gntab_unmap_queue_data, + gnttab_work.work); + if (unmap_data->age != UINT_MAX) + unmap_data->age++; + __gnttab_unmap_refs_async(unmap_data); +} + +static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item) +{ + int ret; + int pc; + + for (pc = 0; pc < item->count; pc++) { + if (page_count(item->pages[pc]) > 1) { + unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1); + schedule_delayed_work(&item->gnttab_work, + msecs_to_jiffies(delay)); + return; + } + } + + ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops, + item->pages, item->count); + item->done(ret, item); +} + +void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item) +{ + INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work); + item->age = 0; + + __gnttab_unmap_refs_async(item); +} +EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async); + static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes) { int rc; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index d3bef563e8da..143ca5ffab7a 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -60,6 +60,22 @@ struct gnttab_free_callback { u16 count; }; +struct gntab_unmap_queue_data; + +typedef void (*gnttab_unmap_refs_done)(int result, struct gntab_unmap_queue_data *data); + +struct gntab_unmap_queue_data +{ + struct delayed_work gnttab_work; + void *data; + gnttab_unmap_refs_done done; + struct gnttab_unmap_grant_ref *unmap_ops; + struct gnttab_unmap_grant_ref *kunmap_ops; + struct page **pages; + unsigned int count; + unsigned int age; +}; + int gnttab_init(void); int gnttab_suspend(void); int gnttab_resume(void); @@ -174,6 +190,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count); +void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item); + /* Perform a batch of grant map/copy operations. Retry every batch slot * for which the hypervisor returns GNTST_eagain. This is typically due -- cgit 1.4.1 From 1401c00e59ea021c575f74612fe2dbba36d6a4ee Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 9 Jan 2015 18:06:12 +0000 Subject: xen/gntdev: convert priv->lock to a mutex Unmapping may require sleeping and we unmap while holding priv->lock, so convert it to a mutex. 
Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 8cc3f069a10f..3c2534433b30 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -67,7 +67,7 @@ struct gntdev_priv { * Only populated if populate_freeable_maps == 1 */ struct list_head freeable_maps; /* lock protects maps and freeable_maps */ - spinlock_t lock; + struct mutex lock; struct mm_struct *mm; struct mmu_notifier mn; }; @@ -221,9 +221,9 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) } if (populate_freeable_maps && priv) { - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_del(&map->next); - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } if (map->pages && !use_ptemod) @@ -395,9 +395,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma) * not do any unmapping, since that has been done prior to * closing the vma, but it may still iterate the unmap_ops list. */ - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map->vma = NULL; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } vma->vm_private_data = NULL; gntdev_put_map(priv, map); @@ -441,14 +441,14 @@ static void mn_invl_range_start(struct mmu_notifier *mn, struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); struct grant_map *map; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { unmap_if_in_range(map, start, end); } list_for_each_entry(map, &priv->freeable_maps, next) { unmap_if_in_range(map, start, end); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static void mn_invl_page(struct mmu_notifier *mn, @@ -465,7 +465,7 @@ static void mn_release(struct mmu_notifier *mn, struct grant_map *map; int err; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { if (!map->vma) continue; @@ -484,7 +484,7 @@ static void mn_release(struct mmu_notifier *mn, err = unmap_grant_pages(map, /* offset */ 0, map->count); WARN_ON(err); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static struct mmu_notifier_ops gntdev_mmu_ops = { @@ -506,7 +506,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) INIT_LIST_HEAD(&priv->maps); INIT_LIST_HEAD(&priv->freeable_maps); - spin_lock_init(&priv->lock); + mutex_init(&priv->lock); if (use_ptemod) { priv->mm = get_task_mm(current); @@ -580,10 +580,10 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, return -EFAULT; } - spin_lock(&priv->lock); + mutex_lock(&priv->lock); gntdev_add_map(priv, map); op.index = map->index << PAGE_SHIFT; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (copy_to_user(u, &op, sizeof(op)) != 0) return -EFAULT; @@ -602,7 +602,7 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, return -EFAULT; pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); @@ -610,7 +610,7 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, list_add_tail(&map->next, &priv->freeable_maps); err = 0; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (map) gntdev_put_map(priv, map); return err; @@ -678,7 +678,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) out_flags = op.action; 
out_event = op.event_channel_port; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { uint64_t begin = map->index << PAGE_SHIFT; @@ -706,7 +706,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) rc = 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); /* Drop the reference to the event channel we did not save in the map */ if (out_flags & UNMAP_NOTIFY_SEND_EVENT) @@ -756,7 +756,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) pr_debug("map %d+%d at %lx (pgoff %lx)\n", index, count, vma->vm_start, vma->vm_pgoff); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; @@ -791,7 +791,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (use_ptemod) { err = apply_to_page_range(vma->vm_mm, vma->vm_start, @@ -819,11 +819,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) return 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); return err; out_unlock_put: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); out_put_map: if (use_ptemod) map->vma = NULL; -- cgit 1.4.1 From 745282256c754ac5ed3dbe2fbef6471dc1373417 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 15:07:46 +0000 Subject: xen/gntdev: safely unmap grants in case they are still in use Use gnttab_unmap_refs_async() to wait until the mapped pages are no longer in use before unmapping them. This allows userspace programs to safely use Direct I/O and AIO to a network filesystem which may retain refs to pages in queued skbs after the filesystem I/O has completed. Signed-off-by: Jennifer Herbert Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/xen/gntdev.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 3c2534433b30..bccc54a80559 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -309,9 +309,30 @@ static int map_grant_pages(struct grant_map *map) return err; } +struct unmap_grant_pages_callback_data +{ + struct completion completion; + int result; +}; + +static void unmap_grant_callback(int result, + struct gntab_unmap_queue_data *data) +{ + struct unmap_grant_pages_callback_data* d = data->data; + + d->result = result; + complete(&d->completion); +} + static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) { int i, err = 0; + struct gntab_unmap_queue_data unmap_data; + struct unmap_grant_pages_callback_data data; + + init_completion(&data.completion); + unmap_data.data = &data; + unmap_data.done= &unmap_grant_callback; if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { int pgno = (map->notify.addr >> PAGE_SHIFT); @@ -323,11 +344,16 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, - use_ptemod ? map->kunmap_ops + offset : NULL, map->pages + offset, - pages); - if (err) - return err; + unmap_data.unmap_ops = map->unmap_ops + offset; + unmap_data.kunmap_ops = use_ptemod ? 
map->kunmap_ops + offset : NULL; + unmap_data.pages = map->pages + offset; + unmap_data.count = pages; + + gnttab_unmap_refs_async(&unmap_data); + + wait_for_completion(&data.completion); + if (data.result) + return data.result; for (i = 0; i < pages; i++) { if (map->unmap_ops[offset+i].status) -- cgit 1.4.1 From c43cf3ea838541ea9f066f4f1aa7b197cba6276e Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 16:49:22 +0000 Subject: xen-blkback: safely unmap grants in case they are still in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use gnttab_unmap_refs_async() to wait until the mapped pages are no longer in use before unmapping them. This allows blkback to use network storage which may retain refs to pages in queued skbs after the block I/O has completed. Signed-off-by: Jennifer Herbert Acked-by: Roger Pau Monné Acked-by: Jens Axboe Signed-off-by: David Vrabel --- drivers/block/xen-blkback/blkback.c | 169 +++++++++++++++++++++++++----------- drivers/block/xen-blkback/common.h | 3 + 2 files changed, 122 insertions(+), 50 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 908e630240bd..2a04d341e598 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "common.h" /* @@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif, atomic_dec(&blkif->persistent_gnt_in_use); } +static void free_persistent_gnts_unmap_callback(int result, + struct gntab_unmap_queue_data *data) +{ + struct completion *c = data->data; + + /* BUG_ON used to reproduce existing behaviour, + but is this the best way to deal with this? */ + BUG_ON(result); + complete(c); +} + static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, unsigned int num) { @@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt; struct rb_node *n; - int ret = 0; int segs_to_unmap = 0; + struct gntab_unmap_queue_data unmap_data; + struct completion unmap_completion; + + init_completion(&unmap_completion); + + unmap_data.data = &unmap_completion; + unmap_data.done = &free_persistent_gnts_unmap_callback; + unmap_data.pages = pages; + unmap_data.unmap_ops = unmap; + unmap_data.kunmap_ops = NULL; foreach_grant_safe(persistent_gnt, n, root, node) { BUG_ON(persistent_gnt->handle == @@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { - ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); - BUG_ON(ret); + + unmap_data.count = segs_to_unmap; + gnttab_unmap_refs_async(&unmap_data); + wait_for_completion(&unmap_completion); + put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; } @@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif) shrink_free_pagepool(blkif, 0 /* All */); } -/* - * Unmap the grant references, and also remove the M2P over-rides - * used in the 'pending_req'. 
- */ -static void xen_blkbk_unmap(struct xen_blkif *blkif, - struct grant_page *pages[], - int num) +static unsigned int xen_blkbk_unmap_prepare( + struct xen_blkif *blkif, + struct grant_page **pages, + unsigned int num, + struct gnttab_unmap_grant_ref *unmap_ops, + struct page **unmap_pages) { - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; - int ret; for (i = 0; i < num; i++) { if (pages[i]->persistent_gnt != NULL) { @@ -674,21 +693,95 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, if (pages[i]->handle == BLKBACK_INVALID_HANDLE) continue; unmap_pages[invcount] = pages[i]->page; - gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page), + gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[i]->page), GNTMAP_host_map, pages[i]->handle); pages[i]->handle = BLKBACK_INVALID_HANDLE; - if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, - invcount); + invcount++; + } + + return invcount; +} + +static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data) +{ + struct pending_req* pending_req = (struct pending_req*) (data->data); + struct xen_blkif *blkif = pending_req->blkif; + + /* BUG_ON used to reproduce existing behaviour, + but is this the best way to deal with this? */ + BUG_ON(result); + + put_free_pages(blkif, data->pages, data->count); + make_response(blkif, pending_req->id, + pending_req->operation, pending_req->status); + free_req(blkif, pending_req); + /* + * Make sure the request is freed before releasing blkif, + * or there could be a race between free_req and the + * cleanup done in xen_blkif_free during shutdown. + * + * NB: The fact that we might try to wake up pending_free_wq + * before drain_complete (in case there's a drain going on) + * it's not a problem with our current implementation + * because we can assure there's no thread waiting on + * pending_free_wq if there's a drain going on, but it has + * to be taken into account if the current model is changed. + */ + if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { + complete(&blkif->drain_complete); + } + xen_blkif_put(blkif); +} + +static void xen_blkbk_unmap_and_respond(struct pending_req *req) +{ + struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data; + struct xen_blkif *blkif = req->blkif; + struct grant_page **pages = req->segments; + unsigned int invcount; + + invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages, + req->unmap, req->unmap_pages); + + work->data = req; + work->done = xen_blkbk_unmap_and_respond_callback; + work->unmap_ops = req->unmap; + work->kunmap_ops = NULL; + work->pages = req->unmap_pages; + work->count = invcount; + + gnttab_unmap_refs_async(&req->gnttab_unmap_data); +} + + +/* + * Unmap the grant references. + * + * This could accumulate ops up to the batch size to reduce the number + * of hypercalls, but since this is only used in error paths there's + * no real need. 
+ */ +static void xen_blkbk_unmap(struct xen_blkif *blkif, + struct grant_page *pages[], + int num) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int invcount = 0; + int ret; + + while (num) { + unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + invcount = xen_blkbk_unmap_prepare(blkif, pages, batch, + unmap, unmap_pages); + if (invcount) { + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); - invcount = 0; } - } - if (invcount) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); - BUG_ON(ret); - put_free_pages(blkif, unmap_pages, invcount); + pages += batch; + num -= batch; } } @@ -982,32 +1075,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the grant references associated with 'request' and provide * the proper response on the ring. */ - if (atomic_dec_and_test(&pending_req->pendcnt)) { - struct xen_blkif *blkif = pending_req->blkif; - - xen_blkbk_unmap(blkif, - pending_req->segments, - pending_req->nr_pages); - make_response(blkif, pending_req->id, - pending_req->operation, pending_req->status); - free_req(blkif, pending_req); - /* - * Make sure the request is freed before releasing blkif, - * or there could be a race between free_req and the - * cleanup done in xen_blkif_free during shutdown. - * - * NB: The fact that we might try to wake up pending_free_wq - * before drain_complete (in case there's a drain going on) - * it's not a problem with our current implementation - * because we can assure there's no thread waiting on - * pending_free_wq if there's a drain going on, but it has - * to be taken into account if the current model is changed. - */ - if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { - complete(&blkif->drain_complete); - } - xen_blkif_put(blkif); - } + if (atomic_dec_and_test(&pending_req->pendcnt)) + xen_blkbk_unmap_and_respond(pending_req); } /* diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index f65b807e3236..cc90a840e616 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -350,6 +350,9 @@ struct pending_req { struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; struct bio *biolist[MAX_INDIRECT_SEGMENTS]; + struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS]; + struct page *unmap_pages[MAX_INDIRECT_SEGMENTS]; + struct gntab_unmap_queue_data gnttab_unmap_data; }; -- cgit 1.4.1 From 923b2919e2c318ee1c360a2119a14889fd0fcce4 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 18 Dec 2014 14:56:54 +0000 Subject: xen/gntdev: mark userspace PTEs as special on x86 PV guests In an x86 PV guest, get_user_pages_fast() on a userspace address range containing foreign mappings does not work correctly because the M2P lookup of the MFN from a userspace PTE may return the wrong page. Force get_user_pages_fast() to fail on such addresses by marking the PTEs as special. If Xen has XENFEAT_gnttab_map_avail_bits (available since at least 4.0), we can do so efficiently in the grant map hypercall. Otherwise, it needs to be done afterwards. This is both inefficient and racy (the mapping is visible to the task before we fixup the PTEs), but will be fine for well-behaved applications that do not use the mapping until after the mmap() system call returns. 
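[Editor's note: background on why marking a PTE special forces the fallback, paraphrased and simplified from the x86 fast-GUP walker of this era (arch/x86/mm/gup.c); not part of this patch. The lockless walker refuses any PTE with _PAGE_SPECIAL set, so get_user_pages_fast() returns early and the caller retries via the slow get_user_pages() path:]

	/* simplified: inside gup_pte_range() */
	if (pte_flags(pte) & _PAGE_SPECIAL) {
		pte_unmap(ptep);
		return 0;	/* abort the fast walk; take the slow path */
	}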
Guests with XENFEAT_auto_translated_physmap (ARM and x86 HVM or PVH) do not need this since get_user_pages() has always worked correctly for them. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 34 ++++++++++++++++++++++++++++++++++ include/xen/interface/features.h | 6 ++++++ include/xen/interface/grant_table.h | 7 +++++++ 3 files changed, 47 insertions(+) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index bccc54a80559..20c65771017d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -244,6 +244,14 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, BUG_ON(pgnr >= map->count); pte_maddr = arbitrary_virt_to_machine(pte).maddr; + /* + * Set the PTE as special to force get_user_pages_fast() fall + * back to the slow path. If this is not supported as part of + * the grant map, it will be done afterwards. + */ + if (xen_feature(XENFEAT_gnttab_map_avail_bits)) + flags |= (1 << _GNTMAP_guest_avail0); + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, map->grants[pgnr].ref, map->grants[pgnr].domid); @@ -252,6 +260,15 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, return 0; } +#ifdef CONFIG_X86 +static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token, + unsigned long addr, void *data) +{ + set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); + return 0; +} +#endif + static int map_grant_pages(struct grant_map *map) { int i, err = 0; @@ -840,6 +857,23 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) if (err) goto out_put_map; } + } else { +#ifdef CONFIG_X86 + /* + * If the PTEs were not made special by the grant map + * hypercall, do so here. + * + * This is racy since the mapping is already visible + * to userspace but userspace should be well-behaved + * enough to not touch it until the mmap() call + * returns. + */ + if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { + apply_to_page_range(vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, + set_grant_ptes_as_special, NULL); + } +#endif } return 0; diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index 131a6ccdba25..6ad3d110bb81 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -41,6 +41,12 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + /* x86: Does this Xen host support the HVM callback vector type? */ #define XENFEAT_hvm_callback_vector 8 diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index bcce56439d64..56806bc90c2f 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -525,6 +525,13 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + /* * Values for error status returns. All errors are -ve. 
*/ -- cgit 1.4.1 From dab069c61aa386f6a46c620f3a1075a4818f285b Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 18 Dec 2014 14:59:07 +0000 Subject: xen/gntdev: provide find_special_page VMA operation For a PV guest, use the find_special_page op to find the right page. To handle VMAs being split, remember the start of the original VMA so the correct index in the pages array can be calculated. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 20c65771017d..d5bb1a33d0a3 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -93,6 +93,7 @@ struct grant_map { struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; struct page **pages; + unsigned long pages_vm_start; }; static int unmap_grant_pages(struct grant_map *map, int offset, int pages); @@ -446,9 +447,18 @@ static void gntdev_vma_close(struct vm_area_struct *vma) gntdev_put_map(priv, map); } +static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, + unsigned long addr) +{ + struct grant_map *map = vma->vm_private_data; + + return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; +} + static struct vm_operations_struct gntdev_vmops = { .open = gntdev_vma_open, .close = gntdev_vma_close, + .find_special_page = gntdev_vma_find_special_page, }; /* ------------------------------------------------------------------ */ @@ -874,6 +884,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) set_grant_ptes_as_special, NULL); } #endif + map->pages_vm_start = vma->vm_start; } return 0; -- cgit 1.4.1 From a2e75bc2ee207351e6806e77a5379c6c1dd4598a Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Thu, 5 Feb 2015 14:45:40 +0000 Subject: xenbus: Add proper handling of XS_ERROR from Xenbus for transactions. If Xenstore sends back a XS_ERROR for TRANSACTION_END, the driver BUGs because it cannot find the matching transaction in the list. For TRANSACTION_START, it leaks memory. Check the message as returned from xenbus_dev_request_and_reply(), and clean up for TRANSACTION_START or discard the error for TRANSACTION_END. 
Signed-off-by: Jennifer Herbert Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_dev_frontend.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 85534ea63555..9433e46518c8 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -326,10 +326,13 @@ static int xenbus_write_transaction(unsigned msg_type, } if (msg_type == XS_TRANSACTION_START) { - trans->handle.id = simple_strtoul(reply, NULL, 0); - - list_add(&trans->list, &u->transactions); - } else if (msg_type == XS_TRANSACTION_END) { + if (u->u.msg.type == XS_ERROR) + kfree(trans); + else { + trans->handle.id = simple_strtoul(reply, NULL, 0); + list_add(&trans->list, &u->transactions); + } + } else if (u->u.msg.type == XS_TRANSACTION_END) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; -- cgit 1.4.1 From 72978b2fe2f2cdf9f319c6c6dcdbe92b38de2be2 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 19 Jan 2015 13:19:38 +0000 Subject: xen/manage: Fix USB interaction issues when resuming Commit 61a734d305e1 ("xen/manage: Always freeze/thaw processes when suspend/resuming") ensured that userspace processes were always frozen before suspending to reduce interaction issues when resuming devices. However, freeze_processes() does not freeze kernel threads. Freeze kernel threads as well to prevent deadlocks with the khubd thread when resuming devices. This is what native suspend and resume does. Example deadlock: [ 7279.648010] [] ? xen_poll_irq_timeout+0x3e/0x50 [ 7279.648010] [] xen_poll_irq+0x10/0x20 [ 7279.648010] [] xen_lock_spinning+0xb3/0x120 [ 7279.648010] [] __raw_callee_save_xen_lock_spinning+0x11/0x20 [ 7279.648010] [] ? usb_control_msg+0xe6/0x120 [ 7279.648010] [] ? _raw_spin_lock_irq+0x50/0x60 [ 7279.648010] [] wait_for_completion+0xac/0x160 [ 7279.648010] [] ? try_to_wake_up+0x2c0/0x2c0 [ 7279.648010] [] dpm_wait+0x32/0x40 [ 7279.648010] [] device_resume+0x90/0x210 [ 7279.648010] [] dpm_resume+0x121/0x250 [ 7279.648010] [] ? xenbus_dev_request_and_reply+0xc0/0xc0 [ 7279.648010] [] dpm_resume_end+0x15/0x30 [ 7279.648010] [] do_suspend+0x10a/0x200 [ 7279.648010] [] ? xen_pre_suspend+0x20/0x20 [ 7279.648010] [] shutdown_handler+0x120/0x150 [ 7279.648010] [] xenwatch_thread+0x9f/0x160 [ 7279.648010] [] ? finish_wait+0x80/0x80 [ 7279.648010] [] kthread+0xc9/0xe0 [ 7279.648010] [] ? flush_kthread_worker+0x80/0x80 [ 7279.648010] [] ret_from_fork+0x7c/0xb0 [ 7279.648010] [] ? flush_kthread_worker+0x80/0x80 [ 7441.216287] INFO: task khubd:89 blocked for more than 120 seconds. [ 7441.219457] Tainted: G X 3.13.11-ckt12.kz #1 [ 7441.222176] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 7441.225827] khubd D ffff88003f433440 0 89 2 0x00000000 [ 7441.229258] ffff88003ceb9b98 0000000000000046 ffff88003ce83000 0000000000013440 [ 7441.232959] ffff88003ceb9fd8 0000000000013440 ffff88003cd13000 ffff88003ce83000 [ 7441.236658] 0000000000000286 ffff88003d3e0000 ffff88003ceb9bd0 00000001001aa01e [ 7441.240415] Call Trace: [ 7441.241614] [] schedule+0x29/0x70 [ 7441.243930] [] schedule_timeout+0x166/0x2c0 [ 7441.246681] [] ? call_timer_fn+0x110/0x110 [ 7441.249339] [] schedule_timeout_uninterruptible+0x1e/0x20 [ 7441.252644] [] msleep+0x20/0x30 [ 7441.254812] [] hub_port_reset+0xf0/0x580 [ 7441.257400] [] hub_port_init+0x75/0xb40 [ 7441.259981] [] ? update_autosuspend+0x39/0x60 [ 7441.262817] [] ? 
pm_runtime_set_autosuspend_delay+0x50/0xa0 [ 7441.266212] [] hub_thread+0x71a/0x1750 [ 7441.268728] [] ? finish_wait+0x80/0x80 [ 7441.271272] [] ? usb_port_resume+0x670/0x670 [ 7441.274067] [] kthread+0xc9/0xe0 [ 7441.276305] [] ? flush_kthread_worker+0x80/0x80 [ 7441.279131] [] ret_from_fork+0x7c/0xb0 [ 7441.281659] [] ? flush_kthread_worker+0x80/0x80 Signed-off-by: Ross Lagerwall Cc: stable@vger.kernel.org Signed-off-by: David Vrabel --- drivers/xen/manage.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index f8bb36f9d9ce..bf1940706422 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -105,10 +105,16 @@ static void do_suspend(void) err = freeze_processes(); if (err) { - pr_err("%s: freeze failed %d\n", __func__, err); + pr_err("%s: freeze processes failed %d\n", __func__, err); goto out; } + err = freeze_kernel_threads(); + if (err) { + pr_err("%s: freeze kernel threads failed %d\n", __func__, err); + goto out_thaw; + } + err = dpm_suspend_start(PMSG_FREEZE); if (err) { pr_err("%s: dpm_suspend_start %d\n", __func__, err); -- cgit 1.4.1
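[Editor's note: a condensed sketch of the resulting freeze ordering in do_suspend() after this patch. Abbreviated; the error labels are assumed to match the existing function, where out_thaw leads to thaw_processes(), which unfreezes kernel threads as well as userspace.]

	static void do_suspend(void)
	{
		int err;
		/* ... */
		err = freeze_processes();		/* userspace first */
		if (err) {
			pr_err("%s: freeze processes failed %d\n", __func__, err);
			goto out;
		}

		err = freeze_kernel_threads();		/* then kthreads, e.g. khubd */
		if (err) {
			pr_err("%s: freeze kernel threads failed %d\n", __func__, err);
			goto out_thaw;			/* userspace must be thawed again */
		}

		err = dpm_suspend_start(PMSG_FREEZE);
		/* ... suspend, resume, then thaw everything on the way out ... */
	}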