summary refs log tree commit diff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/pgalloc.h8
-rw-r--r--arch/s390/include/asm/pgtable.h39
-rw-r--r--arch/s390/include/asm/qdio.h119
-rw-r--r--arch/s390/include/asm/tlb.h94
-rw-r--r--arch/s390/kvm/kvm-s390.c1
-rw-r--r--arch/s390/kvm/sie64a.S2
-rw-r--r--arch/s390/mm/pgtable.c292
8 files changed, 223 insertions, 333 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 9fab2aa9c2c8..90d77bd078f5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -89,6 +89,7 @@ config S390
 	select HAVE_GET_USER_PAGES_FAST
 	select HAVE_ARCH_MUTEX_CPU_RELAX
 	select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+	select HAVE_RCU_TABLE_FREE if SMP
 	select ARCH_INLINE_SPIN_TRYLOCK
 	select ARCH_INLINE_SPIN_TRYLOCK_BH
 	select ARCH_INLINE_SPIN_LOCK
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f6314af3b354..38e71ebcd3c2 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -17,15 +17,15 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 
-#define check_pgt_cache()	do {} while (0)
-
 unsigned long *crst_table_alloc(struct mm_struct *);
 void crst_table_free(struct mm_struct *, unsigned long *);
-void crst_table_free_rcu(struct mm_struct *, unsigned long *);
 
 unsigned long *page_table_alloc(struct mm_struct *);
 void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mm_struct *, unsigned long *);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void __tlb_remove_table(void *_table);
+#endif
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index e4efacfe1b63..801fbe1d837d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -293,19 +293,6 @@ extern unsigned long VMALLOC_START;
  * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid.
  */
 
-/* Page status table bits for virtualization */
-#define RCP_ACC_BITS	0xf000000000000000UL
-#define RCP_FP_BIT	0x0800000000000000UL
-#define RCP_PCL_BIT	0x0080000000000000UL
-#define RCP_HR_BIT	0x0040000000000000UL
-#define RCP_HC_BIT	0x0020000000000000UL
-#define RCP_GR_BIT	0x0004000000000000UL
-#define RCP_GC_BIT	0x0002000000000000UL
-
-/* User dirty / referenced bit for KVM's migration feature */
-#define KVM_UR_BIT	0x0000800000000000UL
-#define KVM_UC_BIT	0x0000400000000000UL
-
 #ifndef __s390x__
 
 /* Bits in the segment table address-space-control-element */
@@ -325,6 +312,19 @@ extern unsigned long VMALLOC_START;
 #define _SEGMENT_ENTRY		(_SEGMENT_ENTRY_PTL)
 #define _SEGMENT_ENTRY_EMPTY	(_SEGMENT_ENTRY_INV)
 
+/* Page status table bits for virtualization */
+#define RCP_ACC_BITS	0xf0000000UL
+#define RCP_FP_BIT	0x08000000UL
+#define RCP_PCL_BIT	0x00800000UL
+#define RCP_HR_BIT	0x00400000UL
+#define RCP_HC_BIT	0x00200000UL
+#define RCP_GR_BIT	0x00040000UL
+#define RCP_GC_BIT	0x00020000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT	0x00008000UL
+#define KVM_UC_BIT	0x00004000UL
+
 #else /* __s390x__ */
 
 /* Bits in the segment/region table address-space-control-element */
@@ -367,6 +367,19 @@ extern unsigned long VMALLOC_START;
 #define _SEGMENT_ENTRY_LARGE	0x400	/* STE-format control, large page   */
 #define _SEGMENT_ENTRY_CO	0x100	/* change-recording override   */
 
+/* Page status table bits for virtualization */
+#define RCP_ACC_BITS	0xf000000000000000UL
+#define RCP_FP_BIT	0x0800000000000000UL
+#define RCP_PCL_BIT	0x0080000000000000UL
+#define RCP_HR_BIT	0x0040000000000000UL
+#define RCP_HC_BIT	0x0020000000000000UL
+#define RCP_GR_BIT	0x0004000000000000UL
+#define RCP_GC_BIT	0x0002000000000000UL
+
+/* User dirty / referenced bit for KVM's migration feature */
+#define KVM_UR_BIT	0x0000800000000000UL
+#define KVM_UC_BIT	0x0000400000000000UL
+
 #endif /* __s390x__ */
 
 /*
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 350e7ee5952d..15c97625df8d 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -139,110 +139,47 @@ struct slib {
 	struct slibe slibe[QDIO_MAX_BUFFERS_PER_Q];
 } __attribute__ ((packed, aligned(2048)));
 
-/**
- * struct sbal_flags - storage block address list flags
- * @last: last entry
- * @cont: contiguous storage
- * @frag: fragmentation
- */
-struct sbal_flags {
-	u8	: 1;
-	u8 last : 1;
-	u8 cont : 1;
-	u8	: 1;
-	u8 frag : 2;
-	u8	: 2;
-} __attribute__ ((packed));
-
-#define SBAL_FLAGS_FIRST_FRAG		0x04000000UL
-#define SBAL_FLAGS_MIDDLE_FRAG		0x08000000UL
-#define SBAL_FLAGS_LAST_FRAG		0x0c000000UL
-#define SBAL_FLAGS_LAST_ENTRY		0x40000000UL
-#define SBAL_FLAGS_CONTIGUOUS		0x20000000UL
+#define SBAL_EFLAGS_LAST_ENTRY		0x40
+#define SBAL_EFLAGS_CONTIGUOUS		0x20
+#define SBAL_EFLAGS_FIRST_FRAG		0x04
+#define SBAL_EFLAGS_MIDDLE_FRAG		0x08
+#define SBAL_EFLAGS_LAST_FRAG		0x0c
+#define SBAL_EFLAGS_MASK		0x6f
 
-#define SBAL_FLAGS0_DATA_CONTINUATION	0x20UL
+#define SBAL_SFLAGS0_PCI_REQ		0x40
+#define SBAL_SFLAGS0_DATA_CONTINUATION	0x20
 
 /* Awesome OpenFCP extensions */
-#define SBAL_FLAGS0_TYPE_STATUS		0x00UL
-#define SBAL_FLAGS0_TYPE_WRITE		0x08UL
-#define SBAL_FLAGS0_TYPE_READ		0x10UL
-#define SBAL_FLAGS0_TYPE_WRITE_READ	0x18UL
-#define SBAL_FLAGS0_MORE_SBALS		0x04UL
-#define SBAL_FLAGS0_COMMAND		0x02UL
-#define SBAL_FLAGS0_LAST_SBAL		0x00UL
-#define SBAL_FLAGS0_ONLY_SBAL		SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_MIDDLE_SBAL		SBAL_FLAGS0_MORE_SBALS
-#define SBAL_FLAGS0_FIRST_SBAL SBAL_FLAGS0_MORE_SBALS | SBAL_FLAGS0_COMMAND
-#define SBAL_FLAGS0_PCI			0x40
-
-/**
- * struct sbal_sbalf_0 - sbal flags for sbale 0
- * @pci: PCI indicator
- * @cont: data continuation
- * @sbtype: storage-block type (FCP)
- */
-struct sbal_sbalf_0 {
-	u8	  : 1;
-	u8 pci	  : 1;
-	u8 cont   : 1;
-	u8 sbtype : 2;
-	u8	  : 3;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_1 - sbal flags for sbale 1
- * @key: storage key
- */
-struct sbal_sbalf_1 {
-	u8     : 4;
-	u8 key : 4;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_14 - sbal flags for sbale 14
- * @erridx: error index
- */
-struct sbal_sbalf_14 {
-	u8	  : 4;
-	u8 erridx : 4;
-} __attribute__ ((packed));
-
-/**
- * struct sbal_sbalf_15 - sbal flags for sbale 15
- * @reason: reason for error state
- */
-struct sbal_sbalf_15 {
-	u8 reason;
-} __attribute__ ((packed));
-
-/**
- * union sbal_sbalf - storage block address list flags
- * @i0: sbalf0
- * @i1: sbalf1
- * @i14: sbalf14
- * @i15: sblaf15
- * @value: raw value
- */
-union sbal_sbalf {
-	struct sbal_sbalf_0  i0;
-	struct sbal_sbalf_1  i1;
-	struct sbal_sbalf_14 i14;
-	struct sbal_sbalf_15 i15;
-	u8 value;
-};
+#define SBAL_SFLAGS0_TYPE_STATUS	0x00
+#define SBAL_SFLAGS0_TYPE_WRITE		0x08
+#define SBAL_SFLAGS0_TYPE_READ		0x10
+#define SBAL_SFLAGS0_TYPE_WRITE_READ	0x18
+#define SBAL_SFLAGS0_MORE_SBALS		0x04
+#define SBAL_SFLAGS0_COMMAND		0x02
+#define SBAL_SFLAGS0_LAST_SBAL		0x00
+#define SBAL_SFLAGS0_ONLY_SBAL		SBAL_SFLAGS0_COMMAND
+#define SBAL_SFLAGS0_MIDDLE_SBAL	SBAL_SFLAGS0_MORE_SBALS
+#define SBAL_SFLAGS0_FIRST_SBAL (SBAL_SFLAGS0_MORE_SBALS | SBAL_SFLAGS0_COMMAND)
 
 /**
  * struct qdio_buffer_element - SBAL entry
- * @flags: flags
+ * @eflags: SBAL entry flags
+ * @scount: SBAL count
+ * @sflags: whole SBAL flags
  * @length: length
  * @addr: address
 */
 struct qdio_buffer_element {
-	u32 flags;
+	u8 eflags;
+	/* private: */
+	u8 res1;
+	/* public: */
+	u8 scount;
+	u8 sflags;
 	u32 length;
 #ifdef CONFIG_32BIT
 	/* private: */
-	void *reserved;
+	void *res2;
 	/* public: */
 #endif
 	void *addr;
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 77eee5477a52..c687a2c83462 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -26,67 +26,60 @@
 #include <linux/swap.h>
 #include <asm/processor.h>
 #include <asm/pgalloc.h>
-#include <asm/smp.h>
 #include <asm/tlbflush.h>
 
 struct mmu_gather {
 	struct mm_struct *mm;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	struct mmu_table_batch *batch;
+#endif
 	unsigned int fullmm;
-	unsigned int nr_ptes;
-	unsigned int nr_pxds;
-	unsigned int max;
-	void **array;
-	void *local[8];
+	unsigned int need_flush;
 };
 
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
-	unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+struct mmu_table_batch {
+	struct rcu_head		rcu;
+	unsigned int		nr;
+	void			*tables[0];
+};
 
-	if (addr) {
-		tlb->array = (void *) addr;
-		tlb->max = PAGE_SIZE / sizeof(void *);
-	}
-}
+#define MAX_TABLE_BATCH		\
+	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
+
+extern void tlb_table_flush(struct mmu_gather *tlb);
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#endif
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
 				  unsigned int full_mm_flush)
 {
 	tlb->mm = mm;
-	tlb->max = ARRAY_SIZE(tlb->local);
-	tlb->array = tlb->local;
 	tlb->fullmm = full_mm_flush;
+	tlb->need_flush = 0;
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb->batch = NULL;
+#endif
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
-	else
-		__tlb_alloc_page(tlb);
-	tlb->nr_ptes = 0;
-	tlb->nr_pxds = tlb->max;
 }
 
 static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
-	if (!tlb->fullmm && (tlb->nr_ptes > 0 || tlb->nr_pxds < tlb->max))
-		__tlb_flush_mm(tlb->mm);
-	while (tlb->nr_ptes > 0)
-		page_table_free_rcu(tlb->mm, tlb->array[--tlb->nr_ptes]);
-	while (tlb->nr_pxds < tlb->max)
-		crst_table_free_rcu(tlb->mm, tlb->array[tlb->nr_pxds++]);
+	if (!tlb->need_flush)
+		return;
+	tlb->need_flush = 0;
+	__tlb_flush_mm(tlb->mm);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	tlb_table_flush(tlb);
+#endif
 }
 
 static inline void tlb_finish_mmu(struct mmu_gather *tlb,
 				  unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
-
-	rcu_table_freelist_finish();
-
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
-
-	if (tlb->array != tlb->local)
-		free_pages((unsigned long) tlb->array, 0);
 }
 
 /*
@@ -112,12 +105,11 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				unsigned long address)
 {
-	if (!tlb->fullmm) {
-		tlb->array[tlb->nr_ptes++] = pte;
-		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb);
-	} else
-		page_table_free(tlb->mm, (unsigned long *) pte);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	if (!tlb->fullmm)
+		return page_table_free_rcu(tlb, (unsigned long *) pte);
+#endif
+	page_table_free(tlb->mm, (unsigned long *) pte);
 }
 
 /*
@@ -133,12 +125,11 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 31))
 		return;
-	if (!tlb->fullmm) {
-		tlb->array[--tlb->nr_pxds] = pmd;
-		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb);
-	} else
-		crst_table_free(tlb->mm, (unsigned long *) pmd);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	if (!tlb->fullmm)
+		return tlb_remove_table(tlb, pmd);
+#endif
+	crst_table_free(tlb->mm, (unsigned long *) pmd);
 #endif
 }
 
@@ -155,12 +146,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 #ifdef __s390x__
 	if (tlb->mm->context.asce_limit <= (1UL << 42))
 		return;
-	if (!tlb->fullmm) {
-		tlb->array[--tlb->nr_pxds] = pud;
-		if (tlb->nr_ptes >= tlb->nr_pxds)
-			tlb_flush_mmu(tlb);
-	} else
-		crst_table_free(tlb->mm, (unsigned long *) pud);
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+	if (!tlb->fullmm)
+		return tlb_remove_table(tlb, pud);
+#endif
+	crst_table_free(tlb->mm, (unsigned long *) pud);
 #endif
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 30ca85cce314..67345ae7ce8d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -731,6 +731,7 @@ static int __init kvm_s390_init(void)
 	}
 	memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
 	facilities[0] &= 0xff00fff3f47c0000ULL;
+	facilities[1] &= 0x201c000000000000ULL;
 	return 0;
 }
 
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
index ab0e041ac54c..5faa1b1b23fa 100644
--- a/arch/s390/kvm/sie64a.S
+++ b/arch/s390/kvm/sie64a.S
@@ -93,4 +93,6 @@ sie_err:
 
 	.section __ex_table,"a"
 	.quad	sie_inst,sie_err
+	.quad	sie_exit,sie_err
+	.quad	sie_reenter,sie_err
 	.previous
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index b09763fe5da1..37a23c223705 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -24,94 +24,12 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-struct rcu_table_freelist {
-	struct rcu_head rcu;
-	struct mm_struct *mm;
-	unsigned int pgt_index;
-	unsigned int crst_index;
-	unsigned long *table[0];
-};
-
-#define RCU_FREELIST_SIZE \
-	((PAGE_SIZE - sizeof(struct rcu_table_freelist)) \
-	  / sizeof(unsigned long))
-
-static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
-
-static void __page_table_free(struct mm_struct *mm, unsigned long *table);
-
-static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
-{
-	struct rcu_table_freelist **batchp = &__get_cpu_var(rcu_table_freelist);
-	struct rcu_table_freelist *batch = *batchp;
-
-	if (batch)
-		return batch;
-	batch = (struct rcu_table_freelist *) __get_free_page(GFP_ATOMIC);
-	if (batch) {
-		batch->mm = mm;
-		batch->pgt_index = 0;
-		batch->crst_index = RCU_FREELIST_SIZE;
-		*batchp = batch;
-	}
-	return batch;
-}
-
-static void rcu_table_freelist_callback(struct rcu_head *head)
-{
-	struct rcu_table_freelist *batch =
-		container_of(head, struct rcu_table_freelist, rcu);
-
-	while (batch->pgt_index > 0)
-		__page_table_free(batch->mm, batch->table[--batch->pgt_index]);
-	while (batch->crst_index < RCU_FREELIST_SIZE)
-		crst_table_free(batch->mm, batch->table[batch->crst_index++]);
-	free_page((unsigned long) batch);
-}
-
-void rcu_table_freelist_finish(void)
-{
-	struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist);
-	struct rcu_table_freelist *batch = *batchp;
-
-	if (!batch)
-		goto out;
-	call_rcu(&batch->rcu, rcu_table_freelist_callback);
-	*batchp = NULL;
-out:
-	put_cpu_var(rcu_table_freelist);
-}
-
-static void smp_sync(void *arg)
-{
-}
-
 #ifndef CONFIG_64BIT
 #define ALLOC_ORDER	1
-#define TABLES_PER_PAGE	4
-#define FRAG_MASK	15UL
-#define SECOND_HALVES	10UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-	memset(table + 256, 0, PAGE_SIZE/4);
-	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
-	memset(table + 768, 0, PAGE_SIZE/4);
-}
-
+#define FRAG_MASK	0x0f
 #else
 #define ALLOC_ORDER	2
-#define TABLES_PER_PAGE	2
-#define FRAG_MASK	3UL
-#define SECOND_HALVES	2UL
-
-void clear_table_pgstes(unsigned long *table)
-{
-	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
-	memset(table + 256, 0, PAGE_SIZE/2);
-}
-
+#define FRAG_MASK	0x03
 #endif
 
 unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
@@ -140,29 +58,6 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
 	free_pages((unsigned long) table, ALLOC_ORDER);
 }
 
-void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
-{
-	struct rcu_table_freelist *batch;
-
-	preempt_disable();
-	if (atomic_read(&mm->mm_users) < 2 &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		crst_table_free(mm, table);
-		goto out;
-	}
-	batch = rcu_table_freelist_get(mm);
-	if (!batch) {
-		smp_call_function(smp_sync, NULL, 1);
-		crst_table_free(mm, table);
-		goto out;
-	}
-	batch->table[--batch->crst_index] = table;
-	if (batch->pgt_index >= batch->crst_index)
-		rcu_table_freelist_finish();
-out:
-	preempt_enable();
-}
-
 #ifdef CONFIG_64BIT
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
 {
@@ -238,124 +133,175 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
 }
 #endif
 
+static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
+{
+	unsigned int old, new;
+
+	do {
+		old = atomic_read(v);
+		new = old ^ bits;
+	} while (atomic_cmpxchg(v, old, new) != old);
+	return new;
+}
+
 /*
  * page table entry allocation/free routines.
  */
+#ifdef CONFIG_PGSTE
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
+{
+	struct page *page;
+	unsigned long *table;
+
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	pgtable_page_ctor(page);
+	atomic_set(&page->_mapcount, 3);
+	table = (unsigned long *) page_to_phys(page);
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	return table;
+}
+
+static inline void page_table_free_pgste(unsigned long *table)
+{
+	struct page *page;
+
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	pgtable_page_ctor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
+}
+#endif
+
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
 	struct page *page;
 	unsigned long *table;
-	unsigned long bits;
+	unsigned int mask, bit;
 
-	bits = (mm->context.has_pgste) ? 3UL : 1UL;
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm))
+		return page_table_alloc_pgste(mm);
+#endif
+	/* Allocate fragments of a 4K page as 1K/2K page table */
 	spin_lock_bh(&mm->context.list_lock);
-	page = NULL;
+	mask = FRAG_MASK;
 	if (!list_empty(&mm->context.pgtable_list)) {
 		page = list_first_entry(&mm->context.pgtable_list,
 					struct page, lru);
-		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-			page = NULL;
+		table = (unsigned long *) page_to_phys(page);
+		mask = atomic_read(&page->_mapcount);
+		mask = mask | (mask >> 4);
 	}
-	if (!page) {
+	if ((mask & FRAG_MASK) == FRAG_MASK) {
 		spin_unlock_bh(&mm->context.list_lock);
 		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
 		if (!page)
 			return NULL;
 		pgtable_page_ctor(page);
-		page->flags &= ~FRAG_MASK;
+		atomic_set(&page->_mapcount, 1);
 		table = (unsigned long *) page_to_phys(page);
-		if (mm->context.has_pgste)
-			clear_table_pgstes(table);
-		else
-			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
+	} else {
+		for (bit = 1; mask & bit; bit <<= 1)
+			table += PTRS_PER_PTE;
+		mask = atomic_xor_bits(&page->_mapcount, bit);
+		if ((mask & FRAG_MASK) == FRAG_MASK)
+			list_del(&page->lru);
 	}
-	table = (unsigned long *) page_to_phys(page);
-	while (page->flags & bits) {
-		table += 256;
-		bits <<= 1;
-	}
-	page->flags |= bits;
-	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
-		list_move_tail(&page->lru, &mm->context.pgtable_list);
 	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
-static void __page_table_free(struct mm_struct *mm, unsigned long *table)
+void page_table_free(struct mm_struct *mm, unsigned long *table)
 {
 	struct page *page;
-	unsigned long bits;
+	unsigned int bit, mask;
 
-	bits = ((unsigned long) table) & 15;
-	table = (unsigned long *)(((unsigned long) table) ^ bits);
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm))
+		return page_table_free_pgste(table);
+#endif
+	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	page->flags ^= bits;
-	if (!(page->flags & FRAG_MASK)) {
+	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
+	spin_lock_bh(&mm->context.list_lock);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit);
+	if (mask & FRAG_MASK)
+		list_add(&page->lru, &mm->context.pgtable_list);
+	spin_unlock_bh(&mm->context.list_lock);
+	if (mask == 0) {
 		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 	}
 }
 
-void page_table_free(struct mm_struct *mm, unsigned long *table)
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+
+static void __page_table_free_rcu(void *table, unsigned bit)
 {
 	struct page *page;
-	unsigned long bits;
 
-	bits = (mm->context.has_pgste) ? 3UL : 1UL;
-	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#ifdef CONFIG_PGSTE
+	if (bit == FRAG_MASK)
+		return page_table_free_pgste(table);
+#endif
+	/* Free 1K/2K page table fragment of a 4K page */
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	spin_lock_bh(&mm->context.list_lock);
-	page->flags ^= bits;
-	if (page->flags & FRAG_MASK) {
-		/* Page now has some free pgtable fragments. */
-		if (!list_empty(&page->lru))
-			list_move(&page->lru, &mm->context.pgtable_list);
-		page = NULL;
-	} else
-		/* All fragments of the 4K page have been freed. */
-		list_del(&page->lru);
-	spin_unlock_bh(&mm->context.list_lock);
-	if (page) {
+	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
 		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
 		__free_page(page);
 	}
 }
 
-void page_table_free_rcu(struct mm_struct *mm, unsigned long *table)
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
 {
-	struct rcu_table_freelist *batch;
+	struct mm_struct *mm;
 	struct page *page;
-	unsigned long bits;
+	unsigned int bit, mask;
 
-	preempt_disable();
-	if (atomic_read(&mm->mm_users) < 2 &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		page_table_free(mm, table);
-		goto out;
-	}
-	batch = rcu_table_freelist_get(mm);
-	if (!batch) {
-		smp_call_function(smp_sync, NULL, 1);
-		page_table_free(mm, table);
-		goto out;
+	mm = tlb->mm;
+#ifdef CONFIG_PGSTE
+	if (mm_has_pgste(mm)) {
+		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		tlb_remove_table(tlb, table);
+		return;
 	}
-	bits = (mm->context.has_pgste) ? 3UL : 1UL;
-	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
+#endif
+	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock_bh(&mm->context.list_lock);
-	/* Delayed freeing with rcu prevents reuse of pgtable fragments */
-	list_del_init(&page->lru);
+	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
+		list_del(&page->lru);
+	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
+	if (mask & FRAG_MASK)
+		list_add_tail(&page->lru, &mm->context.pgtable_list);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *)(((unsigned long) table) | bits);
-	batch->table[batch->pgt_index++] = table;
-	if (batch->pgt_index >= batch->crst_index)
-		rcu_table_freelist_finish();
-out:
-	preempt_enable();
+	table = (unsigned long *) (__pa(table) | (bit << 4));
+	tlb_remove_table(tlb, table);
 }
 
+void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long) _table & PAGE_MASK);
+	unsigned type = (unsigned long) _table & ~PAGE_MASK;
+
+	if (type)
+		__page_table_free_rcu(table, type);
+	else
+		free_pages((unsigned long) table, ALLOC_ORDER);
+}
+
+#endif
+
 /*
  * switch on pgstes for its userspace process (for kvm)
  */
@@ -369,7 +315,7 @@ int s390_enable_sie(void)
 		return -EINVAL;
 
 	/* Do we have pgstes? if yes, we are done */
-	if (tsk->mm->context.has_pgste)
+	if (mm_has_pgste(tsk->mm))
 		return 0;
 
 	/* lets check if we are allowed to replace the mm */