summary refs log tree commit diff
path: root/arch/s390/include/asm/tlbflush.h
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2014-04-03 13:55:01 +0200
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2014-04-03 14:31:00 +0200
commit1b948d6caec4f28e3524244ca0f77c6ae8ddceef (patch)
treebc7e1d5800f10c39979d3f47872ba7047568f8a4 /arch/s390/include/asm/tlbflush.h
parent02a8f3abb708919149cb657a5202f4603f0c38e2 (diff)
downloadlinux-1b948d6caec4f28e3524244ca0f77c6ae8ddceef.tar.gz
s390/mm,tlb: optimize TLB flushing for zEC12
The zEC12 machines introduced the local-clearing control for the IDTE
and IPTE instruction. If the control is set only the TLB of the local
CPU is cleared of entries, either all entries of a single address space
for IDTE, or the entry for a single page-table entry for IPTE.
Without the local-clearing control the TLB flush is broadcasted to all
CPUs in the configuration, which is expensive.

The reset of the bit mask of the CPUs that need flushing after a
non-local IDTE is tricky. As TLB entries for an address space remain
in the TLB even if the address space is detached a new bit field is
required to keep track of attached CPUs vs. CPUs in the need of a
flush. After a non-local flush with IDTE the bit-field of attached CPUs
is copied to the bit-field of CPUs in need of a flush. The ordering
of operations on cpu_attach_mask, attach_count and mm_cpumask(mm) is
such that an underindication in mm_cpumask(mm) is prevented but an
overindication in mm_cpumask(mm) is possible.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/include/asm/tlbflush.h')
-rw-r--r--arch/s390/include/asm/tlbflush.h115
1 files changed, 95 insertions, 20 deletions
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index f9fef0425fee..16c9c88658c8 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -7,19 +7,41 @@
 #include <asm/pgalloc.h>
 
 /*
- * Flush all tlb entries on the local cpu.
+ * Flush all TLB entries on the local CPU.
  */
 static inline void __tlb_flush_local(void)
 {
 	asm volatile("ptlb" : : : "memory");
 }
 
-#ifdef CONFIG_SMP
 /*
- * Flush all tlb entries on all cpus.
+ * Flush TLB entries for a specific ASCE on all CPUs
  */
+static inline void __tlb_flush_idte(unsigned long asce)
+{
+	/* Global TLB flush for the mm */
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,0,%0,%1,0"
+		: : "a" (2048), "a" (asce) : "cc");
+}
+
+/*
+ * Flush TLB entries for a specific ASCE on the local CPU
+ */
+static inline void __tlb_flush_idte_local(unsigned long asce)
+{
+	/* Local TLB flush for the mm */
+	asm volatile(
+		"	.insn	rrf,0xb98e0000,0,%0,%1,1"
+		: : "a" (2048), "a" (asce) : "cc");
+}
+
+#ifdef CONFIG_SMP
 void smp_ptlb_all(void);
 
+/*
+ * Flush all TLB entries on all CPUs.
+ */
 static inline void __tlb_flush_global(void)
 {
 	register unsigned long reg2 asm("2");
@@ -42,36 +64,89 @@ static inline void __tlb_flush_global(void)
 		: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
 }
 
+/*
+ * Flush TLB entries for a specific mm on all CPUs (in case gmap is used
+ * this implicates multiple ASCEs!).
+ */
 static inline void __tlb_flush_full(struct mm_struct *mm)
 {
-	cpumask_t local_cpumask;
-
 	preempt_disable();
-	/*
-	 * If the process only ran on the local cpu, do a local flush.
-	 */
-	cpumask_copy(&local_cpumask, cpumask_of(smp_processor_id()));
-	if (cpumask_equal(mm_cpumask(mm), &local_cpumask))
+	atomic_add(0x10000, &mm->context.attach_count);
+	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		/* Local TLB flush */
 		__tlb_flush_local();
-	else
+	} else {
+		/* Global TLB flush */
 		__tlb_flush_global();
+		/* Reset TLB flush mask */
+		if (MACHINE_HAS_TLB_LC)
+			cpumask_copy(mm_cpumask(mm),
+				     &mm->context.cpu_attach_mask);
+	}
+	atomic_sub(0x10000, &mm->context.attach_count);
 	preempt_enable();
 }
+
+/*
+ * Flush TLB entries for a specific ASCE on all CPUs.
+ */
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+{
+	int active, count;
+
+	preempt_disable();
+	active = (mm == current->active_mm) ? 1 : 0;
+	count = atomic_add_return(0x10000, &mm->context.attach_count);
+	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
+	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
+		__tlb_flush_idte_local(asce);
+	} else {
+		if (MACHINE_HAS_IDTE)
+			__tlb_flush_idte(asce);
+		else
+			__tlb_flush_global();
+		/* Reset TLB flush mask */
+		if (MACHINE_HAS_TLB_LC)
+			cpumask_copy(mm_cpumask(mm),
+				     &mm->context.cpu_attach_mask);
+	}
+	atomic_sub(0x10000, &mm->context.attach_count);
+	preempt_enable();
+}
+
+static inline void __tlb_flush_kernel(void)
+{
+	if (MACHINE_HAS_IDTE)
+		__tlb_flush_idte((unsigned long) init_mm.pgd |
+				 init_mm.context.asce_bits);
+	else
+		__tlb_flush_global();
+}
 #else
-#define __tlb_flush_full(mm)	__tlb_flush_local()
 #define __tlb_flush_global()	__tlb_flush_local()
-#endif
+#define __tlb_flush_full(mm)	__tlb_flush_local()
 
 /*
- * Flush all tlb entries of a page table on all cpus.
+ * Flush TLB entries for a specific ASCE on all CPUs.
  */
-static inline void __tlb_flush_idte(unsigned long asce)
+static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 {
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,0,%0,%1,0"
-		: : "a" (2048), "a" (asce) : "cc" );
+	if (MACHINE_HAS_TLB_LC)
+		__tlb_flush_idte_local(asce);
+	else
+		__tlb_flush_local();
 }
 
+static inline void __tlb_flush_kernel(void)
+{
+	if (MACHINE_HAS_TLB_LC)
+		__tlb_flush_idte_local((unsigned long) init_mm.pgd |
+				       init_mm.context.asce_bits);
+	else
+		__tlb_flush_local();
+}
+#endif
+
 static inline void __tlb_flush_mm(struct mm_struct * mm)
 {
 	/*
@@ -80,7 +155,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 	 * only ran on the local cpu.
 	 */
 	if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
-		__tlb_flush_idte((unsigned long) mm->pgd |
+		__tlb_flush_asce(mm, (unsigned long) mm->pgd |
 				 mm->context.asce_bits);
 	else
 		__tlb_flush_full(mm);
@@ -130,7 +205,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
 static inline void flush_tlb_kernel_range(unsigned long start,
 					  unsigned long end)
 {
-	__tlb_flush_mm(&init_mm);
+	__tlb_flush_kernel();
 }
 
 #endif /* _S390_TLBFLUSH_H */