Diffstat (limited to 'arch')
-rw-r--r--  arch/sparc/include/asm/hugetlb.h         |   5
-rw-r--r--  arch/sparc/include/asm/mmu_64.h          |   2
-rw-r--r--  arch/sparc/include/asm/mmu_context_64.h  |   2
-rw-r--r--  arch/sparc/include/asm/page_64.h         |   7
-rw-r--r--  arch/sparc/include/asm/pgalloc_64.h      |   4
-rw-r--r--  arch/sparc/include/asm/pgtable_64.h      | 177
-rw-r--r--  arch/sparc/include/asm/tsb.h             |  94
-rw-r--r--  arch/sparc/kernel/sun4v_tlb_miss.S       |   2
-rw-r--r--  arch/sparc/kernel/tsb.S                  |   9
-rw-r--r--  arch/sparc/mm/fault_64.c                 |   4
-rw-r--r--  arch/sparc/mm/hugetlbpage.c              |  50
-rw-r--r--  arch/sparc/mm/init_64.c                  | 204
-rw-r--r--  arch/sparc/mm/tlb.c                      | 118
-rw-r--r--  arch/sparc/mm/tsb.c                      |  14
14 files changed, 582 insertions(+), 110 deletions(-)
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index e7927c9758a1..8c5eed6d267f 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -10,7 +10,10 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep);
 
-void hugetlb_prefault_arch_hook(struct mm_struct *mm);
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+	hugetlb_setup(mm);
+}
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index 31977c8dd942..76092c4dd277 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -82,7 +82,7 @@ struct tsb_config {
 
 #define MM_TSB_BASE	0
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define MM_TSB_HUGE	1
 #define MM_NUM_TSBS	2
 #else
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index a97fd085cebe..9191ca62ed9c 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -36,7 +36,7 @@ static inline void tsb_context_switch(struct mm_struct *mm)
 {
 	__tsb_context_switch(__pa(mm->pgd),
 			     &mm->context.tsb_block[0],
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 			     (mm->context.tsb_block[1].tsb ?
 			      &mm->context.tsb_block[1] :
 			      NULL)
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 087a5c505c69..4b39f74d6ca0 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -17,7 +17,7 @@
 
 #define HPAGE_SHIFT		22
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
@@ -26,6 +26,11 @@
 
 #ifndef __ASSEMBLY__
 
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+struct mm_struct;
+extern void hugetlb_setup(struct mm_struct *mm);
+#endif
+
 #define WANT_PAGE_VIRTUAL
 
 extern void _clear_page(void *page);
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 0ebca93ef0f5..bcfe063bce23 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -45,8 +45,8 @@ extern pgtable_t pte_alloc_one(struct mm_struct *mm,
 extern void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
 extern void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 
-#define pmd_populate_kernel(MM, PMD, PTE)	pmd_set(PMD, PTE)
-#define pmd_populate(MM, PMD, PTE)		pmd_set(PMD, PTE)
+#define pmd_populate_kernel(MM, PMD, PTE)	pmd_set(MM, PMD, PTE)
+#define pmd_populate(MM, PMD, PTE)		pmd_set(MM, PMD, PTE)
 #define pmd_pgtable(PMD)			((pte_t *)__pmd_page(PMD))
 
 #define check_pgt_cache()	do { } while (0)
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index af3cd7a9e9ac..95515f1e7cef 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -63,10 +63,31 @@
 #error Page table parameters do not cover virtual address space properly.
 #endif
 
+#if (PMD_SHIFT != HPAGE_SHIFT)
+#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
+#endif
+
 /* PMDs point to PTE tables which are 4K aligned.  */
 #define PMD_PADDR	_AC(0xfffffffe,UL)
 #define PMD_PADDR_SHIFT	_AC(11,UL)
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_ISHUGE	_AC(0x00000001,UL)
+
+/* This is the PMD layout when PMD_ISHUGE is set.  With 4MB huge
+ * pages, this frees up a bunch of bits in the layout that we can
+ * use for the protection settings and software metadata.
+ */
+#define PMD_HUGE_PADDR		_AC(0xfffff800,UL)
+#define PMD_HUGE_PROTBITS	_AC(0x000007ff,UL)
+#define PMD_HUGE_PRESENT	_AC(0x00000400,UL)
+#define PMD_HUGE_WRITE		_AC(0x00000200,UL)
+#define PMD_HUGE_DIRTY		_AC(0x00000100,UL)
+#define PMD_HUGE_ACCESSED	_AC(0x00000080,UL)
+#define PMD_HUGE_EXEC		_AC(0x00000040,UL)
+#define PMD_HUGE_SPLITTING	_AC(0x00000020,UL)
+#endif
+
 /* PGDs point to PMD tables which are 8K aligned.  */
 #define PGD_PADDR	_AC(0xfffffffc,UL)
 #define PGD_PADDR_SHIFT	_AC(11,UL)
@@ -219,6 +240,19 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 }
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot);
+#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
+
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	/* Do nothing, mk_pmd() does this part.  */
+	return pmd;
+}
+#endif
+
 /* This one can be done with two shifts.  */
 static inline unsigned long pte_pfn(pte_t pte)
 {
@@ -588,19 +622,130 @@ static inline unsigned long pte_special(pte_t pte)
 	return pte_val(pte) & _PAGE_SPECIAL;
 }
 
-#define pmd_set(pmdp, ptep)	\
-	(pmd_val(*(pmdp)) = (__pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT))
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline int pmd_young(pmd_t pmd)
+{
+	return pmd_val(pmd) & PMD_HUGE_ACCESSED;
+}
+
+static inline int pmd_write(pmd_t pmd)
+{
+	return pmd_val(pmd) & PMD_HUGE_WRITE;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	unsigned long val = pmd_val(pmd) & PMD_HUGE_PADDR;
+
+	return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT);
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) ==
+		(PMD_ISHUGE | PMD_HUGE_PRESENT);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) ==
+		(PMD_ISHUGE|PMD_HUGE_SPLITTING);
+}
+
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) & PMD_ISHUGE;
+}
+
+#define has_transparent_hugepage() 1
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~PMD_HUGE_ACCESSED;
+	return pmd;
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~PMD_HUGE_WRITE;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	pmd_val(pmd) |= PMD_HUGE_DIRTY;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+	return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+	pmd_val(pmd) |= PMD_HUGE_WRITE;
+	return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~PMD_HUGE_PRESENT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	pmd_val(pmd) |= PMD_HUGE_SPLITTING;
+	return pmd;
+}
+
+extern pgprot_t pmd_pgprot(pmd_t entry);
+#endif
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return pmd_val(pmd) != 0U;
+}
+
+#define pmd_none(pmd)			(!pmd_val(pmd))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+#else
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+#endif
+
+static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+{
+	unsigned long val = __pa((unsigned long) (ptep)) >> PMD_PADDR_SHIFT;
+
+	pmd_val(*pmdp) = val;
+}
+
 #define pud_set(pudp, pmdp)	\
 	(pud_val(*(pudp)) = (__pa((unsigned long) (pmdp)) >> PGD_PADDR_SHIFT))
-#define __pmd_page(pmd)		\
-	((unsigned long) __va((((unsigned long)pmd_val(pmd))<<PMD_PADDR_SHIFT)))
+static inline unsigned long __pmd_page(pmd_t pmd)
+{
+	unsigned long paddr = (unsigned long) pmd_val(pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_val(pmd) & PMD_ISHUGE)
+		paddr &= PMD_HUGE_PADDR;
+#endif
+	paddr <<= PMD_PADDR_SHIFT;
+	return ((unsigned long) __va(paddr));
+}
 #define pmd_page(pmd) 			virt_to_page((void *)__pmd_page(pmd))
 #define pud_page_vaddr(pud)		\
 	((unsigned long) __va((((unsigned long)pud_val(pud))<<PGD_PADDR_SHIFT)))
 #define pud_page(pud) 			virt_to_page((void *)pud_page_vaddr(pud))
-#define pmd_none(pmd)			(!pmd_val(pmd))
 #define pmd_bad(pmd)			(0)
-#define pmd_present(pmd)		(pmd_val(pmd) != 0U)
 #define pmd_clear(pmdp)			(pmd_val(*(pmdp)) = 0U)
 #define pud_none(pud)			(!pud_val(pud))
 #define pud_bad(pud)			(0)
@@ -634,6 +779,16 @@ static inline unsigned long pte_special(pte_t pte)
 extern void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
 			  pte_t *ptep, pte_t orig, int fullmm);
 
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr,
+				       pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+	set_pmd_at(mm, addr, pmdp, __pmd(0U));
+	return pmd;
+}
+
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 			     pte_t *ptep, pte_t pte, int fullmm)
 {
@@ -689,6 +844,16 @@ extern void mmu_info(struct seq_file *);
 
 struct vm_area_struct;
 extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+#endif
 
 /* Encode and de-code a swap entry */
 #define __swp_type(entry)	(((entry).val >> PAGE_SHIFT) & 0xffUL)
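
[Editor's note: illustration, not part of the patch]  The pgtable_64.h hunks above pack a 4MB transparent huge page into a single PMD: the physical frame sits in PMD_HUGE_PADDR (still shifted right by PMD_PADDR_SHIFT, as for pointer-style PMDs), while the low 11 PMD_HUGE_PROTBITS carry software protection and state bits.  The standalone sketch below decodes such an entry using the constants from the hunk; PAGE_SHIFT is 13 for sparc64's 8K base pages, and the PFN in main() is made up for the example.

#include <stdio.h>

typedef unsigned long sk_pmd_t;			/* simplified stand-in for pmd_t */

#define SK_PMD_ISHUGE		0x00000001UL
#define SK_PMD_HUGE_PADDR	0xfffff800UL
#define SK_PMD_HUGE_PRESENT	0x00000400UL
#define SK_PMD_HUGE_WRITE	0x00000200UL
#define SK_PAGE_SHIFT		13UL		/* sparc64 8K base pages */
#define SK_PMD_PADDR_SHIFT	11UL

/* Mirrors pmd_pfn(): mask off the software bits, undo the packing shift. */
static unsigned long sk_pmd_pfn(sk_pmd_t pmd)
{
	return (pmd & SK_PMD_HUGE_PADDR) >> (SK_PAGE_SHIFT - SK_PMD_PADDR_SHIFT);
}

/* Mirrors pmd_large(): a mapped huge page needs both ISHUGE and PRESENT. */
static int sk_pmd_large(sk_pmd_t pmd)
{
	return (pmd & (SK_PMD_ISHUGE | SK_PMD_HUGE_PRESENT)) ==
	       (SK_PMD_ISHUGE | SK_PMD_HUGE_PRESENT);
}

int main(void)
{
	unsigned long pfn = 0x1200UL;		/* hypothetical frame number */
	sk_pmd_t pmd = (pfn << (SK_PAGE_SHIFT - SK_PMD_PADDR_SHIFT)) |
		       SK_PMD_ISHUGE | SK_PMD_HUGE_PRESENT | SK_PMD_HUGE_WRITE;

	printf("pfn=%#lx large=%d\n", sk_pmd_pfn(pmd), sk_pmd_large(pmd));
	return 0;
}
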
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index ef8cd1a174f1..b4c258de4443 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -157,10 +157,86 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	andn		REG2, 0x7, REG2; \
 	add		REG1, REG2, REG1;
 
-	/* Do a user page table walk in MMU globals.  Leaves physical PTE
-	 * pointer in REG1.  Jumps to FAIL_LABEL on early page table walk
-	 * termination.  Physical base of page tables is in PHYS_PGD which
-	 * will not be modified.
+	/* This macro exists only to make the PMD translator below easier
+	 * to read.  It hides the ELF section switch for the sun4v code
+	 * patching.
+	 */
+#define OR_PTE_BIT(REG, NAME)				\
+661:	or		REG, _PAGE_##NAME##_4U, REG;	\
+	.section	.sun4v_1insn_patch, "ax";	\
+	.word		661b;				\
+	or		REG, _PAGE_##NAME##_4V, REG;	\
+	.previous;
+
+	/* Load into REG the PTE value for VALID, CACHE, and SZHUGE.  */
+#define BUILD_PTE_VALID_SZHUGE_CACHE(REG)				   \
+661:	sethi		%uhi(_PAGE_VALID|_PAGE_SZHUGE_4U), REG;		   \
+	.section	.sun4v_1insn_patch, "ax";			   \
+	.word		661b;						   \
+	sethi		%uhi(_PAGE_VALID), REG;				   \
+	.previous;							   \
+	sllx		REG, 32, REG;					   \
+661:	or		REG, _PAGE_CP_4U|_PAGE_CV_4U, REG;		   \
+	.section	.sun4v_1insn_patch, "ax";			   \
+	.word		661b;						   \
+	or		REG, _PAGE_CP_4V|_PAGE_CV_4V|_PAGE_SZHUGE_4V, REG; \
+	.previous;
+
+	/* PMD has been loaded into REG1, interpret the value, seeing
+	 * if it is a HUGE PMD or a normal one.  If it is not valid
+	 * then jump to FAIL_LABEL.  If it is a HUGE PMD, and it
+	 * translates to a valid PTE, branch to PTE_LABEL.
+	 *
+	 * We translate the PMD by hand, one bit at a time,
+	 * constructing the huge PTE.
+	 *
+	 * So we construct the PTE in REG2 as follows:
+	 *
+	 * 1) Extract the PMD PFN from REG1 and place it into REG2.
+	 *
+	 * 2) Translate PMD protection bits in REG1 into REG2, one bit
+	 *    at a time using andcc tests on REG1 and OR's into REG2.
+	 *
+	 *    Only two bits to be concerned with here, EXEC and WRITE.
+	 *    Now REG1 is freed up and we can use it as a temporary.
+	 *
+	 * 3) Construct the VALID, CACHE, and page size PTE bits in
+	 *    REG1, OR with REG2 to form final PTE.
+	 */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL;				      \
+	 andcc		REG1, PMD_ISHUGE, %g0;				      \
+	be,pt		%xcc, 700f;					      \
+	 and		REG1, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED, REG2;	      \
+	cmp		REG2, PMD_HUGE_PRESENT|PMD_HUGE_ACCESSED;	      \
+	bne,pn		%xcc, FAIL_LABEL;				      \
+	 andn		REG1, PMD_HUGE_PROTBITS, REG2;			      \
+	sllx		REG2, PMD_PADDR_SHIFT, REG2;			      \
+	/* REG2 now holds PFN << PAGE_SHIFT */				      \
+	andcc		REG1, PMD_HUGE_EXEC, %g0;			      \
+	bne,a,pt	%xcc, 1f;					      \
+	 OR_PTE_BIT(REG2, EXEC);					      \
+1:	andcc		REG1, PMD_HUGE_WRITE, %g0;			      \
+	bne,a,pt	%xcc, 1f;					      \
+	 OR_PTE_BIT(REG2, W);						      \
+	/* REG1 can now be clobbered, build final PTE */		      \
+1:	BUILD_PTE_VALID_SZHUGE_CACHE(REG1);				      \
+	ba,pt		%xcc, PTE_LABEL;				      \
+	 or		REG1, REG2, REG1;				      \
+700:
+#else
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL; \
+	 nop;
+#endif
+
+	/* Do a user page table walk in MMU globals.  Leaves final,
+	 * valid, PTE value in REG1.  Jumps to FAIL_LABEL on early
+	 * page table walk termination or if the PTE is not valid.
+	 *
+	 * Physical base of page tables is in PHYS_PGD which will not
+	 * be modified.
 	 *
 	 * VADDR will not be clobbered, but REG1 and REG2 will.
 	 */
@@ -175,12 +251,16 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	sllx		REG1, PGD_PADDR_SHIFT, REG1; \
 	andn		REG2, 0x3, REG2; \
 	lduwa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
-	brz,pn		REG1, FAIL_LABEL; \
-	 sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
+	sllx		VADDR, 64 - PMD_SHIFT, REG2; \
 	srlx		REG2, 64 - (PAGE_SHIFT - 1), REG2; \
 	sllx		REG1, PMD_PADDR_SHIFT, REG1; \
 	andn		REG2, 0x7, REG2; \
-	add		REG1, REG2, REG1;
+	add		REG1, REG2, REG1; \
+	ldxa		[REG1] ASI_PHYS_USE_EC, REG1; \
+	brgez,pn	REG1, FAIL_LABEL; \
+	 nop; \
+800:
 
 /* Lookup a OBP mapping on VADDR in the prom_trans[] table at TL>0.
  * If no entry is found, FAIL_LABEL will be branched to.  On success
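
[Editor's note: illustration, not part of the patch]  USER_PGTABLE_CHECK_PMD_HUGE above synthesizes the huge PTE by hand at trap level 1.  The sketch below restates the huge branch in plain C: reject the entry unless it is present and accessed, extract the physical address, translate EXEC/WRITE, then OR in the VALID, cache, and page-size bits.  The SK_PTE_* values are invented placeholders, not the real _PAGE_*_4U/_4V encodings, and a non-huge PMD (which the macro sends on to the normal PTE walk at label 700) is simply reported as "fail" here.

#include <stdio.h>

/* PMD bits copied from the pgtable_64.h hunk earlier in this patch. */
#define SK_PMD_ISHUGE		0x00000001UL
#define SK_PMD_HUGE_PROTBITS	0x000007ffUL
#define SK_PMD_HUGE_PRESENT	0x00000400UL
#define SK_PMD_HUGE_WRITE	0x00000200UL
#define SK_PMD_HUGE_ACCESSED	0x00000080UL
#define SK_PMD_HUGE_EXEC	0x00000040UL
#define SK_PMD_PADDR_SHIFT	11UL

/* Placeholder PTE bits, invented for the sketch. */
#define SK_PTE_VALID	(1UL << 63)
#define SK_PTE_SZHUGE	(1UL << 62)
#define SK_PTE_CACHE	(1UL << 61)
#define SK_PTE_EXEC	(1UL << 60)
#define SK_PTE_WRITE	(1UL << 59)

/* Returns the synthesized huge PTE, or 0 for "take the fault path". */
static unsigned long sk_huge_pmd_to_pte(unsigned long pmd)
{
	unsigned long pte;

	if (!(pmd & SK_PMD_ISHUGE))
		return 0;
	if ((pmd & (SK_PMD_HUGE_PRESENT | SK_PMD_HUGE_ACCESSED)) !=
	    (SK_PMD_HUGE_PRESENT | SK_PMD_HUGE_ACCESSED))
		return 0;

	/* Step 1: physical address, i.e. PFN << PAGE_SHIFT. */
	pte = (pmd & ~SK_PMD_HUGE_PROTBITS) << SK_PMD_PADDR_SHIFT;

	/* Step 2: protection bits, one at a time. */
	if (pmd & SK_PMD_HUGE_EXEC)
		pte |= SK_PTE_EXEC;
	if (pmd & SK_PMD_HUGE_WRITE)
		pte |= SK_PTE_WRITE;

	/* Step 3: valid, cacheable, huge page size. */
	return pte | SK_PTE_VALID | SK_PTE_CACHE | SK_PTE_SZHUGE;
}

int main(void)
{
	unsigned long pmd = 0x00004800UL | SK_PMD_ISHUGE | SK_PMD_HUGE_PRESENT |
			    SK_PMD_HUGE_ACCESSED | SK_PMD_HUGE_WRITE;

	printf("pte=%#lx\n", sk_huge_pmd_to_pte(pmd));
	return 0;
}
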
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index e1fbf8c75787..bde867fd71e8 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -176,7 +176,7 @@ sun4v_tsb_miss_common:
 
 	sub	%g2, TRAP_PER_CPU_FAULT_INFO, %g2
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mov	SCRATCHPAD_UTSBREG2, %g5
 	ldxa	[%g5] ASI_SCRATCHPAD, %g5
 	cmp	%g5, -1
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index db15d123f054..d4bdc7a62375 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -49,7 +49,7 @@ tsb_miss_page_table_walk:
 	/* Before committing to a full page table walk,
 	 * check the huge page TSB.
 	 */
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 
 661:	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
 	nop
@@ -110,12 +110,9 @@ tsb_miss_page_table_walk:
 tsb_miss_page_table_walk_sun4v_fastpath:
 	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
 
-	/* Load and check PTE.  */
-	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
-	brgez,pn	%g5, tsb_do_fault
-	 nop
+	/* Valid PTE is now in %g5.  */
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 661:	sethi		%uhi(_PAGE_SZALL_4U), %g7
 	sllx		%g7, 32, %g7
 	.section	.sun4v_2insn_patch, "ax"
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 413d29263304..2976dba1ebaf 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -465,13 +465,13 @@ good_area:
 	up_read(&mm->mmap_sem);
 
 	mm_rss = get_mm_rss(mm);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss -= (mm->context.huge_pte_count * (HPAGE_SIZE / PAGE_SIZE));
 #endif
 	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
 		tsb_grow(mm, MM_TSB_BASE, mm_rss);
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.huge_pte_count;
 	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit))
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 07e14535375c..f76f83d5ac63 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -303,53 +303,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
 	return NULL;
 }
-
-static void context_reload(void *__data)
-{
-	struct mm_struct *mm = __data;
-
-	if (mm == current->mm)
-		load_secondary_context(mm);
-}
-
-void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
-	struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
-
-	if (likely(tp->tsb != NULL))
-		return;
-
-	tsb_grow(mm, MM_TSB_HUGE, 0);
-	tsb_context_switch(mm);
-	smp_tsb_sync(mm);
-
-	/* On UltraSPARC-III+ and later, configure the second half of
-	 * the Data-TLB for huge pages.
-	 */
-	if (tlb_type == cheetah_plus) {
-		unsigned long ctx;
-
-		spin_lock(&ctx_alloc_lock);
-		ctx = mm->context.sparc64_ctx_val;
-		ctx &= ~CTX_PGSZ_MASK;
-		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
-		ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
-
-		if (ctx != mm->context.sparc64_ctx_val) {
-			/* When changing the page size fields, we
-			 * must perform a context flush so that no
-			 * stale entries match.  This flush must
-			 * occur with the original context register
-			 * settings.
-			 */
-			do_flush_tlb_mm(mm);
-
-			/* Reload the context register of all processors
-			 * also executing in this address space.
-			 */
-			mm->context.sparc64_ctx_val = ctx;
-			on_each_cpu(context_reload, mm, 0);
-		}
-		spin_unlock(&ctx_alloc_lock);
-	}
-}
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 12ef4ea60c88..9e28a118e6a4 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -306,12 +306,24 @@ static void flush_dcache(unsigned long pfn)
 	}
 }
 
+/* mm->context.lock must be held */
+static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
+				    unsigned long tsb_hash_shift, unsigned long address,
+				    unsigned long tte)
+{
+	struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
+	unsigned long tag;
+
+	tsb += ((address >> tsb_hash_shift) &
+		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
+	tag = (address >> 22UL);
+	tsb_insert(tsb, tag, tte);
+}
+
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
+	unsigned long tsb_index, tsb_hash_shift, flags;
 	struct mm_struct *mm;
-	struct tsb *tsb;
-	unsigned long tag, flags;
-	unsigned long tsb_index, tsb_hash_shift;
 	pte_t pte = *ptep;
 
 	if (tlb_type != hypervisor) {
@@ -328,7 +340,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 	spin_lock_irqsave(&mm->context.lock, flags);
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
 		if ((tlb_type == hypervisor &&
 		     (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
@@ -340,11 +352,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 	}
 #endif
 
-	tsb = mm->context.tsb_block[tsb_index].tsb;
-	tsb += ((address >> tsb_hash_shift) &
-		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
-	tag = (address >> 22UL);
-	tsb_insert(tsb, tag, pte_val(pte));
+	__update_mmu_tsb_insert(mm, tsb_index, tsb_hash_shift,
+				address, pte_val(pte));
 
 	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
@@ -2568,3 +2577,180 @@ void pgtable_free(void *table, bool is_page)
 	else
 		kmem_cache_free(pgtable_cache, table);
 }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
+{
+	if (pgprot_val(pgprot) & _PAGE_VALID)
+		pmd_val(pmd) |= PMD_HUGE_PRESENT;
+	if (tlb_type == hypervisor) {
+		if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
+			pmd_val(pmd) |= PMD_HUGE_WRITE;
+		if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
+			pmd_val(pmd) |= PMD_HUGE_EXEC;
+
+		if (!for_modify) {
+			if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
+				pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+			if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
+				pmd_val(pmd) |= PMD_HUGE_DIRTY;
+		}
+	} else {
+		if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
+			pmd_val(pmd) |= PMD_HUGE_WRITE;
+		if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
+			pmd_val(pmd) |= PMD_HUGE_EXEC;
+
+		if (!for_modify) {
+			if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
+				pmd_val(pmd) |= PMD_HUGE_ACCESSED;
+			if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
+				pmd_val(pmd) |= PMD_HUGE_DIRTY;
+		}
+	}
+
+	return pmd;
+}
+
+pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+	pmd_t pmd;
+
+	pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
+	pmd_val(pmd) |= PMD_ISHUGE;
+	pmd = pmd_set_protbits(pmd, pgprot, false);
+	return pmd;
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
+			  PMD_HUGE_WRITE |
+			  PMD_HUGE_EXEC);
+	pmd = pmd_set_protbits(pmd, newprot, true);
+	return pmd;
+}
+
+pgprot_t pmd_pgprot(pmd_t entry)
+{
+	unsigned long pte = 0;
+
+	if (pmd_val(entry) & PMD_HUGE_PRESENT)
+		pte |= _PAGE_VALID;
+
+	if (tlb_type == hypervisor) {
+		if (pmd_val(entry) & PMD_HUGE_PRESENT)
+			pte |= _PAGE_PRESENT_4V;
+		if (pmd_val(entry) & PMD_HUGE_EXEC)
+			pte |= _PAGE_EXEC_4V;
+		if (pmd_val(entry) & PMD_HUGE_WRITE)
+			pte |= _PAGE_W_4V;
+		if (pmd_val(entry) & PMD_HUGE_ACCESSED)
+			pte |= _PAGE_ACCESSED_4V;
+		if (pmd_val(entry) & PMD_HUGE_DIRTY)
+			pte |= _PAGE_MODIFIED_4V;
+		pte |= _PAGE_CP_4V|_PAGE_CV_4V;
+	} else {
+		if (pmd_val(entry) & PMD_HUGE_PRESENT)
+			pte |= _PAGE_PRESENT_4U;
+		if (pmd_val(entry) & PMD_HUGE_EXEC)
+			pte |= _PAGE_EXEC_4U;
+		if (pmd_val(entry) & PMD_HUGE_WRITE)
+			pte |= _PAGE_W_4U;
+		if (pmd_val(entry) & PMD_HUGE_ACCESSED)
+			pte |= _PAGE_ACCESSED_4U;
+		if (pmd_val(entry) & PMD_HUGE_DIRTY)
+			pte |= _PAGE_MODIFIED_4U;
+		pte |= _PAGE_CP_4U|_PAGE_CV_4U;
+	}
+
+	return __pgprot(pte);
+}
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd)
+{
+	unsigned long pte, flags;
+	struct mm_struct *mm;
+	pmd_t entry = *pmd;
+	pgprot_t prot;
+
+	if (!pmd_large(entry) || !pmd_young(entry))
+		return;
+
+	pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
+	pte <<= PMD_PADDR_SHIFT;
+	pte |= _PAGE_VALID;
+
+	prot = pmd_pgprot(entry);
+
+	if (tlb_type == hypervisor)
+		pgprot_val(prot) |= _PAGE_SZHUGE_4V;
+	else
+		pgprot_val(prot) |= _PAGE_SZHUGE_4U;
+
+	pte |= pgprot_val(prot);
+
+	mm = vma->vm_mm;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
+		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+					addr, pte);
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void context_reload(void *__data)
+{
+	struct mm_struct *mm = __data;
+
+	if (mm == current->mm)
+		load_secondary_context(mm);
+}
+
+void hugetlb_setup(struct mm_struct *mm)
+{
+	struct tsb_config *tp = &mm->context.tsb_block[MM_TSB_HUGE];
+
+	if (likely(tp->tsb != NULL))
+		return;
+
+	tsb_grow(mm, MM_TSB_HUGE, 0);
+	tsb_context_switch(mm);
+	smp_tsb_sync(mm);
+
+	/* On UltraSPARC-III+ and later, configure the second half of
+	 * the Data-TLB for huge pages.
+	 */
+	if (tlb_type == cheetah_plus) {
+		unsigned long ctx;
+
+		spin_lock(&ctx_alloc_lock);
+		ctx = mm->context.sparc64_ctx_val;
+		ctx &= ~CTX_PGSZ_MASK;
+		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+		ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
+
+		if (ctx != mm->context.sparc64_ctx_val) {
+			/* When changing the page size fields, we
+			 * must perform a context flush so that no
+			 * stale entries match.  This flush must
+			 * occur with the original context register
+			 * settings.
+			 */
+			do_flush_tlb_mm(mm);
+
+			/* Reload the context register of all processors
+			 * also executing in this address space.
+			 */
+			mm->context.sparc64_ctx_val = ctx;
+			on_each_cpu(context_reload, mm, 0);
+		}
+		spin_unlock(&ctx_alloc_lock);
+	}
+}
+#endif
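
[Editor's note: illustration, not part of the patch]  pmd_set_protbits() above takes a for_modify flag so that pmd_modify() clears and re-derives only PRESENT, WRITE and EXEC while leaving DIRTY and ACCESSED untouched; only pfn_pmd() (for_modify == false) imports those two from the protections.  The sketch below demonstrates that behaviour with the PMD_HUGE_* constants from this patch and invented SK_PROT_* bits standing in for pgprot values.

#include <stdio.h>

#define SK_PMD_HUGE_PRESENT	0x00000400UL
#define SK_PMD_HUGE_WRITE	0x00000200UL
#define SK_PMD_HUGE_DIRTY	0x00000100UL
#define SK_PMD_HUGE_ACCESSED	0x00000080UL
#define SK_PMD_HUGE_EXEC	0x00000040UL

/* Invented pgprot bits for the sketch, not the real _PAGE_*_4U/_4V values. */
#define SK_PROT_VALID		0x01UL
#define SK_PROT_WRITE		0x02UL
#define SK_PROT_EXEC		0x04UL
#define SK_PROT_ACCESSED	0x08UL
#define SK_PROT_DIRTY		0x10UL

/* Mirrors pmd_set_protbits(): only a freshly built PMD (for_modify == 0)
 * imports ACCESSED/DIRTY from the protections.
 */
static unsigned long sk_set_protbits(unsigned long pmd, unsigned long prot,
				     int for_modify)
{
	if (prot & SK_PROT_VALID)
		pmd |= SK_PMD_HUGE_PRESENT;
	if (prot & SK_PROT_WRITE)
		pmd |= SK_PMD_HUGE_WRITE;
	if (prot & SK_PROT_EXEC)
		pmd |= SK_PMD_HUGE_EXEC;
	if (!for_modify) {
		if (prot & SK_PROT_ACCESSED)
			pmd |= SK_PMD_HUGE_ACCESSED;
		if (prot & SK_PROT_DIRTY)
			pmd |= SK_PMD_HUGE_DIRTY;
	}
	return pmd;
}

/* Mirrors pmd_modify(): the old ACCESSED/DIRTY state survives. */
static unsigned long sk_pmd_modify(unsigned long pmd, unsigned long newprot)
{
	pmd &= ~(SK_PMD_HUGE_PRESENT | SK_PMD_HUGE_WRITE | SK_PMD_HUGE_EXEC);
	return sk_set_protbits(pmd, newprot, 1);
}

int main(void)
{
	/* A dirty, accessed, writable huge PMD, downgraded to read-only. */
	unsigned long pmd = SK_PMD_HUGE_PRESENT | SK_PMD_HUGE_WRITE |
			    SK_PMD_HUGE_DIRTY | SK_PMD_HUGE_ACCESSED;

	pmd = sk_pmd_modify(pmd, SK_PROT_VALID);
	printf("dirty=%d accessed=%d write=%d\n",
	       !!(pmd & SK_PMD_HUGE_DIRTY),
	       !!(pmd & SK_PMD_HUGE_ACCESSED),
	       !!(pmd & SK_PMD_HUGE_WRITE));
	return 0;
}
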
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index b1f279cd00bf..3e8fec391fe0 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -43,16 +43,37 @@ void flush_tlb_pending(void)
 	put_cpu_var(tlb_batch);
 }
 
-void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
-		   pte_t *ptep, pte_t orig, int fullmm)
+static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+			      bool exec)
 {
 	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
 	unsigned long nr;
 
 	vaddr &= PAGE_MASK;
-	if (pte_exec(orig))
+	if (exec)
 		vaddr |= 0x1UL;
 
+	nr = tb->tlb_nr;
+
+	if (unlikely(nr != 0 && mm != tb->mm)) {
+		flush_tlb_pending();
+		nr = 0;
+	}
+
+	if (nr == 0)
+		tb->mm = mm;
+
+	tb->vaddrs[nr] = vaddr;
+	tb->tlb_nr = ++nr;
+	if (nr >= TLB_BATCH_NR)
+		flush_tlb_pending();
+
+	put_cpu_var(tlb_batch);
+}
+
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+		   pte_t *ptep, pte_t orig, int fullmm)
+{
 	if (tlb_type != hypervisor &&
 	    pte_dirty(orig)) {
 		unsigned long paddr, pfn = pte_pfn(orig);
@@ -77,26 +98,91 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
 	}
 
 no_cache_flush:
+	if (!fullmm)
+		tlb_batch_add_one(mm, vaddr, pte_exec(orig));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
+			       pmd_t pmd, bool exec)
+{
+	unsigned long end;
+	pte_t *pte;
+
+	pte = pte_offset_map(&pmd, vaddr);
+	end = vaddr + HPAGE_SIZE;
+	while (vaddr < end) {
+		if (pte_val(*pte) & _PAGE_VALID)
+			tlb_batch_add_one(mm, vaddr, exec);
+		pte++;
+		vaddr += PAGE_SIZE;
+	}
+	pte_unmap(pte);
+}
 
-	if (fullmm) {
-		put_cpu_var(tlb_batch);
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	pmd_t orig = *pmdp;
+
+	*pmdp = pmd;
+
+	if (mm == &init_mm)
 		return;
+
+	if ((pmd_val(pmd) ^ pmd_val(orig)) & PMD_ISHUGE) {
+		if (pmd_val(pmd) & PMD_ISHUGE)
+			mm->context.huge_pte_count++;
+		else
+			mm->context.huge_pte_count--;
+		if (mm->context.huge_pte_count == 1)
+			hugetlb_setup(mm);
 	}
 
-	nr = tb->tlb_nr;
+	if (!pmd_none(orig)) {
+		bool exec = ((pmd_val(orig) & PMD_HUGE_EXEC) != 0);
 
-	if (unlikely(nr != 0 && mm != tb->mm)) {
-		flush_tlb_pending();
-		nr = 0;
+		addr &= HPAGE_MASK;
+		if (pmd_val(orig) & PMD_ISHUGE)
+			tlb_batch_add_one(mm, addr, exec);
+		else
+			tlb_batch_pmd_scan(mm, addr, orig, exec);
 	}
+}
 
-	if (nr == 0)
-		tb->mm = mm;
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
 
-	tb->vaddrs[nr] = vaddr;
-	tb->tlb_nr = ++nr;
-	if (nr >= TLB_BATCH_NR)
-		flush_tlb_pending();
+	assert_spin_locked(&mm->page_table_lock);
 
-	put_cpu_var(tlb_batch);
+	/* FIFO */
+	if (!mm->pmd_huge_pte)
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) mm->pmd_huge_pte);
+	mm->pmd_huge_pte = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	pgtable = mm->pmd_huge_pte;
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		mm->pmd_huge_pte = NULL;
+	else {
+		mm->pmd_huge_pte = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	pte_val(pgtable[0]) = 0;
+	pte_val(pgtable[1]) = 0;
+
+	return pgtable;
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
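
[Editor's note: illustration, not part of the patch]  pgtable_trans_huge_deposit()/withdraw() above keep preallocated PTE page tables on a per-mm chain so that splitting a huge PMD never has to allocate memory; the kernel threads a list_head through the pgtable page itself.  The sketch below reduces that to a single-pointer chain, with sk_* names invented for the example, just to show the push/pop pairing.

#include <stdio.h>
#include <stddef.h>

struct sk_pgtable {
	struct sk_pgtable *next;		/* chain link stored in the page */
};

struct sk_mm {
	struct sk_pgtable *pmd_huge_pte;	/* head of the deposited chain */
};

/* Mirrors pgtable_trans_huge_deposit(): stash a preallocated page table. */
static void sk_deposit(struct sk_mm *mm, struct sk_pgtable *pg)
{
	pg->next = mm->pmd_huge_pte;
	mm->pmd_huge_pte = pg;
}

/* Mirrors pgtable_trans_huge_withdraw(): take one back when a PMD splits. */
static struct sk_pgtable *sk_withdraw(struct sk_mm *mm)
{
	struct sk_pgtable *pg = mm->pmd_huge_pte;

	if (pg)
		mm->pmd_huge_pte = pg->next;
	return pg;
}

int main(void)
{
	struct sk_pgtable a = { NULL }, b = { NULL };
	struct sk_mm mm = { NULL };

	sk_deposit(&mm, &a);
	sk_deposit(&mm, &b);
	printf("withdrew %p then %p\n",
	       (void *)sk_withdraw(&mm), (void *)sk_withdraw(&mm));
	return 0;
}
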
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index a35ee832baf3..7f6474347491 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -78,7 +78,7 @@ void flush_tsb_user(struct tlb_batch *tb)
 		base = __pa(base);
 	__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
 		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
 		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
@@ -93,7 +93,7 @@ void flush_tsb_user(struct tlb_batch *tb)
 #define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
 #define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 #define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
 #define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
 #endif
@@ -190,7 +190,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
 		case MM_TSB_BASE:
 			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
 			break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 		case MM_TSB_HUGE:
 			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
 			break;
@@ -205,7 +205,7 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsign
 		case MM_TSB_BASE:
 			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
 			break;
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 		case MM_TSB_HUGE:
 			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
 			break;
@@ -427,7 +427,7 @@ retry_tsb_alloc:
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	unsigned long huge_pte_count;
 #endif
 	unsigned int i;
@@ -436,7 +436,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 
 	mm->context.sparc64_ctx_val = 0UL;
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	/* We reset it to zero because the fork() page copying
 	 * will re-increment the counters as the parent PTEs are
 	 * copied into the child address space.
@@ -459,7 +459,7 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	 */
 	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
 
-#ifdef CONFIG_HUGETLB_PAGE
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	if (unlikely(huge_pte_count))
 		tsb_grow(mm, MM_TSB_HUGE, huge_pte_count);
 #endif