summary refs log tree commit diff
path: root/arch/sparc64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc64')
-rw-r--r--arch/sparc64/kernel/entry.S82
-rw-r--r--arch/sparc64/kernel/etrap.S51
-rw-r--r--arch/sparc64/kernel/head.S152
-rw-r--r--arch/sparc64/kernel/irq.c1
-rw-r--r--arch/sparc64/kernel/power.c64
-rw-r--r--arch/sparc64/kernel/rtrap.S30
-rw-r--r--arch/sparc64/kernel/setup.c8
-rw-r--r--arch/sparc64/kernel/trampoline.S15
-rw-r--r--arch/sparc64/kernel/winfixup.S33
-rw-r--r--arch/sparc64/lib/VISsave.S8
-rw-r--r--arch/sparc64/mm/init.c182
11 files changed, 272 insertions, 354 deletions
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index 2879b1072921..11a848402fb1 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -33,7 +33,7 @@
 	/* This is trivial with the new code... */
 	.globl		do_fpdis
 do_fpdis:
-	sethi		%hi(TSTATE_PEF), %g4					! IEU0
+	sethi		%hi(TSTATE_PEF), %g4
 	rdpr		%tstate, %g5
 	andcc		%g5, %g4, %g0
 	be,pt		%xcc, 1f
@@ -50,18 +50,18 @@ do_fpdis:
 	add		%g0, %g0, %g0
 	ba,a,pt		%xcc, rtrap_clr_l6
 
-1:	ldub		[%g6 + TI_FPSAVED], %g5					! Load	Group
-	wr		%g0, FPRS_FEF, %fprs					! LSU	Group+4bubbles
-	andcc		%g5, FPRS_FEF, %g0					! IEU1	Group
-	be,a,pt		%icc, 1f						! CTI
-	 clr		%g7							! IEU0
-	ldx		[%g6 + TI_GSR], %g7					! Load	Group
-1:	andcc		%g5, FPRS_DL, %g0					! IEU1
-	bne,pn		%icc, 2f						! CTI
-	 fzero		%f0							! FPA
-	andcc		%g5, FPRS_DU, %g0					! IEU1  Group
-	bne,pn		%icc, 1f						! CTI
-	 fzero		%f2							! FPA
+1:	ldub		[%g6 + TI_FPSAVED], %g5
+	wr		%g0, FPRS_FEF, %fprs
+	andcc		%g5, FPRS_FEF, %g0
+	be,a,pt		%icc, 1f
+	 clr		%g7
+	ldx		[%g6 + TI_GSR], %g7
+1:	andcc		%g5, FPRS_DL, %g0
+	bne,pn		%icc, 2f
+	 fzero		%f0
+	andcc		%g5, FPRS_DU, %g0
+	bne,pn		%icc, 1f
+	 fzero		%f2
 	faddd		%f0, %f2, %f4
 	fmuld		%f0, %f2, %f6
 	faddd		%f0, %f2, %f8
@@ -97,15 +97,17 @@ do_fpdis:
 	faddd		%f0, %f2, %f4
 	fmuld		%f0, %f2, %f6
 	ldxa		[%g3] ASI_DMMU, %g5
-cplus_fptrap_insn_1:
-	sethi		%hi(0), %g2
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
 	stxa		%g2, [%g3] ASI_DMMU
 	membar		#Sync
 	add		%g6, TI_FPREGS + 0xc0, %g2
 	faddd		%f0, %f2, %f8
 	fmuld		%f0, %f2, %f10
-	ldda		[%g1] ASI_BLK_S, %f32	! grrr, where is ASI_BLK_NUCLEUS 8-(
+	membar		#Sync
+	ldda		[%g1] ASI_BLK_S, %f32
 	ldda		[%g2] ASI_BLK_S, %f48
+	membar		#Sync
 	faddd		%f0, %f2, %f12
 	fmuld		%f0, %f2, %f14
 	faddd		%f0, %f2, %f16
@@ -116,7 +118,6 @@ cplus_fptrap_insn_1:
 	fmuld		%f0, %f2, %f26
 	faddd		%f0, %f2, %f28
 	fmuld		%f0, %f2, %f30
-	membar		#Sync
 	b,pt		%xcc, fpdis_exit
 	 nop
 2:	andcc		%g5, FPRS_DU, %g0
@@ -126,15 +127,17 @@ cplus_fptrap_insn_1:
 	fzero		%f34
 	ldxa		[%g3] ASI_DMMU, %g5
 	add		%g6, TI_FPREGS, %g1
-cplus_fptrap_insn_2:
-	sethi		%hi(0), %g2
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
 	stxa		%g2, [%g3] ASI_DMMU
 	membar		#Sync
 	add		%g6, TI_FPREGS + 0x40, %g2
 	faddd		%f32, %f34, %f36
 	fmuld		%f32, %f34, %f38
-	ldda		[%g1] ASI_BLK_S, %f0	! grrr, where is ASI_BLK_NUCLEUS 8-(
+	membar		#Sync
+	ldda		[%g1] ASI_BLK_S, %f0
 	ldda		[%g2] ASI_BLK_S, %f16
+	membar		#Sync
 	faddd		%f32, %f34, %f40
 	fmuld		%f32, %f34, %f42
 	faddd		%f32, %f34, %f44
@@ -147,18 +150,18 @@ cplus_fptrap_insn_2:
 	fmuld		%f32, %f34, %f58
 	faddd		%f32, %f34, %f60
 	fmuld		%f32, %f34, %f62
-	membar		#Sync
 	ba,pt		%xcc, fpdis_exit
 	 nop
 3:	mov		SECONDARY_CONTEXT, %g3
 	add		%g6, TI_FPREGS, %g1
 	ldxa		[%g3] ASI_DMMU, %g5
-cplus_fptrap_insn_3:
-	sethi		%hi(0), %g2
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
 	stxa		%g2, [%g3] ASI_DMMU
 	membar		#Sync
 	mov		0x40, %g2
-	ldda		[%g1] ASI_BLK_S, %f0		! grrr, where is ASI_BLK_NUCLEUS 8-(
+	membar		#Sync
+	ldda		[%g1] ASI_BLK_S, %f0
 	ldda		[%g1 + %g2] ASI_BLK_S, %f16
 	add		%g1, 0x80, %g1
 	ldda		[%g1] ASI_BLK_S, %f32
@@ -319,8 +322,8 @@ do_fptrap_after_fsr:
 	stx		%g3, [%g6 + TI_GSR]
 	mov		SECONDARY_CONTEXT, %g3
 	ldxa		[%g3] ASI_DMMU, %g5
-cplus_fptrap_insn_4:
-	sethi		%hi(0), %g2
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
 	stxa		%g2, [%g3] ASI_DMMU
 	membar		#Sync
 	add		%g6, TI_FPREGS, %g2
@@ -341,33 +344,6 @@ cplus_fptrap_insn_4:
 	ba,pt		%xcc, etrap
 	 wr		%g0, 0, %fprs
 
-cplus_fptrap_1:
-	sethi		%hi(CTX_CHEETAH_PLUS_CTX0), %g2
-
-	.globl		cheetah_plus_patch_fpdis
-cheetah_plus_patch_fpdis:
-	/* We configure the dTLB512_0 for 4MB pages and the
-	 * dTLB512_1 for 8K pages when in context zero.
-	 */
-	sethi			%hi(cplus_fptrap_1), %o0
-	lduw			[%o0 + %lo(cplus_fptrap_1)], %o1
-
-	set			cplus_fptrap_insn_1, %o2
-	stw			%o1, [%o2]
-	flush			%o2
-	set			cplus_fptrap_insn_2, %o2
-	stw			%o1, [%o2]
-	flush			%o2
-	set			cplus_fptrap_insn_3, %o2
-	stw			%o1, [%o2]
-	flush			%o2
-	set			cplus_fptrap_insn_4, %o2
-	stw			%o1, [%o2]
-	flush			%o2
-
-	retl
-	 nop
-
 	/* The registers for cross calls will be:
 	 *
 	 * DATA 0: [low 32-bits]  Address of function to call, jmp to this
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 50d2af1d98ae..0d8eba21111b 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -68,12 +68,8 @@ etrap_irq:
 
 		wrpr	%g3, 0, %otherwin
 		wrpr	%g2, 0, %wstate
-cplus_etrap_insn_1:
-		sethi	%hi(0), %g3
-		sllx	%g3, 32, %g3
-cplus_etrap_insn_2:
-		sethi	%hi(0), %g2
-		or	%g3, %g2, %g3
+		sethi	%hi(sparc64_kern_pri_context), %g2
+		ldx	[%g2 + %lo(sparc64_kern_pri_context)], %g3
 		stxa	%g3, [%l4] ASI_DMMU
 		flush	%l6
 		wr	%g0, ASI_AIUS, %asi
@@ -215,12 +211,8 @@ scetrap:	rdpr	%pil, %g2
 		mov	PRIMARY_CONTEXT, %l4
 		wrpr	%g3, 0, %otherwin
 		wrpr	%g2, 0, %wstate
-cplus_etrap_insn_3:
-		sethi	%hi(0), %g3
-		sllx	%g3, 32, %g3
-cplus_etrap_insn_4:
-		sethi	%hi(0), %g2
-		or	%g3, %g2, %g3
+		sethi	%hi(sparc64_kern_pri_context), %g2
+		ldx	[%g2 + %lo(sparc64_kern_pri_context)], %g3
 		stxa	%g3, [%l4] ASI_DMMU
 		flush	%l6
 
@@ -264,38 +256,3 @@ cplus_etrap_insn_4:
 
 #undef TASK_REGOFF
 #undef ETRAP_PSTATE1
-
-cplus_einsn_1:
-		sethi			%uhi(CTX_CHEETAH_PLUS_NUC), %g3
-cplus_einsn_2:
-		sethi			%hi(CTX_CHEETAH_PLUS_CTX0), %g2
-
-		.globl			cheetah_plus_patch_etrap
-cheetah_plus_patch_etrap:
-		/* We configure the dTLB512_0 for 4MB pages and the
-		 * dTLB512_1 for 8K pages when in context zero.
-		 */
-		sethi			%hi(cplus_einsn_1), %o0
-		sethi			%hi(cplus_etrap_insn_1), %o2
-		lduw			[%o0 + %lo(cplus_einsn_1)], %o1
-		or			%o2, %lo(cplus_etrap_insn_1), %o2
-		stw			%o1, [%o2]
-		flush			%o2
-		sethi			%hi(cplus_etrap_insn_3), %o2
-		or			%o2, %lo(cplus_etrap_insn_3), %o2
-		stw			%o1, [%o2]
-		flush			%o2
-
-		sethi			%hi(cplus_einsn_2), %o0
-		sethi			%hi(cplus_etrap_insn_2), %o2
-		lduw			[%o0 + %lo(cplus_einsn_2)], %o1
-		or			%o2, %lo(cplus_etrap_insn_2), %o2
-		stw			%o1, [%o2]
-		flush			%o2
-		sethi			%hi(cplus_etrap_insn_4), %o2
-		or			%o2, %lo(cplus_etrap_insn_4), %o2
-		stw			%o1, [%o2]
-		flush			%o2
-
-		retl
-		 nop
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 89406f9649a9..f1dcdf8f7433 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -325,23 +325,7 @@ cheetah_tlb_fixup:
 1:	sethi	%hi(tlb_type), %g1
 	stw	%g2, [%g1 + %lo(tlb_type)]
 
-	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
-	ba,pt	%xcc, 2f
-	 nop
-
-1:	/* Patch context register writes to support nucleus page
-	 * size correctly.
-	 */
-	call	cheetah_plus_patch_etrap
-	 nop
-	call	cheetah_plus_patch_rtrap
-	 nop
-	call	cheetah_plus_patch_fpdis
-	 nop
-	call	cheetah_plus_patch_winfixup
-	 nop
-
-2:	/* Patch copy/page operations to cheetah optimized versions. */
+	/* Patch copy/page operations to cheetah optimized versions. */
 	call	cheetah_patch_copyops
 	 nop
 	call	cheetah_patch_copy_page
@@ -398,32 +382,79 @@ tlb_fixup_done:
 	 nop
 	/* Not reached... */
 
-/* IMPORTANT NOTE: Whenever making changes here, check
- * trampoline.S as well. -jj */
-	.globl	setup_tba
-setup_tba:	/* i0 = is_starfire */
-	save	%sp, -160, %sp
+	/* This is meant to allow the sharing of this code between
+	 * boot processor invocation (via setup_tba() below) and
+	 * secondary processor startup (via trampoline.S).  The
+	 * former does use this code, the latter does not yet due
+	 * to some complexities.  That should be fixed up at some
+	 * point.
+	 */
+	.globl	setup_trap_table
+setup_trap_table:
+	save	%sp, -192, %sp
+
+	/* Force interrupts to be disabled.  Transferring over to
+	 * the Linux trap table is a very delicate operation.
+	 * Until we are actually on the Linux trap table, we cannot
+	 * get the PAGE_OFFSET linear mappings translated.  We need
+	 * that mapping to be setup in order to initialize the firmware
+	 * page tables.
+	 *
+	 * So there is this window of time, from the return from
+	 * prom_set_trap_table() until inherit_prom_mappings_post()
+	 * (in arch/sparc64/mm/init.c) completes, during which no
+	 * firmware address space accesses can be made.
+	 */
+	rdpr	%pstate, %o1
+	andn	%o1, PSTATE_IE, %o1
+	wrpr	%o1, 0x0, %pstate
+	wrpr	%g0, 15, %pil
 
-	rdpr	%tba, %g7
-	sethi	%hi(prom_tba), %o1
-	or	%o1, %lo(prom_tba), %o1
-	stx	%g7, [%o1]
+	/* Ok, now make the final valid firmware call to jump over
+	 * to the Linux trap table.
+	 */
+	call	prom_set_trap_table
+	 sethi	%hi(sparc64_ttable_tl0), %o0
+
+	/* Start using proper page size encodings in ctx register.  */
+	sethi	%hi(sparc64_kern_pri_context), %g3
+	ldx	[%g3 + %lo(sparc64_kern_pri_context)], %g2
+	mov	PRIMARY_CONTEXT, %g1
+	stxa	%g2, [%g1] ASI_DMMU
+	membar	#Sync
+
+	/* The Linux trap handlers expect various trap global registers
+	 * to be setup with some fixed values.  So here we set these
+	 * up very carefully.  These globals are:
+	 *
+	 * Alternate Globals (PSTATE_AG):
+	 *
+	 * %g6			--> current_thread_info()
+	 *
+	 * MMU Globals (PSTATE_MG):
+	 *
+	 * %g1			--> TLB_SFSR
+	 * %g2			--> ((_PAGE_VALID | _PAGE_SZ4MB |
+	 *			      _PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
+	 *			     ^ 0xfffff80000000000)
+	 * (this %g2 value is used for computing the PAGE_OFFSET kernel
+	 *  TLB entries quickly, the virtual address of the fault XOR'd
+	 *  with this %g2 value is the PTE to load into the TLB)
+	 * %g3			--> VPTE_BASE_CHEETAH or VPTE_BASE_SPITFIRE
+	 *
+	 * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
+	 *
+	 * %g6			--> __irq_work[smp_processor_id()]
+	 */
 
-	/* Setup "Linux" globals 8-) */
 	rdpr	%pstate, %o1
 	mov	%g6, %o2
-	wrpr	%o1, (PSTATE_AG|PSTATE_IE), %pstate
-	sethi	%hi(sparc64_ttable_tl0), %g1
-	wrpr	%g1, %tba
+	wrpr	%o1, PSTATE_AG, %pstate
 	mov	%o2, %g6
 
-	/* Set up MMU globals */
-	wrpr	%o1, (PSTATE_MG|PSTATE_IE), %pstate
-
-	/* Set fixed globals used by dTLB miss handler. */
 #define KERN_HIGHBITS		((_PAGE_VALID|_PAGE_SZ4MB)^0xfffff80000000000)
 #define KERN_LOWBITS		(_PAGE_CP | _PAGE_CV | _PAGE_P | _PAGE_W)
-
+	wrpr	%o1, PSTATE_MG, %pstate
 	mov	TSB_REG, %g1
 	stxa	%g0, [%g1] ASI_DMMU
 	membar	#Sync
@@ -435,17 +466,17 @@ setup_tba:	/* i0 = is_starfire */
 	sllx	%g2, 32, %g2
 	or	%g2, KERN_LOWBITS, %g2
 
-	BRANCH_IF_ANY_CHEETAH(g3,g7,cheetah_vpte_base)
-	ba,pt	%xcc, spitfire_vpte_base
+	BRANCH_IF_ANY_CHEETAH(g3,g7,8f)
+	ba,pt	%xcc, 9f
 	 nop
 
-cheetah_vpte_base:
+8:
 	sethi		%uhi(VPTE_BASE_CHEETAH), %g3
 	or		%g3, %ulo(VPTE_BASE_CHEETAH), %g3
 	ba,pt		%xcc, 2f
 	 sllx		%g3, 32, %g3
 
-spitfire_vpte_base:
+9:
 	sethi		%uhi(VPTE_BASE_SPITFIRE), %g3
 	or		%g3, %ulo(VPTE_BASE_SPITFIRE), %g3
 	sllx		%g3, 32, %g3
@@ -471,36 +502,37 @@ spitfire_vpte_base:
 	sllx	%o2, 32, %o2
 	wr	%o2, %asr25
 
-	/* Ok, we're done setting up all the state our trap mechanims needs,
-	 * now get back into normal globals and let the PROM know what is up.
-	 */
 2:
 	wrpr	%g0, %g0, %wstate
-	wrpr	%o1, PSTATE_IE, %pstate
+	wrpr	%o1, 0x0, %pstate
 
 	call	init_irqwork_curcpu
 	 nop
 
-	call	prom_set_trap_table
-	 sethi	%hi(sparc64_ttable_tl0), %o0
-
-	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
-	ba,pt	%xcc, 2f
-	 nop
-
-1:	/* Start using proper page size encodings in ctx register.  */
-	sethi	%uhi(CTX_CHEETAH_PLUS_NUC), %g3
-	mov	PRIMARY_CONTEXT, %g1
-	sllx	%g3, 32, %g3
-	sethi	%hi(CTX_CHEETAH_PLUS_CTX0), %g2
-	or	%g3, %g2, %g3
-	stxa	%g3, [%g1] ASI_DMMU
-	membar	#Sync
-
-2:
+	/* Now we can turn interrupts back on. */
 	rdpr	%pstate, %o1
 	or	%o1, PSTATE_IE, %o1
 	wrpr	%o1, 0, %pstate
+	wrpr	%g0, 0x0, %pil
+
+	ret
+	 restore
+
+	.globl	setup_tba
+setup_tba:	/* i0 = is_starfire */
+	save	%sp, -192, %sp
+
+	/* The boot processor is the only cpu which invokes this
+	 * routine, the other cpus set things up via trampoline.S.
+	 * So save the OBP trap table address here.
+	 */
+	rdpr	%tba, %g7
+	sethi	%hi(prom_tba), %o1
+	or	%o1, %lo(prom_tba), %o1
+	stx	%g7, [%o1]
+
+	call	setup_trap_table
+	 nop
 
 	ret
 	 restore
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index c9b69167632a..233526ba3abe 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -27,6 +27,7 @@
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/irq.h>
+#include <asm/io.h>
 #include <asm/sbus.h>
 #include <asm/iommu.h>
 #include <asm/upa.h>
diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c
index 946cee0257ea..9e8362ea3104 100644
--- a/arch/sparc64/kernel/power.c
+++ b/arch/sparc64/kernel/power.c
@@ -17,6 +17,7 @@
 
 #include <asm/system.h>
 #include <asm/ebus.h>
+#include <asm/isa.h>
 #include <asm/auxio.h>
 
 #include <linux/unistd.h>
@@ -100,46 +101,83 @@ again:
 	return 0;
 }
 
-static int __init has_button_interrupt(struct linux_ebus_device *edev)
+static int __init has_button_interrupt(unsigned int irq, int prom_node)
 {
-	if (edev->irqs[0] == PCI_IRQ_NONE)
+	if (irq == PCI_IRQ_NONE)
 		return 0;
-	if (!prom_node_has_property(edev->prom_node, "button"))
+	if (!prom_node_has_property(prom_node, "button"))
 		return 0;
 
 	return 1;
 }
 
-void __init power_init(void)
+static int __init power_probe_ebus(struct resource **resp, unsigned int *irq_p, int *prom_node_p)
 {
 	struct linux_ebus *ebus;
 	struct linux_ebus_device *edev;
+
+	for_each_ebus(ebus) {
+		for_each_ebusdev(edev, ebus) {
+			if (!strcmp(edev->prom_name, "power")) {
+				*resp = &edev->resource[0];
+				*irq_p = edev->irqs[0];
+				*prom_node_p = edev->prom_node;
+				return 0;
+			}
+		}
+	}
+	return -ENODEV;
+}
+
+static int __init power_probe_isa(struct resource **resp, unsigned int *irq_p, int *prom_node_p)
+{
+	struct sparc_isa_bridge *isa_bus;
+	struct sparc_isa_device *isa_dev;
+
+	for_each_isa(isa_bus) {
+		for_each_isadev(isa_dev, isa_bus) {
+			if (!strcmp(isa_dev->prom_name, "power")) {
+				*resp = &isa_dev->resource;
+				*irq_p = isa_dev->irq;
+				*prom_node_p = isa_dev->prom_node;
+				return 0;
+			}
+		}
+	}
+	return -ENODEV;
+}
+
+void __init power_init(void)
+{
+	struct resource *res = NULL;
+	unsigned int irq;
+	int prom_node;
 	static int invoked;
 
 	if (invoked)
 		return;
 	invoked = 1;
 
-	for_each_ebus(ebus) {
-		for_each_ebusdev(edev, ebus) {
-			if (!strcmp(edev->prom_name, "power"))
-				goto found;
-		}
-	}
+	if (!power_probe_ebus(&res, &irq, &prom_node))
+		goto found;
+
+	if (!power_probe_isa(&res, &irq, &prom_node))
+		goto found;
+
 	return;
 
 found:
-	power_reg = ioremap(edev->resource[0].start, 0x4);
+	power_reg = ioremap(res->start, 0x4);
 	printk("power: Control reg at %p ... ", power_reg);
 	poweroff_method = machine_halt;  /* able to use the standard halt */
-	if (has_button_interrupt(edev)) {
+	if (has_button_interrupt(irq, prom_node)) {
 		if (kernel_thread(powerd, NULL, CLONE_FS) < 0) {
 			printk("Failed to start power daemon.\n");
 			return;
 		}
 		printk("powerd running.\n");
 
-		if (request_irq(edev->irqs[0],
+		if (request_irq(irq,
 				power_handler, SA_SHIRQ, "power", NULL) < 0)
 			printk("power: Error, cannot register IRQ handler.\n");
 	} else {
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index fafd227735fa..090dcca00d2a 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -256,9 +256,8 @@ rt_continue:	ldx			[%sp + PTREGS_OFF + PT_V9_G1], %g1
 		brnz,pn			%l3, kern_rtt
 		 mov			PRIMARY_CONTEXT, %l7
 		ldxa			[%l7 + %l7] ASI_DMMU, %l0
-cplus_rtrap_insn_1:
-		sethi			%hi(0), %l1
-		sllx			%l1, 32, %l1
+		sethi			%hi(sparc64_kern_pri_nuc_bits), %l1
+		ldx			[%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1
 		or			%l0, %l1, %l0
 		stxa			%l0, [%l7] ASI_DMMU
 		flush			%g6
@@ -313,53 +312,36 @@ kern_fpucheck:	ldub			[%g6 + TI_FPDEPTH], %l5
 		wr			%g1, FPRS_FEF, %fprs
 		ldx			[%o1 + %o5], %g1
 		add			%g6, TI_XFSR, %o1
-		membar			#StoreLoad | #LoadLoad
 		sll			%o0, 8, %o2
 		add			%g6, TI_FPREGS, %o3
 		brz,pn			%l6, 1f
 		 add			%g6, TI_FPREGS+0x40, %o4
 
+		membar			#Sync
 		ldda			[%o3 + %o2] ASI_BLK_P, %f0
 		ldda			[%o4 + %o2] ASI_BLK_P, %f16
+		membar			#Sync
 1:		andcc			%l2, FPRS_DU, %g0
 		be,pn			%icc, 1f
 		 wr			%g1, 0, %gsr
 		add			%o2, 0x80, %o2
+		membar			#Sync
 		ldda			[%o3 + %o2] ASI_BLK_P, %f32
 		ldda			[%o4 + %o2] ASI_BLK_P, %f48
-
 1:		membar			#Sync
 		ldx			[%o1 + %o5], %fsr
 2:		stb			%l5, [%g6 + TI_FPDEPTH]
 		ba,pt			%xcc, rt_continue
 		 nop
 5:		wr			%g0, FPRS_FEF, %fprs
-		membar			#StoreLoad | #LoadLoad
 		sll			%o0, 8, %o2
 
 		add			%g6, TI_FPREGS+0x80, %o3
 		add			%g6, TI_FPREGS+0xc0, %o4
+		membar			#Sync
 		ldda			[%o3 + %o2] ASI_BLK_P, %f32
 		ldda			[%o4 + %o2] ASI_BLK_P, %f48
 		membar			#Sync
 		wr			%g0, FPRS_DU, %fprs
 		ba,pt			%xcc, rt_continue
 		 stb			%l5, [%g6 + TI_FPDEPTH]
-
-cplus_rinsn_1:
-		sethi			%uhi(CTX_CHEETAH_PLUS_NUC), %l1
-
-		.globl			cheetah_plus_patch_rtrap
-cheetah_plus_patch_rtrap:
-		/* We configure the dTLB512_0 for 4MB pages and the
-		 * dTLB512_1 for 8K pages when in context zero.
-		 */
-		sethi			%hi(cplus_rinsn_1), %o0
-		sethi			%hi(cplus_rtrap_insn_1), %o2
-		lduw			[%o0 + %lo(cplus_rinsn_1)], %o1
-		or			%o2, %lo(cplus_rtrap_insn_1), %o2
-		stw			%o1, [%o2]
-		flush			%o2
-
-		retl
-		 nop
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 4c9c8f241748..c1f34237cdf2 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -187,17 +187,13 @@ int prom_callback(long *args)
 		}
 
 		if ((va >= KERNBASE) && (va < (KERNBASE + (4 * 1024 * 1024)))) {
-			unsigned long kernel_pctx = 0;
-
-			if (tlb_type == cheetah_plus)
-				kernel_pctx |= (CTX_CHEETAH_PLUS_NUC |
-						CTX_CHEETAH_PLUS_CTX0);
+			extern unsigned long sparc64_kern_pri_context;
 
 			/* Spitfire Errata #32 workaround */
 			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
 					     "flush	%%g6"
 					     : /* No outputs */
-					     : "r" (kernel_pctx),
+					     : "r" (sparc64_kern_pri_context),
 					       "r" (PRIMARY_CONTEXT),
 					       "i" (ASI_DMMU));
 
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 89f2fcfcd662..9478551cb020 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -336,20 +336,13 @@ do_unlock:
 	call		init_irqwork_curcpu
 	 nop
 
-	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g2,g3,1f)
-	ba,pt	%xcc, 2f
-	 nop
-
-1:	/* Start using proper page size encodings in ctx register.  */
-	sethi	%uhi(CTX_CHEETAH_PLUS_NUC), %g3
+	/* Start using proper page size encodings in ctx register.  */
+	sethi	%hi(sparc64_kern_pri_context), %g3
+	ldx	[%g3 + %lo(sparc64_kern_pri_context)], %g2
 	mov	PRIMARY_CONTEXT, %g1
-	sllx	%g3, 32, %g3
-	sethi	%hi(CTX_CHEETAH_PLUS_CTX0), %g2
-	or	%g3, %g2, %g3
-	stxa	%g3, [%g1] ASI_DMMU
+	stxa	%g2, [%g1] ASI_DMMU
 	membar	#Sync
 
-2:
 	rdpr		%pstate, %o1
 	or		%o1, PSTATE_IE, %o1
 	wrpr		%o1, 0, %pstate
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index 99c809a1e5ac..39160926267b 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -16,23 +16,14 @@
 	.text
 
 set_pcontext:
-cplus_winfixup_insn_1:
-	sethi	%hi(0), %l1
+	sethi	%hi(sparc64_kern_pri_context), %l1
+	ldx	[%l1 + %lo(sparc64_kern_pri_context)], %l1
 	mov	PRIMARY_CONTEXT, %g1
-	sllx	%l1, 32, %l1
-cplus_winfixup_insn_2:
-	sethi	%hi(0), %g2
-	or	%l1, %g2, %l1
 	stxa	%l1, [%g1] ASI_DMMU
 	flush	%g6
 	retl
 	 nop
 
-cplus_wfinsn_1:
-	sethi	%uhi(CTX_CHEETAH_PLUS_NUC), %l1
-cplus_wfinsn_2:
-	sethi	%hi(CTX_CHEETAH_PLUS_CTX0), %g2
-
 	.align	32
 
 	/* Here are the rules, pay attention.
@@ -395,23 +386,3 @@ window_dax_from_user_common:
 	 add		%sp, PTREGS_OFF, %o0
 	ba,pt		%xcc, rtrap
 	 clr		%l6
-	
-
-	.globl		cheetah_plus_patch_winfixup
-cheetah_plus_patch_winfixup:
-	sethi			%hi(cplus_wfinsn_1), %o0
-	sethi			%hi(cplus_winfixup_insn_1), %o2
-	lduw			[%o0 + %lo(cplus_wfinsn_1)], %o1
-	or			%o2, %lo(cplus_winfixup_insn_1), %o2
-	stw			%o1, [%o2]
-	flush			%o2
-
-	sethi			%hi(cplus_wfinsn_2), %o0
-	sethi			%hi(cplus_winfixup_insn_2), %o2
-	lduw			[%o0 + %lo(cplus_wfinsn_2)], %o1
-	or			%o2, %lo(cplus_winfixup_insn_2), %o2
-	stw			%o1, [%o2]
-	flush			%o2
-
-	retl
-	 nop
diff --git a/arch/sparc64/lib/VISsave.S b/arch/sparc64/lib/VISsave.S
index 4e18989bd602..a0ded5c5aa5c 100644
--- a/arch/sparc64/lib/VISsave.S
+++ b/arch/sparc64/lib/VISsave.S
@@ -59,15 +59,17 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	be,pn		%icc, 9b
 	 add		%g6, TI_FPREGS, %g2
 	andcc		%o5, FPRS_DL, %g0
-	membar		#StoreStore | #LoadStore
 
 	be,pn		%icc, 4f
 	 add		%g6, TI_FPREGS+0x40, %g3
+	membar		#Sync
 	stda		%f0, [%g2 + %g1] ASI_BLK_P
 	stda		%f16, [%g3 + %g1] ASI_BLK_P
+	membar		#Sync
 	andcc		%o5, FPRS_DU, %g0
 	be,pn		%icc, 5f
 4:	 add		%g1, 128, %g1
+	membar		#Sync
 	stda		%f32, [%g2 + %g1] ASI_BLK_P
 
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
@@ -87,7 +89,7 @@ vis1:	ldub		[%g6 + TI_FPSAVED], %g3
 	sll		%g1, 5, %g1
 	add		%g6, TI_FPREGS+0xc0, %g3
 	wr		%g0, FPRS_FEF, %fprs
-	membar		#StoreStore | #LoadStore
+	membar		#Sync
 	stda		%f32, [%g2 + %g1] ASI_BLK_P
 	stda		%f48, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
@@ -128,8 +130,8 @@ VISenterhalf:
 	be,pn		%icc, 4f
 	 add		%g6, TI_FPREGS, %g2
 
-	membar		#StoreStore | #LoadStore
 	add		%g6, TI_FPREGS+0x40, %g3
+	membar		#Sync
 	stda		%f0, [%g2 + %g1] ASI_BLK_P
 	stda		%f16, [%g3 + %g1] ASI_BLK_P
 	membar		#Sync
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 5db50524f20d..0d2e967c7200 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -133,6 +133,12 @@ extern unsigned int sparc_ramdisk_size;
 
 struct page *mem_map_zero __read_mostly;
 
+unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
+
+unsigned long sparc64_kern_pri_context __read_mostly;
+unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
+unsigned long sparc64_kern_sec_context __read_mostly;
+
 int bigkernel = 0;
 
 /* XXX Tune this... */
@@ -362,6 +368,7 @@ struct linux_prom_translation {
 	unsigned long data;
 };
 static struct linux_prom_translation prom_trans[512] __initdata;
+static unsigned int prom_trans_ents __initdata;
 
 extern unsigned long prom_boot_page;
 extern void prom_remap(unsigned long physpage, unsigned long virtpage, int mmu_ihandle);
@@ -375,57 +382,7 @@ unsigned long kern_locked_tte_data;
 unsigned long prom_pmd_phys __read_mostly;
 unsigned int swapper_pgd_zero __read_mostly;
 
-/* Allocate power-of-2 aligned chunks from the end of the
- * kernel image.  Return physical address.
- */
-static inline unsigned long early_alloc_phys(unsigned long size)
-{
-	unsigned long base;
-
-	BUILD_BUG_ON(size & (size - 1));
-
-	kern_size = (kern_size + (size - 1)) & ~(size - 1);
-	base = kern_base + kern_size;
-	kern_size += size;
-
-	return base;
-}
-
-static inline unsigned long load_phys32(unsigned long pa)
-{
-	unsigned long val;
-
-	__asm__ __volatile__("lduwa	[%1] %2, %0"
-			     : "=&r" (val)
-			     : "r" (pa), "i" (ASI_PHYS_USE_EC));
-
-	return val;
-}
-
-static inline unsigned long load_phys64(unsigned long pa)
-{
-	unsigned long val;
-
-	__asm__ __volatile__("ldxa	[%1] %2, %0"
-			     : "=&r" (val)
-			     : "r" (pa), "i" (ASI_PHYS_USE_EC));
-
-	return val;
-}
-
-static inline void store_phys32(unsigned long pa, unsigned long val)
-{
-	__asm__ __volatile__("stwa	%0, [%1] %2"
-			     : /* no outputs */
-			     : "r" (val), "r" (pa), "i" (ASI_PHYS_USE_EC));
-}
-
-static inline void store_phys64(unsigned long pa, unsigned long val)
-{
-	__asm__ __volatile__("stxa	%0, [%1] %2"
-			     : /* no outputs */
-			     : "r" (val), "r" (pa), "i" (ASI_PHYS_USE_EC));
-}
+static pmd_t *prompmd __read_mostly;
 
 #define BASE_PAGE_SIZE 8192
 
@@ -435,34 +392,28 @@ static inline void store_phys64(unsigned long pa, unsigned long val)
  */
 unsigned long prom_virt_to_phys(unsigned long promva, int *error)
 {
-	unsigned long pmd_phys = (prom_pmd_phys +
-				  ((promva >> 23) & 0x7ff) * sizeof(pmd_t));
-	unsigned long pte_phys;
-	pmd_t pmd_ent;
-	pte_t pte_ent;
+	pmd_t *pmdp = prompmd + ((promva >> 23) & 0x7ff);
+	pte_t *ptep;
 	unsigned long base;
 
-	pmd_val(pmd_ent) = load_phys32(pmd_phys);
-	if (pmd_none(pmd_ent)) {
+	if (pmd_none(*pmdp)) {
 		if (error)
 			*error = 1;
 		return 0;
 	}
-
-	pte_phys = (unsigned long)pmd_val(pmd_ent) << 11UL;
-	pte_phys += ((promva >> 13) & 0x3ff) * sizeof(pte_t);
-	pte_val(pte_ent) = load_phys64(pte_phys);
-	if (!pte_present(pte_ent)) {
+	ptep = (pte_t *)__pmd_page(*pmdp) + ((promva >> 13) & 0x3ff);
+	if (!pte_present(*ptep)) {
 		if (error)
 			*error = 1;
 		return 0;
 	}
 	if (error) {
 		*error = 0;
-		return pte_val(pte_ent);
+		return pte_val(*ptep);
 	}
-	base = pte_val(pte_ent) & _PAGE_PADDR;
-	return (base + (promva & (BASE_PAGE_SIZE - 1)));
+	base = pte_val(*ptep) & _PAGE_PADDR;
+
+	return base + (promva & (BASE_PAGE_SIZE - 1));
 }
 
 /* The obp translations are saved based on 8k pagesize, since obp can
@@ -475,25 +426,20 @@ static void __init build_obp_range(unsigned long start, unsigned long end, unsig
 	unsigned long vaddr;
 
 	for (vaddr = start; vaddr < end; vaddr += BASE_PAGE_SIZE) {
-		unsigned long val, pte_phys, pmd_phys;
-		pmd_t pmd_ent;
-		int i;
-
-		pmd_phys = (prom_pmd_phys +
-			    (((vaddr >> 23) & 0x7ff) * sizeof(pmd_t)));
-		pmd_val(pmd_ent) = load_phys32(pmd_phys);
-		if (pmd_none(pmd_ent)) {
-			pte_phys = early_alloc_phys(BASE_PAGE_SIZE);
-
-			for (i = 0; i < BASE_PAGE_SIZE / sizeof(pte_t); i++)
-				store_phys64(pte_phys+i*sizeof(pte_t),0);
+		unsigned long val;
+		pmd_t *pmd;
+		pte_t *pte;
 
-			pmd_val(pmd_ent) = pte_phys >> 11UL;
-			store_phys32(pmd_phys, pmd_val(pmd_ent));
+		pmd = prompmd + ((vaddr >> 23) & 0x7ff);
+		if (pmd_none(*pmd)) {
+			pte = __alloc_bootmem(BASE_PAGE_SIZE, BASE_PAGE_SIZE,
+					      PAGE_SIZE);
+			if (!pte)
+				prom_halt();
+			memset(pte, 0, BASE_PAGE_SIZE);
+			pmd_set(pmd, pte);
 		}
-
-		pte_phys = (unsigned long)pmd_val(pmd_ent) << 11UL;
-		pte_phys += (((vaddr >> 13) & 0x3ff) * sizeof(pte_t));
+		pte = (pte_t *) __pmd_page(*pmd) + ((vaddr >> 13) & 0x3ff);
 
 		val = data;
 
@@ -501,7 +447,8 @@ static void __init build_obp_range(unsigned long start, unsigned long end, unsig
 		if (tlb_type == spitfire)
 			val &= ~0x0003fe0000000000UL;
 
-		store_phys64(pte_phys, val | _PAGE_MODIFIED);
+		set_pte_at(&init_mm, vaddr, pte,
+			   __pte(val | _PAGE_MODIFIED));
 
 		data += BASE_PAGE_SIZE;
 	}
@@ -514,13 +461,17 @@ static inline int in_obp_range(unsigned long vaddr)
 }
 
 #define OBP_PMD_SIZE 2048
-static void __init build_obp_pgtable(int prom_trans_ents)
+static void __init build_obp_pgtable(void)
 {
 	unsigned long i;
 
-	prom_pmd_phys = early_alloc_phys(OBP_PMD_SIZE);
-	for (i = 0; i < OBP_PMD_SIZE; i += 4)
-		store_phys32(prom_pmd_phys + i, 0);
+	prompmd = __alloc_bootmem(OBP_PMD_SIZE, OBP_PMD_SIZE, PAGE_SIZE);
+	if (!prompmd)
+		prom_halt();
+
+	memset(prompmd, 0, OBP_PMD_SIZE);
+
+	prom_pmd_phys = __pa(prompmd);
 
 	for (i = 0; i < prom_trans_ents; i++) {
 		unsigned long start, end;
@@ -540,7 +491,7 @@ static void __init build_obp_pgtable(int prom_trans_ents)
 /* Read OBP translations property into 'prom_trans[]'.
  * Return the number of entries.
  */
-static int __init read_obp_translations(void)
+static void __init read_obp_translations(void)
 {
 	int n, node;
 
@@ -561,8 +512,10 @@ static int __init read_obp_translations(void)
 		prom_printf("prom_mappings: Couldn't get property.\n");
 		prom_halt();
 	}
+
 	n = n / sizeof(struct linux_prom_translation);
-	return n;
+
+	prom_trans_ents = n;
 }
 
 static void __init remap_kernel(void)
@@ -582,28 +535,38 @@ static void __init remap_kernel(void)
 	prom_dtlb_load(tlb_ent, tte_data, tte_vaddr);
 	prom_itlb_load(tlb_ent, tte_data, tte_vaddr);
 	if (bigkernel) {
-		prom_dtlb_load(tlb_ent - 1,
+		tlb_ent -= 1;
+		prom_dtlb_load(tlb_ent,
 			       tte_data + 0x400000, 
 			       tte_vaddr + 0x400000);
-		prom_itlb_load(tlb_ent - 1,
+		prom_itlb_load(tlb_ent,
 			       tte_data + 0x400000, 
 			       tte_vaddr + 0x400000);
 	}
+	sparc64_highest_unlocked_tlb_ent = tlb_ent - 1;
+	if (tlb_type == cheetah_plus) {
+		sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
+					    CTX_CHEETAH_PLUS_NUC);
+		sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
+		sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
+	}
 }
 
-static void __init inherit_prom_mappings(void)
-{
-	int n;
 
-	n = read_obp_translations();
-	build_obp_pgtable(n);
+static void __init inherit_prom_mappings_pre(void)
+{
+	read_obp_translations();
 
 	/* Now fixup OBP's idea about where we really are mapped. */
 	prom_printf("Remapping the kernel... ");
 	remap_kernel();
 
 	prom_printf("done.\n");
+}
 
+static void __init inherit_prom_mappings_post(void)
+{
+	build_obp_pgtable();
 	register_prom_callbacks();
 }
 
@@ -788,8 +751,8 @@ void inherit_locked_prom_mappings(int save_p)
 		}
 	}
 	if (tlb_type == spitfire) {
-		int high = SPITFIRE_HIGHEST_LOCKED_TLBENT - bigkernel;
-		for (i = 0; i < high; i++) {
+		int high = sparc64_highest_unlocked_tlb_ent;
+		for (i = 0; i <= high; i++) {
 			unsigned long data;
 
 			/* Spitfire Errata #32 workaround */
@@ -877,9 +840,9 @@ void inherit_locked_prom_mappings(int save_p)
 			}
 		}
 	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
-		int high = CHEETAH_HIGHEST_LOCKED_TLBENT - bigkernel;
+		int high = sparc64_highest_unlocked_tlb_ent;
 
-		for (i = 0; i < high; i++) {
+		for (i = 0; i <= high; i++) {
 			unsigned long data;
 
 			data = cheetah_get_ldtlb_data(i);
@@ -1556,8 +1519,7 @@ void __init paging_init(void)
 	
 	swapper_pgd_zero = pgd_val(swapper_pg_dir[0]);
 	
-	/* Inherit non-locked OBP mappings. */
-	inherit_prom_mappings();
+	inherit_prom_mappings_pre();
 	
 	/* Ok, we can use our TLB miss and window trap handlers safely.
 	 * We need to do a quick peek here to see if we are on StarFire
@@ -1568,15 +1530,23 @@ void __init paging_init(void)
 		extern void setup_tba(int);
 		setup_tba(this_is_starfire);
 	}
-
-	inherit_locked_prom_mappings(1);
-
 	__flush_tlb_all();
 
+	/* Everything from this point forward, until we are done with
+	 * inherit_prom_mappings_post(), must complete successfully
+	 * without calling into the firmware.  The firwmare page tables
+	 * have not been built, but we are running on the Linux kernel's
+	 * trap table.
+	 */
+
 	/* Setup bootmem... */
 	pages_avail = 0;
 	last_valid_pfn = end_pfn = bootmem_init(&pages_avail);
 
+	inherit_prom_mappings_post();
+
+	inherit_locked_prom_mappings(1);
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 	kernel_physical_mapping_init();
 #endif