Diffstat (limited to 'arch/arm/mm')
-rw-r--r--  arch/arm/mm/Kconfig                                                |   8
-rw-r--r--  arch/arm/mm/Makefile                                               |   2
-rw-r--r--  arch/arm/mm/abort-ev7.S                                            |   1
-rw-r--r--  arch/arm/mm/abort-nommu.S                                          |   1
-rw-r--r--  arch/arm/mm/alignment.c                                            |   2
-rw-r--r--  arch/arm/mm/cache-feroceon-l2.c                                    |  42
-rw-r--r--  arch/arm/mm/cache-l2x0.c                                           |   2
-rw-r--r--  arch/arm/mm/cache-v7.S                                             |  10
-rw-r--r--  arch/arm/mm/cache-xsc3l2.c                                         |   3
-rw-r--r--  arch/arm/mm/copypage-v4mc.c                                        |   2
-rw-r--r--  arch/arm/mm/copypage-v6.c                                          |   1
-rw-r--r--  arch/arm/mm/copypage-xscale.c                                      |   2
-rw-r--r--  arch/arm/mm/dma-mapping.c (renamed from arch/arm/mm/consistent.c)  | 104
-rw-r--r--  arch/arm/mm/extable.c                                              |   2
-rw-r--r--  arch/arm/mm/fault-armv.c                                           |  13
-rw-r--r--  arch/arm/mm/fault.c                                                |   7
-rw-r--r--  arch/arm/mm/flush.c                                                |   1
-rw-r--r--  arch/arm/mm/init.c                                                 | 193
-rw-r--r--  arch/arm/mm/iomap.c                                                |   3
-rw-r--r--  arch/arm/mm/ioremap.c                                              |  15
-rw-r--r--  arch/arm/mm/mm.h                                                   |   3
-rw-r--r--  arch/arm/mm/mmap.c                                                 |   6
-rw-r--r--  arch/arm/mm/mmu.c                                                  | 106
-rw-r--r--  arch/arm/mm/nommu.c                                                |  18
-rw-r--r--  arch/arm/mm/proc-arm1020.S                                         |  26
-rw-r--r--  arch/arm/mm/proc-arm1020e.S                                        |  26
-rw-r--r--  arch/arm/mm/proc-arm1022.S                                         |  26
-rw-r--r--  arch/arm/mm/proc-arm1026.S                                         |  26
-rw-r--r--  arch/arm/mm/proc-arm6_7.S                                          |  27
-rw-r--r--  arch/arm/mm/proc-arm720.S                                          |  25
-rw-r--r--  arch/arm/mm/proc-arm740.S                                          |   2
-rw-r--r--  arch/arm/mm/proc-arm7tdmi.S                                        |   2
-rw-r--r--  arch/arm/mm/proc-arm920.S                                          |  28
-rw-r--r--  arch/arm/mm/proc-arm922.S                                          |  26
-rw-r--r--  arch/arm/mm/proc-arm925.S                                          |  26
-rw-r--r--  arch/arm/mm/proc-arm926.S                                          |  26
-rw-r--r--  arch/arm/mm/proc-arm940.S                                          |   2
-rw-r--r--  arch/arm/mm/proc-arm946.S                                          |   2
-rw-r--r--  arch/arm/mm/proc-arm9tdmi.S                                        |   2
-rw-r--r--  arch/arm/mm/proc-feroceon.S                                        |  33
-rw-r--r--  arch/arm/mm/proc-macros.S                                          | 170
-rw-r--r--  arch/arm/mm/proc-sa110.S                                           |  21
-rw-r--r--  arch/arm/mm/proc-sa1100.S                                          |  21
-rw-r--r--  arch/arm/mm/proc-v6.S                                              |  42
-rw-r--r--  arch/arm/mm/proc-v7.S                                              |  39
-rw-r--r--  arch/arm/mm/proc-xsc3.S                                            |  56
-rw-r--r--  arch/arm/mm/proc-xscale.S                                          |  76
-rw-r--r--  arch/arm/mm/tlb-v7.S                                               |   2
48 files changed, 679 insertions, 600 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index ed15f876c725..330814d1ee25 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -735,6 +735,14 @@ config CACHE_FEROCEON_L2
 	help
 	  This option enables the Feroceon L2 cache controller.
 
+config CACHE_FEROCEON_L2_WRITETHROUGH
+	bool "Force Feroceon L2 cache write through"
+	depends on CACHE_FEROCEON_L2
+	default n
+	help
+	  Say Y here to use the Feroceon L2 cache in writethrough mode.
+	  Unless you specifically require this, say N for writeback mode.
+
 config CACHE_L2X0
 	bool "Enable the L2x0 outer cache controller"
 	depends on REALVIEW_EB_ARM11MP || MACH_REALVIEW_PB11MP || MACH_REALVIEW_PB1176
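
The new CACHE_FEROCEON_L2_WRITETHROUGH symbol only selects a maintenance
policy at build time; the matching guards appear in proc-feroceon.S further
down. A minimal C-level sketch of how such a pair of symbols typically gates
write-back-only maintenance (everything except the two CONFIG names is
hypothetical):

#if defined(CONFIG_CACHE_FEROCEON_L2) && \
    !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
/* write-back mode: the L2 can hold dirty lines, so clean before DMA */
static void l2_clean_before_dma(void)
{
	/* issue "clean L2 entry" ops over the buffer here */
}
#else
/* write-through mode: the L2 never holds dirty data; cleaning is a no-op */
static inline void l2_clean_before_dma(void) { }
#endif
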
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 2e27a8c8372b..480f78a3611a 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux arm-specific parts of the memory manager.
 #
 
-obj-y				:= consistent.o extable.o fault.o init.o \
+obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   iomap.o
 
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o ioremap.o mmap.o \
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
index eb90bce38e14..2e6dc040c654 100644
--- a/arch/arm/mm/abort-ev7.S
+++ b/arch/arm/mm/abort-ev7.S
@@ -30,3 +30,4 @@ ENTRY(v7_early_abort)
 	 * New designs should not need to patch up faults.
 	 */
 	mov	pc, lr
+ENDPROC(v7_early_abort)
diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S
index a7cc7f9ee45d..625e580945b5 100644
--- a/arch/arm/mm/abort-nommu.S
+++ b/arch/arm/mm/abort-nommu.S
@@ -17,3 +17,4 @@ ENTRY(nommu_early_abort)
 	mov	r0, #0				@ clear r0, r1 (no FSR/FAR)
 	mov	r1, #0
 	mov	pc, lr
+ENDPROC(nommu_early_abort)
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index e162cca5917f..133e65d166b3 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -17,8 +17,8 @@
 #include <linux/string.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/unaligned.h>
 
 #include "fault.h"
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
index 7b5a25d81576..13cdae8b0d44 100644
--- a/arch/arm/mm/cache-feroceon-l2.c
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -48,11 +48,12 @@ static inline void l2_clean_mva_range(unsigned long start, unsigned long end)
 	 * L2 is PIPT and range operations only do a TLB lookup on
 	 * the start address.
 	 */
-	BUG_ON((start ^ end) & ~(PAGE_SIZE - 1));
+	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
 	raw_local_irq_save(flags);
-	__asm__("mcr p15, 1, %0, c15, c9, 4" : : "r" (start));
-	__asm__("mcr p15, 1, %0, c15, c9, 5" : : "r" (end));
+	__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
+		"mcr p15, 1, %1, c15, c9, 5"
+		: : "r" (start), "r" (end));
 	raw_local_irq_restore(flags);
 }
 
@@ -80,11 +81,12 @@ static inline void l2_inv_mva_range(unsigned long start, unsigned long end)
 	 * L2 is PIPT and range operations only do a TLB lookup on
 	 * the start address.
 	 */
-	BUG_ON((start ^ end) & ~(PAGE_SIZE - 1));
+	BUG_ON((start ^ end) >> PAGE_SHIFT);
 
 	raw_local_irq_save(flags);
-	__asm__("mcr p15, 1, %0, c15, c11, 4" : : "r" (start));
-	__asm__("mcr p15, 1, %0, c15, c11, 5" : : "r" (end));
+	__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
+		"mcr p15, 1, %1, c15, c11, 5"
+		: : "r" (start), "r" (end));
 	raw_local_irq_restore(flags);
 }
 
@@ -205,7 +207,7 @@ static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
  * time.  These are necessary because the L2 cache can only be enabled
  * or disabled while the L1 Dcache and Icache are both disabled.
  */
-static void __init invalidate_and_disable_dcache(void)
+static int __init flush_and_disable_dcache(void)
 {
 	u32 cr;
 
@@ -217,7 +219,9 @@ static void __init invalidate_and_disable_dcache(void)
 		flush_cache_all();
 		set_cr(cr & ~CR_C);
 		raw_local_irq_restore(flags);
+		return 1;
 	}
+	return 0;
 }
 
 static void __init enable_dcache(void)
@@ -225,18 +229,17 @@ static void __init enable_dcache(void)
 	u32 cr;
 
 	cr = get_cr();
-	if (!(cr & CR_C))
-		set_cr(cr | CR_C);
+	set_cr(cr | CR_C);
 }
 
 static void __init __invalidate_icache(void)
 {
 	int dummy;
 
-	__asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0\n" : "=r" (dummy));
+	__asm__ __volatile__("mcr p15, 0, %0, c7, c5, 0" : "=r" (dummy));
 }
 
-static void __init invalidate_and_disable_icache(void)
+static int __init invalidate_and_disable_icache(void)
 {
 	u32 cr;
 
@@ -244,7 +247,9 @@ static void __init invalidate_and_disable_icache(void)
 	if (cr & CR_I) {
 		set_cr(cr & ~CR_I);
 		__invalidate_icache();
+		return 1;
 	}
+	return 0;
 }
 
 static void __init enable_icache(void)
@@ -252,8 +257,7 @@ static void __init enable_icache(void)
 	u32 cr;
 
 	cr = get_cr();
-	if (!(cr & CR_I))
-		set_cr(cr | CR_I);
+	set_cr(cr | CR_I);
 }
 
 static inline u32 read_extra_features(void)
@@ -291,13 +295,17 @@ static void __init enable_l2(void)
 
 	u = read_extra_features();
 	if (!(u & 0x00400000)) {
+		int i, d;
+
 		printk(KERN_INFO "Feroceon L2: Enabling L2\n");
 
-		invalidate_and_disable_dcache();
-		invalidate_and_disable_icache();
+		d = flush_and_disable_dcache();
+		i = invalidate_and_disable_icache();
 		write_extra_features(u | 0x00400000);
-		enable_icache();
-		enable_dcache();
+		if (i)
+			enable_icache();
+		if (d)
+			enable_dcache();
 	}
 }
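
A side note on the BUG_ON() rewrite in l2_clean_mva_range() and
l2_inv_mva_range() above: the old mask form and the new shift form are
equivalent same-page assertions; the new one merely reads (and compiles)
more simply. A sketch of the equivalence:

/* each of these traps iff 'start' and 'end' fall in different pages */
BUG_ON((start ^ end) & ~(PAGE_SIZE - 1));		/* old: mask off the offset bits */
BUG_ON((start ^ end) >> PAGE_SHIFT);			/* new: shift them out */
BUG_ON((start & PAGE_MASK) != (end & PAGE_MASK));	/* spelled-out equivalent */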
 
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 76b800a95191..b480f1d3591f 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -18,9 +18,9 @@
  */
 #include <linux/init.h>
 #include <linux/spinlock.h>
+#include <linux/io.h>
 
 #include <asm/cacheflush.h>
-#include <asm/io.h>
 #include <asm/hardware/cache-l2x0.h>
 
 #define CACHE_LINE_SIZE		32
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 35ffc4d95997..d19c2bec2b1f 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -66,6 +66,7 @@ finished:
 	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
 	isb
 	mov	pc, lr
+ENDPROC(v7_flush_dcache_all)
 
 /*
  *	v7_flush_cache_all()
@@ -85,6 +86,7 @@ ENTRY(v7_flush_kern_cache_all)
 	mcr	p15, 0, r0, c7, c5, 0		@ I+BTB cache invalidate
 	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}
 	mov	pc, lr
+ENDPROC(v7_flush_kern_cache_all)
 
 /*
  *	v7_flush_cache_all()
@@ -110,6 +112,8 @@ ENTRY(v7_flush_user_cache_all)
  */
 ENTRY(v7_flush_user_cache_range)
 	mov	pc, lr
+ENDPROC(v7_flush_user_cache_all)
+ENDPROC(v7_flush_user_cache_range)
 
 /*
  *	v7_coherent_kern_range(start,end)
@@ -155,6 +159,8 @@ ENTRY(v7_coherent_user_range)
 	dsb
 	isb
 	mov	pc, lr
+ENDPROC(v7_coherent_kern_range)
+ENDPROC(v7_coherent_user_range)
 
 /*
  *	v7_flush_kern_dcache_page(kaddr)
@@ -174,6 +180,7 @@ ENTRY(v7_flush_kern_dcache_page)
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_flush_kern_dcache_page)
 
 /*
  *	v7_dma_inv_range(start,end)
@@ -202,6 +209,7 @@ ENTRY(v7_dma_inv_range)
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_dma_inv_range)
 
 /*
  *	v7_dma_clean_range(start,end)
@@ -219,6 +227,7 @@ ENTRY(v7_dma_clean_range)
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_dma_clean_range)
 
 /*
  *	v7_dma_flush_range(start,end)
@@ -236,6 +245,7 @@ ENTRY(v7_dma_flush_range)
 	blo	1b
 	dsb
 	mov	pc, lr
+ENDPROC(v7_dma_flush_range)
 
 	__INITDATA
 
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
index 158bd96763d3..10b1bae1a258 100644
--- a/arch/arm/mm/cache-xsc3l2.c
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -18,10 +18,11 @@
  */
 #include <linux/init.h>
 #include <linux/spinlock.h>
+#include <linux/io.h>
 
 #include <asm/system.h>
+#include <asm/cputype.h>
 #include <asm/cacheflush.h>
-#include <asm/io.h>
 
 #define CR_L2	(1 << 26)
 
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
index ded0e96d069d..8d33e2549344 100644
--- a/arch/arm/mm/copypage-v4mc.c
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -28,7 +28,7 @@
  * specific hacks for copying pages efficiently.
  */
 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
-				  L_PTE_CACHEABLE)
+				  L_PTE_MT_MINICACHE)
 
 static DEFINE_SPINLOCK(minicache_lock);
 
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
index 3adb79257f43..0e21c0767580 100644
--- a/arch/arm/mm/copypage-v6.c
+++ b/arch/arm/mm/copypage-v6.c
@@ -16,6 +16,7 @@
 #include <asm/shmparam.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
+#include <asm/cachetype.h>
 
 #include "mm.h"
 
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
index 2e455f82a4d5..bad49331bbf9 100644
--- a/arch/arm/mm/copypage-xscale.c
+++ b/arch/arm/mm/copypage-xscale.c
@@ -30,7 +30,7 @@
 #define COPYPAGE_MINICACHE	0xffff8000
 
 #define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
-				  L_PTE_CACHEABLE)
+				  L_PTE_MT_MINICACHE)
 
 static DEFINE_SPINLOCK(minicache_lock);
 
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/dma-mapping.c
index db7b3e38ef1d..67960017dc8f 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/arm/mm/consistent.c
+ *  linux/arch/arm/mm/dma-mapping.c
  *
  *  Copyright (C) 2000-2004 Russell King
  *
@@ -512,3 +512,105 @@ void dma_cache_maint(const void *start, size_t size, int direction)
 	}
 }
 EXPORT_SYMBOL(dma_cache_maint);
+
+/**
+ * dma_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the dma_map_single interface.
+ * Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length.  They are obtained via
+ * sg_dma_{address,length}.
+ *
+ * Device ownership issues as mentioned for dma_map_single are the same
+ * here.
+ */
+int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i, j;
+
+	for_each_sg(sg, s, nents, i) {
+		s->dma_address = dma_map_page(dev, sg_page(s), s->offset,
+						s->length, dir);
+		if (dma_mapping_error(dev, s->dma_address))
+			goto bad_mapping;
+	}
+	return nents;
+
+ bad_mapping:
+	for_each_sg(sg, s, i, j)
+		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+	return 0;
+}
+EXPORT_SYMBOL(dma_map_sg);
+
+/**
+ * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations.  Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i)
+		dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir);
+}
+EXPORT_SYMBOL(dma_unmap_sg);
+
+/**
+ * dma_sync_sg_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0,
+					sg_dma_len(s), dir);
+	}
+}
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+/**
+ * dma_sync_sg_for_device
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0,
+					sg_dma_len(s), dir))
+			continue;
+
+		if (!arch_is_coherent())
+			dma_cache_maint(sg_virt(s), s->length, dir);
+	}
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
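
For context, a hedged sketch of how a driver would consume the
scatter-gather entry points added above; the device, scatterlist and nents
are assumed to come from the caller, and the descriptor programming is
elided:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

static int example_start_tx(struct device *dev,
			    struct scatterlist *sglist, int nents)
{
	struct scatterlist *s;
	int i, count;

	count = dma_map_sg(dev, sglist, nents, DMA_TO_DEVICE);
	if (count == 0)
		return -ENOMEM;		/* a mapping failed and was unwound */

	for_each_sg(sglist, s, count, i) {
		/* program one descriptor per mapped segment */
		pr_debug("seg %d: 0x%08x+0x%x\n", i,
			 sg_dma_address(s), sg_dma_len(s));
	}

	/* ... run the transfer; unmap with the original nents, not count */
	dma_unmap_sg(dev, sglist, nents, DMA_TO_DEVICE);
	return 0;
}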
diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c
index 9592c3ee4cb2..9d285626bc7d 100644
--- a/arch/arm/mm/extable.c
+++ b/arch/arm/mm/extable.c
@@ -2,7 +2,7 @@
  *  linux/arch/arm/mm/extable.c
  */
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index a8ec97b4752e..81d0b8772de3 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -17,11 +17,13 @@
 #include <linux/init.h>
 #include <linux/pagemap.h>
 
+#include <asm/bugs.h>
 #include <asm/cacheflush.h>
+#include <asm/cachetype.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
+static unsigned long shared_pte_mask = L_PTE_MT_BUFFERABLE;
 
 /*
  * We take the easy way out of this problem - we make the
@@ -63,9 +65,10 @@ static int adjust_pte(struct vm_area_struct *vma, unsigned long address)
 	 * If this page isn't present, or is already setup to
 	 * fault (ie, is old), we can safely ignore any issues.
 	 */
-	if (ret && pte_val(entry) & shared_pte_mask) {
+	if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) {
 		flush_cache_page(vma, address, pte_pfn(entry));
-		pte_val(entry) &= ~shared_pte_mask;
+		pte_val(entry) &= ~L_PTE_MT_MASK;
+		pte_val(entry) |= shared_pte_mask;
 		set_pte_at(vma->vm_mm, address, pte, entry);
 		flush_tlb_page(vma, address);
 	}
@@ -197,7 +200,7 @@ void __init check_writebuffer_bugs(void)
 		unsigned long *p1, *p2;
 		pgprot_t prot = __pgprot(L_PTE_PRESENT|L_PTE_YOUNG|
 					 L_PTE_DIRTY|L_PTE_WRITE|
-					 L_PTE_BUFFERABLE);
+					 L_PTE_MT_BUFFERABLE);
 
 		p1 = vmap(&page, 1, VM_IOREMAP, prot);
 		p2 = vmap(&page, 1, VM_IOREMAP, prot);
@@ -218,7 +221,7 @@ void __init check_writebuffer_bugs(void)
 
 	if (v) {
 		printk("failed, %s\n", reason);
-		shared_pte_mask |= L_PTE_BUFFERABLE;
+		shared_pte_mask = L_PTE_MT_UNCACHED;
 	} else {
 		printk("ok\n");
 	}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 28ad7ab1c0cd..2df8d9facf57 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -13,11 +13,11 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/kprobes.h>
+#include <linux/uaccess.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-#include <asm/uaccess.h>
 
 #include "fault.h"
 
@@ -72,9 +72,8 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 		}
 
 		pmd = pmd_offset(pgd, addr);
-#if PTRS_PER_PMD != 1
-		printk(", *pmd=%08lx", pmd_val(*pmd));
-#endif
+		if (PTRS_PER_PMD != 1)
+			printk(", *pmd=%08lx", pmd_val(*pmd));
 
 		if (pmd_none(*pmd))
 			break;
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 029ee65fda2b..0fa9bf388f0b 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cachetype.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 30a69d67d673..82c4b4217989 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -26,9 +26,42 @@
 
 #include "mm.h"
 
-extern void _text, _etext, __data_start, _end, __init_begin, __init_end;
-extern unsigned long phys_initrd_start;
-extern unsigned long phys_initrd_size;
+static unsigned long phys_initrd_start __initdata = 0;
+static unsigned long phys_initrd_size __initdata = 0;
+
+static void __init early_initrd(char **p)
+{
+	unsigned long start, size;
+
+	start = memparse(*p, p);
+	if (**p == ',') {
+		size = memparse((*p) + 1, p);
+
+		phys_initrd_start = start;
+		phys_initrd_size = size;
+	}
+}
+__early_param("initrd=", early_initrd);
+
+static int __init parse_tag_initrd(const struct tag *tag)
+{
+	printk(KERN_WARNING "ATAG_INITRD is deprecated; "
+		"please update your bootloader.\n");
+	phys_initrd_start = __virt_to_phys(tag->u.initrd.start);
+	phys_initrd_size = tag->u.initrd.size;
+	return 0;
+}
+
+__tagtable(ATAG_INITRD, parse_tag_initrd);
+
+static int __init parse_tag_initrd2(const struct tag *tag)
+{
+	phys_initrd_start = tag->u.initrd.start;
+	phys_initrd_size = tag->u.initrd.size;
+	return 0;
+}
+
+__tagtable(ATAG_INITRD2, parse_tag_initrd2);
 
 /*
  * This is used to pass memory configuration data from paging_init
@@ -36,10 +69,6 @@ extern unsigned long phys_initrd_size;
  */
 static struct meminfo meminfo = { 0, };
 
-#define for_each_nodebank(iter,mi,no)			\
-	for (iter = 0; iter < mi->nr_banks; iter++)	\
-		if (mi->bank[iter].node == no)
-
 void show_mem(void)
 {
 	int free = 0, total = 0, reserved = 0;
@@ -50,14 +79,15 @@ void show_mem(void)
 	show_free_areas();
 	for_each_online_node(node) {
 		pg_data_t *n = NODE_DATA(node);
-		struct page *map = n->node_mem_map - n->node_start_pfn;
+		struct page *map = pgdat_page_nr(n, 0) - n->node_start_pfn;
 
 		for_each_nodebank (i,mi,node) {
+			struct membank *bank = &mi->bank[i];
 			unsigned int pfn1, pfn2;
 			struct page *page, *end;
 
-			pfn1 = __phys_to_pfn(mi->bank[i].start);
-			pfn2 = __phys_to_pfn(mi->bank[i].size + mi->bank[i].start);
+			pfn1 = bank_pfn_start(bank);
+			pfn2 = bank_pfn_end(bank);
 
 			page = map + pfn1;
 			end  = map + pfn2;
@@ -96,17 +126,17 @@ void show_mem(void)
 static unsigned int __init
 find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
 {
-	unsigned int start_pfn, bank, bootmap_pfn;
+	unsigned int start_pfn, i, bootmap_pfn;
 
 	start_pfn   = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT;
 	bootmap_pfn = 0;
 
-	for_each_nodebank(bank, mi, node) {
+	for_each_nodebank(i, mi, node) {
+		struct membank *bank = &mi->bank[i];
 		unsigned int start, end;
 
-		start = mi->bank[bank].start >> PAGE_SHIFT;
-		end   = (mi->bank[bank].size +
-			 mi->bank[bank].start) >> PAGE_SHIFT;
+		start = bank_pfn_start(bank);
+		end   = bank_pfn_end(bank);
 
 		if (end < start_pfn)
 			continue;
@@ -145,13 +175,10 @@ static int __init check_initrd(struct meminfo *mi)
 		initrd_node = -1;
 
 		for (i = 0; i < mi->nr_banks; i++) {
-			unsigned long bank_end;
-
-			bank_end = mi->bank[i].start + mi->bank[i].size;
-
-			if (mi->bank[i].start <= phys_initrd_start &&
-			    end <= bank_end)
-				initrd_node = mi->bank[i].node;
+			struct membank *bank = &mi->bank[i];
+			if (bank_phys_start(bank) <= phys_initrd_start &&
+			    end <= bank_phys_end(bank))
+				initrd_node = bank->node;
 		}
 	}
 
@@ -171,19 +198,17 @@ static inline void map_memory_bank(struct membank *bank)
 #ifdef CONFIG_MMU
 	struct map_desc map;
 
-	map.pfn = __phys_to_pfn(bank->start);
-	map.virtual = __phys_to_virt(bank->start);
-	map.length = bank->size;
+	map.pfn = bank_pfn_start(bank);
+	map.virtual = __phys_to_virt(bank_phys_start(bank));
+	map.length = bank_phys_size(bank);
 	map.type = MT_MEMORY;
 
 	create_mapping(&map);
 #endif
 }
 
-static unsigned long __init
-bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
+static unsigned long __init bootmem_init_node(int node, struct meminfo *mi)
 {
-	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
 	unsigned long start_pfn, end_pfn, boot_pfn;
 	unsigned int boot_pages;
 	pg_data_t *pgdat;
@@ -199,8 +224,8 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 		struct membank *bank = &mi->bank[i];
 		unsigned long start, end;
 
-		start = bank->start >> PAGE_SHIFT;
-		end = (bank->start + bank->size) >> PAGE_SHIFT;
+		start = bank_pfn_start(bank);
+		end = bank_pfn_end(bank);
 
 		if (start_pfn > start)
 			start_pfn = start;
@@ -230,8 +255,11 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	pgdat = NODE_DATA(node);
 	init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn);
 
-	for_each_nodebank(i, mi, node)
-		free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size);
+	for_each_nodebank(i, mi, node) {
+		struct membank *bank = &mi->bank[i];
+		free_bootmem_node(pgdat, bank_phys_start(bank), bank_phys_size(bank));
+		memory_present(node, bank_pfn_start(bank), bank_pfn_end(bank));
+	}
 
 	/*
 	 * Reserve the bootmem bitmap for this node.
@@ -239,31 +267,39 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
 			     boot_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-	/*
-	 * Reserve any special node zero regions.
-	 */
-	if (node == 0)
-		reserve_node_zero(pgdat);
+	return end_pfn;
+}
 
+static void __init bootmem_reserve_initrd(int node)
+{
 #ifdef CONFIG_BLK_DEV_INITRD
-	/*
-	 * If the initrd is in this node, reserve its memory.
-	 */
-	if (node == initrd_node) {
-		int res = reserve_bootmem_node(pgdat, phys_initrd_start,
-				     phys_initrd_size, BOOTMEM_EXCLUSIVE);
-
-		if (res == 0) {
-			initrd_start = __phys_to_virt(phys_initrd_start);
-			initrd_end = initrd_start + phys_initrd_size;
-		} else {
-			printk(KERN_ERR
-				"INITRD: 0x%08lx+0x%08lx overlaps in-use "
-				"memory region - disabling initrd\n",
-				phys_initrd_start, phys_initrd_size);
-		}
+	pg_data_t *pgdat = NODE_DATA(node);
+	int res;
+
+	res = reserve_bootmem_node(pgdat, phys_initrd_start,
+			     phys_initrd_size, BOOTMEM_EXCLUSIVE);
+
+	if (res == 0) {
+		initrd_start = __phys_to_virt(phys_initrd_start);
+		initrd_end = initrd_start + phys_initrd_size;
+	} else {
+		printk(KERN_ERR
+			"INITRD: 0x%08lx+0x%08lx overlaps in-use "
+			"memory region - disabling initrd\n",
+			phys_initrd_start, phys_initrd_size);
 	}
 #endif
+}
+
+static void __init bootmem_free_node(int node, struct meminfo *mi)
+{
+	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
+	unsigned long start_pfn, end_pfn;
+	pg_data_t *pgdat = NODE_DATA(node);
+	int i;
+
+	start_pfn = pgdat->bdata->node_min_pfn;
+	end_pfn = pgdat->bdata->node_low_pfn;
 
 	/*
 	 * initialise the zones within this node.
@@ -284,7 +320,7 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	 */
 	zhole_size[0] = zone_size[0];
 	for_each_nodebank(i, mi, node)
-		zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
+		zhole_size[0] -= bank_pfn_size(&mi->bank[i]);
 
 	/*
 	 * Adjust the sizes according to any special requirements for
@@ -293,21 +329,12 @@ bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
 	arch_adjust_zones(node, zone_size, zhole_size);
 
 	free_area_init_node(node, zone_size, start_pfn, zhole_size);
-
-	return end_pfn;
 }
 
 void __init bootmem_init(struct meminfo *mi)
 {
 	unsigned long memend_pfn = 0;
-	int node, initrd_node, i;
-
-	/*
-	 * Invalidate the node number for empty or invalid memory banks
-	 */
-	for (i = 0; i < mi->nr_banks; i++)
-		if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES)
-			mi->bank[i].node = -1;
+	int node, initrd_node;
 
 	memcpy(&meminfo, mi, sizeof(meminfo));
 
@@ -320,9 +347,19 @@ void __init bootmem_init(struct meminfo *mi)
 	 * Run through each node initialising the bootmem allocator.
 	 */
 	for_each_node(node) {
-		unsigned long end_pfn;
+		unsigned long end_pfn = bootmem_init_node(node, mi);
 
-		end_pfn = bootmem_init_node(node, initrd_node, mi);
+		/*
+		 * Reserve any special node zero regions.
+		 */
+		if (node == 0)
+			reserve_node_zero(NODE_DATA(node));
+
+		/*
+		 * If the initrd is in this node, reserve its memory.
+		 */
+		if (node == initrd_node)
+			bootmem_reserve_initrd(node);
 
 		/*
 		 * Remember the highest memory PFN.
@@ -331,6 +368,19 @@ void __init bootmem_init(struct meminfo *mi)
 			memend_pfn = end_pfn;
 	}
 
+	/*
+	 * sparse_init() needs the bootmem allocator up and running.
+	 */
+	sparse_init();
+
+	/*
+	 * Now free memory in each node - free_area_init_node needs
+	 * the sparse mem_map arrays initialized by sparse_init()
+	 * for memmap_init_zone(), otherwise all PFNs are invalid.
+	 */
+	for_each_node(node)
+		bootmem_free_node(node, mi);
+
 	high_memory = __va(memend_pfn << PAGE_SHIFT);
 
 	/*
@@ -401,7 +451,9 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi)
 	 * information on the command line.
 	 */
 	for_each_nodebank(i, mi, node) {
-		bank_start = mi->bank[i].start >> PAGE_SHIFT;
+		struct membank *bank = &mi->bank[i];
+
+		bank_start = bank_pfn_start(bank);
 		if (bank_start < prev_bank_end) {
 			printk(KERN_ERR "MEM: unordered memory banks.  "
 				"Not freeing memmap.\n");
@@ -415,8 +467,7 @@ static void __init free_unused_memmap_node(int node, struct meminfo *mi)
 		if (prev_bank_end && prev_bank_end != bank_start)
 			free_memmap(node, prev_bank_end, bank_start);
 
-		prev_bank_end = (mi->bank[i].start +
-				 mi->bank[i].size) >> PAGE_SHIFT;
+		prev_bank_end = bank_pfn_end(bank);
 	}
 }
 
@@ -461,8 +512,8 @@ void __init mem_init(void)
 
 	num_physpages = 0;
 	for (i = 0; i < meminfo.nr_banks; i++) {
-		num_physpages += meminfo.bank[i].size >> PAGE_SHIFT;
-		printk(" %ldMB", meminfo.bank[i].size >> 20);
+		num_physpages += bank_pfn_size(&meminfo.bank[i]);
+		printk(" %ldMB", bank_phys_size(&meminfo.bank[i]) >> 20);
 	}
 
 	printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
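
As a usage note on the new early_initrd() parser above: both fields go
through memparse(), which accepts hex or decimal with optional K/M/G
suffixes, so a bootloader can pass e.g. "initrd=0x00800000,8M". A small
sketch of the parse, with the command-line string a hypothetical example:

static void __init initrd_arg_example(void)
{
	char *arg = "0x00800000,8M";		/* hypothetical bootloader string */
	unsigned long start, size = 0;

	start = memparse(arg, &arg);		/* 0x00800000; arg now at "," */
	if (*arg == ',')
		size = memparse(arg + 1, &arg);	/* "8M" -> 0x00800000 bytes */

	/* start/size mirror phys_initrd_start/phys_initrd_size above */
}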
diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c
index 7429f8c01015..ffad039cbb73 100644
--- a/arch/arm/mm/iomap.c
+++ b/arch/arm/mm/iomap.c
@@ -7,8 +7,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/ioport.h>
-
-#include <asm/io.h>
+#include <linux/io.h>
 
 #ifdef __io
 void __iomem *ioport_map(unsigned long port, unsigned int nr)
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index b81dbf9ffb77..18373f73f2fc 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -24,9 +24,10 @@
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
+#include <linux/io.h>
 
+#include <asm/cputype.h>
 #include <asm/cacheflush.h>
-#include <asm/io.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -55,8 +56,7 @@ static int remap_area_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
 		if (!pte_none(*pte))
 			goto bad;
 
-		set_pte_ext(pte, pfn_pte(phys_addr >> PAGE_SHIFT, prot),
-			    type->prot_pte_ext);
+		set_pte_ext(pte, pfn_pte(phys_addr >> PAGE_SHIFT, prot), 0);
 		phys_addr += PAGE_SIZE;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	return 0;
@@ -332,15 +332,14 @@ __arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype)
 }
 EXPORT_SYMBOL(__arm_ioremap);
 
-void __iounmap(volatile void __iomem *addr)
+void __iounmap(volatile void __iomem *io_addr)
 {
+	void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
 #ifndef CONFIG_SMP
 	struct vm_struct **p, *tmp;
 #endif
 	unsigned int section_mapping = 0;
 
-	addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long)addr);
-
 #ifndef CONFIG_SMP
 	/*
 	 * If this is a section based mapping we need to handle it
@@ -351,7 +350,7 @@ void __iounmap(volatile void __iomem *addr)
 	 */
 	write_lock(&vmlist_lock);
 	for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
-		if((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) {
+		if ((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) {
 			if (tmp->flags & VM_ARM_SECTION_MAPPING) {
 				*p = tmp->next;
 				unmap_area_sections((unsigned long)tmp->addr,
@@ -366,6 +365,6 @@ void __iounmap(volatile void __iomem *addr)
 #endif
 
 	if (!section_mapping)
-		vunmap((void __force *)addr);
+		vunmap(addr);
 }
 EXPORT_SYMBOL(__iounmap);
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 7647c597fc59..5d9f53907b4e 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -18,7 +18,6 @@ static inline pmd_t *pmd_off_k(unsigned long virt)
 
 struct mem_type {
 	unsigned int prot_pte;
-	unsigned int prot_pte_ext;
 	unsigned int prot_l1;
 	unsigned int prot_sect;
 	unsigned int domain;
@@ -35,3 +34,5 @@ struct pglist_data;
 void __init create_mapping(struct map_desc *md);
 void __init bootmem_init(struct meminfo *mi);
 void reserve_node_zero(struct pglist_data *pgdat);
+
+extern void _text, _stext, _etext, __data_start, _end, __init_begin, __init_end;
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 3f6dc40b8353..5358fcc7f61e 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -6,6 +6,8 @@
 #include <linux/mman.h>
 #include <linux/shm.h>
 #include <linux/sched.h>
+#include <linux/io.h>
+#include <asm/cputype.h>
 #include <asm/system.h>
 
 #define COLOUR_ALIGN(addr,pgoff)		\
@@ -37,8 +39,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	 * caches alias.  This is indicated by bits 9 and 21 of the
 	 * cache type register.
 	 */
-	cache_type = read_cpuid(CPUID_CACHETYPE);
-	if (cache_type != read_cpuid(CPUID_ID)) {
+	cache_type = read_cpuid_cachetype();
+	if (cache_type != read_cpuid_id()) {
 		aliasing = (cache_type | cache_type >> 12) & (1 << 11);
 		if (aliasing)
 			do_align = filp || flags & MAP_SHARED;
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index a713e40e1f1a..8ba754064559 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -15,6 +15,7 @@
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 
+#include <asm/cputype.h>
 #include <asm/mach-types.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -27,9 +28,6 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-extern void _stext, _etext, __data_start, _end;
-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
@@ -68,27 +66,27 @@ static struct cachepolicy cache_policies[] __initdata = {
 		.policy		= "uncached",
 		.cr_mask	= CR_W|CR_C,
 		.pmd		= PMD_SECT_UNCACHED,
-		.pte		= 0,
+		.pte		= L_PTE_MT_UNCACHED,
 	}, {
 		.policy		= "buffered",
 		.cr_mask	= CR_C,
 		.pmd		= PMD_SECT_BUFFERED,
-		.pte		= PTE_BUFFERABLE,
+		.pte		= L_PTE_MT_BUFFERABLE,
 	}, {
 		.policy		= "writethrough",
 		.cr_mask	= 0,
 		.pmd		= PMD_SECT_WT,
-		.pte		= PTE_CACHEABLE,
+		.pte		= L_PTE_MT_WRITETHROUGH,
 	}, {
 		.policy		= "writeback",
 		.cr_mask	= 0,
 		.pmd		= PMD_SECT_WB,
-		.pte		= PTE_BUFFERABLE|PTE_CACHEABLE,
+		.pte		= L_PTE_MT_WRITEBACK,
 	}, {
 		.policy		= "writealloc",
 		.cr_mask	= 0,
 		.pmd		= PMD_SECT_WBWA,
-		.pte		= PTE_BUFFERABLE|PTE_CACHEABLE,
+		.pte		= L_PTE_MT_WRITEALLOC,
 	}
 };
 
@@ -186,35 +184,28 @@ void adjust_cr(unsigned long mask, unsigned long set)
 
 static struct mem_type mem_types[] = {
 	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
-		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
+				  L_PTE_SHARED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_UNCACHED,
 		.domain		= DOMAIN_IO,
 	},
 	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
-		.prot_pte	= PROT_PTE_DEVICE,
-		.prot_pte_ext	= PTE_EXT_TEX(2),
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_TEX(2),
 		.domain		= DOMAIN_IO,
 	},
 	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
-		.prot_pte	= PROT_PTE_DEVICE | L_PTE_CACHEABLE | L_PTE_BUFFERABLE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
 		.prot_l1	= PMD_TYPE_TABLE,
 		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
 		.domain		= DOMAIN_IO,
 	},	
-	[MT_DEVICE_IXP2000] = {	  /* IXP2400 requires XCB=101 for on-chip I/O */
-		.prot_pte	= PROT_PTE_DEVICE,
-		.prot_l1	= PMD_TYPE_TABLE,
-		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE |
-				  PMD_SECT_TEX(1),
-		.domain		= DOMAIN_IO,
-	},
 	[MT_DEVICE_WC] = {	/* ioremap_wc */
-		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
 		.prot_l1	= PMD_TYPE_TABLE,
-		.prot_sect	= PROT_SECT_DEVICE,
+		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE,
 		.domain		= DOMAIN_IO,
 	},
 	[MT_CACHECLEAN] = {
@@ -259,7 +250,7 @@ static void __init build_mem_type_table(void)
 {
 	struct cachepolicy *cp;
 	unsigned int cr = get_cr();
-	unsigned int user_pgprot, kern_pgprot;
+	unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
 	int cpu_arch = cpu_architecture();
 	int i;
 
@@ -277,6 +268,9 @@ static void __init build_mem_type_table(void)
 			cachepolicy = CPOLICY_WRITEBACK;
 		ecc_mask = 0;
 	}
+#ifdef CONFIG_SMP
+	cachepolicy = CPOLICY_WRITEALLOC;
+#endif
 
 	/*
 	 * On non-Xscale3 ARMv5-and-older systems, use CB=01
@@ -285,11 +279,8 @@ static void __init build_mem_type_table(void)
 	 * in xsc3 parlance, Uncached Normal in ARMv6 parlance).
 	 */
 	if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) {
-		mem_types[MT_DEVICE_WC].prot_pte_ext |= PTE_EXT_TEX(1);
 		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
-	} else {
-		mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_BUFFERABLE;
-		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
+		mem_types[MT_DEVICE_WC].prot_sect &= ~PMD_SECT_BUFFERABLE;
 	}
 
 	/*
@@ -312,7 +303,15 @@ static void __init build_mem_type_table(void)
 	}
 
 	cp = &cache_policies[cachepolicy];
-	kern_pgprot = user_pgprot = cp->pte;
+	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
+
+#ifndef CONFIG_SMP
+	/*
+	 * Only use write-through for non-SMP systems
+	 */
+	if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
+		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
+#endif
 
 	/*
 	 * Enable CPU-specific coherency if supported.
@@ -340,7 +339,6 @@ static void __init build_mem_type_table(void)
 		/*
 		 * Mark the device area as "shared device"
 		 */
-		mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE;
 		mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
 
 #ifdef CONFIG_SMP
@@ -349,30 +347,21 @@ static void __init build_mem_type_table(void)
 		 */
 		user_pgprot |= L_PTE_SHARED;
 		kern_pgprot |= L_PTE_SHARED;
+		vecs_pgprot |= L_PTE_SHARED;
 		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
 #endif
 	}
 
 	for (i = 0; i < 16; i++) {
 		unsigned long v = pgprot_val(protection_map[i]);
-		v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
-		protection_map[i] = __pgprot(v);
+		protection_map[i] = __pgprot(v | user_pgprot);
 	}
 
-	mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
-	mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
+	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
+	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
 
-	if (cpu_arch >= CPU_ARCH_ARMv5) {
-#ifndef CONFIG_SMP
-		/*
-		 * Only use write-through for non-SMP systems
-		 */
-		mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
-		mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
-#endif
-	} else {
+	if (cpu_arch < CPU_ARCH_ARMv5)
 		mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
-	}
 
 	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
@@ -420,8 +409,7 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
-			    type->prot_pte_ext);
+		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
@@ -588,12 +576,35 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
 		create_mapping(io_desc + i);
 }
 
+static unsigned long __initdata vmalloc_reserve = SZ_128M;
+
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
+ * bytes. This can be used to increase (or decrease) the vmalloc
+ * area - the default is 128m.
+ */
+static void __init early_vmalloc(char **arg)
+{
+	vmalloc_reserve = memparse(*arg, arg);
+
+	if (vmalloc_reserve < SZ_16M) {
+		vmalloc_reserve = SZ_16M;
+		printk(KERN_WARNING
+			"vmalloc area too small, limiting to %luMB\n",
+			vmalloc_reserve >> 20);
+	}
+}
+__early_param("vmalloc=", early_vmalloc);
+
+#define VMALLOC_MIN	(void *)(VMALLOC_END - vmalloc_reserve)
+
 static int __init check_membank_valid(struct membank *mb)
 {
 	/*
-	 * Check whether this memory region has non-zero size.
+	 * Check whether this memory region has non-zero size or
+	 * invalid node number.
 	 */
-	if (mb->size == 0)
+	if (mb->size == 0 || mb->node >= MAX_NUMNODES)
 		return 0;
 
 	/*
@@ -627,8 +638,7 @@ static int __init check_membank_valid(struct membank *mb)
 
 static void __init sanity_check_meminfo(struct meminfo *mi)
 {
-	int i;
-	int j;
+	int i, j;
 
 	for (i = 0, j = 0; i < mi->nr_banks; i++) {
 		if (check_membank_valid(&mi->bank[i]))
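
The new vmalloc= parameter is parsed the same way, so "vmalloc=256M" and
"vmalloc=0x10000000" are equivalent, and anything below 16MB is clamped with
a warning. A short sketch of the resulting arithmetic, with VMALLOC_END
assumed fixed by the platform at a hypothetical address:

/* the vmalloc area spans [VMALLOC_MIN, VMALLOC_END), so with
 * VMALLOC_END = 0xff000000 (hypothetical) and vmalloc=256M:
 *
 *	vmalloc_reserve	= 0x10000000
 *	VMALLOC_MIN	= 0xff000000 - 0x10000000 = 0xef000000
 *
 * and "vmalloc=8M" would be bumped back up to SZ_16M. */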
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 63c62fdea521..07b62b238979 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -7,16 +7,14 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
+#include <linux/io.h>
 
 #include <asm/cacheflush.h>
-#include <asm/io.h>
 #include <asm/page.h>
 #include <asm/mach/arch.h>
 
 #include "mm.h"
 
-extern void _stext, __data_start, _end;
-
 /*
  * Reserve the various regions of node 0
  */
@@ -43,12 +41,26 @@ void __init reserve_node_zero(pg_data_t *pgdat)
 			BOOTMEM_DEFAULT);
 }
 
+static void __init sanity_check_meminfo(struct meminfo *mi)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < mi->nr_banks; i++) {
+		struct membank *mb = &mi->bank[i];
+
+		if (mb->size != 0 && mb->node < MAX_NUMNODES)
+			mi->bank[j++] = mi->bank[i];
+	}
+	mi->nr_banks = j;
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.
  */
 void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
 {
+	sanity_check_meminfo(mi);
 	bootmem_init(mi);
 }
 
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 5673f4d6113b..b5551bf010aa 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
@@ -399,29 +399,7 @@ ENTRY(cpu_arm1020_switch_mm)
 	.align	5
 ENTRY(cpu_arm1020_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r1, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 #ifndef CONFIG_CPU_DCACHE_DISABLE
 	mcr	p15, 0, r0, c7, c10, 4
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 4343fdb0e9e5..8bc6740c29eb 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
@@ -383,29 +383,7 @@ ENTRY(cpu_arm1020e_switch_mm)
 	.align	5
 ENTRY(cpu_arm1020e_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r1, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 #ifndef CONFIG_CPU_DCACHE_DISABLE
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 2a4ea1659e96..2cd03e66c0a3 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
@@ -365,29 +365,7 @@ ENTRY(cpu_arm1022_switch_mm)
 	.align	5
 ENTRY(cpu_arm1022_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r1, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 #ifndef CONFIG_CPU_DCACHE_DISABLE
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 77a1babd421c..ad961a897f6e 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
@@ -354,29 +354,7 @@ ENTRY(cpu_arm1026_switch_mm)
 	.align	5
 ENTRY(cpu_arm1026_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r1, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 #ifndef CONFIG_CPU_DCACHE_DISABLE
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index c371fc87776e..80d6e1de069a 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -15,11 +15,13 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
 
+#include "proc-macros.S"
+
 ENTRY(cpu_arm6_dcache_clean_area)
 ENTRY(cpu_arm7_dcache_clean_area)
 		mov	pc, lr
@@ -214,30 +216,13 @@ ENTRY(cpu_arm7_switch_mm)
  *	   : r1 = value to set
  * Purpose : Set a PTE and flush it out of any WB cache
  */
-		.align	5
+	.align	5
 ENTRY(cpu_arm6_set_pte_ext)
 ENTRY(cpu_arm7_set_pte_ext)
 #ifdef CONFIG_MMU
-		str	r1, [r0], #-2048		@ linux version
-
-		eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-		bic	r2, r1, #PTE_SMALL_AP_MASK
-		bic	r2, r2, #PTE_TYPE_MASK
-		orr	r2, r2, #PTE_TYPE_SMALL
-
-		tst	r1, #L_PTE_USER			@ User?
-		orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-		tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-		orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-		tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young
-		movne	r2, #0
-
-		str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext wc_disable=0
 #endif /* CONFIG_MMU */
-		mov	pc, lr
+	mov	pc, lr
 
 /*
  * Function: _arm6_7_reset
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index eda733d30455..85ae18695f10 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -36,7 +36,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
@@ -93,29 +93,12 @@ ENTRY(cpu_arm720_switch_mm)
  *	   : r1 = value to set
  * Purpose : Set a PTE and flush it out of any WB cache
  */
-		.align	5
+	.align	5
 ENTRY(cpu_arm720_set_pte_ext)
 #ifdef CONFIG_MMU
-		str	r1, [r0], #-2048		@ linux version
-
-		eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-		bic	r2, r1, #PTE_SMALL_AP_MASK
-		bic	r2, r2, #PTE_TYPE_MASK
-		orr	r2, r2, #PTE_TYPE_SMALL
-
-		tst	r1, #L_PTE_USER			@ User?
-		orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-		tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-		orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-		tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young
-		movne	r2, #0
-
-		str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext wc_disable=0
 #endif
-		mov	pc, lr
+	mov	pc, lr
 
 /*
  * Function: arm720_reset
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
index 3a57376c8bc9..4f95bee63e95 100644
--- a/arch/arm/mm/proc-arm740.S
+++ b/arch/arm/mm/proc-arm740.S
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
index 7b3ecdeb5370..93e05fa7bed4 100644
--- a/arch/arm/mm/proc-arm7tdmi.S
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 28cdb060df45..914d688394fc 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -28,7 +28,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -351,33 +351,11 @@ ENTRY(cpu_arm920_switch_mm)
 	.align	5
 ENTRY(cpu_arm920_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r2, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
-#endif /* CONFIG_MMU */
+#endif
 	mov	pc, lr
 
 	__INIT
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 94ddcb4a4b76..51c9c9859e58 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -29,7 +29,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -355,29 +355,7 @@ ENTRY(cpu_arm922_switch_mm)
 	.align	5
 ENTRY(cpu_arm922_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r2, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index d045812f3399..2724526d89c1 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -52,7 +52,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -398,29 +398,7 @@ ENTRY(cpu_arm925_switch_mm)
 	.align	5
 ENTRY(cpu_arm925_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r2, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 4cd33169a7c9..54466937bff9 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -28,7 +28,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -359,29 +359,7 @@ ENTRY(cpu_arm926_switch_mm)
 	.align	5
 ENTRY(cpu_arm926_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
-	eor	r3, r2, #0x0a			@ C & small page?
-	tst	r3, #0x0b
-	biceq	r2, r2, #4
-#endif
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext
 	mov	r0, r0
 #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
index 551244d5ca19..f595117caf55 100644
--- a/arch/arm/mm/proc-arm940.S
+++ b/arch/arm/mm/proc-arm940.S
@@ -11,7 +11,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
index 6168c6160dee..e03f6ff1fb26 100644
--- a/arch/arm/mm/proc-arm946.S
+++ b/arch/arm/mm/proc-arm946.S
@@ -13,7 +13,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
index c85c1f50e396..be6c11d2b3fb 100644
--- a/arch/arm/mm/proc-arm9tdmi.S
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/ptrace.h>
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index f2e5884c513a..0fe1f8fc3488 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -22,7 +22,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -80,7 +80,8 @@ ENTRY(cpu_feroceon_proc_fin)
 	msr	cpsr_c, ip
 	bl	feroceon_flush_kern_cache_all
 
-#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
 	mov	r0, #0
 	mcr	p15, 1, r0, c15, c9, 0		@ clean L2
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
@@ -389,7 +390,8 @@ ENTRY(feroceon_range_cache_fns)
 
 	.align	5
 ENTRY(cpu_feroceon_dcache_clean_area)
-#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
 	mov	r2, r0
 	mov	r3, r1
 #endif
@@ -397,7 +399,8 @@ ENTRY(cpu_feroceon_dcache_clean_area)
 	add	r0, r0, #CACHE_DLINESIZE
 	subs	r1, r1, #CACHE_DLINESIZE
 	bhi	1b
-#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
 1:	mcr	p15, 1, r2, c15, c9, 1		@ clean L2 entry
 	add	r2, r2, #CACHE_DLINESIZE
 	subs	r3, r3, #CACHE_DLINESIZE
@@ -446,27 +449,11 @@ ENTRY(cpu_feroceon_switch_mm)
 	.align	5
 ENTRY(cpu_feroceon_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext wc_disable=0
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
-#if defined(CONFIG_CACHE_FEROCEON_L2) && !defined(CONFIG_L2_CACHE_WRITETHROUGH)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
 	mcr	p15, 1, r0, c15, c9, 1		@ clean L2 entry
 #endif
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index b13150052a76..54b1f721dec8 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -71,3 +71,173 @@
 	mov	\reg, #16			@ size offset
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
 	.endm
+
+
+/*
+ * Sanity check the PTE configuration for the code below - which makes
+ * certain assumptions about how these bits are laid out.
+ */
+#if L_PTE_SHARED != PTE_EXT_SHARED
+#error PTE shared bit mismatch
+#endif
+#if L_PTE_BUFFERABLE != PTE_BUFFERABLE
+#error PTE bufferable bit mismatch
+#endif
+#if L_PTE_CACHEABLE != PTE_CACHEABLE
+#error PTE cacheable bit mismatch
+#endif
+#if (L_PTE_EXEC+L_PTE_USER+L_PTE_WRITE+L_PTE_DIRTY+L_PTE_YOUNG+\
+     L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
+#error Invalid Linux PTE bit settings
+#endif
+
+/*
+ * The ARMv6 and ARMv7 set_pte_ext translation function.
+ *
+ * Permission translation:
+ *  YUWD  APX AP1 AP0	SVC	User
+ *  0xxx   0   0   0	no acc	no acc
+ *  100x   1   0   1	r/o	no acc
+ *  10x0   1   0   1	r/o	no acc
+ *  1011   0   0   1	r/w	no acc
+ *  110x   0   1   0	r/w	r/o
+ *  11x0   0   1   0	r/w	r/o
+ *  1111   0   1   1	r/w	r/w
+ */
+	.macro	armv6_mt_table pfx
+\pfx\()_mt_table:
+	.long	0x00						@ L_PTE_MT_UNCACHED
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
+	.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
+	.long	PTE_BUFFERABLE					@ L_PTE_MT_DEV_SHARED
+	.long	0x00						@ unused
+	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
+	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC
+	.long	0x00						@ unused
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
+	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.endm
+
+	.macro	armv6_set_pte_ext pfx
+	str	r1, [r0], #-2048		@ linux version
+
+	bic	r3, r1, #0x000003fc
+	bic	r3, r3, #PTE_TYPE_MASK
+	orr	r3, r3, r2
+	orr	r3, r3, #PTE_EXT_AP0 | 2
+
+	adr	ip, \pfx\()_mt_table
+	and	r2, r1, #L_PTE_MT_MASK
+	ldr	r2, [ip, r2]
+
+	tst	r1, #L_PTE_WRITE
+	tstne	r1, #L_PTE_DIRTY
+	orreq	r3, r3, #PTE_EXT_APX
+
+	tst	r1, #L_PTE_USER
+	orrne	r3, r3, #PTE_EXT_AP1
+	tstne	r3, #PTE_EXT_APX
+	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
+
+	tst	r1, #L_PTE_EXEC
+	orreq	r3, r3, #PTE_EXT_XN
+
+	orr	r3, r3, r2
+
+	tst	r1, #L_PTE_YOUNG
+	tstne	r1, #L_PTE_PRESENT
+	moveq	r3, #0
+
+	str	r3, [r0]
+	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
+	.endm
+
+
+/*
+ * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function,
+ * covering most CPUs except Xscale and Xscale 3.
+ *
+ * Permission translation:
+ *  YUWD   AP	SVC	User
+ *  0xxx  0x00	no acc	no acc
+ *  100x  0x00	r/o	no acc
+ *  10x0  0x00	r/o	no acc
+ *  1011  0x55	r/w	no acc
+ *  110x  0xaa	r/w	r/o
+ *  11x0  0xaa	r/w	r/o
+ *  1111  0xff	r/w	r/w
+ */
+	.macro	armv3_set_pte_ext wc_disable=1
+	str	r1, [r0], #-2048		@ linux version
+
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+
+	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
+	bic	r2, r2, #PTE_TYPE_MASK
+	orr	r2, r2, #PTE_TYPE_SMALL
+
+	tst	r3, #L_PTE_USER			@ user?
+	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
+
+	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
+	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
+
+	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
+	movne	r2, #0
+
+	.if	\wc_disable
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	tst	r2, #PTE_CACHEABLE
+	bicne	r2, r2, #PTE_BUFFERABLE
+#endif
+	.endif
+	str	r2, [r0]			@ hardware version
+	.endm
+
+
+/*
+ * Xscale set_pte_ext translation, split into two halves to cope
+ * with work-arounds.  r3 must be preserved by code between these
+ * two macros.
+ *
+ * Permission translation:
+ *  YUWD  AP	SVC	User
+ *  0xxx  00	no acc	no acc
+ *  100x  00	r/o	no acc
+ *  10x0  00	r/o	no acc
+ *  1011  01	r/w	no acc
+ *  110x  10	r/w	r/o
+ *  11x0  10	r/w	r/o
+ *  1111  11	r/w	r/w
+ */
+	.macro	xscale_set_pte_ext_prologue
+	str	r1, [r0], #-2048		@ linux version
+
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
+
+	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
+	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
+
+	tst	r3, #L_PTE_USER			@ user?
+	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
+
+	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
+	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
+						@ combined with user -> user r/w
+	.endm
+
+	.macro	xscale_set_pte_ext_epilogue
+	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
+	movne	r2, #0				@ no -> fault
+
+	str	r2, [r0]			@ hardware version
+	mov	ip, #0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	.endm
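
The macros above centralise translations that were previously
open-coded in each proc-*.S file.  A rough C equivalent of
armv3_set_pte_ext, assuming u32 from linux/types.h and the L_PTE_*/
PTE_* constants from asm/pgtable.h and asm/pgtable-hwdef.h; the
pointer arithmetic mirrors the str ..., #-2048 addressing:

	static void armv3_set_pte_ext(u32 *ptep, u32 pte, int wc_disable)
	{
		u32 hw;

		ptep[0] = pte;				/* linux version */
		hw = (pte & ~(PTE_SMALL_AP_MASK | PTE_TYPE_MASK)) |
		     PTE_TYPE_SMALL;			/* keep C, B bits */

		if (pte & L_PTE_USER)			/* user r/o, svc r/w */
			hw |= PTE_SMALL_AP_URO_SRW;
		if ((pte & (L_PTE_WRITE | L_PTE_DIRTY)) ==
		    (L_PTE_WRITE | L_PTE_DIRTY))	/* with user: user r/w */
			hw |= PTE_SMALL_AP_UNO_SRW;
		if ((pte & (L_PTE_PRESENT | L_PTE_YOUNG)) !=
		    (L_PTE_PRESENT | L_PTE_YOUNG))
			hw = 0;				/* fault */
	#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		if (wc_disable && (hw & PTE_CACHEABLE))
			hw &= ~PTE_BUFFERABLE;
	#endif
		ptep[-512] = hw;			/* hardware version, -2048 bytes */
	}

The wc_disable parameter reproduces the .if block: on a write-through
D-cache a cacheable+bufferable mapping would otherwise appear to enable
write-combining, so the bufferable bit is stripped.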
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index bbe10576c861..90a7e5279f29 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -17,7 +17,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <mach/hardware.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
@@ -153,24 +153,7 @@ ENTRY(cpu_sa110_switch_mm)
 	.align	5
 ENTRY(cpu_sa110_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext wc_disable=0
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index 871ba018252e..451e2d953e2a 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -22,7 +22,7 @@
 #include <linux/init.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <mach/hardware.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
@@ -166,24 +166,7 @@ ENTRY(cpu_sa1100_switch_mm)
 	.align	5
 ENTRY(cpu_sa1100_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	eor	r1, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	bic	r2, r1, #PTE_SMALL_AP_MASK
-	bic	r2, r2, #PTE_TYPE_MASK
-	orr	r2, r2, #PTE_TYPE_SMALL
-
-	tst	r1, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
-
-	tst	r1, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
-
-	tst	r1, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0
-
-	str	r2, [r0]			@ hardware version
+	armv3_set_pte_ext wc_disable=0
 	mov	r0, r0
 	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 5702ec58b2a2..294943b85973 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -13,7 +13,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 
@@ -114,46 +114,12 @@ ENTRY(cpu_v6_switch_mm)
  *		  (hardware version is stored at -2048 bytes)
  *	- pte   - PTE value to store
  *	- ext	- value for extended PTE bits
- *
- *	Permissions:
- *	  YUWD  APX AP1 AP0	SVC	User
- *	  0xxx   0   0   0	no acc	no acc
- *	  100x   1   0   1	r/o	no acc
- *	  10x0   1   0   1	r/o	no acc
- *	  1011   0   0   1	r/w	no acc
- *	  110x   0   1   0	r/w	r/o
- *	  11x0   0   1   0	r/w	r/o
- *	  1111   0   1   1	r/w	r/w
  */
+	armv6_mt_table cpu_v6
+
 ENTRY(cpu_v6_set_pte_ext)
 #ifdef CONFIG_MMU
-	str	r1, [r0], #-2048		@ linux version
-
-	bic	r3, r1, #0x000003f0
-	bic	r3, r3, #0x00000003
-	orr	r3, r3, r2
-	orr	r3, r3, #PTE_EXT_AP0 | 2
-
-	tst	r1, #L_PTE_WRITE
-	tstne	r1, #L_PTE_DIRTY
-	orreq	r3, r3, #PTE_EXT_APX
-
-	tst	r1, #L_PTE_USER
-	orrne	r3, r3, #PTE_EXT_AP1
-	tstne	r3, #PTE_EXT_APX
-	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
-
-	tst	r1, #L_PTE_YOUNG
-	biceq	r3, r3, #PTE_EXT_APX | PTE_EXT_AP_MASK
-
-	tst	r1, #L_PTE_EXEC
-	orreq	r3, r3, #PTE_EXT_XN
-
-	tst	r1, #L_PTE_PRESENT
-	moveq	r3, #0
-
-	str	r3, [r0]
-	mcr	p15, 0, r0, c7, c10, 1 @ flush_pte
+	armv6_set_pte_ext cpu_v6
 #endif
 	mov	pc, lr
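
cpu_v6_set_pte_ext now funnels through the shared macro.  For
reference, a hedged C sketch of what armv6_set_pte_ext computes, with
the table contents as in armv6_mt_table and flush_pte() standing in
for the final mcr:

	extern const u32 cpu_v6_mt_table[16];	/* armv6_mt_table cpu_v6 */
	void flush_pte(u32 *hw_ptep);		/* mcr p15, 0, ..., c7, c10, 1 */

	static void armv6_set_pte_ext(u32 *ptep, u32 pte, u32 ext)
	{
		u32 hw;

		ptep[0] = pte;				/* linux version */
		hw = (pte & ~(0x3fc | PTE_TYPE_MASK)) | ext | PTE_EXT_AP0 | 2;

		if ((pte & (L_PTE_WRITE | L_PTE_DIRTY)) !=
		    (L_PTE_WRITE | L_PTE_DIRTY))
			hw |= PTE_EXT_APX;		/* read-only */
		if (pte & L_PTE_USER) {
			hw |= PTE_EXT_AP1;
			if (hw & PTE_EXT_APX)		/* user + APX: drop APX, AP0 */
				hw &= ~(PTE_EXT_APX | PTE_EXT_AP0);
		}
		if (!(pte & L_PTE_EXEC))
			hw |= PTE_EXT_XN;

		hw |= cpu_v6_mt_table[(pte & L_PTE_MT_MASK) >> 2];	/* TEX/C/B */

		if ((pte & (L_PTE_YOUNG | L_PTE_PRESENT)) !=
		    (L_PTE_YOUNG | L_PTE_PRESENT))
			hw = 0;				/* fault */

		ptep[-512] = hw;			/* hardware version */
		flush_pte(ptep - 512);
	}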
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b49f9a4c82c8..34e424041927 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -12,7 +12,7 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 
@@ -25,9 +25,11 @@
 
 ENTRY(cpu_v7_proc_init)
 	mov	pc, lr
+ENDPROC(cpu_v7_proc_init)
 
 ENTRY(cpu_v7_proc_fin)
 	mov	pc, lr
+ENDPROC(cpu_v7_proc_fin)
 
 /*
  *	cpu_v7_reset(loc)
@@ -43,6 +45,7 @@ ENTRY(cpu_v7_proc_fin)
 	.align	5
 ENTRY(cpu_v7_reset)
 	mov	pc, r0
+ENDPROC(cpu_v7_reset)
 
 /*
  *	cpu_v7_do_idle()
@@ -52,8 +55,9 @@ ENTRY(cpu_v7_reset)
  *	IRQs are already disabled.
  */
 ENTRY(cpu_v7_do_idle)
-	.long	0xe320f003			@ ARM V7 WFI instruction
+	wfi
 	mov	pc, lr
+ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
 #ifndef TLB_CAN_READ_FROM_L1_CACHE
@@ -65,6 +69,7 @@ ENTRY(cpu_v7_dcache_clean_area)
 	dsb
 #endif
 	mov	pc, lr
+ENDPROC(cpu_v7_dcache_clean_area)
 
 /*
  *	cpu_v7_switch_mm(pgd_phys, tsk)
@@ -89,6 +94,7 @@ ENTRY(cpu_v7_switch_mm)
 	isb
 #endif
 	mov	pc, lr
+ENDPROC(cpu_v7_switch_mm)
 
 /*
  *	cpu_v7_set_pte_ext(ptep, pte)
@@ -99,26 +105,19 @@ ENTRY(cpu_v7_switch_mm)
  *		  (hardware version is stored at -2048 bytes)
  *	- pte   - PTE value to store
  *	- ext	- value for extended PTE bits
- *
- *	Permissions:
- *	  YUWD  APX AP1 AP0	SVC	User
- *	  0xxx   0   0   0	no acc	no acc
- *	  100x   1   0   1	r/o	no acc
- *	  10x0   1   0   1	r/o	no acc
- *	  1011   0   0   1	r/w	no acc
- *	  110x   0   1   0	r/w	r/o
- *	  11x0   0   1   0	r/w	r/o
- *	  1111   0   1   1	r/w	r/w
  */
 ENTRY(cpu_v7_set_pte_ext)
 #ifdef CONFIG_MMU
 	str	r1, [r0], #-2048		@ linux version
 
 	bic	r3, r1, #0x000003f0
-	bic	r3, r3, #0x00000003
+	bic	r3, r3, #PTE_TYPE_MASK
 	orr	r3, r3, r2
 	orr	r3, r3, #PTE_EXT_AP0 | 2
 
+	tst	r2, #1 << 4
+	orrne	r3, r3, #PTE_EXT_TEX(1)
+
 	tst	r1, #L_PTE_WRITE
 	tstne	r1, #L_PTE_DIRTY
 	orreq	r3, r3, #PTE_EXT_APX
@@ -128,19 +127,18 @@ ENTRY(cpu_v7_set_pte_ext)
 	tstne	r3, #PTE_EXT_APX
 	bicne	r3, r3, #PTE_EXT_APX | PTE_EXT_AP0
 
-	tst	r1, #L_PTE_YOUNG
-	biceq	r3, r3, #PTE_EXT_APX | PTE_EXT_AP_MASK
-
 	tst	r1, #L_PTE_EXEC
 	orreq	r3, r3, #PTE_EXT_XN
 
-	tst	r1, #L_PTE_PRESENT
+	tst	r1, #L_PTE_YOUNG
+	tstne	r1, #L_PTE_PRESENT
 	moveq	r3, #0
 
 	str	r3, [r0]
 	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
 #endif
 	mov	pc, lr
+ENDPROC(cpu_v7_set_pte_ext)
 
 cpu_v7_name:
 	.ascii	"ARMv7 Processor"
@@ -182,12 +180,17 @@ __v7_setup:
 	mov	r10, #0x1f			@ domains 0, 1 = manager
 	mcr	p15, 0, r10, c3, c0, 0		@ load domain access register
 #endif
+	ldr	r5, =0x40e040e0
+	ldr	r6, =0xff0aa1a8
+	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
+	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 	adr	r5, v7_crval
 	ldmia	r5, {r5, r6}
    	mrc	p15, 0, r0, c1, c0, 0		@ read control register
 	bic	r0, r0, r5			@ clear bits
 	orr	r0, r0, r6			@ set them
 	mov	pc, lr				@ return to head.S:__ret
+ENDPROC(__v7_setup)
 
 	/*
 	 *         V X F   I D LR
@@ -197,7 +200,7 @@ __v7_setup:
 	 */
 	.type	v7_crval, #object
 v7_crval:
-	crval	clear=0x0120c302, mmuset=0x00c0387d, ucset=0x00c0187c
+	crval	clear=0x0120c302, mmuset=0x10c0387d, ucset=0x00c0187c
 
 __v7_setup_stack:
 	.space	4 * 11				@ 11 registers
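
The new mmuset value (0x10c0387d, previously 0x00c0387d) additionally
sets bit 28 of the control register, SCTLR.TRE, enabling TEX remap:
TEX[0], C and B of each pte then form a 3-bit index into the PRRR and
NMRR values programmed just above (which also explains the new
tst r2, #1 << 4 in cpu_v7_set_pte_ext, mapping bit 4 of the ext
argument to TEX(1)).  A sketch of the field decoding per the ARMv7
architecture manual; the helper names are illustrative:

	#define PRRR_VAL	0x40e040e0	/* written to c10, c2, 0 above */
	#define NMRR_VAL	0xff0aa1a8	/* written to c10, c2, 1 above */

	/* idx is the 3-bit TEX[0]:C:B value from the pte */
	static unsigned int prrr_memtype(unsigned int idx)
	{
		return (PRRR_VAL >> (2 * idx)) & 3;	/* 0=SO 1=device 2=normal */
	}

	static unsigned int nmrr_inner_cache(unsigned int idx)
	{
		return (NMRR_VAL >> (2 * idx)) & 3;	/* 0=NC 1=WB/WA 2=WT 3=WB */
	}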
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
index 7bd9e7197f60..04dc8b65401b 100644
--- a/arch/arm/mm/proc-xsc3.S
+++ b/arch/arm/mm/proc-xsc3.S
@@ -27,7 +27,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <mach/hardware.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
@@ -345,38 +345,38 @@ ENTRY(cpu_xsc3_switch_mm)
  * cpu_xsc3_set_pte_ext(ptep, pte, ext)
  *
  * Set a PTE and flush it out
- *
  */
+cpu_xsc3_mt_table:
+	.long	0x00						@ L_PTE_MT_UNCACHED
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
+	.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
+	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
+	.long	0x00						@ unused
+	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
+	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
+	.long	0x00						@ unused
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
+	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+
 	.align	5
 ENTRY(cpu_xsc3_set_pte_ext)
-	str	r1, [r0], #-2048		@ linux version
+	xscale_set_pte_ext_prologue
 
-	bic	r2, r1, #0xff0			@ keep C, B bits
-	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
 	tst	r1, #L_PTE_SHARED		@ shared?
-	orrne	r2, r2, #0x200
-
-	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	tst	r3, #L_PTE_USER			@ user?
-	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
-
-	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ write and dirty?
-	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
-						@ combined with user -> user r/w
-
-	@ If it's cacheable, it needs to be in L2 also.
-	eor	ip, r1, #L_PTE_CACHEABLE
-	tst	ip, #L_PTE_CACHEABLE
-	orreq	r2, r2, #PTE_EXT_TEX(0x5)
-
-	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
-	movne	r2, #0				@ no -> fault
-
-	str	r2, [r0]			@ hardware version
-	mov	ip, #0
-	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
-	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	and	r1, r1, #L_PTE_MT_MASK
+	adr	ip, cpu_xsc3_mt_table
+	ldr	ip, [ip, r1]
+	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
+	bic	r2, r2, #0x0c			@ clear old C,B bits
+	orr	r2, r2, ip
+
+	xscale_set_pte_ext_epilogue
 	mov	pc, lr
 
 	.ltorg
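
cpu_xsc3_set_pte_ext now shares the XScale prologue/epilogue macros and
keeps only the xsc3-specific part: the memory-type table lookup plus
mapping L_PTE_SHARED to the XScale3 coherency bit.  In hedged C,
reusing the naming assumptions of the earlier sketches:

	extern const u32 cpu_xsc3_mt_table[16];	/* contents as in the diff */

	static u32 xsc3_mt_bits(u32 pte)
	{
		u32 bits = cpu_xsc3_mt_table[(pte & L_PTE_MT_MASK) >> 2];

		if (pte & L_PTE_SHARED)
			bits |= PTE_EXT_COHERENT;	/* shared -> coherent */
		return bits;	/* replaces the old C, B bits in the hw pte */
	}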
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 2dd85273976f..0cce37b93937 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -23,7 +23,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
-#include <asm/elf.h>
+#include <asm/hwcap.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/page.h>
@@ -406,8 +406,6 @@ ENTRY(cpu_xscale_dcache_clean_area)
 
 /* =============================== PageTable ============================== */
 
-#define PTE_CACHE_WRITE_ALLOCATE 0
-
 /*
  * cpu_xscale_switch_mm(pgd)
  *
@@ -431,56 +429,42 @@ ENTRY(cpu_xscale_switch_mm)
  *
  * Errata 40: must set memory to write-through for user read-only pages.
  */
+cpu_xscale_mt_table:
+	.long	0x00						@ L_PTE_MT_UNCACHED
+	.long	PTE_BUFFERABLE					@ L_PTE_MT_BUFFERABLE
+	.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
+	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
+	.long	0x00						@ unused
+	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE			@ L_PTE_MT_MINICACHE
+	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC
+	.long	0x00						@ unused
+	.long	PTE_BUFFERABLE					@ L_PTE_MT_DEV_WC
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
+	.long	0x00						@ L_PTE_MT_DEV_NONSHARED
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+
 	.align	5
 ENTRY(cpu_xscale_set_pte_ext)
-	str	r1, [r0], #-2048		@ linux version
-
-	bic	r2, r1, #0xff0
-	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
-
-	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY
-
-	tst	r3, #L_PTE_USER			@ User?
-	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
-
-	tst	r3, #L_PTE_WRITE | L_PTE_DIRTY	@ Write and Dirty?
-	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
-						@ combined with user -> user r/w
-
-	@
-	@ Handle the X bit.  We want to set this bit for the minicache
-	@ (U = E = B = W = 0, C = 1) or when write allocate is enabled,
-	@ and we have a writeable, cacheable region.  If we ignore the
-	@ U and E bits, we can allow user space to use the minicache as
-	@ well.
-	@
-	@  X = (C & ~W & ~B) | (C & W & B & write_allocate)
-	@
-	eor	ip, r1, #L_PTE_CACHEABLE
-	tst	ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE
-#if PTE_CACHE_WRITE_ALLOCATE
-	eorne	ip, r1, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE
-	tstne	ip, #L_PTE_CACHEABLE | L_PTE_WRITE | L_PTE_BUFFERABLE
-#endif
-	orreq	r2, r2, #PTE_EXT_TEX(1)
+	xscale_set_pte_ext_prologue
 
 	@
-	@ Erratum 40: The B bit must be cleared for a user read-only
-	@ cacheable page.
-	@
-	@  B = B & ~(U & C & ~W)
+	@ Erratum 40: must set memory to write-through for user read-only pages
 	@
-	and	ip, r1, #L_PTE_USER | L_PTE_WRITE | L_PTE_CACHEABLE
-	teq	ip, #L_PTE_USER | L_PTE_CACHEABLE
-	biceq	r2, r2, #PTE_BUFFERABLE
+	and	ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_WRITE) & ~(4 << 2)
+	teq	ip, #L_PTE_MT_WRITEBACK | L_PTE_USER
 
-	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ Present and Young?
-	movne	r2, #0				@ no -> fault
+	moveq	r1, #L_PTE_MT_WRITETHROUGH
+	and	r1, r1, #L_PTE_MT_MASK
+	adr	ip, cpu_xscale_mt_table
+	ldr	ip, [ip, r1]
+	bic	r2, r2, #0x0c
+	orr	r2, r2, ip
 
-	str	r2, [r0]			@ hardware version
-	mov	ip, #0
-	mcr	p15, 0, r0, c7, c10, 1		@ Clean D cache line
-	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	xscale_set_pte_ext_epilogue
 	mov	pc, lr
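
The "& ~(4 << 2)" in the erratum 40 test masks out the bit that
separates L_PTE_MT_WRITEALLOC (index 7 in the tables above, 0x1c as
pte bits) from L_PTE_MT_WRITEBACK (index 3, 0x0c), so both are treated
as writeback before the comparison.  A hedged C restatement, assuming
those encodings:

	static u32 xscale_effective_mt(u32 pte)
	{
		u32 key = pte & ((L_PTE_MT_MASK | L_PTE_USER | L_PTE_WRITE) &
				 ~(4 << 2));

		/* user-accessible, read-only, writeback/writealloc page? */
		if (key == (L_PTE_MT_WRITEBACK | L_PTE_USER))
			return L_PTE_MT_WRITETHROUGH;	/* erratum 40: force WT */
		return pte & L_PTE_MT_MASK;
	}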
 
 
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
index b56dda8052f7..24ba5109f2e7 100644
--- a/arch/arm/mm/tlb-v7.S
+++ b/arch/arm/mm/tlb-v7.S
@@ -51,6 +51,7 @@ ENTRY(v7wbi_flush_user_tlb_range)
 	mcr	p15, 0, ip, c7, c5, 6		@ flush BTAC/BTB
 	dsb
 	mov	pc, lr
+ENDPROC(v7wbi_flush_user_tlb_range)
 
 /*
  *	v7wbi_flush_kern_tlb_range(start,end)
@@ -77,6 +78,7 @@ ENTRY(v7wbi_flush_kern_tlb_range)
 	dsb
 	isb
 	mov	pc, lr
+ENDPROC(v7wbi_flush_kern_tlb_range)
 
 	.section ".text.init", #alloc, #execinstr