summary refs log tree commit diff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-07 13:47:18 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-07 13:47:18 -0800
commit19cf3edbf33375e81f9b41a087d3fad95858a4e3 (patch)
tree040d427ab7111cdee528536a2a6e6b1931f05817 /arch
parentd345243629db38a6340bcb69f372329d35b8f650 (diff)
parent455bd4c430b0c0a361f38e8658a0d6cb469942b5 (diff)
downloadlinux-19cf3edbf33375e81f9b41a087d3fad95858a4e3.tar.gz
Merge branch 'fixes' of git://git.linaro.org/people/rmk/linux-arm
Pull ARM fixes from Russell King:
 "Mainly a group of fixes, the only exception is the wiring up of the
  kcmp syscall now that those patches went in during the last merge
  window."

* 'fixes' of git://git.linaro.org/people/rmk/linux-arm:
  ARM: 7668/1: fix memset-related crashes caused by recent GCC (4.7.2) optimizations
  ARM: 7667/1: perf: Fix section mismatch on armpmu_init()
  ARM: 7666/1: decompressor: add -mno-single-pic-base for building the decompressor
  ARM: 7665/1: Wire up kcmp syscall
  ARM: 7664/1: perf: remove erroneous semicolon from event initialisation
  ARM: 7663/1: perf: fix ARMv7 EVTYPE_MASK to include NSH bit
  ARM: 7662/1: hw_breakpoint: reset debug logic on secondary CPUs in s2ram resume
  ARM: 7661/1: mm: perform explicit branch predictor maintenance when required
  ARM: 7660/1: tlb: add branch predictor maintenance operations
  ARM: 7659/1: mm: make mm->context.id an atomic64_t variable
  ARM: 7658/1: mm: fix race updating mm->context.id on ASID rollover
  ARM: 7657/1: head: fix swapper and idmap population with LPAE and big-endian
  ARM: 7655/1: smp_twd: make twd_local_timer_of_register() no-op for nosmp
  ARM: 7652/1: mm: fix missing use of 'asid' to get asid value from mm->context.id
  ARM: 7642/1: netx: bump IRQ offset to 64
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/boot/compressed/Makefile2
-rw-r--r--arch/arm/include/asm/mmu.h8
-rw-r--r--arch/arm/include/asm/mmu_context.h2
-rw-r--r--arch/arm/include/asm/tlbflush.h34
-rw-r--r--arch/arm/include/uapi/asm/unistd.h2
-rw-r--r--arch/arm/kernel/asm-offsets.c2
-rw-r--r--arch/arm/kernel/calls.S2
-rw-r--r--arch/arm/kernel/head.S26
-rw-r--r--arch/arm/kernel/hw_breakpoint.c2
-rw-r--r--arch/arm/kernel/perf_event.c4
-rw-r--r--arch/arm/kernel/perf_event_v7.c2
-rw-r--r--arch/arm/kernel/smp.c1
-rw-r--r--arch/arm/kernel/smp_tlb.c12
-rw-r--r--arch/arm/kernel/smp_twd.c4
-rw-r--r--arch/arm/kernel/suspend.c1
-rw-r--r--arch/arm/lib/memset.S85
-rw-r--r--arch/arm/mach-netx/generic.c2
-rw-r--r--arch/arm/mach-netx/include/mach/irqs.h64
-rw-r--r--arch/arm/mm/context.c29
-rw-r--r--arch/arm/mm/idmap.c1
-rw-r--r--arch/arm/mm/proc-v7-3level.S2
21 files changed, 178 insertions, 109 deletions
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 5cad8a6dadb0..afed28e37ea5 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -120,7 +120,7 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
 endif
 
-ccflags-y := -fpic -fno-builtin -I$(obj)
+ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj)
 asflags-y := -Wa,-march=all -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 9f77e7804f3b..e3d55547e755 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -5,15 +5,15 @@
 
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
-	u64 id;
+	atomic64_t	id;
 #endif
-	unsigned int vmalloc_seq;
+	unsigned int	vmalloc_seq;
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
 #define ASID_BITS	8
 #define ASID_MASK	((~0ULL) << ASID_BITS)
-#define ASID(mm)	((mm)->context.id & ~ASID_MASK)
+#define ASID(mm)	((mm)->context.id.counter & ~ASID_MASK)
 #else
 #define ASID(mm)	(0)
 #endif
@@ -26,7 +26,7 @@ typedef struct {
  *  modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
  */
 typedef struct {
-	unsigned long		end_brk;
+	unsigned long	end_brk;
 } mm_context_t;
 
 #endif
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index e1f644bc7cc5..863a6611323c 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -25,7 +25,7 @@ void __check_vmalloc_seq(struct mm_struct *mm);
 #ifdef CONFIG_CPU_HAS_ASID
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
-#define init_new_context(tsk,mm)	({ mm->context.id = 0; })
+#define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
 #else	/* !CONFIG_CPU_HAS_ASID */
 
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index 6e924d3a77eb..4db8c8820f0d 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -34,10 +34,13 @@
 #define TLB_V6_D_ASID	(1 << 17)
 #define TLB_V6_I_ASID	(1 << 18)
 
+#define TLB_V6_BP	(1 << 19)
+
 /* Unified Inner Shareable TLB operations (ARMv7 MP extensions) */
-#define TLB_V7_UIS_PAGE	(1 << 19)
-#define TLB_V7_UIS_FULL (1 << 20)
-#define TLB_V7_UIS_ASID (1 << 21)
+#define TLB_V7_UIS_PAGE	(1 << 20)
+#define TLB_V7_UIS_FULL (1 << 21)
+#define TLB_V7_UIS_ASID (1 << 22)
+#define TLB_V7_UIS_BP	(1 << 23)
 
 #define TLB_BARRIER	(1 << 28)
 #define TLB_L2CLEAN_FR	(1 << 29)		/* Feroceon */
@@ -150,7 +153,8 @@
 #define v6wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
 			 TLB_V6_I_FULL | TLB_V6_D_FULL | \
 			 TLB_V6_I_PAGE | TLB_V6_D_PAGE | \
-			 TLB_V6_I_ASID | TLB_V6_D_ASID)
+			 TLB_V6_I_ASID | TLB_V6_D_ASID | \
+			 TLB_V6_BP)
 
 #ifdef CONFIG_CPU_TLB_V6
 # define v6wbi_possible_flags	v6wbi_tlb_flags
@@ -166,9 +170,11 @@
 #endif
 
 #define v7wbi_tlb_flags_smp	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
-			 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID)
+				 TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | \
+				 TLB_V7_UIS_ASID | TLB_V7_UIS_BP)
 #define v7wbi_tlb_flags_up	(TLB_WB | TLB_DCLEAN | TLB_BARRIER | \
-			 TLB_V6_U_FULL | TLB_V6_U_PAGE | TLB_V6_U_ASID)
+				 TLB_V6_U_FULL | TLB_V6_U_PAGE | \
+				 TLB_V6_U_ASID | TLB_V6_BP)
 
 #ifdef CONFIG_CPU_TLB_V7
 
@@ -430,6 +436,20 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr)
 	}
 }
 
+static inline void local_flush_bp_all(void)
+{
+	const int zero = 0;
+	const unsigned int __tlb_flag = __cpu_tlb_flags;
+
+	if (tlb_flag(TLB_V7_UIS_BP))
+		asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero));
+	else if (tlb_flag(TLB_V6_BP))
+		asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero));
+
+	if (tlb_flag(TLB_BARRIER))
+		isb();
+}
+
 /*
  *	flush_pmd_entry
  *
@@ -480,6 +500,7 @@ static inline void clean_pmd_entry(void *pmd)
 #define flush_tlb_kernel_page	local_flush_tlb_kernel_page
 #define flush_tlb_range		local_flush_tlb_range
 #define flush_tlb_kernel_range	local_flush_tlb_kernel_range
+#define flush_bp_all		local_flush_bp_all
 #else
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
@@ -487,6 +508,7 @@ extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr);
 extern void flush_tlb_kernel_page(unsigned long kaddr);
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void flush_bp_all(void);
 #endif
 
 /*
diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index 4da7cde70b5d..af33b44990ed 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -404,7 +404,7 @@
 #define __NR_setns			(__NR_SYSCALL_BASE+375)
 #define __NR_process_vm_readv		(__NR_SYSCALL_BASE+376)
 #define __NR_process_vm_writev		(__NR_SYSCALL_BASE+377)
-					/* 378 for kcmp */
+#define __NR_kcmp			(__NR_SYSCALL_BASE+378)
 #define __NR_finit_module		(__NR_SYSCALL_BASE+379)
 
 /*
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 5ce738b43508..923eec7105cf 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -110,7 +110,7 @@ int main(void)
   BLANK();
 #endif
 #ifdef CONFIG_CPU_HAS_ASID
-  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
+  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
   BLANK();
 #endif
   DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 0cc57611fc4f..c6ca7e376773 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -387,7 +387,7 @@
 /* 375 */	CALL(sys_setns)
 		CALL(sys_process_vm_readv)
 		CALL(sys_process_vm_writev)
-		CALL(sys_ni_syscall)	/* reserved for sys_kcmp */
+		CALL(sys_kcmp)
 		CALL(sys_finit_module)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 486a15ae9011..e0eb9a1cae77 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -184,13 +184,22 @@ __create_page_tables:
 	orr	r3, r3, #3			@ PGD block type
 	mov	r6, #4				@ PTRS_PER_PGD
 	mov	r7, #1 << (55 - 32)		@ L_PGD_SWAPPER
-1:	str	r3, [r0], #4			@ set bottom PGD entry bits
+1:
+#ifdef CONFIG_CPU_ENDIAN_BE8
 	str	r7, [r0], #4			@ set top PGD entry bits
+	str	r3, [r0], #4			@ set bottom PGD entry bits
+#else
+	str	r3, [r0], #4			@ set bottom PGD entry bits
+	str	r7, [r0], #4			@ set top PGD entry bits
+#endif
 	add	r3, r3, #0x1000			@ next PMD table
 	subs	r6, r6, #1
 	bne	1b
 
 	add	r4, r4, #0x1000			@ point to the PMD tables
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	add	r4, r4, #4			@ we only write the bottom word
+#endif
 #endif
 
 	ldr	r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags
@@ -258,6 +267,11 @@ __create_page_tables:
 	addne	r6, r6, #1 << SECTION_SHIFT
 	strne	r6, [r3]
 
+#if defined(CONFIG_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
+	sub	r4, r4, #4			@ Fixup page table pointer
+						@ for 64-bit descriptors
+#endif
+
 #ifdef CONFIG_DEBUG_LL
 #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
 	/*
@@ -276,13 +290,17 @@ __create_page_tables:
 	orr	r3, r7, r3, lsl #SECTION_SHIFT
 #ifdef CONFIG_ARM_LPAE
 	mov	r7, #1 << (54 - 32)		@ XN
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r7, [r0], #4
+	str	r3, [r0], #4
 #else
-	orr	r3, r3, #PMD_SECT_XN
-#endif
 	str	r3, [r0], #4
-#ifdef CONFIG_ARM_LPAE
 	str	r7, [r0], #4
 #endif
+#else
+	orr	r3, r3, #PMD_SECT_XN
+	str	r3, [r0], #4
+#endif
 
 #else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
 	/* we don't need any serial debugging mappings */
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 5eae53e7a2e1..96093b75ab90 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -1023,7 +1023,7 @@ out_mdbgen:
 static int __cpuinit dbg_reset_notify(struct notifier_block *self,
 				      unsigned long action, void *cpu)
 {
-	if (action == CPU_ONLINE)
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
 		smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
 
 	return NOTIFY_OK;
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 31e0eb353cd8..146157dfe27c 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -400,7 +400,7 @@ __hw_perf_event_init(struct perf_event *event)
 	}
 
 	if (event->group_leader != event) {
-		if (validate_group(event) != 0);
+		if (validate_group(event) != 0)
 			return -EINVAL;
 	}
 
@@ -484,7 +484,7 @@ const struct dev_pm_ops armpmu_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
 };
 
-static void __init armpmu_init(struct arm_pmu *armpmu)
+static void armpmu_init(struct arm_pmu *armpmu)
 {
 	atomic_set(&armpmu->active_events, 0);
 	mutex_init(&armpmu->reserve_mutex);
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 8c79a9e70b83..039cffb053a7 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -774,7 +774,7 @@ static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 /*
  * PMXEVTYPER: Event selection reg
  */
-#define	ARMV7_EVTYPE_MASK	0xc00000ff	/* Mask for writable bits */
+#define	ARMV7_EVTYPE_MASK	0xc80000ff	/* Mask for writable bits */
 #define	ARMV7_EVTYPE_EVENT	0xff		/* Mask for EVENT bits */
 
 /*
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 1bdfd87c8e41..31644f1978d5 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -285,6 +285,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	 * switch away from it before attempting any exclusive accesses.
 	 */
 	cpu_switch_mm(mm->pgd, mm);
+	local_flush_bp_all();
 	enter_lazy_tlb(mm, current);
 	local_flush_tlb_all();
 
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index 02c5d2ce23bf..bd0300531399 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -64,6 +64,11 @@ static inline void ipi_flush_tlb_kernel_range(void *arg)
 	local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
 }
 
+static inline void ipi_flush_bp_all(void *ignored)
+{
+	local_flush_bp_all();
+}
+
 void flush_tlb_all(void)
 {
 	if (tlb_ops_need_broadcast())
@@ -127,3 +132,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 		local_flush_tlb_kernel_range(start, end);
 }
 
+void flush_bp_all(void)
+{
+	if (tlb_ops_need_broadcast())
+		on_each_cpu(ipi_flush_bp_all, NULL, 1);
+	else
+		local_flush_bp_all();
+}
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c
index c092115d903a..3f2565037480 100644
--- a/arch/arm/kernel/smp_twd.c
+++ b/arch/arm/kernel/smp_twd.c
@@ -22,6 +22,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
 
+#include <asm/smp_plat.h>
 #include <asm/smp_twd.h>
 #include <asm/localtimer.h>
 
@@ -373,6 +374,9 @@ void __init twd_local_timer_of_register(void)
 	struct device_node *np;
 	int err;
 
+	if (!is_smp() || !setup_max_cpus)
+		return;
+
 	np = of_find_matching_node(NULL, twd_of_match);
 	if (!np)
 		return;
diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c
index 358bca3a995e..c59c97ea8268 100644
--- a/arch/arm/kernel/suspend.c
+++ b/arch/arm/kernel/suspend.c
@@ -68,6 +68,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	ret = __cpu_suspend(arg, fn);
 	if (ret == 0) {
 		cpu_switch_mm(mm->pgd, mm);
+		local_flush_bp_all();
 		local_flush_tlb_all();
 	}
 
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 650d5923ab83..d912e7397ecc 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -19,9 +19,9 @@
 1:	subs	r2, r2, #4		@ 1 do we have enough
 	blt	5f			@ 1 bytes to align with?
 	cmp	r3, #2			@ 1
-	strltb	r1, [r0], #1		@ 1
-	strleb	r1, [r0], #1		@ 1
-	strb	r1, [r0], #1		@ 1
+	strltb	r1, [ip], #1		@ 1
+	strleb	r1, [ip], #1		@ 1
+	strb	r1, [ip], #1		@ 1
 	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
 /*
  * The pointer is now aligned and the length is adjusted.  Try doing the
@@ -29,10 +29,14 @@
  */
 
 ENTRY(memset)
-	ands	r3, r0, #3		@ 1 unaligned?
+/*
+ * Preserve the contents of r0 for the return value.
+ */
+	mov	ip, r0
+	ands	r3, ip, #3		@ 1 unaligned?
 	bne	1b			@ 1
 /*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
  */
 	orr	r1, r1, r1, lsl #8
 	orr	r1, r1, r1, lsl #16
@@ -43,29 +47,28 @@ ENTRY(memset)
 #if ! CALGN(1)+0
 
 /*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
  */
-	str	lr, [sp, #-4]!
-	mov	ip, r1
+	stmfd	sp!, {r8, lr}
+	mov	r8, r1
 	mov	lr, r1
 
 2:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
+	stmgeia	ip!, {r1, r3, r8, lr}
 	bgt	2b
-	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
+	ldmeqfd	sp!, {r8, pc}		@ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
 	tst	r2, #32
-	stmneia	r0!, {r1, r3, ip, lr}
-	stmneia	r0!, {r1, r3, ip, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
+	stmneia	ip!, {r1, r3, r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r1, r3, ip, lr}
-	ldr	lr, [sp], #4
+	stmneia	ip!, {r1, r3, r8, lr}
+	ldmfd	sp!, {r8, lr}
 
 #else
 
@@ -74,54 +77,54 @@ ENTRY(memset)
  * whole cache lines at once.
  */
 
-	stmfd	sp!, {r4-r7, lr}
+	stmfd	sp!, {r4-r8, lr}
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
 	mov	r7, r1
-	mov	ip, r1
+	mov	r8, r1
 	mov	lr, r1
 
 	cmp	r2, #96
-	tstgt	r0, #31
+	tstgt	ip, #31
 	ble	3f
 
-	and	ip, r0, #31
-	rsb	ip, ip, #32
-	sub	r2, r2, ip
-	movs	ip, ip, lsl #(32 - 4)
-	stmcsia	r0!, {r4, r5, r6, r7}
-	stmmiia	r0!, {r4, r5}
-	tst	ip, #(1 << 30)
-	mov	ip, r1
-	strne	r1, [r0], #4
+	and	r8, ip, #31
+	rsb	r8, r8, #32
+	sub	r2, r2, r8
+	movs	r8, r8, lsl #(32 - 4)
+	stmcsia	ip!, {r4, r5, r6, r7}
+	stmmiia	ip!, {r4, r5}
+	tst	r8, #(1 << 30)
+	mov	r8, r1
+	strne	r1, [ip], #4
 
 3:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
+	stmgeia	ip!, {r1, r3-r8, lr}
 	bgt	3b
-	ldmeqfd	sp!, {r4-r7, pc}
+	ldmeqfd	sp!, {r4-r8, pc}
 
 	tst	r2, #32
-	stmneia	r0!, {r1, r3-r7, ip, lr}
+	stmneia	ip!, {r1, r3-r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r4-r7}
-	ldmfd	sp!, {r4-r7, lr}
+	stmneia	ip!, {r4-r7}
+	ldmfd	sp!, {r4-r8, lr}
 
 #endif
 
 4:	tst	r2, #8
-	stmneia	r0!, {r1, r3}
+	stmneia	ip!, {r1, r3}
 	tst	r2, #4
-	strne	r1, [r0], #4
+	strne	r1, [ip], #4
 /*
  * When we get here, we've got less than 4 bytes to zero.  We
  * may have an unaligned pointer as well.
  */
 5:	tst	r2, #2
-	strneb	r1, [r0], #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
+	strneb	r1, [ip], #1
 	tst	r2, #1
-	strneb	r1, [r0], #1
+	strneb	r1, [ip], #1
 	mov	pc, lr
 ENDPROC(memset)
diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c
index 27c2cb7ab813..1504b68f4c66 100644
--- a/arch/arm/mach-netx/generic.c
+++ b/arch/arm/mach-netx/generic.c
@@ -168,7 +168,7 @@ void __init netx_init_irq(void)
 {
 	int irq;
 
-	vic_init(io_p2v(NETX_PA_VIC), 0, ~0, 0);
+	vic_init(io_p2v(NETX_PA_VIC), NETX_IRQ_VIC_START, ~0, 0);
 
 	for (irq = NETX_IRQ_HIF_CHAINED(0); irq <= NETX_IRQ_HIF_LAST; irq++) {
 		irq_set_chip_and_handler(irq, &netx_hif_chip,
diff --git a/arch/arm/mach-netx/include/mach/irqs.h b/arch/arm/mach-netx/include/mach/irqs.h
index 6ce914d54a30..8f74a844a775 100644
--- a/arch/arm/mach-netx/include/mach/irqs.h
+++ b/arch/arm/mach-netx/include/mach/irqs.h
@@ -17,42 +17,42 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
-#define NETX_IRQ_VIC_START   0
-#define NETX_IRQ_SOFTINT     0
-#define NETX_IRQ_TIMER0      1
-#define NETX_IRQ_TIMER1      2
-#define NETX_IRQ_TIMER2      3
-#define NETX_IRQ_SYSTIME_NS  4
-#define NETX_IRQ_SYSTIME_S   5
-#define NETX_IRQ_GPIO_15     6
-#define NETX_IRQ_WATCHDOG    7
-#define NETX_IRQ_UART0       8
-#define NETX_IRQ_UART1       9
-#define NETX_IRQ_UART2      10
-#define NETX_IRQ_USB        11
-#define NETX_IRQ_SPI        12
-#define NETX_IRQ_I2C        13
-#define NETX_IRQ_LCD        14
-#define NETX_IRQ_HIF        15
-#define NETX_IRQ_GPIO_0_14  16
-#define NETX_IRQ_XPEC0      17
-#define NETX_IRQ_XPEC1      18
-#define NETX_IRQ_XPEC2      19
-#define NETX_IRQ_XPEC3      20
-#define NETX_IRQ_XPEC(no)   (17 + (no))
-#define NETX_IRQ_MSYNC0     21
-#define NETX_IRQ_MSYNC1     22
-#define NETX_IRQ_MSYNC2     23
-#define NETX_IRQ_MSYNC3     24
-#define NETX_IRQ_IRQ_PHY    25
-#define NETX_IRQ_ISO_AREA   26
+#define NETX_IRQ_VIC_START	64
+#define NETX_IRQ_SOFTINT	(NETX_IRQ_VIC_START + 0)
+#define NETX_IRQ_TIMER0		(NETX_IRQ_VIC_START + 1)
+#define NETX_IRQ_TIMER1		(NETX_IRQ_VIC_START + 2)
+#define NETX_IRQ_TIMER2		(NETX_IRQ_VIC_START + 3)
+#define NETX_IRQ_SYSTIME_NS	(NETX_IRQ_VIC_START + 4)
+#define NETX_IRQ_SYSTIME_S	(NETX_IRQ_VIC_START + 5)
+#define NETX_IRQ_GPIO_15	(NETX_IRQ_VIC_START + 6)
+#define NETX_IRQ_WATCHDOG	(NETX_IRQ_VIC_START + 7)
+#define NETX_IRQ_UART0		(NETX_IRQ_VIC_START + 8)
+#define NETX_IRQ_UART1		(NETX_IRQ_VIC_START + 9)
+#define NETX_IRQ_UART2		(NETX_IRQ_VIC_START + 10)
+#define NETX_IRQ_USB		(NETX_IRQ_VIC_START + 11)
+#define NETX_IRQ_SPI		(NETX_IRQ_VIC_START + 12)
+#define NETX_IRQ_I2C		(NETX_IRQ_VIC_START + 13)
+#define NETX_IRQ_LCD		(NETX_IRQ_VIC_START + 14)
+#define NETX_IRQ_HIF		(NETX_IRQ_VIC_START + 15)
+#define NETX_IRQ_GPIO_0_14	(NETX_IRQ_VIC_START + 16)
+#define NETX_IRQ_XPEC0		(NETX_IRQ_VIC_START + 17)
+#define NETX_IRQ_XPEC1		(NETX_IRQ_VIC_START + 18)
+#define NETX_IRQ_XPEC2		(NETX_IRQ_VIC_START + 19)
+#define NETX_IRQ_XPEC3		(NETX_IRQ_VIC_START + 20)
+#define NETX_IRQ_XPEC(no)	(NETX_IRQ_VIC_START + 17 + (no))
+#define NETX_IRQ_MSYNC0		(NETX_IRQ_VIC_START + 21)
+#define NETX_IRQ_MSYNC1		(NETX_IRQ_VIC_START + 22)
+#define NETX_IRQ_MSYNC2		(NETX_IRQ_VIC_START + 23)
+#define NETX_IRQ_MSYNC3		(NETX_IRQ_VIC_START + 24)
+#define NETX_IRQ_IRQ_PHY	(NETX_IRQ_VIC_START + 25)
+#define NETX_IRQ_ISO_AREA	(NETX_IRQ_VIC_START + 26)
 /* int 27 is reserved */
 /* int 28 is reserved */
-#define NETX_IRQ_TIMER3     29
-#define NETX_IRQ_TIMER4     30
+#define NETX_IRQ_TIMER3		(NETX_IRQ_VIC_START + 29)
+#define NETX_IRQ_TIMER4		(NETX_IRQ_VIC_START + 30)
 /* int 31 is reserved */
 
-#define NETX_IRQS 32
+#define NETX_IRQS 		(NETX_IRQ_VIC_START + 32)
 
 /* for multiplexed irqs on gpio 0..14 */
 #define NETX_IRQ_GPIO(x) (NETX_IRQS + (x))
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 7a0511191f6b..a5a4b2bc42ba 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -152,9 +152,9 @@ static int is_reserved_asid(u64 asid)
 	return 0;
 }
 
-static void new_context(struct mm_struct *mm, unsigned int cpu)
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 {
-	u64 asid = mm->context.id;
+	u64 asid = atomic64_read(&mm->context.id);
 	u64 generation = atomic64_read(&asid_generation);
 
 	if (asid != 0 && is_reserved_asid(asid)) {
@@ -181,13 +181,14 @@ static void new_context(struct mm_struct *mm, unsigned int cpu)
 		cpumask_clear(mm_cpumask(mm));
 	}
 
-	mm->context.id = asid;
+	return asid;
 }
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 {
 	unsigned long flags;
 	unsigned int cpu = smp_processor_id();
+	u64 asid;
 
 	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
 		__check_vmalloc_seq(mm);
@@ -198,20 +199,26 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	 */
 	cpu_set_reserved_ttbr0();
 
-	if (!((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS)
-	    && atomic64_xchg(&per_cpu(active_asids, cpu), mm->context.id))
+	asid = atomic64_read(&mm->context.id);
+	if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+	    && atomic64_xchg(&per_cpu(active_asids, cpu), asid))
 		goto switch_mm_fastpath;
 
 	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
 	/* Check that our ASID belongs to the current generation. */
-	if ((mm->context.id ^ atomic64_read(&asid_generation)) >> ASID_BITS)
-		new_context(mm, cpu);
-
-	atomic64_set(&per_cpu(active_asids, cpu), mm->context.id);
-	cpumask_set_cpu(cpu, mm_cpumask(mm));
+	asid = atomic64_read(&mm->context.id);
+	if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) {
+		asid = new_context(mm, cpu);
+		atomic64_set(&mm->context.id, asid);
+	}
 
-	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
+		local_flush_bp_all();
 		local_flush_tlb_all();
+	}
+
+	atomic64_set(&per_cpu(active_asids, cpu), asid);
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
 switch_mm_fastpath:
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index 2dffc010cc41..5ee505c937d1 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -141,6 +141,7 @@ void setup_mm_for_reboot(void)
 {
 	/* Switch to the identity mapping. */
 	cpu_switch_mm(idmap_pgd, &init_mm);
+	local_flush_bp_all();
 
 #ifdef CONFIG_CPU_HAS_ASID
 	/*
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index 50bf1dafc9ea..6ffd78c0f9ab 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -48,7 +48,7 @@
 ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_MMU
 	mmid	r1, r1				@ get mm->context.id
-	and	r3, r1, #0xff
+	asid	r3, r1
 	mov	r3, r3, lsl #(48 - 32)		@ ASID
 	mcrr	p15, 0, r0, r3, c2		@ set TTB 0
 	isb