summary refs log tree commit diff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-27 15:14:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-27 15:14:26 -0700
commitcea8f46c36c3f82860b038aa23a46e16757666ba (patch)
treee09dc37d2b6880d86dac09afbc0c686139d86df0
parentc1e7179a38919f02dd950801529176b72f5e5a8a (diff)
parent91b006def384d8f07f9f324ab211fefe2b085c90 (diff)
downloadlinux-cea8f46c36c3f82860b038aa23a46e16757666ba.tar.gz
Merge branch 'for-linus' of git://git.linaro.org/people/rmk/linux-arm
Pull ARM updates from Russell King:
 "First ARM push of this merge window, post me coming back from holiday.
  This is what has been in linux-next for the last few weeks.  Not much
  to say which isn't described by the commit summaries."

* 'for-linus' of git://git.linaro.org/people/rmk/linux-arm: (32 commits)
  ARM: 7463/1: topology: Update cpu_power according to DT information
  ARM: 7462/1: topology: factorize the update of sibling masks
  ARM: 7461/1: topology: Add arch_scale_freq_power function
  ARM: 7456/1: ptrace: provide separate functions for tracing syscall {entry,exit}
  ARM: 7455/1: audit: move syscall auditing until after ptrace SIGTRAP handling
  ARM: 7454/1: entry: don't bother with syscall tracing on ret_from_fork path
  ARM: 7453/1: audit: only allow syscall auditing for pure EABI userspace
  ARM: 7452/1: delay: allow timer-based delay implementation to be selected
  ARM: 7451/1: arch timer: implement read_current_timer and get_cycles
  ARM: 7450/1: dcache: select DCACHE_WORD_ACCESS for little-endian ARMv6+ CPUs
  ARM: 7449/1: use generic strnlen_user and strncpy_from_user functions
  ARM: 7448/1: perf: remove arm_perf_pmu_ids global enumeration
  ARM: 7447/1: rwlocks: remove unused branch labels from trylock routines
  ARM: 7446/1: spinlock: use ticket algorithm for ARMv6+ locking implementation
  ARM: 7445/1: mm: update CONTEXTIDR register to contain PID of current process
  ARM: 7444/1: kernel: add arch-timer C3STOP feature
  ARM: 7460/1: remove asm/locks.h
  ARM: 7439/1: head.S: simplify initial page table mapping
  ARM: 7437/1: zImage: Allow DTB command line concatenation with ATAG_CMDLINE
  ARM: 7436/1: Do not map the vectors page as write-through on UP systems
  ...
-rw-r--r--arch/arm/Kconfig22
-rw-r--r--arch/arm/Kconfig.debug9
-rw-r--r--arch/arm/Makefile3
-rw-r--r--arch/arm/boot/compressed/atags_to_fdt.c62
-rw-r--r--arch/arm/include/asm/arch_timer.h3
-rw-r--r--arch/arm/include/asm/delay.h32
-rw-r--r--arch/arm/include/asm/locks.h274
-rw-r--r--arch/arm/include/asm/memory.h2
-rw-r--r--arch/arm/include/asm/perf_event.h17
-rw-r--r--arch/arm/include/asm/pmu.h3
-rw-r--r--arch/arm/include/asm/spinlock.h76
-rw-r--r--arch/arm/include/asm/spinlock_types.h17
-rw-r--r--arch/arm/include/asm/timex.h10
-rw-r--r--arch/arm/include/asm/uaccess.h27
-rw-r--r--arch/arm/include/asm/word-at-a-time.h96
-rw-r--r--arch/arm/kernel/arch_timer.c13
-rw-r--r--arch/arm/kernel/armksyms.c7
-rw-r--r--arch/arm/kernel/entry-common.S20
-rw-r--r--arch/arm/kernel/head.S59
-rw-r--r--arch/arm/kernel/perf_event.c15
-rw-r--r--arch/arm/kernel/perf_event_v6.c2
-rw-r--r--arch/arm/kernel/perf_event_v7.c5
-rw-r--r--arch/arm/kernel/perf_event_xscale.c2
-rw-r--r--arch/arm/kernel/ptrace.c34
-rw-r--r--arch/arm/kernel/smp.c2
-rw-r--r--arch/arm/kernel/topology.c239
-rw-r--r--arch/arm/kernel/traps.c78
-rw-r--r--arch/arm/lib/Makefile3
-rw-r--r--arch/arm/lib/delay-loop.S (renamed from arch/arm/lib/delay.S)20
-rw-r--r--arch/arm/lib/delay.c71
-rw-r--r--arch/arm/lib/strncpy_from_user.S43
-rw-r--r--arch/arm/lib/strnlen_user.S40
-rw-r--r--arch/arm/mach-msm/platsmp.c2
-rw-r--r--arch/arm/mach-omap2/omap-smp.c2
-rw-r--r--arch/arm/mach-pxa/include/mach/regs-ost.h22
-rw-r--r--arch/arm/mach-pxa/reset.c7
-rw-r--r--arch/arm/mach-pxa/time.c52
-rw-r--r--arch/arm/mach-sa1100/assabet.c2
-rw-r--r--arch/arm/mach-sa1100/cpu-sa1100.c1
-rw-r--r--arch/arm/mach-sa1100/cpu-sa1110.c1
-rw-r--r--arch/arm/mach-sa1100/include/mach/SA-1100.h16
-rw-r--r--arch/arm/mach-sa1100/include/mach/gpio.h1
-rw-r--r--arch/arm/mach-sa1100/include/mach/hardware.h6
-rw-r--r--arch/arm/mach-sa1100/include/mach/uncompress.h2
-rw-r--r--arch/arm/mach-sa1100/irq.c1
-rw-r--r--arch/arm/mach-sa1100/jornada720_ssp.c1
-rw-r--r--arch/arm/mach-sa1100/leds-cerf.c1
-rw-r--r--arch/arm/mach-sa1100/leds-lart.c1
-rw-r--r--arch/arm/mach-sa1100/pm.c1
-rw-r--r--arch/arm/mach-sa1100/sleep.S8
-rw-r--r--arch/arm/mach-sa1100/time.c48
-rw-r--r--arch/arm/mm/context.c35
-rw-r--r--arch/arm/mm/dma-mapping.c2
-rw-r--r--arch/arm/mm/init.c2
-rw-r--r--arch/arm/mm/ioremap.c2
-rw-r--r--arch/arm/mm/mmu.c8
-rw-r--r--arch/arm/mm/proc-v6.S6
-rw-r--r--arch/arm/mm/proc-v7-2level.S5
-rw-r--r--arch/arm/oprofile/common.c45
-rw-r--r--arch/arm/plat-versatile/platsmp.c2
-rw-r--r--drivers/amba/bus.c2
-rw-r--r--drivers/input/touchscreen/jornada720_ts.c1
-rw-r--r--drivers/net/irda/pxaficp_ir.c12
-rw-r--r--drivers/pcmcia/sa1100_shannon.c1
-rw-r--r--drivers/tty/serial/amba-pl011.c2
-rw-r--r--drivers/watchdog/sa1100_wdt.c14
-rw-r--r--include/asm-generic/sizes.h49
-rw-r--r--include/linux/sizes.h47
-rw-r--r--init/Kconfig2
69 files changed, 964 insertions, 754 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9124ff75fe53..fbdd8533c05d 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -45,6 +45,9 @@ config ARM
 	select GENERIC_SMP_IDLE_THREAD
 	select KTIME_SCALAR
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
+	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
@@ -2004,6 +2007,25 @@ config ARM_ATAG_DTB_COMPAT
 	  bootloaders, this option allows zImage to extract the information
 	  from the ATAG list and store it at run time into the appended DTB.
 
+choice
+	prompt "Kernel command line type" if ARM_ATAG_DTB_COMPAT
+	default ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER
+
+config ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER
+	bool "Use bootloader kernel arguments if available"
+	help
+	  Uses the command-line options passed by the boot loader instead of
+	  the device tree bootargs property. If the boot loader doesn't provide
+	  any, the device tree bootargs property will be used.
+
+config ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND
+	bool "Extend with bootloader kernel arguments"
+	help
+	  The command-line arguments provided by the boot loader will be
+	  appended to the the device tree bootargs property.
+
+endchoice
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index a03b5a7059e2..f15f82bf3a50 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -395,4 +395,13 @@ config ARM_KPROBES_TEST
 	help
 	  Perform tests of kprobes API and instruction set simulation.
 
+config PID_IN_CONTEXTIDR
+	bool "Write the current PID to the CONTEXTIDR register"
+	depends on CPU_COPY_V6
+	help
+	  Enabling this option causes the kernel to write the current PID to
+	  the PROCID field of the CONTEXTIDR register, at the expense of some
+	  additional instructions during context switch. Say Y here only if you
+	  are planning to use hardware trace tools with this kernel.
+
 endmenu
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4d6d31115cf2..30eae87ead6d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -10,6 +10,9 @@
 #
 # Copyright (C) 1995-2001 by Russell King
 
+# Ensure linker flags are correct
+LDFLAGS		:=
+
 LDFLAGS_vmlinux	:=-p --no-undefined -X
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c
index 797f04bedb47..aabc02a68482 100644
--- a/arch/arm/boot/compressed/atags_to_fdt.c
+++ b/arch/arm/boot/compressed/atags_to_fdt.c
@@ -1,6 +1,12 @@
 #include <asm/setup.h>
 #include <libfdt.h>
 
+#if defined(CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_EXTEND)
+#define do_extend_cmdline 1
+#else
+#define do_extend_cmdline 0
+#endif
+
 static int node_offset(void *fdt, const char *node_path)
 {
 	int offset = fdt_path_offset(fdt, node_path);
@@ -36,6 +42,48 @@ static int setprop_cell(void *fdt, const char *node_path,
 	return fdt_setprop_cell(fdt, offset, property, val);
 }
 
+static const void *getprop(const void *fdt, const char *node_path,
+			   const char *property, int *len)
+{
+	int offset = fdt_path_offset(fdt, node_path);
+
+	if (offset == -FDT_ERR_NOTFOUND)
+		return NULL;
+
+	return fdt_getprop(fdt, offset, property, len);
+}
+
+static void merge_fdt_bootargs(void *fdt, const char *fdt_cmdline)
+{
+	char cmdline[COMMAND_LINE_SIZE];
+	const char *fdt_bootargs;
+	char *ptr = cmdline;
+	int len = 0;
+
+	/* copy the fdt command line into the buffer */
+	fdt_bootargs = getprop(fdt, "/chosen", "bootargs", &len);
+	if (fdt_bootargs)
+		if (len < COMMAND_LINE_SIZE) {
+			memcpy(ptr, fdt_bootargs, len);
+			/* len is the length of the string
+			 * including the NULL terminator */
+			ptr += len - 1;
+		}
+
+	/* and append the ATAG_CMDLINE */
+	if (fdt_cmdline) {
+		len = strlen(fdt_cmdline);
+		if (ptr - cmdline + len + 2 < COMMAND_LINE_SIZE) {
+			*ptr++ = ' ';
+			memcpy(ptr, fdt_cmdline, len);
+			ptr += len;
+		}
+	}
+	*ptr = '\0';
+
+	setprop_string(fdt, "/chosen", "bootargs", cmdline);
+}
+
 /*
  * Convert and fold provided ATAGs into the provided FDT.
  *
@@ -72,8 +120,18 @@ int atags_to_fdt(void *atag_list, void *fdt, int total_space)
 
 	for_each_tag(atag, atag_list) {
 		if (atag->hdr.tag == ATAG_CMDLINE) {
-			setprop_string(fdt, "/chosen", "bootargs",
-					atag->u.cmdline.cmdline);
+			/* Append the ATAGS command line to the device tree
+			 * command line.
+			 * NB: This means that if the same parameter is set in
+			 * the device tree and in the tags, the one from the
+			 * tags will be chosen.
+			 */
+			if (do_extend_cmdline)
+				merge_fdt_bootargs(fdt,
+						   atag->u.cmdline.cmdline);
+			else
+				setprop_string(fdt, "/chosen", "bootargs",
+					       atag->u.cmdline.cmdline);
 		} else if (atag->hdr.tag == ATAG_MEM) {
 			if (memcount >= sizeof(mem_reg_property)/4)
 				continue;
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index ed2e95d46e29..62e75475e57e 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -1,7 +1,10 @@
 #ifndef __ASMARM_ARCH_TIMER_H
 #define __ASMARM_ARCH_TIMER_H
 
+#include <asm/errno.h>
+
 #ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_HAS_READ_CURRENT_TIMER
 int arch_timer_of_register(void);
 int arch_timer_sched_clock_init(void);
 #else
diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index b2deda181549..dc6145120de3 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,22 @@
 #ifndef __ASM_ARM_DELAY_H
 #define __ASM_ARM_DELAY_H
 
+#include <asm/memory.h>
 #include <asm/param.h>	/* HZ */
 
-extern void __delay(int loops);
+#define MAX_UDELAY_MS	2
+#define UDELAY_MULT	((UL(2199023) * HZ) >> 11)
+#define UDELAY_SHIFT	30
+
+#ifndef __ASSEMBLY__
+
+extern struct arm_delay_ops {
+	void (*delay)(unsigned long);
+	void (*const_udelay)(unsigned long);
+	void (*udelay)(unsigned long);
+} arm_delay_ops;
+
+#define __delay(n)		arm_delay_ops.delay(n)
 
 /*
  * This function intentionally does not exist; if you see references to
@@ -23,22 +36,27 @@ extern void __bad_udelay(void);
  * division by multiplication: you don't have to worry about
  * loss of precision.
  *
- * Use only for very small delays ( < 1 msec).  Should probably use a
+ * Use only for very small delays ( < 2 msec).  Should probably use a
  * lookup table, really, as the multiplications take much too long with
  * short delays.  This is a "reasonable" implementation, though (and the
  * first constant multiplications gets optimized away if the delay is
  * a constant)
  */
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
+#define __udelay(n)		arm_delay_ops.udelay(n)
+#define __const_udelay(n)	arm_delay_ops.const_udelay(n)
 
 #define udelay(n)							\
 	(__builtin_constant_p(n) ?					\
 	  ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :		\
-			__const_udelay((n) * ((2199023U*HZ)>>11))) :	\
+			__const_udelay((n) * UDELAY_MULT)) :		\
 	  __udelay(n))
 
+/* Loop-based definitions for assembly code. */
+extern void __loop_delay(unsigned long loops);
+extern void __loop_udelay(unsigned long usecs);
+extern void __loop_const_udelay(unsigned long);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* defined(_ARM_DELAY_H) */
 
diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h
deleted file mode 100644
index ef4c897772d1..000000000000
--- a/arch/arm/include/asm/locks.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  arch/arm/include/asm/locks.h
- *
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Interrupt safe locking assembler. 
- */
-#ifndef __ASM_PROC_LOCKS_H
-#define __ASM_PROC_LOCKS_H
-
-#if __LINUX_ARM_ARCH__ >= 6
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"1:	ldrex	lr, [%1]\n"			\
-"	sub	lr, lr, %2\n"			\
-"	strex	ip, lr, [%1]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	cmp	lr, #0\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#else
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%1]\n"			\
-"	subs	lr, lr, %2\n"			\
-"	str	lr, [%1]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index fcb575747e5e..e965f1b560f1 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -16,7 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 00cbe10a50e3..e074948d8143 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,21 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM perf PMU IDs for use by internal perf clients. */
-enum arm_perf_pmu_ids {
-	ARM_PERF_PMU_ID_XSCALE1	= 0,
-	ARM_PERF_PMU_ID_XSCALE2,
-	ARM_PERF_PMU_ID_V6,
-	ARM_PERF_PMU_ID_V6MP,
-	ARM_PERF_PMU_ID_CA8,
-	ARM_PERF_PMU_ID_CA9,
-	ARM_PERF_PMU_ID_CA5,
-	ARM_PERF_PMU_ID_CA15,
-	ARM_PERF_PMU_ID_CA7,
-	ARM_NUM_PMU_IDS,
-};
-
-extern enum arm_perf_pmu_ids
-armpmu_get_pmu_id(void);
+/* Nothing to see here... */
 
 #endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 90114faa9f3c..4432305f4a2a 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -103,10 +103,9 @@ struct pmu_hw_events {
 
 struct arm_pmu {
 	struct pmu	pmu;
-	enum arm_perf_pmu_ids id;
 	enum arm_pmu_type type;
 	cpumask_t	active_irqs;
-	const char	*name;
+	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct hw_perf_event *evt, int idx);
 	void		(*disable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 65fa3c88095c..b4ca707d0a69 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -59,18 +59,13 @@ static inline void dsb_sev(void)
 }
 
 /*
- * ARMv6 Spin-locking.
+ * ARMv6 ticket-based spin-locking.
  *
- * We exclusively read the old value.  If it is zero, we may have
- * won the lock, so we try exclusively storing it.  A memory barrier
- * is required after we get a lock, and before we release it, because
- * V6 CPUs are assumed to have weakly ordered memory.
- *
- * Unlocked value: 0
- * Locked value: 1
+ * A memory barrier is required after we get a lock, and before we
+ * release it, because V6 CPUs are assumed to have weakly ordered
+ * memory.
  */
 
-#define arch_spin_is_locked(x)		((x)->lock != 0)
 #define arch_spin_unlock_wait(lock) \
 	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
@@ -79,31 +74,39 @@ static inline void dsb_sev(void)
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 newval;
+	arch_spinlock_t lockval;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-	WFE("ne")
-"	strexeq	%0, %2, [%1]\n"
-"	teqeq	%0, #0\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%1, %0, %4\n"
+"	strex	%2, %1, [%3]\n"
+"	teq	%2, #0\n"
 "	bne	1b"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
+	while (lockval.tickets.next != lockval.tickets.owner) {
+		wfe();
+		lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
+	}
+
 	smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 slock;
 
 	__asm__ __volatile__(
-"	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-"	strexeq	%0, %2, [%1]"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+"	ldrex	%0, [%2]\n"
+"	subs	%1, %0, %0, ror #16\n"
+"	addeq	%0, %0, %3\n"
+"	strexeq	%1, %0, [%2]"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
 	if (tmp == 0) {
@@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
+	unsigned long tmp;
+	u32 slock;
+
 	smp_mb();
 
 	__asm__ __volatile__(
-"	str	%1, [%0]\n"
-	:
-	: "r" (&lock->lock), "r" (0)
+"	mov	%1, #1\n"
+"1:	ldrex	%0, [%2]\n"
+"	uadd16	%0, %0, %1\n"
+"	strex	%1, %0, [%2]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock)
 	: "cc");
 
 	dsb_sev();
 }
 
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return tickets.owner != tickets.next;
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return (tickets.next - tickets.owner) > 1;
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
 /*
  * RWLOCKS
  *
@@ -158,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	unsigned long tmp;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
+"	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
 "	strexeq	%0, %2, [%1]"
 	: "=&r" (tmp)
@@ -244,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	unsigned long tmp, tmp2 = 1;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%2]\n"
+"	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
 "	strexpl	%1, %0, [%2]\n"
 	: "=&r" (tmp), "+r" (tmp2)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index d14d197ae04a..b262d2f8b478 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
 # error "please don't include this file directly"
 #endif
 
+#define TICKET_SHIFT	16
+
 typedef struct {
-	volatile unsigned int lock;
+	union {
+		u32 slock;
+		struct __raw_tickets {
+#ifdef __ARMEB__
+			u16 next;
+			u16 owner;
+#else
+			u16 owner;
+			u16 next;
+#endif
+		} tickets;
+	};
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
 	volatile unsigned int lock;
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adaba..ce119442277c 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -12,13 +12,15 @@
 #ifndef _ASMARM_TIMEX_H
 #define _ASMARM_TIMEX_H
 
+#include <asm/arch_timer.h>
 #include <mach/timex.h>
 
 typedef unsigned long cycles_t;
 
-static inline cycles_t get_cycles (void)
-{
-	return 0;
-}
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+#define get_cycles()	({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#else
+#define get_cycles()	(0)
+#endif
 
 #endif
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 71f6536d17ac..479a6352e0b5 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs)
 
 #define access_ok(type,addr,size)	(__range_ok(addr,size) == 0)
 
+#define user_addr_max() \
+	(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+
 /*
  * The "__xxx" versions of the user access functions do not verify the
  * address space - it must have been done previously with a separate
@@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l
 #define __clear_user(addr,n)		(memset((void __force *)addr, 0, n), 0)
 #endif
 
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
@@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
 	return n;
 }
 
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
-	long res = -EFAULT;
-	if (access_ok(VERIFY_READ, src, 1))
-		res = __strncpy_from_user(dst, src, count);
-	return res;
-}
-
-#define strlen_user(s)	strnlen_user(s, ~0UL >> 1)
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
 
-static inline long __must_check strnlen_user(const char __user *s, long n)
-{
-	unsigned long res = 0;
-
-	if (__addr_ok(s))
-		res = __strnlen_user(s, n);
-
-	return res;
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 #endif /* _ASMARM_UACCESS_H */
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..4d52f92967a6
--- /dev/null
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -0,0 +1,96 @@
+#ifndef __ASM_ARM_WORD_AT_A_TIME_H
+#define __ASM_ARM_WORD_AT_A_TIME_H
+
+#ifndef __ARMEB__
+
+/*
+ * Little-endian word-at-a-time zero byte handling.
+ * Heavily based on the x86 algorithm.
+ */
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+				     const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	unsigned long ret;
+
+#if __LINUX_ARM_ARCH__ >= 5
+	/* We have clz available. */
+	ret = fls(mask) >> 3;
+#else
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	ret = (0x0ff0001 + mask) >> 23;
+	/* Fix the 1 for 00 case */
+	ret &= mask;
+#endif
+
+	return ret;
+}
+
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#define zero_bytemask(mask) (mask)
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	ldr	%0, [%2]\n"
+	"2:\n"
+	"	.pushsection .fixup,\"ax\"\n"
+	"	.align 2\n"
+	"3:	and	%1, %2, #0x3\n"
+	"	bic	%2, %2, #0x3\n"
+	"	ldr	%0, [%2]\n"
+	"	lsl	%1, %1, #0x3\n"
+	"	lsr	%0, %0, %1\n"
+	"	b	2b\n"
+	"	.popsection\n"
+	"	.pushsection __ex_table,\"a\"\n"
+	"	.align	3\n"
+	"	.long	1b, 3b\n"
+	"	.popsection"
+	: "=&r" (ret), "=&r" (offset)
+	: "r" (addr), "Qo" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+
+#endif	/* DCACHE_WORD_ACCESS */
+
+#else	/* __ARMEB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+#endif /* __ASM_ARM_WORD_AT_A_TIME_H */
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c
index dd58035621f7..cf258807160d 100644
--- a/arch/arm/kernel/arch_timer.c
+++ b/arch/arm/kernel/arch_timer.c
@@ -32,6 +32,8 @@ static int arch_timer_ppi2;
 
 static struct clock_event_device __percpu **arch_timer_evt;
 
+extern void init_current_timer_delay(unsigned long freq);
+
 /*
  * Architected system timer support.
  */
@@ -137,7 +139,7 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk)
 	/* Be safe... */
 	arch_timer_disable();
 
-	clk->features = CLOCK_EVT_FEAT_ONESHOT;
+	clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP;
 	clk->name = "arch_sys_timer";
 	clk->rating = 450;
 	clk->set_mode = arch_timer_set_mode;
@@ -223,6 +225,14 @@ static cycle_t arch_counter_read(struct clocksource *cs)
 	return arch_counter_get_cntpct();
 }
 
+int read_current_timer(unsigned long *timer_val)
+{
+	if (!arch_timer_rate)
+		return -ENXIO;
+	*timer_val = arch_counter_get_cntpct();
+	return 0;
+}
+
 static struct clocksource clocksource_counter = {
 	.name	= "arch_sys_counter",
 	.rating	= 400,
@@ -296,6 +306,7 @@ static int __init arch_timer_register(void)
 	if (err)
 		goto out_free_irq;
 
+	init_current_timer_delay(arch_timer_rate);
 	return 0;
 
 out_free_irq:
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index b57c75e0b01f..60d3b738d420 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -49,8 +49,7 @@ extern void __aeabi_ulcmp(void);
 extern void fpundefinstr(void);
 
 	/* platform dependent support */
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__const_udelay);
+EXPORT_SYMBOL(arm_delay_ops);
 
 	/* networking */
 EXPORT_SYMBOL(csum_partial);
@@ -87,10 +86,6 @@ EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(__memzero);
 
-	/* user mem (segment) */
-EXPORT_SYMBOL(__strnlen_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-
 #ifdef CONFIG_MMU
 EXPORT_SYMBOL(copy_page);
 
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 4afed88d250a..49d9f9305247 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -95,13 +95,7 @@ ENDPROC(ret_to_user)
 ENTRY(ret_from_fork)
 	bl	schedule_tail
 	get_thread_info tsk
-	ldr	r1, [tsk, #TI_FLAGS]		@ check for syscall tracing
 	mov	why, #1
-	tst	r1, #_TIF_SYSCALL_WORK		@ are we tracing syscalls?
-	beq	ret_slow_syscall
-	mov	r1, sp
-	mov	r0, #1				@ trace exit [IP = 1]
-	bl	syscall_trace
 	b	ret_slow_syscall
 ENDPROC(ret_from_fork)
 
@@ -448,10 +442,9 @@ ENDPROC(vector_swi)
 	 * context switches, and waiting for our parent to respond.
 	 */
 __sys_trace:
-	mov	r2, scno
-	add	r1, sp, #S_OFF
-	mov	r0, #0				@ trace entry [IP = 0]
-	bl	syscall_trace
+	mov	r1, scno
+	add	r0, sp, #S_OFF
+	bl	syscall_trace_enter
 
 	adr	lr, BSYM(__sys_trace_return)	@ return address
 	mov	scno, r0			@ syscall number (possibly new)
@@ -463,10 +456,9 @@ __sys_trace:
 
 __sys_trace_return:
 	str	r0, [sp, #S_R0 + S_OFF]!	@ save returned r0
-	mov	r2, scno
-	mov	r1, sp
-	mov	r0, #1				@ trace exit [IP = 1]
-	bl	syscall_trace
+	mov	r1, scno
+	mov	r0, sp
+	bl	syscall_trace_exit
 	b	ret_slow_syscall
 
 	.align	5
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 835898e7d704..3db960e20cb8 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -55,14 +55,6 @@
 	add	\rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE
 	.endm
 
-#ifdef CONFIG_XIP_KERNEL
-#define KERNEL_START	XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
-#define KERNEL_END	_edata_loc
-#else
-#define KERNEL_START	KERNEL_RAM_VADDR
-#define KERNEL_END	_end
-#endif
-
 /*
  * Kernel startup entry point.
  * ---------------------------
@@ -218,51 +210,46 @@ __create_page_tables:
 	blo	1b
 
 	/*
-	 * Now setup the pagetables for our kernel direct
-	 * mapped region.
+	 * Map our RAM from the start to the end of the kernel .bss section.
 	 */
-	mov	r3, pc
-	mov	r3, r3, lsr #SECTION_SHIFT
-	orr	r3, r7, r3, lsl #SECTION_SHIFT
-	add	r0, r4,  #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
-	str	r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
-	ldr	r6, =(KERNEL_END - 1)
-	add	r0, r0, #1 << PMD_ORDER
+	add	r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
+	ldr	r6, =(_end - 1)
+	orr	r3, r8, r7
 	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
-1:	cmp	r0, r6
+1:	str	r3, [r0], #1 << PMD_ORDER
 	add	r3, r3, #1 << SECTION_SHIFT
-	strls	r3, [r0], #1 << PMD_ORDER
+	cmp	r0, r6
 	bls	1b
 
 #ifdef CONFIG_XIP_KERNEL
 	/*
-	 * Map some ram to cover our .data and .bss areas.
+	 * Map the kernel image separately as it is not located in RAM.
 	 */
-	add	r3, r8, #TEXT_OFFSET
-	orr	r3, r3, r7
-	add	r0, r4,  #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
-	str	r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]!
-	ldr	r6, =(_end - 1)
-	add	r0, r0, #4
+#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
+	mov	r3, pc
+	mov	r3, r3, lsr #SECTION_SHIFT
+	orr	r3, r7, r3, lsl #SECTION_SHIFT
+	add	r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
+	str	r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
+	ldr	r6, =(_edata_loc - 1)
+	add	r0, r0, #1 << PMD_ORDER
 	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
 1:	cmp	r0, r6
-	add	r3, r3, #1 << 20
-	strls	r3, [r0], #4
+	add	r3, r3, #1 << SECTION_SHIFT
+	strls	r3, [r0], #1 << PMD_ORDER
 	bls	1b
 #endif
 
 	/*
-	 * Then map boot params address in r2 or the first 1MB (2MB with LPAE)
-	 * of ram if boot params address is not specified.
+	 * Then map boot params address in r2 if specified.
 	 */
 	mov	r0, r2, lsr #SECTION_SHIFT
 	movs	r0, r0, lsl #SECTION_SHIFT
-	moveq	r0, r8
-	sub	r3, r0, r8
-	add	r3, r3, #PAGE_OFFSET
-	add	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
-	orr	r6, r7, r0
-	str	r6, [r3]
+	subne	r3, r0, r8
+	addne	r3, r3, #PAGE_OFFSET
+	addne	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
+	orrne	r6, r7, r0
+	strne	r6, [r3]
 
 #ifdef CONFIG_DEBUG_LL
 #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index a02eada3aa5d..ab243b87118d 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -47,17 +47,14 @@ static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
-enum arm_perf_pmu_ids
-armpmu_get_pmu_id(void)
+const char *perf_pmu_name(void)
 {
-	int id = -ENODEV;
-
-	if (cpu_pmu != NULL)
-		id = cpu_pmu->id;
+	if (!cpu_pmu)
+		return NULL;
 
-	return id;
+	return cpu_pmu->pmu.name;
 }
-EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);
+EXPORT_SYMBOL_GPL(perf_pmu_name);
 
 int perf_num_counters(void)
 {
@@ -760,7 +757,7 @@ init_hw_perf_events(void)
 			cpu_pmu->name, cpu_pmu->num_events);
 		cpu_pmu_init(cpu_pmu);
 		register_cpu_notifier(&pmu_cpu_notifier);
-		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
+		armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW);
 	} else {
 		pr_info("no hardware support available\n");
 	}
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index ab627a740fa3..c90fcb2b6967 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -650,7 +650,6 @@ static int armv6_map_event(struct perf_event *event)
 }
 
 static struct arm_pmu armv6pmu = {
-	.id			= ARM_PERF_PMU_ID_V6,
 	.name			= "v6",
 	.handle_irq		= armv6pmu_handle_irq,
 	.enable			= armv6pmu_enable_event,
@@ -685,7 +684,6 @@ static int armv6mpcore_map_event(struct perf_event *event)
 }
 
 static struct arm_pmu armv6mpcore_pmu = {
-	.id			= ARM_PERF_PMU_ID_V6MP,
 	.name			= "v6mpcore",
 	.handle_irq		= armv6pmu_handle_irq,
 	.enable			= armv6pmu_enable_event,
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index d3c536068162..f04070bd2183 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1258,7 +1258,6 @@ static u32 __init armv7_read_num_pmnc_events(void)
 
 static struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
-	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
 	armv7pmu.name		= "ARMv7 Cortex-A8";
 	armv7pmu.map_event	= armv7_a8_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
@@ -1267,7 +1266,6 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
-	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
 	armv7pmu.name		= "ARMv7 Cortex-A9";
 	armv7pmu.map_event	= armv7_a9_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
@@ -1276,7 +1274,6 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a5_pmu_init(void)
 {
-	armv7pmu.id		= ARM_PERF_PMU_ID_CA5;
 	armv7pmu.name		= "ARMv7 Cortex-A5";
 	armv7pmu.map_event	= armv7_a5_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
@@ -1285,7 +1282,6 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a15_pmu_init(void)
 {
-	armv7pmu.id		= ARM_PERF_PMU_ID_CA15;
 	armv7pmu.name		= "ARMv7 Cortex-A15";
 	armv7pmu.map_event	= armv7_a15_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
@@ -1295,7 +1291,6 @@ static struct arm_pmu *__init armv7_a15_pmu_init(void)
 
 static struct arm_pmu *__init armv7_a7_pmu_init(void)
 {
-	armv7pmu.id		= ARM_PERF_PMU_ID_CA7;
 	armv7pmu.name		= "ARMv7 Cortex-A7";
 	armv7pmu.map_event	= armv7_a7_map_event;
 	armv7pmu.num_events	= armv7_read_num_pmnc_events();
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index e34e7254e652..f759fe0bab63 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -435,7 +435,6 @@ static int xscale_map_event(struct perf_event *event)
 }
 
 static struct arm_pmu xscale1pmu = {
-	.id		= ARM_PERF_PMU_ID_XSCALE1,
 	.name		= "xscale1",
 	.handle_irq	= xscale1pmu_handle_irq,
 	.enable		= xscale1pmu_enable_event,
@@ -803,7 +802,6 @@ xscale2pmu_write_counter(int counter, u32 val)
 }
 
 static struct arm_pmu xscale2pmu = {
-	.id		= ARM_PERF_PMU_ID_XSCALE2,
 	.name		= "xscale2",
 	.handle_irq	= xscale2pmu_handle_irq,
 	.enable		= xscale2pmu_enable_event,
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 14e38261cd31..dab711e6e1ca 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -907,16 +907,16 @@ long arch_ptrace(struct task_struct *child, long request,
 	return ret;
 }
 
-asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
+enum ptrace_syscall_dir {
+	PTRACE_SYSCALL_ENTER = 0,
+	PTRACE_SYSCALL_EXIT,
+};
+
+static int ptrace_syscall_trace(struct pt_regs *regs, int scno,
+				enum ptrace_syscall_dir dir)
 {
 	unsigned long ip;
 
-	if (why)
-		audit_syscall_exit(regs);
-	else
-		audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0,
-				    regs->ARM_r1, regs->ARM_r2, regs->ARM_r3);
-
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
 		return scno;
 
@@ -927,14 +927,28 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
 	 * IP = 0 -> entry, =1 -> exit
 	 */
 	ip = regs->ARM_ip;
-	regs->ARM_ip = why;
+	regs->ARM_ip = dir;
 
-	if (why)
+	if (dir == PTRACE_SYSCALL_EXIT)
 		tracehook_report_syscall_exit(regs, 0);
 	else if (tracehook_report_syscall_entry(regs))
 		current_thread_info()->syscall = -1;
 
 	regs->ARM_ip = ip;
-
 	return current_thread_info()->syscall;
 }
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
+{
+	int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_ENTER);
+	audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1,
+			    regs->ARM_r2, regs->ARM_r3);
+	return ret;
+}
+
+asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno)
+{
+	int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_EXIT);
+	audit_syscall_exit(regs);
+	return ret;
+}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 2c7217d971db..aea74f5bc34a 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -179,7 +179,7 @@ void __ref cpu_die(void)
 	mb();
 
 	/* Tell __cpu_die() that this CPU is now safe to dispose of */
-	complete(&cpu_died);
+	RCU_NONIDLE(complete(&cpu_died));
 
 	/*
 	 * actual CPU shutdown procedure is at least platform (if not
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 8200deaa14f6..198b08456e90 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -17,11 +17,190 @@
 #include <linux/percpu.h>
 #include <linux/node.h>
 #include <linux/nodemask.h>
+#include <linux/of.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 #include <asm/cputype.h>
 #include <asm/topology.h>
 
+/*
+ * cpu power scale management
+ */
+
+/*
+ * cpu power table
+ * This per cpu data structure describes the relative capacity of each core.
+ * On a heteregenous system, cores don't have the same computation capacity
+ * and we reflect that difference in the cpu_power field so the scheduler can
+ * take this difference into account during load balance. A per cpu structure
+ * is preferred because each CPU updates its own cpu_power field during the
+ * load balance except for idle cores. One idle core is selected to run the
+ * rebalance_domains for all idle cores and the cpu_power can be updated
+ * during this sequence.
+ */
+static DEFINE_PER_CPU(unsigned long, cpu_scale);
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+	return per_cpu(cpu_scale, cpu);
+}
+
+static void set_power_scale(unsigned int cpu, unsigned long power)
+{
+	per_cpu(cpu_scale, cpu) = power;
+}
+
+#ifdef CONFIG_OF
+struct cpu_efficiency {
+	const char *compatible;
+	unsigned long efficiency;
+};
+
+/*
+ * Table of relative efficiency of each processors
+ * The efficiency value must fit in 20bit and the final
+ * cpu_scale value must be in the range
+ *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ * in order to return at most 1 when DIV_ROUND_CLOSEST
+ * is used to compute the capacity of a CPU.
+ * Processors that are not defined in the table,
+ * use the default SCHED_POWER_SCALE value for cpu_scale.
+ */
+struct cpu_efficiency table_efficiency[] = {
+	{"arm,cortex-a15", 3891},
+	{"arm,cortex-a7",  2048},
+	{NULL, },
+};
+
+struct cpu_capacity {
+	unsigned long hwid;
+	unsigned long capacity;
+};
+
+struct cpu_capacity *cpu_capacity;
+
+unsigned long middle_capacity = 1;
+
+/*
+ * Iterate all CPUs' descriptor in DT and compute the efficiency
+ * (as per table_efficiency). Also calculate a middle efficiency
+ * as close as possible to  (max{eff_i} - min{eff_i}) / 2
+ * This is later used to scale the cpu_power field such that an
+ * 'average' CPU is of middle power. Also see the comments near
+ * table_efficiency[] and update_cpu_power().
+ */
+static void __init parse_dt_topology(void)
+{
+	struct cpu_efficiency *cpu_eff;
+	struct device_node *cn = NULL;
+	unsigned long min_capacity = (unsigned long)(-1);
+	unsigned long max_capacity = 0;
+	unsigned long capacity = 0;
+	int alloc_size, cpu = 0;
+
+	alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity);
+	cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT);
+
+	while ((cn = of_find_node_by_type(cn, "cpu"))) {
+		const u32 *rate, *reg;
+		int len;
+
+		if (cpu >= num_possible_cpus())
+			break;
+
+		for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++)
+			if (of_device_is_compatible(cn, cpu_eff->compatible))
+				break;
+
+		if (cpu_eff->compatible == NULL)
+			continue;
+
+		rate = of_get_property(cn, "clock-frequency", &len);
+		if (!rate || len != 4) {
+			pr_err("%s missing clock-frequency property\n",
+				cn->full_name);
+			continue;
+		}
+
+		reg = of_get_property(cn, "reg", &len);
+		if (!reg || len != 4) {
+			pr_err("%s missing reg property\n", cn->full_name);
+			continue;
+		}
+
+		capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency;
+
+		/* Save min capacity of the system */
+		if (capacity < min_capacity)
+			min_capacity = capacity;
+
+		/* Save max capacity of the system */
+		if (capacity > max_capacity)
+			max_capacity = capacity;
+
+		cpu_capacity[cpu].capacity = capacity;
+		cpu_capacity[cpu++].hwid = be32_to_cpup(reg);
+	}
+
+	if (cpu < num_possible_cpus())
+		cpu_capacity[cpu].hwid = (unsigned long)(-1);
+
+	/* If min and max capacities are equals, we bypass the update of the
+	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
+	 * compute a middle_capacity factor that will ensure that the capacity
+	 * of an 'average' CPU of the system will be as close as possible to
+	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * constraint explained near table_efficiency[].
+	 */
+	if (min_capacity == max_capacity)
+		cpu_capacity[0].hwid = (unsigned long)(-1);
+	else if (4*max_capacity < (3*(max_capacity + min_capacity)))
+		middle_capacity = (min_capacity + max_capacity)
+				>> (SCHED_POWER_SHIFT+1);
+	else
+		middle_capacity = ((max_capacity / 3)
+				>> (SCHED_POWER_SHIFT-1)) + 1;
+
+}
+
+/*
+ * Look for a customed capacity of a CPU in the cpu_capacity table during the
+ * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
+ * function returns directly for SMP system.
+ */
+void update_cpu_power(unsigned int cpu, unsigned long hwid)
+{
+	unsigned int idx = 0;
+
+	/* look for the cpu's hwid in the cpu capacity table */
+	for (idx = 0; idx < num_possible_cpus(); idx++) {
+		if (cpu_capacity[idx].hwid == hwid)
+			break;
+
+		if (cpu_capacity[idx].hwid == -1)
+			return;
+	}
+
+	if (idx == num_possible_cpus())
+		return;
+
+	set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity);
+
+	printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
+		cpu, arch_scale_freq_power(NULL, cpu));
+}
+
+#else
+static inline void parse_dt_topology(void) {}
+static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {}
+#endif
+
+
+/*
+ * cpu topology management
+ */
+
 #define MPIDR_SMP_BITMASK (0x3 << 30)
 #define MPIDR_SMP_VALUE (0x2 << 30)
 
@@ -31,6 +210,7 @@
  * These masks reflect the current use of the affinity levels.
  * The affinity level can be up to 16 bits according to ARM ARM
  */
+#define MPIDR_HWID_BITMASK 0xFFFFFF
 
 #define MPIDR_LEVEL0_MASK 0x3
 #define MPIDR_LEVEL0_SHIFT 0
@@ -41,6 +221,9 @@
 #define MPIDR_LEVEL2_MASK 0xFF
 #define MPIDR_LEVEL2_SHIFT 16
 
+/*
+ * cpu topology table
+ */
 struct cputopo_arm cpu_topology[NR_CPUS];
 
 const struct cpumask *cpu_coregroup_mask(int cpu)
@@ -48,6 +231,32 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	return &cpu_topology[cpu].core_sibling;
 }
 
+void update_siblings_masks(unsigned int cpuid)
+{
+	struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_possible_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->socket_id != cpu_topo->socket_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		if (cpu != cpuid)
+			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+	smp_wmb();
+}
+
 /*
  * store_cpu_topology is called at boot when only one cpu is running
  * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
@@ -57,7 +266,6 @@ void store_cpu_topology(unsigned int cpuid)
 {
 	struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
 	unsigned int mpidr;
-	unsigned int cpu;
 
 	/* If the cpu topology has been already set, just return */
 	if (cpuid_topo->core_id != -1)
@@ -99,26 +307,9 @@ void store_cpu_topology(unsigned int cpuid)
 		cpuid_topo->socket_id = -1;
 	}
 
-	/* update core and thread sibling masks */
-	for_each_possible_cpu(cpu) {
-		struct cputopo_arm *cpu_topo = &cpu_topology[cpu];
-
-		if (cpuid_topo->socket_id == cpu_topo->socket_id) {
-			cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-			if (cpu != cpuid)
-				cpumask_set_cpu(cpu,
-					&cpuid_topo->core_sibling);
-
-			if (cpuid_topo->core_id == cpu_topo->core_id) {
-				cpumask_set_cpu(cpuid,
-					&cpu_topo->thread_sibling);
-				if (cpu != cpuid)
-					cpumask_set_cpu(cpu,
-						&cpuid_topo->thread_sibling);
-			}
-		}
-	}
-	smp_wmb();
+	update_siblings_masks(cpuid);
+
+	update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK);
 
 	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
@@ -134,7 +325,7 @@ void init_cpu_topology(void)
 {
 	unsigned int cpu;
 
-	/* init core mask */
+	/* init core mask and power*/
 	for_each_possible_cpu(cpu) {
 		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
 
@@ -143,6 +334,10 @@ void init_cpu_topology(void)
 		cpu_topo->socket_id = -1;
 		cpumask_clear(&cpu_topo->core_sibling);
 		cpumask_clear(&cpu_topo->thread_sibling);
+
+		set_power_scale(cpu, SCHED_POWER_SCALE);
 	}
 	smp_wmb();
+
+	parse_dt_topology();
 }
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3647170e9a16..8b97d739b17b 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -233,9 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
 #define S_ISA " ARM"
 #endif
 
-static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs)
+static int __die(const char *str, int err, struct pt_regs *regs)
 {
-	struct task_struct *tsk = thread->task;
+	struct task_struct *tsk = current;
 	static int die_counter;
 	int ret;
 
@@ -245,12 +245,12 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt
 	/* trap and error numbers are mostly meaningless on ARM */
 	ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV);
 	if (ret == NOTIFY_STOP)
-		return ret;
+		return 1;
 
 	print_modules();
 	__show_regs(regs);
 	printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n",
-		TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
+		TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
 
 	if (!user_mode(regs) || in_interrupt()) {
 		dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
@@ -259,45 +259,77 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt
 		dump_instr(KERN_EMERG, regs);
 	}
 
-	return ret;
+	return 0;
 }
 
-static DEFINE_RAW_SPINLOCK(die_lock);
+static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int die_owner = -1;
+static unsigned int die_nest_count;
 
-/*
- * This function is protected against re-entrancy.
- */
-void die(const char *str, struct pt_regs *regs, int err)
+static unsigned long oops_begin(void)
 {
-	struct thread_info *thread = current_thread_info();
-	int ret;
-	enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE;
+	int cpu;
+	unsigned long flags;
 
 	oops_enter();
 
-	raw_spin_lock_irq(&die_lock);
+	/* racy, but better than risking deadlock. */
+	raw_local_irq_save(flags);
+	cpu = smp_processor_id();
+	if (!arch_spin_trylock(&die_lock)) {
+		if (cpu == die_owner)
+			/* nested oops. should stop eventually */;
+		else
+			arch_spin_lock(&die_lock);
+	}
+	die_nest_count++;
+	die_owner = cpu;
 	console_verbose();
 	bust_spinlocks(1);
-	if (!user_mode(regs))
-		bug_type = report_bug(regs->ARM_pc, regs);
-	if (bug_type != BUG_TRAP_TYPE_NONE)
-		str = "Oops - BUG";
-	ret = __die(str, err, thread, regs);
+	return flags;
+}
 
-	if (regs && kexec_should_crash(thread->task))
+static void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
+{
+	if (regs && kexec_should_crash(current))
 		crash_kexec(regs);
 
 	bust_spinlocks(0);
+	die_owner = -1;
 	add_taint(TAINT_DIE);
-	raw_spin_unlock_irq(&die_lock);
+	die_nest_count--;
+	if (!die_nest_count)
+		/* Nest count reaches zero, release the lock. */
+		arch_spin_unlock(&die_lock);
+	raw_local_irq_restore(flags);
 	oops_exit();
 
 	if (in_interrupt())
 		panic("Fatal exception in interrupt");
 	if (panic_on_oops)
 		panic("Fatal exception");
-	if (ret != NOTIFY_STOP)
-		do_exit(SIGSEGV);
+	if (signr)
+		do_exit(signr);
+}
+
+/*
+ * This function is protected against re-entrancy.
+ */
+void die(const char *str, struct pt_regs *regs, int err)
+{
+	enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE;
+	unsigned long flags = oops_begin();
+	int sig = SIGSEGV;
+
+	if (!user_mode(regs))
+		bug_type = report_bug(regs->ARM_pc, regs);
+	if (bug_type != BUG_TRAP_TYPE_NONE)
+		str = "Oops - BUG";
+
+	if (__die(str, err, regs))
+		sig = 0;
+
+	oops_end(flags, regs, sig);
 }
 
 void arm_notify_die(const char *str, struct pt_regs *regs,
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 992769ae2599..2473fd1fd51c 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -6,9 +6,8 @@
 
 lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
-		   delay.o findbit.o memchr.o memcpy.o		      \
+		   delay.o delay-loop.o findbit.o memchr.o memcpy.o   \
 		   memmove.o memset.o memzero.o setbit.o              \
-		   strncpy_from_user.o strnlen_user.o                 \
 		   strchr.o strrchr.o                                 \
 		   testchangebit.o testclearbit.o testsetbit.o        \
 		   ashldi3.o ashrdi3.o lshrdi3.o muldi3.o             \
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay-loop.S
index 3c9a05c8d20b..36b668d8e121 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay-loop.S
@@ -9,11 +9,11 @@
  */
 #include <linux/linkage.h>
 #include <asm/assembler.h>
-#include <asm/param.h>
+#include <asm/delay.h>
 		.text
 
 .LC0:		.word	loops_per_jiffy
-.LC1:		.word	(2199023*HZ)>>11
+.LC1:		.word	UDELAY_MULT
 
 /*
  * r0  <= 2000
@@ -21,10 +21,10 @@
  * HZ  <= 1000
  */
 
-ENTRY(__udelay)
+ENTRY(__loop_udelay)
 		ldr	r2, .LC1
 		mul	r0, r2, r0
-ENTRY(__const_udelay)				@ 0 <= r0 <= 0x7fffff06
+ENTRY(__loop_const_udelay)			@ 0 <= r0 <= 0x7fffff06
 		mov	r1, #-1
 		ldr	r2, .LC0
 		ldr	r2, [r2]		@ max = 0x01ffffff
@@ -39,12 +39,10 @@ ENTRY(__const_udelay)				@ 0 <= r0 <= 0x7fffff06
 
 /*
  * loops = r0 * HZ * loops_per_jiffy / 1000000
- *
- * Oh, if only we had a cycle counter...
  */
 
 @ Delay routine
-ENTRY(__delay)
+ENTRY(__loop_delay)
 		subs	r0, r0, #1
 #if 0
 		movls	pc, lr
@@ -62,8 +60,8 @@ ENTRY(__delay)
 		movls	pc, lr
 		subs	r0, r0, #1
 #endif
-		bhi	__delay
+		bhi	__loop_delay
 		mov	pc, lr
-ENDPROC(__udelay)
-ENDPROC(__const_udelay)
-ENDPROC(__delay)
+ENDPROC(__loop_udelay)
+ENDPROC(__loop_const_udelay)
+ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
new file mode 100644
index 000000000000..d6dacc69254e
--- /dev/null
+++ b/arch/arm/lib/delay.c
@@ -0,0 +1,71 @@
+/*
+ * Delay loops based on the OpenRISC implementation.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timex.h>
+
+/*
+ * Default to the loop-based delay implementation.
+ */
+struct arm_delay_ops arm_delay_ops = {
+	.delay		= __loop_delay,
+	.const_udelay	= __loop_const_udelay,
+	.udelay		= __loop_udelay,
+};
+
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+static void __timer_delay(unsigned long cycles)
+{
+	cycles_t start = get_cycles();
+
+	while ((get_cycles() - start) < cycles)
+		cpu_relax();
+}
+
+static void __timer_const_udelay(unsigned long xloops)
+{
+	unsigned long long loops = xloops;
+	loops *= loops_per_jiffy;
+	__timer_delay(loops >> UDELAY_SHIFT);
+}
+
+static void __timer_udelay(unsigned long usecs)
+{
+	__timer_const_udelay(usecs * UDELAY_MULT);
+}
+
+void __init init_current_timer_delay(unsigned long freq)
+{
+	pr_info("Switching to timer-based delay loop\n");
+	lpj_fine			= freq / HZ;
+	arm_delay_ops.delay		= __timer_delay;
+	arm_delay_ops.const_udelay	= __timer_const_udelay;
+	arm_delay_ops.udelay		= __timer_udelay;
+}
+
+unsigned long __cpuinit calibrate_delay_is_known(void)
+{
+	return lpj_fine;
+}
+#endif
diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
deleted file mode 100644
index f202d7bd1647..000000000000
--- a/arch/arm/lib/strncpy_from_user.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- *  linux/arch/arm/lib/strncpy_from_user.S
- *
- *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-	.text
-	.align	5
-
-/*
- * Copy a string from user space to kernel space.
- *  r0 = dst, r1 = src, r2 = byte length
- * returns the number of characters copied (strlen of copied string),
- *  -EFAULT on exception, or "len" if we fill the whole buffer
- */
-ENTRY(__strncpy_from_user)
-	mov	ip, r1
-1:	subs	r2, r2, #1
-	ldrusr	r3, r1, 1, pl
-	bmi	2f
-	strb	r3, [r0], #1
-	teq	r3, #0
-	bne	1b
-	sub	r1, r1, #1	@ take NUL character out of count
-2:	sub	r0, r1, ip
-	mov	pc, lr
-ENDPROC(__strncpy_from_user)
-
-	.pushsection .fixup,"ax"
-	.align	0
-9001:	mov	r3, #0
-	strb	r3, [r0, #0]	@ null terminate
-	mov	r0, #-EFAULT
-	mov	pc, lr
-	.popsection
-
diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
deleted file mode 100644
index 0ecbb459c4f1..000000000000
--- a/arch/arm/lib/strnlen_user.S
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- *  linux/arch/arm/lib/strnlen_user.S
- *
- *  Copyright (C) 1995-2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-
-	.text
-	.align	5
-
-/* Prototype: unsigned long __strnlen_user(const char *str, long n)
- * Purpose  : get length of a string in user memory
- * Params   : str - address of string in user memory
- * Returns  : length of string *including terminator*
- *	      or zero on exception, or n + 1 if too long
- */
-ENTRY(__strnlen_user)
-	mov	r2, r0
-1:
-	ldrusr	r3, r0, 1
-	teq	r3, #0
-	beq	2f
-	subs	r1, r1, #1
-	bne	1b
-	add	r0, r0, #1
-2:	sub	r0, r0, r2
-	mov	pc, lr
-ENDPROC(__strnlen_user)
-
-	.pushsection .fixup,"ax"
-	.align	0
-9001:	mov	r0, #0
-	mov	pc, lr
-	.popsection
diff --git a/arch/arm/mach-msm/platsmp.c b/arch/arm/mach-msm/platsmp.c
index db0117ec55f4..e012dc8391cf 100644
--- a/arch/arm/mach-msm/platsmp.c
+++ b/arch/arm/mach-msm/platsmp.c
@@ -127,7 +127,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * the boot monitor to read the system wide flags register,
 	 * and branch to the address found there.
 	 */
-	gic_raise_softirq(cpumask_of(cpu), 1);
+	gic_raise_softirq(cpumask_of(cpu), 0);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 7d118b9bdd5f..9a35adf91232 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -125,7 +125,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 		booted = true;
 	}
 
-	gic_raise_softirq(cpumask_of(cpu), 1);
+	gic_raise_softirq(cpumask_of(cpu), 0);
 
 	/*
 	 * Now the secondary core is starting up let it run its
diff --git a/arch/arm/mach-pxa/include/mach/regs-ost.h b/arch/arm/mach-pxa/include/mach/regs-ost.h
index a3e5f86ef67e..628819995c52 100644
--- a/arch/arm/mach-pxa/include/mach/regs-ost.h
+++ b/arch/arm/mach-pxa/include/mach/regs-ost.h
@@ -7,17 +7,17 @@
  * OS Timer & Match Registers
  */
 
-#define OSMR0		__REG(0x40A00000)  /* */
-#define OSMR1		__REG(0x40A00004)  /* */
-#define OSMR2		__REG(0x40A00008)  /* */
-#define OSMR3		__REG(0x40A0000C)  /* */
-#define OSMR4		__REG(0x40A00080)  /* */
-#define OSCR		__REG(0x40A00010)  /* OS Timer Counter Register */
-#define OSCR4		__REG(0x40A00040)  /* OS Timer Counter Register */
-#define OMCR4		__REG(0x40A000C0)  /* */
-#define OSSR		__REG(0x40A00014)  /* OS Timer Status Register */
-#define OWER		__REG(0x40A00018)  /* OS Timer Watchdog Enable Register */
-#define OIER		__REG(0x40A0001C)  /* OS Timer Interrupt Enable Register */
+#define OSMR0		io_p2v(0x40A00000)  /* */
+#define OSMR1		io_p2v(0x40A00004)  /* */
+#define OSMR2		io_p2v(0x40A00008)  /* */
+#define OSMR3		io_p2v(0x40A0000C)  /* */
+#define OSMR4		io_p2v(0x40A00080)  /* */
+#define OSCR		io_p2v(0x40A00010)  /* OS Timer Counter Register */
+#define OSCR4		io_p2v(0x40A00040)  /* OS Timer Counter Register */
+#define OMCR4		io_p2v(0x40A000C0)  /* */
+#define OSSR		io_p2v(0x40A00014)  /* OS Timer Status Register */
+#define OWER		io_p2v(0x40A00018)  /* OS Timer Watchdog Enable Register */
+#define OIER		io_p2v(0x40A0001C)  /* OS Timer Interrupt Enable Register */
 
 #define OSSR_M3		(1 << 3)	/* Match status channel 3 */
 #define OSSR_M2		(1 << 2)	/* Match status channel 2 */
diff --git a/arch/arm/mach-pxa/reset.c b/arch/arm/mach-pxa/reset.c
index b4528899ef08..3fab583755d4 100644
--- a/arch/arm/mach-pxa/reset.c
+++ b/arch/arm/mach-pxa/reset.c
@@ -77,9 +77,10 @@ static void do_gpio_reset(void)
 static void do_hw_reset(void)
 {
 	/* Initialize the watchdog and let it fire */
-	OWER = OWER_WME;
-	OSSR = OSSR_M3;
-	OSMR3 = OSCR + 368640;	/* ... in 100 ms */
+	writel_relaxed(OWER_WME, OWER);
+	writel_relaxed(OSSR_M3, OSSR);
+	/* ... in 100 ms */
+	writel_relaxed(readl_relaxed(OSCR) + 368640, OSMR3);
 }
 
 void pxa_restart(char mode, const char *cmd)
diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c
index 3d6c9bd90de6..4bc47d63698b 100644
--- a/arch/arm/mach-pxa/time.c
+++ b/arch/arm/mach-pxa/time.c
@@ -35,7 +35,7 @@
 
 static u32 notrace pxa_read_sched_clock(void)
 {
-	return OSCR;
+	return readl_relaxed(OSCR);
 }
 
 
@@ -47,8 +47,8 @@ pxa_ost0_interrupt(int irq, void *dev_id)
 	struct clock_event_device *c = dev_id;
 
 	/* Disarm the compare/match, signal the event. */
-	OIER &= ~OIER_E0;
-	OSSR = OSSR_M0;
+	writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
+	writel_relaxed(OSSR_M0, OSSR);
 	c->event_handler(c);
 
 	return IRQ_HANDLED;
@@ -59,10 +59,10 @@ pxa_osmr0_set_next_event(unsigned long delta, struct clock_event_device *dev)
 {
 	unsigned long next, oscr;
 
-	OIER |= OIER_E0;
-	next = OSCR + delta;
-	OSMR0 = next;
-	oscr = OSCR;
+	writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER);
+	next = readl_relaxed(OSCR) + delta;
+	writel_relaxed(next, OSMR0);
+	oscr = readl_relaxed(OSCR);
 
 	return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0;
 }
@@ -72,15 +72,15 @@ pxa_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *dev)
 {
 	switch (mode) {
 	case CLOCK_EVT_MODE_ONESHOT:
-		OIER &= ~OIER_E0;
-		OSSR = OSSR_M0;
+		writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
+		writel_relaxed(OSSR_M0, OSSR);
 		break;
 
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
 		/* initializing, released, or preparing for suspend */
-		OIER &= ~OIER_E0;
-		OSSR = OSSR_M0;
+		writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
+		writel_relaxed(OSSR_M0, OSSR);
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
@@ -108,8 +108,8 @@ static void __init pxa_timer_init(void)
 {
 	unsigned long clock_tick_rate = get_clock_tick_rate();
 
-	OIER = 0;
-	OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
+	writel_relaxed(0, OIER);
+	writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR);
 
 	setup_sched_clock(pxa_read_sched_clock, 32, clock_tick_rate);
 
@@ -122,7 +122,7 @@ static void __init pxa_timer_init(void)
 
 	setup_irq(IRQ_OST0, &pxa_ost0_irq);
 
-	clocksource_mmio_init(&OSCR, "oscr0", clock_tick_rate, 200, 32,
+	clocksource_mmio_init(OSCR, "oscr0", clock_tick_rate, 200, 32,
 		clocksource_mmio_readl_up);
 	clockevents_register_device(&ckevt_pxa_osmr0);
 }
@@ -132,12 +132,12 @@ static unsigned long osmr[4], oier, oscr;
 
 static void pxa_timer_suspend(void)
 {
-	osmr[0] = OSMR0;
-	osmr[1] = OSMR1;
-	osmr[2] = OSMR2;
-	osmr[3] = OSMR3;
-	oier = OIER;
-	oscr = OSCR;
+	osmr[0] = readl_relaxed(OSMR0);
+	osmr[1] = readl_relaxed(OSMR1);
+	osmr[2] = readl_relaxed(OSMR2);
+	osmr[3] = readl_relaxed(OSMR3);
+	oier = readl_relaxed(OIER);
+	oscr = readl_relaxed(OSCR);
 }
 
 static void pxa_timer_resume(void)
@@ -151,12 +151,12 @@ static void pxa_timer_resume(void)
 	if (osmr[0] - oscr < MIN_OSCR_DELTA)
 		osmr[0] += MIN_OSCR_DELTA;
 
-	OSMR0 = osmr[0];
-	OSMR1 = osmr[1];
-	OSMR2 = osmr[2];
-	OSMR3 = osmr[3];
-	OIER = oier;
-	OSCR = oscr;
+	writel_relaxed(osmr[0], OSMR0);
+	writel_relaxed(osmr[1], OSMR1);
+	writel_relaxed(osmr[2], OSMR2);
+	writel_relaxed(osmr[3], OSMR3);
+	writel_relaxed(oier, OIER);
+	writel_relaxed(oscr, OSCR);
 }
 #else
 #define pxa_timer_suspend NULL
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index d1dc7f1a239c..d673211f121c 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -362,7 +362,7 @@ static void __init assabet_init(void)
 static void __init map_sa1100_gpio_regs( void )
 {
 	unsigned long phys = __PREG(GPLR) & PMD_MASK;
-	unsigned long virt = io_p2v(phys);
+	unsigned long virt = (unsigned long)io_p2v(phys);
 	int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_DOMAIN(DOMAIN_IO);
 	pmd_t *pmd;
 
diff --git a/arch/arm/mach-sa1100/cpu-sa1100.c b/arch/arm/mach-sa1100/cpu-sa1100.c
index 19b2053f5af4..e8f4d1e19233 100644
--- a/arch/arm/mach-sa1100/cpu-sa1100.c
+++ b/arch/arm/mach-sa1100/cpu-sa1100.c
@@ -87,6 +87,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/io.h>
 
 #include <asm/cputype.h>
 
diff --git a/arch/arm/mach-sa1100/cpu-sa1110.c b/arch/arm/mach-sa1100/cpu-sa1110.c
index 675bf8ef97e8..48c45b0c92bb 100644
--- a/arch/arm/mach-sa1100/cpu-sa1110.c
+++ b/arch/arm/mach-sa1100/cpu-sa1110.c
@@ -19,6 +19,7 @@
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include <linux/types.h>
diff --git a/arch/arm/mach-sa1100/include/mach/SA-1100.h b/arch/arm/mach-sa1100/include/mach/SA-1100.h
index 3f2d1b60188c..0ac6cc08a19c 100644
--- a/arch/arm/mach-sa1100/include/mach/SA-1100.h
+++ b/arch/arm/mach-sa1100/include/mach/SA-1100.h
@@ -830,14 +830,14 @@
  *              	(read/write).
  */
 
-#define OSMR0  		__REG(0x90000000)  /* OS timer Match Reg. 0 */
-#define OSMR1  		__REG(0x90000004)  /* OS timer Match Reg. 1 */
-#define OSMR2  		__REG(0x90000008)  /* OS timer Match Reg. 2 */
-#define OSMR3  		__REG(0x9000000c)  /* OS timer Match Reg. 3 */
-#define OSCR   	__REG(0x90000010)  /* OS timer Counter Reg. */
-#define OSSR   	__REG(0x90000014	)  /* OS timer Status Reg. */
-#define OWER   	__REG(0x90000018	)  /* OS timer Watch-dog Enable Reg. */
-#define OIER   	__REG(0x9000001C	)  /* OS timer Interrupt Enable Reg. */
+#define OSMR0  		io_p2v(0x90000000)  /* OS timer Match Reg. 0 */
+#define OSMR1  		io_p2v(0x90000004)  /* OS timer Match Reg. 1 */
+#define OSMR2  		io_p2v(0x90000008)  /* OS timer Match Reg. 2 */
+#define OSMR3  		io_p2v(0x9000000c)  /* OS timer Match Reg. 3 */
+#define OSCR   		io_p2v(0x90000010)  /* OS timer Counter Reg. */
+#define OSSR   		io_p2v(0x90000014)  /* OS timer Status Reg. */
+#define OWER   		io_p2v(0x90000018)  /* OS timer Watch-dog Enable Reg. */
+#define OIER  	 	io_p2v(0x9000001C)  /* OS timer Interrupt Enable Reg. */
 
 #define OSSR_M(Nb)	        	/* Match detected [0..3]           */ \
                 	(0x00000001 << (Nb))
diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h
index a38fc4f54241..6a9eecf3137e 100644
--- a/arch/arm/mach-sa1100/include/mach/gpio.h
+++ b/arch/arm/mach-sa1100/include/mach/gpio.h
@@ -24,6 +24,7 @@
 #ifndef __ASM_ARCH_SA1100_GPIO_H
 #define __ASM_ARCH_SA1100_GPIO_H
 
+#include <linux/io.h>
 #include <mach/hardware.h>
 #include <asm/irq.h>
 #include <asm-generic/gpio.h>
diff --git a/arch/arm/mach-sa1100/include/mach/hardware.h b/arch/arm/mach-sa1100/include/mach/hardware.h
index 99f5856d8de4..cbedd75a9d65 100644
--- a/arch/arm/mach-sa1100/include/mach/hardware.h
+++ b/arch/arm/mach-sa1100/include/mach/hardware.h
@@ -32,7 +32,7 @@
 #define PIO_START       0x80000000	/* physical start of IO space */
 
 #define io_p2v( x )             \
-   ( (((x)&0x00ffffff) | (((x)&0x30000000)>>VIO_SHIFT)) + VIO_BASE )
+   IOMEM( (((x)&0x00ffffff) | (((x)&0x30000000)>>VIO_SHIFT)) + VIO_BASE )
 #define io_v2p( x )             \
    ( (((x)&0x00ffffff) | (((x)&(0x30000000>>VIO_SHIFT))<<VIO_SHIFT)) + PIO_START )
 
@@ -47,6 +47,8 @@
 #define CPU_SA1110_ID	(0x6901b110)
 #define CPU_SA1110_MASK	(0xfffffff0)
 
+#define __MREG(x)	IOMEM(io_p2v(x))
+
 #ifndef __ASSEMBLY__
 
 #include <asm/cputype.h>
@@ -56,7 +58,7 @@
 #define cpu_is_sa1100()	((read_cpuid_id() & CPU_SA1100_MASK) == CPU_SA1100_ID)
 #define cpu_is_sa1110()	((read_cpuid_id() & CPU_SA1110_MASK) == CPU_SA1110_ID)
 
-# define __REG(x)	(*((volatile unsigned long *)io_p2v(x)))
+# define __REG(x)	(*((volatile unsigned long __iomem *)io_p2v(x)))
 # define __PREG(x)	(io_v2p((unsigned long)&(x)))
 
 static inline unsigned long get_clock_tick_rate(void)
diff --git a/arch/arm/mach-sa1100/include/mach/uncompress.h b/arch/arm/mach-sa1100/include/mach/uncompress.h
index 6cb39ddde656..5cf71da60e42 100644
--- a/arch/arm/mach-sa1100/include/mach/uncompress.h
+++ b/arch/arm/mach-sa1100/include/mach/uncompress.h
@@ -8,6 +8,8 @@
 
 #include "hardware.h"
 
+#define IOMEM(x)	(x)
+
 /*
  * The following code assumes the serial port has already been
  * initialized by the bootloader.  We search for the first enabled
diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c
index 516ccc25d7fd..2124f1fc2fbe 100644
--- a/arch/arm/mach-sa1100/irq.c
+++ b/arch/arm/mach-sa1100/irq.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/ioport.h>
 #include <linux/syscore_ops.h>
diff --git a/arch/arm/mach-sa1100/jornada720_ssp.c b/arch/arm/mach-sa1100/jornada720_ssp.c
index b412fc09c80c..7f07f08d8968 100644
--- a/arch/arm/mach-sa1100/jornada720_ssp.c
+++ b/arch/arm/mach-sa1100/jornada720_ssp.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
+#include <linux/io.h>
 
 #include <mach/hardware.h>
 #include <mach/jornada720.h>
diff --git a/arch/arm/mach-sa1100/leds-cerf.c b/arch/arm/mach-sa1100/leds-cerf.c
index 040540fb7d8a..30fc3b2bf555 100644
--- a/arch/arm/mach-sa1100/leds-cerf.c
+++ b/arch/arm/mach-sa1100/leds-cerf.c
@@ -4,6 +4,7 @@
  * Author: ???
  */
 #include <linux/init.h>
+#include <linux/io.h>
 
 #include <mach/hardware.h>
 #include <asm/leds.h>
diff --git a/arch/arm/mach-sa1100/leds-lart.c b/arch/arm/mach-sa1100/leds-lart.c
index a51830c60e53..50a5b143b460 100644
--- a/arch/arm/mach-sa1100/leds-lart.c
+++ b/arch/arm/mach-sa1100/leds-lart.c
@@ -10,6 +10,7 @@
  *  pace of the LED.
  */
 #include <linux/init.h>
+#include <linux/io.h>
 
 #include <mach/hardware.h>
 #include <asm/leds.h>
diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c
index 690cf0ce5c0c..6645d1e31f14 100644
--- a/arch/arm/mach-sa1100/pm.c
+++ b/arch/arm/mach-sa1100/pm.c
@@ -23,6 +23,7 @@
  * 				Storage is local on the stack now.
  */
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/suspend.h>
 #include <linux/errno.h>
 #include <linux/time.h>
diff --git a/arch/arm/mach-sa1100/sleep.S b/arch/arm/mach-sa1100/sleep.S
index 30cc6721665b..85863741ef8b 100644
--- a/arch/arm/mach-sa1100/sleep.S
+++ b/arch/arm/mach-sa1100/sleep.S
@@ -38,9 +38,9 @@ ENTRY(sa1100_finish_suspend)
 	orr     r4, r4, #MDREFR_K1DB2
 	ldr	r5, =PPCR
 
-	@ Pre-load __udelay into the I-cache
+	@ Pre-load __loop_udelay into the I-cache
 	mov	r0, #1
-	bl	__udelay
+	bl	__loop_udelay
 	mov	r0, r0
 
 	@ The following must all exist in a single cache line to
@@ -53,11 +53,11 @@ ENTRY(sa1100_finish_suspend)
 	@ delay 90us and set CPU PLL to lowest speed
 	@ fixes resume problem on high speed SA1110
 	mov	r0, #90
-	bl	__udelay
+	bl	__loop_udelay
 	mov	r1, #0
 	str	r1, [r5]
 	mov	r0, #90
-	bl	__udelay
+	bl	__loop_udelay
 
 	/*
 	 * SA1110 SDRAM controller workaround.  register values:
diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c
index 6af26e8d55e6..80702c9ecc77 100644
--- a/arch/arm/mach-sa1100/time.c
+++ b/arch/arm/mach-sa1100/time.c
@@ -22,7 +22,7 @@
 
 static u32 notrace sa1100_read_sched_clock(void)
 {
-	return OSCR;
+	return readl_relaxed(OSCR);
 }
 
 #define MIN_OSCR_DELTA 2
@@ -32,8 +32,8 @@ static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
 	struct clock_event_device *c = dev_id;
 
 	/* Disarm the compare/match, signal the event. */
-	OIER &= ~OIER_E0;
-	OSSR = OSSR_M0;
+	writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
+	writel_relaxed(OSSR_M0, OSSR);
 	c->event_handler(c);
 
 	return IRQ_HANDLED;
@@ -44,10 +44,10 @@ sa1100_osmr0_set_next_event(unsigned long delta, struct clock_event_device *c)
 {
 	unsigned long next, oscr;
 
-	OIER |= OIER_E0;
-	next = OSCR + delta;
-	OSMR0 = next;
-	oscr = OSCR;
+	writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER);
+	next = readl_relaxed(OSCR) + delta;
+	writel_relaxed(next, OSMR0);
+	oscr = readl_relaxed(OSCR);
 
 	return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0;
 }
@@ -59,8 +59,8 @@ sa1100_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *c)
 	case CLOCK_EVT_MODE_ONESHOT:
 	case CLOCK_EVT_MODE_UNUSED:
 	case CLOCK_EVT_MODE_SHUTDOWN:
-		OIER &= ~OIER_E0;
-		OSSR = OSSR_M0;
+		writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
+		writel_relaxed(OSSR_M0, OSSR);
 		break;
 
 	case CLOCK_EVT_MODE_RESUME:
@@ -86,8 +86,8 @@ static struct irqaction sa1100_timer_irq = {
 
 static void __init sa1100_timer_init(void)
 {
-	OIER = 0;
-	OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
+	writel_relaxed(0, OIER);
+	writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR);
 
 	setup_sched_clock(sa1100_read_sched_clock, 32, 3686400);
 
@@ -100,7 +100,7 @@ static void __init sa1100_timer_init(void)
 
 	setup_irq(IRQ_OST0, &sa1100_timer_irq);
 
-	clocksource_mmio_init(&OSCR, "oscr", CLOCK_TICK_RATE, 200, 32,
+	clocksource_mmio_init(OSCR, "oscr", CLOCK_TICK_RATE, 200, 32,
 		clocksource_mmio_readl_up);
 	clockevents_register_device(&ckevt_sa1100_osmr0);
 }
@@ -110,26 +110,26 @@ unsigned long osmr[4], oier;
 
 static void sa1100_timer_suspend(void)
 {
-	osmr[0] = OSMR0;
-	osmr[1] = OSMR1;
-	osmr[2] = OSMR2;
-	osmr[3] = OSMR3;
-	oier = OIER;
+	osmr[0] = readl_relaxed(OSMR0);
+	osmr[1] = readl_relaxed(OSMR1);
+	osmr[2] = readl_relaxed(OSMR2);
+	osmr[3] = readl_relaxed(OSMR3);
+	oier = readl_relaxed(OIER);
 }
 
 static void sa1100_timer_resume(void)
 {
-	OSSR = 0x0f;
-	OSMR0 = osmr[0];
-	OSMR1 = osmr[1];
-	OSMR2 = osmr[2];
-	OSMR3 = osmr[3];
-	OIER = oier;
+	writel_relaxed(0x0f, OSSR);
+	writel_relaxed(osmr[0], OSMR0);
+	writel_relaxed(osmr[1], OSMR1);
+	writel_relaxed(osmr[2], OSMR2);
+	writel_relaxed(osmr[3], OSMR3);
+	writel_relaxed(oier, OIER);
 
 	/*
 	 * OSMR0 is the system timer: make sure OSCR is sufficiently behind
 	 */
-	OSCR = OSMR0 - LATCH;
+	writel_relaxed(OSMR0 - LATCH, OSCR);
 }
 #else
 #define sa1100_timer_suspend NULL
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 806cc4f63516..119bc52ab93e 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 
 #include <asm/mmu_context.h>
+#include <asm/thread_notify.h>
 #include <asm/tlbflush.h>
 
 static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
@@ -48,6 +49,40 @@ void cpu_set_reserved_ttbr0(void)
 }
 #endif
 
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd,
+			       void *t)
+{
+	u32 contextidr;
+	pid_t pid;
+	struct thread_info *thread = t;
+
+	if (cmd != THREAD_NOTIFY_SWITCH)
+		return NOTIFY_DONE;
+
+	pid = task_pid_nr(thread->task) << ASID_BITS;
+	asm volatile(
+	"	mrc	p15, 0, %0, c13, c0, 1\n"
+	"	bfi	%1, %0, #0, %2\n"
+	"	mcr	p15, 0, %1, c13, c0, 1\n"
+	: "=r" (contextidr), "+r" (pid)
+	: "I" (ASID_BITS));
+	isb();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block contextidr_notifier_block = {
+	.notifier_call = contextidr_notifier,
+};
+
+static int __init contextidr_notifier_init(void)
+{
+	return thread_register_notifier(&contextidr_notifier_block);
+}
+arch_initcall(contextidr_notifier_init);
+#endif
+
 /*
  * We fork()ed a process, and we need a new context for the child
  * to run in.
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 655878bcc96d..5cfc98994076 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -23,12 +23,12 @@
 #include <linux/slab.h>
 #include <linux/iommu.h>
 #include <linux/vmalloc.h>
+#include <linux/sizes.h>
 
 #include <asm/memory.h>
 #include <asm/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
-#include <asm/sizes.h>
 #include <asm/mach/arch.h>
 #include <asm/dma-iommu.h>
 #include <asm/mach/map.h>
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index f54d59219764..9aec41fa80ae 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -21,13 +21,13 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/dma-contiguous.h>
+#include <linux/sizes.h>
 
 #include <asm/mach-types.h>
 #include <asm/memblock.h>
 #include <asm/prom.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
-#include <asm/sizes.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
 
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 4f55f5062ab7..566750fa57d4 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -25,6 +25,7 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/io.h>
+#include <linux/sizes.h>
 
 #include <asm/cp15.h>
 #include <asm/cputype.h>
@@ -32,7 +33,6 @@
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
-#include <asm/sizes.h>
 #include <asm/system_info.h>
 
 #include <asm/mach/map.h>
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index cf4528d51774..4c2d0451e84a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -16,13 +16,13 @@
 #include <linux/memblock.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
+#include <linux/sizes.h>
 
 #include <asm/cp15.h>
 #include <asm/cputype.h>
 #include <asm/sections.h>
 #include <asm/cachetype.h>
 #include <asm/setup.h>
-#include <asm/sizes.h>
 #include <asm/smp_plat.h>
 #include <asm/tlb.h>
 #include <asm/highmem.h>
@@ -422,12 +422,6 @@ static void __init build_mem_type_table(void)
 	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
 
 	/*
-	 * Only use write-through for non-SMP systems
-	 */
-	if (!is_smp() && cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
-		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
-
-	/*
 	 * Enable CPU-specific coherency if supported.
 	 * (Only available on XSC3 at the moment.)
 	 */
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 5900cd520e84..86b8b480634f 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -107,6 +107,12 @@ ENTRY(cpu_v6_switch_mm)
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 	mcr	p15, 0, r2, c7, c10, 4		@ drain write buffer
 	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
+	bic	r2, r2, #0xff			@ extract the PID
+	and	r1, r1, #0xff
+	orr	r1, r1, r2			@ insert into new context ID
+#endif
 	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 42ac069c8012..fd045e706390 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -46,6 +46,11 @@ ENTRY(cpu_v7_switch_mm)
 #ifdef CONFIG_ARM_ERRATA_430973
 	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 #endif
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
+	lsr	r2, r2, #8			@ extract the PID
+	bfi	r1, r2, #8, #24			@ insert into new context ID
+#endif
 #ifdef CONFIG_ARM_ERRATA_754322
 	dsb
 #endif
diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c
index 4e0a371630b3..99c63d4b6af8 100644
--- a/arch/arm/oprofile/common.c
+++ b/arch/arm/oprofile/common.c
@@ -23,26 +23,37 @@
 #include <asm/ptrace.h>
 
 #ifdef CONFIG_HW_PERF_EVENTS
+
+/*
+ * OProfile has a curious naming scheme for the ARM PMUs, but they are
+ * part of the user ABI so we need to map from the perf PMU name for
+ * supported PMUs.
+ */
+static struct op_perf_name {
+	char *perf_name;
+	char *op_name;
+} op_perf_name_map[] = {
+	{ "xscale1",		"arm/xscale1"	},
+	{ "xscale1",		"arm/xscale2"	},
+	{ "v6",			"arm/armv6"	},
+	{ "v6mpcore",		"arm/mpcore"	},
+	{ "ARMv7 Cortex-A8",	"arm/armv7"	},
+	{ "ARMv7 Cortex-A9",	"arm/armv7-ca9"	},
+};
+
 char *op_name_from_perf_id(void)
 {
-	enum arm_perf_pmu_ids id = armpmu_get_pmu_id();
-
-	switch (id) {
-	case ARM_PERF_PMU_ID_XSCALE1:
-		return "arm/xscale1";
-	case ARM_PERF_PMU_ID_XSCALE2:
-		return "arm/xscale2";
-	case ARM_PERF_PMU_ID_V6:
-		return "arm/armv6";
-	case ARM_PERF_PMU_ID_V6MP:
-		return "arm/mpcore";
-	case ARM_PERF_PMU_ID_CA8:
-		return "arm/armv7";
-	case ARM_PERF_PMU_ID_CA9:
-		return "arm/armv7-ca9";
-	default:
-		return NULL;
+	int i;
+	struct op_perf_name names;
+	const char *perf_name = perf_pmu_name();
+
+	for (i = 0; i < ARRAY_SIZE(op_perf_name_map); ++i) {
+		names = op_perf_name_map[i];
+		if (!strcmp(names.perf_name, perf_name))
+			return names.op_name;
 	}
+
+	return NULL;
 }
 #endif
 
diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c
index 49c7db48c7f1..d7c5c171f5aa 100644
--- a/arch/arm/plat-versatile/platsmp.c
+++ b/arch/arm/plat-versatile/platsmp.c
@@ -85,7 +85,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * the boot monitor to read the system wide flags register,
 	 * and branch to the address found there.
 	 */
-	gic_raise_softirq(cpumask_of(cpu), 1);
+	gic_raise_softirq(cpumask_of(cpu), 0);
 
 	timeout = jiffies + (1 * HZ);
 	while (time_before(jiffies, timeout)) {
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index b7e728517284..e8eb91bd0d28 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -16,9 +16,9 @@
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/amba/bus.h>
+#include <linux/sizes.h>
 
 #include <asm/irq.h>
-#include <asm/sizes.h>
 
 #define to_amba_driver(d)	container_of(d, struct amba_driver, drv)
 
diff --git a/drivers/input/touchscreen/jornada720_ts.c b/drivers/input/touchscreen/jornada720_ts.c
index d9be6eac99b1..7f03d1bd916e 100644
--- a/drivers/input/touchscreen/jornada720_ts.c
+++ b/drivers/input/touchscreen/jornada720_ts.c
@@ -19,6 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/io.h>
 
 #include <mach/hardware.h>
 #include <mach/jornada720.h>
diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c
index ff16daf33ae1..8d5476707912 100644
--- a/drivers/net/irda/pxaficp_ir.c
+++ b/drivers/net/irda/pxaficp_ir.c
@@ -289,7 +289,7 @@ static irqreturn_t pxa_irda_sir_irq(int irq, void *dev_id)
 			}
 			lsr = STLSR;
 		}
-		si->last_oscr = OSCR;
+		si->last_oscr = readl_relaxed(OSCR);
 		break;
 
 	case 0x04: /* Received Data Available */
@@ -300,7 +300,7 @@ static irqreturn_t pxa_irda_sir_irq(int irq, void *dev_id)
 		    dev->stats.rx_bytes++;
 	            async_unwrap_char(dev, &dev->stats, &si->rx_buff, STRBR);
 	  	} while (STLSR & LSR_DR);
-		si->last_oscr = OSCR;
+		si->last_oscr = readl_relaxed(OSCR);
 	  	break;
 
 	case 0x02: /* Transmit FIFO Data Request */
@@ -316,7 +316,7 @@ static irqreturn_t pxa_irda_sir_irq(int irq, void *dev_id)
                         /* We need to ensure that the transmitter has finished. */
 			while ((STLSR & LSR_TEMT) == 0)
 				cpu_relax();
-			si->last_oscr = OSCR;
+			si->last_oscr = readl_relaxed(OSCR);
 
 			/*
 		 	* Ok, we've finished transmitting.  Now enable
@@ -370,7 +370,7 @@ static void pxa_irda_fir_dma_tx_irq(int channel, void *data)
 
 	while (ICSR1 & ICSR1_TBY)
 		cpu_relax();
-	si->last_oscr = OSCR;
+	si->last_oscr = readl_relaxed(OSCR);
 
 	/*
 	 * HACK: It looks like the TBY bit is dropped too soon.
@@ -470,7 +470,7 @@ static irqreturn_t pxa_irda_fir_irq(int irq, void *dev_id)
 
 	/* stop RX DMA */
 	DCSR(si->rxdma) &= ~DCSR_RUN;
-	si->last_oscr = OSCR;
+	si->last_oscr = readl_relaxed(OSCR);
 	icsr0 = ICSR0;
 
 	if (icsr0 & (ICSR0_FRE | ICSR0_RAB)) {
@@ -546,7 +546,7 @@ static int pxa_irda_hard_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb_copy_from_linear_data(skb, si->dma_tx_buff, skb->len);
 
 		if (mtt)
-			while ((unsigned)(OSCR - si->last_oscr)/4 < mtt)
+			while ((unsigned)(readl_relaxed(OSCR) - si->last_oscr)/4 < mtt)
 				cpu_relax();
 
 		/* stop RX DMA,  disable FICP */
diff --git a/drivers/pcmcia/sa1100_shannon.c b/drivers/pcmcia/sa1100_shannon.c
index decb34730bcf..56ab73915602 100644
--- a/drivers/pcmcia/sa1100_shannon.c
+++ b/drivers/pcmcia/sa1100_shannon.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/init.h>
+#include <linux/io.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index c17923ec6e95..d3553b5d3fca 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -53,9 +53,9 @@
 #include <linux/delay.h>
 #include <linux/types.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/sizes.h>
 
 #include <asm/io.h>
-#include <asm/sizes.h>
 
 #define UART_NR			14
 
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index 54984deb8561..ccd6b29e21bf 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -54,10 +54,10 @@ static int sa1100dog_open(struct inode *inode, struct file *file)
 		return -EBUSY;
 
 	/* Activate SA1100 Watchdog timer */
-	OSMR3 = OSCR + pre_margin;
-	OSSR = OSSR_M3;
-	OWER = OWER_WME;
-	OIER |= OIER_E3;
+	writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3);
+	writel_relaxed(OSSR_M3, OSSR);
+	writel_relaxed(OWER_WME, OWER);
+	writel_relaxed(readl_relaxed(OIER) | OIER_E3, OIER);
 	return nonseekable_open(inode, file);
 }
 
@@ -80,7 +80,7 @@ static ssize_t sa1100dog_write(struct file *file, const char __user *data,
 {
 	if (len)
 		/* Refresh OSMR3 timer. */
-		OSMR3 = OSCR + pre_margin;
+		writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3);
 	return len;
 }
 
@@ -114,7 +114,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd,
 		break;
 
 	case WDIOC_KEEPALIVE:
-		OSMR3 = OSCR + pre_margin;
+		writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3);
 		ret = 0;
 		break;
 
@@ -129,7 +129,7 @@ static long sa1100dog_ioctl(struct file *file, unsigned int cmd,
 		}
 
 		pre_margin = oscr_freq * time;
-		OSMR3 = OSCR + pre_margin;
+		writel_relaxed(readl_relaxed(OSCR) + pre_margin, OSMR3);
 		/*fall through*/
 
 	case WDIOC_GETTIMEOUT:
diff --git a/include/asm-generic/sizes.h b/include/asm-generic/sizes.h
index ea5d4ef81061..1dcfad9629ef 100644
--- a/include/asm-generic/sizes.h
+++ b/include/asm-generic/sizes.h
@@ -1,47 +1,2 @@
-/*
- * linux/include/asm-generic/sizes.h
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#ifndef __ASM_GENERIC_SIZES_H__
-#define __ASM_GENERIC_SIZES_H__
-
-#define SZ_1				0x00000001
-#define SZ_2				0x00000002
-#define SZ_4				0x00000004
-#define SZ_8				0x00000008
-#define SZ_16				0x00000010
-#define SZ_32				0x00000020
-#define SZ_64				0x00000040
-#define SZ_128				0x00000080
-#define SZ_256				0x00000100
-#define SZ_512				0x00000200
-
-#define SZ_1K				0x00000400
-#define SZ_2K				0x00000800
-#define SZ_4K				0x00001000
-#define SZ_8K				0x00002000
-#define SZ_16K				0x00004000
-#define SZ_32K				0x00008000
-#define SZ_64K				0x00010000
-#define SZ_128K				0x00020000
-#define SZ_256K				0x00040000
-#define SZ_512K				0x00080000
-
-#define SZ_1M				0x00100000
-#define SZ_2M				0x00200000
-#define SZ_4M				0x00400000
-#define SZ_8M				0x00800000
-#define SZ_16M				0x01000000
-#define SZ_32M				0x02000000
-#define SZ_64M				0x04000000
-#define SZ_128M				0x08000000
-#define SZ_256M				0x10000000
-#define SZ_512M				0x20000000
-
-#define SZ_1G				0x40000000
-#define SZ_2G				0x80000000
-
-#endif /* __ASM_GENERIC_SIZES_H__ */
+/* This is a placeholder, to be removed over time */
+#include <linux/sizes.h>
diff --git a/include/linux/sizes.h b/include/linux/sizes.h
new file mode 100644
index 000000000000..ce3e8150c174
--- /dev/null
+++ b/include/linux/sizes.h
@@ -0,0 +1,47 @@
+/*
+ * include/linux/sizes.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_SIZES_H__
+#define __LINUX_SIZES_H__
+
+#define SZ_1				0x00000001
+#define SZ_2				0x00000002
+#define SZ_4				0x00000004
+#define SZ_8				0x00000008
+#define SZ_16				0x00000010
+#define SZ_32				0x00000020
+#define SZ_64				0x00000040
+#define SZ_128				0x00000080
+#define SZ_256				0x00000100
+#define SZ_512				0x00000200
+
+#define SZ_1K				0x00000400
+#define SZ_2K				0x00000800
+#define SZ_4K				0x00001000
+#define SZ_8K				0x00002000
+#define SZ_16K				0x00004000
+#define SZ_32K				0x00008000
+#define SZ_64K				0x00010000
+#define SZ_128K				0x00020000
+#define SZ_256K				0x00040000
+#define SZ_512K				0x00080000
+
+#define SZ_1M				0x00100000
+#define SZ_2M				0x00200000
+#define SZ_4M				0x00400000
+#define SZ_8M				0x00800000
+#define SZ_16M				0x01000000
+#define SZ_32M				0x02000000
+#define SZ_64M				0x04000000
+#define SZ_128M				0x08000000
+#define SZ_256M				0x10000000
+#define SZ_512M				0x20000000
+
+#define SZ_1G				0x40000000
+#define SZ_2G				0x80000000
+
+#endif /* __LINUX_SIZES_H__ */
diff --git a/init/Kconfig b/init/Kconfig
index d07dcf9fc8a9..b3f55f15e107 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -357,7 +357,7 @@ config AUDIT
 
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
-	depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || ARM)
+	depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT))
 	default y if SECURITY_SELINUX
 	help
 	  Enable low-overhead system-call auditing infrastructure that