Diffstat (limited to 'arch/arm/include/asm')
-rw-r--r--  arch/arm/include/asm/arch_timer.h        3
-rw-r--r--  arch/arm/include/asm/delay.h            32
-rw-r--r--  arch/arm/include/asm/locks.h           274
-rw-r--r--  arch/arm/include/asm/memory.h            2
-rw-r--r--  arch/arm/include/asm/perf_event.h       17
-rw-r--r--  arch/arm/include/asm/pmu.h               3
-rw-r--r--  arch/arm/include/asm/spinlock.h         76
-rw-r--r--  arch/arm/include/asm/spinlock_types.h   17
-rw-r--r--  arch/arm/include/asm/timex.h            10
-rw-r--r--  arch/arm/include/asm/uaccess.h          27
-rw-r--r--  arch/arm/include/asm/word-at-a-time.h   96
11 files changed, 204 insertions, 353 deletions
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index ed2e95d46e29..62e75475e57e 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -1,7 +1,10 @@
 #ifndef __ASMARM_ARCH_TIMER_H
 #define __ASMARM_ARCH_TIMER_H
 
+#include <asm/errno.h>
+
 #ifdef CONFIG_ARM_ARCH_TIMER
+#define ARCH_HAS_READ_CURRENT_TIMER
 int arch_timer_of_register(void);
 int arch_timer_sched_clock_init(void);
 #else
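
The arch_timer.h change does two things: it defines ARCH_HAS_READ_CURRENT_TIMER when the architected timer is configured, which the timex.h hunk further down uses to build get_cycles() on top of read_current_timer(), and it pulls in <asm/errno.h>. The errno include presumably exists so the #else branch (trimmed from this hunk) can fail from inline stubs; a minimal sketch of what such a stub would look like, assuming -ENXIO is the error of choice:

static inline int arch_timer_of_register(void)
{
	/* Sketch only: no architected timer support built in. */
	return -ENXIO;
}
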
diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index b2deda181549..dc6145120de3 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -6,9 +6,22 @@
 #ifndef __ASM_ARM_DELAY_H
 #define __ASM_ARM_DELAY_H
 
+#include <asm/memory.h>
 #include <asm/param.h>	/* HZ */
 
-extern void __delay(int loops);
+#define MAX_UDELAY_MS	2
+#define UDELAY_MULT	((UL(2199023) * HZ) >> 11)
+#define UDELAY_SHIFT	30
+
+#ifndef __ASSEMBLY__
+
+extern struct arm_delay_ops {
+	void (*delay)(unsigned long);
+	void (*const_udelay)(unsigned long);
+	void (*udelay)(unsigned long);
+} arm_delay_ops;
+
+#define __delay(n)		arm_delay_ops.delay(n)
 
 /*
  * This function intentionally does not exist; if you see references to
@@ -23,22 +36,27 @@ extern void __bad_udelay(void);
  * division by multiplication: you don't have to worry about
  * loss of precision.
  *
- * Use only for very small delays ( < 1 msec).  Should probably use a
+ * Use only for very small delays ( < 2 msec).  Should probably use a
  * lookup table, really, as the multiplications take much too long with
  * short delays.  This is a "reasonable" implementation, though (and the
  * first constant multiplications gets optimized away if the delay is
  * a constant)
  */
-extern void __udelay(unsigned long usecs);
-extern void __const_udelay(unsigned long);
-
-#define MAX_UDELAY_MS 2
+#define __udelay(n)		arm_delay_ops.udelay(n)
+#define __const_udelay(n)	arm_delay_ops.const_udelay(n)
 
 #define udelay(n)							\
 	(__builtin_constant_p(n) ?					\
 	  ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :		\
-			__const_udelay((n) * ((2199023U*HZ)>>11))) :	\
+			__const_udelay((n) * UDELAY_MULT)) :		\
 	  __udelay(n))
 
+/* Loop-based definitions for assembly code. */
+extern void __loop_delay(unsigned long loops);
+extern void __loop_udelay(unsigned long usecs);
+extern void __loop_const_udelay(unsigned long);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* defined(_ARM_DELAY_H) */
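
The delay.h rework replaces the plain __delay()/__udelay()/__const_udelay() externs with arm_delay_ops, a structure of function pointers, so a platform with a constant-rate timer can substitute a timer-backed implementation at runtime while the loop-based __loop_*() variants stay available to assembly callers. The UDELAY_MULT arithmetic is the "division by multiplication" the comment refers to: 2199023/2^11 is very nearly 2^30/10^6, so udelay(n) hands __const_udelay() roughly n * HZ * 2^30 / 10^6, which only needs to be scaled by loops_per_jiffy and shifted right by UDELAY_SHIFT (30) to give a loop count. A minimal sketch of how a timer-backed override might be installed; everything except arm_delay_ops, UDELAY_MULT and UDELAY_SHIFT is an illustrative assumption, not part of this diff:

#include <linux/delay.h>
#include <linux/init.h>
#include <linux/timex.h>
#include <asm/processor.h>

/* Hypothetical timer-backed delay: busy-wait on the cycle counter. */
static void __timer_delay(unsigned long cycles)
{
	cycles_t start = get_cycles();

	while ((get_cycles() - start) < cycles)
		cpu_relax();
}

static void __timer_const_udelay(unsigned long xloops)
{
	/* xloops ~= usecs * HZ * 2^30 / 10^6 (see UDELAY_MULT); scaling by
	 * loops_per_jiffy and shifting by UDELAY_SHIFT gives cycle counts. */
	__timer_delay(((unsigned long long)xloops * loops_per_jiffy) >> UDELAY_SHIFT);
}

static void __timer_udelay(unsigned long usecs)
{
	__timer_const_udelay(usecs * UDELAY_MULT);
}

static int __init hypothetical_delay_timer_init(void)
{
	/* Swap udelay() and friends over from the loop-based default. */
	arm_delay_ops.delay        = __timer_delay;
	arm_delay_ops.const_udelay = __timer_const_udelay;
	arm_delay_ops.udelay       = __timer_udelay;
	return 0;
}
core_initcall(hypothetical_delay_timer_init);
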
 
diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h
deleted file mode 100644
index ef4c897772d1..000000000000
--- a/arch/arm/include/asm/locks.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- *  arch/arm/include/asm/locks.h
- *
- *  Copyright (C) 2000 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Interrupt safe locking assembler. 
- */
-#ifndef __ASM_PROC_LOCKS_H
-#define __ASM_PROC_LOCKS_H
-
-#if __LINUX_ARM_ARCH__ >= 6
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"1:	ldrex	lr, [%1]\n"			\
-"	sub	lr, lr, %2\n"			\
-"	strex	ip, lr, [%1]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	cmp	lr, #0\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	sub	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"1:	ldrex	lr, [%0]\n"			\
-"	add	lr, lr, %1\n"			\
-"	strex	ip, lr, [%0]\n"			\
-"	teq	ip, #0\n"			\
-"	bne	1b\n"				\
-"	teq	lr, #0\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#else
-
-#define __down_op(ptr,fail)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %0\n"			\
-"	blmi	" #fail				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_ret(ptr,fail)			\
-	({					\
-		unsigned int ret;		\
-	__asm__ __volatile__(			\
-	"@ down_op_ret\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%1]\n"			\
-"	subs	lr, lr, %2\n"			\
-"	str	lr, [%1]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movmi	ip, %1\n"			\
-"	movpl	ip, #0\n"			\
-"	blmi	" #fail "\n"			\
-"	mov	%0, ip"				\
-	: "=&r" (ret)				\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	ret;					\
-	})
-
-#define __up_op(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op\n"				\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movle	ip, %0\n"			\
-"	blle	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-/*
- * The value 0x01000000 supports up to 128 processors and
- * lots of processes.  BIAS must be chosen such that sub'ing
- * BIAS once per CPU will result in the long remaining
- * negative.
- */
-#define RW_LOCK_BIAS      0x01000000
-#define RW_LOCK_BIAS_STR "0x01000000"
-
-#define __down_op_write(ptr,fail)		\
-	({					\
-	__asm__ __volatile__(			\
-	"@ down_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	subs	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movne	ip, %0\n"			\
-"	blne	" #fail				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __up_op_write(ptr,wake)			\
-	({					\
-	__asm__ __volatile__(			\
-	"@ up_op_write\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	movcs	ip, %0\n"			\
-"	blcs	" #wake				\
-	:					\
-	: "r" (ptr), "I" (RW_LOCK_BIAS)		\
-	: "ip", "lr", "cc");			\
-	smp_mb();				\
-	})
-
-#define __down_op_read(ptr,fail)		\
-	__down_op(ptr, fail)
-
-#define __up_op_read(ptr,wake)			\
-	({					\
-	smp_mb();				\
-	__asm__ __volatile__(			\
-	"@ up_op_read\n"			\
-"	mrs	ip, cpsr\n"			\
-"	orr	lr, ip, #128\n"			\
-"	msr	cpsr_c, lr\n"			\
-"	ldr	lr, [%0]\n"			\
-"	adds	lr, lr, %1\n"			\
-"	str	lr, [%0]\n"			\
-"	msr	cpsr_c, ip\n"			\
-"	moveq	ip, %0\n"			\
-"	bleq	" #wake				\
-	:					\
-	: "r" (ptr), "I" (1)			\
-	: "ip", "lr", "cc");			\
-	})
-
-#endif
-
-#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index fcb575747e5e..e965f1b560f1 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -16,7 +16,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 00cbe10a50e3..e074948d8143 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,21 +12,6 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-/* ARM perf PMU IDs for use by internal perf clients. */
-enum arm_perf_pmu_ids {
-	ARM_PERF_PMU_ID_XSCALE1	= 0,
-	ARM_PERF_PMU_ID_XSCALE2,
-	ARM_PERF_PMU_ID_V6,
-	ARM_PERF_PMU_ID_V6MP,
-	ARM_PERF_PMU_ID_CA8,
-	ARM_PERF_PMU_ID_CA9,
-	ARM_PERF_PMU_ID_CA5,
-	ARM_PERF_PMU_ID_CA15,
-	ARM_PERF_PMU_ID_CA7,
-	ARM_NUM_PMU_IDS,
-};
-
-extern enum arm_perf_pmu_ids
-armpmu_get_pmu_id(void);
+/* Nothing to see here... */
 
 #endif /* __ARM_PERF_EVENT_H__ */
diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h
index 90114faa9f3c..4432305f4a2a 100644
--- a/arch/arm/include/asm/pmu.h
+++ b/arch/arm/include/asm/pmu.h
@@ -103,10 +103,9 @@ struct pmu_hw_events {
 
 struct arm_pmu {
 	struct pmu	pmu;
-	enum arm_perf_pmu_ids id;
 	enum arm_pmu_type type;
 	cpumask_t	active_irqs;
-	const char	*name;
+	char		*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
 	void		(*enable)(struct hw_perf_event *evt, int idx);
 	void		(*disable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index 65fa3c88095c..b4ca707d0a69 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -59,18 +59,13 @@ static inline void dsb_sev(void)
 }
 
 /*
- * ARMv6 Spin-locking.
+ * ARMv6 ticket-based spin-locking.
  *
- * We exclusively read the old value.  If it is zero, we may have
- * won the lock, so we try exclusively storing it.  A memory barrier
- * is required after we get a lock, and before we release it, because
- * V6 CPUs are assumed to have weakly ordered memory.
- *
- * Unlocked value: 0
- * Locked value: 1
+ * A memory barrier is required after we get a lock, and before we
+ * release it, because V6 CPUs are assumed to have weakly ordered
+ * memory.
  */
 
-#define arch_spin_is_locked(x)		((x)->lock != 0)
 #define arch_spin_unlock_wait(lock) \
 	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
 
@@ -79,31 +74,39 @@ static inline void dsb_sev(void)
 static inline void arch_spin_lock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 newval;
+	arch_spinlock_t lockval;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-	WFE("ne")
-"	strexeq	%0, %2, [%1]\n"
-"	teqeq	%0, #0\n"
+"1:	ldrex	%0, [%3]\n"
+"	add	%1, %0, %4\n"
+"	strex	%2, %1, [%3]\n"
+"	teq	%2, #0\n"
 "	bne	1b"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
+	while (lockval.tickets.next != lockval.tickets.owner) {
+		wfe();
+		lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
+	}
+
 	smp_mb();
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *lock)
 {
 	unsigned long tmp;
+	u32 slock;
 
 	__asm__ __volatile__(
-"	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-"	strexeq	%0, %2, [%1]"
-	: "=&r" (tmp)
-	: "r" (&lock->lock), "r" (1)
+"	ldrex	%0, [%2]\n"
+"	subs	%1, %0, %0, ror #16\n"
+"	addeq	%0, %0, %3\n"
+"	strexeq	%1, %0, [%2]"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 	: "cc");
 
 	if (tmp == 0) {
@@ -116,17 +119,38 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
+	unsigned long tmp;
+	u32 slock;
+
 	smp_mb();
 
 	__asm__ __volatile__(
-"	str	%1, [%0]\n"
-	:
-	: "r" (&lock->lock), "r" (0)
+"	mov	%1, #1\n"
+"1:	ldrex	%0, [%2]\n"
+"	uadd16	%0, %0, %1\n"
+"	strex	%1, %0, [%2]\n"
+"	teq	%1, #0\n"
+"	bne	1b"
+	: "=&r" (slock), "=&r" (tmp)
+	: "r" (&lock->slock)
 	: "cc");
 
 	dsb_sev();
 }
 
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return tickets.owner != tickets.next;
+}
+
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
+{
+	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	return (tickets.next - tickets.owner) > 1;
+}
+#define arch_spin_is_contended	arch_spin_is_contended
+
 /*
  * RWLOCKS
  *
@@ -158,7 +182,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 	unsigned long tmp;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%1]\n"
+"	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
 "	strexeq	%0, %2, [%1]"
 	: "=&r" (tmp)
@@ -244,7 +268,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 	unsigned long tmp, tmp2 = 1;
 
 	__asm__ __volatile__(
-"1:	ldrex	%0, [%2]\n"
+"	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
 "	strexpl	%1, %0, [%2]\n"
 	: "=&r" (tmp), "+r" (tmp2)
diff --git a/arch/arm/include/asm/spinlock_types.h b/arch/arm/include/asm/spinlock_types.h
index d14d197ae04a..b262d2f8b478 100644
--- a/arch/arm/include/asm/spinlock_types.h
+++ b/arch/arm/include/asm/spinlock_types.h
@@ -5,11 +5,24 @@
 # error "please don't include this file directly"
 #endif
 
+#define TICKET_SHIFT	16
+
 typedef struct {
-	volatile unsigned int lock;
+	union {
+		u32 slock;
+		struct __raw_tickets {
+#ifdef __ARMEB__
+			u16 next;
+			u16 owner;
+#else
+			u16 owner;
+			u16 next;
+#endif
+		} tickets;
+	};
 } arch_spinlock_t;
 
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
 	volatile unsigned int lock;
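
The union layout above is what makes the 32-bit arithmetic in spinlock.h work: taking a ticket adds 1 << TICKET_SHIFT (1 << 16) to the whole slock word, so "next" has to occupy the upper halfword. On little-endian ARM the upper halfword is the second u16 member, hence owner before next; under __ARMEB__ the order flips. A small host-side sanity check of the little-endian case, as an illustration rather than kernel code:

#include <assert.h>
#include <stdint.h>

union ticket_word {
	uint32_t slock;
	struct {
		uint16_t owner;	/* low halfword on little-endian */
		uint16_t next;	/* high halfword on little-endian */
	} tickets;
};

int main(void)
{
	union ticket_word l = { .slock = 0 };

	l.slock += 1u << 16;	/* what taking a ticket does to the word */
	assert(l.tickets.next == 1 && l.tickets.owner == 0);
	return 0;
}
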
diff --git a/arch/arm/include/asm/timex.h b/arch/arm/include/asm/timex.h
index 3be8de3adaba..ce119442277c 100644
--- a/arch/arm/include/asm/timex.h
+++ b/arch/arm/include/asm/timex.h
@@ -12,13 +12,15 @@
 #ifndef _ASMARM_TIMEX_H
 #define _ASMARM_TIMEX_H
 
+#include <asm/arch_timer.h>
 #include <mach/timex.h>
 
 typedef unsigned long cycles_t;
 
-static inline cycles_t get_cycles (void)
-{
-	return 0;
-}
+#ifdef ARCH_HAS_READ_CURRENT_TIMER
+#define get_cycles()	({ cycles_t c; read_current_timer(&c) ? 0 : c; })
+#else
+#define get_cycles()	(0)
+#endif
 
 #endif
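
With this change get_cycles() returns a real counter value whenever the arch_timer.h hunk above has defined ARCH_HAS_READ_CURRENT_TIMER and read_current_timer() succeeds, and 0 otherwise, so callers still have to treat zero as "no cycle counter". A small hedged usage sketch, where do_work() is a placeholder workload:

#include <linux/printk.h>
#include <asm/timex.h>

static void time_do_work(void)
{
	cycles_t start = get_cycles();

	do_work();	/* hypothetical workload being measured */

	if (start)	/* 0 means there is no usable cycle counter */
		pr_info("do_work took %lu cycles\n",
			(unsigned long)(get_cycles() - start));
}
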
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 71f6536d17ac..479a6352e0b5 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -189,6 +189,9 @@ static inline void set_fs(mm_segment_t fs)
 
 #define access_ok(type,addr,size)	(__range_ok(addr,size) == 0)
 
+#define user_addr_max() \
+	(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
+
 /*
  * The "__xxx" versions of the user access functions do not verify the
  * address space - it must have been done previously with a separate
@@ -398,9 +401,6 @@ extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned l
 #define __clear_user(addr,n)		(memset((void __force *)addr, 0, n), 0)
 #endif
 
-extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count);
-extern unsigned long __must_check __strnlen_user(const char __user *s, long n);
-
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (access_ok(VERIFY_READ, from, n))
@@ -427,24 +427,9 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
 	return n;
 }
 
-static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count)
-{
-	long res = -EFAULT;
-	if (access_ok(VERIFY_READ, src, 1))
-		res = __strncpy_from_user(dst, src, count);
-	return res;
-}
-
-#define strlen_user(s)	strnlen_user(s, ~0UL >> 1)
+extern long strncpy_from_user(char *dest, const char __user *src, long count);
 
-static inline long __must_check strnlen_user(const char __user *s, long n)
-{
-	unsigned long res = 0;
-
-	if (__addr_ok(s))
-		res = __strnlen_user(s, n);
-
-	return res;
-}
+extern __must_check long strlen_user(const char __user *str);
+extern __must_check long strnlen_user(const char __user *str, long n);
 
 #endif /* _ASMARM_UACCESS_H */
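
The ARM-private __strncpy_from_user()/__strnlen_user() and their inline wrappers go away in favour of the generic word-at-a-time implementations in lib/, and user_addr_max() is the hook that lets the generic code bound how far it may read: the whole user range under USER_DS, effectively unlimited under KERNEL_DS. A rough sketch of that bounding idea, written as an assumption about how such a helper behaves rather than a copy of the lib/ code (the byte loop stands in for the real word-at-a-time copy):

#include <linux/uaccess.h>

static long strncpy_from_user_sketch(char *dst, const char __user *src, long count)
{
	unsigned long max_addr = user_addr_max();
	unsigned long src_addr = (unsigned long)src;
	long i;

	if (src_addr >= max_addr)
		return -EFAULT;

	/* Never allow a read past the end of the user address space. */
	if (max_addr - src_addr < (unsigned long)count)
		count = max_addr - src_addr;

	for (i = 0; i < count; i++) {
		char c;

		if (__get_user(c, src + i))
			return -EFAULT;
		dst[i] = c;
		if (!c)
			return i;	/* length, excluding the NUL */
	}
	return count;			/* no NUL within 'count' bytes */
}
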
diff --git a/arch/arm/include/asm/word-at-a-time.h b/arch/arm/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..4d52f92967a6
--- /dev/null
+++ b/arch/arm/include/asm/word-at-a-time.h
@@ -0,0 +1,96 @@
+#ifndef __ASM_ARM_WORD_AT_A_TIME_H
+#define __ASM_ARM_WORD_AT_A_TIME_H
+
+#ifndef __ARMEB__
+
+/*
+ * Little-endian word-at-a-time zero byte handling.
+ * Heavily based on the x86 algorithm.
+ */
+#include <linux/kernel.h>
+
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits,
+				     const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+#define prep_zero_mask(a, bits, c) (bits)
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	unsigned long ret;
+
+#if __LINUX_ARM_ARCH__ >= 5
+	/* We have clz available. */
+	ret = fls(mask) >> 3;
+#else
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	ret = (0x0ff0001 + mask) >> 23;
+	/* Fix the 1 for 00 case */
+	ret &= mask;
+#endif
+
+	return ret;
+}
+
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#define zero_bytemask(mask) (mask)
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	ldr	%0, [%2]\n"
+	"2:\n"
+	"	.pushsection .fixup,\"ax\"\n"
+	"	.align 2\n"
+	"3:	and	%1, %2, #0x3\n"
+	"	bic	%2, %2, #0x3\n"
+	"	ldr	%0, [%2]\n"
+	"	lsl	%1, %1, #0x3\n"
+	"	lsr	%0, %0, %1\n"
+	"	b	2b\n"
+	"	.popsection\n"
+	"	.pushsection __ex_table,\"a\"\n"
+	"	.align	3\n"
+	"	.long	1b, 3b\n"
+	"	.popsection"
+	: "=&r" (ret), "=&r" (offset)
+	: "r" (addr), "Qo" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+
+#endif	/* DCACHE_WORD_ACCESS */
+
+#else	/* __ARMEB__ */
+#include <asm-generic/word-at-a-time.h>
+#endif
+
+#endif /* __ASM_ARM_WORD_AT_A_TIME_H */
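
The little-endian helpers above are the classic "find a zero byte a word at a time" trick that the generic strnlen_user()/strncpy_from_user() and the dcache name hashing build on: has_zero() computes (x - 0x01..01) & ~x & 0x80..80, which leaves the top bit set exactly in the bytes that were zero; create_zero_mask() turns that into 0xff in every byte before the first zero; find_zero() converts the mask into a byte index; and load_unaligned_zeropad() feeds the loop words that may cross into an unmapped page, padding the missing part with zeroes via the fixup. A self-contained, host-side demonstration of the same arithmetic on a 32-bit word (the kernel versions operate on unsigned long and take their constants from REPEAT_BYTE()):

#include <stdint.h>
#include <stdio.h>

#define ONES	0x01010101u
#define HIGHS	0x80808080u

/* Index of the first zero byte in a little-endian 32-bit word. */
static unsigned int first_zero_byte(uint32_t x)
{
	uint32_t bits = (x - ONES) & ~x & HIGHS;	/* has_zero() */

	bits = (bits - 1) & ~bits;			/* create_zero_mask(): */
	bits >>= 7;					/* 0xff per byte before the zero */
	return (32 - __builtin_clz(bits | 1)) >> 3;	/* find_zero(): fls(bits) >> 3 */
}

int main(void)
{
	/* Bytes, low to high: 'a' 'b' '\0' 'd' -> the zero is byte 2. */
	uint32_t w = 0x64006261u;

	printf("first zero byte at index %u\n", first_zero_byte(w));
	return 0;
}
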