summary refs log tree commit diff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-30 21:47:12 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-30 21:47:12 -0700
commit05a8256c586ab75bcd6b793737b2022a1a98cb1e (patch)
tree2f25fcae0d63a0bfbfa16d887eb518d8d5c226e2
parent0161b6e0d88e04f54ada7112bb2dad1f3ae472af (diff)
parent5316a64ce518f850afb0fca322b85b6dff3cb59f (diff)
downloadlinux-05a8256c586ab75bcd6b793737b2022a1a98cb1e.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
Pull arch/tile updates from Chris Metcalf:
 "These are a grab bag of changes to improve debugging and respond to a
  variety of issues raised on LKML over the last couple of months"

* git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile:
  tile: avoid a "label not used" warning in do_page_fault()
  tile: vdso: use raw_read_seqcount_begin() in vdso
  tile: force CONFIG_TILEGX if ARCH != tilepro
  tile: improve stack backtrace
  tile: fix "odd fault" warning for stack backtraces
  tile: set up initial stack top to honor STACK_TOP_DELTA
  tile: support delivering NMIs for multicore backtrace
  drivers/tty/hvc/hvc_tile.c: properly return -EAGAIN
  tile: add <asm/word-at-a-time.h> and enable support functions
  tile: use READ_ONCE() in arch_spin_is_locked()
  tile: modify arch_spin_unlock_wait() semantics
-rw-r--r--arch/tile/Kconfig7
-rw-r--r--arch/tile/include/asm/irq.h5
-rw-r--r--arch/tile/include/asm/processor.h2
-rw-r--r--arch/tile/include/asm/spinlock_32.h6
-rw-r--r--arch/tile/include/asm/spinlock_64.h5
-rw-r--r--arch/tile/include/asm/stack.h13
-rw-r--r--arch/tile/include/asm/thread_info.h1
-rw-r--r--arch/tile/include/asm/traps.h8
-rw-r--r--arch/tile/include/asm/uaccess.h66
-rw-r--r--arch/tile/include/asm/word-at-a-time.h36
-rw-r--r--arch/tile/include/hv/hypervisor.h60
-rw-r--r--arch/tile/kernel/entry.S7
-rw-r--r--arch/tile/kernel/hvglue.S3
-rw-r--r--arch/tile/kernel/hvglue_trace.c4
-rw-r--r--arch/tile/kernel/intvec_64.S6
-rw-r--r--arch/tile/kernel/process.c138
-rw-r--r--arch/tile/kernel/setup.c2
-rw-r--r--arch/tile/kernel/stack.c125
-rw-r--r--arch/tile/kernel/traps.c15
-rw-r--r--arch/tile/kernel/vdso/vgettimeofday.c10
-rw-r--r--arch/tile/lib/exports.c3
-rw-r--r--arch/tile/lib/spinlock_32.c11
-rw-r--r--arch/tile/lib/spinlock_64.c11
-rw-r--r--arch/tile/lib/usercopy_32.S46
-rw-r--r--arch/tile/lib/usercopy_64.S46
-rw-r--r--arch/tile/mm/fault.c17
-rw-r--r--drivers/tty/hvc/hvc_tile.c3
27 files changed, 402 insertions, 254 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 59cf0b911898..9def1f52d03a 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -24,11 +24,14 @@ config TILE
 	select MODULES_USE_ELF_RELA
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_SYSCALL_TRACEPOINTS
+	select USER_STACKTRACE_SUPPORT
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select HAVE_DEBUG_STACKOVERFLOW
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_CONTEXT_TRACKING
 	select EDAC_SUPPORT
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
@@ -125,8 +128,10 @@ config HVC_TILE
 	select HVC_IRQ if TILEGX
 	def_bool y
 
+# Building with ARCH=tilegx (or ARCH=tile) implies using the
+# 64-bit TILE-Gx toolchain, so force CONFIG_TILEGX on.
 config TILEGX
-	bool "Building for TILE-Gx (64-bit) processor"
+	def_bool ARCH != "tilepro"
 	select SPARSE_IRQ
 	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index 1fe86911838b..84a924034bdb 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
 
 void setup_irq_regs(void);
 
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
 #endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index dd4f9f17e30a..139dfdee0134 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -111,8 +111,6 @@ struct thread_struct {
 	unsigned long long interrupt_mask;
 	/* User interrupt-control 0 state */
 	unsigned long intctrl_0;
-	/* Is this task currently doing a backtrace? */
-	bool in_backtrace;
 	/* Any other miscellaneous processor state bits */
 	unsigned long proc_status;
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index c0a77b38d39a..b14b1ba5bf9c 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -41,8 +41,12 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 	 * to claim the lock is held, since it will be momentarily
 	 * if not already.  There's no need to wait for a "valid"
 	 * lock->next_ticket to become available.
+	 * Use READ_ONCE() to ensure that calling this in a loop is OK.
 	 */
-	return lock->next_ticket != lock->current_ticket;
+	int curr = READ_ONCE(lock->current_ticket);
+	int next = READ_ONCE(lock->next_ticket);
+
+	return next != curr;
 }
 
 void arch_spin_lock(arch_spinlock_t *lock);
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 9a12b9c7e5d3..b9718fb4e74a 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -18,6 +18,8 @@
 #ifndef _ASM_TILE_SPINLOCK_64_H
 #define _ASM_TILE_SPINLOCK_64_H
 
+#include <linux/compiler.h>
+
 /* Shifts and masks for the various fields in "lock". */
 #define __ARCH_SPIN_CURRENT_SHIFT	17
 #define __ARCH_SPIN_NEXT_MASK		0x7fff
@@ -44,7 +46,8 @@ static inline u32 arch_spin_next(u32 val)
 /* The lock is locked if a task would have to wait to get it. */
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	u32 val = lock->lock;
+	/* Use READ_ONCE() to ensure that calling this in a loop is OK. */
+	u32 val = READ_ONCE(lock->lock);
 	return arch_spin_current(val) != arch_spin_next(val);
 }
 
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 0e9d382a2d45..c3cb42615a9f 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -58,17 +58,14 @@ extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
 /* Advance to the next frame. */
 extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
 
+/* Dump just the contents of the pt_regs structure. */
+extern void tile_show_regs(struct pt_regs *);
+
 /*
  * Dump stack given complete register info. Use only from the
  * architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
  */
-extern void tile_show_stack(struct KBacktraceIterator *, int headers);
-
-/* Dump stack of current process, with registers to seed the backtrace. */
-extern void dump_stack_regs(struct pt_regs *);
-
-/* Helper method for assembly dump_stack(). */
-extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+extern void tile_show_stack(struct KBacktraceIterator *);
 
 #endif /* _ASM_TILE_STACK_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index f804c39a5e4d..dc1fb28d9636 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -42,6 +42,7 @@ struct thread_info {
 	unsigned long		unalign_jit_tmp[4]; /* temp r0..r3 storage */
 	void __user		*unalign_jit_base; /* unalign fixup JIT base */
 #endif
+	bool in_backtrace;			/* currently doing backtrace? */
 };
 
 /*
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 4b99a1c3aab2..11c82270c1f5 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
 /* kernel/messaging.c */
 void hv_message_intr(struct pt_regs *, int intnum);
 
+#define	TILE_NMI_DUMP_STACK	1	/* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
 /* kernel/irq.c */
 void tile_dev_intr(struct pt_regs *, int intnum);
 
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index a33276bf5ca1..0a9c4265763b 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -65,6 +65,13 @@ static inline int is_arch_mappable_range(unsigned long addr,
 #endif
 
 /*
+ * Note that using this definition ignores is_arch_mappable_range(),
+ * so on tilepro code that uses user_addr_max() is constrained not
+ * to reference the tilepro user-interrupt region.
+ */
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
+
+/*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
  */
@@ -471,62 +478,9 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
 #endif
 
 
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only.  This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-extern long strnlen_user_asm(const char __user *str, long n);
-static inline long __must_check strnlen_user(const char __user *str, long n)
-{
-	might_fault();
-	return strnlen_user_asm(str, n);
-}
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- * Caller must check the specified block with access_ok() before calling
- * this function.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
-static inline long __must_check __strncpy_from_user(
-	char *dst, const char __user *src, long count)
-{
-	might_fault();
-	return strncpy_from_user_asm(dst, src, count);
-}
-static inline long __must_check strncpy_from_user(
-	char *dst, const char __user *src, long count)
-{
-	if (access_ok(VERIFY_READ, src, 1))
-		return __strncpy_from_user(dst, src, count);
-	return -EFAULT;
-}
+extern long strnlen_user(const char __user *str, long n);
+extern long strlen_user(const char __user *str);
+extern long strncpy_from_user(char *dst, const char __user *src, long);
 
 /**
  * clear_user: - Zero a block of memory in user space.
diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..9e5ce0d7b292
--- /dev/null
+++ b/arch/tile/include/asm/word-at-a-time.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/byteorder.h>
+
+struct word_at_a_time { /* unused */ };
+#define WORD_AT_A_TIME_CONSTANTS {}
+
+/* Generate 0x01 byte values for non-zero bytes using a SIMD instruction. */
+static inline unsigned long has_zero(unsigned long val, unsigned long *data,
+				     const struct word_at_a_time *c)
+{
+#ifdef __tilegx__
+	unsigned long mask = __insn_v1cmpeqi(val, 0);
+#else /* tilepro */
+	unsigned long mask = __insn_seqib(val, 0);
+#endif
+	*data = mask;
+	return mask;
+}
+
+/* These operations are both nops. */
+#define prep_zero_mask(val, data, c) (data)
+#define create_zero_mask(data) (data)
+
+/* And this operation just depends on endianness. */
+static inline long find_zero(unsigned long mask)
+{
+#ifdef __BIG_ENDIAN
+	return __builtin_clzl(mask) >> 3;
+#else
+	return __builtin_ctzl(mask) >> 3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index e0e6af4e783b..f10b332b3b65 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -321,8 +321,11 @@
 /** hv_console_set_ipi */
 #define HV_DISPATCH_CONSOLE_SET_IPI               63
 
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI                      65
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END                          64
+#define _HV_DISPATCH_END                          66
 
 
 #ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
 #define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
 /** Device interrupt downcall interrupt vector */
 #define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL            64
+
+#define HV_NMI_FLAG_FORCE    0x1  /**< Force an NMI downcall regardless of
+               the ICS bit of the client. */
 
 #ifndef __ASSEMBLER__
 
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
 int hv_dev_poll_cancel(int devhdl);
 
 
+/** NMI information */
+typedef struct
+{
+  /** Result: negative error, or HV_NMI_RESULT_xxx. */
+  int result;
+
+  /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+  HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK        0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS  1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV   2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE    0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ *  This will cause the NMI to be issued on the remote tile regardless
+ *  of the state of the client interrupt mask.  However, if the remote
+ *  tile is in the hypervisor, it will not execute the NMI, and
+ *  HV_NMI_RESULT_FAIL_HV will be returned.  Similarly, if the remote
+ *  tile is in a client interrupt critical section at the time of the
+ *  NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ *  be returned.  In this second case, however, if HV_NMI_FLAG_FORCE
+ *  is set in flags, then the remote tile will enter its NMI interrupt
+ *  vector regardless.  Forcing the NMI vector during an interrupt
+ *  critical section will mean that the client can not safely continue
+ *  execution after handling the interrupt.
+ *
+ *  @param tile Tile to which the NMI request is sent.
+ *  @param info NMI information which is defined by and interpreted by the
+ *         supervisor, is passed to the specified tile, and is
+ *         stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ *         specified tile when entering the NMI handler routine.
+ *         Typically, this parameter stores the NMI type, or an aligned
+ *         VA plus some special bits, etc.
+ *  @param flags Flags (HV_NMI_FLAG_xxx).
+ *  @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
 /** Scatter-gather list for preada/pwritea calls. */
 typedef struct
 #if CHIP_VA_WIDTH() <= 32
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 3d9175992a20..670a3569450f 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -27,13 +27,6 @@ STD_ENTRY(current_text_addr)
 	{ move r0, lr; jrp lr }
 	STD_ENDPROC(current_text_addr)
 
-STD_ENTRY(dump_stack)
-	{ move r2, lr; lnk r1 }
-	{ move r4, r52; addli r1, r1, dump_stack - . }
-	{ move r3, sp; j _dump_stack }
-	jrp lr   /* keep backtracer happy */
-	STD_ENDPROC(dump_stack)
-
 STD_ENTRY(KBacktraceIterator_init_current)
 	{ move r2, lr; lnk r1 }
 	{ move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S
index 2ab456622391..d78ee2ad610c 100644
--- a/arch/tile/kernel/hvglue.S
+++ b/arch/tile/kernel/hvglue.S
@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
 gensym hv_get_ipi_pte, 0x700, 32
 gensym hv_set_pte_super_shift, 0x720, 32
 gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x820, 30688
diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c
index 85c74ad29312..add0d71395c6 100644
--- a/arch/tile/kernel/hvglue_trace.c
+++ b/arch/tile/kernel/hvglue_trace.c
@@ -75,6 +75,7 @@
 #define hv_get_ipi_pte _hv_get_ipi_pte
 #define hv_set_pte_super_shift _hv_set_pte_super_shift
 #define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
 #include <hv/hypervisor.h>
 #undef hv_init
 #undef hv_install_context
@@ -134,6 +135,7 @@
 #undef hv_get_ipi_pte
 #undef hv_set_pte_super_shift
 #undef hv_console_set_ipi
+#undef hv_send_nmi
 
 /*
  * Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
 	 HV_VirtAddr, tlb_va, unsigned long, tlb_length,
 	 unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
 	 HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+	 __hv64, flags)
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 5b67efcecabd..800b91d3f9dc 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -515,6 +515,10 @@ intvec_\vecname:
 	.ifc \c_routine, handle_perf_interrupt
 	mfspr   r2, AUX_PERF_COUNT_STS
 	.endif
+	.ifc \c_routine, do_nmi
+	mfspr   r2, SPR_SYSTEM_SAVE_K_2   /* nmi type */
+	.else
+	.endif
 	.endif
 	.endif
 	.endif
@@ -1571,3 +1575,5 @@ intrpt_start:
 
 	/* Synthetic interrupt delivered only by the simulator */
 	int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+	/* Synthetic interrupt delivered by hv */
+	int_hand     INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index b403c2e3e263..a45213781ad0 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/delay.h>
 #include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
@@ -132,7 +133,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		       (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
 		callee_regs[0] = sp;   /* r30 = function */
 		callee_regs[1] = arg;  /* r31 = arg */
-		childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
 		p->thread.pc = (unsigned long) ret_from_kernel_thread;
 		return 0;
 	}
@@ -546,31 +546,141 @@ void exit_thread(void)
 #endif
 }
 
-void show_regs(struct pt_regs *regs)
+void tile_show_regs(struct pt_regs *regs)
 {
-	struct task_struct *tsk = validate_current();
 	int i;
-
-	if (tsk != &corrupt_current)
-		show_regs_print_info(KERN_ERR);
 #ifdef __tilegx__
 	for (i = 0; i < 17; i++)
-		pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
 		       i, regs->regs[i], i+18, regs->regs[i+18],
 		       i+36, regs->regs[i+36]);
-	pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n",
+	pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
 	       regs->regs[17], regs->regs[35], regs->tp);
-	pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr);
+	pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
 #else
 	for (i = 0; i < 13; i++)
-		pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
+		       " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
 		       i, regs->regs[i], i+14, regs->regs[i+14],
 		       i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
-	pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n",
+	pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
 	       regs->regs[13], regs->tp, regs->sp, regs->lr);
 #endif
-	pr_err(" pc : " REGFMT " ex1: %ld     faultnum: %ld\n",
-	       regs->pc, regs->ex1, regs->faultnum);
+	pr_err(" pc : "REGFMT" ex1: %ld     faultnum: %ld flags:%s%s%s%s\n",
+	       regs->pc, regs->ex1, regs->faultnum,
+	       is_compat_task() ? " compat" : "",
+	       (regs->flags & PT_FLAGS_DISABLE_IRQ) ? " noirq" : "",
+	       !(regs->flags & PT_FLAGS_CALLER_SAVES) ? " nocallersave" : "",
+	       (regs->flags & PT_FLAGS_RESTORE_REGS) ? " restoreregs" : "");
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	struct KBacktraceIterator kbt;
+
+	show_regs_print_info(KERN_DEFAULT);
+	tile_show_regs(regs);
+
+	KBacktraceIterator_init(&kbt, NULL, regs);
+	tile_show_stack(&kbt);
+}
+
+/* To ensure stack dump on tiles occurs one by one. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* To ensure no backtrace occurs before all of the stack dump are done. */
+static atomic_t backtrace_cpus;
+/* The cpu mask to avoid reentrance. */
+static struct cpumask backtrace_mask;
 
-	dump_stack_regs(regs);
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+	int is_idle = is_idle_task(current) && !in_interrupt();
+	int cpu;
+
+	nmi_enter();
+	cpu = smp_processor_id();
+	if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
+		goto done;
+
+	spin_lock(&backtrace_lock);
+	if (is_idle)
+		pr_info("CPU: %d idle\n", cpu);
+	else
+		show_regs(regs);
+	spin_unlock(&backtrace_lock);
+	atomic_dec(&backtrace_cpus);
+done:
+	nmi_exit();
+}
+
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+	struct cpumask mask;
+	HV_Coord tile;
+	unsigned int timeout;
+	int cpu;
+	int ongoing;
+	HV_NMI_Info info[NR_CPUS];
+
+	ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+	if (ongoing != 0) {
+		pr_err("Trying to do all-cpu backtrace.\n");
+		pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+		       ongoing);
+		if (self) {
+			pr_err("Reporting the stack on this cpu only.\n");
+			dump_stack();
+		}
+		return;
+	}
+
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+	cpumask_copy(&backtrace_mask, &mask);
+
+	/* Backtrace for myself first. */
+	if (self)
+		dump_stack();
+
+	/* Tentatively dump stack on remote tiles via NMI. */
+	timeout = 100;
+	while (!cpumask_empty(&mask) && timeout) {
+		for_each_cpu(cpu, &mask) {
+			tile.x = cpu_x(cpu);
+			tile.y = cpu_y(cpu);
+			info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+			if (info[cpu].result == HV_NMI_RESULT_OK)
+				cpumask_clear_cpu(cpu, &mask);
+		}
+
+		mdelay(10);
+		timeout--;
+	}
+
+	/* Warn about cpus stuck in ICS and decrement their counts here. */
+	if (!cpumask_empty(&mask)) {
+		for_each_cpu(cpu, &mask) {
+			switch (info[cpu].result) {
+			case HV_NMI_RESULT_FAIL_ICS:
+				pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+					cpu, info[cpu].pc);
+				break;
+			case HV_NMI_RESULT_FAIL_HV:
+				pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+					cpu);
+				break;
+			case HV_ENOSYS:
+				pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+				goto skip_for_each;
+			default:  /* should not happen */
+				pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+					cpu, info[cpu].result, info[cpu].pc);
+				break;
+			}
+		}
+skip_for_each:
+		atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
+	}
 }
+#endif /* __tilegx_ */
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index d366675e4bf8..99c9ff87e018 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -71,7 +71,7 @@ static unsigned long __initdata node_percpu[MAX_NUMNODES];
  * per-CPU stack and boot info.
  */
 DEFINE_PER_CPU(unsigned long, boot_sp) =
-	(unsigned long)init_stack + THREAD_SIZE;
+	(unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA;
 
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index c42dce50acd8..35d34635e4f1 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -23,6 +23,7 @@
 #include <linux/mmzone.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/hardirq.h>
 #include <linux/string.h>
 #include <asm/backtrace.h>
 #include <asm/page.h>
@@ -109,7 +110,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 		if (kbt->verbose)
 			pr_err("  <%s while in user mode>\n", fault);
 	} else {
-		if (kbt->verbose)
+		if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0))
 			pr_err("  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
 			       p->pc, p->sp, p->ex1);
 		return NULL;
@@ -119,10 +120,12 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 	return p;
 }
 
-/* Is the pc pointing to a sigreturn trampoline? */
-static int is_sigreturn(unsigned long pc)
+/* Is the iterator pointing to a sigreturn trampoline? */
+static int is_sigreturn(struct KBacktraceIterator *kbt)
 {
-	return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
+	return kbt->task->mm &&
+		(kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base +
+				(ulong)&__vdso_rt_sigreturn));
 }
 
 /* Return a pt_regs pointer for a valid signal handler frame */
@@ -131,7 +134,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
 {
 	BacktraceIterator *b = &kbt->it;
 
-	if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
+	if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET &&
 	    b->sp % sizeof(long) == 0) {
 		int retval;
 		pagefault_disable();
@@ -151,11 +154,6 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
 	return NULL;
 }
 
-static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
-{
-	return is_sigreturn(kbt->it.pc);
-}
-
 static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
 {
 	struct pt_regs *p;
@@ -178,7 +176,7 @@ static int KBacktraceIterator_next_item_inclusive(
 {
 	for (;;) {
 		do {
-			if (!KBacktraceIterator_is_sigreturn(kbt))
+			if (!is_sigreturn(kbt))
 				return KBT_ONGOING;
 		} while (backtrace_next(&kbt->it));
 
@@ -357,51 +355,50 @@ static void describe_addr(struct KBacktraceIterator *kbt,
  */
 static bool start_backtrace(void)
 {
-	if (current->thread.in_backtrace) {
+	if (current_thread_info()->in_backtrace) {
 		pr_err("Backtrace requested while in backtrace!\n");
 		return false;
 	}
-	current->thread.in_backtrace = true;
+	current_thread_info()->in_backtrace = true;
 	return true;
 }
 
 static void end_backtrace(void)
 {
-	current->thread.in_backtrace = false;
+	current_thread_info()->in_backtrace = false;
 }
 
 /*
  * This method wraps the backtracer's more generic support.
  * It is only invoked from the architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
  */
-void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+void tile_show_stack(struct KBacktraceIterator *kbt)
 {
 	int i;
 	int have_mmap_sem = 0;
 
 	if (!start_backtrace())
 		return;
-	if (headers) {
-		/*
-		 * Add a blank line since if we are called from panic(),
-		 * then bust_spinlocks() spit out a space in front of us
-		 * and it will mess up our KERN_ERR.
-		 */
-		pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
-		       kbt->task->pid, kbt->task->tgid, kbt->task->comm,
-		       raw_smp_processor_id(), get_cycles());
-	}
 	kbt->verbose = 1;
 	i = 0;
 	for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
 		char namebuf[KSYM_NAME_LEN+100];
 		unsigned long address = kbt->it.pc;
 
-		/* Try to acquire the mmap_sem as we pass into userspace. */
-		if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+		/*
+		 * Try to acquire the mmap_sem as we pass into userspace.
+		 * If we're in an interrupt context, don't even try, since
+		 * it's not safe to call e.g. d_path() from an interrupt,
+		 * since it uses spin locks without disabling interrupts.
+		 * Note we test "kbt->task == current", not "kbt->is_current",
+		 * since we're checking that "current" will work in d_path().
+		 */
+		if (kbt->task == current && address < PAGE_OFFSET &&
+		    !have_mmap_sem && kbt->task->mm && !in_interrupt()) {
 			have_mmap_sem =
 				down_read_trylock(&kbt->task->mm->mmap_sem);
+		}
 
 		describe_addr(kbt, address, have_mmap_sem,
 			      namebuf, sizeof(namebuf));
@@ -416,24 +413,12 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 	}
 	if (kbt->end == KBT_LOOP)
 		pr_err("Stack dump stopped; next frame identical to this one\n");
-	if (headers)
-		pr_err("Stack dump complete\n");
 	if (have_mmap_sem)
 		up_read(&kbt->task->mm->mmap_sem);
 	end_backtrace();
 }
 EXPORT_SYMBOL(tile_show_stack);
 
-
-/* This is called from show_regs() and _dump_stack() */
-void dump_stack_regs(struct pt_regs *regs)
-{
-	struct KBacktraceIterator kbt;
-	KBacktraceIterator_init(&kbt, NULL, regs);
-	tile_show_stack(&kbt, 1);
-}
-EXPORT_SYMBOL(dump_stack_regs);
-
 static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
 				       ulong pc, ulong lr, ulong sp, ulong r52)
 {
@@ -445,11 +430,15 @@ static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
 	return regs;
 }
 
-/* This is called from dump_stack() and just converts to pt_regs */
+/* Deprecated function currently only used by kernel_double_fault(). */
 void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
+	struct KBacktraceIterator kbt;
 	struct pt_regs regs;
-	dump_stack_regs(regs_to_pt_regs(&regs, pc, lr, sp, r52));
+
+	regs_to_pt_regs(&regs, pc, lr, sp, r52);
+	KBacktraceIterator_init(&kbt, NULL, &regs);
+	tile_show_stack(&kbt);
 }
 
 /* This is called from KBacktraceIterator_init_current() */
@@ -461,22 +450,30 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
 				regs_to_pt_regs(&regs, pc, lr, sp, r52));
 }
 
-/* This is called only from kernel/sched/core.c, with esp == NULL */
+/*
+ * Called from sched_show_task() with task != NULL, or dump_stack()
+ * with task == NULL.  The esp argument is always NULL.
+ */
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
 	struct KBacktraceIterator kbt;
-	if (task == NULL || task == current)
+	if (task == NULL || task == current) {
 		KBacktraceIterator_init_current(&kbt);
-	else
+		KBacktraceIterator_next(&kbt);  /* don't show first frame */
+	} else {
 		KBacktraceIterator_init(&kbt, task, NULL);
-	tile_show_stack(&kbt, 0);
+	}
+	tile_show_stack(&kbt);
 }
 
 #ifdef CONFIG_STACKTRACE
 
 /* Support generic Linux stack API too */
 
-void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+static void save_stack_trace_common(struct task_struct *task,
+				    struct pt_regs *regs,
+				    bool user,
+				    struct stack_trace *trace)
 {
 	struct KBacktraceIterator kbt;
 	int skip = trace->skip;
@@ -484,31 +481,57 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
 
 	if (!start_backtrace())
 		goto done;
-	if (task == NULL || task == current)
+	if (regs != NULL) {
+		KBacktraceIterator_init(&kbt, NULL, regs);
+	} else if (task == NULL || task == current) {
 		KBacktraceIterator_init_current(&kbt);
-	else
+		skip++;  /* don't show KBacktraceIterator_init_current */
+	} else {
 		KBacktraceIterator_init(&kbt, task, NULL);
+	}
 	for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
 		if (skip) {
 			--skip;
 			continue;
 		}
-		if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
+		if (i >= trace->max_entries ||
+		    (!user && kbt.it.pc < PAGE_OFFSET))
 			break;
 		trace->entries[i++] = kbt.it.pc;
 	}
 	end_backtrace();
 done:
+	if (i < trace->max_entries)
+		trace->entries[i++] = ULONG_MAX;
 	trace->nr_entries = i;
 }
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+	save_stack_trace_common(task, NULL, false, trace);
+}
 EXPORT_SYMBOL(save_stack_trace_tsk);
 
 void save_stack_trace(struct stack_trace *trace)
 {
-	save_stack_trace_tsk(NULL, trace);
+	save_stack_trace_common(NULL, NULL, false, trace);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	save_stack_trace_common(NULL, regs, false, trace);
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+	/* Trace user stack if we are not a kernel thread. */
+	if (current->mm)
+		save_stack_trace_common(NULL, task_pt_regs(current),
+					true, trace);
+	else if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 #endif
 
 /* In entry.S */
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 312fc134c1cb..0011a9ff0525 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -395,6 +395,21 @@ done:
 	exception_exit(prev_state);
 }
 
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+	switch (reason) {
+	case TILE_NMI_DUMP_STACK:
+		do_nmi_dump_stack(regs);
+		break;
+	default:
+		panic("Unexpected do_nmi type %ld", reason);
+		return;
+	}
+}
+
+/* Deprecated function currently only used here. */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
 	_dump_stack(dummy, pc, lr, sp, r52);
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
index 8bb21eda07d8..e63310c49742 100644
--- a/arch/tile/kernel/vdso/vgettimeofday.c
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -67,7 +67,7 @@ static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
 	u64 ns;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->wall_time_sec;
 		ns = vdso->wall_time_snsec;
 		ns += vgetsns(vdso);
@@ -86,7 +86,7 @@ static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
 	u64 ns;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->monotonic_time_sec;
 		ns = vdso->monotonic_time_snsec;
 		ns += vgetsns(vdso);
@@ -105,7 +105,7 @@ static inline int do_realtime_coarse(struct vdso_data *vdso,
 	unsigned count;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->wall_time_coarse_sec;
 		ts->tv_nsec = vdso->wall_time_coarse_nsec;
 	} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -119,7 +119,7 @@ static inline int do_monotonic_coarse(struct vdso_data *vdso,
 	unsigned count;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->monotonic_time_coarse_sec;
 		ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
 	} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -137,7 +137,7 @@ struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
 	/* The use of the timezone is obsolete, normally tz is NULL. */
 	if (unlikely(tz != NULL)) {
 		do {
-			count = read_seqcount_begin(&vdso->tz_seq);
+			count = raw_read_seqcount_begin(&vdso->tz_seq);
 			tz->tz_minuteswest = vdso->tz_minuteswest;
 			tz->tz_dsttime = vdso->tz_dsttime;
 		} while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 82733c87d67e..9d171ca4302c 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -18,8 +18,6 @@
 
 /* arch/tile/lib/usercopy.S */
 #include <linux/uaccess.h>
-EXPORT_SYMBOL(strnlen_user_asm);
-EXPORT_SYMBOL(strncpy_from_user_asm);
 EXPORT_SYMBOL(clear_user_asm);
 EXPORT_SYMBOL(flush_user_asm);
 EXPORT_SYMBOL(finv_user_asm);
@@ -28,7 +26,6 @@ EXPORT_SYMBOL(finv_user_asm);
 #include <linux/kernel.h>
 #include <asm/processor.h>
 EXPORT_SYMBOL(current_text_addr);
-EXPORT_SYMBOL(dump_stack);
 
 /* arch/tile/kernel/head.S */
 EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index b34f79aada48..88c2a53362e7 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	u32 iterations = 0;
-	while (arch_spin_is_locked(lock))
+	int curr = READ_ONCE(lock->current_ticket);
+	int next = READ_ONCE(lock->next_ticket);
+
+	/* Return immediately if unlocked. */
+	if (next == curr)
+		return;
+
+	/* Wait until the current locker has released the lock. */
+	do {
 		delay_backoff(iterations++);
+	} while (READ_ONCE(lock->current_ticket) == curr);
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index d6fb9581e980..c8d1f94ff1fe 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	u32 iterations = 0;
-	while (arch_spin_is_locked(lock))
+	u32 val = READ_ONCE(lock->lock);
+	u32 curr = arch_spin_current(val);
+
+	/* Return immediately if unlocked. */
+	if (arch_spin_next(val) == curr)
+		return;
+
+	/* Wait until the current locker has released the lock. */
+	do {
 		delay_backoff(iterations++);
+	} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index 1bc162224638..db93ad5fae25 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -20,52 +20,6 @@
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
 /*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
-	{ bz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
-1:      { lb_u r4, r0; addi r1, r1, -1 }
-	bz r4, 2f
-	{ bnzt r1, 1b; addi r0, r0, 1 }
-2:      { sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strnlen_user_asm)
-	.pushsection .fixup,"ax"
-strnlen_user_fault:
-	{ move r0, zero; jrp lr }
-	ENDPROC(strnlen_user_fault)
-	.section __ex_table,"a"
-	.align 4
-	.word 1b, strnlen_user_fault
-	.popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2.  On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
-	{ bz r2, 2f; move r3, r0 }
-1:	{ lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
-	{ sb r0, r4; addi r0, r0, 1 }
-	bz r4, 2f
-	bnzt r2, 1b
-	{ sub r0, r0, r3; jrp lr }
-2:	addi r0, r0, -1   /* don't count the trailing NUL */
-	{ sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strncpy_from_user_asm)
-	.pushsection .fixup,"ax"
-strncpy_from_user_fault:
-	{ movei r0, -EFAULT; jrp lr }
-	ENDPROC(strncpy_from_user_fault)
-	.section __ex_table,"a"
-	.align 4
-	.word 1b, strncpy_from_user_fault
-	.popsection
-
-/*
  * clear_user_asm takes the user target address in r0 and the
  * number of bytes to zero in r1.
  * It returns the number of uncopiable bytes (hopefully zero) in r0.
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index b3b31a3306f8..9322dc551e91 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -20,52 +20,6 @@
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
 /*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
-	{ beqz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
-1:      { ld1u r4, r0; addi r1, r1, -1 }
-	beqz r4, 2f
-	{ bnezt r1, 1b; addi r0, r0, 1 }
-2:      { sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strnlen_user_asm)
-	.pushsection .fixup,"ax"
-strnlen_user_fault:
-	{ move r0, zero; jrp lr }
-	ENDPROC(strnlen_user_fault)
-	.section __ex_table,"a"
-	.align 8
-	.quad 1b, strnlen_user_fault
-	.popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2.  On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
-	{ beqz r2, 2f; move r3, r0 }
-1:	{ ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
-	{ st1 r0, r4; addi r0, r0, 1 }
-	beqz r4, 2f
-	bnezt r2, 1b
-	{ sub r0, r0, r3; jrp lr }
-2:	addi r0, r0, -1   /* don't count the trailing NUL */
-	{ sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strncpy_from_user_asm)
-	.pushsection .fixup,"ax"
-strncpy_from_user_fault:
-	{ movei r0, -EFAULT; jrp lr }
-	ENDPROC(strncpy_from_user_fault)
-	.section __ex_table,"a"
-	.align 8
-	.quad 1b, strncpy_from_user_fault
-	.popsection
-
-/*
  * clear_user_asm takes the user target address in r0 and the
  * number of bytes to zero in r1.
  * It returns the number of uncopiable bytes (hopefully zero) in r0.
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 3f4f58d34a92..13eac59bf16a 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -699,11 +699,10 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
  * interrupt away appropriately and return immediately.  We can't do
  * page faults for user code while in kernel mode.
  */
-void do_page_fault(struct pt_regs *regs, int fault_num,
-		   unsigned long address, unsigned long write)
+static inline void __do_page_fault(struct pt_regs *regs, int fault_num,
+				   unsigned long address, unsigned long write)
 {
 	int is_page_fault;
-	enum ctx_state prev_state = exception_enter();
 
 #ifdef CONFIG_KPROBES
 	/*
@@ -713,7 +712,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 	 */
 	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
 		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
-		goto done;
+		return;
 #endif
 
 #ifdef __tilegx__
@@ -835,18 +834,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			async->is_fault = is_page_fault;
 			async->is_write = write;
 			async->address = address;
-			goto done;
+			return;
 		}
 	}
 #endif
 
 	handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
 
-done:
+void do_page_fault(struct pt_regs *regs, int fault_num,
+		   unsigned long address, unsigned long write)
+{
+	enum ctx_state prev_state = exception_enter();
+	__do_page_fault(regs, fault_num, address, write);
 	exception_exit(prev_state);
 }
 
-
 #if CHIP_HAS_TILE_DMA()
 /*
  * This routine effectively re-issues asynchronous page faults
diff --git a/drivers/tty/hvc/hvc_tile.c b/drivers/tty/hvc/hvc_tile.c
index 3f6cd3102db5..9da1e842bbe9 100644
--- a/drivers/tty/hvc/hvc_tile.c
+++ b/drivers/tty/hvc/hvc_tile.c
@@ -51,7 +51,8 @@ int tile_console_write(const char *buf, int count)
 			      _SIM_CONTROL_OPERATOR_BITS));
 		return 0;
 	} else {
-		return hv_console_write((HV_VirtAddr)buf, count);
+		/* Translate 0 bytes written to EAGAIN for hvc_console_print. */
+		return hv_console_write((HV_VirtAddr)buf, count) ?: -EAGAIN;
 	}
 }