author     Ingo Molnar <mingo@kernel.org>  2015-08-25 09:59:19 +0200
committer  Ingo Molnar <mingo@kernel.org>  2015-08-25 09:59:19 +0200
commit     8d58b66ed2b000f27658c88a4ed70e8042e86a58 (patch)
tree       5bae2c74f932b5d863f72cb698a6f71260340b26 /arch/tile
parent     13fe86f465b72fc9328d4f5ebc33223c011852ae (diff)
parent     c13dcf9f2d6f5f06ef1bf79ec456df614c5e058b (diff)
download   linux-8d58b66ed2b000f27658c88a4ed70e8042e86a58.tar.gz
Merge tag 'v4.2-rc8' into x86/mm, before applying new changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/tile')
-rw-r--r--  arch/tile/Kconfig                      |   8
-rw-r--r--  arch/tile/include/asm/Kbuild           |   2
-rw-r--r--  arch/tile/include/asm/atomic_64.h      |   3
-rw-r--r--  arch/tile/include/asm/edac.h           |  29
-rw-r--r--  arch/tile/include/asm/hugetlb.h        |  13
-rw-r--r--  arch/tile/include/asm/irq.h            |   5
-rw-r--r--  arch/tile/include/asm/pgtable.h        |   8
-rw-r--r--  arch/tile/include/asm/processor.h      |   2
-rw-r--r--  arch/tile/include/asm/spinlock_32.h    |   6
-rw-r--r--  arch/tile/include/asm/spinlock_64.h    |   5
-rw-r--r--  arch/tile/include/asm/stack.h          |  13
-rw-r--r--  arch/tile/include/asm/thread_info.h    |   1
-rw-r--r--  arch/tile/include/asm/topology.h       |   2
-rw-r--r--  arch/tile/include/asm/traps.h          |   8
-rw-r--r--  arch/tile/include/asm/uaccess.h        |  84
-rw-r--r--  arch/tile/include/asm/word-at-a-time.h |  36
-rw-r--r--  arch/tile/include/hv/hypervisor.h      |  60
-rw-r--r--  arch/tile/kernel/compat_signal.c       |   2
-rw-r--r--  arch/tile/kernel/entry.S               |   7
-rw-r--r--  arch/tile/kernel/hvglue.S              |   3
-rw-r--r--  arch/tile/kernel/hvglue_trace.c        |   4
-rw-r--r--  arch/tile/kernel/intvec_64.S           |   6
-rw-r--r--  arch/tile/kernel/process.c             | 138
-rw-r--r--  arch/tile/kernel/setup.c               |   4
-rw-r--r--  arch/tile/kernel/stack.c               | 127
-rw-r--r--  arch/tile/kernel/traps.c               |  15
-rw-r--r--  arch/tile/kernel/usb.c                 |   1
-rw-r--r--  arch/tile/kernel/vdso/vgettimeofday.c  |  10
-rw-r--r--  arch/tile/lib/exports.c                |   3
-rw-r--r--  arch/tile/lib/memcpy_user_64.c         |   4
-rw-r--r--  arch/tile/lib/spinlock_32.c            |  11
-rw-r--r--  arch/tile/lib/spinlock_64.c            |  11
-rw-r--r--  arch/tile/lib/usercopy_32.S            |  46
-rw-r--r--  arch/tile/lib/usercopy_64.S            |  46
-rw-r--r--  arch/tile/mm/elf.c                     |   2
-rw-r--r--  arch/tile/mm/fault.c                   |  21
-rw-r--r--  arch/tile/mm/highmem.c                 |   3
-rw-r--r--  arch/tile/mm/hugetlbpage.c             |   5
38 files changed, 429 insertions, 325 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index a07e31b50d3f..9def1f52d03a 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -24,10 +24,14 @@ config TILE
 	select MODULES_USE_ELF_RELA
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_SYSCALL_TRACEPOINTS
+	select USER_STACKTRACE_SUPPORT
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select HAVE_DEBUG_STACKOVERFLOW
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_CONTEXT_TRACKING
+	select EDAC_SUPPORT
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
@@ -124,8 +128,10 @@ config HVC_TILE
 	select HVC_IRQ if TILEGX
 	def_bool y
 
+# Building with ARCH=tilegx (or ARCH=tile) implies using the
+# 64-bit TILE-Gx toolchain, so force CONFIG_TILEGX on.
 config TILEGX
-	bool "Building for TILE-Gx (64-bit) processor"
+	def_bool ARCH != "tilepro"
 	select SPARSE_IRQ
 	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index f5433e0e34e0..d8a843163471 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_regs.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += param.h
@@ -27,7 +28,6 @@ generic-y += poll.h
 generic-y += posix_types.h
 generic-y += preempt.h
 generic-y += resource.h
-generic-y += scatterlist.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 7b11c5fadd42..0496970cef82 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -105,9 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
 
-/* Define this to indicate that cmpxchg is an efficient operation. */
-#define __HAVE_ARCH_CMPXCHG
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_TILE_ATOMIC_64_H */
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
deleted file mode 100644
index 87fc83eeaffd..000000000000
--- a/arch/tile/include/asm/edac.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- */
-
-#ifndef _ASM_TILE_EDAC_H
-#define _ASM_TILE_EDAC_H
-
-/* ECC atomic, DMA, SMP and interrupt safe scrub function */
-
-static inline void atomic_scrub(void *va, u32 size)
-{
-	/*
-	 * These is nothing to be done here because CE is
-	 * corrected by the mshim.
-	 */
-	return;
-}
-
-#endif /* _ASM_TILE_EDAC_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 3257733003f8..2fac5be4de26 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -40,10 +40,6 @@ static inline int prepare_hugepage_range(struct file *file,
 	return 0;
 }
 
-static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
-}
-
 static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
 					  unsigned long floor,
@@ -98,15 +94,6 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
 	return *ptep;
 }
 
-static inline int arch_prepare_hugepage(struct page *page)
-{
-	return 0;
-}
-
-static inline void arch_release_hugepage(struct page *page)
-{
-}
-
 static inline void arch_clear_hugepage_flags(struct page *page)
 {
 }
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index 1fe86911838b..84a924034bdb 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -78,4 +78,9 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type);
 
 void setup_irq_regs(void);
 
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self);
+#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
+#endif
+
 #endif /* _ASM_TILE_IRQ_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 95a4f19d16c5..2b05ccbebed9 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -414,10 +414,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
 }
 
 
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
-static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
-				       unsigned long address,
-				       pmd_t *pmdp)
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long address,
+					    pmd_t *pmdp)
 {
 	return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp)));
 }
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index dd4f9f17e30a..139dfdee0134 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -111,8 +111,6 @@ struct thread_struct {
 	unsigned long long interrupt_mask;
 	/* User interrupt-control 0 state */
 	unsigned long intctrl_0;
-	/* Is this task currently doing a backtrace? */
-	bool in_backtrace;
 	/* Any other miscellaneous processor state bits */
 	unsigned long proc_status;
 #if !CHIP_HAS_FIXED_INTVEC_BASE()
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index c0a77b38d39a..b14b1ba5bf9c 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -41,8 +41,12 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 	 * to claim the lock is held, since it will be momentarily
 	 * if not already.  There's no need to wait for a "valid"
 	 * lock->next_ticket to become available.
+	 * Use READ_ONCE() to ensure that calling this in a loop is OK.
 	 */
-	return lock->next_ticket != lock->current_ticket;
+	int curr = READ_ONCE(lock->current_ticket);
+	int next = READ_ONCE(lock->next_ticket);
+
+	return next != curr;
 }
 
 void arch_spin_lock(arch_spinlock_t *lock);
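The READ_ONCE() annotations matter mainly for callers that poll the lock state in a loop: without them the compiler may legally hoist the ticket loads out of the loop and spin forever on stale register values. A minimal illustrative sketch (not part of this patch; the function name is hypothetical):

    /*
     * Illustrative only: poll until the lock is free.  With the
     * READ_ONCE() calls inside arch_spin_is_locked(), each iteration
     * performs fresh loads of current_ticket/next_ticket.
     */
    static void wait_until_unlocked(arch_spinlock_t *lock)
    {
    	while (arch_spin_is_locked(lock))
    		cpu_relax();
    }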
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 9a12b9c7e5d3..b9718fb4e74a 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -18,6 +18,8 @@
 #ifndef _ASM_TILE_SPINLOCK_64_H
 #define _ASM_TILE_SPINLOCK_64_H
 
+#include <linux/compiler.h>
+
 /* Shifts and masks for the various fields in "lock". */
 #define __ARCH_SPIN_CURRENT_SHIFT	17
 #define __ARCH_SPIN_NEXT_MASK		0x7fff
@@ -44,7 +46,8 @@ static inline u32 arch_spin_next(u32 val)
 /* The lock is locked if a task would have to wait to get it. */
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	u32 val = lock->lock;
+	/* Use READ_ONCE() to ensure that calling this in a loop is OK. */
+	u32 val = READ_ONCE(lock->lock);
 	return arch_spin_current(val) != arch_spin_next(val);
 }
 
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 0e9d382a2d45..c3cb42615a9f 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -58,17 +58,14 @@ extern int KBacktraceIterator_end(struct KBacktraceIterator *kbt);
 /* Advance to the next frame. */
 extern void KBacktraceIterator_next(struct KBacktraceIterator *kbt);
 
+/* Dump just the contents of the pt_regs structure. */
+extern void tile_show_regs(struct pt_regs *);
+
 /*
  * Dump stack given complete register info. Use only from the
  * architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
  */
-extern void tile_show_stack(struct KBacktraceIterator *, int headers);
-
-/* Dump stack of current process, with registers to seed the backtrace. */
-extern void dump_stack_regs(struct pt_regs *);
-
-/* Helper method for assembly dump_stack(). */
-extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+extern void tile_show_stack(struct KBacktraceIterator *);
 
 #endif /* _ASM_TILE_STACK_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index f804c39a5e4d..dc1fb28d9636 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -42,6 +42,7 @@ struct thread_info {
 	unsigned long		unalign_jit_tmp[4]; /* temp r0..r3 storage */
 	void __user		*unalign_jit_base; /* unalign fixup JIT base */
 #endif
+	bool in_backtrace;			/* currently doing backtrace? */
 };
 
 /*
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index 938311844233..76b0d0ebb244 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -55,7 +55,7 @@ static inline const struct cpumask *cpumask_of_node(int node)
 #define topology_physical_package_id(cpu)       ((void)(cpu), 0)
 #define topology_core_id(cpu)                   (cpu)
 #define topology_core_cpumask(cpu)              ((void)(cpu), cpu_online_mask)
-#define topology_thread_cpumask(cpu)            cpumask_of(cpu)
+#define topology_sibling_cpumask(cpu)           cpumask_of(cpu)
 #endif
 
 #endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 4b99a1c3aab2..11c82270c1f5 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -52,6 +52,14 @@ void do_timer_interrupt(struct pt_regs *, int fault_num);
 /* kernel/messaging.c */
 void hv_message_intr(struct pt_regs *, int intnum);
 
+#define	TILE_NMI_DUMP_STACK	1	/* Dump stack for sysrq+'l' */
+
+/* kernel/process.c */
+void do_nmi_dump_stack(struct pt_regs *regs);
+
+/* kernel/traps.c */
+void do_nmi(struct pt_regs *, int fault_num, unsigned long reason);
+
 /* kernel/irq.c */
 void tile_dev_intr(struct pt_regs *, int intnum);
 
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index f41cb53cf645..0a9c4265763b 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -65,6 +65,13 @@ static inline int is_arch_mappable_range(unsigned long addr,
 #endif
 
 /*
+ * Note that using this definition ignores is_arch_mappable_range(),
+ * so on tilepro code that uses user_addr_max() is constrained not
+ * to reference the tilepro user-interrupt region.
+ */
+#define user_addr_max() (current_thread_info()->addr_limit.seg)
+
+/*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
  */
@@ -78,7 +85,8 @@ int __range_ok(unsigned long addr, unsigned long size);
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -192,7 +200,8 @@ extern int __get_user_bad(void)
  * @x:   Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space.  It supports simple types like char and int, but not larger
@@ -274,7 +283,8 @@ extern int __put_user_bad(void)
  * @x:   Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space.  It supports simple types like char and int, but not larger
@@ -330,7 +340,8 @@ extern int __put_user_bad(void)
  * @from: Source address, in kernel space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -366,7 +377,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n)
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to kernel space.  Caller must check
  * the specified block with access_ok() before calling this function.
@@ -437,7 +449,8 @@ static inline unsigned long __must_check copy_from_user(void *to,
  * @from: Source address, in user space.
  * @n:    Number of bytes to copy.
  *
- * Context: User context only.  This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ *          enabled.
  *
  * Copy data from user space to user space.  Caller must check
  * the specified blocks with access_ok() before calling this function.
@@ -465,62 +478,9 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
 #endif
 
 
-/**
- * strlen_user: - Get the size of a string in user space.
- * @str: The string to measure.
- *
- * Context: User context only.  This function may sleep.
- *
- * Get the size of a NUL-terminated string in user space.
- *
- * Returns the size of the string INCLUDING the terminating NUL.
- * On exception, returns 0.
- *
- * If there is a limit on the length of a valid string, you may wish to
- * consider using strnlen_user() instead.
- */
-extern long strnlen_user_asm(const char __user *str, long n);
-static inline long __must_check strnlen_user(const char __user *str, long n)
-{
-	might_fault();
-	return strnlen_user_asm(str, n);
-}
-#define strlen_user(str) strnlen_user(str, LONG_MAX)
-
-/**
- * strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
- * @dst:   Destination address, in kernel space.  This buffer must be at
- *         least @count bytes long.
- * @src:   Source address, in user space.
- * @count: Maximum number of bytes to copy, including the trailing NUL.
- *
- * Copies a NUL-terminated string from userspace to kernel space.
- * Caller must check the specified block with access_ok() before calling
- * this function.
- *
- * On success, returns the length of the string (not including the trailing
- * NUL).
- *
- * If access to userspace fails, returns -EFAULT (some data may have been
- * copied).
- *
- * If @count is smaller than the length of the string, copies @count bytes
- * and returns @count.
- */
-extern long strncpy_from_user_asm(char *dst, const char __user *src, long);
-static inline long __must_check __strncpy_from_user(
-	char *dst, const char __user *src, long count)
-{
-	might_fault();
-	return strncpy_from_user_asm(dst, src, count);
-}
-static inline long __must_check strncpy_from_user(
-	char *dst, const char __user *src, long count)
-{
-	if (access_ok(VERIFY_READ, src, 1))
-		return __strncpy_from_user(dst, src, count);
-	return -EFAULT;
-}
+extern long strnlen_user(const char __user *str, long n);
+extern long strlen_user(const char __user *str);
+extern long strncpy_from_user(char *dst, const char __user *src, long);
 
 /**
  * clear_user: - Zero a block of memory in user space.
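With GENERIC_STRNCPY_FROM_USER and GENERIC_STRNLEN_USER selected in the Kconfig hunk above, the out-of-line generic implementations replace the tile asm helpers and callers are unchanged. A hedged sketch of typical use (the helper name is illustrative):

    /*
     * Illustrative caller: fetch a NUL-terminated name from user space.
     * strncpy_from_user() returns the string length on success, the
     * buffer size if the string was truncated, or -EFAULT on fault.
     */
    static long fetch_user_name(char *kbuf, const char __user *uname,
    			    long size)
    {
    	long len = strncpy_from_user(kbuf, uname, size);

    	if (len < 0)
    		return len;			/* -EFAULT */
    	if (len == size)
    		return -ENAMETOOLONG;		/* no NUL within size bytes */
    	return 0;
    }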
diff --git a/arch/tile/include/asm/word-at-a-time.h b/arch/tile/include/asm/word-at-a-time.h
new file mode 100644
index 000000000000..9e5ce0d7b292
--- /dev/null
+++ b/arch/tile/include/asm/word-at-a-time.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+#include <asm/byteorder.h>
+
+struct word_at_a_time { /* unused */ };
+#define WORD_AT_A_TIME_CONSTANTS {}
+
+/* Generate 0x01 byte values for zero bytes using a SIMD instruction. */
+static inline unsigned long has_zero(unsigned long val, unsigned long *data,
+				     const struct word_at_a_time *c)
+{
+#ifdef __tilegx__
+	unsigned long mask = __insn_v1cmpeqi(val, 0);
+#else /* tilepro */
+	unsigned long mask = __insn_seqib(val, 0);
+#endif
+	*data = mask;
+	return mask;
+}
+
+/* These operations are both nops. */
+#define prep_zero_mask(val, data, c) (data)
+#define create_zero_mask(data) (data)
+
+/* And this operation just depends on endianness. */
+static inline long find_zero(unsigned long mask)
+{
+#ifdef __BIG_ENDIAN
+	return __builtin_clzl(mask) >> 3;
+#else
+	return __builtin_ctzl(mask) >> 3;
+#endif
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
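For context, this is how the generic word-at-a-time consumers enabled above combine these hooks; a sketch only (the function name is hypothetical, and it compiles only with a tile toolchain since has_zero() expands to tile SIMD intrinsics):

    /*
     * Hypothetical illustration: index of the first zero byte in
     * 'word', or -1 if all bytes are non-zero.  This is the pattern
     * the generic strnlen_user()/strncpy_from_user() code applies
     * one word at a time.
     */
    static inline long first_zero_byte(unsigned long word)
    {
    	const struct word_at_a_time c = WORD_AT_A_TIME_CONSTANTS;
    	unsigned long data;

    	if (has_zero(word, &data, &c)) {
    		data = prep_zero_mask(word, data, &c);
    		data = create_zero_mask(data);
    		return find_zero(data);
    	}
    	return -1;
    }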
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index e0e6af4e783b..f10b332b3b65 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -321,8 +321,11 @@
 /** hv_console_set_ipi */
 #define HV_DISPATCH_CONSOLE_SET_IPI               63
 
+/** hv_send_nmi */
+#define HV_DISPATCH_SEND_NMI                      65
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END                          64
+#define _HV_DISPATCH_END                          66
 
 
 #ifndef __ASSEMBLER__
@@ -1253,6 +1256,11 @@ void hv_downcall_dispatch(void);
 #define INT_DMATLB_ACCESS_DWNCL  INT_DMA_CPL
 /** Device interrupt downcall interrupt vector */
 #define INT_DEV_INTR_DWNCL       INT_WORLD_ACCESS
+/** NMI downcall interrupt vector */
+#define INT_NMI_DWNCL            64
+
+#define HV_NMI_FLAG_FORCE    0x1  /**< Force an NMI downcall regardless of
+               the ICS bit of the client. */
 
 #ifndef __ASSEMBLER__
 
@@ -1780,6 +1788,56 @@ int hv_dev_poll(int devhdl, __hv32 events, HV_IntArg intarg);
 int hv_dev_poll_cancel(int devhdl);
 
 
+/** NMI information */
+typedef struct
+{
+  /** Result: negative error, or HV_NMI_RESULT_xxx. */
+  int result;
+
+  /** PC from interrupted remote core (if result != HV_NMI_RESULT_FAIL_HV). */
+  HV_VirtAddr pc;
+
+} HV_NMI_Info;
+
+/** NMI issued successfully. */
+#define HV_NMI_RESULT_OK        0
+
+/** NMI not issued: remote tile running at client PL with ICS set. */
+#define HV_NMI_RESULT_FAIL_ICS  1
+
+/** NMI not issued: remote tile waiting in hypervisor. */
+#define HV_NMI_RESULT_FAIL_HV   2
+
+/** Force an NMI downcall regardless of the ICS bit of the client. */
+#define HV_NMI_FLAG_FORCE    0x1
+
+/** Send an NMI interrupt request to a particular tile.
+ *
+ *  This will cause the NMI to be issued on the remote tile regardless
+ *  of the state of the client interrupt mask.  However, if the remote
+ *  tile is in the hypervisor, it will not execute the NMI, and
+ *  HV_NMI_RESULT_FAIL_HV will be returned.  Similarly, if the remote
+ *  tile is in a client interrupt critical section at the time of the
+ *  NMI, it will not execute the NMI, and HV_NMI_RESULT_FAIL_ICS will
+ *  be returned.  In this second case, however, if HV_NMI_FLAG_FORCE
+ *  is set in flags, then the remote tile will enter its NMI interrupt
+ *  vector regardless.  Forcing the NMI vector during an interrupt
+ *  critical section will mean that the client can not safely continue
+ *  execution after handling the interrupt.
+ *
+ *  @param tile Tile to which the NMI request is sent.
+ *  @param info NMI information which is defined by and interpreted by the
+ *         supervisor, is passed to the specified tile, and is
+ *         stored in the SPR register SYSTEM_SAVE_{CLIENT_PL}_2 on the
+ *         specified tile when entering the NMI handler routine.
+ *         Typically, this parameter stores the NMI type, or an aligned
+ *         VA plus some special bits, etc.
+ *  @param flags Flags (HV_NMI_FLAG_xxx).
+ *  @return Information about the requested NMI.
+ */
+HV_NMI_Info hv_send_nmi(HV_Coord tile, unsigned long info, __hv64 flags);
+
+
 /** Scatter-gather list for preada/pwritea calls. */
 typedef struct
 #if CHIP_VA_WIDTH() <= 32
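The process.c hunk later in this diff uses the new call roughly as follows; a condensed, illustrative sketch of sending a stack-dump NMI to one remote tile:

    /*
     * Illustrative only: request a stack dump from one remote cpu.
     * Leaving flags as 0 (rather than HV_NMI_FLAG_FORCE) means a tile
     * in an interrupt critical section reports HV_NMI_RESULT_FAIL_ICS
     * instead of being forced into an unsafe NMI.
     */
    static int send_dump_stack_nmi(int cpu)
    {
    	HV_Coord tile = { .x = cpu_x(cpu), .y = cpu_y(cpu) };
    	HV_NMI_Info info = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);

    	return info.result;	/* HV_NMI_RESULT_OK on success */
    }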
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index e8c2c04143cd..c667e104a0c2 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 	if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
 		return -EFAULT;
 
-	memset(to, 0, sizeof(*to));
-
 	err = __get_user(to->si_signo, &from->si_signo);
 	err |= __get_user(to->si_errno, &from->si_errno);
 	err |= __get_user(to->si_code, &from->si_code);
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 3d9175992a20..670a3569450f 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -27,13 +27,6 @@ STD_ENTRY(current_text_addr)
 	{ move r0, lr; jrp lr }
 	STD_ENDPROC(current_text_addr)
 
-STD_ENTRY(dump_stack)
-	{ move r2, lr; lnk r1 }
-	{ move r4, r52; addli r1, r1, dump_stack - . }
-	{ move r3, sp; j _dump_stack }
-	jrp lr   /* keep backtracer happy */
-	STD_ENDPROC(dump_stack)
-
 STD_ENTRY(KBacktraceIterator_init_current)
 	{ move r2, lr; lnk r1 }
 	{ move r4, r52; addli r1, r1, KBacktraceIterator_init_current - . }
diff --git a/arch/tile/kernel/hvglue.S b/arch/tile/kernel/hvglue.S
index 2ab456622391..d78ee2ad610c 100644
--- a/arch/tile/kernel/hvglue.S
+++ b/arch/tile/kernel/hvglue.S
@@ -71,4 +71,5 @@ gensym hv_flush_all, 0x6e0, 32
 gensym hv_get_ipi_pte, 0x700, 32
 gensym hv_set_pte_super_shift, 0x720, 32
 gensym hv_console_set_ipi, 0x7e0, 32
-gensym hv_glue_internals, 0x800, 30720
+gensym hv_send_nmi, 0x820, 32
+gensym hv_glue_internals, 0x840, 30656
diff --git a/arch/tile/kernel/hvglue_trace.c b/arch/tile/kernel/hvglue_trace.c
index 85c74ad29312..add0d71395c6 100644
--- a/arch/tile/kernel/hvglue_trace.c
+++ b/arch/tile/kernel/hvglue_trace.c
@@ -75,6 +75,7 @@
 #define hv_get_ipi_pte _hv_get_ipi_pte
 #define hv_set_pte_super_shift _hv_set_pte_super_shift
 #define hv_console_set_ipi _hv_console_set_ipi
+#define hv_send_nmi _hv_send_nmi
 #include <hv/hypervisor.h>
 #undef hv_init
 #undef hv_install_context
@@ -134,6 +135,7 @@
 #undef hv_get_ipi_pte
 #undef hv_set_pte_super_shift
 #undef hv_console_set_ipi
+#undef hv_send_nmi
 
 /*
  * Provide macros based on <linux/syscalls.h> to provide a wrapper
@@ -264,3 +266,5 @@ HV_WRAP9(int, hv_flush_remote, HV_PhysAddr, cache_pa,
 	 HV_VirtAddr, tlb_va, unsigned long, tlb_length,
 	 unsigned long, tlb_pgsize, unsigned long*, tlb_cpumask,
 	 HV_Remote_ASID*, asids, int, asidcount)
+HV_WRAP3(HV_NMI_Info, hv_send_nmi, HV_Coord, tile, unsigned long, info,
+	 __hv64, flags)
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 5b67efcecabd..800b91d3f9dc 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -515,6 +515,10 @@ intvec_\vecname:
 	.ifc \c_routine, handle_perf_interrupt
 	mfspr   r2, AUX_PERF_COUNT_STS
 	.endif
+	.ifc \c_routine, do_nmi
+	mfspr   r2, SPR_SYSTEM_SAVE_K_2   /* nmi type */
+	.else
+	.endif
 	.endif
 	.endif
 	.endif
@@ -1571,3 +1575,5 @@ intrpt_start:
 
 	/* Synthetic interrupt delivered only by the simulator */
 	int_hand     INT_BREAKPOINT, BREAKPOINT, do_breakpoint
+	/* Synthetic interrupt delivered by hv */
+	int_hand     INT_NMI_DWNCL, NMI_DWNCL, do_nmi, handle_nmi
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index b403c2e3e263..a45213781ad0 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -27,6 +27,7 @@
 #include <linux/kernel.h>
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <linux/delay.h>
 #include <linux/context_tracking.h>
 #include <asm/stack.h>
 #include <asm/switch_to.h>
@@ -132,7 +133,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		       (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
 		callee_regs[0] = sp;   /* r30 = function */
 		callee_regs[1] = arg;  /* r31 = arg */
-		childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
 		p->thread.pc = (unsigned long) ret_from_kernel_thread;
 		return 0;
 	}
@@ -546,31 +546,141 @@ void exit_thread(void)
 #endif
 }
 
-void show_regs(struct pt_regs *regs)
+void tile_show_regs(struct pt_regs *regs)
 {
-	struct task_struct *tsk = validate_current();
 	int i;
-
-	if (tsk != &corrupt_current)
-		show_regs_print_info(KERN_ERR);
 #ifdef __tilegx__
 	for (i = 0; i < 17; i++)
-		pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
 		       i, regs->regs[i], i+18, regs->regs[i+18],
 		       i+36, regs->regs[i+36]);
-	pr_err(" r17: " REGFMT " r35: " REGFMT " tp : " REGFMT "\n",
+	pr_err(" r17: "REGFMT" r35: "REGFMT" tp : "REGFMT"\n",
 	       regs->regs[17], regs->regs[35], regs->tp);
-	pr_err(" sp : " REGFMT " lr : " REGFMT "\n", regs->sp, regs->lr);
+	pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
 #else
 	for (i = 0; i < 13; i++)
-		pr_err(" r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT " r%-2d: " REGFMT "\n",
+		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
+		       " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
 		       i, regs->regs[i], i+14, regs->regs[i+14],
 		       i+27, regs->regs[i+27], i+40, regs->regs[i+40]);
-	pr_err(" r13: " REGFMT " tp : " REGFMT " sp : " REGFMT " lr : " REGFMT "\n",
+	pr_err(" r13: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
 	       regs->regs[13], regs->tp, regs->sp, regs->lr);
 #endif
-	pr_err(" pc : " REGFMT " ex1: %ld     faultnum: %ld\n",
-	       regs->pc, regs->ex1, regs->faultnum);
+	pr_err(" pc : "REGFMT" ex1: %ld     faultnum: %ld flags:%s%s%s%s\n",
+	       regs->pc, regs->ex1, regs->faultnum,
+	       is_compat_task() ? " compat" : "",
+	       (regs->flags & PT_FLAGS_DISABLE_IRQ) ? " noirq" : "",
+	       !(regs->flags & PT_FLAGS_CALLER_SAVES) ? " nocallersave" : "",
+	       (regs->flags & PT_FLAGS_RESTORE_REGS) ? " restoreregs" : "");
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	struct KBacktraceIterator kbt;
+
+	show_regs_print_info(KERN_DEFAULT);
+	tile_show_regs(regs);
+
+	KBacktraceIterator_init(&kbt, NULL, regs);
+	tile_show_stack(&kbt);
+}
+
+/* To ensure stack dumps on tiles occur one by one. */
+static DEFINE_SPINLOCK(backtrace_lock);
+/* To ensure no backtrace occurs before all of the stack dumps are done. */
+static atomic_t backtrace_cpus;
+/* The cpu mask to avoid reentrance. */
+static struct cpumask backtrace_mask;
 
-	dump_stack_regs(regs);
+void do_nmi_dump_stack(struct pt_regs *regs)
+{
+	int is_idle = is_idle_task(current) && !in_interrupt();
+	int cpu;
+
+	nmi_enter();
+	cpu = smp_processor_id();
+	if (WARN_ON_ONCE(!cpumask_test_and_clear_cpu(cpu, &backtrace_mask)))
+		goto done;
+
+	spin_lock(&backtrace_lock);
+	if (is_idle)
+		pr_info("CPU: %d idle\n", cpu);
+	else
+		show_regs(regs);
+	spin_unlock(&backtrace_lock);
+	atomic_dec(&backtrace_cpus);
+done:
+	nmi_exit();
+}
+
+#ifdef __tilegx__
+void arch_trigger_all_cpu_backtrace(bool self)
+{
+	struct cpumask mask;
+	HV_Coord tile;
+	unsigned int timeout;
+	int cpu;
+	int ongoing;
+	HV_NMI_Info info[NR_CPUS];
+
+	ongoing = atomic_cmpxchg(&backtrace_cpus, 0, num_online_cpus() - 1);
+	if (ongoing != 0) {
+		pr_err("Trying to do all-cpu backtrace.\n");
+		pr_err("But another all-cpu backtrace is ongoing (%d cpus left)\n",
+		       ongoing);
+		if (self) {
+			pr_err("Reporting the stack on this cpu only.\n");
+			dump_stack();
+		}
+		return;
+	}
+
+	cpumask_copy(&mask, cpu_online_mask);
+	cpumask_clear_cpu(smp_processor_id(), &mask);
+	cpumask_copy(&backtrace_mask, &mask);
+
+	/* Backtrace for myself first. */
+	if (self)
+		dump_stack();
+
+	/* Tentatively dump stack on remote tiles via NMI. */
+	timeout = 100;
+	while (!cpumask_empty(&mask) && timeout) {
+		for_each_cpu(cpu, &mask) {
+			tile.x = cpu_x(cpu);
+			tile.y = cpu_y(cpu);
+			info[cpu] = hv_send_nmi(tile, TILE_NMI_DUMP_STACK, 0);
+			if (info[cpu].result == HV_NMI_RESULT_OK)
+				cpumask_clear_cpu(cpu, &mask);
+		}
+
+		mdelay(10);
+		timeout--;
+	}
+
+	/* Warn about cpus stuck in ICS and decrement their counts here. */
+	if (!cpumask_empty(&mask)) {
+		for_each_cpu(cpu, &mask) {
+			switch (info[cpu].result) {
+			case HV_NMI_RESULT_FAIL_ICS:
+				pr_warn("Skipping stack dump of cpu %d in ICS at pc %#llx\n",
+					cpu, info[cpu].pc);
+				break;
+			case HV_NMI_RESULT_FAIL_HV:
+				pr_warn("Skipping stack dump of cpu %d in hypervisor\n",
+					cpu);
+				break;
+			case HV_ENOSYS:
+				pr_warn("Hypervisor too old to allow remote stack dumps.\n");
+				goto skip_for_each;
+			default:  /* should not happen */
+				pr_warn("Skipping stack dump of cpu %d [%d,%#llx]\n",
+					cpu, info[cpu].result, info[cpu].pc);
+				break;
+			}
+		}
+skip_for_each:
+		atomic_sub(cpumask_weight(&mask), &backtrace_cpus);
+	}
 }
+#endif /* __tilegx__ */
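On tilegx this supplies the arch hook behind trigger_all_cpu_backtrace(), so the code above is reachable from magic sysrq (e.g. writing 'l' to /proc/sysrq-trigger), as the TILE_NMI_DUMP_STACK comment in <asm/traps.h> notes; each remote tile enters do_nmi() via the NMI downcall and lands in do_nmi_dump_stack().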
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index d366675e4bf8..6b755d125783 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -71,7 +71,7 @@ static unsigned long __initdata node_percpu[MAX_NUMNODES];
  * per-CPU stack and boot info.
  */
 DEFINE_PER_CPU(unsigned long, boot_sp) =
-	(unsigned long)init_stack + THREAD_SIZE;
+	(unsigned long)init_stack + THREAD_SIZE - STACK_TOP_DELTA;
 
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(unsigned long, boot_pc) = (unsigned long)start_kernel;
@@ -1139,7 +1139,7 @@ static void __init load_hv_initrd(void)
 
 void __init free_initrd_mem(unsigned long begin, unsigned long end)
 {
-	free_bootmem(__pa(begin), end - begin);
+	free_bootmem_late(__pa(begin), end - begin);
 }
 
 static int __init setup_initrd(char *str)
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index c42dce50acd8..402b9c85a894 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -23,6 +23,7 @@
 #include <linux/mmzone.h>
 #include <linux/dcache.h>
 #include <linux/fs.h>
+#include <linux/hardirq.h>
 #include <linux/string.h>
 #include <asm/backtrace.h>
 #include <asm/page.h>
@@ -109,7 +110,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 		if (kbt->verbose)
 			pr_err("  <%s while in user mode>\n", fault);
 	} else {
-		if (kbt->verbose)
+		if (kbt->verbose && (p->pc != 0 || p->sp != 0 || p->ex1 != 0))
 			pr_err("  (odd fault: pc %#lx, sp %#lx, ex1 %#lx?)\n",
 			       p->pc, p->sp, p->ex1);
 		return NULL;
@@ -119,10 +120,12 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 	return p;
 }
 
-/* Is the pc pointing to a sigreturn trampoline? */
-static int is_sigreturn(unsigned long pc)
+/* Is the iterator pointing to a sigreturn trampoline? */
+static int is_sigreturn(struct KBacktraceIterator *kbt)
 {
-	return current->mm && (pc == VDSO_SYM(&__vdso_rt_sigreturn));
+	return kbt->task->mm &&
+		(kbt->it.pc == ((ulong)kbt->task->mm->context.vdso_base +
+				(ulong)&__vdso_rt_sigreturn));
 }
 
 /* Return a pt_regs pointer for a valid signal handler frame */
@@ -131,7 +134,7 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
 {
 	BacktraceIterator *b = &kbt->it;
 
-	if (is_sigreturn(b->pc) && b->sp < PAGE_OFFSET &&
+	if (is_sigreturn(kbt) && b->sp < PAGE_OFFSET &&
 	    b->sp % sizeof(long) == 0) {
 		int retval;
 		pagefault_disable();
@@ -151,11 +154,6 @@ static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
 	return NULL;
 }
 
-static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
-{
-	return is_sigreturn(kbt->it.pc);
-}
-
 static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
 {
 	struct pt_regs *p;
@@ -178,7 +176,7 @@ static int KBacktraceIterator_next_item_inclusive(
 {
 	for (;;) {
 		do {
-			if (!KBacktraceIterator_is_sigreturn(kbt))
+			if (!is_sigreturn(kbt))
 				return KBT_ONGOING;
 		} while (backtrace_next(&kbt->it));
 
@@ -334,7 +332,7 @@ static void describe_addr(struct KBacktraceIterator *kbt,
 	}
 
 	if (vma->vm_file) {
-		p = d_path(&vma->vm_file->f_path, buf, bufsize);
+		p = file_path(vma->vm_file, buf, bufsize);
 		if (IS_ERR(p))
 			p = "?";
 		name = kbasename(p);
@@ -357,51 +355,50 @@ static void describe_addr(struct KBacktraceIterator *kbt,
  */
 static bool start_backtrace(void)
 {
-	if (current->thread.in_backtrace) {
+	if (current_thread_info()->in_backtrace) {
 		pr_err("Backtrace requested while in backtrace!\n");
 		return false;
 	}
-	current->thread.in_backtrace = true;
+	current_thread_info()->in_backtrace = true;
 	return true;
 }
 
 static void end_backtrace(void)
 {
-	current->thread.in_backtrace = false;
+	current_thread_info()->in_backtrace = false;
 }
 
 /*
  * This method wraps the backtracer's more generic support.
  * It is only invoked from the architecture-specific code; show_stack()
- * and dump_stack() (in entry.S) are architecture-independent entry points.
+ * and dump_stack() are architecture-independent entry points.
  */
-void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
+void tile_show_stack(struct KBacktraceIterator *kbt)
 {
 	int i;
 	int have_mmap_sem = 0;
 
 	if (!start_backtrace())
 		return;
-	if (headers) {
-		/*
-		 * Add a blank line since if we are called from panic(),
-		 * then bust_spinlocks() spit out a space in front of us
-		 * and it will mess up our KERN_ERR.
-		 */
-		pr_err("Starting stack dump of tid %d, pid %d (%s) on cpu %d at cycle %lld\n",
-		       kbt->task->pid, kbt->task->tgid, kbt->task->comm,
-		       raw_smp_processor_id(), get_cycles());
-	}
 	kbt->verbose = 1;
 	i = 0;
 	for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
 		char namebuf[KSYM_NAME_LEN+100];
 		unsigned long address = kbt->it.pc;
 
-		/* Try to acquire the mmap_sem as we pass into userspace. */
-		if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+		/*
+		 * Try to acquire the mmap_sem as we pass into userspace.
+		 * If we're in an interrupt context, don't even try, since
+		 * it's not safe to call e.g. d_path() from an interrupt,
+		 * since it uses spin locks without disabling interrupts.
+		 * Note we test "kbt->task == current", not "kbt->is_current",
+		 * since we're checking that "current" will work in d_path().
+		 */
+		if (kbt->task == current && address < PAGE_OFFSET &&
+		    !have_mmap_sem && kbt->task->mm && !in_interrupt()) {
 			have_mmap_sem =
 				down_read_trylock(&kbt->task->mm->mmap_sem);
+		}
 
 		describe_addr(kbt, address, have_mmap_sem,
 			      namebuf, sizeof(namebuf));
@@ -416,24 +413,12 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 	}
 	if (kbt->end == KBT_LOOP)
 		pr_err("Stack dump stopped; next frame identical to this one\n");
-	if (headers)
-		pr_err("Stack dump complete\n");
 	if (have_mmap_sem)
 		up_read(&kbt->task->mm->mmap_sem);
 	end_backtrace();
 }
 EXPORT_SYMBOL(tile_show_stack);
 
-
-/* This is called from show_regs() and _dump_stack() */
-void dump_stack_regs(struct pt_regs *regs)
-{
-	struct KBacktraceIterator kbt;
-	KBacktraceIterator_init(&kbt, NULL, regs);
-	tile_show_stack(&kbt, 1);
-}
-EXPORT_SYMBOL(dump_stack_regs);
-
 static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
 				       ulong pc, ulong lr, ulong sp, ulong r52)
 {
@@ -445,11 +430,15 @@ static struct pt_regs *regs_to_pt_regs(struct pt_regs *regs,
 	return regs;
 }
 
-/* This is called from dump_stack() and just converts to pt_regs */
+/* Deprecated function currently only used by kernel_double_fault(). */
 void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
+	struct KBacktraceIterator kbt;
 	struct pt_regs regs;
-	dump_stack_regs(regs_to_pt_regs(&regs, pc, lr, sp, r52));
+
+	regs_to_pt_regs(&regs, pc, lr, sp, r52);
+	KBacktraceIterator_init(&kbt, NULL, &regs);
+	tile_show_stack(&kbt);
 }
 
 /* This is called from KBacktraceIterator_init_current() */
@@ -461,22 +450,30 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
 				regs_to_pt_regs(&regs, pc, lr, sp, r52));
 }
 
-/* This is called only from kernel/sched/core.c, with esp == NULL */
+/*
+ * Called from sched_show_task() with task != NULL, or dump_stack()
+ * with task == NULL.  The esp argument is always NULL.
+ */
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
 	struct KBacktraceIterator kbt;
-	if (task == NULL || task == current)
+	if (task == NULL || task == current) {
 		KBacktraceIterator_init_current(&kbt);
-	else
+		KBacktraceIterator_next(&kbt);  /* don't show first frame */
+	} else {
 		KBacktraceIterator_init(&kbt, task, NULL);
-	tile_show_stack(&kbt, 0);
+	}
+	tile_show_stack(&kbt);
 }
 
 #ifdef CONFIG_STACKTRACE
 
 /* Support generic Linux stack API too */
 
-void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+static void save_stack_trace_common(struct task_struct *task,
+				    struct pt_regs *regs,
+				    bool user,
+				    struct stack_trace *trace)
 {
 	struct KBacktraceIterator kbt;
 	int skip = trace->skip;
@@ -484,31 +481,57 @@ void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
 
 	if (!start_backtrace())
 		goto done;
-	if (task == NULL || task == current)
+	if (regs != NULL) {
+		KBacktraceIterator_init(&kbt, NULL, regs);
+	} else if (task == NULL || task == current) {
 		KBacktraceIterator_init_current(&kbt);
-	else
+		skip++;  /* don't show KBacktraceIterator_init_current */
+	} else {
 		KBacktraceIterator_init(&kbt, task, NULL);
+	}
 	for (; !KBacktraceIterator_end(&kbt); KBacktraceIterator_next(&kbt)) {
 		if (skip) {
 			--skip;
 			continue;
 		}
-		if (i >= trace->max_entries || kbt.it.pc < PAGE_OFFSET)
+		if (i >= trace->max_entries ||
+		    (!user && kbt.it.pc < PAGE_OFFSET))
 			break;
 		trace->entries[i++] = kbt.it.pc;
 	}
 	end_backtrace();
 done:
+	if (i < trace->max_entries)
+		trace->entries[i++] = ULONG_MAX;
 	trace->nr_entries = i;
 }
+
+void save_stack_trace_tsk(struct task_struct *task, struct stack_trace *trace)
+{
+	save_stack_trace_common(task, NULL, false, trace);
+}
 EXPORT_SYMBOL(save_stack_trace_tsk);
 
 void save_stack_trace(struct stack_trace *trace)
 {
-	save_stack_trace_tsk(NULL, trace);
+	save_stack_trace_common(NULL, NULL, false, trace);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	save_stack_trace_common(NULL, regs, false, trace);
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+	/* Trace user stack if we are not a kernel thread. */
+	if (current->mm)
+		save_stack_trace_common(NULL, task_pt_regs(current),
+					true, trace);
+	else if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
 #endif
 
 /* In entry.S */
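USER_STACKTRACE_SUPPORT, selected in the Kconfig hunk above, is what lets generic code call the new save_stack_trace_user(); a hedged sketch of a consumer (names illustrative):

    /*
     * Illustrative consumer: snapshot up to 16 user-space PCs for the
     * current task using the new user-stacktrace support.
     */
    static void snapshot_user_stack(void)
    {
    	unsigned long entries[16];
    	struct stack_trace trace = {
    		.entries = entries,
    		.max_entries = ARRAY_SIZE(entries),
    	};

    	save_stack_trace_user(&trace);
    	/* trace.nr_entries PCs now in entries[], ULONG_MAX-terminated */
    }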
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 312fc134c1cb..0011a9ff0525 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -395,6 +395,21 @@ done:
 	exception_exit(prev_state);
 }
 
+void do_nmi(struct pt_regs *regs, int fault_num, unsigned long reason)
+{
+	switch (reason) {
+	case TILE_NMI_DUMP_STACK:
+		do_nmi_dump_stack(regs);
+		break;
+	default:
+		panic("Unexpected do_nmi type %ld", reason);
+		return;
+	}
+}
+
+/* Deprecated function currently only used here. */
+extern void _dump_stack(int dummy, ulong pc, ulong lr, ulong sp, ulong r52);
+
 void kernel_double_fault(int dummy, ulong pc, ulong lr, ulong sp, ulong r52)
 {
 	_dump_stack(dummy, pc, lr, sp, r52);
diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c
index 5af8debc6a71..f0da5a237e94 100644
--- a/arch/tile/kernel/usb.c
+++ b/arch/tile/kernel/usb.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/usb/tilegx.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 static u64 ehci_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/tile/kernel/vdso/vgettimeofday.c b/arch/tile/kernel/vdso/vgettimeofday.c
index 8bb21eda07d8..e63310c49742 100644
--- a/arch/tile/kernel/vdso/vgettimeofday.c
+++ b/arch/tile/kernel/vdso/vgettimeofday.c
@@ -67,7 +67,7 @@ static inline int do_realtime(struct vdso_data *vdso, struct timespec *ts)
 	u64 ns;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->wall_time_sec;
 		ns = vdso->wall_time_snsec;
 		ns += vgetsns(vdso);
@@ -86,7 +86,7 @@ static inline int do_monotonic(struct vdso_data *vdso, struct timespec *ts)
 	u64 ns;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->monotonic_time_sec;
 		ns = vdso->monotonic_time_snsec;
 		ns += vgetsns(vdso);
@@ -105,7 +105,7 @@ static inline int do_realtime_coarse(struct vdso_data *vdso,
 	unsigned count;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->wall_time_coarse_sec;
 		ts->tv_nsec = vdso->wall_time_coarse_nsec;
 	} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -119,7 +119,7 @@ static inline int do_monotonic_coarse(struct vdso_data *vdso,
 	unsigned count;
 
 	do {
-		count = read_seqcount_begin(&vdso->tb_seq);
+		count = raw_read_seqcount_begin(&vdso->tb_seq);
 		ts->tv_sec = vdso->monotonic_time_coarse_sec;
 		ts->tv_nsec = vdso->monotonic_time_coarse_nsec;
 	} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));
@@ -137,7 +137,7 @@ struct syscall_return_value __vdso_gettimeofday(struct timeval *tv,
 	/* The use of the timezone is obsolete, normally tz is NULL. */
 	if (unlikely(tz != NULL)) {
 		do {
-			count = read_seqcount_begin(&vdso->tz_seq);
+			count = raw_read_seqcount_begin(&vdso->tz_seq);
 			tz->tz_minuteswest = vdso->tz_minuteswest;
 			tz->tz_dsttime = vdso->tz_dsttime;
 		} while (unlikely(read_seqcount_retry(&vdso->tz_seq, count)));
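raw_read_seqcount_begin() is the variant without lockdep instrumentation, which the vDSO needs because it cannot call kernel debug code from user space; the surrounding retry loop is otherwise the standard seqcount read pattern. A generic sketch (function name illustrative):

    /*
     * Illustrative seqcount read section: retry until no concurrent
     * update interleaved with the reads.  The raw_ variant performs
     * no lockdep calls, making it safe to run in the vDSO.
     */
    static u64 read_wall_sec(struct vdso_data *vdso)
    {
    	unsigned count;
    	u64 sec;

    	do {
    		count = raw_read_seqcount_begin(&vdso->tb_seq);
    		sec = vdso->wall_time_sec;
    	} while (unlikely(read_seqcount_retry(&vdso->tb_seq, count)));

    	return sec;
    }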
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 82733c87d67e..9d171ca4302c 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -18,8 +18,6 @@
 
 /* arch/tile/lib/usercopy.S */
 #include <linux/uaccess.h>
-EXPORT_SYMBOL(strnlen_user_asm);
-EXPORT_SYMBOL(strncpy_from_user_asm);
 EXPORT_SYMBOL(clear_user_asm);
 EXPORT_SYMBOL(flush_user_asm);
 EXPORT_SYMBOL(finv_user_asm);
@@ -28,7 +26,6 @@ EXPORT_SYMBOL(finv_user_asm);
 #include <linux/kernel.h>
 #include <asm/processor.h>
 EXPORT_SYMBOL(current_text_addr);
-EXPORT_SYMBOL(dump_stack);
 
 /* arch/tile/kernel/head.S */
 EXPORT_SYMBOL(empty_zero_page);
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 88c7016492c4..97bbb6060b25 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -28,7 +28,7 @@
 #define _ST(p, inst, v)						\
 	({							\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
@@ -41,7 +41,7 @@
 	({							\
 		unsigned long __v;				\
 		asm("1: " #inst " %0, %1;"			\
-		    ".pushsection .coldtext.memcpy,\"ax\";"	\
+		    ".pushsection .coldtext,\"ax\";"	\
 		    "2: { move r0, %2; jrp lr };"		\
 		    ".section __ex_table,\"a\";"		\
 		    ".align 8;"					\
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index b34f79aada48..88c2a53362e7 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	u32 iterations = 0;
-	while (arch_spin_is_locked(lock))
+	int curr = READ_ONCE(lock->current_ticket);
+	int next = READ_ONCE(lock->next_ticket);
+
+	/* Return immediately if unlocked. */
+	if (next == curr)
+		return;
+
+	/* Wait until the current locker has released the lock. */
+	do {
 		delay_backoff(iterations++);
+	} while (READ_ONCE(lock->current_ticket) == curr);
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
diff --git a/arch/tile/lib/spinlock_64.c b/arch/tile/lib/spinlock_64.c
index d6fb9581e980..c8d1f94ff1fe 100644
--- a/arch/tile/lib/spinlock_64.c
+++ b/arch/tile/lib/spinlock_64.c
@@ -65,8 +65,17 @@ EXPORT_SYMBOL(arch_spin_trylock);
 void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	u32 iterations = 0;
-	while (arch_spin_is_locked(lock))
+	u32 val = READ_ONCE(lock->lock);
+	u32 curr = arch_spin_current(val);
+
+	/* Return immediately if unlocked. */
+	if (arch_spin_next(val) == curr)
+		return;
+
+	/* Wait until the current locker has released the lock. */
+	do {
 		delay_backoff(iterations++);
+	} while (arch_spin_current(READ_ONCE(lock->lock)) == curr);
 }
 EXPORT_SYMBOL(arch_spin_unlock_wait);
 
diff --git a/arch/tile/lib/usercopy_32.S b/arch/tile/lib/usercopy_32.S
index 1bc162224638..db93ad5fae25 100644
--- a/arch/tile/lib/usercopy_32.S
+++ b/arch/tile/lib/usercopy_32.S
@@ -20,52 +20,6 @@
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
 /*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
-	{ bz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
-1:      { lb_u r4, r0; addi r1, r1, -1 }
-	bz r4, 2f
-	{ bnzt r1, 1b; addi r0, r0, 1 }
-2:      { sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strnlen_user_asm)
-	.pushsection .fixup,"ax"
-strnlen_user_fault:
-	{ move r0, zero; jrp lr }
-	ENDPROC(strnlen_user_fault)
-	.section __ex_table,"a"
-	.align 4
-	.word 1b, strnlen_user_fault
-	.popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2.  On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
-	{ bz r2, 2f; move r3, r0 }
-1:	{ lb_u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
-	{ sb r0, r4; addi r0, r0, 1 }
-	bz r4, 2f
-	bnzt r2, 1b
-	{ sub r0, r0, r3; jrp lr }
-2:	addi r0, r0, -1   /* don't count the trailing NUL */
-	{ sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strncpy_from_user_asm)
-	.pushsection .fixup,"ax"
-strncpy_from_user_fault:
-	{ movei r0, -EFAULT; jrp lr }
-	ENDPROC(strncpy_from_user_fault)
-	.section __ex_table,"a"
-	.align 4
-	.word 1b, strncpy_from_user_fault
-	.popsection
-
-/*
  * clear_user_asm takes the user target address in r0 and the
  * number of bytes to zero in r1.
  * It returns the number of uncopiable bytes (hopefully zero) in r0.
diff --git a/arch/tile/lib/usercopy_64.S b/arch/tile/lib/usercopy_64.S
index b3b31a3306f8..9322dc551e91 100644
--- a/arch/tile/lib/usercopy_64.S
+++ b/arch/tile/lib/usercopy_64.S
@@ -20,52 +20,6 @@
 /* Access user memory, but use MMU to avoid propagating kernel exceptions. */
 
 /*
- * strnlen_user_asm takes the pointer in r0, and the length bound in r1.
- * It returns the length, including the terminating NUL, or zero on exception.
- * If length is greater than the bound, returns one plus the bound.
- */
-STD_ENTRY(strnlen_user_asm)
-	{ beqz r1, 2f; addi r3, r0, -1 }  /* bias down to include NUL */
-1:      { ld1u r4, r0; addi r1, r1, -1 }
-	beqz r4, 2f
-	{ bnezt r1, 1b; addi r0, r0, 1 }
-2:      { sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strnlen_user_asm)
-	.pushsection .fixup,"ax"
-strnlen_user_fault:
-	{ move r0, zero; jrp lr }
-	ENDPROC(strnlen_user_fault)
-	.section __ex_table,"a"
-	.align 8
-	.quad 1b, strnlen_user_fault
-	.popsection
-
-/*
- * strncpy_from_user_asm takes the kernel target pointer in r0,
- * the userspace source pointer in r1, and the length bound (including
- * the trailing NUL) in r2.  On success, it returns the string length
- * (not including the trailing NUL), or -EFAULT on failure.
- */
-STD_ENTRY(strncpy_from_user_asm)
-	{ beqz r2, 2f; move r3, r0 }
-1:	{ ld1u r4, r1; addi r1, r1, 1; addi r2, r2, -1 }
-	{ st1 r0, r4; addi r0, r0, 1 }
-	beqz r4, 2f
-	bnezt r2, 1b
-	{ sub r0, r0, r3; jrp lr }
-2:	addi r0, r0, -1   /* don't count the trailing NUL */
-	{ sub r0, r0, r3; jrp lr }
-	STD_ENDPROC(strncpy_from_user_asm)
-	.pushsection .fixup,"ax"
-strncpy_from_user_fault:
-	{ movei r0, -EFAULT; jrp lr }
-	ENDPROC(strncpy_from_user_fault)
-	.section __ex_table,"a"
-	.align 8
-	.quad 1b, strncpy_from_user_fault
-	.popsection
-
-/*
  * clear_user_asm takes the user target address in r0 and the
  * number of bytes to zero in r1.
  * It returns the number of uncopiable bytes (hopefully zero) in r0.
diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c
index f7ddae3725a4..6225cc998db1 100644
--- a/arch/tile/mm/elf.c
+++ b/arch/tile/mm/elf.c
@@ -56,7 +56,7 @@ static int notify_exec(struct mm_struct *mm)
 	if (exe_file == NULL)
 		goto done_free;
 
-	path = d_path(&exe_file->f_path, buf, PAGE_SIZE);
+	path = file_path(exe_file, buf, PAGE_SIZE);
 	if (IS_ERR(path))
 		goto done_put;
 
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index e83cc999da02..13eac59bf16a 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs,
 
 	/*
 	 * If we're in an interrupt, have no user context or are running in an
-	 * atomic region then we must not take the fault.
+	 * region with pagefaults disabled then we must not take the fault.
 	 */
-	if (in_atomic() || !mm) {
+	if (pagefault_disabled() || !mm) {
 		vma = NULL;  /* happy compiler */
 		goto bad_area_nosemaphore;
 	}
@@ -699,11 +699,10 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
  * interrupt away appropriately and return immediately.  We can't do
  * page faults for user code while in kernel mode.
  */
-void do_page_fault(struct pt_regs *regs, int fault_num,
-		   unsigned long address, unsigned long write)
+static inline void __do_page_fault(struct pt_regs *regs, int fault_num,
+				   unsigned long address, unsigned long write)
 {
 	int is_page_fault;
-	enum ctx_state prev_state = exception_enter();
 
 #ifdef CONFIG_KPROBES
 	/*
@@ -713,7 +712,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 	 */
 	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
 		       regs->faultnum, SIGSEGV) == NOTIFY_STOP)
-		goto done;
+		return;
 #endif
 
 #ifdef __tilegx__
@@ -835,18 +834,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
 			async->is_fault = is_page_fault;
 			async->is_write = write;
 			async->address = address;
-			goto done;
+			return;
 		}
 	}
 #endif
 
 	handle_page_fault(regs, fault_num, is_page_fault, address, write);
+}
 
-done:
+void do_page_fault(struct pt_regs *regs, int fault_num,
+		   unsigned long address, unsigned long write)
+{
+	enum ctx_state prev_state = exception_enter();
+	__do_page_fault(regs, fault_num, address, write);
 	exception_exit(prev_state);
 }
 
-
 #if CHIP_HAS_TILE_DMA()
 /*
  * This routine effectively re-issues asynchronous page faults
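pagefault_disabled() replaces in_atomic() here as part of the v4.2 tree-wide decoupling of pagefault_disable() from preemption; a hedged sketch of the caller-side pattern this check services (helper name hypothetical):

    /*
     * Illustrative only: with page faults disabled, a faulting user
     * access fails fast with -EFAULT instead of sleeping in the fault
     * handler; do_page_fault() sees pagefault_disabled() and bails.
     */
    static long peek_user_word(const unsigned long __user *addr,
    			   unsigned long *val)
    {
    	long ret;

    	pagefault_disable();
    	ret = __get_user(*val, addr);
    	pagefault_enable();

    	return ret;
    }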
diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c
index 6aa2f2625447..fcd545014e79 100644
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
 	int idx, type;
 	pte_t *pte;
 
-	/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+	preempt_disable();
 	pagefault_disable();
 
 	/* Avoid icache flushes by disallowing atomic executable mappings. */
@@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr)
 	}
 
 	pagefault_enable();
+	preempt_enable();
 }
 EXPORT_SYMBOL(__kunmap_atomic);
 
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 8416240c322c..c034dc3fe2d4 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -160,11 +160,6 @@ int pud_huge(pud_t pud)
 	return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
 }
 
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
-	return 0;
-}
-
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,