summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Documentation/atomic_ops.txt4
-rw-r--r--Documentation/fault-injection/fault-injection.txt11
-rw-r--r--Documentation/memory-barriers.txt6
-rw-r--r--Documentation/static-keys.txt99
-rw-r--r--arch/Kconfig6
-rw-r--r--arch/alpha/include/asm/atomic.h42
-rw-r--r--arch/arc/include/asm/atomic.h8
-rw-r--r--arch/arm/include/asm/atomic.h51
-rw-r--r--arch/arm/include/asm/barrier.h4
-rw-r--r--arch/arm/include/asm/cmpxchg.h47
-rw-r--r--arch/arm/include/asm/jump_label.h25
-rw-r--r--arch/arm/kernel/jump_label.c2
-rw-r--r--arch/arm64/include/asm/atomic.h14
-rw-r--r--arch/arm64/include/asm/barrier.h4
-rw-r--r--arch/arm64/include/asm/jump_label.h18
-rw-r--r--arch/arm64/kernel/jump_label.c2
-rw-r--r--arch/avr32/include/asm/atomic.h12
-rw-r--r--arch/blackfin/include/asm/atomic.h16
-rw-r--r--arch/blackfin/kernel/bfin_ksyms.c7
-rw-r--r--arch/blackfin/mach-bf561/atomic.S30
-rw-r--r--arch/blackfin/mach-common/smp.c2
-rw-r--r--arch/frv/include/asm/atomic.h107
-rw-r--r--arch/frv/include/asm/atomic_defs.h172
-rw-r--r--arch/frv/include/asm/bitops.h99
-rw-r--r--arch/frv/kernel/dma.c6
-rw-r--r--arch/frv/kernel/frv_ksyms.c5
-rw-r--r--arch/frv/lib/Makefile2
-rw-r--r--arch/frv/lib/atomic-lib.c7
-rw-r--r--arch/frv/lib/atomic-ops.S110
-rw-r--r--arch/frv/lib/atomic64-ops.S94
-rw-r--r--arch/h8300/include/asm/atomic.h137
-rw-r--r--arch/hexagon/include/asm/atomic.h4
-rw-r--r--arch/ia64/include/asm/atomic.h24
-rw-r--r--arch/ia64/include/asm/barrier.h4
-rw-r--r--arch/m32r/include/asm/atomic.h45
-rw-r--r--arch/m32r/kernel/smp.c4
-rw-r--r--arch/m68k/include/asm/atomic.h14
-rw-r--r--arch/metag/include/asm/atomic_lnkget.h38
-rw-r--r--arch/metag/include/asm/atomic_lock1.h23
-rw-r--r--arch/metag/include/asm/barrier.h4
-rw-r--r--arch/mips/include/asm/atomic.h7
-rw-r--r--arch/mips/include/asm/barrier.h4
-rw-r--r--arch/mips/include/asm/jump_label.h19
-rw-r--r--arch/mips/kernel/jump_label.c2
-rw-r--r--arch/mn10300/include/asm/atomic.h71
-rw-r--r--arch/mn10300/mm/tlb-smp.c2
-rw-r--r--arch/parisc/configs/c8000_defconfig1
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig1
-rw-r--r--arch/parisc/include/asm/atomic.h7
-rw-r--r--arch/powerpc/include/asm/atomic.h7
-rw-r--r--arch/powerpc/include/asm/barrier.h4
-rw-r--r--arch/powerpc/include/asm/jump_label.h19
-rw-r--r--arch/powerpc/kernel/jump_label.c2
-rw-r--r--arch/powerpc/kernel/misc_32.S19
-rw-r--r--arch/s390/include/asm/atomic.h41
-rw-r--r--arch/s390/include/asm/barrier.h4
-rw-r--r--arch/s390/include/asm/jump_label.h19
-rw-r--r--arch/s390/kernel/jump_label.c2
-rw-r--r--arch/s390/kernel/time.c4
-rw-r--r--arch/s390/kvm/interrupt.c30
-rw-r--r--arch/s390/kvm/kvm-s390.c32
-rw-r--r--arch/s390/lib/uaccess.c12
-rw-r--r--arch/sh/include/asm/atomic-grb.h43
-rw-r--r--arch/sh/include/asm/atomic-irq.h21
-rw-r--r--arch/sh/include/asm/atomic-llsc.h31
-rw-r--r--arch/sparc/include/asm/atomic_32.h4
-rw-r--r--arch/sparc/include/asm/atomic_64.h4
-rw-r--r--arch/sparc/include/asm/barrier_64.h4
-rw-r--r--arch/sparc/include/asm/jump_label.h35
-rw-r--r--arch/sparc/kernel/jump_label.c2
-rw-r--r--arch/sparc/lib/atomic32.c22
-rw-r--r--arch/sparc/lib/atomic_64.S6
-rw-r--r--arch/sparc/lib/ksyms.c3
-rw-r--r--arch/tile/include/asm/atomic_32.h28
-rw-r--r--arch/tile/include/asm/atomic_64.h40
-rw-r--r--arch/tile/lib/atomic_32.c23
-rw-r--r--arch/tile/lib/atomic_asm_32.S4
-rw-r--r--arch/x86/include/asm/atomic.h25
-rw-r--r--arch/x86/include/asm/atomic64_32.h14
-rw-r--r--arch/x86/include/asm/atomic64_64.h15
-rw-r--r--arch/x86/include/asm/barrier.h8
-rw-r--r--arch/x86/include/asm/jump_label.h23
-rw-r--r--arch/x86/include/asm/qrwlock.h10
-rw-r--r--arch/x86/kernel/jump_label.c2
-rw-r--r--arch/x86/kernel/tsc.c22
-rw-r--r--arch/xtensa/configs/iss_defconfig1
-rw-r--r--arch/xtensa/include/asm/atomic.h73
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c4
-rw-r--r--drivers/s390/scsi/zfcp_aux.c2
-rw-r--r--drivers/s390/scsi/zfcp_erp.c62
-rw-r--r--drivers/s390/scsi/zfcp_fc.c8
-rw-r--r--drivers/s390/scsi/zfcp_fsf.c26
-rw-r--r--drivers/s390/scsi/zfcp_qdio.c14
-rw-r--r--include/asm-generic/atomic-long.h263
-rw-r--r--include/asm-generic/atomic.h11
-rw-r--r--include/asm-generic/atomic64.h4
-rw-r--r--include/asm-generic/barrier.h4
-rw-r--r--include/asm-generic/qrwlock.h78
-rw-r--r--include/linux/atomic.h361
-rw-r--r--include/linux/compiler.h7
-rw-r--r--include/linux/jump_label.h261
-rw-r--r--include/linux/llist.h2
-rw-r--r--kernel/futex.c100
-rw-r--r--kernel/jump_label.c158
-rw-r--r--kernel/locking/Makefile1
-rw-r--r--kernel/locking/qrwlock.c47
-rw-r--r--kernel/locking/qspinlock.c6
-rw-r--r--kernel/locking/qspinlock_paravirt.h102
-rw-r--r--kernel/locking/rtmutex-tester.c420
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/locking/rtmutex_common.h22
-rw-r--r--kernel/sched/core.c6
-rw-r--r--lib/Kconfig.debug22
-rw-r--r--lib/Makefile2
-rw-r--r--lib/atomic64.c3
-rw-r--r--lib/atomic64_test.c68
-rw-r--r--lib/lockref.c8
-rw-r--r--lib/test_static_key_base.c68
-rw-r--r--lib/test_static_keys.c225
-rw-r--r--scripts/rt-tester/check-all.sh21
-rwxr-xr-xscripts/rt-tester/rt-tester.py218
-rw-r--r--scripts/rt-tester/t2-l1-2rt-sameprio.tst94
-rw-r--r--scripts/rt-tester/t2-l1-pi.tst77
-rw-r--r--scripts/rt-tester/t2-l1-signal.tst72
-rw-r--r--scripts/rt-tester/t2-l2-2rt-deadlock.tst84
-rw-r--r--scripts/rt-tester/t3-l1-pi-1rt.tst87
-rw-r--r--scripts/rt-tester/t3-l1-pi-2rt.tst88
-rw-r--r--scripts/rt-tester/t3-l1-pi-3rt.tst87
-rw-r--r--scripts/rt-tester/t3-l1-pi-signal.tst93
-rw-r--r--scripts/rt-tester/t3-l1-pi-steal.tst91
-rw-r--r--scripts/rt-tester/t3-l2-pi.tst87
-rw-r--r--scripts/rt-tester/t4-l2-pi-deboost.tst118
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst178
-rw-r--r--scripts/rt-tester/t5-l4-pi-boost-deboost.tst138
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/static_keys/Makefile8
-rw-r--r--tools/testing/selftests/static_keys/test_static_keys.sh16
139 files changed, 2425 insertions, 3585 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index dab6da3382d9..b19fc34efdb1 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -266,7 +266,9 @@ with the given old and new values. Like all atomic_xxx operations,
 atomic_cmpxchg will only satisfy its atomicity semantics as long as all
 other accesses of *v are performed through atomic_xxx operations.
 
-atomic_cmpxchg must provide explicit memory barriers around the operation.
+atomic_cmpxchg must provide explicit memory barriers around the operation,
+although if the comparison fails then no memory ordering guarantees are
+required.
 
 The semantics for atomic_cmpxchg are the same as those defined for 'cas'
 below.
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
index 4cf1a2a6bd72..415484f3d59a 100644
--- a/Documentation/fault-injection/fault-injection.txt
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -15,6 +15,10 @@ o fail_page_alloc
 
   injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
 
+o fail_futex
+
+  injects futex deadlock and uaddr fault errors.
+
 o fail_make_request
 
   injects disk IO errors on devices permitted by setting
@@ -113,6 +117,12 @@ configuration of fault-injection capabilities.
 	specifies the minimum page allocation order to be injected
 	failures.
 
+- /sys/kernel/debug/fail_futex/ignore-private:
+
+	Format: { 'Y' | 'N' }
+	default is 'N', setting it to 'Y' will disable failure injections
+	when dealing with private (address space) futexes.
+
 o Boot option
 
 In order to inject faults while debugfs is not available (early boot time),
@@ -121,6 +131,7 @@ use the boot option:
 	failslab=
 	fail_page_alloc=
 	fail_make_request=
+	fail_futex=
 	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
 How to add new fault injection capability
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index eafa6a53f72c..2ba8461b0631 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -2327,9 +2327,7 @@ about the state (old or new) implies an SMP-conditional general memory barrier
 explicit lock operations, described later).  These include:
 
 	xchg();
-	cmpxchg();
 	atomic_xchg();			atomic_long_xchg();
-	atomic_cmpxchg();		atomic_long_cmpxchg();
 	atomic_inc_return();		atomic_long_inc_return();
 	atomic_dec_return();		atomic_long_dec_return();
 	atomic_add_return();		atomic_long_add_return();
@@ -2342,7 +2340,9 @@ explicit lock operations, described later).  These include:
 	test_and_clear_bit();
 	test_and_change_bit();
 
-	/* when succeeds (returns 1) */
+	/* when succeeds */
+	cmpxchg();
+	atomic_cmpxchg();		atomic_long_cmpxchg();
 	atomic_add_unless();		atomic_long_add_unless();
 
 These are used for such things as implementing ACQUIRE-class and RELEASE-class
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index c4407a41b0fc..f4cb0b2d5cd7 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -1,7 +1,22 @@
 			Static Keys
 			-----------
 
-By: Jason Baron <jbaron@redhat.com>
+DEPRECATED API:
+
+The use of 'struct static_key' directly, is now DEPRECATED. In addition
+static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+
+struct static_key false = STATIC_KEY_INIT_FALSE;
+struct static_key true = STATIC_KEY_INIT_TRUE;
+static_key_true()
+static_key_false()
+
+The updated API replacements are:
+
+DEFINE_STATIC_KEY_TRUE(key);
+DEFINE_STATIC_KEY_FALSE(key);
+static_key_likely()
+statick_key_unlikely()
 
 0) Abstract
 
@@ -9,22 +24,22 @@ Static keys allows the inclusion of seldom used features in
 performance-sensitive fast-path kernel code, via a GCC feature and a code
 patching technique. A quick example:
 
-	struct static_key key = STATIC_KEY_INIT_FALSE;
+	DEFINE_STATIC_KEY_FALSE(key);
 
 	...
 
-        if (static_key_false(&key))
+        if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
 	...
-	static_key_slow_inc();
+	static_branch_enable(&key);
 	...
-	static_key_slow_inc();
+	static_branch_disable(&key);
 	...
 
-The static_key_false() branch will be generated into the code with as little
+The static_branch_unlikely() branch will be generated into the code with as little
 impact to the likely code path as possible.
 
 
@@ -56,7 +71,7 @@ the branch site to change the branch direction.
 
 For example, if we have a simple branch that is disabled by default:
 
-	if (static_key_false(&key))
+	if (static_branch_unlikely(&key))
 		printk("I am the true branch\n");
 
 Thus, by default the 'printk' will not be emitted. And the code generated will
@@ -75,68 +90,55 @@ the basis for the static keys facility.
 
 In order to make use of this optimization you must first define a key:
 
-	struct static_key key;
-
-Which is initialized as:
-
-	struct static_key key = STATIC_KEY_INIT_TRUE;
+	DEFINE_STATIC_KEY_TRUE(key);
 
 or:
 
-	struct static_key key = STATIC_KEY_INIT_FALSE;
+	DEFINE_STATIC_KEY_FALSE(key);
+
 
-If the key is not initialized, it is default false. The 'struct static_key',
-must be a 'global'. That is, it can't be allocated on the stack or dynamically
+The key must be global, that is, it can't be allocated on the stack or dynamically
 allocated at run-time.
 
 The key is then used in code as:
 
-        if (static_key_false(&key))
+        if (static_branch_unlikely(&key))
                 do unlikely code
         else
                 do likely code
 
 Or:
 
-        if (static_key_true(&key))
+        if (static_branch_likely(&key))
                 do likely code
         else
                 do unlikely code
 
-A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
-'static_key_false()' construct. Likewise, a key initialized via
-'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
-single key can be used in many branches, but all the branches must match the
-way that the key has been initialized.
+Keys defined via DEFINE_STATIC_KEY_TRUE(), or DEFINE_STATIC_KEY_FALSE, may
+be used in either static_branch_likely() or static_branch_unlikely()
+statemnts.
 
-The branch(es) can then be switched via:
+Branch(es) can be set true via:
 
-	static_key_slow_inc(&key);
-	...
-	static_key_slow_dec(&key);
+static_branch_enable(&key);
 
-Thus, 'static_key_slow_inc()' means 'make the branch true', and
-'static_key_slow_dec()' means 'make the branch false' with appropriate
-reference counting. For example, if the key is initialized true, a
-static_key_slow_dec(), will switch the branch to false. And a subsequent
-static_key_slow_inc(), will change the branch back to true. Likewise, if the
-key is initialized false, a 'static_key_slow_inc()', will change the branch to
-true. And then a 'static_key_slow_dec()', will again make the branch false.
+or false via:
+
+static_branch_disable(&key);
 
-An example usage in the kernel is the implementation of tracepoints:
+The branch(es) can then be switched via reference counts:
 
-        static inline void trace_##name(proto)                          \
-        {                                                               \
-                if (static_key_false(&__tracepoint_##name.key))		\
-                        __DO_TRACE(&__tracepoint_##name,                \
-                                TP_PROTO(data_proto),                   \
-                                TP_ARGS(data_args),                     \
-                                TP_CONDITION(cond));                    \
-        }
+	static_branch_inc(&key);
+	...
+	static_branch_dec(&key);
 
-Tracepoints are disabled by default, and can be placed in performance critical
-pieces of the kernel. Thus, by using a static key, the tracepoints can have
-absolutely minimal impact when not in use.
+Thus, 'static_branch_inc()' means 'make the branch true', and
+'static_branch_dec()' means 'make the branch false' with appropriate
+reference counting. For example, if the key is initialized true, a
+static_branch_dec(), will switch the branch to false. And a subsequent
+static_branch_inc(), will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_branch_inc()', will change the branch to
+true. And then a 'static_branch_dec()', will again make the branch false.
 
 
 4) Architecture level code patching interface, 'jump labels'
@@ -150,9 +152,12 @@ simply fall back to a traditional, load, test, and jump sequence.
 
 * #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
 
-* __always_inline bool arch_static_branch(struct static_key *key), see:
+* __always_inline bool arch_static_branch(struct static_key *key, bool branch), see:
 					arch/x86/include/asm/jump_label.h
 
+* __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch),
+					see: arch/x86/include/asm/jump_label.h
+
 * void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
 					see: arch/x86/kernel/jump_label.c
 
@@ -173,7 +178,7 @@ SYSCALL_DEFINE0(getppid)
 {
         int pid;
 
-+       if (static_key_false(&key))
++       if (static_branch_unlikely(&key))
 +               printk("I am the true branch\n");
 
         rcu_read_lock();
diff --git a/arch/Kconfig b/arch/Kconfig
index 8a8ea7110de8..a71cdbe2a04d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -71,6 +71,12 @@ config JUMP_LABEL
 	 ( On 32-bit x86, the necessary options added to the compiler
 	   flags may increase the size of the kernel slightly. )
 
+config STATIC_KEYS_SELFTEST
+	bool "Static key selftest"
+	depends on JUMP_LABEL
+	help
+	  Boot time self-test of the branch patching code.
+
 config OPTPROBES
 	def_bool y
 	depends on KPROBES && HAVE_OPTPROBES
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index 8f8eafbedd7c..e8c956098424 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -29,13 +29,13 @@
  * branch back to restart the operation.
  */
 
-#define ATOMIC_OP(op)							\
+#define ATOMIC_OP(op, asm_op)						\
 static __inline__ void atomic_##op(int i, atomic_t * v)			\
 {									\
 	unsigned long temp;						\
 	__asm__ __volatile__(						\
 	"1:	ldl_l %0,%1\n"						\
-	"	" #op "l %0,%2,%0\n"					\
+	"	" #asm_op " %0,%2,%0\n"					\
 	"	stl_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -45,15 +45,15 @@ static __inline__ void atomic_##op(int i, atomic_t * v)			\
 	:"Ir" (i), "m" (v->counter));					\
 }									\
 
-#define ATOMIC_OP_RETURN(op)						\
+#define ATOMIC_OP_RETURN(op, asm_op)					\
 static inline int atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	long temp, result;						\
 	smp_mb();							\
 	__asm__ __volatile__(						\
 	"1:	ldl_l %0,%1\n"						\
-	"	" #op "l %0,%3,%2\n"					\
-	"	" #op "l %0,%3,%0\n"					\
+	"	" #asm_op " %0,%3,%2\n"					\
+	"	" #asm_op " %0,%3,%0\n"					\
 	"	stl_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -65,13 +65,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	return result;							\
 }
 
-#define ATOMIC64_OP(op)							\
+#define ATOMIC64_OP(op, asm_op)						\
 static __inline__ void atomic64_##op(long i, atomic64_t * v)		\
 {									\
 	unsigned long temp;						\
 	__asm__ __volatile__(						\
 	"1:	ldq_l %0,%1\n"						\
-	"	" #op "q %0,%2,%0\n"					\
+	"	" #asm_op " %0,%2,%0\n"					\
 	"	stq_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -81,15 +81,15 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)		\
 	:"Ir" (i), "m" (v->counter));					\
 }									\
 
-#define ATOMIC64_OP_RETURN(op)						\
+#define ATOMIC64_OP_RETURN(op, asm_op)					\
 static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	\
 {									\
 	long temp, result;						\
 	smp_mb();							\
 	__asm__ __volatile__(						\
 	"1:	ldq_l %0,%1\n"						\
-	"	" #op "q %0,%3,%2\n"					\
-	"	" #op "q %0,%3,%0\n"					\
+	"	" #asm_op " %0,%3,%2\n"					\
+	"	" #asm_op " %0,%3,%0\n"					\
 	"	stq_c %0,%1\n"						\
 	"	beq %0,2f\n"						\
 	".subsection 2\n"						\
@@ -101,15 +101,27 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	\
 	return result;							\
 }
 
-#define ATOMIC_OPS(opg)							\
-	ATOMIC_OP(opg)							\
-	ATOMIC_OP_RETURN(opg)						\
-	ATOMIC64_OP(opg)						\
-	ATOMIC64_OP_RETURN(opg)
+#define ATOMIC_OPS(op)							\
+	ATOMIC_OP(op, op##l)						\
+	ATOMIC_OP_RETURN(op, op##l)					\
+	ATOMIC64_OP(op, op##q)						\
+	ATOMIC64_OP_RETURN(op, op##q)
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+#define atomic_andnot atomic_andnot
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(andnot, bic)
+ATOMIC_OP(or, bis)
+ATOMIC_OP(xor, xor)
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(andnot, bic)
+ATOMIC64_OP(or, bis)
+ATOMIC64_OP(xor, xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 87d18ae53115..c3ecda023e3a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -172,9 +172,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
-ATOMIC_OP(and, &=, and)
 
-#define atomic_clear_mask(mask, v) atomic_and(~(mask), (v))
+#define atomic_andnot atomic_andnot
+
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(andnot, &= ~, bic)
+ATOMIC_OP(or, |=, or)
+ATOMIC_OP(xor, ^=, xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index e22c11970b7b..fe3ef397f5a4 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -57,12 +57,11 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
-	smp_mb();							\
 	prefetchw(&v->counter);						\
 									\
 	__asm__ __volatile__("@ atomic_" #op "_return\n"		\
@@ -75,17 +74,17 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 	: "r" (&v->counter), "Ir" (i)					\
 	: "cc");							\
 									\
-	smp_mb();							\
-									\
 	return result;							\
 }
 
-static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+#define atomic_add_return_relaxed	atomic_add_return_relaxed
+#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+
+static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 {
 	int oldval;
 	unsigned long res;
 
-	smp_mb();
 	prefetchw(&ptr->counter);
 
 	do {
@@ -99,10 +98,9 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
 		    : "cc");
 	} while (res);
 
-	smp_mb();
-
 	return oldval;
 }
+#define atomic_cmpxchg_relaxed		atomic_cmpxchg_relaxed
 
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
@@ -194,6 +192,13 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
+#define atomic_andnot atomic_andnot
+
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(andnot, &= ~, bic)
+ATOMIC_OP(or,  |=, orr)
+ATOMIC_OP(xor, ^=, eor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -290,12 +295,12 @@ static inline void atomic64_##op(long long i, atomic64_t *v)		\
 }									\
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)				\
-static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
+static inline long long							\
+atomic64_##op##_return_relaxed(long long i, atomic64_t *v)		\
 {									\
 	long long result;						\
 	unsigned long tmp;						\
 									\
-	smp_mb();							\
 	prefetchw(&v->counter);						\
 									\
 	__asm__ __volatile__("@ atomic64_" #op "_return\n"		\
@@ -309,8 +314,6 @@ static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
 	: "r" (&v->counter), "r" (i)					\
 	: "cc");							\
 									\
-	smp_mb();							\
-									\
 	return result;							\
 }
 
@@ -321,17 +324,26 @@ static inline long long atomic64_##op##_return(long long i, atomic64_t *v) \
 ATOMIC64_OPS(add, adds, adc)
 ATOMIC64_OPS(sub, subs, sbc)
 
+#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
+#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OP(and, and, and)
+ATOMIC64_OP(andnot, bic, bic)
+ATOMIC64_OP(or,  orr, orr)
+ATOMIC64_OP(xor, eor, eor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
-					long long new)
+static inline long long
+atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new)
 {
 	long long oldval;
 	unsigned long res;
 
-	smp_mb();
 	prefetchw(&ptr->counter);
 
 	do {
@@ -346,17 +358,15 @@ static inline long long atomic64_cmpxchg(atomic64_t *ptr, long long old,
 		: "cc");
 	} while (res);
 
-	smp_mb();
-
 	return oldval;
 }
+#define atomic64_cmpxchg_relaxed	atomic64_cmpxchg_relaxed
 
-static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new)
 {
 	long long result;
 	unsigned long tmp;
 
-	smp_mb();
 	prefetchw(&ptr->counter);
 
 	__asm__ __volatile__("@ atomic64_xchg\n"
@@ -368,10 +378,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
 	: "r" (&ptr->counter), "r" (new)
 	: "cc");
 
-	smp_mb();
-
 	return result;
 }
+#define atomic64_xchg_relaxed		atomic64_xchg_relaxed
 
 static inline long long atomic64_dec_if_positive(atomic64_t *v)
 {
diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h
index 6c2327e1c732..70393574e0fa 100644
--- a/arch/arm/include/asm/barrier.h
+++ b/arch/arm/include/asm/barrier.h
@@ -67,12 +67,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 1692a05d3207..916a2744d5c6 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -35,7 +35,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	unsigned int tmp;
 #endif
 
-	smp_mb();
 	prefetchw((const void *)ptr);
 
 	switch (size) {
@@ -98,12 +97,11 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 		__bad_xchg(ptr, size), ret = 0;
 		break;
 	}
-	smp_mb();
 
 	return ret;
 }
 
-#define xchg(ptr, x) ({							\
+#define xchg_relaxed(ptr, x) ({						\
 	(__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr),		\
 				   sizeof(*(ptr)));			\
 })
@@ -117,6 +115,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 #error "SMP is not supported on this platform"
 #endif
 
+#define xchg xchg_relaxed
+
 /*
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
@@ -194,23 +194,11 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return oldval;
 }
 
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
-					 unsigned long new, int size)
-{
-	unsigned long ret;
-
-	smp_mb();
-	ret = __cmpxchg(ptr, old, new, size);
-	smp_mb();
-
-	return ret;
-}
-
-#define cmpxchg(ptr,o,n) ({						\
-	(__typeof__(*(ptr)))__cmpxchg_mb((ptr),				\
-					 (unsigned long)(o),		\
-					 (unsigned long)(n),		\
-					 sizeof(*(ptr)));		\
+#define cmpxchg_relaxed(ptr,o,n) ({					\
+	(__typeof__(*(ptr)))__cmpxchg((ptr),				\
+				      (unsigned long)(o),		\
+				      (unsigned long)(n),		\
+				      sizeof(*(ptr)));			\
 })
 
 static inline unsigned long __cmpxchg_local(volatile void *ptr,
@@ -273,25 +261,6 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 
 #define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
 
-static inline unsigned long long __cmpxchg64_mb(unsigned long long *ptr,
-						unsigned long long old,
-						unsigned long long new)
-{
-	unsigned long long ret;
-
-	smp_mb();
-	ret = __cmpxchg64(ptr, old, new);
-	smp_mb();
-
-	return ret;
-}
-
-#define cmpxchg64(ptr, o, n) ({						\
-	(__typeof__(*(ptr)))__cmpxchg64_mb((ptr),			\
-					   (unsigned long long)(o),	\
-					   (unsigned long long)(n));	\
-})
-
 #endif	/* __LINUX_ARM_ARCH__ >= 6 */
 
 #endif /* __ASM_ARM_CMPXCHG_H */
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 5f337dc5c108..34f7b6980d21 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -4,23 +4,32 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/unified.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-#ifdef CONFIG_THUMB2_KERNEL
-#define JUMP_LABEL_NOP	"nop.w"
-#else
-#define JUMP_LABEL_NOP	"nop"
-#endif
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 WASM(nop) "\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
-		 JUMP_LABEL_NOP "\n\t"
+		 WASM(b) " %l[l_yes]\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".word 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
-		 : :  "i" (key) :  : l_yes);
+		 : :  "i" (&((char *)key)[branch]) :  : l_yes);
 
 	return false;
 l_yes:
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index e39cbf488cfe..845a5dd9c42b 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
 	void *addr = (void *)entry->code;
 	unsigned int insn;
 
-	if (type == JUMP_LABEL_ENABLE)
+	if (type == JUMP_LABEL_JMP)
 		insn = arm_gen_branch(entry->code, entry->target);
 	else
 		insn = arm_gen_nop();
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 7047051ded40..866a71fca9a3 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -85,6 +85,13 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, sub)
 
+#define atomic_andnot atomic_andnot
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(andnot, bic)
+ATOMIC_OP(or, orr)
+ATOMIC_OP(xor, eor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -183,6 +190,13 @@ static inline long atomic64_##op##_return(long i, atomic64_t *v)	\
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, sub)
 
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(andnot, bic)
+ATOMIC64_OP(or, orr)
+ATOMIC64_OP(xor, eor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 0fa47c4275cb..ef93b20bc964 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -44,12 +44,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index c0e5165c2f76..1b5e0e843c3a 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -26,14 +26,28 @@
 
 #define JUMP_LABEL_NOP_SIZE		AARCH64_INSN_SIZE
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm goto("1: nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 ".align 3\n\t"
 		 ".quad 1b, %l[l_yes], %c0\n\t"
 		 ".popsection\n\t"
-		 :  :  "i"(key) :  : l_yes);
+		 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm goto("1: b %l[l_yes]\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 3\n\t"
+		 ".quad 1b, %l[l_yes], %c0\n\t"
+		 ".popsection\n\t"
+		 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
 	return false;
 l_yes:
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 4f1fec7a46db..c2dd1ad3e648 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	void *addr = (void *)entry->code;
 	u32 insn;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		insn = aarch64_insn_gen_branch_imm(entry->code,
 						   entry->target,
 						   AARCH64_INSN_BRANCH_NOLINK);
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h
index 2d07ce1c5327..97c9bdf83409 100644
--- a/arch/avr32/include/asm/atomic.h
+++ b/arch/avr32/include/asm/atomic.h
@@ -44,6 +44,18 @@ static inline int __atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OP_RETURN(sub, sub, rKs21)
 ATOMIC_OP_RETURN(add, add, r)
 
+#define ATOMIC_OP(op, asm_op)						\
+ATOMIC_OP_RETURN(op, asm_op, r)						\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	(void)__atomic_##op##_return(i, v);				\
+}
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(or, or)
+ATOMIC_OP(xor, eor)
+
+#undef ATOMIC_OP
 #undef ATOMIC_OP_RETURN
 
 /*
diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h
index a107a98e9978..1c1c42330c99 100644
--- a/arch/blackfin/include/asm/atomic.h
+++ b/arch/blackfin/include/asm/atomic.h
@@ -16,19 +16,21 @@
 #include <linux/types.h>
 
 asmlinkage int __raw_uncached_fetch_asm(const volatile int *ptr);
-asmlinkage int __raw_atomic_update_asm(volatile int *ptr, int value);
-asmlinkage int __raw_atomic_clear_asm(volatile int *ptr, int value);
-asmlinkage int __raw_atomic_set_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_add_asm(volatile int *ptr, int value);
+
+asmlinkage int __raw_atomic_and_asm(volatile int *ptr, int value);
+asmlinkage int __raw_atomic_or_asm(volatile int *ptr, int value);
 asmlinkage int __raw_atomic_xor_asm(volatile int *ptr, int value);
 asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value);
 
 #define atomic_read(v) __raw_uncached_fetch_asm(&(v)->counter)
 
-#define atomic_add_return(i, v) __raw_atomic_update_asm(&(v)->counter, i)
-#define atomic_sub_return(i, v) __raw_atomic_update_asm(&(v)->counter, -(i))
+#define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i)
+#define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i))
 
-#define atomic_clear_mask(m, v) __raw_atomic_clear_asm(&(v)->counter, m)
-#define atomic_set_mask(m, v)   __raw_atomic_set_asm(&(v)->counter, m)
+#define atomic_or(i, v)  (void)__raw_atomic_or_asm(&(v)->counter, i)
+#define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i)
+#define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i)
 
 #endif
 
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index c446591b961d..a401c27b69b4 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -83,11 +83,12 @@ EXPORT_SYMBOL(insl);
 EXPORT_SYMBOL(insl_16);
 
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(__raw_atomic_update_asm);
-EXPORT_SYMBOL(__raw_atomic_clear_asm);
-EXPORT_SYMBOL(__raw_atomic_set_asm);
+EXPORT_SYMBOL(__raw_atomic_add_asm);
+EXPORT_SYMBOL(__raw_atomic_and_asm);
+EXPORT_SYMBOL(__raw_atomic_or_asm);
 EXPORT_SYMBOL(__raw_atomic_xor_asm);
 EXPORT_SYMBOL(__raw_atomic_test_asm);
+
 EXPORT_SYMBOL(__raw_xchg_1_asm);
 EXPORT_SYMBOL(__raw_xchg_2_asm);
 EXPORT_SYMBOL(__raw_xchg_4_asm);
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
index 2a08df8e8c4c..26fccb5568b9 100644
--- a/arch/blackfin/mach-bf561/atomic.S
+++ b/arch/blackfin/mach-bf561/atomic.S
@@ -587,10 +587,10 @@ ENDPROC(___raw_write_unlock_asm)
  * r0 = ptr
  * r1 = value
  *
- * Add a signed value to a 32bit word and return the new value atomically.
+ * ADD a signed value to a 32bit word and return the new value atomically.
  * Clobbers: r3:0, p1:0
  */
-ENTRY(___raw_atomic_update_asm)
+ENTRY(___raw_atomic_add_asm)
 	p1 = r0;
 	r3 = r1;
 	[--sp] = rets;
@@ -603,19 +603,19 @@ ENTRY(___raw_atomic_update_asm)
 	r0 = r3;
 	rets = [sp++];
 	rts;
-ENDPROC(___raw_atomic_update_asm)
+ENDPROC(___raw_atomic_add_asm)
 
 /*
  * r0 = ptr
  * r1 = mask
  *
- * Clear the mask bits from a 32bit word and return the old 32bit value
+ * AND the mask bits from a 32bit word and return the old 32bit value
  * atomically.
  * Clobbers: r3:0, p1:0
  */
-ENTRY(___raw_atomic_clear_asm)
+ENTRY(___raw_atomic_and_asm)
 	p1 = r0;
-	r3 = ~r1;
+	r3 = r1;
 	[--sp] = rets;
 	call _get_core_lock;
 	r2 = [p1];
@@ -627,17 +627,17 @@ ENTRY(___raw_atomic_clear_asm)
 	r0 = r3;
 	rets = [sp++];
 	rts;
-ENDPROC(___raw_atomic_clear_asm)
+ENDPROC(___raw_atomic_and_asm)
 
 /*
  * r0 = ptr
  * r1 = mask
  *
- * Set the mask bits into a 32bit word and return the old 32bit value
+ * OR the mask bits into a 32bit word and return the old 32bit value
  * atomically.
  * Clobbers: r3:0, p1:0
  */
-ENTRY(___raw_atomic_set_asm)
+ENTRY(___raw_atomic_or_asm)
 	p1 = r0;
 	r3 = r1;
 	[--sp] = rets;
@@ -651,7 +651,7 @@ ENTRY(___raw_atomic_set_asm)
 	r0 = r3;
 	rets = [sp++];
 	rts;
-ENDPROC(___raw_atomic_set_asm)
+ENDPROC(___raw_atomic_or_asm)
 
 /*
  * r0 = ptr
@@ -787,7 +787,7 @@ ENTRY(___raw_bit_set_asm)
 	r2 = r1;
 	r1 = 1;
 	r1 <<= r2;
-	jump ___raw_atomic_set_asm
+	jump ___raw_atomic_or_asm
 ENDPROC(___raw_bit_set_asm)
 
 /*
@@ -798,10 +798,10 @@ ENDPROC(___raw_bit_set_asm)
  * Clobbers: r3:0, p1:0
  */
 ENTRY(___raw_bit_clear_asm)
-	r2 = r1;
-	r1 = 1;
-	r1 <<= r2;
-	jump ___raw_atomic_clear_asm
+	r2 = 1;
+	r2 <<= r1;
+	r1 = ~r2;
+	jump ___raw_atomic_and_asm
 ENDPROC(___raw_bit_clear_asm)
 
 /*
diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c
index 1c7259597395..0030e21cfceb 100644
--- a/arch/blackfin/mach-common/smp.c
+++ b/arch/blackfin/mach-common/smp.c
@@ -195,7 +195,7 @@ void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
 	local_irq_save(flags);
 	for_each_cpu(cpu, cpumask) {
 		bfin_ipi_data = &per_cpu(bfin_ipi, cpu);
-		atomic_set_mask((1 << msg), &bfin_ipi_data->bits);
+		atomic_or((1 << msg), &bfin_ipi_data->bits);
 		atomic_inc(&bfin_ipi_data->count);
 	}
 	local_irq_restore(flags);
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h
index 102190a61d65..0da689def4cc 100644
--- a/arch/frv/include/asm/atomic.h
+++ b/arch/frv/include/asm/atomic.h
@@ -15,7 +15,6 @@
 #define _ASM_ATOMIC_H
 
 #include <linux/types.h>
-#include <asm/spr-regs.h>
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
@@ -23,6 +22,8 @@
 #error not SMP safe
 #endif
 
+#include <asm/atomic_defs.h>
+
 /*
  * Atomic operations that C can't guarantee us.  Useful for
  * resource counting etc..
@@ -34,56 +35,26 @@
 #define atomic_read(v)		ACCESS_ONCE((v)->counter)
 #define atomic_set(v, i)	(((v)->counter) = (i))
 
-#ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
-static inline int atomic_add_return(int i, atomic_t *v)
+static inline int atomic_inc_return(atomic_t *v)
 {
-	unsigned long val;
+	return __atomic_add_return(1, &v->counter);
+}
 
-	asm("0:						\n"
-	    "	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-	    "	ckeq		icc3,cc7		\n"
-	    "	ld.p		%M0,%1			\n"	/* LD.P/ORCR must be atomic */
-	    "	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-	    "	add%I2		%1,%2,%1		\n"
-	    "	cst.p		%1,%M0		,cc3,#1	\n"
-	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* clear ICC3.Z if store happens */
-	    "	beq		icc3,#0,0b		\n"
-	    : "+U"(v->counter), "=&r"(val)
-	    : "NPr"(i)
-	    : "memory", "cc7", "cc3", "icc3"
-	    );
+static inline int atomic_dec_return(atomic_t *v)
+{
+	return __atomic_sub_return(1, &v->counter);
+}
 
-	return val;
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+	return __atomic_add_return(i, &v->counter);
 }
 
 static inline int atomic_sub_return(int i, atomic_t *v)
 {
-	unsigned long val;
-
-	asm("0:						\n"
-	    "	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-	    "	ckeq		icc3,cc7		\n"
-	    "	ld.p		%M0,%1			\n"	/* LD.P/ORCR must be atomic */
-	    "	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-	    "	sub%I2		%1,%2,%1		\n"
-	    "	cst.p		%1,%M0		,cc3,#1	\n"
-	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* clear ICC3.Z if store happens */
-	    "	beq		icc3,#0,0b		\n"
-	    : "+U"(v->counter), "=&r"(val)
-	    : "NPr"(i)
-	    : "memory", "cc7", "cc3", "icc3"
-	    );
-
-	return val;
+	return __atomic_sub_return(i, &v->counter);
 }
 
-#else
-
-extern int atomic_add_return(int i, atomic_t *v);
-extern int atomic_sub_return(int i, atomic_t *v);
-
-#endif
-
 static inline int atomic_add_negative(int i, atomic_t *v)
 {
 	return atomic_add_return(i, v) < 0;
@@ -101,17 +72,14 @@ static inline void atomic_sub(int i, atomic_t *v)
 
 static inline void atomic_inc(atomic_t *v)
 {
-	atomic_add_return(1, v);
+	atomic_inc_return(v);
 }
 
 static inline void atomic_dec(atomic_t *v)
 {
-	atomic_sub_return(1, v);
+	atomic_dec_return(v);
 }
 
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-
 #define atomic_sub_and_test(i,v)	(atomic_sub_return((i), (v)) == 0)
 #define atomic_dec_and_test(v)		(atomic_sub_return(1, (v)) == 0)
 #define atomic_inc_and_test(v)		(atomic_add_return(1, (v)) == 0)
@@ -120,18 +88,19 @@ static inline void atomic_dec(atomic_t *v)
  * 64-bit atomic ops
  */
 typedef struct {
-	volatile long long counter;
+	long long counter;
 } atomic64_t;
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-static inline long long atomic64_read(atomic64_t *v)
+static inline long long atomic64_read(const atomic64_t *v)
 {
 	long long counter;
 
 	asm("ldd%I1 %M1,%0"
 	    : "=e"(counter)
 	    : "m"(v->counter));
+
 	return counter;
 }
 
@@ -142,10 +111,25 @@ static inline void atomic64_set(atomic64_t *v, long long i)
 		     : "e"(i));
 }
 
-extern long long atomic64_inc_return(atomic64_t *v);
-extern long long atomic64_dec_return(atomic64_t *v);
-extern long long atomic64_add_return(long long i, atomic64_t *v);
-extern long long atomic64_sub_return(long long i, atomic64_t *v);
+static inline long long atomic64_inc_return(atomic64_t *v)
+{
+	return __atomic64_add_return(1, &v->counter);
+}
+
+static inline long long atomic64_dec_return(atomic64_t *v)
+{
+	return __atomic64_sub_return(1, &v->counter);
+}
+
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
+{
+	return __atomic64_add_return(i, &v->counter);
+}
+
+static inline long long atomic64_sub_return(long long i, atomic64_t *v)
+{
+	return __atomic64_sub_return(i, &v->counter);
+}
 
 static inline long long atomic64_add_negative(long long i, atomic64_t *v)
 {
@@ -176,6 +160,7 @@ static inline void atomic64_dec(atomic64_t *v)
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_and_test(v)	(atomic64_inc_return((v)) == 0)
 
+
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&(v)->counter, old, new))
 #define atomic_xchg(v, new)		(xchg(&(v)->counter, new))
 #define atomic64_cmpxchg(v, old, new)	(__cmpxchg_64(old, new, &(v)->counter))
@@ -196,5 +181,21 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
+#define ATOMIC_OP(op)							\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	(void)__atomic32_fetch_##op(i, &v->counter);			\
+}									\
+									\
+static inline void atomic64_##op(long long i, atomic64_t *v)		\
+{									\
+	(void)__atomic64_fetch_##op(i, &v->counter);			\
+}
+
+ATOMIC_OP(or)
+ATOMIC_OP(and)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
 
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/frv/include/asm/atomic_defs.h b/arch/frv/include/asm/atomic_defs.h
new file mode 100644
index 000000000000..36e126d2f801
--- /dev/null
+++ b/arch/frv/include/asm/atomic_defs.h
@@ -0,0 +1,172 @@
+
+#include <asm/spr-regs.h>
+
+#ifdef __ATOMIC_LIB__
+
+#ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
+
+#define ATOMIC_QUALS
+#define ATOMIC_EXPORT(x)	EXPORT_SYMBOL(x)
+
+#else /* !OUTOFLINE && LIB */
+
+#define ATOMIC_OP_RETURN(op)
+#define ATOMIC_FETCH_OP(op)
+
+#endif /* OUTOFLINE */
+
+#else /* !__ATOMIC_LIB__ */
+
+#define ATOMIC_EXPORT(x)
+
+#ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
+
+#define ATOMIC_OP_RETURN(op)						\
+extern int __atomic_##op##_return(int i, int *v);			\
+extern long long __atomic64_##op##_return(long long i, long long *v);
+
+#define ATOMIC_FETCH_OP(op)						\
+extern int __atomic32_fetch_##op(int i, int *v);			\
+extern long long __atomic64_fetch_##op(long long i, long long *v);
+
+#else /* !OUTOFLINE && !LIB */
+
+#define ATOMIC_QUALS	static inline
+
+#endif /* OUTOFLINE */
+#endif /* __ATOMIC_LIB__ */
+
+
+/*
+ * Note on the 64 bit inline asm variants...
+ *
+ * CSTD is a conditional instruction and needs a constrained memory reference.
+ * Normally 'U' provides the correct constraints for conditional instructions
+ * and this is used for the 32 bit version, however 'U' does not appear to work
+ * for 64 bit values (gcc-4.9)
+ *
+ * The exact constraint is that conditional instructions cannot deal with an
+ * immediate displacement in the memory reference, so what we do is we read the
+ * address through a volatile cast into a local variable in order to insure we
+ * _have_ to compute the correct address without displacement. This allows us
+ * to use the regular 'm' for the memory address.
+ *
+ * Furthermore, the %Ln operand, which prints the low word register (r+1),
+ * really only works for registers, this means we cannot allow immediate values
+ * for the 64 bit versions -- like we do for the 32 bit ones.
+ *
+ */
+
+#ifndef ATOMIC_OP_RETURN
+#define ATOMIC_OP_RETURN(op)						\
+ATOMIC_QUALS int __atomic_##op##_return(int i, int *v)			\
+{									\
+	int val;							\
+									\
+	asm volatile(							\
+	    "0:						\n"		\
+	    "	orcc		gr0,gr0,gr0,icc3	\n"		\
+	    "	ckeq		icc3,cc7		\n"		\
+	    "	ld.p		%M0,%1			\n"		\
+	    "	orcr		cc7,cc7,cc3		\n"		\
+	    "   "#op"%I2	%1,%2,%1		\n"		\
+	    "	cst.p		%1,%M0		,cc3,#1	\n"		\
+	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"		\
+	    "	beq		icc3,#0,0b		\n"		\
+	    : "+U"(*v), "=&r"(val)					\
+	    : "NPr"(i)							\
+	    : "memory", "cc7", "cc3", "icc3"				\
+	    );								\
+									\
+	return val;							\
+}									\
+ATOMIC_EXPORT(__atomic_##op##_return);					\
+									\
+ATOMIC_QUALS long long __atomic64_##op##_return(long long i, long long *v)	\
+{									\
+	long long *__v = READ_ONCE(v);					\
+	long long val;							\
+									\
+	asm volatile(							\
+	    "0:						\n"		\
+	    "	orcc		gr0,gr0,gr0,icc3	\n"		\
+	    "	ckeq		icc3,cc7		\n"		\
+	    "	ldd.p		%M0,%1			\n"		\
+	    "	orcr		cc7,cc7,cc3		\n"		\
+	    "   "#op"cc		%L1,%L2,%L1,icc0	\n"		\
+	    "   "#op"x		%1,%2,%1,icc0		\n"		\
+	    "	cstd.p		%1,%M0		,cc3,#1	\n"		\
+	    "	corcc		gr29,gr29,gr0	,cc3,#1	\n"		\
+	    "	beq		icc3,#0,0b		\n"		\
+	    : "+m"(*__v), "=&e"(val)					\
+	    : "e"(i)							\
+	    : "memory", "cc7", "cc3", "icc0", "icc3"			\
+	    );								\
+									\
+	return val;							\
+}									\
+ATOMIC_EXPORT(__atomic64_##op##_return);
+#endif
+
+#ifndef ATOMIC_FETCH_OP
+#define ATOMIC_FETCH_OP(op)						\
+ATOMIC_QUALS int __atomic32_fetch_##op(int i, int *v)			\
+{									\
+	int old, tmp;							\
+									\
+	asm volatile(							\
+		"0:						\n"	\
+		"	orcc		gr0,gr0,gr0,icc3	\n"	\
+		"	ckeq		icc3,cc7		\n"	\
+		"	ld.p		%M0,%1			\n"	\
+		"	orcr		cc7,cc7,cc3		\n"	\
+		"	"#op"%I3	%1,%3,%2		\n"	\
+		"	cst.p		%2,%M0		,cc3,#1	\n"	\
+		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	\
+		"	beq		icc3,#0,0b		\n"	\
+		: "+U"(*v), "=&r"(old), "=r"(tmp)			\
+		: "NPr"(i)						\
+		: "memory", "cc7", "cc3", "icc3"			\
+		);							\
+									\
+	return old;							\
+}									\
+ATOMIC_EXPORT(__atomic32_fetch_##op);					\
+									\
+ATOMIC_QUALS long long __atomic64_fetch_##op(long long i, long long *v)	\
+{									\
+	long long *__v = READ_ONCE(v);					\
+	long long old, tmp;						\
+									\
+	asm volatile(							\
+		"0:						\n"	\
+		"	orcc		gr0,gr0,gr0,icc3	\n"	\
+		"	ckeq		icc3,cc7		\n"	\
+		"	ldd.p		%M0,%1			\n"	\
+		"	orcr		cc7,cc7,cc3		\n"	\
+		"	"#op"		%L1,%L3,%L2		\n"	\
+		"	"#op"		%1,%3,%2		\n"	\
+		"	cstd.p		%2,%M0		,cc3,#1	\n"	\
+		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	\
+		"	beq		icc3,#0,0b		\n"	\
+		: "+m"(*__v), "=&e"(old), "=e"(tmp)			\
+		: "e"(i)						\
+		: "memory", "cc7", "cc3", "icc3"			\
+		);							\
+									\
+	return old;							\
+}									\
+ATOMIC_EXPORT(__atomic64_fetch_##op);
+#endif
+
+ATOMIC_FETCH_OP(or)
+ATOMIC_FETCH_OP(and)
+ATOMIC_FETCH_OP(xor)
+
+ATOMIC_OP_RETURN(add)
+ATOMIC_OP_RETURN(sub)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_QUALS
+#undef ATOMIC_EXPORT
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 96de220ef131..0df8e95e3715 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -25,109 +25,30 @@
 
 #include <asm-generic/bitops/ffz.h>
 
-#ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
-static inline
-unsigned long atomic_test_and_ANDNOT_mask(unsigned long mask, volatile unsigned long *v)
-{
-	unsigned long old, tmp;
-
-	asm volatile(
-		"0:						\n"
-		"	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-		"	ckeq		icc3,cc7		\n"
-		"	ld.p		%M0,%1			\n"	/* LD.P/ORCR are atomic */
-		"	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-		"	and%I3		%1,%3,%2		\n"
-		"	cst.p		%2,%M0		,cc3,#1	\n"	/* if store happens... */
-		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* ... clear ICC3.Z */
-		"	beq		icc3,#0,0b		\n"
-		: "+U"(*v), "=&r"(old), "=r"(tmp)
-		: "NPr"(~mask)
-		: "memory", "cc7", "cc3", "icc3"
-		);
-
-	return old;
-}
-
-static inline
-unsigned long atomic_test_and_OR_mask(unsigned long mask, volatile unsigned long *v)
-{
-	unsigned long old, tmp;
-
-	asm volatile(
-		"0:						\n"
-		"	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-		"	ckeq		icc3,cc7		\n"
-		"	ld.p		%M0,%1			\n"	/* LD.P/ORCR are atomic */
-		"	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-		"	or%I3		%1,%3,%2		\n"
-		"	cst.p		%2,%M0		,cc3,#1	\n"	/* if store happens... */
-		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* ... clear ICC3.Z */
-		"	beq		icc3,#0,0b		\n"
-		: "+U"(*v), "=&r"(old), "=r"(tmp)
-		: "NPr"(mask)
-		: "memory", "cc7", "cc3", "icc3"
-		);
-
-	return old;
-}
-
-static inline
-unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsigned long *v)
-{
-	unsigned long old, tmp;
-
-	asm volatile(
-		"0:						\n"
-		"	orcc		gr0,gr0,gr0,icc3	\n"	/* set ICC3.Z */
-		"	ckeq		icc3,cc7		\n"
-		"	ld.p		%M0,%1			\n"	/* LD.P/ORCR are atomic */
-		"	orcr		cc7,cc7,cc3		\n"	/* set CC3 to true */
-		"	xor%I3		%1,%3,%2		\n"
-		"	cst.p		%2,%M0		,cc3,#1	\n"	/* if store happens... */
-		"	corcc		gr29,gr29,gr0	,cc3,#1	\n"	/* ... clear ICC3.Z */
-		"	beq		icc3,#0,0b		\n"
-		: "+U"(*v), "=&r"(old), "=r"(tmp)
-		: "NPr"(mask)
-		: "memory", "cc7", "cc3", "icc3"
-		);
-
-	return old;
-}
-
-#else
-
-extern unsigned long atomic_test_and_ANDNOT_mask(unsigned long mask, volatile unsigned long *v);
-extern unsigned long atomic_test_and_OR_mask(unsigned long mask, volatile unsigned long *v);
-extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsigned long *v);
-
-#endif
-
-#define atomic_clear_mask(mask, v)	atomic_test_and_ANDNOT_mask((mask), (v))
-#define atomic_set_mask(mask, v)	atomic_test_and_OR_mask((mask), (v))
+#include <asm/atomic.h>
 
 static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
-	volatile unsigned long *ptr = addr;
-	unsigned long mask = 1UL << (nr & 31);
+	unsigned int *ptr = (void *)addr;
+	unsigned int mask = 1UL << (nr & 31);
 	ptr += nr >> 5;
-	return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
+	return (__atomic32_fetch_and(~mask, ptr) & mask) != 0;
 }
 
 static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
-	volatile unsigned long *ptr = addr;
-	unsigned long mask = 1UL << (nr & 31);
+	unsigned int *ptr = (void *)addr;
+	unsigned int mask = 1UL << (nr & 31);
 	ptr += nr >> 5;
-	return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
+	return (__atomic32_fetch_or(mask, ptr) & mask) != 0;
 }
 
 static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
-	volatile unsigned long *ptr = addr;
-	unsigned long mask = 1UL << (nr & 31);
+	unsigned int *ptr = (void *)addr;
+	unsigned int mask = 1UL << (nr & 31);
 	ptr += nr >> 5;
-	return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
+	return (__atomic32_fetch_xor(mask, ptr) & mask) != 0;
 }
 
 static inline void clear_bit(unsigned long nr, volatile void *addr)
diff --git a/arch/frv/kernel/dma.c b/arch/frv/kernel/dma.c
index 156184e17e57..370dc9fa0b11 100644
--- a/arch/frv/kernel/dma.c
+++ b/arch/frv/kernel/dma.c
@@ -109,13 +109,13 @@ static struct frv_dma_channel frv_dma_channels[FRV_DMA_NCHANS] = {
 
 static DEFINE_RWLOCK(frv_dma_channels_lock);
 
-unsigned long frv_dma_inprogress;
+unsigned int frv_dma_inprogress;
 
 #define frv_clear_dma_inprogress(channel) \
-	atomic_clear_mask(1 << (channel), &frv_dma_inprogress);
+	(void)__atomic32_fetch_and(~(1 << (channel)), &frv_dma_inprogress);
 
 #define frv_set_dma_inprogress(channel) \
-	atomic_set_mask(1 << (channel), &frv_dma_inprogress);
+	(void)__atomic32_fetch_or(1 << (channel), &frv_dma_inprogress);
 
 /*****************************************************************************/
 /*
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index 86c516d96dcd..cdb4ce9960eb 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -58,11 +58,6 @@ EXPORT_SYMBOL(__outsl_ns);
 EXPORT_SYMBOL(__insl_ns);
 
 #ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
-EXPORT_SYMBOL(atomic_test_and_ANDNOT_mask);
-EXPORT_SYMBOL(atomic_test_and_OR_mask);
-EXPORT_SYMBOL(atomic_test_and_XOR_mask);
-EXPORT_SYMBOL(atomic_add_return);
-EXPORT_SYMBOL(atomic_sub_return);
 EXPORT_SYMBOL(__xchg_32);
 EXPORT_SYMBOL(__cmpxchg_32);
 #endif
diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile
index 4ff2fb1e6b16..970e8b4f1a02 100644
--- a/arch/frv/lib/Makefile
+++ b/arch/frv/lib/Makefile
@@ -5,4 +5,4 @@
 lib-y := \
 	__ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
 	checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \
-	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
+	outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o atomic-lib.o
diff --git a/arch/frv/lib/atomic-lib.c b/arch/frv/lib/atomic-lib.c
new file mode 100644
index 000000000000..4d1b887c248b
--- /dev/null
+++ b/arch/frv/lib/atomic-lib.c
@@ -0,0 +1,7 @@
+
+#include <linux/export.h>
+#include <asm/atomic.h>
+
+#define __ATOMIC_LIB__
+
+#include <asm/atomic_defs.h>
diff --git a/arch/frv/lib/atomic-ops.S b/arch/frv/lib/atomic-ops.S
index 5e9e6ab5dd0e..b7439a960b5b 100644
--- a/arch/frv/lib/atomic-ops.S
+++ b/arch/frv/lib/atomic-ops.S
@@ -19,116 +19,6 @@
 
 ###############################################################################
 #
-# unsigned long atomic_test_and_ANDNOT_mask(unsigned long mask, volatile unsigned long *v);
-#
-###############################################################################
-	.globl		atomic_test_and_ANDNOT_mask
-        .type		atomic_test_and_ANDNOT_mask,@function
-atomic_test_and_ANDNOT_mask:
-	not.p		gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	and		gr8,gr10,gr11
-	cst.p		gr11,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_test_and_ANDNOT_mask, .-atomic_test_and_ANDNOT_mask
-
-###############################################################################
-#
-# unsigned long atomic_test_and_OR_mask(unsigned long mask, volatile unsigned long *v);
-#
-###############################################################################
-	.globl		atomic_test_and_OR_mask
-        .type		atomic_test_and_OR_mask,@function
-atomic_test_and_OR_mask:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	or		gr8,gr10,gr11
-	cst.p		gr11,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_test_and_OR_mask, .-atomic_test_and_OR_mask
-
-###############################################################################
-#
-# unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsigned long *v);
-#
-###############################################################################
-	.globl		atomic_test_and_XOR_mask
-        .type		atomic_test_and_XOR_mask,@function
-atomic_test_and_XOR_mask:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	xor		gr8,gr10,gr11
-	cst.p		gr11,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_test_and_XOR_mask, .-atomic_test_and_XOR_mask
-
-###############################################################################
-#
-# int atomic_add_return(int i, atomic_t *v)
-#
-###############################################################################
-	.globl		atomic_add_return
-        .type		atomic_add_return,@function
-atomic_add_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	add		gr8,gr10,gr8
-	cst.p		gr8,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_add_return, .-atomic_add_return
-
-###############################################################################
-#
-# int atomic_sub_return(int i, atomic_t *v)
-#
-###############################################################################
-	.globl		atomic_sub_return
-        .type		atomic_sub_return,@function
-atomic_sub_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ld.p		@(gr9,gr0),gr8			/* LD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	sub		gr8,gr10,gr8
-	cst.p		gr8,@(gr9,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic_sub_return, .-atomic_sub_return
-
-###############################################################################
-#
 # uint32_t __xchg_32(uint32_t i, uint32_t *v)
 #
 ###############################################################################
diff --git a/arch/frv/lib/atomic64-ops.S b/arch/frv/lib/atomic64-ops.S
index b6194eeac127..c4c472308a33 100644
--- a/arch/frv/lib/atomic64-ops.S
+++ b/arch/frv/lib/atomic64-ops.S
@@ -20,100 +20,6 @@
 
 ###############################################################################
 #
-# long long atomic64_inc_return(atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_inc_return
-        .type		atomic64_inc_return,@function
-atomic64_inc_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	addicc		gr9,#1,gr9,icc0
-	addxi		gr8,#0,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_inc_return, .-atomic64_inc_return
-
-###############################################################################
-#
-# long long atomic64_dec_return(atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_dec_return
-        .type		atomic64_dec_return,@function
-atomic64_dec_return:
-	or.p		gr8,gr8,gr10
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	subicc		gr9,#1,gr9,icc0
-	subxi		gr8,#0,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_dec_return, .-atomic64_dec_return
-
-###############################################################################
-#
-# long long atomic64_add_return(long long i, atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_add_return
-        .type		atomic64_add_return,@function
-atomic64_add_return:
-	or.p		gr8,gr8,gr4
-	or		gr9,gr9,gr5
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	addcc		gr9,gr5,gr9,icc0
-	addx		gr8,gr4,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_add_return, .-atomic64_add_return
-
-###############################################################################
-#
-# long long atomic64_sub_return(long long i, atomic64_t *v)
-#
-###############################################################################
-	.globl		atomic64_sub_return
-        .type		atomic64_sub_return,@function
-atomic64_sub_return:
-	or.p		gr8,gr8,gr4
-	or		gr9,gr9,gr5
-0:
-	orcc		gr0,gr0,gr0,icc3		/* set ICC3.Z */
-	ckeq		icc3,cc7
-	ldd.p		@(gr10,gr0),gr8			/* LDD.P/ORCR must be atomic */
-	orcr		cc7,cc7,cc3			/* set CC3 to true */
-	subcc		gr9,gr5,gr9,icc0
-	subx		gr8,gr4,gr8,icc0
-	cstd.p		gr8,@(gr10,gr0)		,cc3,#1
-	corcc		gr29,gr29,gr0		,cc3,#1	/* clear ICC3.Z if store happens */
-	beq		icc3,#0,0b
-	bralr
-
-	.size		atomic64_sub_return, .-atomic64_sub_return
-
-###############################################################################
-#
 # uint64_t __xchg_64(uint64_t i, uint64_t *v)
 #
 ###############################################################################
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
index 7ca73f8546cc..702ee539f87d 100644
--- a/arch/h8300/include/asm/atomic.h
+++ b/arch/h8300/include/asm/atomic.h
@@ -16,83 +16,52 @@
 
 #include <linux/kernel.h>
 
-static inline int atomic_add_return(int i, atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
-
-	flags = arch_local_irq_save();
-	ret = v->counter += i;
-	arch_local_irq_restore(flags);
-	return ret;
+#define ATOMIC_OP_RETURN(op, c_op)				\
+static inline int atomic_##op##_return(int i, atomic_t *v)	\
+{								\
+	h8300flags flags;					\
+	int ret;						\
+								\
+	flags = arch_local_irq_save();				\
+	ret = v->counter c_op i;				\
+	arch_local_irq_restore(flags);				\
+	return ret;						\
 }
 
-#define atomic_add(i, v) atomic_add_return(i, v)
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
-
-	flags = arch_local_irq_save();
-	ret = v->counter -= i;
-	arch_local_irq_restore(flags);
-	return ret;
+#define ATOMIC_OP(op, c_op)					\
+static inline void atomic_##op(int i, atomic_t *v)		\
+{								\
+	h8300flags flags;					\
+								\
+	flags = arch_local_irq_save();				\
+	v->counter c_op i;					\
+	arch_local_irq_restore(flags);				\
 }
 
-#define atomic_sub(i, v) atomic_sub_return(i, v)
-#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
+ATOMIC_OP_RETURN(add, +=)
+ATOMIC_OP_RETURN(sub, -=)
 
-static inline int atomic_inc_return(atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or,  |=)
+ATOMIC_OP(xor, ^=)
 
-	flags = arch_local_irq_save();
-	v->counter++;
-	ret = v->counter;
-	arch_local_irq_restore(flags);
-	return ret;
-}
-
-#define atomic_inc(v) atomic_inc_return(v)
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
 
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+#define atomic_add(i, v)		(void)atomic_add_return(i, v)
+#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
 
-static inline int atomic_dec_return(atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
+#define atomic_sub(i, v)		(void)atomic_sub_return(i, v)
+#define atomic_sub_and_test(i, v)	(atomic_sub_return(i, v) == 0)
 
-	flags = arch_local_irq_save();
-	--v->counter;
-	ret = v->counter;
-	arch_local_irq_restore(flags);
-	return ret;
-}
+#define atomic_inc_return(v)		atomic_add_return(1, v)
+#define atomic_dec_return(v)		atomic_sub_return(1, v)
 
-#define atomic_dec(v) atomic_dec_return(v)
+#define atomic_inc(v)			(void)atomic_inc_return(v)
+#define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
 
-static inline int atomic_dec_and_test(atomic_t *v)
-{
-	h8300flags flags;
-	int ret;
-
-	flags = arch_local_irq_save();
-	--v->counter;
-	ret = v->counter;
-	arch_local_irq_restore(flags);
-	return ret == 0;
-}
+#define atomic_dec(v)			(void)atomic_dec_return(v)
+#define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
@@ -120,40 +89,4 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return ret;
 }
 
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *v)
-{
-	unsigned char ccr;
-	unsigned long tmp;
-
-	__asm__ __volatile__("stc ccr,%w3\n\t"
-			     "orc #0x80,ccr\n\t"
-			     "mov.l %0,%1\n\t"
-			     "and.l %2,%1\n\t"
-			     "mov.l %1,%0\n\t"
-			     "ldc %w3,ccr"
-			     : "=m"(*v), "=r"(tmp)
-			     : "g"(~(mask)), "r"(ccr));
-}
-
-static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
-{
-	unsigned char ccr;
-	unsigned long tmp;
-
-	__asm__ __volatile__("stc ccr,%w3\n\t"
-			     "orc #0x80,ccr\n\t"
-			     "mov.l %0,%1\n\t"
-			     "or.l %2,%1\n\t"
-			     "mov.l %1,%0\n\t"
-			     "ldc %w3,ccr"
-			     : "=m"(*v), "=r"(tmp)
-			     : "g"(~(mask)), "r"(ccr));
-}
-
-/* Atomic operations are already serializing */
-#define smp_mb__before_atomic_dec()    barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc()    barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 93d07025f183..811d61f6422d 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -132,6 +132,10 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index 0bf03501fe5c..be4beeb77d57 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -45,8 +45,6 @@ ia64_atomic_##op (int i, atomic_t *v)					\
 ATOMIC_OP(add, +)
 ATOMIC_OP(sub, -)
 
-#undef ATOMIC_OP
-
 #define atomic_add_return(i,v)						\
 ({									\
 	int __ia64_aar_i = (i);						\
@@ -71,6 +69,16 @@ ATOMIC_OP(sub, -)
 		: ia64_atomic_sub(__ia64_asr_i, v);			\
 })
 
+ATOMIC_OP(and, &)
+ATOMIC_OP(or, |)
+ATOMIC_OP(xor, ^)
+
+#define atomic_and(i,v)	(void)ia64_atomic_and(i,v)
+#define atomic_or(i,v)	(void)ia64_atomic_or(i,v)
+#define atomic_xor(i,v)	(void)ia64_atomic_xor(i,v)
+
+#undef ATOMIC_OP
+
 #define ATOMIC64_OP(op, c_op)						\
 static __inline__ long							\
 ia64_atomic64_##op (__s64 i, atomic64_t *v)				\
@@ -89,8 +97,6 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v)				\
 ATOMIC64_OP(add, +)
 ATOMIC64_OP(sub, -)
 
-#undef ATOMIC64_OP
-
 #define atomic64_add_return(i,v)					\
 ({									\
 	long __ia64_aar_i = (i);					\
@@ -115,6 +121,16 @@ ATOMIC64_OP(sub, -)
 		: ia64_atomic64_sub(__ia64_asr_i, v);			\
 })
 
+ATOMIC64_OP(and, &)
+ATOMIC64_OP(or, |)
+ATOMIC64_OP(xor, ^)
+
+#define atomic64_and(i,v)	(void)ia64_atomic64_and(i,v)
+#define atomic64_or(i,v)	(void)ia64_atomic64_or(i,v)
+#define atomic64_xor(i,v)	(void)ia64_atomic64_xor(i,v)
+
+#undef ATOMIC64_OP
+
 #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h
index 843ba435e43b..df896a1c41d3 100644
--- a/arch/ia64/include/asm/barrier.h
+++ b/arch/ia64/include/asm/barrier.h
@@ -66,12 +66,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h
index 31bb74adba08..025e2a170493 100644
--- a/arch/m32r/include/asm/atomic.h
+++ b/arch/m32r/include/asm/atomic.h
@@ -94,6 +94,10 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -239,45 +243,4 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-static __inline__ void atomic_clear_mask(unsigned long  mask, atomic_t *addr)
-{
-	unsigned long flags;
-	unsigned long tmp;
-
-	local_irq_save(flags);
-	__asm__ __volatile__ (
-		"# atomic_clear_mask		\n\t"
-		DCACHE_CLEAR("%0", "r5", "%1")
-		M32R_LOCK" %0, @%1;		\n\t"
-		"and	%0, %2;			\n\t"
-		M32R_UNLOCK" %0, @%1;		\n\t"
-		: "=&r" (tmp)
-		: "r" (addr), "r" (~mask)
-		: "memory"
-		__ATOMIC_CLOBBER
-	);
-	local_irq_restore(flags);
-}
-
-static __inline__ void atomic_set_mask(unsigned long  mask, atomic_t *addr)
-{
-	unsigned long flags;
-	unsigned long tmp;
-
-	local_irq_save(flags);
-	__asm__ __volatile__ (
-		"# atomic_set_mask		\n\t"
-		DCACHE_CLEAR("%0", "r5", "%1")
-		M32R_LOCK" %0, @%1;		\n\t"
-		"or	%0, %2;			\n\t"
-		M32R_UNLOCK" %0, @%1;		\n\t"
-		: "=&r" (tmp)
-		: "r" (addr), "r" (mask)
-		: "memory"
-		__ATOMIC_CLOBBER
-	);
-	local_irq_restore(flags);
-}
-
 #endif	/* _ASM_M32R_ATOMIC_H */
diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c
index c18ddc74ef9a..62d6961e7f2b 100644
--- a/arch/m32r/kernel/smp.c
+++ b/arch/m32r/kernel/smp.c
@@ -156,7 +156,7 @@ void smp_flush_cache_all(void)
 	cpumask_clear_cpu(smp_processor_id(), &cpumask);
 	spin_lock(&flushcache_lock);
 	mask=cpumask_bits(&cpumask);
-	atomic_set_mask(*mask, (atomic_t *)&flushcache_cpumask);
+	atomic_or(*mask, (atomic_t *)&flushcache_cpumask);
 	send_IPI_mask(&cpumask, INVALIDATE_CACHE_IPI, 0);
 	_flush_cache_copyback_all();
 	while (flushcache_cpumask)
@@ -407,7 +407,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	flush_vma = vma;
 	flush_va = va;
 	mask=cpumask_bits(&cpumask);
-	atomic_set_mask(*mask, (atomic_t *)&flush_cpumask);
+	atomic_or(*mask, (atomic_t *)&flush_cpumask);
 
 	/*
 	 * We have to send the IPI only to
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index e85f047fb072..039fac120cc0 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -77,6 +77,10 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(or, |=, or)
+ATOMIC_OP(xor, ^=, eor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -170,16 +174,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 	return c != 0;
 }
 
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *v)
-{
-	__asm__ __volatile__("andl %1,%0" : "+m" (*v) : ASM_DI (~(mask)));
-}
-
-static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
-{
-	__asm__ __volatile__("orl %1,%0" : "+m" (*v) : ASM_DI (mask));
-}
-
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
index 948d8688643c..21c4c268b86c 100644
--- a/arch/metag/include/asm/atomic_lnkget.h
+++ b/arch/metag/include/asm/atomic_lnkget.h
@@ -74,44 +74,14 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	int temp;
-
-	asm volatile (
-		"1:	LNKGETD %0, [%1]\n"
-		"	AND	%0, %0, %2\n"
-		"	LNKSETD	[%1] %0\n"
-		"	DEFR	%0, TXSTAT\n"
-		"	ANDT	%0, %0, #HI(0x3f000000)\n"
-		"	CMPT	%0, #HI(0x02000000)\n"
-		"	BNZ	1b\n"
-		: "=&d" (temp)
-		: "da" (&v->counter), "bd" (~mask)
-		: "cc");
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	int temp;
-
-	asm volatile (
-		"1:	LNKGETD %0, [%1]\n"
-		"	OR	%0, %0, %2\n"
-		"	LNKSETD	[%1], %0\n"
-		"	DEFR	%0, TXSTAT\n"
-		"	ANDT	%0, %0, #HI(0x3f000000)\n"
-		"	CMPT	%0, #HI(0x02000000)\n"
-		"	BNZ	1b\n"
-		: "=&d" (temp)
-		: "da" (&v->counter), "bd" (mask)
-		: "cc");
-}
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int result, temp;
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
index f5d5898c1020..f8efe380fe8b 100644
--- a/arch/metag/include/asm/atomic_lock1.h
+++ b/arch/metag/include/asm/atomic_lock1.h
@@ -68,31 +68,14 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long flags;
-
-	__global_lock1(flags);
-	fence();
-	v->counter &= ~mask;
-	__global_unlock1(flags);
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long flags;
-
-	__global_lock1(flags);
-	fence();
-	v->counter |= mask;
-	__global_unlock1(flags);
-}
-
 static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
index 5a696e507930..172b7e5efc53 100644
--- a/arch/metag/include/asm/barrier.h
+++ b/arch/metag/include/asm/barrier.h
@@ -90,12 +90,12 @@ static inline void fence(void)
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 26d436336f2e..4c42fd9af777 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -137,6 +137,10 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v)		      \
 ATOMIC_OPS(add, +=, addu)
 ATOMIC_OPS(sub, -=, subu)
 
+ATOMIC_OP(and, &=, and)
+ATOMIC_OP(or, |=, or)
+ATOMIC_OP(xor, ^=, xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -416,6 +420,9 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v)	      \
 
 ATOMIC64_OPS(add, +=, daddu)
 ATOMIC64_OPS(sub, -=, dsubu)
+ATOMIC64_OP(and, &=, and)
+ATOMIC64_OP(or, |=, or)
+ATOMIC64_OP(xor, ^=, xor)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index 7ecba84656d4..752e0b86c171 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -133,12 +133,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index 608aa57799c8..e77672539e8e 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -26,14 +26,29 @@
 #define NOP_INSN "nop"
 #endif
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\t" NOP_INSN "\n\t"
 		"nop\n\t"
 		".pushsection __jump_table,  \"aw\"\n\t"
 		WORD_INSN " 1b, %l[l_yes], %0\n\t"
 		".popsection\n\t"
-		: :  "i" (key) : : l_yes);
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\tj %l[l_yes]\n\t"
+		"nop\n\t"
+		".pushsection __jump_table,  \"aw\"\n\t"
+		WORD_INSN " 1b, %l[l_yes], %0\n\t"
+		".popsection\n\t"
+		: :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index dda800e9e731..3e586daa3a32 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -51,7 +51,7 @@ void arch_jump_label_transform(struct jump_entry *e,
 	/* Target must have the right alignment and ISA must be preserved. */
 	BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT);
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op;
 		insn.j_format.target = e->target >> J_RANGE_SHIFT;
 	} else {
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h
index 5be655e83e70..375e59140c9c 100644
--- a/arch/mn10300/include/asm/atomic.h
+++ b/arch/mn10300/include/asm/atomic.h
@@ -89,6 +89,10 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -127,73 +131,6 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
-/**
- * atomic_clear_mask - Atomically clear bits in memory
- * @mask: Mask of the bits to be cleared
- * @v: pointer to word in memory
- *
- * Atomically clears the bits set in mask from the memory word specified.
- */
-static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
-{
-#ifdef CONFIG_SMP
-	int status;
-
-	asm volatile(
-		"1:	mov	%3,(_AAR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"
-		"	and	%4,%0		\n"
-		"	mov	%0,(_ADR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"	/* flush */
-		"	mov	(_ASR,%2),%0	\n"
-		"	or	%0,%0		\n"
-		"	bne	1b		\n"
-		: "=&r"(status), "=m"(*addr)
-		: "a"(ATOMIC_OPS_BASE_ADDR), "r"(addr), "r"(~mask)
-		: "memory", "cc");
-#else
-	unsigned long flags;
-
-	mask = ~mask;
-	flags = arch_local_cli_save();
-	*addr &= mask;
-	arch_local_irq_restore(flags);
-#endif
-}
-
-/**
- * atomic_set_mask - Atomically set bits in memory
- * @mask: Mask of the bits to be set
- * @v: pointer to word in memory
- *
- * Atomically sets the bits set in mask from the memory word specified.
- */
-static inline void atomic_set_mask(unsigned long mask, unsigned long *addr)
-{
-#ifdef CONFIG_SMP
-	int status;
-
-	asm volatile(
-		"1:	mov	%3,(_AAR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"
-		"	or	%4,%0		\n"
-		"	mov	%0,(_ADR,%2)	\n"
-		"	mov	(_ADR,%2),%0	\n"	/* flush */
-		"	mov	(_ASR,%2),%0	\n"
-		"	or	%0,%0		\n"
-		"	bne	1b		\n"
-		: "=&r"(status), "=m"(*addr)
-		: "a"(ATOMIC_OPS_BASE_ADDR), "r"(addr), "r"(mask)
-		: "memory", "cc");
-#else
-	unsigned long flags;
-
-	flags = arch_local_cli_save();
-	*addr |= mask;
-	arch_local_irq_restore(flags);
-#endif
-}
-
 #endif /* __KERNEL__ */
 #endif /* CONFIG_SMP */
 #endif /* _ASM_ATOMIC_H */
diff --git a/arch/mn10300/mm/tlb-smp.c b/arch/mn10300/mm/tlb-smp.c
index e5d0ef722bfa..9a39ea9031d4 100644
--- a/arch/mn10300/mm/tlb-smp.c
+++ b/arch/mn10300/mm/tlb-smp.c
@@ -119,7 +119,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 	flush_mm = mm;
 	flush_va = va;
 #if NR_CPUS <= BITS_PER_LONG
-	atomic_set_mask(cpumask.bits[0], &flush_cpumask.bits[0]);
+	atomic_or(cpumask.bits[0], (atomic_t *)&flush_cpumask.bits[0]);
 #else
 #error Not supported.
 #endif
diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig
index 269c23d23fcb..1a8f6f95689e 100644
--- a/arch/parisc/configs/c8000_defconfig
+++ b/arch/parisc/configs/c8000_defconfig
@@ -242,7 +242,6 @@ CONFIG_LOCKUP_DETECTOR=y
 CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_RT_MUTEX_TESTER=y
 CONFIG_PROVE_RCU_DELAY=y
 CONFIG_DEBUG_BLOCK_EXT_DEVT=y
 CONFIG_LATENCYTOP=y
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 33b148f825ba..0ffb08ff5125 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -295,7 +295,6 @@ CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_RT_MUTEX_TESTER=y
 CONFIG_DEBUG_SPINLOCK=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_RCU_CPU_STALL_INFO=y
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 226f8ca993f6..2536965d00ea 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -126,6 +126,10 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
 
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -185,6 +189,9 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)	\
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
+ATOMIC64_OP(and, &=)
+ATOMIC64_OP(or, |=)
+ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 512d2782b043..55f106ed12bf 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -67,6 +67,10 @@ static __inline__ int atomic_##op##_return(int a, atomic_t *v)		\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
 
+ATOMIC_OP(and, and)
+ATOMIC_OP(or, or)
+ATOMIC_OP(xor, xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -304,6 +308,9 @@ static __inline__ long atomic64_##op##_return(long a, atomic64_t *v)	\
 
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, subf)
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(or, or)
+ATOMIC64_OP(xor, xor)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 51ccc7232042..0eca6efc0631 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -76,12 +76,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_lwsync();							\
 	___p1;								\
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index efbf9a322a23..47e155f15433 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -18,14 +18,29 @@
 #define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
 #define JUMP_LABEL_NOP_SIZE	4
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:\n\t"
 		 "nop\n\t"
 		 ".pushsection __jump_table,  \"aw\"\n\t"
 		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
 		 ".popsection \n\t"
-		 : :  "i" (key) : : l_yes);
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index a1ed8a8c7cb4..6472472093d0 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 {
 	u32 *addr = (u32 *)(unsigned long)entry->code;
 
-	if (type == JUMP_LABEL_ENABLE)
+	if (type == JUMP_LABEL_JMP)
 		patch_branch(addr, entry->target, 0);
 	else
 		patch_instruction(addr, PPC_INST_NOP);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7c6bb4b17b49..ed3ab509faca 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -596,25 +596,6 @@ _GLOBAL(copy_page)
 	b	2b
 
 /*
- * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
- * void atomic_set_mask(atomic_t mask, atomic_t *addr);
- */
-_GLOBAL(atomic_clear_mask)
-10:	lwarx	r5,0,r4
-	andc	r5,r5,r3
-	PPC405_ERR77(0,r4)
-	stwcx.	r5,0,r4
-	bne-	10b
-	blr
-_GLOBAL(atomic_set_mask)
-10:	lwarx	r5,0,r4
-	or	r5,r5,r3
-	PPC405_ERR77(0,r4)
-	stwcx.	r5,0,r4
-	bne-	10b
-	blr
-
-/*
  * Extended precision shifts.
  *
  * Updated to be valid for shift counts from 0 to 63 inclusive.
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index adbe3802e377..117fa5c921c1 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -27,6 +27,7 @@
 #define __ATOMIC_OR	"lao"
 #define __ATOMIC_AND	"lan"
 #define __ATOMIC_ADD	"laa"
+#define __ATOMIC_XOR	"lax"
 #define __ATOMIC_BARRIER "bcr	14,0\n"
 
 #define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -49,6 +50,7 @@
 #define __ATOMIC_OR	"or"
 #define __ATOMIC_AND	"nr"
 #define __ATOMIC_ADD	"ar"
+#define __ATOMIC_XOR	"xr"
 #define __ATOMIC_BARRIER "\n"
 
 #define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -118,15 +120,17 @@ static inline void atomic_add(int i, atomic_t *v)
 #define atomic_dec_return(_v)		atomic_sub_return(1, _v)
 #define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	__ATOMIC_LOOP(v, ~mask, __ATOMIC_AND, __ATOMIC_NO_BARRIER);
+#define ATOMIC_OP(op, OP)						\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	__ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER);	\
 }
 
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	__ATOMIC_LOOP(v, mask, __ATOMIC_OR, __ATOMIC_NO_BARRIER);
-}
+ATOMIC_OP(and, AND)
+ATOMIC_OP(or, OR)
+ATOMIC_OP(xor, XOR)
+
+#undef ATOMIC_OP
 
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
@@ -167,6 +171,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 #define __ATOMIC64_OR	"laog"
 #define __ATOMIC64_AND	"lang"
 #define __ATOMIC64_ADD	"laag"
+#define __ATOMIC64_XOR	"laxg"
 #define __ATOMIC64_BARRIER "bcr	14,0\n"
 
 #define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -189,6 +194,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 #define __ATOMIC64_OR	"ogr"
 #define __ATOMIC64_AND	"ngr"
 #define __ATOMIC64_ADD	"agr"
+#define __ATOMIC64_XOR	"xgr"
 #define __ATOMIC64_BARRIER "\n"
 
 #define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier)		\
@@ -247,16 +253,6 @@ static inline void atomic64_add(long long i, atomic64_t *v)
 	__ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
 }
 
-static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v)
-{
-	__ATOMIC64_LOOP(v, ~mask, __ATOMIC64_AND, __ATOMIC64_NO_BARRIER);
-}
-
-static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v)
-{
-	__ATOMIC64_LOOP(v, mask, __ATOMIC64_OR, __ATOMIC64_NO_BARRIER);
-}
-
 #define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
 
 static inline long long atomic64_cmpxchg(atomic64_t *v,
@@ -270,6 +266,17 @@ static inline long long atomic64_cmpxchg(atomic64_t *v,
 	return old;
 }
 
+#define ATOMIC64_OP(op, OP)						\
+static inline void atomic64_##op(long i, atomic64_t *v)			\
+{									\
+	__ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER);	\
+}
+
+ATOMIC64_OP(and, AND)
+ATOMIC64_OP(or, OR)
+ATOMIC64_OP(xor, XOR)
+
+#undef ATOMIC64_OP
 #undef __ATOMIC64_LOOP
 
 static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index e6f8615a11eb..d48fe0162331 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -42,12 +42,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 69972b7957ee..7f9fd5e3f1bf 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -12,14 +12,29 @@
  * We use a brcl 0,2 instruction for jump labels at compile time so it
  * can be easily distinguished from a hotpatch generated instruction.
  */
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("0:	brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n"
 		".pushsection __jump_table, \"aw\"\n"
 		".balign 8\n"
 		".quad 0b, %l[label], %0\n"
 		".popsection\n"
-		: : "X" (key) : : label);
+		: : "X" (&((char *)key)[branch]) : : label);
+
+	return false;
+label:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("0:	brcl 15, %l[label]\n"
+		".pushsection __jump_table, \"aw\"\n"
+		".balign 8\n"
+		".quad 0b, %l[label], %0\n"
+		".popsection\n"
+		: : "X" (&((char *)key)[branch]) : : label);
+
 	return false;
 label:
 	return true;
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index c9dac2139f59..083b05f5f5ab 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -61,7 +61,7 @@ static void __jump_label_transform(struct jump_entry *entry,
 {
 	struct insn old, new;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		jump_label_make_nop(entry, &old);
 		jump_label_make_branch(entry, &new);
 	} else {
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 52524b9083c3..017c3a9bfc28 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -378,7 +378,7 @@ static void disable_sync_clock(void *dummy)
 	 * increase the "sequence" counter to avoid the race of an
 	 * etr event and the complete recovery against get_sync_clock.
 	 */
-	atomic_clear_mask(0x80000000, sw_ptr);
+	atomic_andnot(0x80000000, sw_ptr);
 	atomic_inc(sw_ptr);
 }
 
@@ -389,7 +389,7 @@ static void disable_sync_clock(void *dummy)
 static void enable_sync_clock(void)
 {
 	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
-	atomic_set_mask(0x80000000, sw_ptr);
+	atomic_or(0x80000000, sw_ptr);
 }
 
 /*
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b277d50dcf76..5c2c169395c3 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -173,20 +173,20 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
 	set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
 }
 
 static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
 	clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
 }
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
-			  &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+		    &vcpu->arch.sie_block->cpuflags);
 	vcpu->arch.sie_block->lctl = 0x0000;
 	vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
 
@@ -199,7 +199,7 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
 
 static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
 {
-	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
 static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
@@ -928,7 +928,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 	spin_unlock(&li->lock);
 
 	/* clear pending external calls set by sigp interpretation facility */
-	atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
+	atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
 	vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
 }
 
@@ -1026,7 +1026,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 
 	li->irq.ext = irq->u.ext;
 	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1041,7 +1041,7 @@ static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id)
 		/* another external call is pending */
 		return -EBUSY;
 	}
-	atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
 	return 0;
 }
 
@@ -1067,7 +1067,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
 		return -EBUSY;
 	*extcall = irq->u.extcall;
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1139,7 +1139,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1183,7 +1183,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
 				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1196,7 +1196,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 				   0, 0);
 
 	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
@@ -1375,13 +1375,13 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type)
 	spin_lock(&li->lock);
 	switch (type) {
 	case KVM_S390_MCHK:
-		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+		atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-		atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+		atomic_or(CPUSTAT_IO_INT, li->cpuflags);
 		break;
 	default:
-		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
 		break;
 	}
 	spin_unlock(&li->lock);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 98df53c01343..c91eb941b444 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1333,12 +1333,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	save_access_regs(vcpu->arch.host_acrs);
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
-	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
 
 	save_fpu_regs();
@@ -1443,9 +1443,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 						    CPUSTAT_STOPPED);
 
 	if (test_kvm_facility(vcpu->kvm, 78))
-		atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
 	else if (test_kvm_facility(vcpu->kvm, 8))
-		atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
 
 	kvm_s390_vcpu_setup_model(vcpu);
 
@@ -1557,24 +1557,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
 	exit_sie(vcpu);
 }
 
 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
 }
 
 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
 	exit_sie(vcpu);
 }
 
 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
 {
-	atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
 }
 
 /*
@@ -1583,7 +1583,7 @@ static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
  * return immediately. */
 void exit_sie(struct kvm_vcpu *vcpu)
 {
-	atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
 		cpu_relax();
 }
@@ -1807,19 +1807,19 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
 		vcpu->guest_debug = dbg->control;
 		/* enforce guest PER */
-		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
 
 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
 			rc = kvm_s390_import_bp_data(vcpu, dbg);
 	} else {
-		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
 		vcpu->arch.guestdbg.last_bp = 0;
 	}
 
 	if (rc) {
 		vcpu->guest_debug = 0;
 		kvm_s390_clear_bp_data(vcpu);
-		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
 	}
 
 	return rc;
@@ -1894,7 +1894,7 @@ retry:
 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
 		if (!ibs_enabled(vcpu)) {
 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
-			atomic_set_mask(CPUSTAT_IBS,
+			atomic_or(CPUSTAT_IBS,
 					&vcpu->arch.sie_block->cpuflags);
 		}
 		goto retry;
@@ -1903,7 +1903,7 @@ retry:
 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
 		if (ibs_enabled(vcpu)) {
 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
-			atomic_clear_mask(CPUSTAT_IBS,
+			atomic_andnot(CPUSTAT_IBS,
 					  &vcpu->arch.sie_block->cpuflags);
 		}
 		goto retry;
@@ -2419,7 +2419,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
 		__disable_ibs_on_all_vcpus(vcpu->kvm);
 	}
 
-	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 	/*
 	 * Another VCPU might have used IBS while we were offline.
 	 * Let's play safe and flush the VCPU at startup.
@@ -2445,7 +2445,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
 	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
 	kvm_s390_clear_stop_irq(vcpu);
 
-	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
 	__disable_ibs_on_vcpu(vcpu);
 
 	for (i = 0; i < online_vcpus; i++) {
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 0d002a746bec..ae4de559e3a0 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -15,7 +15,7 @@
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
 
-static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+static DEFINE_STATIC_KEY_FALSE(have_mvcos);
 
 static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
 						 unsigned long size)
@@ -104,7 +104,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr,
 
 unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 		return copy_from_user_mvcos(to, from, n);
 	return copy_from_user_mvcp(to, from, n);
 }
@@ -177,7 +177,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x,
 
 unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 		return copy_to_user_mvcos(to, from, n);
 	return copy_to_user_mvcs(to, from, n);
 }
@@ -240,7 +240,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user
 
 unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 		return copy_in_user_mvcos(to, from, n);
 	return copy_in_user_mvc(to, from, n);
 }
@@ -312,7 +312,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size)
 
 unsigned long __clear_user(void __user *to, unsigned long size)
 {
-	if (static_key_false(&have_mvcos))
+	if (static_branch_likely(&have_mvcos))
 			return clear_user_mvcos(to, size);
 	return clear_user_xc(to, size);
 }
@@ -373,7 +373,7 @@ EXPORT_SYMBOL(__strncpy_from_user);
 static int __init uaccess_init(void)
 {
 	if (test_facility(27))
-		static_key_slow_inc(&have_mvcos);
+		static_branch_enable(&have_mvcos);
 	return 0;
 }
 early_initcall(uaccess_init);
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index 97a5fda83450..b94df40e5f2d 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -48,47 +48,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	int tmp;
-	unsigned int _mask = ~mask;
-
-	__asm__ __volatile__ (
-		"   .align 2              \n\t"
-		"   mova    1f,   r0      \n\t" /* r0 = end point */
-		"   mov    r15,   r1      \n\t" /* r1 = saved sp */
-		"   mov    #-6,   r15     \n\t" /* LOGIN: r15 = size */
-		"   mov.l  @%1,   %0      \n\t" /* load  old value */
-		"   and     %2,   %0      \n\t" /* add */
-		"   mov.l   %0,   @%1     \n\t" /* store new value */
-		"1: mov     r1,   r15     \n\t" /* LOGOUT */
-		: "=&r" (tmp),
-		  "+r"  (v)
-		: "r"   (_mask)
-		: "memory" , "r0", "r1");
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	int tmp;
-
-	__asm__ __volatile__ (
-		"   .align 2              \n\t"
-		"   mova    1f,   r0      \n\t" /* r0 = end point */
-		"   mov    r15,   r1      \n\t" /* r1 = saved sp */
-		"   mov    #-6,   r15     \n\t" /* LOGIN: r15 = size */
-		"   mov.l  @%1,   %0      \n\t" /* load  old value */
-		"   or      %2,   %0      \n\t" /* or */
-		"   mov.l   %0,   @%1     \n\t" /* store new value */
-		"1: mov     r1,   r15     \n\t" /* LOGOUT */
-		: "=&r" (tmp),
-		  "+r"  (v)
-		: "r"   (mask)
-		: "memory" , "r0", "r1");
-}
-
 #endif /* __ASM_SH_ATOMIC_GRB_H */
diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h
index 61d107523f06..23fcdad5773e 100644
--- a/arch/sh/include/asm/atomic-irq.h
+++ b/arch/sh/include/asm/atomic-irq.h
@@ -37,27 +37,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add, +=)
 ATOMIC_OPS(sub, -=)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	v->counter &= ~mask;
-	raw_local_irq_restore(flags);
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long flags;
-
-	raw_local_irq_save(flags);
-	v->counter |= mask;
-	raw_local_irq_restore(flags);
-}
-
 #endif /* __ASM_SH_ATOMIC_IRQ_H */
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 8575dccb9ef7..33d34b16d4d6 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -52,37 +52,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-"1:	movli.l @%2, %0		! atomic_clear_mask	\n"
-"	and	%1, %0					\n"
-"	movco.l	%0, @%2					\n"
-"	bf	1b					\n"
-	: "=&z" (tmp)
-	: "r" (~mask), "r" (&v->counter)
-	: "t");
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-	unsigned long tmp;
-
-	__asm__ __volatile__ (
-"1:	movli.l @%2, %0		! atomic_set_mask	\n"
-"	or	%1, %0					\n"
-"	movco.l	%0, @%2					\n"
-"	bf	1b					\n"
-	: "=&z" (tmp)
-	: "r" (mask), "r" (&v->counter)
-	: "t");
-}
-
 #endif /* __ASM_SH_ATOMIC_LLSC_H */
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index 0e69b7e7a439..7dcbebbcaec6 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -17,10 +17,12 @@
 #include <asm/barrier.h>
 #include <asm-generic/atomic64.h>
 
-
 #define ATOMIC_INIT(i)  { (i) }
 
 int atomic_add_return(int, atomic_t *);
+void atomic_and(int, atomic_t *);
+void atomic_or(int, atomic_t *);
+void atomic_xor(int, atomic_t *);
 int atomic_cmpxchg(atomic_t *, int, int);
 int atomic_xchg(atomic_t *, int);
 int __atomic_add_unless(atomic_t *, int, int);
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 4082749913ce..917084ace49d 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -33,6 +33,10 @@ long atomic64_##op##_return(long, atomic64_t *);
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
index 809941e33e12..14a928601657 100644
--- a/arch/sparc/include/asm/barrier_64.h
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -60,12 +60,12 @@ do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index cc9b04a2b11b..62d0354d1727 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -7,16 +7,33 @@
 
 #define JUMP_LABEL_NOP_SIZE 4
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
-		asm_volatile_goto("1:\n\t"
-			 "nop\n\t"
-			 "nop\n\t"
-			 ".pushsection __jump_table,  \"aw\"\n\t"
-			 ".align 4\n\t"
-			 ".word 1b, %l[l_yes], %c0\n\t"
-			 ".popsection \n\t"
-			 : :  "i" (key) : : l_yes);
+	asm_volatile_goto("1:\n\t"
+		 "nop\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
index 48565c11e82a..59bbeff55024 100644
--- a/arch/sparc/kernel/jump_label.c
+++ b/arch/sparc/kernel/jump_label.c
@@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 	u32 val;
 	u32 *insn = (u32 *) (unsigned long) entry->code;
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		s32 off = (s32)entry->target - (s32)entry->code;
 
 #ifdef CONFIG_SPARC64
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 71cd65ab200c..b9d63c0a7aab 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -27,22 +27,38 @@ static DEFINE_SPINLOCK(dummy);
 
 #endif /* SMP */
 
-#define ATOMIC_OP(op, cop)						\
+#define ATOMIC_OP_RETURN(op, c_op)					\
 int atomic_##op##_return(int i, atomic_t *v)				\
 {									\
 	int ret;							\
 	unsigned long flags;						\
 	spin_lock_irqsave(ATOMIC_HASH(v), flags);			\
 									\
-	ret = (v->counter cop i);					\
+	ret = (v->counter c_op i);					\
 									\
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
 	return ret;							\
 }									\
 EXPORT_SYMBOL(atomic_##op##_return);
 
-ATOMIC_OP(add, +=)
+#define ATOMIC_OP(op, c_op)						\
+void atomic_##op(int i, atomic_t *v)					\
+{									\
+	unsigned long flags;						\
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);			\
+									\
+	v->counter c_op i;						\
+									\
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
+}									\
+EXPORT_SYMBOL(atomic_##op);
+
+ATOMIC_OP_RETURN(add, +=)
+ATOMIC_OP(and, &=)
+ATOMIC_OP(or, |=)
+ATOMIC_OP(xor, ^=)
 
+#undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
 int atomic_xchg(atomic_t *v, int new)
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 05dac43907d1..d6b0363f345b 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -47,6 +47,9 @@ ENDPROC(atomic_##op##_return);
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
@@ -84,6 +87,9 @@ ENDPROC(atomic64_##op##_return);
 
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
index 8069ce12f20b..8eb454cfe05c 100644
--- a/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@ -111,6 +111,9 @@ EXPORT_SYMBOL(atomic64_##op##_return);
 
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 1b109fad9fff..d320ce253d86 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -34,6 +34,19 @@ static inline void atomic_add(int i, atomic_t *v)
 	_atomic_xchg_add(&v->counter, i);
 }
 
+#define ATOMIC_OP(op)							\
+unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	_atomic_##op((unsigned long *)&v->counter, i);			\
+}
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
 /**
  * atomic_add_return - add integer and return
  * @v: pointer of type atomic_t
@@ -113,6 +126,17 @@ static inline void atomic64_add(long long i, atomic64_t *v)
 	_atomic64_xchg_add(&v->counter, i);
 }
 
+#define ATOMIC64_OP(op)						\
+long long _atomic64_##op(long long *v, long long n);		\
+static inline void atomic64_##op(long long i, atomic64_t *v)	\
+{								\
+	_atomic64_##op(&v->counter, i);				\
+}
+
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
 /**
  * atomic64_add_return - add integer and return
  * @v: pointer of type atomic64_t
@@ -225,6 +249,7 @@ extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
 						  int *lock, int o, int n);
 extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_and(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
 extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
 extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
@@ -234,6 +259,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
 					long long n);
 extern long long __atomic64_xchg_add_unless(volatile long long *p,
 					int *lock, long long o, long long n);
+extern long long __atomic64_and(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_or(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_xor(volatile long long *p, int *lock, long long n);
 
 /* Return failure from the atomic wrappers. */
 struct __get_user __atomic_bad_address(int __user *addr);
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 0496970cef82..096a56d6ead4 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -58,6 +58,26 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return oldval;
 }
 
+static inline void atomic_and(int i, atomic_t *v)
+{
+	__insn_fetchand4((void *)&v->counter, i);
+}
+
+static inline void atomic_or(int i, atomic_t *v)
+{
+	__insn_fetchor4((void *)&v->counter, i);
+}
+
+static inline void atomic_xor(int i, atomic_t *v)
+{
+	int guess, oldval = v->counter;
+	do {
+		guess = oldval;
+		__insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+		oldval = __insn_cmpexch4(&v->counter, guess ^ i);
+	} while (guess != oldval);
+}
+
 /* Now the true 64-bit operations. */
 
 #define ATOMIC64_INIT(i)	{ (i) }
@@ -91,6 +111,26 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
 	return oldval != u;
 }
 
+static inline void atomic64_and(long i, atomic64_t *v)
+{
+	__insn_fetchand((void *)&v->counter, i);
+}
+
+static inline void atomic64_or(long i, atomic64_t *v)
+{
+	__insn_fetchor((void *)&v->counter, i);
+}
+
+static inline void atomic64_xor(long i, atomic64_t *v)
+{
+	long guess, oldval = v->counter;
+	do {
+		guess = oldval;
+		__insn_mtspr(SPR_CMPEXCH_VALUE, guess);
+		oldval = __insn_cmpexch(&v->counter, guess ^ i);
+	} while (guess != oldval);
+}
+
 #define atomic64_sub_return(i, v)	atomic64_add_return(-(i), (v))
 #define atomic64_sub(i, v)		atomic64_add(-(i), (v))
 #define atomic64_inc_return(v)		atomic64_add_return(1, (v))
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index c89b211fd9e7..298df1e9912a 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -94,6 +94,12 @@ unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
 }
 EXPORT_SYMBOL(_atomic_or);
 
+unsigned long _atomic_and(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_and((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_and);
+
 unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
 {
 	return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
@@ -136,6 +142,23 @@ long long _atomic64_cmpxchg(long long *v, long long o, long long n)
 }
 EXPORT_SYMBOL(_atomic64_cmpxchg);
 
+long long _atomic64_and(long long *v, long long n)
+{
+	return __atomic64_and(v, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_and);
+
+long long _atomic64_or(long long *v, long long n)
+{
+	return __atomic64_or(v, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_or);
+
+long long _atomic64_xor(long long *v, long long n)
+{
+	return __atomic64_xor(v, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_xor);
 
 /*
  * If any of the atomic or futex routines hit a bad address (not in
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 6bda3132cd61..f611265633d6 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -178,6 +178,7 @@ atomic_op _xchg_add, 32, "add r24, r22, r2"
 atomic_op _xchg_add_unless, 32, \
 	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
 atomic_op _or, 32, "or r24, r22, r2"
+atomic_op _and, 32, "and r24, r22, r2"
 atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
 atomic_op _xor, 32, "xor r24, r22, r2"
 
@@ -191,6 +192,9 @@ atomic_op 64_xchg_add_unless, 64, \
 	{ bbns r26, 3f; add r24, r22, r4 }; \
 	{ bbns r27, 3f; add r25, r23, r5 }; \
 	slt_u r26, r24, r22; add r25, r25, r26"
+atomic_op 64_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
+atomic_op 64_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
+atomic_op 64_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
 
 	jrp     lr              /* happy backtracer */
 
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index e9168955c42f..fb52aa644aab 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -182,6 +182,21 @@ static inline int atomic_xchg(atomic_t *v, int new)
 	return xchg(&v->counter, new);
 }
 
+#define ATOMIC_OP(op)							\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	asm volatile(LOCK_PREFIX #op"l %1,%0"				\
+			: "+m" (v->counter)				\
+			: "ir" (i)					\
+			: "memory");					\
+}
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
 /**
  * __atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
@@ -219,16 +234,6 @@ static __always_inline short int atomic_inc_short(short int *v)
 	return *v;
 }
 
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr)				\
-	asm volatile(LOCK_PREFIX "andl %0,%1"			\
-		     : : "r" (~(mask)), "m" (*(addr)) : "memory")
-
-#define atomic_set_mask(mask, addr)				\
-	asm volatile(LOCK_PREFIX "orl %0,%1"			\
-		     : : "r" ((unsigned)(mask)), "m" (*(addr))	\
-		     : "memory")
-
 #ifdef CONFIG_X86_32
 # include <asm/atomic64_32.h>
 #else
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index b154de75c90c..a11c30b77fb5 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -313,4 +313,18 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
 #undef alternative_atomic64
 #undef __alternative_atomic64
 
+#define ATOMIC64_OP(op, c_op)						\
+static inline void atomic64_##op(long long i, atomic64_t *v)		\
+{									\
+	long long old, c = 0;						\
+	while ((old = atomic64_cmpxchg(v, c, c c_op i)) != c)		\
+		c = old;						\
+}
+
+ATOMIC64_OP(and, &)
+ATOMIC64_OP(or, |)
+ATOMIC64_OP(xor, ^)
+
+#undef ATOMIC64_OP
+
 #endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index b965f9e03f2a..50e33eff58de 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -220,4 +220,19 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	return dec;
 }
 
+#define ATOMIC64_OP(op)							\
+static inline void atomic64_##op(long i, atomic64_t *v)			\
+{									\
+	asm volatile(LOCK_PREFIX #op"q %1,%0"				\
+			: "+m" (v->counter)				\
+			: "er" (i)					\
+			: "memory");					\
+}
+
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
+#undef ATOMIC64_OP
+
 #endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 818cb8788225..0681d2532527 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -57,12 +57,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
@@ -74,12 +74,12 @@ do {									\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	barrier();							\
 	___p1;								\
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a4c1cf7e93f8..5daeca3d0f9e 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -16,15 +16,32 @@
 # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
 #endif
 
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
-		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
 		".popsection \n\t"
-		: :  "i" (key) : : l_yes);
+		: :  "i" (key), "i" (branch) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
+		"2:\n\t"
+		".pushsection __jump_table,  \"aw\" \n\t"
+		_ASM_ALIGN "\n\t"
+		_ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
+		".popsection \n\t"
+		: :  "i" (key), "i" (branch) : : l_yes);
+
 	return false;
 l_yes:
 	return true;
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
index ae0e241e228b..c537cbb038a7 100644
--- a/arch/x86/include/asm/qrwlock.h
+++ b/arch/x86/include/asm/qrwlock.h
@@ -2,16 +2,6 @@
 #define _ASM_X86_QRWLOCK_H
 
 #include <asm-generic/qrwlock_types.h>
-
-#ifndef CONFIG_X86_PPRO_FENCE
-#define queue_write_unlock queue_write_unlock
-static inline void queue_write_unlock(struct qrwlock *lock)
-{
-        barrier();
-        ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
-}
-#endif
-
 #include <asm-generic/qrwlock.h>
 
 #endif /* _ASM_X86_QRWLOCK_H */
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 26d5a55a2736..e565e0e4d216 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry,
 	const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
 	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
 
-	if (type == JUMP_LABEL_ENABLE) {
+	if (type == JUMP_LABEL_JMP) {
 		if (init) {
 			/*
 			 * Jump label is enabled for the first time.
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 79055cf2c497..c8d52cb4cb6e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -38,7 +38,7 @@ static int __read_mostly tsc_unstable;
    erroneous rdtsc usage on !cpu_has_tsc processors */
 static int __read_mostly tsc_disabled = -1;
 
-static struct static_key __use_tsc = STATIC_KEY_INIT;
+static DEFINE_STATIC_KEY_FALSE(__use_tsc);
 
 int tsc_clocksource_reliable;
 
@@ -274,7 +274,12 @@ done:
  */
 u64 native_sched_clock(void)
 {
-	u64 tsc_now;
+	if (static_branch_likely(&__use_tsc)) {
+		u64 tsc_now = rdtsc();
+
+		/* return the value in ns */
+		return cycles_2_ns(tsc_now);
+	}
 
 	/*
 	 * Fall back to jiffies if there's no TSC available:
@@ -284,16 +289,9 @@ u64 native_sched_clock(void)
 	 *   very important for it to be as fast as the platform
 	 *   can achieve it. )
 	 */
-	if (!static_key_false(&__use_tsc)) {
-		/* No locking but a rare wrong value is not a big deal: */
-		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-	}
-
-	/* read the Time Stamp Counter: */
-	tsc_now = rdtsc();
 
-	/* return the value in ns */
-	return cycles_2_ns(tsc_now);
+	/* No locking but a rare wrong value is not a big deal: */
+	return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 }
 
 /*
@@ -1212,7 +1210,7 @@ void __init tsc_init(void)
 	/* now allow native_sched_clock() to use rdtsc */
 
 	tsc_disabled = 0;
-	static_key_slow_inc(&__use_tsc);
+	static_branch_enable(&__use_tsc);
 
 	if (!no_sched_irq_time)
 		enable_sched_clock_irqtime();
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index e4d193e7a300..f3dfe0d921c2 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -616,7 +616,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_SLUB_DEBUG_ON is not set
 # CONFIG_SLUB_STATS is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
 # CONFIG_DEBUG_MUTEXES is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index ebcd1f6fc8cb..93795d047303 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -145,6 +145,10 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -250,75 +254,6 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
-{
-#if XCHAL_HAVE_S32C1I
-	unsigned long tmp;
-	int result;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       and     %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (~mask), "a" (v)
-			: "memory"
-			);
-#else
-	unsigned int all_f = -1;
-	unsigned int vval;
-
-	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
-			"       l32i    %0, %2, 0\n"
-			"       xor     %1, %4, %3\n"
-			"       and     %0, %0, %4\n"
-			"       s32i    %0, %2, 0\n"
-			"       wsr     a15, ps\n"
-			"       rsync\n"
-			: "=&a" (vval), "=a" (mask)
-			: "a" (v), "a" (all_f), "1" (mask)
-			: "a15", "memory"
-			);
-#endif
-}
-
-static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
-{
-#if XCHAL_HAVE_S32C1I
-	unsigned long tmp;
-	int result;
-
-	__asm__ __volatile__(
-			"1:     l32i    %1, %3, 0\n"
-			"       wsr     %1, scompare1\n"
-			"       or      %0, %1, %2\n"
-			"       s32c1i  %0, %3, 0\n"
-			"       bne     %0, %1, 1b\n"
-			: "=&a" (result), "=&a" (tmp)
-			: "a" (mask), "a" (v)
-			: "memory"
-			);
-#else
-	unsigned int vval;
-
-	__asm__ __volatile__(
-			"       rsil    a15,"__stringify(TOPLEVEL)"\n"
-			"       l32i    %0, %2, 0\n"
-			"       or      %0, %0, %1\n"
-			"       s32i    %0, %2, 0\n"
-			"       wsr     a15, ps\n"
-			"       rsync\n"
-			: "=&a" (vval)
-			: "a" (mask), "a" (v)
-			: "a15", "memory"
-			);
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 884b4f9b81c4..8917c98ff121 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -748,7 +748,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 	if (i915_gem_init_hw(dev)) {
 		DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
-		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+			atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
 	}
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 52b446b27b4d..7a918d1c12ba 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5091,7 +5091,7 @@ int i915_gem_init(struct drm_device *dev)
 		 * for all other failure, such as an allocation failure, bail.
 		 */
 		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
-		atomic_set_mask(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
+		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
 		ret = 0;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 984e2fe6688c..449a95c6c2a1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2446,7 +2446,7 @@ static void i915_reset_and_wakeup(struct drm_device *dev)
 			kobject_uevent_env(&dev->primary->kdev->kobj,
 					   KOBJ_CHANGE, reset_done_event);
 		} else {
-			atomic_set_mask(I915_WEDGED, &error->reset_counter);
+			atomic_or(I915_WEDGED, &error->reset_counter);
 		}
 
 		/*
@@ -2574,7 +2574,7 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
 	i915_report_and_clear_eir(dev);
 
 	if (wedged) {
-		atomic_set_mask(I915_RESET_IN_PROGRESS_FLAG,
+		atomic_or(I915_RESET_IN_PROGRESS_FLAG,
 				&dev_priv->gpu_error.reset_counter);
 
 		/*
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 01a73395a017..c00ac4650dce 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -529,7 +529,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,
 	list_add_tail(&port->list, &adapter->port_list);
 	write_unlock_irq(&adapter->port_list_lock);
 
-	atomic_set_mask(status | ZFCP_STATUS_COMMON_RUNNING, &port->status);
+	atomic_or(status | ZFCP_STATUS_COMMON_RUNNING, &port->status);
 
 	return port;
 
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index acde3f5d6e9e..3fb410977014 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -190,7 +190,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
 		if (!(act_status & ZFCP_STATUS_ERP_NO_REF))
 			if (scsi_device_get(sdev))
 				return NULL;
-		atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE,
 				&zfcp_sdev->status);
 		erp_action = &zfcp_sdev->erp_action;
 		memset(erp_action, 0, sizeof(struct zfcp_erp_action));
@@ -206,7 +206,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
 		if (!get_device(&port->dev))
 			return NULL;
 		zfcp_erp_action_dismiss_port(port);
-		atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status);
+		atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &port->status);
 		erp_action = &port->erp_action;
 		memset(erp_action, 0, sizeof(struct zfcp_erp_action));
 		erp_action->port = port;
@@ -217,7 +217,7 @@ static struct zfcp_erp_action *zfcp_erp_setup_act(int need, u32 act_status,
 	case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
 		kref_get(&adapter->ref);
 		zfcp_erp_action_dismiss_adapter(adapter);
-		atomic_set_mask(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status);
+		atomic_or(ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status);
 		erp_action = &adapter->erp_action;
 		memset(erp_action, 0, sizeof(struct zfcp_erp_action));
 		if (!(atomic_read(&adapter->status) &
@@ -254,7 +254,7 @@ static int zfcp_erp_action_enqueue(int want, struct zfcp_adapter *adapter,
 	act = zfcp_erp_setup_act(need, act_status, adapter, port, sdev);
 	if (!act)
 		goto out;
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING, &adapter->status);
+	atomic_or(ZFCP_STATUS_ADAPTER_ERP_PENDING, &adapter->status);
 	++adapter->erp_total_count;
 	list_add_tail(&act->list, &adapter->erp_ready_head);
 	wake_up(&adapter->erp_ready_wq);
@@ -486,14 +486,14 @@ static void zfcp_erp_adapter_unblock(struct zfcp_adapter *adapter)
 {
 	if (status_change_set(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status))
 		zfcp_dbf_rec_run("eraubl1", &adapter->erp_action);
-	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status);
+	atomic_or(ZFCP_STATUS_COMMON_UNBLOCKED, &adapter->status);
 }
 
 static void zfcp_erp_port_unblock(struct zfcp_port *port)
 {
 	if (status_change_set(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status))
 		zfcp_dbf_rec_run("erpubl1", &port->erp_action);
-	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status);
+	atomic_or(ZFCP_STATUS_COMMON_UNBLOCKED, &port->status);
 }
 
 static void zfcp_erp_lun_unblock(struct scsi_device *sdev)
@@ -502,7 +502,7 @@ static void zfcp_erp_lun_unblock(struct scsi_device *sdev)
 
 	if (status_change_set(ZFCP_STATUS_COMMON_UNBLOCKED, &zfcp_sdev->status))
 		zfcp_dbf_rec_run("erlubl1", &sdev_to_zfcp(sdev)->erp_action);
-	atomic_set_mask(ZFCP_STATUS_COMMON_UNBLOCKED, &zfcp_sdev->status);
+	atomic_or(ZFCP_STATUS_COMMON_UNBLOCKED, &zfcp_sdev->status);
 }
 
 static void zfcp_erp_action_to_running(struct zfcp_erp_action *erp_action)
@@ -642,7 +642,7 @@ static void zfcp_erp_wakeup(struct zfcp_adapter *adapter)
 	read_lock_irqsave(&adapter->erp_lock, flags);
 	if (list_empty(&adapter->erp_ready_head) &&
 	    list_empty(&adapter->erp_running_head)) {
-			atomic_clear_mask(ZFCP_STATUS_ADAPTER_ERP_PENDING,
+			atomic_andnot(ZFCP_STATUS_ADAPTER_ERP_PENDING,
 					  &adapter->status);
 			wake_up(&adapter->erp_done_wqh);
 	}
@@ -665,16 +665,16 @@ static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action)
 	int sleep = 1;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status);
+	atomic_andnot(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status);
 
 	for (retries = 7; retries; retries--) {
-		atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+		atomic_andnot(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				  &adapter->status);
 		write_lock_irq(&adapter->erp_lock);
 		zfcp_erp_action_to_running(erp_action);
 		write_unlock_irq(&adapter->erp_lock);
 		if (zfcp_fsf_exchange_config_data(erp_action)) {
-			atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+			atomic_andnot(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 					  &adapter->status);
 			return ZFCP_ERP_FAILED;
 		}
@@ -692,7 +692,7 @@ static int zfcp_erp_adapter_strat_fsf_xconf(struct zfcp_erp_action *erp_action)
 		sleep *= 2;
 	}
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+	atomic_andnot(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 			  &adapter->status);
 
 	if (!(atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_XCONFIG_OK))
@@ -764,7 +764,7 @@ static void zfcp_erp_adapter_strategy_close(struct zfcp_erp_action *act)
 	/* all ports and LUNs are closed */
 	zfcp_erp_clear_adapter_status(adapter, ZFCP_STATUS_COMMON_OPEN);
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
+	atomic_andnot(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
 			  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
 }
 
@@ -773,7 +773,7 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act)
 	struct zfcp_adapter *adapter = act->adapter;
 
 	if (zfcp_qdio_open(adapter->qdio)) {
-		atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
+		atomic_andnot(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
 				  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
 				  &adapter->status);
 		return ZFCP_ERP_FAILED;
@@ -784,7 +784,7 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *act)
 		return ZFCP_ERP_FAILED;
 	}
 
-	atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &adapter->status);
+	atomic_or(ZFCP_STATUS_COMMON_OPEN, &adapter->status);
 
 	return ZFCP_ERP_SUCCEEDED;
 }
@@ -948,7 +948,7 @@ static void zfcp_erp_lun_strategy_clearstati(struct scsi_device *sdev)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED,
+	atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_DENIED,
 			  &zfcp_sdev->status);
 }
 
@@ -1187,18 +1187,18 @@ static void zfcp_erp_action_dequeue(struct zfcp_erp_action *erp_action)
 	switch (erp_action->action) {
 	case ZFCP_ERP_ACTION_REOPEN_LUN:
 		zfcp_sdev = sdev_to_zfcp(erp_action->sdev);
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
 				  &zfcp_sdev->status);
 		break;
 
 	case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
 	case ZFCP_ERP_ACTION_REOPEN_PORT:
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
 				  &erp_action->port->status);
 		break;
 
 	case ZFCP_ERP_ACTION_REOPEN_ADAPTER:
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+		atomic_andnot(ZFCP_STATUS_COMMON_ERP_INUSE,
 				  &erp_action->adapter->status);
 		break;
 	}
@@ -1422,19 +1422,19 @@ void zfcp_erp_set_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 	unsigned long flags;
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 
-	atomic_set_mask(mask, &adapter->status);
+	atomic_or(mask, &adapter->status);
 
 	if (!common_mask)
 		return;
 
 	read_lock_irqsave(&adapter->port_list_lock, flags);
 	list_for_each_entry(port, &adapter->port_list, list)
-		atomic_set_mask(common_mask, &port->status);
+		atomic_or(common_mask, &port->status);
 	read_unlock_irqrestore(&adapter->port_list_lock, flags);
 
 	spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, adapter->scsi_host)
-		atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
+		atomic_or(common_mask, &sdev_to_zfcp(sdev)->status);
 	spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
 }
 
@@ -1453,7 +1453,7 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 	u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
 
-	atomic_clear_mask(mask, &adapter->status);
+	atomic_andnot(mask, &adapter->status);
 
 	if (!common_mask)
 		return;
@@ -1463,7 +1463,7 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 
 	read_lock_irqsave(&adapter->port_list_lock, flags);
 	list_for_each_entry(port, &adapter->port_list, list) {
-		atomic_clear_mask(common_mask, &port->status);
+		atomic_andnot(common_mask, &port->status);
 		if (clear_counter)
 			atomic_set(&port->erp_counter, 0);
 	}
@@ -1471,7 +1471,7 @@ void zfcp_erp_clear_adapter_status(struct zfcp_adapter *adapter, u32 mask)
 
 	spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, adapter->scsi_host) {
-		atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
+		atomic_andnot(common_mask, &sdev_to_zfcp(sdev)->status);
 		if (clear_counter)
 			atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
 	}
@@ -1491,7 +1491,7 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 	unsigned long flags;
 
-	atomic_set_mask(mask, &port->status);
+	atomic_or(mask, &port->status);
 
 	if (!common_mask)
 		return;
@@ -1499,7 +1499,7 @@ void zfcp_erp_set_port_status(struct zfcp_port *port, u32 mask)
 	spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port)
-			atomic_set_mask(common_mask,
+			atomic_or(common_mask,
 					&sdev_to_zfcp(sdev)->status);
 	spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
 }
@@ -1518,7 +1518,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
 	u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
 	unsigned long flags;
 
-	atomic_clear_mask(mask, &port->status);
+	atomic_andnot(mask, &port->status);
 
 	if (!common_mask)
 		return;
@@ -1529,7 +1529,7 @@ void zfcp_erp_clear_port_status(struct zfcp_port *port, u32 mask)
 	spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
 	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port) {
-			atomic_clear_mask(common_mask,
+			atomic_andnot(common_mask,
 					  &sdev_to_zfcp(sdev)->status);
 			if (clear_counter)
 				atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
@@ -1546,7 +1546,7 @@ void zfcp_erp_set_lun_status(struct scsi_device *sdev, u32 mask)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_set_mask(mask, &zfcp_sdev->status);
+	atomic_or(mask, &zfcp_sdev->status);
 }
 
 /**
@@ -1558,7 +1558,7 @@ void zfcp_erp_clear_lun_status(struct scsi_device *sdev, u32 mask)
 {
 	struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_clear_mask(mask, &zfcp_sdev->status);
+	atomic_andnot(mask, &zfcp_sdev->status);
 
 	if (mask & ZFCP_STATUS_COMMON_ERP_FAILED)
 		atomic_set(&zfcp_sdev->erp_counter, 0);
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index 25d49f32ca63..237688af179b 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -508,7 +508,7 @@ static void zfcp_fc_adisc_handler(void *data)
 	/* port is good, unblock rport without going through erp */
 	zfcp_scsi_schedule_rport_register(port);
  out:
-	atomic_clear_mask(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+	atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
 	put_device(&port->dev);
 	kmem_cache_free(zfcp_fc_req_cache, fc_req);
 }
@@ -564,14 +564,14 @@ void zfcp_fc_link_test_work(struct work_struct *work)
 	if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
 		goto out;
 
-	atomic_set_mask(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+	atomic_or(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
 
 	retval = zfcp_fc_adisc(port);
 	if (retval == 0)
 		return;
 
 	/* send of ADISC was not possible */
-	atomic_clear_mask(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+	atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
 	zfcp_erp_port_forced_reopen(port, 0, "fcltwk1");
 
 out:
@@ -640,7 +640,7 @@ static void zfcp_fc_validate_port(struct zfcp_port *port, struct list_head *lh)
 	if (!(atomic_read(&port->status) & ZFCP_STATUS_COMMON_NOESC))
 		return;
 
-	atomic_clear_mask(ZFCP_STATUS_COMMON_NOESC, &port->status);
+	atomic_andnot(ZFCP_STATUS_COMMON_NOESC, &port->status);
 
 	if ((port->supported_classes != 0) ||
 	    !list_empty(&port->unit_list))
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 4ac73e047c11..522a633c866a 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -114,7 +114,7 @@ static void zfcp_fsf_link_down_info_eval(struct zfcp_fsf_req *req,
 	if (atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED)
 		return;
 
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
+	atomic_or(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
 
 	zfcp_scsi_schedule_rports_block(adapter);
 
@@ -345,7 +345,7 @@ static void zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *req)
 		zfcp_erp_adapter_shutdown(adapter, 0, "fspse_3");
 		break;
 	case FSF_PROT_HOST_CONNECTION_INITIALIZING:
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+		atomic_or(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				&adapter->status);
 		break;
 	case FSF_PROT_DUPLICATE_REQUEST_ID:
@@ -554,7 +554,7 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req)
 			zfcp_erp_adapter_shutdown(adapter, 0, "fsecdh1");
 			return;
 		}
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+		atomic_or(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
 				&adapter->status);
 		break;
 	case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE:
@@ -567,7 +567,7 @@ static void zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *req)
 
 		/* avoids adapter shutdown to be able to recognize
 		 * events such as LINK UP */
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+		atomic_or(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
 				&adapter->status);
 		zfcp_fsf_link_down_info_eval(req,
 			&qtcb->header.fsf_status_qual.link_down_info);
@@ -1394,9 +1394,9 @@ static void zfcp_fsf_open_port_handler(struct zfcp_fsf_req *req)
 		break;
 	case FSF_GOOD:
 		port->handle = header->port_handle;
-		atomic_set_mask(ZFCP_STATUS_COMMON_OPEN |
+		atomic_or(ZFCP_STATUS_COMMON_OPEN |
 				ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_BOXED,
+		atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_BOXED,
 		                  &port->status);
 		/* check whether D_ID has changed during open */
 		/*
@@ -1677,10 +1677,10 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req)
 	case FSF_PORT_BOXED:
 		/* can't use generic zfcp_erp_modify_port_status because
 		 * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */
-		atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
+		atomic_andnot(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
 		shost_for_each_device(sdev, port->adapter->scsi_host)
 			if (sdev_to_zfcp(sdev)->port == port)
-				atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
+				atomic_andnot(ZFCP_STATUS_COMMON_OPEN,
 						  &sdev_to_zfcp(sdev)->status);
 		zfcp_erp_set_port_status(port, ZFCP_STATUS_COMMON_ACCESS_BOXED);
 		zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED,
@@ -1700,10 +1700,10 @@ static void zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *req)
 		/* can't use generic zfcp_erp_modify_port_status because
 		 * ZFCP_STATUS_COMMON_OPEN must not be reset for the port
 		 */
-		atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
+		atomic_andnot(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
 		shost_for_each_device(sdev, port->adapter->scsi_host)
 			if (sdev_to_zfcp(sdev)->port == port)
-				atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
+				atomic_andnot(ZFCP_STATUS_COMMON_OPEN,
 						  &sdev_to_zfcp(sdev)->status);
 		break;
 	}
@@ -1766,7 +1766,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req)
 
 	zfcp_sdev = sdev_to_zfcp(sdev);
 
-	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
+	atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			  ZFCP_STATUS_COMMON_ACCESS_BOXED,
 			  &zfcp_sdev->status);
 
@@ -1822,7 +1822,7 @@ static void zfcp_fsf_open_lun_handler(struct zfcp_fsf_req *req)
 
 	case FSF_GOOD:
 		zfcp_sdev->lun_handle = header->lun_handle;
-		atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
+		atomic_or(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
 		break;
 	}
 }
@@ -1913,7 +1913,7 @@ static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req)
 		}
 		break;
 	case FSF_GOOD:
-		atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
+		atomic_andnot(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
 		break;
 	}
 }
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 495e1cb3afa6..dbf2b54703f7 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -349,7 +349,7 @@ void zfcp_qdio_close(struct zfcp_qdio *qdio)
 
 	/* clear QDIOUP flag, thus do_QDIO is not called during qdio_shutdown */
 	spin_lock_irq(&qdio->req_q_lock);
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
+	atomic_andnot(ZFCP_STATUS_ADAPTER_QDIOUP, &adapter->status);
 	spin_unlock_irq(&qdio->req_q_lock);
 
 	wake_up(&qdio->req_q_wq);
@@ -384,7 +384,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 	if (atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP)
 		return -EIO;
 
-	atomic_clear_mask(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
+	atomic_andnot(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
 			  &qdio->adapter->status);
 
 	zfcp_qdio_setup_init_data(&init_data, qdio);
@@ -396,14 +396,14 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 		goto failed_qdio;
 
 	if (ssqd.qdioac2 & CHSC_AC2_DATA_DIV_ENABLED)
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED,
+		atomic_or(ZFCP_STATUS_ADAPTER_DATA_DIV_ENABLED,
 				&qdio->adapter->status);
 
 	if (ssqd.qdioac2 & CHSC_AC2_MULTI_BUFFER_ENABLED) {
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
+		atomic_or(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
 		qdio->max_sbale_per_sbal = QDIO_MAX_ELEMENTS_PER_BUFFER;
 	} else {
-		atomic_clear_mask(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
+		atomic_andnot(ZFCP_STATUS_ADAPTER_MB_ACT, &adapter->status);
 		qdio->max_sbale_per_sbal = QDIO_MAX_ELEMENTS_PER_BUFFER - 1;
 	}
 
@@ -427,7 +427,7 @@ int zfcp_qdio_open(struct zfcp_qdio *qdio)
 	/* set index of first available SBALS / number of available SBALS */
 	qdio->req_q_idx = 0;
 	atomic_set(&qdio->req_q_free, QDIO_MAX_BUFFERS_PER_Q);
-	atomic_set_mask(ZFCP_STATUS_ADAPTER_QDIOUP, &qdio->adapter->status);
+	atomic_or(ZFCP_STATUS_ADAPTER_QDIOUP, &qdio->adapter->status);
 
 	if (adapter->scsi_host) {
 		adapter->scsi_host->sg_tablesize = qdio->max_sbale_per_req;
@@ -499,6 +499,6 @@ void zfcp_qdio_siosl(struct zfcp_adapter *adapter)
 
 	rc = ccw_device_siosl(adapter->ccw_device);
 	if (!rc)
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
+		atomic_or(ZFCP_STATUS_ADAPTER_SIOSL_ISSUED,
 				&adapter->status);
 }
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index b7babf0206b8..a94cbebbc33d 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -23,236 +23,159 @@
 typedef atomic64_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
+#define ATOMIC_LONG_PFX(x)	atomic64 ## x
 
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_set(v, i);
-}
-
-static inline void atomic_long_inc(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_inc(v);
-}
-
-static inline void atomic_long_dec(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_dec(v);
-}
-
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_add(i, v);
-}
-
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	atomic64_sub(i, v);
-}
-
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_sub_and_test(i, v);
-}
-
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_dec_and_test(v);
-}
-
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_inc_and_test(v);
-}
-
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return atomic64_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_sub_return(i, v);
-}
-
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_inc_return(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_dec_return(v);
-}
-
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
-{
-	atomic64_t *v = (atomic64_t *)l;
-
-	return (long)atomic64_add_unless(v, a, u);
-}
-
-#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic64_cmpxchg((atomic64_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic64_xchg((atomic64_t *)(v), (new)))
-
-#else  /*  BITS_PER_LONG == 64  */
+#else
 
 typedef atomic_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
-static inline long atomic_long_read(atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_read(v);
-}
-
-static inline void atomic_long_set(atomic_long_t *l, long i)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	atomic_set(v, i);
-}
+#define ATOMIC_LONG_PFX(x)	atomic ## x
+
+#endif
+
+#define ATOMIC_LONG_READ_OP(mo)						\
+static inline long atomic_long_read##mo(atomic_long_t *l)		\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_read##mo)(v);			\
+}
+ATOMIC_LONG_READ_OP()
+ATOMIC_LONG_READ_OP(_acquire)
+
+#undef ATOMIC_LONG_READ_OP
+
+#define ATOMIC_LONG_SET_OP(mo)						\
+static inline void atomic_long_set##mo(atomic_long_t *l, long i)	\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	ATOMIC_LONG_PFX(_set##mo)(v, i);				\
+}
+ATOMIC_LONG_SET_OP()
+ATOMIC_LONG_SET_OP(_release)
+
+#undef ATOMIC_LONG_SET_OP
+
+#define ATOMIC_LONG_ADD_SUB_OP(op, mo)					\
+static inline long							\
+atomic_long_##op##_return##mo(long i, atomic_long_t *l)			\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v);		\
+}
+ATOMIC_LONG_ADD_SUB_OP(add,)
+ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
+ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
+ATOMIC_LONG_ADD_SUB_OP(add, _release)
+ATOMIC_LONG_ADD_SUB_OP(sub,)
+ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
+ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
+ATOMIC_LONG_ADD_SUB_OP(sub, _release)
+
+#undef ATOMIC_LONG_ADD_SUB_OP
+
+#define atomic_long_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (old), (new)))
+#define atomic_long_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
+
+#define atomic_long_xchg_relaxed(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg_acquire(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg_release(v, new) \
+	(ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
+#define atomic_long_xchg(v, new) \
+	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 
 static inline void atomic_long_inc(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_inc(v);
+	ATOMIC_LONG_PFX(_inc)(v);
 }
 
 static inline void atomic_long_dec(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_dec(v);
+	ATOMIC_LONG_PFX(_dec)(v);
 }
 
 static inline void atomic_long_add(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_add(i, v);
+	ATOMIC_LONG_PFX(_add)(i, v);
 }
 
 static inline void atomic_long_sub(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	atomic_sub(i, v);
+	ATOMIC_LONG_PFX(_sub)(i, v);
 }
 
 static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_sub_and_test(i, v);
+	return ATOMIC_LONG_PFX(_sub_and_test)(i, v);
 }
 
 static inline int atomic_long_dec_and_test(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_dec_and_test(v);
+	return ATOMIC_LONG_PFX(_dec_and_test)(v);
 }
 
 static inline int atomic_long_inc_and_test(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_inc_and_test(v);
+	return ATOMIC_LONG_PFX(_inc_and_test)(v);
 }
 
 static inline int atomic_long_add_negative(long i, atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return atomic_add_negative(i, v);
-}
-
-static inline long atomic_long_add_return(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_add_return(i, v);
-}
-
-static inline long atomic_long_sub_return(long i, atomic_long_t *l)
-{
-	atomic_t *v = (atomic_t *)l;
-
-	return (long)atomic_sub_return(i, v);
+	return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
 static inline long atomic_long_inc_return(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_inc_return(v);
+	return (long)ATOMIC_LONG_PFX(_inc_return)(v);
 }
 
 static inline long atomic_long_dec_return(atomic_long_t *l)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_dec_return(v);
+	return (long)ATOMIC_LONG_PFX(_dec_return)(v);
 }
 
 static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 {
-	atomic_t *v = (atomic_t *)l;
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
 
-	return (long)atomic_add_unless(v, a, u);
+	return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u);
 }
 
-#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l))
-
-#define atomic_long_cmpxchg(l, old, new) \
-	(atomic_cmpxchg((atomic_t *)(l), (old), (new)))
-#define atomic_long_xchg(v, new) \
-	(atomic_xchg((atomic_t *)(v), (new)))
-
-#endif  /*  BITS_PER_LONG == 64  */
+#define atomic_long_inc_not_zero(l) \
+	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 
 #endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 1973ad2b13f4..d4d7e337fdcb 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -98,15 +98,16 @@ ATOMIC_OP_RETURN(add, +)
 ATOMIC_OP_RETURN(sub, -)
 #endif
 
-#ifndef atomic_clear_mask
+#ifndef atomic_and
 ATOMIC_OP(and, &)
-#define atomic_clear_mask(i, v) atomic_and(~(i), (v))
 #endif
 
-#ifndef atomic_set_mask
-#define CONFIG_ARCH_HAS_ATOMIC_OR
+#ifndef atomic_or
 ATOMIC_OP(or, |)
-#define atomic_set_mask(i, v)	atomic_or((i), (v))
+#endif
+
+#ifndef atomic_xor
+ATOMIC_OP(xor, ^)
 #endif
 
 #undef ATOMIC_OP_RETURN
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 30ad9c86cebb..d48e78ccad3d 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -32,6 +32,10 @@ extern long long atomic64_##op##_return(long long a, atomic64_t *v);
 ATOMIC64_OPS(add)
 ATOMIC64_OPS(sub)
 
+ATOMIC64_OP(and)
+ATOMIC64_OP(or)
+ATOMIC64_OP(xor)
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 55e3abc2d027..b42afada1280 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -108,12 +108,12 @@
 do {									\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
-	ACCESS_ONCE(*p) = (v);						\
+	WRITE_ONCE(*p, v);						\
 } while (0)
 
 #define smp_load_acquire(p)						\
 ({									\
-	typeof(*p) ___p1 = ACCESS_ONCE(*p);				\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
 	compiletime_assert_atomic_type(*p);				\
 	smp_mb();							\
 	___p1;								\
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 6383d54bf983..54a8e65e18b6 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -36,39 +36,39 @@
 /*
  * External function declarations
  */
-extern void queue_read_lock_slowpath(struct qrwlock *lock);
-extern void queue_write_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts);
+extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
- * queue_read_can_lock- would read_trylock() succeed?
+ * queued_read_can_lock- would read_trylock() succeed?
  * @lock: Pointer to queue rwlock structure
  */
-static inline int queue_read_can_lock(struct qrwlock *lock)
+static inline int queued_read_can_lock(struct qrwlock *lock)
 {
 	return !(atomic_read(&lock->cnts) & _QW_WMASK);
 }
 
 /**
- * queue_write_can_lock- would write_trylock() succeed?
+ * queued_write_can_lock- would write_trylock() succeed?
  * @lock: Pointer to queue rwlock structure
  */
-static inline int queue_write_can_lock(struct qrwlock *lock)
+static inline int queued_write_can_lock(struct qrwlock *lock)
 {
 	return !atomic_read(&lock->cnts);
 }
 
 /**
- * queue_read_trylock - try to acquire read lock of a queue rwlock
+ * queued_read_trylock - try to acquire read lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
-static inline int queue_read_trylock(struct qrwlock *lock)
+static inline int queued_read_trylock(struct qrwlock *lock)
 {
 	u32 cnts;
 
 	cnts = atomic_read(&lock->cnts);
 	if (likely(!(cnts & _QW_WMASK))) {
-		cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
+		cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 		if (likely(!(cnts & _QW_WMASK)))
 			return 1;
 		atomic_sub(_QR_BIAS, &lock->cnts);
@@ -77,11 +77,11 @@ static inline int queue_read_trylock(struct qrwlock *lock)
 }
 
 /**
- * queue_write_trylock - try to acquire write lock of a queue rwlock
+ * queued_write_trylock - try to acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  * Return: 1 if lock acquired, 0 if failed
  */
-static inline int queue_write_trylock(struct qrwlock *lock)
+static inline int queued_write_trylock(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -89,78 +89,70 @@ static inline int queue_write_trylock(struct qrwlock *lock)
 	if (unlikely(cnts))
 		return 0;
 
-	return likely(atomic_cmpxchg(&lock->cnts,
-				     cnts, cnts | _QW_LOCKED) == cnts);
+	return likely(atomic_cmpxchg_acquire(&lock->cnts,
+					     cnts, cnts | _QW_LOCKED) == cnts);
 }
 /**
- * queue_read_lock - acquire read lock of a queue rwlock
+ * queued_read_lock - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
  */
-static inline void queue_read_lock(struct qrwlock *lock)
+static inline void queued_read_lock(struct qrwlock *lock)
 {
 	u32 cnts;
 
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts);
 	if (likely(!(cnts & _QW_WMASK)))
 		return;
 
 	/* The slowpath will decrement the reader count, if necessary. */
-	queue_read_lock_slowpath(lock);
+	queued_read_lock_slowpath(lock, cnts);
 }
 
 /**
- * queue_write_lock - acquire write lock of a queue rwlock
+ * queued_write_lock - acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_write_lock(struct qrwlock *lock)
+static inline void queued_write_lock(struct qrwlock *lock)
 {
 	/* Optimize for the unfair lock case where the fair flag is 0. */
-	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
+	if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0)
 		return;
 
-	queue_write_lock_slowpath(lock);
+	queued_write_lock_slowpath(lock);
 }
 
 /**
- * queue_read_unlock - release read lock of a queue rwlock
+ * queued_read_unlock - release read lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_read_unlock(struct qrwlock *lock)
+static inline void queued_read_unlock(struct qrwlock *lock)
 {
 	/*
 	 * Atomically decrement the reader count
 	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QR_BIAS, &lock->cnts);
+	(void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
-#ifndef queue_write_unlock
 /**
- * queue_write_unlock - release write lock of a queue rwlock
+ * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-static inline void queue_write_unlock(struct qrwlock *lock)
+static inline void queued_write_unlock(struct qrwlock *lock)
 {
-	/*
-	 * If the writer field is atomic, it can be cleared directly.
-	 * Otherwise, an atomic subtraction will be used to clear it.
-	 */
-	smp_mb__before_atomic();
-	atomic_sub(_QW_LOCKED, &lock->cnts);
+	smp_store_release((u8 *)&lock->cnts, 0);
 }
-#endif
 
 /*
  * Remapping rwlock architecture specific functions to the corresponding
  * queue rwlock functions.
  */
-#define arch_read_can_lock(l)	queue_read_can_lock(l)
-#define arch_write_can_lock(l)	queue_write_can_lock(l)
-#define arch_read_lock(l)	queue_read_lock(l)
-#define arch_write_lock(l)	queue_write_lock(l)
-#define arch_read_trylock(l)	queue_read_trylock(l)
-#define arch_write_trylock(l)	queue_write_trylock(l)
-#define arch_read_unlock(l)	queue_read_unlock(l)
-#define arch_write_unlock(l)	queue_write_unlock(l)
+#define arch_read_can_lock(l)	queued_read_can_lock(l)
+#define arch_write_can_lock(l)	queued_write_can_lock(l)
+#define arch_read_lock(l)	queued_read_lock(l)
+#define arch_write_lock(l)	queued_write_lock(l)
+#define arch_read_trylock(l)	queued_read_trylock(l)
+#define arch_write_trylock(l)	queued_write_trylock(l)
+#define arch_read_unlock(l)	queued_read_unlock(l)
+#define arch_write_unlock(l)	queued_write_unlock(l)
 
 #endif /* __ASM_GENERIC_QRWLOCK_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 5b08a8540ecf..00a5763e850e 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -2,6 +2,329 @@
 #ifndef _LINUX_ATOMIC_H
 #define _LINUX_ATOMIC_H
 #include <asm/atomic.h>
+#include <asm/barrier.h>
+
+/*
+ * Relaxed variants of xchg, cmpxchg and some atomic operations.
+ *
+ * We support four variants:
+ *
+ * - Fully ordered: The default implementation, no suffix required.
+ * - Acquire: Provides ACQUIRE semantics, _acquire suffix.
+ * - Release: Provides RELEASE semantics, _release suffix.
+ * - Relaxed: No ordering guarantees, _relaxed suffix.
+ *
+ * For compound atomics performing both a load and a store, ACQUIRE
+ * semantics apply only to the load and RELEASE semantics only to the
+ * store portion of the operation. Note that a failed cmpxchg_acquire
+ * does -not- imply any memory ordering constraints.
+ *
+ * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions.
+ */
+
+#ifndef atomic_read_acquire
+#define  atomic_read_acquire(v)		smp_load_acquire(&(v)->counter)
+#endif
+
+#ifndef atomic_set_release
+#define  atomic_set_release(v, i)	smp_store_release(&(v)->counter, (i))
+#endif
+
+/*
+ * The idea here is to build acquire/release variants by adding explicit
+ * barriers on top of the relaxed variant. In the case where the relaxed
+ * variant is already fully ordered, no additional barriers are needed.
+ */
+#define __atomic_op_acquire(op, args...)				\
+({									\
+	typeof(op##_relaxed(args)) __ret  = op##_relaxed(args);		\
+	smp_mb__after_atomic();						\
+	__ret;								\
+})
+
+#define __atomic_op_release(op, args...)				\
+({									\
+	smp_mb__before_atomic();					\
+	op##_relaxed(args);						\
+})
+
+#define __atomic_op_fence(op, args...)					\
+({									\
+	typeof(op##_relaxed(args)) __ret;				\
+	smp_mb__before_atomic();					\
+	__ret = op##_relaxed(args);					\
+	smp_mb__after_atomic();						\
+	__ret;								\
+})
+
+/* atomic_add_return_relaxed */
+#ifndef atomic_add_return_relaxed
+#define  atomic_add_return_relaxed	atomic_add_return
+#define  atomic_add_return_acquire	atomic_add_return
+#define  atomic_add_return_release	atomic_add_return
+
+#else /* atomic_add_return_relaxed */
+
+#ifndef atomic_add_return_acquire
+#define  atomic_add_return_acquire(...)					\
+	__atomic_op_acquire(atomic_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_add_return_release
+#define  atomic_add_return_release(...)					\
+	__atomic_op_release(atomic_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_add_return
+#define  atomic_add_return(...)						\
+	__atomic_op_fence(atomic_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic_add_return_relaxed */
+
+/* atomic_sub_return_relaxed */
+#ifndef atomic_sub_return_relaxed
+#define  atomic_sub_return_relaxed	atomic_sub_return
+#define  atomic_sub_return_acquire	atomic_sub_return
+#define  atomic_sub_return_release	atomic_sub_return
+
+#else /* atomic_sub_return_relaxed */
+
+#ifndef atomic_sub_return_acquire
+#define  atomic_sub_return_acquire(...)					\
+	__atomic_op_acquire(atomic_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_sub_return_release
+#define  atomic_sub_return_release(...)					\
+	__atomic_op_release(atomic_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_sub_return
+#define  atomic_sub_return(...)						\
+	__atomic_op_fence(atomic_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic_sub_return_relaxed */
+
+/* atomic_xchg_relaxed */
+#ifndef atomic_xchg_relaxed
+#define  atomic_xchg_relaxed		atomic_xchg
+#define  atomic_xchg_acquire		atomic_xchg
+#define  atomic_xchg_release		atomic_xchg
+
+#else /* atomic_xchg_relaxed */
+
+#ifndef atomic_xchg_acquire
+#define  atomic_xchg_acquire(...)					\
+	__atomic_op_acquire(atomic_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_xchg_release
+#define  atomic_xchg_release(...)					\
+	__atomic_op_release(atomic_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_xchg
+#define  atomic_xchg(...)						\
+	__atomic_op_fence(atomic_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic_xchg_relaxed */
+
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
+#define  atomic_cmpxchg_acquire		atomic_cmpxchg
+#define  atomic_cmpxchg_release		atomic_cmpxchg
+
+#else /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic_cmpxchg_acquire
+#define  atomic_cmpxchg_acquire(...)					\
+	__atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_cmpxchg_release
+#define  atomic_cmpxchg_release(...)					\
+	__atomic_op_release(atomic_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic_cmpxchg
+#define  atomic_cmpxchg(...)						\
+	__atomic_op_fence(atomic_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic64_read_acquire
+#define  atomic64_read_acquire(v)	smp_load_acquire(&(v)->counter)
+#endif
+
+#ifndef atomic64_set_release
+#define  atomic64_set_release(v, i)	smp_store_release(&(v)->counter, (i))
+#endif
+
+/* atomic64_add_return_relaxed */
+#ifndef atomic64_add_return_relaxed
+#define  atomic64_add_return_relaxed	atomic64_add_return
+#define  atomic64_add_return_acquire	atomic64_add_return
+#define  atomic64_add_return_release	atomic64_add_return
+
+#else /* atomic64_add_return_relaxed */
+
+#ifndef atomic64_add_return_acquire
+#define  atomic64_add_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return_release
+#define  atomic64_add_return_release(...)				\
+	__atomic_op_release(atomic64_add_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_add_return
+#define  atomic64_add_return(...)					\
+	__atomic_op_fence(atomic64_add_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_add_return_relaxed */
+
+/* atomic64_sub_return_relaxed */
+#ifndef atomic64_sub_return_relaxed
+#define  atomic64_sub_return_relaxed	atomic64_sub_return
+#define  atomic64_sub_return_acquire	atomic64_sub_return
+#define  atomic64_sub_return_release	atomic64_sub_return
+
+#else /* atomic64_sub_return_relaxed */
+
+#ifndef atomic64_sub_return_acquire
+#define  atomic64_sub_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return_release
+#define  atomic64_sub_return_release(...)				\
+	__atomic_op_release(atomic64_sub_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_sub_return
+#define  atomic64_sub_return(...)					\
+	__atomic_op_fence(atomic64_sub_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_sub_return_relaxed */
+
+/* atomic64_xchg_relaxed */
+#ifndef atomic64_xchg_relaxed
+#define  atomic64_xchg_relaxed		atomic64_xchg
+#define  atomic64_xchg_acquire		atomic64_xchg
+#define  atomic64_xchg_release		atomic64_xchg
+
+#else /* atomic64_xchg_relaxed */
+
+#ifndef atomic64_xchg_acquire
+#define  atomic64_xchg_acquire(...)					\
+	__atomic_op_acquire(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg_release
+#define  atomic64_xchg_release(...)					\
+	__atomic_op_release(atomic64_xchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_xchg
+#define  atomic64_xchg(...)						\
+	__atomic_op_fence(atomic64_xchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_xchg_relaxed */
+
+/* atomic64_cmpxchg_relaxed */
+#ifndef atomic64_cmpxchg_relaxed
+#define  atomic64_cmpxchg_relaxed	atomic64_cmpxchg
+#define  atomic64_cmpxchg_acquire	atomic64_cmpxchg
+#define  atomic64_cmpxchg_release	atomic64_cmpxchg
+
+#else /* atomic64_cmpxchg_relaxed */
+
+#ifndef atomic64_cmpxchg_acquire
+#define  atomic64_cmpxchg_acquire(...)					\
+	__atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg_release
+#define  atomic64_cmpxchg_release(...)					\
+	__atomic_op_release(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_cmpxchg
+#define  atomic64_cmpxchg(...)						\
+	__atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__)
+#endif
+#endif /* atomic64_cmpxchg_relaxed */
+
+/* cmpxchg_relaxed */
+#ifndef cmpxchg_relaxed
+#define  cmpxchg_relaxed		cmpxchg
+#define  cmpxchg_acquire		cmpxchg
+#define  cmpxchg_release		cmpxchg
+
+#else /* cmpxchg_relaxed */
+
+#ifndef cmpxchg_acquire
+#define  cmpxchg_acquire(...)						\
+	__atomic_op_acquire(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg_release
+#define  cmpxchg_release(...)						\
+	__atomic_op_release(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg
+#define  cmpxchg(...)							\
+	__atomic_op_fence(cmpxchg, __VA_ARGS__)
+#endif
+#endif /* cmpxchg_relaxed */
+
+/* cmpxchg64_relaxed */
+#ifndef cmpxchg64_relaxed
+#define  cmpxchg64_relaxed		cmpxchg64
+#define  cmpxchg64_acquire		cmpxchg64
+#define  cmpxchg64_release		cmpxchg64
+
+#else /* cmpxchg64_relaxed */
+
+#ifndef cmpxchg64_acquire
+#define  cmpxchg64_acquire(...)						\
+	__atomic_op_acquire(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64_release
+#define  cmpxchg64_release(...)						\
+	__atomic_op_release(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64
+#define  cmpxchg64(...)							\
+	__atomic_op_fence(cmpxchg64, __VA_ARGS__)
+#endif
+#endif /* cmpxchg64_relaxed */
+
+/* xchg_relaxed */
+#ifndef xchg_relaxed
+#define  xchg_relaxed			xchg
+#define  xchg_acquire			xchg
+#define  xchg_release			xchg
+
+#else /* xchg_relaxed */
+
+#ifndef xchg_acquire
+#define  xchg_acquire(...)		__atomic_op_acquire(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg_release
+#define  xchg_release(...)		__atomic_op_release(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg
+#define  xchg(...)			__atomic_op_fence(xchg, __VA_ARGS__)
+#endif
+#endif /* xchg_relaxed */
 
 /**
  * atomic_add_unless - add unless the number is already a given value
@@ -28,6 +351,23 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
 #endif
 
+#ifndef atomic_andnot
+static inline void atomic_andnot(int i, atomic_t *v)
+{
+	atomic_and(~i, v);
+}
+#endif
+
+static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_andnot(mask, v);
+}
+
+static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+	atomic_or(mask, v);
+}
+
 /**
  * atomic_inc_not_zero_hint - increment if not null
  * @v: pointer of type atomic_t
@@ -111,21 +451,16 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
-static inline void atomic_or(int i, atomic_t *v)
-{
-	int old;
-	int new;
-
-	do {
-		old = atomic_read(v);
-		new = old | i;
-	} while (atomic_cmpxchg(v, old, new) != old);
-}
-#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */
-
 #include <asm-generic/atomic-long.h>
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
+
+#ifndef atomic64_andnot
+static inline void atomic64_andnot(long long i, atomic64_t *v)
+{
+	atomic64_and(~i, v);
+}
+#endif
+
 #endif /* _LINUX_ATOMIC_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index e08a6ae7c0a4..c836eb2dc44d 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -252,7 +252,12 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
 
 #define WRITE_ONCE(x, val) \
-	({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
+({							\
+	union { typeof(x) __val; char __c[1]; } __u =	\
+		{ .__val = (__force typeof(x)) (val) }; \
+	__write_once_size(&(x), __u.__c, sizeof(x));	\
+	__u.__val;					\
+})
 
 /**
  * READ_ONCE_CTRL - Read a value heading a control dependency
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index f4de473f226b..7f653e8f6690 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -7,17 +7,52 @@
  * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
  * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
  *
+ * DEPRECATED API:
+ *
+ * The use of 'struct static_key' directly, is now DEPRECATED. In addition
+ * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following:
+ *
+ * struct static_key false = STATIC_KEY_INIT_FALSE;
+ * struct static_key true = STATIC_KEY_INIT_TRUE;
+ * static_key_true()
+ * static_key_false()
+ *
+ * The updated API replacements are:
+ *
+ * DEFINE_STATIC_KEY_TRUE(key);
+ * DEFINE_STATIC_KEY_FALSE(key);
+ * static_key_likely()
+ * statick_key_unlikely()
+ *
  * Jump labels provide an interface to generate dynamic branches using
- * self-modifying code. Assuming toolchain and architecture support, the result
- * of a "if (static_key_false(&key))" statement is an unconditional branch (which
- * defaults to false - and the true block is placed out of line).
+ * self-modifying code. Assuming toolchain and architecture support, if we
+ * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)",
+ * an "if (static_branch_unlikely(&key))" statement is an unconditional branch
+ * (which defaults to false - and the true block is placed out of line).
+ * Similarly, we can define an initially true key via
+ * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same
+ * "if (static_branch_unlikely(&key))", in which case we will generate an
+ * unconditional branch to the out-of-line true branch. Keys that are
+ * initially true or false can be using in both static_branch_unlikely()
+ * and static_branch_likely() statements.
+ *
+ * At runtime we can change the branch target by setting the key
+ * to true via a call to static_branch_enable(), or false using
+ * static_branch_disable(). If the direction of the branch is switched by
+ * these calls then we run-time modify the branch target via a
+ * no-op -> jump or jump -> no-op conversion. For example, for an
+ * initially false key that is used in an "if (static_branch_unlikely(&key))"
+ * statement, setting the key to true requires us to patch in a jump
+ * to the out-of-line of true branch.
  *
- * However at runtime we can change the branch target using
- * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key
- * object, and for as long as there are references all branches referring to
- * that particular key will point to the (out of line) true block.
+ * In addtion to static_branch_{enable,disable}, we can also reference count
+ * the key or branch direction via static_branch_{inc,dec}. Thus,
+ * static_branch_inc() can be thought of as a 'make more true' and
+ * static_branch_dec() as a 'make more false'. The inc()/dec()
+ * interface is meant to be used exclusively from the inc()/dec() for a given
+ * key.
  *
- * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions
+ * Since this relies on modifying code, the branch modifying functions
  * must be considered absolute slow paths (machine wide synchronization etc.).
  * OTOH, since the affected branches are unconditional, their runtime overhead
  * will be absolutely minimal, esp. in the default (off) case where the total
@@ -29,20 +64,10 @@
  * cause significant performance degradation. Struct static_key_deferred and
  * static_key_slow_dec_deferred() provide for this.
  *
- * Lacking toolchain and or architecture support, jump labels fall back to a simple
- * conditional branch.
- *
- * struct static_key my_key = STATIC_KEY_INIT_TRUE;
- *
- *   if (static_key_true(&my_key)) {
- *   }
+ * Lacking toolchain and or architecture support, static keys fall back to a
+ * simple conditional branch.
  *
- * will result in the true case being in-line and starts the key with a single
- * reference. Mixing static_key_true() and static_key_false() on the same key is not
- * allowed.
- *
- * Not initializing the key (static data is initialized to 0s anyway) is the
- * same as using STATIC_KEY_INIT_FALSE.
+ * Additional babbling in: Documentation/static-keys.txt
  */
 
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
@@ -86,8 +111,8 @@ struct static_key {
 #ifndef __ASSEMBLY__
 
 enum jump_label_type {
-	JUMP_LABEL_DISABLE = 0,
-	JUMP_LABEL_ENABLE,
+	JUMP_LABEL_NOP = 0,
+	JUMP_LABEL_JMP,
 };
 
 struct module;
@@ -101,33 +126,18 @@ static inline int static_key_count(struct static_key *key)
 
 #ifdef HAVE_JUMP_LABEL
 
-#define JUMP_LABEL_TYPE_FALSE_BRANCH	0UL
-#define JUMP_LABEL_TYPE_TRUE_BRANCH	1UL
-#define JUMP_LABEL_TYPE_MASK		1UL
-
-static
-inline struct jump_entry *jump_label_get_entries(struct static_key *key)
-{
-	return (struct jump_entry *)((unsigned long)key->entries
-						& ~JUMP_LABEL_TYPE_MASK);
-}
-
-static inline bool jump_label_get_branch_default(struct static_key *key)
-{
-	if (((unsigned long)key->entries & JUMP_LABEL_TYPE_MASK) ==
-	    JUMP_LABEL_TYPE_TRUE_BRANCH)
-		return true;
-	return false;
-}
+#define JUMP_TYPE_FALSE	0UL
+#define JUMP_TYPE_TRUE	1UL
+#define JUMP_TYPE_MASK	1UL
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-	return arch_static_branch(key);
+	return arch_static_branch(key, false);
 }
 
 static __always_inline bool static_key_true(struct static_key *key)
 {
-	return !static_key_false(key);
+	return !arch_static_branch(key, true);
 }
 
 extern struct jump_entry __start___jump_table[];
@@ -145,12 +155,12 @@ extern void static_key_slow_inc(struct static_key *key);
 extern void static_key_slow_dec(struct static_key *key);
 extern void jump_label_apply_nops(struct module *mod);
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key)		\
+#define STATIC_KEY_INIT_TRUE					\
 	{ .enabled = ATOMIC_INIT(1),				\
-	  .entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH })
-#define STATIC_KEY_INIT_FALSE ((struct static_key)		\
+	  .entries = (void *)JUMP_TYPE_TRUE }
+#define STATIC_KEY_INIT_FALSE					\
 	{ .enabled = ATOMIC_INIT(0),				\
-	  .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH })
+	  .entries = (void *)JUMP_TYPE_FALSE }
 
 #else  /* !HAVE_JUMP_LABEL */
 
@@ -198,10 +208,8 @@ static inline int jump_label_apply_nops(struct module *mod)
 	return 0;
 }
 
-#define STATIC_KEY_INIT_TRUE ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(1) })
-#define STATIC_KEY_INIT_FALSE ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(0) })
+#define STATIC_KEY_INIT_TRUE	{ .enabled = ATOMIC_INIT(1) }
+#define STATIC_KEY_INIT_FALSE	{ .enabled = ATOMIC_INIT(0) }
 
 #endif	/* HAVE_JUMP_LABEL */
 
@@ -213,6 +221,157 @@ static inline bool static_key_enabled(struct static_key *key)
 	return static_key_count(key) > 0;
 }
 
+static inline void static_key_enable(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (!count)
+		static_key_slow_inc(key);
+}
+
+static inline void static_key_disable(struct static_key *key)
+{
+	int count = static_key_count(key);
+
+	WARN_ON_ONCE(count < 0 || count > 1);
+
+	if (count)
+		static_key_slow_dec(key);
+}
+
+/* -------------------------------------------------------------------------- */
+
+/*
+ * Two type wrappers around static_key, such that we can use compile time
+ * type differentiation to emit the right code.
+ *
+ * All the below code is macros in order to play type games.
+ */
+
+struct static_key_true {
+	struct static_key key;
+};
+
+struct static_key_false {
+	struct static_key key;
+};
+
+#define STATIC_KEY_TRUE_INIT  (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE,  }
+#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, }
+
+#define DEFINE_STATIC_KEY_TRUE(name)	\
+	struct static_key_true name = STATIC_KEY_TRUE_INIT
+
+#define DEFINE_STATIC_KEY_FALSE(name)	\
+	struct static_key_false name = STATIC_KEY_FALSE_INIT
+
+#ifdef HAVE_JUMP_LABEL
+
+/*
+ * Combine the right initial value (type) with the right branch order
+ * to generate the desired result.
+ *
+ *
+ * type\branch|	likely (1)	      |	unlikely (0)
+ * -----------+-----------------------+------------------
+ *            |                       |
+ *  true (1)  |	   ...		      |	   ...
+ *            |    NOP		      |	   JMP L
+ *            |    <br-stmts>	      |	1: ...
+ *            |	L: ...		      |
+ *            |			      |
+ *            |			      |	L: <br-stmts>
+ *            |			      |	   jmp 1b
+ *            |                       |
+ * -----------+-----------------------+------------------
+ *            |                       |
+ *  false (0) |	   ...		      |	   ...
+ *            |    JMP L	      |	   NOP
+ *            |    <br-stmts>	      |	1: ...
+ *            |	L: ...		      |
+ *            |			      |
+ *            |			      |	L: <br-stmts>
+ *            |			      |	   jmp 1b
+ *            |                       |
+ * -----------+-----------------------+------------------
+ *
+ * The initial value is encoded in the LSB of static_key::entries,
+ * type: 0 = false, 1 = true.
+ *
+ * The branch type is encoded in the LSB of jump_entry::key,
+ * branch: 0 = unlikely, 1 = likely.
+ *
+ * This gives the following logic table:
+ *
+ *	enabled	type	branch	  instuction
+ * -----------------------------+-----------
+ *	0	0	0	| NOP
+ *	0	0	1	| JMP
+ *	0	1	0	| NOP
+ *	0	1	1	| JMP
+ *
+ *	1	0	0	| JMP
+ *	1	0	1	| NOP
+ *	1	1	0	| JMP
+ *	1	1	1	| NOP
+ *
+ * Which gives the following functions:
+ *
+ *   dynamic: instruction = enabled ^ branch
+ *   static:  instruction = type ^ branch
+ *
+ * See jump_label_type() / jump_label_init_type().
+ */
+
+extern bool ____wrong_branch_error(void);
+
+#define static_branch_likely(x)							\
+({										\
+	bool branch;								\
+	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
+		branch = !arch_static_branch(&(x)->key, true);			\
+	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+		branch = !arch_static_branch_jump(&(x)->key, true);		\
+	else									\
+		branch = ____wrong_branch_error();				\
+	branch;									\
+})
+
+#define static_branch_unlikely(x)						\
+({										\
+	bool branch;								\
+	if (__builtin_types_compatible_p(typeof(*x), struct static_key_true))	\
+		branch = arch_static_branch_jump(&(x)->key, false);		\
+	else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \
+		branch = arch_static_branch(&(x)->key, false);			\
+	else									\
+		branch = ____wrong_branch_error();				\
+	branch;									\
+})
+
+#else /* !HAVE_JUMP_LABEL */
+
+#define static_branch_likely(x)		likely(static_key_enabled(&(x)->key))
+#define static_branch_unlikely(x)	unlikely(static_key_enabled(&(x)->key))
+
+#endif /* HAVE_JUMP_LABEL */
+
+/*
+ * Advanced usage; refcount, branch is enabled when: count != 0
+ */
+
+#define static_branch_inc(x)		static_key_slow_inc(&(x)->key)
+#define static_branch_dec(x)		static_key_slow_dec(&(x)->key)
+
+/*
+ * Normal usage; boolean enable/disable.
+ */
+
+#define static_branch_enable(x)		static_key_enable(&(x)->key)
+#define static_branch_disable(x)	static_key_disable(&(x)->key)
+
 #endif	/* _LINUX_JUMP_LABEL_H */
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/linux/llist.h b/include/linux/llist.h
index fbf10a0bc095..fd4ca0b4fe0f 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -55,8 +55,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/atomic.h>
 #include <linux/kernel.h>
-#include <asm/cmpxchg.h>
 
 struct llist_head {
 	struct llist_node *first;
diff --git a/kernel/futex.c b/kernel/futex.c
index c4a182f5357e..6e443efc65f4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -64,6 +64,7 @@
 #include <linux/hugetlb.h>
 #include <linux/freezer.h>
 #include <linux/bootmem.h>
+#include <linux/fault-inject.h>
 
 #include <asm/futex.h>
 
@@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize;
 
 static struct futex_hash_bucket *futex_queues;
 
+/*
+ * Fault injections for futexes.
+ */
+#ifdef CONFIG_FAIL_FUTEX
+
+static struct {
+	struct fault_attr attr;
+
+	u32 ignore_private;
+} fail_futex = {
+	.attr = FAULT_ATTR_INITIALIZER,
+	.ignore_private = 0,
+};
+
+static int __init setup_fail_futex(char *str)
+{
+	return setup_fault_attr(&fail_futex.attr, str);
+}
+__setup("fail_futex=", setup_fail_futex);
+
+static bool should_fail_futex(bool fshared)
+{
+	if (fail_futex.ignore_private && !fshared)
+		return false;
+
+	return should_fail(&fail_futex.attr, 1);
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_futex_debugfs(void)
+{
+	umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+	struct dentry *dir;
+
+	dir = fault_create_debugfs_attr("fail_futex", NULL,
+					&fail_futex.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	if (!debugfs_create_bool("ignore-private", mode, dir,
+				 &fail_futex.ignore_private)) {
+		debugfs_remove_recursive(dir);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+late_initcall(fail_futex_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+#else
+static inline bool should_fail_futex(bool fshared)
+{
+	return false;
+}
+#endif /* CONFIG_FAIL_FUTEX */
+
 static inline void futex_get_mm(union futex_key *key)
 {
 	atomic_inc(&key->private.mm->mm_count);
@@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
 		return -EFAULT;
 
+	if (unlikely(should_fail_futex(fshared)))
+		return -EFAULT;
+
 	/*
 	 * PROCESS_PRIVATE futexes are fast.
 	 * As the mm cannot disappear under us and the 'key' only needs
@@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 	}
 
 again:
+	/* Ignore any VERIFY_READ mapping (futex common case) */
+	if (unlikely(should_fail_futex(fshared)))
+		return -EFAULT;
+
 	err = get_user_pages_fast(address, 1, 1, &page);
 	/*
 	 * If write access is not required (eg. FUTEX_WAIT), try
@@ -516,7 +584,7 @@ again:
 		 * A RO anonymous page will never change and thus doesn't make
 		 * sense for futex operations.
 		 */
-		if (ro) {
+		if (unlikely(should_fail_futex(fshared)) || ro) {
 			err = -EFAULT;
 			goto out;
 		}
@@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 {
 	u32 uninitialized_var(curval);
 
+	if (unlikely(should_fail_futex(true)))
+		return -EFAULT;
+
 	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
 		return -EFAULT;
 
@@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 	if (get_futex_value_locked(&uval, uaddr))
 		return -EFAULT;
 
+	if (unlikely(should_fail_futex(true)))
+		return -EFAULT;
+
 	/*
 	 * Detect deadlocks.
 	 */
 	if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
 		return -EDEADLK;
 
+	if ((unlikely(should_fail_futex(true))))
+		return -EDEADLK;
+
 	/*
 	 * Lookup existing state first. If it exists, try to attach to
 	 * its pi_state.
@@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	 */
 	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
+	if (unlikely(should_fail_futex(true)))
+		ret = -EFAULT;
+
 	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 		ret = -EFAULT;
 	else if (curval != uval)
@@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 	if (get_futex_value_locked(&curval, pifutex))
 		return -EFAULT;
 
+	if (unlikely(should_fail_futex(true)))
+		return -EFAULT;
+
 	/*
 	 * Find the top_waiter and determine if there are additional waiters.
 	 * If the caller intends to requeue more than 1 waiter to pifutex,
@@ -2268,8 +2351,11 @@ static long futex_wait_restart(struct restart_block *restart)
 /*
  * Userspace tried a 0 -> TID atomic transition of the futex value
  * and failed. The kernel side here does the whole locking operation:
- * if there are waiters then it will block, it does PI, etc. (Due to
- * races the kernel might see a 0 value of the futex too.)
+ * if there are waiters then it will block as a consequence of relying
+ * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
+ * a 0 value of the futex too.).
+ *
+ * Also serves as futex trylock_pi()'ing, and due semantics.
  */
 static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 			 ktime_t *time, int trylock)
@@ -2300,6 +2386,10 @@ retry_private:
 
 	ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
 	if (unlikely(ret)) {
+		/*
+		 * Atomic work succeeded and we got the lock,
+		 * or failed. Either way, we do _not_ block.
+		 */
 		switch (ret) {
 		case 1:
 			/* We got the lock. */
@@ -2530,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
  * @uaddr:	the futex we initially wait on (non-pi)
  * @flags:	futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
- * 		the same type, no requeueing from private to shared, etc.
+ *		the same type, no requeueing from private to shared, etc.
  * @val:	the expected value of uaddr
  * @abs_time:	absolute timeout
  * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
@@ -3005,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
 		      cmd == FUTEX_WAIT_BITSET ||
 		      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
+			return -EFAULT;
 		if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
 			return -EFAULT;
 		if (!timespec_valid(&ts))
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 52ebaca1b9fc..f7dd15d537f9 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -54,7 +54,7 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
 }
 
-static void jump_label_update(struct static_key *key, int enable);
+static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
@@ -63,13 +63,8 @@ void static_key_slow_inc(struct static_key *key)
 		return;
 
 	jump_label_lock();
-	if (atomic_read(&key->enabled) == 0) {
-		if (!jump_label_get_branch_default(key))
-			jump_label_update(key, JUMP_LABEL_ENABLE);
-		else
-			jump_label_update(key, JUMP_LABEL_DISABLE);
-	}
-	atomic_inc(&key->enabled);
+	if (atomic_inc_return(&key->enabled) == 1)
+		jump_label_update(key);
 	jump_label_unlock();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
@@ -87,10 +82,7 @@ static void __static_key_slow_dec(struct static_key *key,
 		atomic_inc(&key->enabled);
 		schedule_delayed_work(work, rate_limit);
 	} else {
-		if (!jump_label_get_branch_default(key))
-			jump_label_update(key, JUMP_LABEL_DISABLE);
-		else
-			jump_label_update(key, JUMP_LABEL_ENABLE);
+		jump_label_update(key);
 	}
 	jump_label_unlock();
 }
@@ -149,7 +141,7 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 	return 0;
 }
 
-/* 
+/*
  * Update code which is definitely not currently executing.
  * Architectures which need heavyweight synchronization to modify
  * running code can override this to make the non-live update case
@@ -158,37 +150,54 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start,
 void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry *entry,
 					    enum jump_label_type type)
 {
-	arch_jump_label_transform(entry, type);	
+	arch_jump_label_transform(entry, type);
+}
+
+static inline struct jump_entry *static_key_entries(struct static_key *key)
+{
+	return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK);
+}
+
+static inline bool static_key_type(struct static_key *key)
+{
+	return (unsigned long)key->entries & JUMP_TYPE_MASK;
+}
+
+static inline struct static_key *jump_entry_key(struct jump_entry *entry)
+{
+	return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static bool jump_entry_branch(struct jump_entry *entry)
+{
+	return (unsigned long)entry->key & 1UL;
+}
+
+static enum jump_label_type jump_label_type(struct jump_entry *entry)
+{
+	struct static_key *key = jump_entry_key(entry);
+	bool enabled = static_key_enabled(key);
+	bool branch = jump_entry_branch(entry);
+
+	/* See the comment in linux/jump_label.h */
+	return enabled ^ branch;
 }
 
 static void __jump_label_update(struct static_key *key,
 				struct jump_entry *entry,
-				struct jump_entry *stop, int enable)
+				struct jump_entry *stop)
 {
-	for (; (entry < stop) &&
-	      (entry->key == (jump_label_t)(unsigned long)key);
-	      entry++) {
+	for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
 		/*
 		 * entry->code set to 0 invalidates module init text sections
 		 * kernel_text_address() verifies we are not in core kernel
 		 * init code, see jump_label_invalidate_module_init().
 		 */
 		if (entry->code && kernel_text_address(entry->code))
-			arch_jump_label_transform(entry, enable);
+			arch_jump_label_transform(entry, jump_label_type(entry));
 	}
 }
 
-static enum jump_label_type jump_label_type(struct static_key *key)
-{
-	bool true_branch = jump_label_get_branch_default(key);
-	bool state = static_key_enabled(key);
-
-	if ((!true_branch && state) || (true_branch && !state))
-		return JUMP_LABEL_ENABLE;
-
-	return JUMP_LABEL_DISABLE;
-}
-
 void __init jump_label_init(void)
 {
 	struct jump_entry *iter_start = __start___jump_table;
@@ -202,8 +211,11 @@ void __init jump_label_init(void)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
-		iterk = (struct static_key *)(unsigned long)iter->key;
-		arch_jump_label_transform_static(iter, jump_label_type(iterk));
+		/* rewrite NOPs */
+		if (jump_label_type(iter) == JUMP_LABEL_NOP)
+			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
+
+		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
 
@@ -222,6 +234,16 @@ void __init jump_label_init(void)
 
 #ifdef CONFIG_MODULES
 
+static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
+{
+	struct static_key *key = jump_entry_key(entry);
+	bool type = static_key_type(key);
+	bool branch = jump_entry_branch(entry);
+
+	/* See the comment in linux/jump_label.h */
+	return type ^ branch;
+}
+
 struct static_key_mod {
 	struct static_key_mod *next;
 	struct jump_entry *entries;
@@ -243,17 +265,15 @@ static int __jump_label_mod_text_reserved(void *start, void *end)
 				start, end);
 }
 
-static void __jump_label_mod_update(struct static_key *key, int enable)
+static void __jump_label_mod_update(struct static_key *key)
 {
-	struct static_key_mod *mod = key->next;
+	struct static_key_mod *mod;
 
-	while (mod) {
+	for (mod = key->next; mod; mod = mod->next) {
 		struct module *m = mod->mod;
 
 		__jump_label_update(key, mod->entries,
-				    m->jump_entries + m->num_jump_entries,
-				    enable);
-		mod = mod->next;
+				    m->jump_entries + m->num_jump_entries);
 	}
 }
 
@@ -276,7 +296,9 @@ void jump_label_apply_nops(struct module *mod)
 		return;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE);
+		/* Only write NOPs for arch_branch_static(). */
+		if (jump_label_init_type(iter) == JUMP_LABEL_NOP)
+			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 	}
 }
 
@@ -297,7 +319,7 @@ static int jump_label_add_module(struct module *mod)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
-		iterk = (struct static_key *)(unsigned long)iter->key;
+		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
 
@@ -318,8 +340,9 @@ static int jump_label_add_module(struct module *mod)
 		jlm->next = key->next;
 		key->next = jlm;
 
-		if (jump_label_type(key) == JUMP_LABEL_ENABLE)
-			__jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE);
+		/* Only update if we've changed from our initial state */
+		if (jump_label_type(iter) != jump_label_init_type(iter))
+			__jump_label_update(key, iter, iter_stop);
 	}
 
 	return 0;
@@ -334,10 +357,10 @@ static void jump_label_del_module(struct module *mod)
 	struct static_key_mod *jlm, **prev;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		if (iter->key == (jump_label_t)(unsigned long)key)
+		if (jump_entry_key(iter) == key)
 			continue;
 
-		key = (struct static_key *)(unsigned long)iter->key;
+		key = jump_entry_key(iter);
 
 		if (within_module(iter->key, mod))
 			continue;
@@ -439,14 +462,14 @@ int jump_label_text_reserved(void *start, void *end)
 	return ret;
 }
 
-static void jump_label_update(struct static_key *key, int enable)
+static void jump_label_update(struct static_key *key)
 {
 	struct jump_entry *stop = __stop___jump_table;
-	struct jump_entry *entry = jump_label_get_entries(key);
+	struct jump_entry *entry = static_key_entries(key);
 #ifdef CONFIG_MODULES
 	struct module *mod;
 
-	__jump_label_mod_update(key, enable);
+	__jump_label_mod_update(key);
 
 	preempt_disable();
 	mod = __module_address((unsigned long)key);
@@ -456,7 +479,44 @@ static void jump_label_update(struct static_key *key, int enable)
 #endif
 	/* if there are no users, entry can be NULL */
 	if (entry)
-		__jump_label_update(key, entry, stop, enable);
+		__jump_label_update(key, entry, stop);
 }
 
-#endif
+#ifdef CONFIG_STATIC_KEYS_SELFTEST
+static DEFINE_STATIC_KEY_TRUE(sk_true);
+static DEFINE_STATIC_KEY_FALSE(sk_false);
+
+static __init int jump_label_test(void)
+{
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		WARN_ON(static_key_enabled(&sk_true.key) != true);
+		WARN_ON(static_key_enabled(&sk_false.key) != false);
+
+		WARN_ON(!static_branch_likely(&sk_true));
+		WARN_ON(!static_branch_unlikely(&sk_true));
+		WARN_ON(static_branch_likely(&sk_false));
+		WARN_ON(static_branch_unlikely(&sk_false));
+
+		static_branch_disable(&sk_true);
+		static_branch_enable(&sk_false);
+
+		WARN_ON(static_key_enabled(&sk_true.key) == true);
+		WARN_ON(static_key_enabled(&sk_false.key) == false);
+
+		WARN_ON(static_branch_likely(&sk_true));
+		WARN_ON(static_branch_unlikely(&sk_true));
+		WARN_ON(!static_branch_likely(&sk_false));
+		WARN_ON(!static_branch_unlikely(&sk_false));
+
+		static_branch_enable(&sk_true);
+		static_branch_disable(&sk_false);
+	}
+
+	return 0;
+}
+late_initcall(jump_label_test);
+#endif /* STATIC_KEYS_SELFTEST */
+
+#endif /* HAVE_JUMP_LABEL */
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 7dd5c9918e4c..36942047ffc0 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
 obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
-obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 6c5da483966b..f17a3e3b3550 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -55,27 +55,29 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
 {
 	while ((cnts & _QW_WMASK) == _QW_LOCKED) {
 		cpu_relax_lowlatency();
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
+		cnts = atomic_read_acquire(&lock->cnts);
 	}
 }
 
 /**
- * queue_read_lock_slowpath - acquire read lock of a queue rwlock
+ * queued_read_lock_slowpath - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
+ * @cnts: Current qrwlock lock value
  */
-void queue_read_lock_slowpath(struct qrwlock *lock)
+void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 {
-	u32 cnts;
-
 	/*
 	 * Readers come here when they cannot get the lock without waiting
 	 */
 	if (unlikely(in_interrupt())) {
 		/*
-		 * Readers in interrupt context will spin until the lock is
-		 * available without waiting in the queue.
+		 * Readers in interrupt context will get the lock immediately
+		 * if the writer is just waiting (not holding the lock yet).
+		 * The rspin_until_writer_unlock() function returns immediately
+		 * in this case. Otherwise, they will spin (with ACQUIRE
+		 * semantics) until the lock is available without waiting in
+		 * the queue.
 		 */
-		cnts = smp_load_acquire((u32 *)&lock->cnts);
 		rspin_until_writer_unlock(lock, cnts);
 		return;
 	}
@@ -87,16 +89,11 @@ void queue_read_lock_slowpath(struct qrwlock *lock)
 	arch_spin_lock(&lock->lock);
 
 	/*
-	 * At the head of the wait queue now, wait until the writer state
-	 * goes to 0 and then try to increment the reader count and get
-	 * the lock. It is possible that an incoming writer may steal the
-	 * lock in the interim, so it is necessary to check the writer byte
-	 * to make sure that the write lock isn't taken.
+	 * The ACQUIRE semantics of the following spinning code ensure
+	 * that accesses can't leak upwards out of our subsequent critical
+	 * section in the case that the lock is currently held for write.
 	 */
-	while (atomic_read(&lock->cnts) & _QW_WMASK)
-		cpu_relax_lowlatency();
-
-	cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
+	cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts) - _QR_BIAS;
 	rspin_until_writer_unlock(lock, cnts);
 
 	/*
@@ -104,13 +101,13 @@ void queue_read_lock_slowpath(struct qrwlock *lock)
 	 */
 	arch_spin_unlock(&lock->lock);
 }
-EXPORT_SYMBOL(queue_read_lock_slowpath);
+EXPORT_SYMBOL(queued_read_lock_slowpath);
 
 /**
- * queue_write_lock_slowpath - acquire write lock of a queue rwlock
+ * queued_write_lock_slowpath - acquire write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
-void queue_write_lock_slowpath(struct qrwlock *lock)
+void queued_write_lock_slowpath(struct qrwlock *lock)
 {
 	u32 cnts;
 
@@ -119,7 +116,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
 
 	/* Try to acquire the lock directly if no reader is present */
 	if (!atomic_read(&lock->cnts) &&
-	    (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
+	    (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
 		goto unlock;
 
 	/*
@@ -130,7 +127,7 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
 		struct __qrwlock *l = (struct __qrwlock *)lock;
 
 		if (!READ_ONCE(l->wmode) &&
-		   (cmpxchg(&l->wmode, 0, _QW_WAITING) == 0))
+		   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
 			break;
 
 		cpu_relax_lowlatency();
@@ -140,8 +137,8 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
 	for (;;) {
 		cnts = atomic_read(&lock->cnts);
 		if ((cnts == _QW_WAITING) &&
-		    (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
-				    _QW_LOCKED) == _QW_WAITING))
+		    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
+					    _QW_LOCKED) == _QW_WAITING))
 			break;
 
 		cpu_relax_lowlatency();
@@ -149,4 +146,4 @@ void queue_write_lock_slowpath(struct qrwlock *lock)
 unlock:
 	arch_spin_unlock(&lock->lock);
 }
-EXPORT_SYMBOL(queue_write_lock_slowpath);
+EXPORT_SYMBOL(queued_write_lock_slowpath);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38c49202d532..337c8818541d 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -239,8 +239,8 @@ static __always_inline void set_locked(struct qspinlock *lock)
 
 static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
-static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
-
+static __always_inline void __pv_kick_node(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
 static __always_inline void __pv_wait_head(struct qspinlock *lock,
 					   struct mcs_spinlock *node) { }
 
@@ -440,7 +440,7 @@ queue:
 		cpu_relax();
 
 	arch_mcs_spin_unlock_contended(&next->locked);
-	pv_kick_node(next);
+	pv_kick_node(lock, next);
 
 release:
 	/*
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index df19ae4debd0..c8e6e9a596f5 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -22,9 +22,14 @@
 
 #define _Q_SLOW_VAL	(3U << _Q_LOCKED_OFFSET)
 
+/*
+ * Queue node uses: vcpu_running & vcpu_halted.
+ * Queue head uses: vcpu_running & vcpu_hashed.
+ */
 enum vcpu_state {
 	vcpu_running = 0,
-	vcpu_halted,
+	vcpu_halted,		/* Used only in pv_wait_node */
+	vcpu_hashed,		/* = pv_hash'ed + vcpu_halted */
 };
 
 struct pv_node {
@@ -153,7 +158,8 @@ static void pv_init_node(struct mcs_spinlock *node)
 
 /*
  * Wait for node->locked to become true, halt the vcpu after a short spin.
- * pv_kick_node() is used to wake the vcpu again.
+ * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
+ * behalf.
  */
 static void pv_wait_node(struct mcs_spinlock *node)
 {
@@ -172,9 +178,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 *
 		 * [S] pn->state = vcpu_halted	  [S] next->locked = 1
 		 *     MB			      MB
-		 * [L] pn->locked		[RmW] pn->state = vcpu_running
+		 * [L] pn->locked		[RmW] pn->state = vcpu_hashed
 		 *
-		 * Matches the xchg() from pv_kick_node().
+		 * Matches the cmpxchg() from pv_kick_node().
 		 */
 		smp_store_mb(pn->state, vcpu_halted);
 
@@ -182,9 +188,10 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * If pv_kick_node() changed us to vcpu_hashed, retain that value
+		 * so that pv_wait_head() knows to not also try to hash this lock.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
@@ -194,6 +201,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 * MCS lock will be released soon.
 		 */
 	}
+
 	/*
 	 * By now our node->locked should be 1 and our caller will not actually
 	 * spin-wait for it. We do however rely on our caller to do a
@@ -202,24 +210,35 @@ static void pv_wait_node(struct mcs_spinlock *node)
 }
 
 /*
- * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state such
+ * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
+	struct __qspinlock *l = (void *)lock;
 
 	/*
-	 * Note that because node->locked is already set, this actual
-	 * mcs_spinlock entry could be re-used already.
+	 * If the vCPU is indeed halted, advance its state to match that of
+	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+	 * observe its next->locked value and advance itself.
 	 *
-	 * This should be fine however, kicking people for no reason is
-	 * harmless.
+	 * Matches with smp_store_mb() and cmpxchg() in pv_wait_node()
+	 */
+	if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted)
+		return;
+
+	/*
+	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
 	 *
-	 * See the comment in pv_wait_node().
+	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+	 * the hash table later on at unlock time, no atomic instruction is
+	 * needed.
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted)
-		pv_kick(pn->cpu);
+	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+	(void)pv_hash(lock, pn);
 }
 
 /*
@@ -233,6 +252,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 	struct qspinlock **lp = NULL;
 	int loop;
 
+	/*
+	 * If pv_kick_node() already advanced our state, we don't need to
+	 * insert ourselves into the hash table anymore.
+	 */
+	if (READ_ONCE(pn->state) == vcpu_hashed)
+		lp = (struct qspinlock **)1;
+
 	for (;;) {
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
 			if (!READ_ONCE(l->locked))
@@ -240,17 +266,22 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
-		WRITE_ONCE(pn->state, vcpu_halted);
 		if (!lp) { /* ONCE */
+			WRITE_ONCE(pn->state, vcpu_hashed);
 			lp = pv_hash(lock, pn);
+
 			/*
-			 * lp must be set before setting _Q_SLOW_VAL
+			 * We must hash before setting _Q_SLOW_VAL, such that
+			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
+			 * we'll be sure to be able to observe our hash entry.
 			 *
-			 * [S] lp = lock                [RmW] l = l->locked = 0
-			 *     MB                             MB
-			 * [S] l->locked = _Q_SLOW_VAL  [L]   lp
+			 *   [S] pn->state
+			 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
+			 *       MB                           RMB
+			 * [RmW] l->locked = _Q_SLOW_VAL  [L] <unhash>
+			 *                                [L] pn->state
 			 *
-			 * Matches the cmpxchg() in __pv_queued_spin_unlock().
+			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
 			 */
 			if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
 				/*
@@ -287,24 +318,34 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
 	struct pv_node *node;
-	u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+	u8 locked;
 
 	/*
 	 * We must not unlock if SLOW, because in that case we must first
 	 * unhash. Otherwise it would be possible to have multiple @lock
 	 * entries, which would be BAD.
 	 */
-	if (likely(lockval == _Q_LOCKED_VAL))
+	locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+	if (likely(locked == _Q_LOCKED_VAL))
 		return;
 
-	if (unlikely(lockval != _Q_SLOW_VAL)) {
-		if (debug_locks_silent)
-			return;
-		WARN(1, "pvqspinlock: lock %p has corrupted value 0x%x!\n", lock, atomic_read(&lock->val));
+	if (unlikely(locked != _Q_SLOW_VAL)) {
+		WARN(!debug_locks_silent,
+		     "pvqspinlock: lock 0x%lx has corrupted value 0x%x!\n",
+		     (unsigned long)lock, atomic_read(&lock->val));
 		return;
 	}
 
 	/*
+	 * A failed cmpxchg doesn't provide any memory-ordering guarantees,
+	 * so we need a barrier to order the read of the node data in
+	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
+	 *
+	 * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL.
+	 */
+	smp_rmb();
+
+	/*
 	 * Since the above failed to release, this must be the SLOW path.
 	 * Therefore start by looking up the blocked node and unhashing it.
 	 */
@@ -319,8 +360,11 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
 	 * however we can still use the pv_node to kick the CPU.
+	 * The other vCPU may not really be halted, but kicking an active
+	 * vCPU is harmless other than the additional latency in completing
+	 * the unlock.
 	 */
-	if (READ_ONCE(node->state) == vcpu_halted)
+	if (READ_ONCE(node->state) == vcpu_hashed)
 		pv_kick(node->cpu);
 }
 /*
diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c
deleted file mode 100644
index 1d96dd0d93c1..000000000000
--- a/kernel/locking/rtmutex-tester.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * RT-Mutex-tester: scriptable tester for rt mutexes
- *
- * started by Thomas Gleixner:
- *
- *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
- *
- */
-#include <linux/device.h>
-#include <linux/kthread.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/sched/rt.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <linux/freezer.h>
-#include <linux/stat.h>
-
-#include "rtmutex.h"
-
-#define MAX_RT_TEST_THREADS	8
-#define MAX_RT_TEST_MUTEXES	8
-
-static spinlock_t rttest_lock;
-static atomic_t rttest_event;
-
-struct test_thread_data {
-	int			opcode;
-	int			opdata;
-	int			mutexes[MAX_RT_TEST_MUTEXES];
-	int			event;
-	struct device		dev;
-};
-
-static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
-static struct task_struct *threads[MAX_RT_TEST_THREADS];
-static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
-
-enum test_opcodes {
-	RTTEST_NOP = 0,
-	RTTEST_SCHEDOT,		/* 1 Sched other, data = nice */
-	RTTEST_SCHEDRT,		/* 2 Sched fifo, data = prio */
-	RTTEST_LOCK,		/* 3 Lock uninterruptible, data = lockindex */
-	RTTEST_LOCKNOWAIT,	/* 4 Lock uninterruptible no wait in wakeup, data = lockindex */
-	RTTEST_LOCKINT,		/* 5 Lock interruptible, data = lockindex */
-	RTTEST_LOCKINTNOWAIT,	/* 6 Lock interruptible no wait in wakeup, data = lockindex */
-	RTTEST_LOCKCONT,	/* 7 Continue locking after the wakeup delay */
-	RTTEST_UNLOCK,		/* 8 Unlock, data = lockindex */
-	/* 9, 10 - reserved for BKL commemoration */
-	RTTEST_SIGNAL = 11,	/* 11 Signal other test thread, data = thread id */
-	RTTEST_RESETEVENT = 98,	/* 98 Reset event counter */
-	RTTEST_RESET = 99,	/* 99 Reset all pending operations */
-};
-
-static int handle_op(struct test_thread_data *td, int lockwakeup)
-{
-	int i, id, ret = -EINVAL;
-
-	switch(td->opcode) {
-
-	case RTTEST_NOP:
-		return 0;
-
-	case RTTEST_LOCKCONT:
-		td->mutexes[td->opdata] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		return 0;
-
-	case RTTEST_RESET:
-		for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) {
-			if (td->mutexes[i] == 4) {
-				rt_mutex_unlock(&mutexes[i]);
-				td->mutexes[i] = 0;
-			}
-		}
-		return 0;
-
-	case RTTEST_RESETEVENT:
-		atomic_set(&rttest_event, 0);
-		return 0;
-
-	default:
-		if (lockwakeup)
-			return ret;
-	}
-
-	switch(td->opcode) {
-
-	case RTTEST_LOCK:
-	case RTTEST_LOCKNOWAIT:
-		id = td->opdata;
-		if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
-			return ret;
-
-		td->mutexes[id] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		rt_mutex_lock(&mutexes[id]);
-		td->event = atomic_add_return(1, &rttest_event);
-		td->mutexes[id] = 4;
-		return 0;
-
-	case RTTEST_LOCKINT:
-	case RTTEST_LOCKINTNOWAIT:
-		id = td->opdata;
-		if (id < 0 || id >= MAX_RT_TEST_MUTEXES)
-			return ret;
-
-		td->mutexes[id] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		ret = rt_mutex_lock_interruptible(&mutexes[id], 0);
-		td->event = atomic_add_return(1, &rttest_event);
-		td->mutexes[id] = ret ? 0 : 4;
-		return ret ? -EINTR : 0;
-
-	case RTTEST_UNLOCK:
-		id = td->opdata;
-		if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4)
-			return ret;
-
-		td->event = atomic_add_return(1, &rttest_event);
-		rt_mutex_unlock(&mutexes[id]);
-		td->event = atomic_add_return(1, &rttest_event);
-		td->mutexes[id] = 0;
-		return 0;
-
-	default:
-		break;
-	}
-	return ret;
-}
-
-/*
- * Schedule replacement for rtsem_down(). Only called for threads with
- * PF_MUTEX_TESTER set.
- *
- * This allows us to have finegrained control over the event flow.
- *
- */
-void schedule_rt_mutex_test(struct rt_mutex *mutex)
-{
-	int tid, op, dat;
-	struct test_thread_data *td;
-
-	/* We have to lookup the task */
-	for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) {
-		if (threads[tid] == current)
-			break;
-	}
-
-	BUG_ON(tid == MAX_RT_TEST_THREADS);
-
-	td = &thread_data[tid];
-
-	op = td->opcode;
-	dat = td->opdata;
-
-	switch (op) {
-	case RTTEST_LOCK:
-	case RTTEST_LOCKINT:
-	case RTTEST_LOCKNOWAIT:
-	case RTTEST_LOCKINTNOWAIT:
-		if (mutex != &mutexes[dat])
-			break;
-
-		if (td->mutexes[dat] != 1)
-			break;
-
-		td->mutexes[dat] = 2;
-		td->event = atomic_add_return(1, &rttest_event);
-		break;
-
-	default:
-		break;
-	}
-
-	schedule();
-
-
-	switch (op) {
-	case RTTEST_LOCK:
-	case RTTEST_LOCKINT:
-		if (mutex != &mutexes[dat])
-			return;
-
-		if (td->mutexes[dat] != 2)
-			return;
-
-		td->mutexes[dat] = 3;
-		td->event = atomic_add_return(1, &rttest_event);
-		break;
-
-	case RTTEST_LOCKNOWAIT:
-	case RTTEST_LOCKINTNOWAIT:
-		if (mutex != &mutexes[dat])
-			return;
-
-		if (td->mutexes[dat] != 2)
-			return;
-
-		td->mutexes[dat] = 1;
-		td->event = atomic_add_return(1, &rttest_event);
-		return;
-
-	default:
-		return;
-	}
-
-	td->opcode = 0;
-
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (td->opcode > 0) {
-			int ret;
-
-			set_current_state(TASK_RUNNING);
-			ret = handle_op(td, 1);
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (td->opcode == RTTEST_LOCKCONT)
-				break;
-			td->opcode = ret;
-		}
-
-		/* Wait for the next command to be executed */
-		schedule();
-	}
-
-	/* Restore previous command and data */
-	td->opcode = op;
-	td->opdata = dat;
-}
-
-static int test_func(void *data)
-{
-	struct test_thread_data *td = data;
-	int ret;
-
-	current->flags |= PF_MUTEX_TESTER;
-	set_freezable();
-	allow_signal(SIGHUP);
-
-	for(;;) {
-
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		if (td->opcode > 0) {
-			set_current_state(TASK_RUNNING);
-			ret = handle_op(td, 0);
-			set_current_state(TASK_INTERRUPTIBLE);
-			td->opcode = ret;
-		}
-
-		/* Wait for the next command to be executed */
-		schedule();
-		try_to_freeze();
-
-		if (signal_pending(current))
-			flush_signals(current);
-
-		if(kthread_should_stop())
-			break;
-	}
-	return 0;
-}
-
-/**
- * sysfs_test_command - interface for test commands
- * @dev:	thread reference
- * @buf:	command for actual step
- * @count:	length of buffer
- *
- * command syntax:
- *
- * opcode:data
- */
-static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr,
-				  const char *buf, size_t count)
-{
-	struct sched_param schedpar;
-	struct test_thread_data *td;
-	char cmdbuf[32];
-	int op, dat, tid, ret;
-
-	td = container_of(dev, struct test_thread_data, dev);
-	tid = td->dev.id;
-
-	/* strings from sysfs write are not 0 terminated! */
-	if (count >= sizeof(cmdbuf))
-		return -EINVAL;
-
-	/* strip of \n: */
-	if (buf[count-1] == '\n')
-		count--;
-	if (count < 1)
-		return -EINVAL;
-
-	memcpy(cmdbuf, buf, count);
-	cmdbuf[count] = 0;
-
-	if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2)
-		return -EINVAL;
-
-	switch (op) {
-	case RTTEST_SCHEDOT:
-		schedpar.sched_priority = 0;
-		ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar);
-		if (ret)
-			return ret;
-		set_user_nice(current, 0);
-		break;
-
-	case RTTEST_SCHEDRT:
-		schedpar.sched_priority = dat;
-		ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar);
-		if (ret)
-			return ret;
-		break;
-
-	case RTTEST_SIGNAL:
-		send_sig(SIGHUP, threads[tid], 0);
-		break;
-
-	default:
-		if (td->opcode > 0)
-			return -EBUSY;
-		td->opdata = dat;
-		td->opcode = op;
-		wake_up_process(threads[tid]);
-	}
-
-	return count;
-}
-
-/**
- * sysfs_test_status - sysfs interface for rt tester
- * @dev:	thread to query
- * @buf:	char buffer to be filled with thread status info
- */
-static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr,
-				 char *buf)
-{
-	struct test_thread_data *td;
-	struct task_struct *tsk;
-	char *curr = buf;
-	int i;
-
-	td = container_of(dev, struct test_thread_data, dev);
-	tsk = threads[td->dev.id];
-
-	spin_lock(&rttest_lock);
-
-	curr += sprintf(curr,
-		"O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
-		td->opcode, td->event, tsk->state,
-			(MAX_RT_PRIO - 1) - tsk->prio,
-			(MAX_RT_PRIO - 1) - tsk->normal_prio,
-		tsk->pi_blocked_on);
-
-	for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
-		curr += sprintf(curr, "%d", td->mutexes[i]);
-
-	spin_unlock(&rttest_lock);
-
-	curr += sprintf(curr, ", T: %p, R: %p\n", tsk,
-			mutexes[td->dev.id].owner);
-
-	return curr - buf;
-}
-
-static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
-static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
-
-static struct bus_type rttest_subsys = {
-	.name = "rttest",
-	.dev_name = "rttest",
-};
-
-static int init_test_thread(int id)
-{
-	thread_data[id].dev.bus = &rttest_subsys;
-	thread_data[id].dev.id = id;
-
-	threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id);
-	if (IS_ERR(threads[id]))
-		return PTR_ERR(threads[id]);
-
-	return device_register(&thread_data[id].dev);
-}
-
-static int init_rttest(void)
-{
-	int ret, i;
-
-	spin_lock_init(&rttest_lock);
-
-	for (i = 0; i < MAX_RT_TEST_MUTEXES; i++)
-		rt_mutex_init(&mutexes[i]);
-
-	ret = subsys_system_register(&rttest_subsys, NULL);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < MAX_RT_TEST_THREADS; i++) {
-		ret = init_test_thread(i);
-		if (ret)
-			break;
-		ret = device_create_file(&thread_data[i].dev, &dev_attr_status);
-		if (ret)
-			break;
-		ret = device_create_file(&thread_data[i].dev, &dev_attr_command);
-		if (ret)
-			break;
-	}
-
-	printk("Initializing RT-Tester: %s\n", ret ? "Failed" : "OK" );
-
-	return ret;
-}
-
-device_initcall(init_rttest);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 5674b073473c..7781d801212f 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1120,7 +1120,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 		debug_rt_mutex_print_deadlock(waiter);
 
-		schedule_rt_mutex(lock);
+		schedule();
 
 		raw_spin_lock(&lock->wait_lock);
 		set_current_state(state);
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 7844f8f0e639..4f5f83c7d2d3 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -15,28 +15,6 @@
 #include <linux/rtmutex.h>
 
 /*
- * The rtmutex in kernel tester is independent of rtmutex debugging. We
- * call schedule_rt_mutex_test() instead of schedule() for the tasks which
- * belong to the tester. That way we can delay the wakeup path of those
- * threads to provoke lock stealing and testing of  complex boosting scenarios.
- */
-#ifdef CONFIG_RT_MUTEX_TESTER
-
-extern void schedule_rt_mutex_test(struct rt_mutex *lock);
-
-#define schedule_rt_mutex(_lock)				\
-  do {								\
-	if (!(current->flags & PF_MUTEX_TESTER))		\
-		schedule();					\
-	else							\
-		schedule_rt_mutex_test(_lock);			\
-  } while (0)
-
-#else
-# define schedule_rt_mutex(_lock)			schedule()
-#endif
-
-/*
  * This is the control structure for tasks blocked on a rt_mutex,
  * which is allocated on the kernel stack on of the blocked task.
  *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d8420c233ff7..3595403921bd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -164,14 +164,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
 
 static void sched_feat_disable(int i)
 {
-	if (static_key_enabled(&sched_feat_keys[i]))
-		static_key_slow_dec(&sched_feat_keys[i]);
+	static_key_disable(&sched_feat_keys[i]);
 }
 
 static void sched_feat_enable(int i)
 {
-	if (!static_key_enabled(&sched_feat_keys[i]))
-		static_key_slow_inc(&sched_feat_keys[i]);
+	static_key_enable(&sched_feat_keys[i]);
 }
 #else
 static void sched_feat_disable(int i) { };
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3e0b662cae09..ab76b99adc85 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -916,12 +916,6 @@ config DEBUG_RT_MUTEXES
 	 This allows rt mutex semantics violations and rt mutex related
 	 deadlocks (lockups) to be detected and reported automatically.
 
-config RT_MUTEX_TESTER
-	bool "Built-in scriptable tester for rt-mutexes"
-	depends on DEBUG_KERNEL && RT_MUTEXES && BROKEN
-	help
-	  This option enables a rt-mutex tester.
-
 config DEBUG_SPINLOCK
 	bool "Spinlock and rw-lock debugging: basic checks"
 	depends on DEBUG_KERNEL
@@ -1528,6 +1522,13 @@ config FAIL_MMC_REQUEST
 	  and to test how the mmc host driver handles retries from
 	  the block device.
 
+config FAIL_FUTEX
+	bool "Fault-injection capability for futexes"
+	select DEBUG_FS
+	depends on FAULT_INJECTION && FUTEX
+	help
+	  Provide fault-injection capability for futexes.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
@@ -1826,6 +1827,15 @@ config MEMTEST
 	        memtest=17, mean do 17 test patterns.
 	  If you are unsure how to answer this question, answer N.
 
+config TEST_STATIC_KEYS
+	tristate "Test static keys"
+	default n
+	depends on m
+	help
+	  Test the static key interfaces.
+
+	  If unsure, say N.
+
 source "samples/Kconfig"
 
 source "lib/Kconfig.kgdb"
diff --git a/lib/Makefile b/lib/Makefile
index f2610061bfa4..f01c558bf80d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -39,6 +39,8 @@ obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
 obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
 obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
+obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
+obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/atomic64.c b/lib/atomic64.c
index 1298c05ef528..2886ebac6567 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -102,6 +102,9 @@ EXPORT_SYMBOL(atomic64_##op##_return);
 
 ATOMIC64_OPS(add, +=)
 ATOMIC64_OPS(sub, -=)
+ATOMIC64_OP(and, &=)
+ATOMIC64_OP(or, |=)
+ATOMIC64_OP(xor, ^=)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
index 0211d30d8c39..83c33a5bcffb 100644
--- a/lib/atomic64_test.c
+++ b/lib/atomic64_test.c
@@ -16,8 +16,39 @@
 #include <linux/kernel.h>
 #include <linux/atomic.h>
 
+#define TEST(bit, op, c_op, val)				\
+do {								\
+	atomic##bit##_set(&v, v0);				\
+	r = v0;							\
+	atomic##bit##_##op(val, &v);				\
+	r c_op val;						\
+	WARN(atomic##bit##_read(&v) != r, "%Lx != %Lx\n",	\
+		(unsigned long long)atomic##bit##_read(&v),	\
+		(unsigned long long)r);				\
+} while (0)
+
+static __init void test_atomic(void)
+{
+	int v0 = 0xaaa31337;
+	int v1 = 0xdeadbeef;
+	int onestwos = 0x11112222;
+	int one = 1;
+
+	atomic_t v;
+	int r;
+
+	TEST(, add, +=, onestwos);
+	TEST(, add, +=, -one);
+	TEST(, sub, -=, onestwos);
+	TEST(, sub, -=, -one);
+	TEST(, or, |=, v1);
+	TEST(, and, &=, v1);
+	TEST(, xor, ^=, v1);
+	TEST(, andnot, &= ~, v1);
+}
+
 #define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
-static __init int test_atomic64(void)
+static __init void test_atomic64(void)
 {
 	long long v0 = 0xaaa31337c001d00dLL;
 	long long v1 = 0xdeadbeefdeafcafeLL;
@@ -34,15 +65,14 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 	BUG_ON(atomic64_read(&v) != r);
 
-	INIT(v0);
-	atomic64_add(onestwos, &v);
-	r += onestwos;
-	BUG_ON(v.counter != r);
-
-	INIT(v0);
-	atomic64_add(-one, &v);
-	r += -one;
-	BUG_ON(v.counter != r);
+	TEST(64, add, +=, onestwos);
+	TEST(64, add, +=, -one);
+	TEST(64, sub, -=, onestwos);
+	TEST(64, sub, -=, -one);
+	TEST(64, or, |=, v1);
+	TEST(64, and, &=, v1);
+	TEST(64, xor, ^=, v1);
+	TEST(64, andnot, &= ~, v1);
 
 	INIT(v0);
 	r += onestwos;
@@ -55,16 +85,6 @@ static __init int test_atomic64(void)
 	BUG_ON(v.counter != r);
 
 	INIT(v0);
-	atomic64_sub(onestwos, &v);
-	r -= onestwos;
-	BUG_ON(v.counter != r);
-
-	INIT(v0);
-	atomic64_sub(-one, &v);
-	r -= -one;
-	BUG_ON(v.counter != r);
-
-	INIT(v0);
 	r -= onestwos;
 	BUG_ON(atomic64_sub_return(onestwos, &v) != r);
 	BUG_ON(v.counter != r);
@@ -147,6 +167,12 @@ static __init int test_atomic64(void)
 	BUG_ON(!atomic64_inc_not_zero(&v));
 	r += one;
 	BUG_ON(v.counter != r);
+}
+
+static __init int test_atomics(void)
+{
+	test_atomic();
+	test_atomic64();
 
 #ifdef CONFIG_X86
 	pr_info("passed for %s platform %s CX8 and %s SSE\n",
@@ -166,4 +192,4 @@ static __init int test_atomic64(void)
 	return 0;
 }
 
-core_initcall(test_atomic64);
+core_initcall(test_atomics);
diff --git a/lib/lockref.c b/lib/lockref.c
index 494994bf17c8..5a92189ad711 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -4,14 +4,6 @@
 #if USE_CMPXCHG_LOCKREF
 
 /*
- * Allow weakly-ordered memory architectures to provide barrier-less
- * cmpxchg semantics for lockref updates.
- */
-#ifndef cmpxchg64_relaxed
-# define cmpxchg64_relaxed cmpxchg64
-#endif
-
-/*
  * Note that the "cmpxchg()" reloads the "old" value for the
  * failure case.
  */
diff --git a/lib/test_static_key_base.c b/lib/test_static_key_base.c
new file mode 100644
index 000000000000..729447aea02f
--- /dev/null
+++ b/lib/test_static_key_base.c
@@ -0,0 +1,68 @@
+/*
+ * Kernel module for testing static keys.
+ *
+ * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Jason Baron       <jbaron@akamai.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/jump_label.h>
+
+/* old keys */
+struct static_key base_old_true_key = STATIC_KEY_INIT_TRUE;
+EXPORT_SYMBOL_GPL(base_old_true_key);
+struct static_key base_inv_old_true_key = STATIC_KEY_INIT_TRUE;
+EXPORT_SYMBOL_GPL(base_inv_old_true_key);
+struct static_key base_old_false_key = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(base_old_false_key);
+struct static_key base_inv_old_false_key = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(base_inv_old_false_key);
+
+/* new keys */
+DEFINE_STATIC_KEY_TRUE(base_true_key);
+EXPORT_SYMBOL_GPL(base_true_key);
+DEFINE_STATIC_KEY_TRUE(base_inv_true_key);
+EXPORT_SYMBOL_GPL(base_inv_true_key);
+DEFINE_STATIC_KEY_FALSE(base_false_key);
+EXPORT_SYMBOL_GPL(base_false_key);
+DEFINE_STATIC_KEY_FALSE(base_inv_false_key);
+EXPORT_SYMBOL_GPL(base_inv_false_key);
+
+static void invert_key(struct static_key *key)
+{
+	if (static_key_enabled(key))
+		static_key_disable(key);
+	else
+		static_key_enable(key);
+}
+
+static int __init test_static_key_base_init(void)
+{
+	invert_key(&base_inv_old_true_key);
+	invert_key(&base_inv_old_false_key);
+	invert_key(&base_inv_true_key.key);
+	invert_key(&base_inv_false_key.key);
+
+	return 0;
+}
+
+static void __exit test_static_key_base_exit(void)
+{
+}
+
+module_init(test_static_key_base_init);
+module_exit(test_static_key_base_exit);
+
+MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_static_keys.c b/lib/test_static_keys.c
new file mode 100644
index 000000000000..c61b299e367f
--- /dev/null
+++ b/lib/test_static_keys.c
@@ -0,0 +1,225 @@
+/*
+ * Kernel module for testing static keys.
+ *
+ * Copyright 2015 Akamai Technologies Inc. All Rights Reserved
+ *
+ * Authors:
+ *      Jason Baron       <jbaron@akamai.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/jump_label.h>
+
+/* old keys */
+struct static_key old_true_key	= STATIC_KEY_INIT_TRUE;
+struct static_key old_false_key	= STATIC_KEY_INIT_FALSE;
+
+/* new api */
+DEFINE_STATIC_KEY_TRUE(true_key);
+DEFINE_STATIC_KEY_FALSE(false_key);
+
+/* external */
+extern struct static_key base_old_true_key;
+extern struct static_key base_inv_old_true_key;
+extern struct static_key base_old_false_key;
+extern struct static_key base_inv_old_false_key;
+
+/* new api */
+extern struct static_key_true base_true_key;
+extern struct static_key_true base_inv_true_key;
+extern struct static_key_false base_false_key;
+extern struct static_key_false base_inv_false_key;
+
+
+struct test_key {
+	bool			init_state;
+	struct static_key	*key;
+	bool			(*test_key)(void);
+};
+
+#define test_key_func(key, branch) \
+	({bool func(void) { return branch(key); } func;	})
+
+static void invert_key(struct static_key *key)
+{
+	if (static_key_enabled(key))
+		static_key_disable(key);
+	else
+		static_key_enable(key);
+}
+
+static void invert_keys(struct test_key *keys, int size)
+{
+	struct static_key *previous = NULL;
+	int i;
+
+	for (i = 0; i < size; i++) {
+		if (previous != keys[i].key) {
+			invert_key(keys[i].key);
+			previous = keys[i].key;
+		}
+	}
+}
+
+static int verify_keys(struct test_key *keys, int size, bool invert)
+{
+	int i;
+	bool ret, init;
+
+	for (i = 0; i < size; i++) {
+		ret = static_key_enabled(keys[i].key);
+		init = keys[i].init_state;
+		if (ret != (invert ? !init : init))
+			return -EINVAL;
+		ret = keys[i].test_key();
+		if (static_key_enabled(keys[i].key)) {
+			if (!ret)
+				return -EINVAL;
+		} else {
+			if (ret)
+				return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+static int __init test_static_key_init(void)
+{
+	int ret;
+	int size;
+
+	struct test_key static_key_tests[] = {
+		/* internal keys - old keys */
+		{
+			.init_state	= true,
+			.key		= &old_true_key,
+			.test_key	= test_key_func(&old_true_key, static_key_true),
+		},
+		{
+			.init_state	= false,
+			.key		= &old_false_key,
+			.test_key	= test_key_func(&old_false_key, static_key_false),
+		},
+		/* internal keys - new keys */
+		{
+			.init_state	= true,
+			.key		= &true_key.key,
+			.test_key	= test_key_func(&true_key, static_branch_likely),
+		},
+		{
+			.init_state	= true,
+			.key		= &true_key.key,
+			.test_key	= test_key_func(&true_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= false,
+			.key		= &false_key.key,
+			.test_key	= test_key_func(&false_key, static_branch_likely),
+		},
+		{
+			.init_state	= false,
+			.key		= &false_key.key,
+			.test_key	= test_key_func(&false_key, static_branch_unlikely),
+		},
+		/* external keys - old keys */
+		{
+			.init_state	= true,
+			.key		= &base_old_true_key,
+			.test_key	= test_key_func(&base_old_true_key, static_key_true),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_inv_old_true_key,
+			.test_key	= test_key_func(&base_inv_old_true_key, static_key_true),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_old_false_key,
+			.test_key	= test_key_func(&base_old_false_key, static_key_false),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_inv_old_false_key,
+			.test_key	= test_key_func(&base_inv_old_false_key, static_key_false),
+		},
+		/* external keys - new keys */
+		{
+			.init_state	= true,
+			.key		= &base_true_key.key,
+			.test_key	= test_key_func(&base_true_key, static_branch_likely),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_true_key.key,
+			.test_key	= test_key_func(&base_true_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_inv_true_key.key,
+			.test_key	= test_key_func(&base_inv_true_key, static_branch_likely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_inv_true_key.key,
+			.test_key	= test_key_func(&base_inv_true_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_false_key.key,
+			.test_key	= test_key_func(&base_false_key, static_branch_likely),
+		},
+		{
+			.init_state	= false,
+			.key		= &base_false_key.key,
+			.test_key	= test_key_func(&base_false_key, static_branch_unlikely),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_inv_false_key.key,
+			.test_key	= test_key_func(&base_inv_false_key, static_branch_likely),
+		},
+		{
+			.init_state	= true,
+			.key		= &base_inv_false_key.key,
+			.test_key	= test_key_func(&base_inv_false_key, static_branch_unlikely),
+		},
+	};
+
+	size = ARRAY_SIZE(static_key_tests);
+
+	ret = verify_keys(static_key_tests, size, false);
+	if (ret)
+		goto out;
+
+	invert_keys(static_key_tests, size);
+	ret = verify_keys(static_key_tests, size, true);
+	if (ret)
+		goto out;
+
+	invert_keys(static_key_tests, size);
+	ret = verify_keys(static_key_tests, size, false);
+	if (ret)
+		goto out;
+	return 0;
+out:
+	return ret;
+}
+
+static void __exit test_static_key_exit(void)
+{
+}
+
+module_init(test_static_key_init);
+module_exit(test_static_key_exit);
+
+MODULE_AUTHOR("Jason Baron <jbaron@akamai.com>");
+MODULE_LICENSE("GPL");
diff --git a/scripts/rt-tester/check-all.sh b/scripts/rt-tester/check-all.sh
deleted file mode 100644
index 6b5c83baf148..000000000000
--- a/scripts/rt-tester/check-all.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
-function testit ()
-{
- printf "%-30s: " $1
- ./rt-tester.py $1 | grep Pass
-}
-
-testit t2-l1-2rt-sameprio.tst
-testit t2-l1-pi.tst
-testit t2-l1-signal.tst
-#testit t2-l2-2rt-deadlock.tst
-testit t3-l1-pi-1rt.tst
-testit t3-l1-pi-2rt.tst
-testit t3-l1-pi-3rt.tst
-testit t3-l1-pi-signal.tst
-testit t3-l1-pi-steal.tst
-testit t3-l2-pi.tst
-testit t4-l2-pi-deboost.tst
-testit t5-l4-pi-boost-deboost.tst
-testit t5-l4-pi-boost-deboost-setsched.tst
diff --git a/scripts/rt-tester/rt-tester.py b/scripts/rt-tester/rt-tester.py
deleted file mode 100755
index 6d916c2a45a5..000000000000
--- a/scripts/rt-tester/rt-tester.py
+++ /dev/null
@@ -1,218 +0,0 @@
-#!/usr/bin/python
-#
-# rt-mutex tester
-#
-# (C) 2006 Thomas Gleixner <tglx@linutronix.de>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 as
-# published by the Free Software Foundation.
-#
-import os
-import sys
-import getopt
-import shutil
-import string
-
-# Globals
-quiet = 0
-test = 0
-comments = 0
-
-sysfsprefix = "/sys/devices/system/rttest/rttest"
-statusfile = "/status"
-commandfile = "/command"
-
-# Command opcodes
-cmd_opcodes = {
-    "schedother"    : "1",
-    "schedfifo"     : "2",
-    "lock"          : "3",
-    "locknowait"    : "4",
-    "lockint"       : "5",
-    "lockintnowait" : "6",
-    "lockcont"      : "7",
-    "unlock"        : "8",
-    "signal"        : "11",
-    "resetevent"    : "98",
-    "reset"         : "99",
-    }
-
-test_opcodes = {
-    "prioeq"        : ["P" , "eq" , None],
-    "priolt"        : ["P" , "lt" , None],
-    "priogt"        : ["P" , "gt" , None],
-    "nprioeq"       : ["N" , "eq" , None],
-    "npriolt"       : ["N" , "lt" , None],
-    "npriogt"       : ["N" , "gt" , None],
-    "unlocked"      : ["M" , "eq" , 0],
-    "trylock"       : ["M" , "eq" , 1],
-    "blocked"       : ["M" , "eq" , 2],
-    "blockedwake"   : ["M" , "eq" , 3],
-    "locked"        : ["M" , "eq" , 4],
-    "opcodeeq"      : ["O" , "eq" , None],
-    "opcodelt"      : ["O" , "lt" , None],
-    "opcodegt"      : ["O" , "gt" , None],
-    "eventeq"       : ["E" , "eq" , None],
-    "eventlt"       : ["E" , "lt" , None],
-    "eventgt"       : ["E" , "gt" , None],
-    }
-
-# Print usage information
-def usage():
-    print "rt-tester.py <-c -h -q -t> <testfile>"
-    print " -c    display comments after first command"
-    print " -h    help"
-    print " -q    quiet mode"
-    print " -t    test mode (syntax check)"
-    print " testfile: read test specification from testfile"
-    print " otherwise from stdin"
-    return
-
-# Print progress when not in quiet mode
-def progress(str):
-    if not quiet:
-        print str
-
-# Analyse a status value
-def analyse(val, top, arg):
-
-    intval = int(val)
-
-    if top[0] == "M":
-        intval = intval / (10 ** int(arg))
-	intval = intval % 10
-        argval = top[2]
-    elif top[0] == "O":
-        argval = int(cmd_opcodes.get(arg, arg))
-    else:
-        argval = int(arg)
-
-    # progress("%d %s %d" %(intval, top[1], argval))
-
-    if top[1] == "eq" and intval == argval:
-	return 1
-    if top[1] == "lt" and intval < argval:
-        return 1
-    if top[1] == "gt" and intval > argval:
-	return 1
-    return 0
-
-# Parse the commandline
-try:
-    (options, arguments) = getopt.getopt(sys.argv[1:],'chqt')
-except getopt.GetoptError, ex:
-    usage()
-    sys.exit(1)
-
-# Parse commandline options
-for option, value in options:
-    if option == "-c":
-        comments = 1
-    elif option == "-q":
-        quiet = 1
-    elif option == "-t":
-        test = 1
-    elif option == '-h':
-        usage()
-        sys.exit(0)
-
-# Select the input source
-if arguments:
-    try:
-        fd = open(arguments[0])
-    except Exception,ex:
-        sys.stderr.write("File not found %s\n" %(arguments[0]))
-        sys.exit(1)
-else:
-    fd = sys.stdin
-
-linenr = 0
-
-# Read the test patterns
-while 1:
-
-    linenr = linenr + 1
-    line = fd.readline()
-    if not len(line):
-        break
-
-    line = line.strip()
-    parts = line.split(":")
-
-    if not parts or len(parts) < 1:
-        continue
-
-    if len(parts[0]) == 0:
-        continue
-
-    if parts[0].startswith("#"):
-	if comments > 1:
-	    progress(line)
-	continue
-
-    if comments == 1:
-	comments = 2
-
-    progress(line)
-
-    cmd = parts[0].strip().lower()
-    opc = parts[1].strip().lower()
-    tid = parts[2].strip()
-    dat = parts[3].strip()
-
-    try:
-        # Test or wait for a status value
-        if cmd == "t" or cmd == "w":
-            testop = test_opcodes[opc]
-
-            fname = "%s%s%s" %(sysfsprefix, tid, statusfile)
-            if test:
-		print fname
-                continue
-
-            while 1:
-                query = 1
-                fsta = open(fname, 'r')
-                status = fsta.readline().strip()
-                fsta.close()
-                stat = status.split(",")
-                for s in stat:
-		    s = s.strip()
-                    if s.startswith(testop[0]):
-                        # Separate status value
-                        val = s[2:].strip()
-                        query = analyse(val, testop, dat)
-                        break
-                if query or cmd == "t":
-                    break
-
-            progress("   " + status)
-
-            if not query:
-                sys.stderr.write("Test failed in line %d\n" %(linenr))
-		sys.exit(1)
-
-        # Issue a command to the tester
-        elif cmd == "c":
-            cmdnr = cmd_opcodes[opc]
-            # Build command string and sys filename
-            cmdstr = "%s:%s" %(cmdnr, dat)
-            fname = "%s%s%s" %(sysfsprefix, tid, commandfile)
-            if test:
-		print fname
-                continue
-            fcmd = open(fname, 'w')
-            fcmd.write(cmdstr)
-            fcmd.close()
-
-    except Exception,ex:
-    	sys.stderr.write(str(ex))
-        sys.stderr.write("\nSyntax error in line %d\n" %(linenr))
-        if not test:
-            fd.close()
-            sys.exit(1)
-
-# Normal exit pass
-print "Pass"
-sys.exit(0)
diff --git a/scripts/rt-tester/t2-l1-2rt-sameprio.tst b/scripts/rt-tester/t2-l1-2rt-sameprio.tst
deleted file mode 100644
index 3710c8b2090d..000000000000
--- a/scripts/rt-tester/t2-l1-2rt-sameprio.tst
+++ /dev/null
@@ -1,94 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 1 lock
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedfifo:		0: 	80
-C: schedfifo:		1: 	80
-
-# T0 lock L0
-C: locknowait:		0: 	0
-C: locknowait:		1:	0
-W: locked:		0: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	80
-
-# T0 unlock L0
-C: unlock:		0: 	0
-W: locked:		1: 	0
-
-# Verify T0
-W: unlocked:		0: 	0
-T: prioeq:		0: 	80
-
-# Unlock
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
-
-# T1,T0 lock L0
-C: locknowait:		1: 	0
-C: locknowait:		0:	0
-W: locked:		1: 	0
-W: blocked:		0: 	0
-T: prioeq:		1: 	80
-
-# T1 unlock L0
-C: unlock:		1: 	0
-W: locked:		0: 	0
-
-# Verify T1
-W: unlocked:		1: 	0
-T: prioeq:		1: 	80
-
-# Unlock and exit
-C: unlock:		0: 	0
-W: unlocked:		0: 	0
-
diff --git a/scripts/rt-tester/t2-l1-pi.tst b/scripts/rt-tester/t2-l1-pi.tst
deleted file mode 100644
index b4cc95975adb..000000000000
--- a/scripts/rt-tester/t2-l1-pi.tst
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 1 lock with priority inversion
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	80
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	80
-
-# T0 unlock L0
-C: unlock:		0: 	0
-W: locked:		1: 	0
-
-# Verify T1
-W: unlocked:		0: 	0
-T: priolt:		0: 	1
-
-# Unlock and exit
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
-
diff --git a/scripts/rt-tester/t2-l1-signal.tst b/scripts/rt-tester/t2-l1-signal.tst
deleted file mode 100644
index 1b57376cc1f7..000000000000
--- a/scripts/rt-tester/t2-l1-signal.tst
+++ /dev/null
@@ -1,72 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 1 lock with priority inversion
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-
-# Interrupt T1
-C: signal:		1:	0
-W: unlocked:		1: 	0
-T: opcodeeq:		1:	-4
-
-# Unlock and exit
-C: unlock:		0: 	0
-W: unlocked:		0: 	0
diff --git a/scripts/rt-tester/t2-l2-2rt-deadlock.tst b/scripts/rt-tester/t2-l2-2rt-deadlock.tst
deleted file mode 100644
index 68b10629b6f4..000000000000
--- a/scripts/rt-tester/t2-l2-2rt-deadlock.tst
+++ /dev/null
@@ -1,84 +0,0 @@
-#
-# RT-Mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	0
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 2 threads 2 lock
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedfifo:		0: 	80
-C: schedfifo:		1: 	80
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1:	1
-W: locked:		1: 	1
-
-# T0 lock L1
-C: lockintnowait:	0: 	1
-W: blocked:		0: 	1
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-
-# Make deadlock go away
-C: signal:		1:	0
-W: unlocked:		1:	0
-C: signal:		0:	0
-W: unlocked:		0:	1
-
-# Unlock and exit
-C: unlock:		0: 	0
-W: unlocked:		0: 	0
-C: unlock:		1: 	1
-W: unlocked:		1: 	1
-
diff --git a/scripts/rt-tester/t3-l1-pi-1rt.tst b/scripts/rt-tester/t3-l1-pi-1rt.tst
deleted file mode 100644
index 8e6c8b11ae56..000000000000
--- a/scripts/rt-tester/t3-l1-pi-1rt.tst
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: priolt:		0: 	1
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: priolt:		0:	1
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: unlocked:		2: 	0
-W: locked:		1: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l1-pi-2rt.tst b/scripts/rt-tester/t3-l1-pi-2rt.tst
deleted file mode 100644
index 69c2212fc520..000000000000
--- a/scripts/rt-tester/t3-l1-pi-2rt.tst
+++ /dev/null
@@ -1,88 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-T: prioeq:		1:	81
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: priolt:		0:	1
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: unlocked:		2: 	0
-W: locked:		1: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l1-pi-3rt.tst b/scripts/rt-tester/t3-l1-pi-3rt.tst
deleted file mode 100644
index 9b0f1eb26a88..000000000000
--- a/scripts/rt-tester/t3-l1-pi-3rt.tst
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedfifo:		0: 	80
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: prioeq:		0:	80
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: locked:		1: 	0
-W: unlocked:		2: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l1-pi-signal.tst b/scripts/rt-tester/t3-l1-pi-signal.tst
deleted file mode 100644
index 39ec74ab06ee..000000000000
--- a/scripts/rt-tester/t3-l1-pi-signal.tst
+++ /dev/null
@@ -1,93 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-# Reset event counter
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set priorities
-C: schedother:		0: 	0
-C: schedfifo:		1: 	80
-C: schedfifo:		2: 	81
-
-# T0 lock L0
-C: lock:		0:	0
-W: locked:		0: 	0
-
-# T1 lock L0, no wait in the wakeup path
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0:	80
-T: prioeq:		1:	80
-
-# T2 lock L0 interruptible, no wait in the wakeup path
-C: lockintnowait:	2:	0
-W: blocked:		2: 	0
-T: prioeq:		0:	81
-T: prioeq:		1:	80
-
-# Interrupt T2
-C: signal:		2:	2
-W: unlocked:		2:	0
-T: prioeq:		1:	80
-T: prioeq:		0:	80
-
-T: locked:		0:	0
-T: blocked:		1:	0
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T1 has locked L0 and exit
-W: locked:		1:	0
-W: unlocked:		0: 	0
-T: priolt:		0:	1
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
-
-
-
diff --git a/scripts/rt-tester/t3-l1-pi-steal.tst b/scripts/rt-tester/t3-l1-pi-steal.tst
deleted file mode 100644
index e03db7e010fa..000000000000
--- a/scripts/rt-tester/t3-l1-pi-steal.tst
+++ /dev/null
@@ -1,91 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 1 lock PI steal pending ownership
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	80
-C: schedfifo:		2: 	81
-
-# T0 lock L0
-C: lock:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: lock:		1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	80
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T1 is in the wakeup loop
-W: blockedwake:		1: 	0
-T: priolt:		0: 	1
-
-# T2 lock L0
-C: lock:		2: 	0
-# T1 leave wakeup loop
-C: lockcont:		1: 	0
-
-# T2 must have the lock and T1 must be blocked
-W: locked:		2: 	0
-W: blocked:		1: 	0
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-# Wait until T1 is in the wakeup loop and let it run
-W: blockedwake:		1: 	0
-C: lockcont:		1: 	0
-W: locked:		1: 	0
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t3-l2-pi.tst b/scripts/rt-tester/t3-l2-pi.tst
deleted file mode 100644
index 7b59100d3e48..000000000000
--- a/scripts/rt-tester/t3-l2-pi.tst
+++ /dev/null
@@ -1,87 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 3 threads 2 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-C: schedfifo:		2: 	82
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L0
-C: locknowait:		1: 	0
-W: blocked:		1: 	0
-T: priolt:		0: 	1
-
-# T2 lock L0
-C: locknowait:		2: 	0
-W: blocked:		2: 	0
-T: prioeq:		0: 	82
-
-# T0 unlock L0
-C: unlock:		0: 	0
-
-# Wait until T2 got the lock
-W: locked:		2: 	0
-W: unlocked:		0:	0
-T: priolt:		0:	1
-
-# T2 unlock L0
-C: unlock:		2: 	0
-
-W: unlocked:		2: 	0
-W: locked:		1: 	0
-
-C: unlock:		1: 	0
-W: unlocked:		1: 	0
diff --git a/scripts/rt-tester/t4-l2-pi-deboost.tst b/scripts/rt-tester/t4-l2-pi-deboost.tst
deleted file mode 100644
index 2f0e049d6443..000000000000
--- a/scripts/rt-tester/t4-l2-pi-deboost.tst
+++ /dev/null
@@ -1,118 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 4 threads 2 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedother:		1: 	0
-C: schedfifo:		2: 	82
-C: schedfifo:		3: 	83
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1: 	1
-W: locked:		1: 	1
-
-# T3 lock L0
-C: lockintnowait:	3: 	0
-W: blocked:		3: 	0
-T: prioeq:		0: 	83
-
-# T0 lock L1
-C: lock:		0: 	1
-W: blocked:		0: 	1
-T: prioeq:		1: 	83
-
-# T1 unlock L1
-C: unlock:		1:	1
-
-# Wait until T0 is in the wakeup code
-W: blockedwake:		0:	1
-
-# Verify that T1 is unboosted
-W: unlocked:		1: 	1
-T: priolt:		1: 	1
-
-# T2 lock L1 (T0 is boosted and pending owner !)
-C: locknowait:		2:	1
-W: blocked:		2: 	1
-T: prioeq:		0: 	83
-
-# Interrupt T3 and wait until T3 returned
-C: signal:		3:	0
-W: unlocked:		3:	0
-
-# Verify prio of T0 (still pending owner,
-# but T2 is enqueued due to the previous boost by T3
-T: prioeq:		0:	82
-
-# Let T0 continue
-C: lockcont:		0:	1
-W: locked:		0:	1
-
-# Unlock L1 and let T2 get L1
-C: unlock:		0:	1
-W: locked:		2:	1
-
-# Verify that T0 is unboosted
-W: unlocked:		0:	1
-T: priolt:		0:	1
-
-# Unlock everything and exit
-C: unlock:		2:	1
-W: unlocked:		2:	1
-
-C: unlock:		0:	0
-W: unlocked:		0:	0
-
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
deleted file mode 100644
index 04f4034ff895..000000000000
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost-setsched.tst
+++ /dev/null
@@ -1,178 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 5 threads 4 lock PI - modify priority of blocked threads
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-C: schedfifo:		3: 	83
-C: schedfifo:		4: 	84
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1: 	1
-W: locked:		1: 	1
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L2
-C: locknowait:		2: 	2
-W: locked:		2: 	2
-
-# T2 lock L1
-C: lockintnowait:	2: 	1
-W: blocked:		2: 	1
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-
-# T3 lock L3
-C: locknowait:		3: 	3
-W: locked:		3: 	3
-
-# T3 lock L2
-C: lockintnowait:	3: 	2
-W: blocked:		3: 	2
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-
-# T4 lock L3
-C: lockintnowait:	4:	3
-W: blocked:		4: 	3
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-
-# Reduce prio of T4
-C: schedfifo:		4: 	80
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-T: prioeq:		3:	83
-T: prioeq:		4:	80
-
-# Increase prio of T4
-C: schedfifo:		4: 	84
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-T: prioeq:		4:	84
-
-# Reduce prio of T3
-C: schedfifo:		3: 	80
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-T: prioeq:		4:	84
-
-# Increase prio of T3
-C: schedfifo:		3: 	85
-T: prioeq:		0: 	85
-T: prioeq:		1:	85
-T: prioeq:		2:	85
-T: prioeq:		3:	85
-T: prioeq:		4:	84
-
-# Reduce prio of T3
-C: schedfifo:		3: 	83
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-T: prioeq:		4:	84
-
-# Signal T4
-C: signal:		4: 	0
-W: unlocked:		4: 	3
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-T: prioeq:		3:	83
-
-# Signal T3
-C: signal:		3: 	0
-W: unlocked:		3: 	2
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-T: prioeq:		2:	82
-
-# Signal T2
-C: signal:		2: 	0
-W: unlocked:		2: 	1
-T: prioeq:		0: 	81
-T: prioeq:		1:	81
-
-# Signal T1
-C: signal:		1: 	0
-W: unlocked:		1: 	0
-T: priolt:		0: 	1
-
-# Unlock and exit
-C: unlock:		3:	3
-C: unlock:		2:	2
-C: unlock:		1:	1
-C: unlock:		0:	0
-
-W: unlocked:		3:	3
-W: unlocked:		2:	2
-W: unlocked:		1:	1
-W: unlocked:		0:	0
-
diff --git a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst b/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
deleted file mode 100644
index a48a6ee29ddc..000000000000
--- a/scripts/rt-tester/t5-l4-pi-boost-deboost.tst
+++ /dev/null
@@ -1,138 +0,0 @@
-#
-# rt-mutex test
-#
-# Op: C(ommand)/T(est)/W(ait)
-# |  opcode
-# |  |     threadid: 0-7
-# |  |     |  opcode argument
-# |  |     |  |
-# C: lock: 0: 0
-#
-# Commands
-#
-# opcode	opcode argument
-# schedother	nice value
-# schedfifo	priority
-# lock		lock nr (0-7)
-# locknowait	lock nr (0-7)
-# lockint	lock nr (0-7)
-# lockintnowait	lock nr (0-7)
-# lockcont	lock nr (0-7)
-# unlock	lock nr (0-7)
-# signal	thread to signal (0-7)
-# reset		0
-# resetevent	0
-#
-# Tests / Wait
-#
-# opcode	opcode argument
-#
-# prioeq	priority
-# priolt	priority
-# priogt	priority
-# nprioeq	normal priority
-# npriolt	normal priority
-# npriogt	normal priority
-# locked	lock nr (0-7)
-# blocked	lock nr (0-7)
-# blockedwake	lock nr (0-7)
-# unlocked	lock nr (0-7)
-# opcodeeq	command opcode or number
-# opcodelt	number
-# opcodegt	number
-# eventeq	number
-# eventgt	number
-# eventlt	number
-
-#
-# 5 threads 4 lock PI
-#
-C: resetevent:		0: 	0
-W: opcodeeq:		0: 	0
-
-# Set schedulers
-C: schedother:		0: 	0
-C: schedfifo:		1: 	81
-C: schedfifo:		2: 	82
-C: schedfifo:		3: 	83
-C: schedfifo:		4: 	84
-
-# T0 lock L0
-C: locknowait:		0: 	0
-W: locked:		0: 	0
-
-# T1 lock L1
-C: locknowait:		1: 	1
-W: locked:		1: 	1
-
-# T1 lock L0
-C: lockintnowait:	1: 	0
-W: blocked:		1: 	0
-T: prioeq:		0: 	81
-
-# T2 lock L2
-C: locknowait:		2: 	2
-W: locked:		2: 	2
-
-# T2 lock L1
-C: lockintnowait:	2: 	1
-W: blocked:		2: 	1
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-
-# T3 lock L3
-C: locknowait:		3: 	3
-W: locked:		3: 	3
-
-# T3 lock L2
-C: lockintnowait:	3: 	2
-W: blocked:		3: 	2
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-
-# T4 lock L3
-C: lockintnowait:	4:	3
-W: blocked:		4: 	3
-T: prioeq:		0: 	84
-T: prioeq:		1:	84
-T: prioeq:		2:	84
-T: prioeq:		3:	84
-
-# Signal T4
-C: signal:		4: 	0
-W: unlocked:		4: 	3
-T: prioeq:		0: 	83
-T: prioeq:		1:	83
-T: prioeq:		2:	83
-T: prioeq:		3:	83
-
-# Signal T3
-C: signal:		3: 	0
-W: unlocked:		3: 	2
-T: prioeq:		0: 	82
-T: prioeq:		1:	82
-T: prioeq:		2:	82
-
-# Signal T2
-C: signal:		2: 	0
-W: unlocked:		2: 	1
-T: prioeq:		0: 	81
-T: prioeq:		1:	81
-
-# Signal T1
-C: signal:		1: 	0
-W: unlocked:		1: 	0
-T: priolt:		0: 	1
-
-# Unlock and exit
-C: unlock:		3:	3
-C: unlock:		2:	2
-C: unlock:		1:	1
-C: unlock:		0:	0
-
-W: unlocked:		3:	3
-W: unlocked:		2:	2
-W: unlocked:		1:	1
-W: unlocked:		0:	0
-
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 24ae9e829e9a..b8f12e0897e6 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -20,6 +20,7 @@ ifneq (1, $(quicktest))
 TARGETS += timers
 endif
 TARGETS += user
+TARGETS += jumplabel
 TARGETS += vm
 TARGETS += x86
 #Please keep the TARGETS list alphabetically sorted
diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile
new file mode 100644
index 000000000000..9cdadf37f114
--- /dev/null
+++ b/tools/testing/selftests/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
new file mode 100644
index 000000000000..1261e3fa1e3a
--- /dev/null
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Runs static keys kernel module tests
+
+if /sbin/modprobe -q test_static_key_base; then
+	if /sbin/modprobe -q test_static_keys; then
+		echo "static_key: ok"
+		/sbin/modprobe -q -r test_static_keys
+		/sbin/modprobe -q -r test_static_key_base
+	else
+		echo "static_keys: [FAIL]"
+		/sbin/modprobe -q -r test_static_key_base
+	fi
+else
+	echo "static_key: [FAIL]"
+	exit 1
+fi