Diffstat (limited to 'arch')
-rw-r--r--  arch/arm64/Kconfig | 187
-rw-r--r--  arch/arm64/Kconfig.debug | 12
-rw-r--r--  arch/arm64/crypto/Kconfig | 5
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-glue.c | 4
-rw-r--r--  arch/arm64/crypto/aes-ce-cipher.c | 112
-rw-r--r--  arch/arm64/crypto/aes-ce-setkey.h | 5
-rw-r--r--  arch/arm64/crypto/aes-glue.c | 18
-rw-r--r--  arch/arm64/include/asm/alternative-asm.h | 29
-rw-r--r--  arch/arm64/include/asm/alternative.h | 44
-rw-r--r--  arch/arm64/include/asm/cache.h | 2
-rw-r--r--  arch/arm64/include/asm/cacheflush.h | 2
-rw-r--r--  arch/arm64/include/asm/cmpxchg.h | 73
-rw-r--r--  arch/arm64/include/asm/compat.h | 7
-rw-r--r--  arch/arm64/include/asm/cpu.h | 2
-rw-r--r--  arch/arm64/include/asm/cpufeature.h | 29
-rw-r--r--  arch/arm64/include/asm/cputype.h | 5
-rw-r--r--  arch/arm64/include/asm/dmi.h | 31
-rw-r--r--  arch/arm64/include/asm/fixmap.h | 8
-rw-r--r--  arch/arm64/include/asm/hwcap.h | 1
-rw-r--r--  arch/arm64/include/asm/insn.h | 10
-rw-r--r--  arch/arm64/include/asm/io.h | 23
-rw-r--r--  arch/arm64/include/asm/irq.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 21
-rw-r--r--  arch/arm64/include/asm/opcodes.h | 1
-rw-r--r--  arch/arm64/include/asm/percpu.h | 215
-rw-r--r--  arch/arm64/include/asm/pgalloc.h | 8
-rw-r--r--  arch/arm64/include/asm/seccomp.h | 25
-rw-r--r--  arch/arm64/include/asm/tlb.h | 67
-rw-r--r--  arch/arm64/include/asm/traps.h | 16
-rw-r--r--  arch/arm64/include/asm/unistd.h | 3
-rw-r--r--  arch/arm64/include/asm/unistd32.h | 3
-rw-r--r--  arch/arm64/kernel/Makefile | 7
-rw-r--r--  arch/arm64/kernel/alternative.c | 85
-rw-r--r--  arch/arm64/kernel/armv8_deprecated.c | 553
-rw-r--r--  arch/arm64/kernel/cpu_errata.c | 111
-rw-r--r--  arch/arm64/kernel/cpuinfo.c | 23
-rw-r--r--  arch/arm64/kernel/efi-entry.S | 3
-rw-r--r--  arch/arm64/kernel/efi.c | 37
-rw-r--r--  arch/arm64/kernel/entry-ftrace.S | 21
-rw-r--r--  arch/arm64/kernel/entry.S | 138
-rw-r--r--  arch/arm64/kernel/head.S | 434
-rw-r--r--  arch/arm64/kernel/insn.c | 26
-rw-r--r--  arch/arm64/kernel/io.c | 66
-rw-r--r--  arch/arm64/kernel/irq.c | 2
-rw-r--r--  arch/arm64/kernel/jump_label.c | 23
-rw-r--r--  arch/arm64/kernel/module.c | 18
-rw-r--r--  arch/arm64/kernel/perf_event.c | 10
-rw-r--r--  arch/arm64/kernel/ptrace.c | 40
-rw-r--r--  arch/arm64/kernel/setup.c | 108
-rw-r--r--  arch/arm64/kernel/signal32.c | 6
-rw-r--r--  arch/arm64/kernel/sleep.S | 36
-rw-r--r--  arch/arm64/kernel/smp.c | 2
-rw-r--r--  arch/arm64/kernel/suspend.c | 4
-rw-r--r--  arch/arm64/kernel/sys_compat.c | 49
-rw-r--r--  arch/arm64/kernel/topology.c | 7
-rw-r--r--  arch/arm64/kernel/trace-events-emulation.h | 35
-rw-r--r--  arch/arm64/kernel/traps.c | 66
-rw-r--r--  arch/arm64/kernel/vmlinux.lds.S | 33
-rw-r--r--  arch/arm64/kvm/hyp.S | 4
-rw-r--r--  arch/arm64/mm/Makefile | 1
-rw-r--r--  arch/arm64/mm/cache.S | 10
-rw-r--r--  arch/arm64/mm/dump.c | 332
-rw-r--r--  arch/arm64/mm/fault.c | 2
-rw-r--r--  arch/arm64/mm/init.c | 2
-rw-r--r--  arch/arm64/mm/ioremap.c | 93
-rw-r--r--  arch/arm64/mm/mm.h | 1
-rw-r--r--  arch/arm64/mm/mmap.c | 12
-rw-r--r--  arch/arm64/mm/mmu.c | 94
-rw-r--r--  arch/arm64/mm/pgd.c | 4
-rw-r--r--  arch/arm64/net/bpf_jit_comp.c | 13
-rw-r--r--  arch/microblaze/include/asm/tlb.h | 3
-rw-r--r--  arch/powerpc/include/asm/pgalloc.h | 3
-rw-r--r--  arch/powerpc/include/asm/tlb.h | 1
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 2
74 files changed, 2871 insertions(+), 620 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9532f8d5857e..7c79c6494379 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -34,13 +34,16 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select HANDLE_DOMAIN_IRQ
 	select HARDIRQS_SW_RESEND
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_BPF_JIT
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_CC_STACKPROTECTOR
+	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
@@ -193,6 +196,114 @@ endmenu
 
 menu "Kernel Features"
 
+menu "ARM errata workarounds via the alternatives framework"
+
+config ARM64_ERRATUM_826319
+	bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 826319 on Cortex-A53 parts up to r0p2 with an AMBA 4 ACE or
+	  AXI master interface and an L2 cache.
+
+	  If a Cortex-A53 uses an AMBA AXI4 ACE interface to other processors
+	  and is unable to accept a certain write via this interface, it will
+	  not progress on read data presented on the read data channel and the
+	  system can deadlock.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternatives framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_827319
+	bool "Cortex-A53: 827319: Data cache clean instructions might cause overlapping transactions to the interconnect"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 827319 on Cortex-A53 parts up to r0p2 with an AMBA 5 CHI
+	  master interface and an L2 cache.
+
+	  Under certain conditions this erratum can cause a clean line eviction
+	  to occur at the same time as another transaction to the same address
+	  on the AMBA 5 CHI interface, which can cause data corruption if the
+	  interconnect reorders the two transactions.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternatives framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_824069
+	bool "Cortex-A53: 824069: Cache line might not be marked as clean after a CleanShared snoop"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 824069 on Cortex-A53 parts up to r0p2 when it is connected
+	  to a coherent interconnect.
+
+	  If a Cortex-A53 processor is executing a store or prefetch for
+	  write instruction at the same time as a processor in another
+	  cluster is executing a cache maintenance operation to the same
+	  address, then this erratum might cause a clean cache line to be
+	  incorrectly marked as dirty.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this option does not necessarily enable the
+	  workaround, as it depends on the alternatives framework, which will
+	  only patch the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_819472
+	bool "Cortex-A53: 819472: Store exclusive instructions might cause data corruption"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 819472 on Cortex-A53 parts up to r0p1 with an L2 cache
+	  present when it is connected to a coherent interconnect.
+
+	  If the processor is executing a load and store exclusive sequence at
+	  the same time as a processor in another cluster is executing a cache
+	  maintenance operation to the same address, then this erratum might
+	  cause data corruption.
+
+	  The workaround promotes data cache clean instructions to
+	  data cache clean-and-invalidate.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternatives framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+config ARM64_ERRATUM_832075
+	bool "Cortex-A57: 832075: possible deadlock on mixing exclusive memory accesses with device loads"
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 832075 on Cortex-A57 parts up to r1p2.
+
+	  Affected Cortex-A57 parts might deadlock when exclusive load/store
+	  instructions to Write-Back memory are mixed with Device loads.
+
+	  The workaround is to promote device loads to use Load-Acquire
+	  semantics.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternatives framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
+endmenu
+
+
 choice
 	prompt "Page size"
 	default ARM64_4K_PAGES
@@ -345,6 +456,19 @@ config ARCH_HAS_CACHE_LINE_SIZE
 
 source "mm/Kconfig"
 
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	---help---
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
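
A minimal userspace sketch of the strict mode described in this help text,
using only the long-standing prctl(2)/SECCOMP_MODE_STRICT interface (nothing
here is specific to this patch):

    #include <unistd.h>
    #include <sys/prctl.h>
    #include <linux/seccomp.h>

    int main(void)
    {
            char buf[64];
            ssize_t n;

            /* From here on, only read/write/_exit/sigreturn are allowed;
             * any other syscall kills the task with SIGKILL. */
            if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT) != 0)
                    return 1;

            n = read(0, buf, sizeof(buf));          /* permitted */
            if (n > 0)
                    write(1, buf, (size_t)n);       /* permitted */

            _exit(0);                               /* permitted */
    }
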
 config XEN_DOM0
 	def_bool y
 	depends on XEN
@@ -361,6 +485,58 @@ config FORCE_MAX_ZONEORDER
 	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
 	default "11"
 
+menuconfig ARMV8_DEPRECATED
+	bool "Emulate deprecated/obsolete ARMv8 instructions"
+	depends on COMPAT
+	help
+	  Legacy software support may require certain instructions
+	  that have been deprecated or obsoleted in the architecture.
+
+	  Say Y here to enable selective emulation of these
+	  instructions.
+
+	  If unsure, say Y.
+
+if ARMV8_DEPRECATED
+
+config SWP_EMULATION
+	bool "Emulate SWP/SWPB instructions"
+	help
+	  ARMv8 obsoletes the use of A32 SWP/SWPB instructions such that
+	  they are always undefined. Say Y here to enable software
+	  emulation of these instructions for userspace using LDXR/STXR.
+
+	  In some older versions of glibc [<=2.8] SWP is used during futex
+	  trylock() operations with the assumption that the code will not
+	  be preempted. This invalid assumption may be more likely to fail
+	  with SWP emulation enabled, leading to deadlock of the user
+	  application.
+
+	  NOTE: when accessing uncached shared regions, LDXR/STXR rely
+	  on an external transaction monitoring block called a global
+	  monitor to maintain update atomicity. If your system does not
+	  implement a global monitor, this option can cause programs that
+	  perform SWP operations to uncached memory to deadlock.
+
+	  If unsure, say Y.
+
+config CP15_BARRIER_EMULATION
+	bool "Emulate CP15 Barrier instructions"
+	help
+	  The CP15 barrier instructions - CP15ISB, CP15DSB, and
+	  CP15DMB - are deprecated in ARMv8 (and ARMv7). It is
+	  strongly recommended to use the ISB, DSB, and DMB
+	  instructions instead.
+
+	  Say Y here to enable software emulation of these
+	  instructions for AArch32 userspace code. When this option is
+	  enabled, CP15 barrier usage is traced, which can help
+	  identify software that needs updating.
+
+	  If unsure, say Y.
+
+endif
+
 endmenu
 
 menu "Boot options"
@@ -401,6 +577,17 @@ config EFI
 	  allow the kernel to be booted as an EFI application. This
 	  is only useful on systems that have UEFI firmware.
 
+config DMI
+	bool "Enable support for SMBIOS (DMI) tables"
+	depends on EFI
+	default y
+	help
+	  This enables support for the SMBIOS/DMI firmware tables.
+
+	  This option is only useful on systems that have UEFI firmware.
+	  However, even with this option, the resultant kernel should
+	  continue to boot on existing non-UEFI platforms.
+
 endmenu
 
 menu "Userspace binary formats"
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 0a12933e50ed..5fdd6dce8061 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -6,6 +6,18 @@ config FRAME_POINTER
 	bool
 	default y
 
+config ARM64_PTDUMP
+	bool "Export kernel pagetable layout to userspace via debugfs"
+	depends on DEBUG_KERNEL
+	select DEBUG_FS
+	help
+	  Say Y here if you want to show the kernel pagetable layout in a
+	  debugfs file. This information is only useful for kernel developers
+	  who are working in architecture-specific areas of the kernel.
+	  It is probably not a good idea to enable this feature in a production
+	  kernel.
+	  If in doubt, say "N".
+
 config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
 	depends on MMU
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 5562652c5316..a38b02ce5f9a 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -27,20 +27,19 @@ config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
-	select CRYPTO_AES
 
 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
-	select CRYPTO_AES
+	select CRYPTO_AES_ARM64_CE
 	select CRYPTO_AEAD
 
 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_AES
+	select CRYPTO_AES_ARM64_CE
 	select CRYPTO_ABLK_HELPER
 
 config CRYPTO_AES_ARM64_NEON_BLK
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 9e6cdde9b43d..0ac73b838fa3 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -16,6 +16,8 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#include "aes-ce-setkey.h"
+
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
 	/*
@@ -48,7 +50,7 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(tfm);
 	int ret;
 
-	ret = crypto_aes_expand_key(ctx, in_key, key_len);
+	ret = ce_aes_expandkey(ctx, in_key, key_len);
 	if (!ret)
 		return 0;
 
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index 2075e1acae6b..ce47792a983d 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -14,6 +14,8 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#include "aes-ce-setkey.h"
+
 MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -124,6 +126,114 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_end();
 }
 
+/*
+ * aes_sub() - use the aese instruction to perform the AES sbox substitution
+ *             on each byte in 'input'
+ */
+static u32 aes_sub(u32 input)
+{
+	u32 ret;
+
+	__asm__("dup	v1.4s, %w[in]		;"
+		"movi	v0.16b, #0		;"
+		"aese	v0.16b, v1.16b		;"
+		"umov	%w[out], v0.4s[0]	;"
+
+	:	[out]	"=r"(ret)
+	:	[in]	"r"(input)
+	:		"v0","v1");
+
+	return ret;
+}
+
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		     unsigned int key_len)
+{
+	/*
+	 * The AES key schedule round constants
+	 */
+	static u8 const rcon[] = {
+		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+	};
+
+	u32 kwords = key_len / sizeof(u32);
+	struct aes_block *key_enc, *key_dec;
+	int i, j;
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	memcpy(ctx->key_enc, in_key, key_len);
+	ctx->key_length = key_len;
+
+	kernel_neon_begin_partial(2);
+	for (i = 0; i < sizeof(rcon); i++) {
+		u32 *rki = ctx->key_enc + (i * kwords);
+		u32 *rko = rki + kwords;
+
+		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+		rko[1] = rko[0] ^ rki[1];
+		rko[2] = rko[1] ^ rki[2];
+		rko[3] = rko[2] ^ rki[3];
+
+		if (key_len == AES_KEYSIZE_192) {
+			if (i >= 7)
+				break;
+			rko[4] = rko[3] ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+		} else if (key_len == AES_KEYSIZE_256) {
+			if (i >= 6)
+				break;
+			rko[4] = aes_sub(rko[3]) ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+			rko[6] = rko[5] ^ rki[6];
+			rko[7] = rko[6] ^ rki[7];
+		}
+	}
+
+	/*
+	 * Generate the decryption keys for the Equivalent Inverse Cipher.
+	 * This involves reversing the order of the round keys, and applying
+	 * the Inverse Mix Columns transformation on all but the first and
+	 * the last one.
+	 */
+	key_enc = (struct aes_block *)ctx->key_enc;
+	key_dec = (struct aes_block *)ctx->key_dec;
+	j = num_rounds(ctx);
+
+	key_dec[0] = key_enc[j];
+	for (i = 1, j--; j > 0; i++, j--)
+		__asm__("ld1	{v0.16b}, %[in]		;"
+			"aesimc	v1.16b, v0.16b		;"
+			"st1	{v1.16b}, %[out]	;"
+
+		:	[out]	"=Q"(key_dec[i])
+		:	[in]	"Q"(key_enc[j])
+		:		"v0","v1");
+	key_dec[i] = key_enc[0];
+
+	kernel_neon_end();
+	return 0;
+}
+EXPORT_SYMBOL(ce_aes_expandkey);
+
+int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		  unsigned int key_len)
+{
+	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	int ret;
+
+	ret = ce_aes_expandkey(ctx, in_key, key_len);
+	if (!ret)
+		return 0;
+
+	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ce_aes_setkey);
+
 static struct crypto_alg aes_alg = {
 	.cra_name		= "aes",
 	.cra_driver_name	= "aes-ce",
@@ -135,7 +245,7 @@ static struct crypto_alg aes_alg = {
 	.cra_cipher = {
 		.cia_min_keysize	= AES_MIN_KEY_SIZE,
 		.cia_max_keysize	= AES_MAX_KEY_SIZE,
-		.cia_setkey		= crypto_aes_set_key,
+		.cia_setkey		= ce_aes_setkey,
 		.cia_encrypt		= aes_cipher_encrypt,
 		.cia_decrypt		= aes_cipher_decrypt
 	}
diff --git a/arch/arm64/crypto/aes-ce-setkey.h b/arch/arm64/crypto/aes-ce-setkey.h
new file mode 100644
index 000000000000..f08a6471d034
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-setkey.h
@@ -0,0 +1,5 @@
+
+int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+		  unsigned int key_len);
+int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		     unsigned int key_len);
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 79cd911ef88c..801aae32841f 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -16,9 +16,13 @@
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 
+#include "aes-ce-setkey.h"
+
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
 #define PRIO			300
+#define aes_setkey		ce_aes_setkey
+#define aes_expandkey		ce_aes_expandkey
 #define aes_ecb_encrypt		ce_aes_ecb_encrypt
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
 #define aes_cbc_encrypt		ce_aes_cbc_encrypt
@@ -30,6 +34,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #else
 #define MODE			"neon"
 #define PRIO			200
+#define aes_setkey		crypto_aes_set_key
+#define aes_expandkey		crypto_aes_expand_key
 #define aes_ecb_encrypt		neon_aes_ecb_encrypt
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
@@ -79,10 +85,10 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 	int ret;
 
-	ret = crypto_aes_expand_key(&ctx->key1, in_key, key_len / 2);
+	ret = aes_expandkey(&ctx->key1, in_key, key_len / 2);
 	if (!ret)
-		ret = crypto_aes_expand_key(&ctx->key2, &in_key[key_len / 2],
-					    key_len / 2);
+		ret = aes_expandkey(&ctx->key2, &in_key[key_len / 2],
+				    key_len / 2);
 	if (!ret)
 		return 0;
 
@@ -288,7 +294,7 @@ static struct crypto_alg aes_algs[] = { {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
 		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= crypto_aes_set_key,
+		.setkey		= aes_setkey,
 		.encrypt	= ecb_encrypt,
 		.decrypt	= ecb_decrypt,
 	},
@@ -306,7 +312,7 @@ static struct crypto_alg aes_algs[] = { {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
 		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= crypto_aes_set_key,
+		.setkey		= aes_setkey,
 		.encrypt	= cbc_encrypt,
 		.decrypt	= cbc_decrypt,
 	},
@@ -324,7 +330,7 @@ static struct crypto_alg aes_algs[] = { {
 		.min_keysize	= AES_MIN_KEY_SIZE,
 		.max_keysize	= AES_MAX_KEY_SIZE,
 		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= crypto_aes_set_key,
+		.setkey		= aes_setkey,
 		.encrypt	= ctr_encrypt,
 		.decrypt	= ctr_encrypt,
 	},
diff --git a/arch/arm64/include/asm/alternative-asm.h b/arch/arm64/include/asm/alternative-asm.h
new file mode 100644
index 000000000000..919a67855b63
--- /dev/null
+++ b/arch/arm64/include/asm/alternative-asm.h
@@ -0,0 +1,29 @@
+#ifndef __ASM_ALTERNATIVE_ASM_H
+#define __ASM_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+	.word \orig_offset - .
+	.word \alt_offset - .
+	.hword \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
+.macro alternative_insn insn1 insn2 cap
+661:	\insn1
+662:	.pushsection .altinstructions, "a"
+	altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
+	.popsection
+	.pushsection .altinstr_replacement, "ax"
+663:	\insn2
+664:	.popsection
+	.if ((664b-663b) != (662b-661b))
+		.error "Alternatives instruction length mismatch"
+	.endif
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ALTERNATIVE_ASM_H */
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
new file mode 100644
index 000000000000..d261f01e2bae
--- /dev/null
+++ b/arch/arm64/include/asm/alternative.h
@@ -0,0 +1,44 @@
+#ifndef __ASM_ALTERNATIVE_H
+#define __ASM_ALTERNATIVE_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+
+struct alt_instr {
+	s32 orig_offset;	/* offset to original instruction */
+	s32 alt_offset;		/* offset to replacement instruction */
+	u16 cpufeature;		/* cpufeature bit set for replacement */
+	u8  orig_len;		/* size of original instruction(s) */
+	u8  alt_len;		/* size of new instruction(s), <= orig_len */
+};
+
+void apply_alternatives_all(void);
+void apply_alternatives(void *start, size_t length);
+void free_alternatives_memory(void);
+
+#define ALTINSTR_ENTRY(feature)						      \
+	" .word 661b - .\n"				/* label           */ \
+	" .word 663f - .\n"				/* new instruction */ \
+	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte 662b-661b\n"				/* source len      */ \
+	" .byte 664f-663f\n"				/* replacement len */
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+	"661:\n\t"							\
+	oldinstr "\n"							\
+	"662:\n"							\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature)						\
+	".popsection\n"							\
+	".pushsection .altinstr_replacement, \"a\"\n"			\
+	"663:\n\t"							\
+	newinstr "\n"							\
+	"664:\n\t"							\
+	".popsection\n\t"						\
+	".if ((664b-663b) != (662b-661b))\n\t"				\
+	"	.error \"Alternatives instruction length mismatch\"\n\t"\
+	".endif\n"
+
+#endif /* __ASM_ALTERNATIVE_H */
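
To make the patching mechanism concrete, this is how the macro ends up being
used by the I/O accessors later in this same series (my_readl is an
illustrative name; the real site is __raw_readl() in the <asm/io.h> hunk
below, and the feature argument is one of the ARM64_WORKAROUND_* capability
bits from <asm/cpufeature.h>):

    static inline u32 my_readl(const volatile void __iomem *addr)
    {
            u32 val;

            /* Assembled as the plain load; rewritten in place into the
             * load-acquire at boot if the capability bit was set for an
             * affected CPU. */
            asm volatile(ALTERNATIVE("ldr %w0, [%1]",
                                     "ldar %w0, [%1]",
                                     ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
                         : "=r" (val) : "r" (addr));
            return val;
    }
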
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 88cc05b5f3ac..bde449936e2f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -32,6 +32,8 @@
 
 #ifndef __ASSEMBLY__
 
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
 static inline int cache_line_size(void)
 {
 	u32 cwg = cache_type_cwg();
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 689b6379188c..7ae31a2cc6c0 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -73,7 +73,7 @@ extern void flush_cache_all(void);
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
-extern void __flush_cache_user_range(unsigned long start, unsigned long end);
+extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
 static inline void flush_cache_mm(struct mm_struct *mm)
 {
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index ddb9d7830558..cb9593079f29 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -19,6 +19,7 @@
 #define __ASM_CMPXCHG_H
 
 #include <linux/bug.h>
+#include <linux/mmdebug.h>
 
 #include <asm/barrier.h>
 
@@ -152,6 +153,51 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return oldval;
 }
 
+#define system_has_cmpxchg_double()     1
+
+static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2,
+		unsigned long old1, unsigned long old2,
+		unsigned long new1, unsigned long new2, int size)
+{
+	unsigned long loop, lost;
+
+	switch (size) {
+	case 8:
+		VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1);
+		do {
+			asm volatile("// __cmpxchg_double8\n"
+			"	ldxp	%0, %1, %2\n"
+			"	eor	%0, %0, %3\n"
+			"	eor	%1, %1, %4\n"
+			"	orr	%1, %0, %1\n"
+			"	mov	%w0, #0\n"
+			"	cbnz	%1, 1f\n"
+			"	stxp	%w0, %5, %6, %2\n"
+			"1:\n"
+				: "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1)
+				: "r" (old1), "r"(old2), "r"(new1), "r"(new2));
+		} while (loop);
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	return !lost;
+}
+
+static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
+			unsigned long old1, unsigned long old2,
+			unsigned long new1, unsigned long new2, int size)
+{
+	int ret;
+
+	smp_mb();
+	ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size);
+	smp_mb();
+
+	return ret;
+}
+
 static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 					 unsigned long new, int size)
 {
@@ -182,6 +228,33 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret; \
 })
 
+#define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
+({\
+	int __ret;\
+	__ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \
+			(unsigned long)(o2), (unsigned long)(n1), \
+			(unsigned long)(n2), sizeof(*(ptr1)));\
+	__ret; \
+})
+
+#define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
+({\
+	int __ret;\
+	__ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \
+			(unsigned long)(o2), (unsigned long)(n1), \
+			(unsigned long)(n2), sizeof(*(ptr1)));\
+	__ret; \
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
+	cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \
+				o1, o2, n1, n2)
+
 #define cmpxchg64(ptr,o,n)		cmpxchg((ptr),(o),(n))
 #define cmpxchg64_local(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
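
As a usage sketch (the freelist structure and helper below are illustrative,
not from this patch): the main consumer behind the HAVE_CMPXCHG_DOUBLE select
in the Kconfig hunk above is SLUB's lockless fastpath, which updates a pointer
and a transaction counter as one atomic unit. The two words must be adjacent
and 16-byte aligned, matching the VM_BUG_ON() and the ldxp/stxp pair above:

    struct freelist {
            void *head;
            unsigned long tid;              /* bumped on every update */
    } __aligned(16);

    static bool freelist_update(struct freelist *fl, void *old_head,
                                unsigned long old_tid, void *new_head)
    {
            /* Succeeds (returns 1) only if *both* words still hold the
             * values observed earlier. */
            return cmpxchg_double(&fl->head, &fl->tid,
                                  old_head, old_tid,
                                  new_head, old_tid + 1);
    }
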
 
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 56de5aadede2..3fb053fa6e98 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -205,6 +205,13 @@ typedef struct compat_siginfo {
 			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
 			int _fd;
 		} _sigpoll;
+
+		/* SIGSYS */
+		struct {
+			compat_uptr_t _call_addr; /* calling user insn */
+			int _syscall;	/* triggering system call number */
+			compat_uint_t _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
 	} _sifields;
 } compat_siginfo_t;
 
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 056443086019..ace70682499b 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -30,6 +30,8 @@ struct cpuinfo_arm64 {
 	u32		reg_dczid;
 	u32		reg_midr;
 
+	u64		reg_id_aa64dfr0;
+	u64		reg_id_aa64dfr1;
 	u64		reg_id_aa64isar0;
 	u64		reg_id_aa64isar1;
 	u64		reg_id_aa64mmfr0;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index cd4ac0516488..07547ccc1f2b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -21,9 +21,38 @@
 #define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
 #define cpu_feature(x)		ilog2(HWCAP_ ## x)
 
+#define ARM64_WORKAROUND_CLEAN_CACHE		0
+#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
+
+#define ARM64_NCAPS				2
+
+#ifndef __ASSEMBLY__
+
+extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
 static inline bool cpu_have_feature(unsigned int num)
 {
 	return elf_hwcap & (1UL << num);
 }
 
+static inline bool cpus_have_cap(unsigned int num)
+{
+	if (num >= ARM64_NCAPS)
+		return false;
+	return test_bit(num, cpu_hwcaps);
+}
+
+static inline void cpus_set_cap(unsigned int num)
+{
+	if (num >= ARM64_NCAPS)
+		pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
+			num, ARM64_NCAPS);
+	else
+		__set_bit(num, cpu_hwcaps);
+}
+
+void check_local_cpu_errata(void);
+
+#endif /* __ASSEMBLY__ */
+
 #endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 379d0b874328..8adb986a3086 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -57,6 +57,11 @@
 #define MIDR_IMPLEMENTOR(midr)	\
 	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
 
+#define MIDR_CPU_PART(imp, partnum) \
+	(((imp)			<< MIDR_IMPLEMENTOR_SHIFT) | \
+	(0xf			<< MIDR_ARCHITECTURE_SHIFT) | \
+	((partnum)		<< MIDR_PARTNUM_SHIFT))
+
 #define ARM_CPU_IMP_ARM		0x41
 #define ARM_CPU_IMP_APM		0x50
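
Taken together with the cpus_set_cap() helper added above, this macro is what
the new cpu_errata.c (not shown here) uses to match affected parts. A
condensed sketch; the 0xd03 part number, the MIDR_CPU_PART_MASK name (meaning
"everything but variant/revision"), and the simplified revision handling are
illustrative:

    #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, 0xd03)

    void check_local_cpu_errata(void)
    {
            u32 midr = read_cpuid_id();

            /* Compare implementer/architecture/part, ignoring the variant
             * and revision fields, then record the workaround capability
             * that the alternatives patching keys off. */
            if ((midr & MIDR_CPU_PART_MASK) == MIDR_CORTEX_A53)
                    cpus_set_cap(ARM64_WORKAROUND_CLEAN_CACHE);
    }
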
 
diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h
new file mode 100644
index 000000000000..69d37d87b159
--- /dev/null
+++ b/arch/arm64/include/asm/dmi.h
@@ -0,0 +1,31 @@
+/*
+ * arch/arm64/include/asm/dmi.h
+ *
+ * Copyright (C) 2013 Linaro Limited.
+ * Written by: Yi Li (yi.li@linaro.org)
+ *
+ * based on arch/ia64/include/asm/dmi.h
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef __ASM_DMI_H
+#define __ASM_DMI_H
+
+#include <linux/io.h>
+#include <linux/slab.h>
+
+/*
+ * According to section 2.3.6 of the UEFI spec, the firmware should not
+ * request a virtual mapping for configuration tables such as SMBIOS.
+ * This means we have to map them before use.
+ */
+#define dmi_early_remap(x, l)		ioremap_cache(x, l)
+#define dmi_early_unmap(x, l)		iounmap(x)
+#define dmi_remap(x, l)			ioremap_cache(x, l)
+#define dmi_unmap(x)			iounmap(x)
+#define dmi_alloc(l)			kzalloc(l, GFP_KERNEL)
+
+#endif
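
For context, these hooks are consumed by the generic DMI scanning code in
drivers/firmware/dmi_scan.c; very roughly, as in the sketch below (the
32-byte entry-point length is assumed for illustration):

    void __iomem *p;

    p = dmi_early_remap(efi.smbios, 32);    /* EFI-reported SMBIOS address */
    if (p) {
            /* ... validate the entry point and parse the tables ... */
            dmi_early_unmap(p, 32);
    }
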
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 5f7bfe6df723..9ef6eca905ca 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -31,6 +31,7 @@
  *
  */
 enum fixed_addresses {
+	FIX_HOLE,
 	FIX_EARLYCON_MEM_BASE,
 	__end_of_permanent_fixed_addresses,
 
@@ -56,10 +57,11 @@ enum fixed_addresses {
 
 #define FIXMAP_PAGE_IO     __pgprot(PROT_DEVICE_nGnRE)
 
-extern void __early_set_fixmap(enum fixed_addresses idx,
-			       phys_addr_t phys, pgprot_t flags);
+void __init early_fixmap_init(void);
 
-#define __set_fixmap __early_set_fixmap
+#define __early_set_fixmap __set_fixmap
+
+extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
 
 #include <asm-generic/fixmap.h>
 
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 024c46183c3c..0ad735166d9f 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -30,6 +30,7 @@
 #define COMPAT_HWCAP_IDIVA	(1 << 17)
 #define COMPAT_HWCAP_IDIVT	(1 << 18)
 #define COMPAT_HWCAP_IDIV	(COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_LPAE	(1 << 20)
 #define COMPAT_HWCAP_EVTSTRM	(1 << 21)
 
 #define COMPAT_HWCAP2_AES	(1 << 0)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 56a9e63b6c33..e2ff32a93b5c 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -354,6 +354,16 @@ bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
 int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
 int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
 int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+bool aarch32_insn_is_wide(u32 insn);
+
+#define A32_RN_OFFSET	16
+#define A32_RT_OFFSET	12
+#define A32_RT2_OFFSET	 0
+
+u32 aarch32_insn_extract_reg_num(u32 insn, int offset);
+u32 aarch32_insn_mcr_extract_opc2(u32 insn);
+u32 aarch32_insn_mcr_extract_crm(u32 insn);
 #endif /* __ASSEMBLY__ */
 
 #endif	/* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 79f1d519221f..75825b63464d 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -28,6 +28,8 @@
 #include <asm/barrier.h>
 #include <asm/pgtable.h>
 #include <asm/early_ioremap.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
 
 #include <xen/xen.h>
 
@@ -57,28 +59,41 @@ static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
 static inline u8 __raw_readb(const volatile void __iomem *addr)
 {
 	u8 val;
-	asm volatile("ldrb %w0, [%1]" : "=r" (val) : "r" (addr));
+	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
+				 "ldarb %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
 	return val;
 }
 
 static inline u16 __raw_readw(const volatile void __iomem *addr)
 {
 	u16 val;
-	asm volatile("ldrh %w0, [%1]" : "=r" (val) : "r" (addr));
+
+	asm volatile(ALTERNATIVE("ldrh %w0, [%1]",
+				 "ldarh %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
 	return val;
 }
 
 static inline u32 __raw_readl(const volatile void __iomem *addr)
 {
 	u32 val;
-	asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+	asm volatile(ALTERNATIVE("ldr %w0, [%1]",
+				 "ldar %w0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
 	return val;
 }
 
 static inline u64 __raw_readq(const volatile void __iomem *addr)
 {
 	u64 val;
-	asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+	asm volatile(ALTERNATIVE("ldr %0, [%1]",
+				 "ldar %0, [%1]",
+				 ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)
+		     : "=r" (val) : "r" (addr));
 	return val;
 }
 
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index e1f7ecdde11f..94c53674a31d 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -3,7 +3,8 @@
 
 #include <asm-generic/irq.h>
 
-extern void (*handle_arch_irq)(struct pt_regs *);
+struct pt_regs;
+
 extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7fd3e27e3ccc..8afb863f5a9e 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -18,6 +18,7 @@
 #ifndef __ARM64_KVM_ARM_H__
 #define __ARM64_KVM_ARM_H__
 
+#include <asm/memory.h>
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
@@ -160,9 +161,9 @@
 #endif
 
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
-#define VTTBR_BADDR_MASK  (((1LLU << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
-#define VTTBR_VMID_SHIFT  (48LLU)
-#define VTTBR_VMID_MASK	  (0xffLLU << VTTBR_VMID_SHIFT)
+#define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
+#define VTTBR_VMID_SHIFT  (UL(48))
+#define VTTBR_VMID_MASK	  (UL(0xFF) << VTTBR_VMID_SHIFT)
 
 /* Hyp System Trap Register */
 #define HSTR_EL2_TTEE	(1 << 16)
@@ -185,13 +186,13 @@
 
 /* Exception Syndrome Register (ESR) bits */
 #define ESR_EL2_EC_SHIFT	(26)
-#define ESR_EL2_EC		(0x3fU << ESR_EL2_EC_SHIFT)
-#define ESR_EL2_IL		(1U << 25)
+#define ESR_EL2_EC		(UL(0x3f) << ESR_EL2_EC_SHIFT)
+#define ESR_EL2_IL		(UL(1) << 25)
 #define ESR_EL2_ISS		(ESR_EL2_IL - 1)
 #define ESR_EL2_ISV_SHIFT	(24)
-#define ESR_EL2_ISV		(1U << ESR_EL2_ISV_SHIFT)
+#define ESR_EL2_ISV		(UL(1) << ESR_EL2_ISV_SHIFT)
 #define ESR_EL2_SAS_SHIFT	(22)
-#define ESR_EL2_SAS		(3U << ESR_EL2_SAS_SHIFT)
+#define ESR_EL2_SAS		(UL(3) << ESR_EL2_SAS_SHIFT)
 #define ESR_EL2_SSE		(1 << 21)
 #define ESR_EL2_SRT_SHIFT	(16)
 #define ESR_EL2_SRT_MASK	(0x1f << ESR_EL2_SRT_SHIFT)
@@ -205,16 +206,16 @@
 #define ESR_EL2_FSC_TYPE	(0x3c)
 
 #define ESR_EL2_CV_SHIFT	(24)
-#define ESR_EL2_CV		(1U << ESR_EL2_CV_SHIFT)
+#define ESR_EL2_CV		(UL(1) << ESR_EL2_CV_SHIFT)
 #define ESR_EL2_COND_SHIFT	(20)
-#define ESR_EL2_COND		(0xfU << ESR_EL2_COND_SHIFT)
+#define ESR_EL2_COND		(UL(0xf) << ESR_EL2_COND_SHIFT)
 
 
 #define FSC_FAULT	(0x04)
 #define FSC_PERM	(0x0c)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
-#define HPFAR_MASK	(~0xFUL)
+#define HPFAR_MASK	(~UL(0xf))
 
 #define ESR_EL2_EC_UNKNOWN	(0x00)
 #define ESR_EL2_EC_WFI		(0x01)
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
new file mode 100644
index 000000000000..4e603ea36ad3
--- /dev/null
+++ b/arch/arm64/include/asm/opcodes.h
@@ -0,0 +1 @@
+#include <../../arm/include/asm/opcodes.h>
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 5279e5733386..09da25bc596f 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -44,6 +44,221 @@ static inline unsigned long __my_cpu_offset(void)
 
 #endif /* CONFIG_SMP */
 
+#define PERCPU_OP(op, asm_op)						\
+static inline unsigned long __percpu_##op(void *ptr,			\
+			unsigned long val, int size)			\
+{									\
+	unsigned long loop, ret;					\
+									\
+	switch (size) {							\
+	case 1:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_1\n"			\
+			"ldxrb	  %w[ret], %[ptr]\n"			\
+			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
+			"stxrb	  %w[loop], %w[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr] "+Q"(*(u8 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	case 2:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_2\n"			\
+			"ldxrh	  %w[ret], %[ptr]\n"			\
+			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
+			"stxrh	  %w[loop], %w[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr]  "+Q"(*(u16 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	case 4:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_4\n"			\
+			"ldxr	  %w[ret], %[ptr]\n"			\
+			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
+			"stxr	  %w[loop], %w[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr] "+Q"(*(u32 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	case 8:								\
+		do {							\
+			asm ("//__per_cpu_" #op "_8\n"			\
+			"ldxr	  %[ret], %[ptr]\n"			\
+			#asm_op " %[ret], %[ret], %[val]\n"		\
+			"stxr	  %w[loop], %[ret], %[ptr]\n"		\
+			: [loop] "=&r" (loop), [ret] "=&r" (ret),	\
+			  [ptr] "+Q"(*(u64 *)ptr)			\
+			: [val] "Ir" (val));				\
+		} while (loop);						\
+		break;							\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+									\
+	return ret;							\
+}
+
+PERCPU_OP(add, add)
+PERCPU_OP(and, and)
+PERCPU_OP(or, orr)
+#undef PERCPU_OP
+
+static inline unsigned long __percpu_read(void *ptr, int size)
+{
+	unsigned long ret;
+
+	switch (size) {
+	case 1:
+		ret = ACCESS_ONCE(*(u8 *)ptr);
+		break;
+	case 2:
+		ret = ACCESS_ONCE(*(u16 *)ptr);
+		break;
+	case 4:
+		ret = ACCESS_ONCE(*(u32 *)ptr);
+		break;
+	case 8:
+		ret = ACCESS_ONCE(*(u64 *)ptr);
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	return ret;
+}
+
+static inline void __percpu_write(void *ptr, unsigned long val, int size)
+{
+	switch (size) {
+	case 1:
+		ACCESS_ONCE(*(u8 *)ptr) = (u8)val;
+		break;
+	case 2:
+		ACCESS_ONCE(*(u16 *)ptr) = (u16)val;
+		break;
+	case 4:
+		ACCESS_ONCE(*(u32 *)ptr) = (u32)val;
+		break;
+	case 8:
+		ACCESS_ONCE(*(u64 *)ptr) = (u64)val;
+		break;
+	default:
+		BUILD_BUG();
+	}
+}
+
+static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
+						int size)
+{
+	unsigned long ret, loop;
+
+	switch (size) {
+	case 1:
+		do {
+			asm ("//__percpu_xchg_1\n"
+			"ldxrb %w[ret], %[ptr]\n"
+			"stxrb %w[loop], %w[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u8 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	case 2:
+		do {
+			asm ("//__percpu_xchg_2\n"
+			"ldxrh %w[ret], %[ptr]\n"
+			"stxrh %w[loop], %w[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u16 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	case 4:
+		do {
+			asm ("//__percpu_xchg_4\n"
+			"ldxr %w[ret], %[ptr]\n"
+			"stxr %w[loop], %w[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u32 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	case 8:
+		do {
+			asm ("//__percpu_xchg_8\n"
+			"ldxr %[ret], %[ptr]\n"
+			"stxr %w[loop], %[val], %[ptr]\n"
+			: [loop] "=&r"(loop), [ret] "=&r"(ret),
+			  [ptr] "+Q"(*(u64 *)ptr)
+			: [val] "r" (val));
+		} while (loop);
+		break;
+	default:
+		BUILD_BUG();
+	}
+
+	return ret;
+}
+
+#define _percpu_add(pcp, val) \
+	__percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+
+#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val))
+
+#define _percpu_and(pcp, val) \
+	__percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+
+#define _percpu_or(pcp, val) \
+	__percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+
+#define _percpu_read(pcp) (typeof(pcp))	\
+	(__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp)))
+
+#define _percpu_write(pcp, val) \
+	__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))
+
+#define _percpu_xchg(pcp, val) (typeof(pcp)) \
+	(__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)))
+
+#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
+
+#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
+
+#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
+
+#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
+
+#define this_cpu_read_1(pcp) _percpu_read(pcp)
+#define this_cpu_read_2(pcp) _percpu_read(pcp)
+#define this_cpu_read_4(pcp) _percpu_read(pcp)
+#define this_cpu_read_8(pcp) _percpu_read(pcp)
+
+#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
+#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
+
+#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val)
+
 #include <asm-generic/percpu.h>
 
 #endif /* __ASM_PERCPU_H */
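
These specializations slot in underneath the generic this_cpu_*() API pulled
in by <asm-generic/percpu.h>; a minimal sketch of a caller (the counter name
is illustrative):

    static DEFINE_PER_CPU(unsigned long, nr_widgets);

    static void count_widget(void)
    {
            /* Expands to the ldxr/stxr retry loop in __percpu_add()
             * above, so the read-modify-write is a single atomic unit
             * without masking interrupts. */
            this_cpu_add(nr_widgets, 1);
    }
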
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index d5bed02073d6..e20df38a8ff3 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -26,11 +26,13 @@
 
 #define check_pgt_cache()		do { } while (0)
 
+#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
 #if CONFIG_ARM64_PGTABLE_LEVELS > 2
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	return (pmd_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
@@ -50,7 +52,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	return (pud_t *)__get_free_page(PGALLOC_GFP);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -69,8 +71,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
-#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
-
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
 {
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h
new file mode 100644
index 000000000000..c76fac979629
--- /dev/null
+++ b/arch/arm64/include/asm/seccomp.h
@@ -0,0 +1,25 @@
+/*
+ * arch/arm64/include/asm/seccomp.h
+ *
+ * Copyright (C) 2014 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd.h>
+
+#ifdef CONFIG_COMPAT
+#define __NR_seccomp_read_32		__NR_compat_read
+#define __NR_seccomp_write_32		__NR_compat_write
+#define __NR_seccomp_exit_32		__NR_compat_exit
+#define __NR_seccomp_sigreturn_32	__NR_compat_rt_sigreturn
+#endif /* CONFIG_COMPAT */
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index a82c0c5c8b52..c028fe37456f 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -19,10 +19,6 @@
 #ifndef __ASM_TLB_H
 #define __ASM_TLB_H
 
-#define  __tlb_remove_pmd_tlb_entry __tlb_remove_pmd_tlb_entry
-
-#include <asm-generic/tlb.h>
-
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 
@@ -37,71 +33,22 @@ static inline void __tlb_remove_table(void *_table)
 #define tlb_remove_entry(tlb, entry)	tlb_remove_page(tlb, entry)
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
-/*
- * There's three ways the TLB shootdown code is used:
- *  1. Unmapping a range of vmas.  See zap_page_range(), unmap_region().
- *     tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- *  2. Unmapping all vmas.  See exit_mmap().
- *     tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- *     Page tables will be freed.
- *  3. Unmapping argument pages.  See shift_arg_pages().
- *     tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- */
+#include <asm-generic/tlb.h>
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (tlb->fullmm) {
 		flush_tlb_mm(tlb->mm);
-	} else if (tlb->end > 0) {
+	} else {
 		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
 		flush_tlb_range(&vma, tlb->start, tlb->end);
-		tlb->start = TASK_SIZE;
-		tlb->end = 0;
-	}
-}
-
-static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
-{
-	if (!tlb->fullmm) {
-		tlb->start = min(tlb->start, addr);
-		tlb->end = max(tlb->end, addr + PAGE_SIZE);
-	}
-}
-
-/*
- * Memorize the range for the TLB flush.
- */
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
-					  unsigned long addr)
-{
-	tlb_add_flush(tlb, addr);
-}
-
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush.  When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void tlb_start_vma(struct mmu_gather *tlb,
-				 struct vm_area_struct *vma)
-{
-	if (!tlb->fullmm) {
-		tlb->start = TASK_SIZE;
-		tlb->end = 0;
 	}
 }
 
-static inline void tlb_end_vma(struct mmu_gather *tlb,
-			       struct vm_area_struct *vma)
-{
-	if (!tlb->fullmm)
-		tlb_flush(tlb);
-}
-
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				  unsigned long addr)
 {
 	pgtable_page_dtor(pte);
-	tlb_add_flush(tlb, addr);
 	tlb_remove_entry(tlb, pte);
 }
 
@@ -109,7 +56,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 				  unsigned long addr)
 {
-	tlb_add_flush(tlb, addr);
 	tlb_remove_entry(tlb, virt_to_page(pmdp));
 }
 #endif
@@ -118,15 +64,8 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
-	tlb_add_flush(tlb, addr);
 	tlb_remove_entry(tlb, virt_to_page(pudp));
 }
 #endif
 
-static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp,
-						unsigned long address)
-{
-	tlb_add_flush(tlb, address);
-}
-
 #endif
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 10ca8ff93cc2..232e4ba5d314 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -18,6 +18,22 @@
 #ifndef __ASM_TRAP_H
 #define __ASM_TRAP_H
 
+#include <linux/list.h>
+
+struct pt_regs;
+
+struct undef_hook {
+	struct list_head node;
+	u32 instr_mask;
+	u32 instr_val;
+	u64 pstate_mask;
+	u64 pstate_val;
+	int (*fn)(struct pt_regs *regs, u32 instr);
+};
+
+void register_undef_hook(struct undef_hook *hook);
+void unregister_undef_hook(struct undef_hook *hook);
+
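
The armv8_deprecated.c file added later in this series is the intended user
of this hook API. A condensed sketch of one registration; the SWP mask/value
follow the AArch32 encoding used by the arch/arm swp_emulate code, but treat
them as illustrative here:

    static int swp_handler(struct pt_regs *regs, u32 instr); /* emulation body */

    static struct undef_hook swp_hook = {
            .instr_mask     = 0x0fb00ff0,
            .instr_val      = 0x01000090,
            .pstate_mask    = COMPAT_PSR_MODE_MASK,   /* AArch32 EL0 only */
            .pstate_val     = COMPAT_PSR_MODE_USR,
            .fn             = swp_handler,
    };

    register_undef_hook(&swp_hook);
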
 static inline int in_exception_text(unsigned long ptr)
 {
 	extern char __exception_text_start[];
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 6d2bf419431d..49c9aefd24a5 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -31,6 +31,9 @@
  * Compat syscall numbers used by the AArch64 kernel.
  */
 #define __NR_compat_restart_syscall	0
+#define __NR_compat_exit		1
+#define __NR_compat_read		3
+#define __NR_compat_write		4
 #define __NR_compat_sigreturn		119
 #define __NR_compat_rt_sigreturn	173
 
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 9dfdac4a74a1..8893cebcea5b 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -787,7 +787,8 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 382
 __SYSCALL(__NR_renameat2, sys_renameat2)
-			/* 383 for seccomp */
+#define __NR_seccomp 383
+__SYSCALL(__NR_seccomp, sys_seccomp)
 #define __NR_getrandom 384
 __SYSCALL(__NR_getrandom, sys_getrandom)
 #define __NR_memfd_create 385
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 5bd029b43644..eaa77ed7766a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -5,6 +5,7 @@
 CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
+CFLAGS_armv8_deprecated.o := -I$(src)
 
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_insn.o = -pg
@@ -15,10 +16,11 @@ arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
 			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
 			   hyp-stub.o psci.o cpu_ops.o insn.o return_address.o	\
-			   cpuinfo.o
+			   cpuinfo.o cpu_errata.o alternative.o
 
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
-					   sys_compat.o
+					   sys_compat.o 			\
+					   ../../arm/kernel/opcodes.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
 arm64-obj-$(CONFIG_SMP)			+= smp.o smp_spin_table.o topology.o
@@ -31,6 +33,7 @@ arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
 arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
+arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
new file mode 100644
index 000000000000..ad7821d64a1d
--- /dev/null
+++ b/arch/arm64/kernel/alternative.c
@@ -0,0 +1,85 @@
+/*
+ * alternative runtime patching
+ * inspired by the x86 version
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <asm/cacheflush.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
+#include <linux/stop_machine.h>
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+struct alt_region {
+	struct alt_instr *begin;
+	struct alt_instr *end;
+};
+
+static int __apply_alternatives(void *alt_region)
+{
+	struct alt_instr *alt;
+	struct alt_region *region = alt_region;
+	u8 *origptr, *replptr;
+
+	for (alt = region->begin; alt < region->end; alt++) {
+		if (!cpus_have_cap(alt->cpufeature))
+			continue;
+
+		BUG_ON(alt->alt_len > alt->orig_len);
+
+		pr_info_once("patching kernel code\n");
+
+		origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
+		replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
+		memcpy(origptr, replptr, alt->alt_len);
+		flush_icache_range((uintptr_t)origptr,
+				   (uintptr_t)(origptr + alt->alt_len));
+	}
+
+	return 0;
+}
+
+void apply_alternatives_all(void)
+{
+	struct alt_region region = {
+		.begin	= __alt_instructions,
+		.end	= __alt_instructions_end,
+	};
+
+	/* better not try code patching on a live SMP system */
+	stop_machine(__apply_alternatives, &region, NULL);
+}
+
+void apply_alternatives(void *start, size_t length)
+{
+	struct alt_region region = {
+		.begin	= start,
+		.end	= start + length,
+	};
+
+	__apply_alternatives(&region);
+}
+
+void free_alternatives_memory(void)
+{
+	free_reserved_area(__alt_instructions, __alt_instructions_end,
+			   0, "alternatives");
+}
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
new file mode 100644
index 000000000000..c363671d7509
--- /dev/null
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -0,0 +1,553 @@
+/*
+ *  Copyright (C) 2014 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sysctl.h>
+
+#include <asm/insn.h>
+#include <asm/opcodes.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace-events-emulation.h"
+
+/*
+ * The runtime support for a deprecated instruction can be in one of the
+ * following three states -
+ *
+ * 0 = undef
+ * 1 = emulate (software emulation)
+ * 2 = hw (supported in hardware)
+ */
+enum insn_emulation_mode {
+	INSN_UNDEF,
+	INSN_EMULATE,
+	INSN_HW,
+};
+
+enum legacy_insn_status {
+	INSN_DEPRECATED,
+	INSN_OBSOLETE,
+};
+
+struct insn_emulation_ops {
+	const char		*name;
+	enum legacy_insn_status	status;
+	struct undef_hook	*hooks;
+	int			(*set_hw_mode)(bool enable);
+};
+
+struct insn_emulation {
+	struct list_head node;
+	struct insn_emulation_ops *ops;
+	int current_mode;
+	int min;
+	int max;
+};
+
+static LIST_HEAD(insn_emulation);
+static int nr_insn_emulated;
+static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
+
+static void register_emulation_hooks(struct insn_emulation_ops *ops)
+{
+	struct undef_hook *hook;
+
+	BUG_ON(!ops->hooks);
+
+	for (hook = ops->hooks; hook->instr_mask; hook++)
+		register_undef_hook(hook);
+
+	pr_notice("Registered %s emulation handler\n", ops->name);
+}
+
+static void remove_emulation_hooks(struct insn_emulation_ops *ops)
+{
+	struct undef_hook *hook;
+
+	BUG_ON(!ops->hooks);
+
+	for (hook = ops->hooks; hook->instr_mask; hook++)
+		unregister_undef_hook(hook);
+
+	pr_notice("Removed %s emulation handler\n", ops->name);
+}
+
+static int update_insn_emulation_mode(struct insn_emulation *insn,
+				       enum insn_emulation_mode prev)
+{
+	int ret = 0;
+
+	switch (prev) {
+	case INSN_UNDEF: /* Nothing to be done */
+		break;
+	case INSN_EMULATE:
+		remove_emulation_hooks(insn->ops);
+		break;
+	case INSN_HW:
+		if (insn->ops->set_hw_mode) {
+			insn->ops->set_hw_mode(false);
+			pr_notice("Disabled %s support\n", insn->ops->name);
+		}
+		break;
+	}
+
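+	/* ...then install the handlers/state for the new mode. */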
+	switch (insn->current_mode) {
+	case INSN_UNDEF:
+		break;
+	case INSN_EMULATE:
+		register_emulation_hooks(insn->ops);
+		break;
+	case INSN_HW:
+		if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(true))
+			pr_notice("Enabled %s support\n", insn->ops->name);
+		else
+			ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static void register_insn_emulation(struct insn_emulation_ops *ops)
+{
+	unsigned long flags;
+	struct insn_emulation *insn;
+
+	insn = kzalloc(sizeof(*insn), GFP_KERNEL);
+	insn->ops = ops;
+	insn->min = INSN_UNDEF;
+
+	switch (ops->status) {
+	case INSN_DEPRECATED:
+		insn->current_mode = INSN_EMULATE;
+		insn->max = INSN_HW;
+		break;
+	case INSN_OBSOLETE:
+		insn->current_mode = INSN_UNDEF;
+		insn->max = INSN_EMULATE;
+		break;
+	}
+
+	raw_spin_lock_irqsave(&insn_emulation_lock, flags);
+	list_add(&insn->node, &insn_emulation);
+	nr_insn_emulated++;
+	raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
+
+	/* Register any handlers if required */
+	update_insn_emulation_mode(insn, INSN_UNDEF);
+}
+
+static int emulation_proc_handler(struct ctl_table *table, int write,
+				  void __user *buffer, size_t *lenp,
+				  loff_t *ppos)
+{
+	int ret = 0;
+	struct insn_emulation *insn = (struct insn_emulation *) table->data;
+	enum insn_emulation_mode prev_mode = insn->current_mode;
+
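+	/*
+	 * table->data normally points at the struct insn_emulation (set up
+	 * in register_insn_emulation_sysctl()); repoint it at the mode field
+	 * for proc_dointvec_minmax() and restore it before returning.
+	 */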
+	table->data = &insn->current_mode;
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+	if (ret || !write || prev_mode == insn->current_mode)
+		goto ret;
+
+	ret = update_insn_emulation_mode(insn, prev_mode);
+	if (ret) {
+		/* Mode change failed, revert to previous mode. */
+		insn->current_mode = prev_mode;
+		update_insn_emulation_mode(insn, INSN_UNDEF);
+	}
+ret:
+	table->data = insn;
+	return ret;
+}
+
+static struct ctl_table ctl_abi[] = {
+	{
+		.procname = "abi",
+		.mode = 0555,
+	},
+	{ }
+};
+
+static void register_insn_emulation_sysctl(struct ctl_table *table)
+{
+	unsigned long flags;
+	int i = 0;
+	struct insn_emulation *insn;
+	struct ctl_table *insns_sysctl, *sysctl;
+
+	insns_sysctl = kzalloc(sizeof(*sysctl) * (nr_insn_emulated + 1),
+			      GFP_KERNEL);
+
+	raw_spin_lock_irqsave(&insn_emulation_lock, flags);
+	list_for_each_entry(insn, &insn_emulation, node) {
+		sysctl = &insns_sysctl[i];
+
+		sysctl->mode = 0644;
+		sysctl->maxlen = sizeof(int);
+
+		sysctl->procname = insn->ops->name;
+		sysctl->data = insn;
+		sysctl->extra1 = &insn->min;
+		sysctl->extra2 = &insn->max;
+		sysctl->proc_handler = emulation_proc_handler;
+		i++;
+	}
+	raw_spin_unlock_irqrestore(&insn_emulation_lock, flags);
+
+	table->child = insns_sysctl;
+	register_sysctl_table(table);
+}
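+
+/*
+ * Each registered emulation appears as /proc/sys/abi/<name>, accepting a
+ * value in the [min, max] range fixed at registration time. For example,
+ * with CONFIG_SWP_EMULATION=y:
+ *
+ *	echo 1 > /proc/sys/abi/swp	# trap and emulate SWP/SWPB
+ *	echo 0 > /proc/sys/abi/swp	# undef: deliver SIGILL instead
+ */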
+
+/*
+ *  Implement emulation of the SWP/SWPB instructions using load-exclusive and
+ *  store-exclusive.
+ *
+ *  Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ *  Where: Rt  = destination
+ *	   Rt2 = source
+ *	   Rn  = address
+ */
+
+/*
+ * Error-checking SWP macros implemented using ldxr{b}/stxr{b}
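+ *
+ * res returns 0 on success, -EAGAIN if the store-exclusive fails (so the
+ * caller can retry), or -EFAULT via the exception-table fixup on a faulting
+ * access; data returns the value loaded from [addr].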
+ */
+#define __user_swpX_asm(data, addr, res, temp, B)		\
+	__asm__ __volatile__(					\
+	"	mov		%w2, %w1\n"			\
+	"0:	ldxr"B"		%w1, [%3]\n"			\
+	"1:	stxr"B"		%w0, %w2, [%3]\n"		\
+	"	cbz		%w0, 2f\n"			\
+	"	mov		%w0, %w4\n"			\
+	"2:\n"							\
+	"	.pushsection	 .fixup,\"ax\"\n"		\
+	"	.align		2\n"				\
+	"3:	mov		%w0, %w5\n"			\
+	"	b		2b\n"				\
+	"	.popsection"					\
+	"	.pushsection	 __ex_table,\"a\"\n"		\
+	"	.align		3\n"				\
+	"	.quad		0b, 3b\n"			\
+	"	.quad		1b, 3b\n"			\
+	"	.popsection"					\
+	: "=&r" (res), "+r" (data), "=&r" (temp)		\
+	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)		\
+	: "memory")
+
+#define __user_swp_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "")
+#define __user_swpb_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "b")
+
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
+/*
+ * Set up process info to signal a segmentation fault - called on access error.
+ */
+static void set_segfault(struct pt_regs *regs, unsigned long addr)
+{
+	siginfo_t info;
+
+	down_read(&current->mm->mmap_sem);
+	if (find_vma(current->mm, addr) == NULL)
+		info.si_code = SEGV_MAPERR;
+	else
+		info.si_code = SEGV_ACCERR;
+	up_read(&current->mm->mmap_sem);
+
+	info.si_signo = SIGSEGV;
+	info.si_errno = 0;
+	info.si_addr  = (void *) instruction_pointer(regs);
+
+	pr_debug("SWP{B} emulation: access caused memory abort!\n");
+	arm64_notify_die("Illegal memory access", regs, &info, 0);
+}
+
+static int emulate_swpX(unsigned int address, unsigned int *data,
+			unsigned int type)
+{
+	unsigned int res = 0;
+
+	if ((type != TYPE_SWPB) && (address & 0x3)) {
+		/* SWP to unaligned address not permitted */
+		pr_debug("SWP instruction on unaligned pointer!\n");
+		return -EFAULT;
+	}
+
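+	/*
+	 * Retry the exclusive sequence until it succeeds or faults, giving
+	 * up early if a signal is pending; cond_resched() keeps a contended
+	 * line from monopolising the CPU.
+	 */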
+	while (1) {
+		unsigned long temp;
+
+		if (type == TYPE_SWPB)
+			__user_swpb_asm(*data, address, res, temp);
+		else
+			__user_swp_asm(*data, address, res, temp);
+
+		if (likely(res != -EAGAIN) || signal_pending(current))
+			break;
+
+		cond_resched();
+	}
+
+	return res;
+}
+
+/*
+ * swp_handler logs the id of the calling process, dissects the instruction,
+ * sanity-checks the memory location, calls emulate_swpX for the actual
+ * operation and deals with fixup/error handling before returning.
+ */
+static int swp_handler(struct pt_regs *regs, u32 instr)
+{
+	u32 destreg, data, type, address = 0;
+	int rn, rt2, res = 0;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+	type = instr & TYPE_SWPB;
+
+	switch (arm_check_condition(instr, regs->pstate)) {
+	case ARM_OPCODE_CONDTEST_PASS:
+		break;
+	case ARM_OPCODE_CONDTEST_FAIL:
+		/* Condition failed - return to next instruction */
+		goto ret;
+	case ARM_OPCODE_CONDTEST_UNCOND:
+		/* If unconditional encoding - not a SWP, undef */
+		return -EFAULT;
+	default:
+		return -EINVAL;
+	}
+
+	rn = aarch32_insn_extract_reg_num(instr, A32_RN_OFFSET);
+	rt2 = aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET);
+
+	address = (u32)regs->user_regs.regs[rn];
+	data	= (u32)regs->user_regs.regs[rt2];
+	destreg = aarch32_insn_extract_reg_num(instr, A32_RT_OFFSET);
+
+	pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
+		rn, address, destreg,
+		aarch32_insn_extract_reg_num(instr, A32_RT2_OFFSET), data);
+
+	/* Check that the access is within the user address range (SWP and SWPB) */
+	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+		pr_debug("SWP{B} emulation: access to 0x%08x not allowed!\n",
+			address);
+		goto fault;
+	}
+
+	res = emulate_swpX(address, &data, type);
+	if (res == -EFAULT)
+		goto fault;
+	else if (res == 0)
+		regs->user_regs.regs[destreg] = data;
+
+ret:
+	if (type == TYPE_SWPB)
+		trace_instruction_emulation("swpb", regs->pc);
+	else
+		trace_instruction_emulation("swp", regs->pc);
+
+	pr_warn_ratelimited("\"%s\" (%ld) uses obsolete SWP{B} instruction at 0x%llx\n",
+			current->comm, (unsigned long)current->pid, regs->pc);
+
+	regs->pc += 4;
+	return 0;
+
+fault:
+	set_segfault(regs, address);
+
+	return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel itself must be SWP-free, and SWP{B} does not exist in Thumb.
+ */
+static struct undef_hook swp_hooks[] = {
+	{
+		.instr_mask	= 0x0fb00ff0,
+		.instr_val	= 0x01000090,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= swp_handler
+	},
+	{ }
+};
+
+static struct insn_emulation_ops swp_ops = {
+	.name = "swp",
+	.status = INSN_OBSOLETE,
+	.hooks = swp_hooks,
+	.set_hw_mode = NULL,
+};
+
+static int cp15barrier_handler(struct pt_regs *regs, u32 instr)
+{
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc);
+
+	switch (arm_check_condition(instr, regs->pstate)) {
+	case ARM_OPCODE_CONDTEST_PASS:
+		break;
+	case ARM_OPCODE_CONDTEST_FAIL:
+		/* Condition failed - return to next instruction */
+		goto ret;
+	case ARM_OPCODE_CONDTEST_UNCOND:
+		/* If unconditional encoding - not a barrier instruction */
+		return -EFAULT;
+	default:
+		return -EINVAL;
+	}
+
+	switch (aarch32_insn_mcr_extract_crm(instr)) {
+	case 10:
+		/*
+		 * dmb - mcr p15, 0, Rt, c7, c10, 5
+		 * dsb - mcr p15, 0, Rt, c7, c10, 4
+		 */
+		if (aarch32_insn_mcr_extract_opc2(instr) == 5) {
+			dmb(sy);
+			trace_instruction_emulation(
+				"mcr p15, 0, Rt, c7, c10, 5 ; dmb", regs->pc);
+		} else {
+			dsb(sy);
+			trace_instruction_emulation(
+				"mcr p15, 0, Rt, c7, c10, 4 ; dsb", regs->pc);
+		}
+		break;
+	case 5:
+		/*
+		 * isb - mcr p15, 0, Rt, c7, c5, 4
+		 *
+		 * Taking an exception or returning from one acts as an
+		 * instruction barrier, so no explicit barrier is needed here.
+		 */
+		trace_instruction_emulation(
+			"mcr p15, 0, Rt, c7, c5, 4 ; isb", regs->pc);
+		break;
+	}
+
+ret:
+	pr_warn_ratelimited("\"%s\" (%ld) uses deprecated CP15 Barrier instruction at 0x%llx\n",
+			current->comm, (unsigned long)current->pid, regs->pc);
+
+	regs->pc += 4;
+	return 0;
+}
+
+#define SCTLR_EL1_CP15BEN (1 << 5)
+
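+/*
+ * SCTLR_EL1 is per-CPU state, so the CP15 barrier enable bit has to be set
+ * on each CPU: via on_each_cpu() when the mode changes, and via the hotplug
+ * notifier below for CPUs that come online later.
+ */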
+static inline void config_sctlr_el1(u32 clear, u32 set)
+{
+	u32 val;
+
+	asm volatile("mrs %0, sctlr_el1" : "=r" (val));
+	val &= ~clear;
+	val |= set;
+	asm volatile("msr sctlr_el1, %0" : : "r" (val));
+}
+
+static void enable_cp15_ben(void *info)
+{
+	config_sctlr_el1(0, SCTLR_EL1_CP15BEN);
+}
+
+static void disable_cp15_ben(void *info)
+{
+	config_sctlr_el1(SCTLR_EL1_CP15BEN, 0);
+}
+
+static int cpu_hotplug_notify(struct notifier_block *b,
+			      unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case CPU_STARTING:
+	case CPU_STARTING_FROZEN:
+		enable_cp15_ben(NULL);
+		return NOTIFY_DONE;
+	case CPU_DYING:
+	case CPU_DYING_FROZEN:
+		disable_cp15_ben(NULL);
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpu_hotplug_notifier = {
+	.notifier_call = cpu_hotplug_notify,
+};
+
+static int cp15_barrier_set_hw_mode(bool enable)
+{
+	if (enable) {
+		register_cpu_notifier(&cpu_hotplug_notifier);
+		on_each_cpu(enable_cp15_ben, NULL, true);
+	} else {
+		unregister_cpu_notifier(&cpu_hotplug_notifier);
+		on_each_cpu(disable_cp15_ben, NULL, true);
+	}
+
+	return true;
+}
+
+static struct undef_hook cp15_barrier_hooks[] = {
+	{
+		.instr_mask	= 0x0fff0fdf,
+		.instr_val	= 0x0e070f9a,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= cp15barrier_handler,
+	},
+	{
+		.instr_mask	= 0x0fff0fff,
+		.instr_val	= 0x0e070f95,
+		.pstate_mask	= COMPAT_PSR_MODE_MASK,
+		.pstate_val	= COMPAT_PSR_MODE_USR,
+		.fn		= cp15barrier_handler,
+	},
+	{ }
+};
+
+static struct insn_emulation_ops cp15_barrier_ops = {
+	.name = "cp15_barrier",
+	.status = INSN_DEPRECATED,
+	.hooks = cp15_barrier_hooks,
+	.set_hw_mode = cp15_barrier_set_hw_mode,
+};
+
+/*
+ * Invoked as a late_initcall, since this is not needed before init is spawned.
+ */
+static int __init armv8_deprecated_init(void)
+{
+	if (IS_ENABLED(CONFIG_SWP_EMULATION))
+		register_insn_emulation(&swp_ops);
+
+	if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION))
+		register_insn_emulation(&cp15_barrier_ops);
+
+	register_insn_emulation_sysctl(ctl_abi);
+
+	return 0;
+}
+
+late_initcall(armv8_deprecated_init);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
new file mode 100644
index 000000000000..fa62637e63a8
--- /dev/null
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -0,0 +1,111 @@
+/*
+ * Contains CPU specific errata definitions
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/cpufeature.h>
+
+#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+
+/*
+ * Add a struct or another datatype to the union below if you need
+ * different means to detect an affected CPU.
+ */
+struct arm64_cpu_capabilities {
+	const char *desc;
+	u16 capability;
+	bool (*is_affected)(struct arm64_cpu_capabilities *);
+	union {
+		struct {
+			u32 midr_model;
+			u32 midr_range_min, midr_range_max;
+		};
+	};
+};
+
+#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+			MIDR_ARCHITECTURE_MASK)
+
+static bool __maybe_unused
+is_affected_midr_range(struct arm64_cpu_capabilities *entry)
+{
+	u32 midr = read_cpuid_id();
+
+	if ((midr & CPU_MODEL_MASK) != entry->midr_model)
+		return false;
+
+	midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
+
+	return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+}
+
+#define MIDR_RANGE(model, min, max) \
+	.is_affected = is_affected_midr_range, \
+	.midr_model = model, \
+	.midr_range_min = min, \
+	.midr_range_max = max
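+
+/*
+ * is_affected_midr_range() masks the MIDR down to its variant and revision
+ * fields, so range endpoints encode rXpY as (X << MIDR_VARIANT_SHIFT) | Y.
+ */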
+
+struct arm64_cpu_capabilities arm64_errata[] = {
+#if	defined(CONFIG_ARM64_ERRATUM_826319) || \
+	defined(CONFIG_ARM64_ERRATUM_827319) || \
+	defined(CONFIG_ARM64_ERRATUM_824069)
+	{
+	/* Cortex-A53 r0p[012] */
+		.desc = "ARM errata 826319, 827319, 824069",
+		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_819472
+	{
+	/* Cortex-A53 r0p[01] */
+		.desc = "ARM errata 819472",
+		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01),
+	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_832075
+	{
+	/* Cortex-A57 r0p0 - r1p2 */
+		.desc = "ARM erratum 832075",
+		.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
+		MIDR_RANGE(MIDR_CORTEX_A57, 0x00, (1 << MIDR_VARIANT_SHIFT) | 2),
+	},
+#endif
+	{
+	}
+};
+
+void check_local_cpu_errata(void)
+{
+	struct arm64_cpu_capabilities *cpus = arm64_errata;
+	int i;
+
+	for (i = 0; cpus[i].desc; i++) {
+		if (!cpus[i].is_affected(&cpus[i]))
+			continue;
+
+		if (!cpus_have_cap(cpus[i].capability))
+			pr_info("enabling workaround for %s\n", cpus[i].desc);
+		cpus_set_cap(cpus[i].capability);
+	}
+}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 504fdaa8367e..57b641747534 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -18,6 +18,7 @@
 #include <asm/cachetype.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
+#include <asm/cpufeature.h>
 
 #include <linux/bitops.h>
 #include <linux/bug.h>
@@ -111,6 +112,15 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
 	diff |= CHECK(cntfrq, boot, cur, cpu);
 
 	/*
+	 * The kernel uses self-hosted debug features and expects CPUs to
+	 * support identical debug features. We presently need CTX_CMPs, WRPs,
+	 * and BRPs to be identical.
+	 * ID_AA64DFR1 is currently RES0.
+	 */
+	diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
+	diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
+
+	/*
 	 * Even in big.LITTLE, processors should be identical instruction-set
 	 * wise.
 	 */
@@ -143,7 +153,12 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
 	diff |= CHECK(id_isar3, boot, cur, cpu);
 	diff |= CHECK(id_isar4, boot, cur, cpu);
 	diff |= CHECK(id_isar5, boot, cur, cpu);
-	diff |= CHECK(id_mmfr0, boot, cur, cpu);
+	/*
+	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+	 * ACTLR formats could differ across CPUs and therefore would have to
+	 * be trapped for virtualization anyway.
+	 */
+	diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
 	diff |= CHECK(id_mmfr1, boot, cur, cpu);
 	diff |= CHECK(id_mmfr2, boot, cur, cpu);
 	diff |= CHECK(id_mmfr3, boot, cur, cpu);
@@ -155,7 +170,7 @@ static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
 	 * pretend to support them.
 	 */
 	WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
-			"Unsupported CPU feature variation.");
+			"Unsupported CPU feature variation.\n");
 }
 
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
@@ -165,6 +180,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_dczid = read_cpuid(DCZID_EL0);
 	info->reg_midr = read_cpuid_id();
 
+	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
+	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
 	info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
 	info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
 	info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
@@ -186,6 +203,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
 
 	cpuinfo_detect_icache_policy(info);
+
+	check_local_cpu_errata();
 }
 
 void cpuinfo_store_cpu(void)
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index d18a44940968..8ce9b0577442 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,8 @@ ENTRY(efi_stub_entry)
 	 */
 	mov	x20, x0		// DTB address
 	ldr	x0, [sp, #16]	// relocated _text address
-	mov	x21, x0
+	ldr	x21, =stext_offset
+	add	x21, x0, x21
 
 	/*
 	 * Calculate size of the kernel Image (same for original and copy).
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 95c49ebc660d..6fac253bc783 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -11,6 +11,7 @@
  *
  */
 
+#include <linux/dmi.h>
 #include <linux/efi.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
@@ -112,8 +113,6 @@ static int __init uefi_init(void)
 		efi.systab->hdr.revision & 0xffff, vendor);
 
 	retval = efi_config_init(NULL);
-	if (retval == 0)
-		set_bit(EFI_CONFIG_TABLES, &efi.flags);
 
 out:
 	early_memunmap(efi.systab,  sizeof(efi_system_table_t));
@@ -125,17 +124,17 @@ out:
  */
 static __init int is_reserve_region(efi_memory_desc_t *md)
 {
-	if (!is_normal_ram(md))
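+	/*
+	 * Memory of these types is released to the OS after
+	 * ExitBootServices(); anything else is reserved only if it is
+	 * backed by normal RAM.
+	 */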
+	switch (md->type) {
+	case EFI_LOADER_CODE:
+	case EFI_LOADER_DATA:
+	case EFI_BOOT_SERVICES_CODE:
+	case EFI_BOOT_SERVICES_DATA:
+	case EFI_CONVENTIONAL_MEMORY:
 		return 0;
-
-	if (md->attribute & EFI_MEMORY_RUNTIME)
-		return 1;
-
-	if (md->type == EFI_ACPI_RECLAIM_MEMORY ||
-	    md->type == EFI_RESERVED_TYPE)
-		return 1;
-
-	return 0;
+	default:
+		break;
+	}
+	return is_normal_ram(md);
 }
 
 static __init void reserve_regions(void)
@@ -471,3 +470,17 @@ err_unmap:
 	return -1;
 }
 early_initcall(arm64_enter_virtual_mode);
+
+static int __init arm64_dmi_init(void)
+{
+	/*
+	 * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to
+	 * be called early because dmi_id_init(), which is an arch_initcall
+	 * itself, depends on dmi_scan_machine() having been called already.
+	 */
+	dmi_scan_machine();
+	if (dmi_available)
+		dmi_set_dump_stack_arch_desc();
+	return 0;
+}
+core_initcall(arm64_dmi_init);
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 38e704e597f7..08cafc518b9a 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -98,8 +98,8 @@
 ENTRY(_mcount)
 	mcount_enter
 
-	ldr	x0, =ftrace_trace_function
-	ldr	x2, [x0]
+	adrp	x0, ftrace_trace_function
+	ldr	x2, [x0, #:lo12:ftrace_trace_function]
 	adr	x0, ftrace_stub
 	cmp	x0, x2			// if (ftrace_trace_function
 	b.eq	skip_ftrace_call	//     != ftrace_stub) {
@@ -115,14 +115,15 @@ skip_ftrace_call:			//   return;
 	mcount_exit			//   return;
 					// }
 skip_ftrace_call:
-	ldr	x1, =ftrace_graph_return
-	ldr	x2, [x1]		//   if ((ftrace_graph_return
-	cmp	x0, x2			//        != ftrace_stub)
-	b.ne	ftrace_graph_caller
-
-	ldr	x1, =ftrace_graph_entry	//     || (ftrace_graph_entry
-	ldr	x2, [x1]		//        != ftrace_graph_entry_stub))
-	ldr	x0, =ftrace_graph_entry_stub
+	adrp	x1, ftrace_graph_return
+	ldr	x2, [x1, #:lo12:ftrace_graph_return]
+	cmp	x0, x2			//   if ((ftrace_graph_return
+	b.ne	ftrace_graph_caller	//        != ftrace_stub)
+
+	adrp	x1, ftrace_graph_entry	//     || (ftrace_graph_entry
+	adrp	x0, ftrace_graph_entry_stub //     != ftrace_graph_entry_stub))
+	ldr	x2, [x1, #:lo12:ftrace_graph_entry]
+	add	x0, x0, #:lo12:ftrace_graph_entry_stub
 	cmp	x0, x2
 	b.ne	ftrace_graph_caller	//     ftrace_graph_caller();
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 726b910fe6ec..fd4fa374e5d2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -64,25 +64,26 @@
 #define BAD_ERROR	3
 
 	.macro	kernel_entry, el, regsize = 64
-	sub	sp, sp, #S_FRAME_SIZE - S_LR	// room for LR, SP, SPSR, ELR
+	sub	sp, sp, #S_FRAME_SIZE
 	.if	\regsize == 32
 	mov	w0, w0				// zero upper 32 bits of x0
 	.endif
-	push	x28, x29
-	push	x26, x27
-	push	x24, x25
-	push	x22, x23
-	push	x20, x21
-	push	x18, x19
-	push	x16, x17
-	push	x14, x15
-	push	x12, x13
-	push	x10, x11
-	push	x8, x9
-	push	x6, x7
-	push	x4, x5
-	push	x2, x3
-	push	x0, x1
+	stp	x0, x1, [sp, #16 * 0]
+	stp	x2, x3, [sp, #16 * 1]
+	stp	x4, x5, [sp, #16 * 2]
+	stp	x6, x7, [sp, #16 * 3]
+	stp	x8, x9, [sp, #16 * 4]
+	stp	x10, x11, [sp, #16 * 5]
+	stp	x12, x13, [sp, #16 * 6]
+	stp	x14, x15, [sp, #16 * 7]
+	stp	x16, x17, [sp, #16 * 8]
+	stp	x18, x19, [sp, #16 * 9]
+	stp	x20, x21, [sp, #16 * 10]
+	stp	x22, x23, [sp, #16 * 11]
+	stp	x24, x25, [sp, #16 * 12]
+	stp	x26, x27, [sp, #16 * 13]
+	stp	x28, x29, [sp, #16 * 14]
+
 	.if	\el == 0
 	mrs	x21, sp_el0
 	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
@@ -118,33 +119,31 @@
 	.if	\el == 0
 	ct_user_enter
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
+	msr	sp_el0, x23
 	.endif
+	msr	elr_el1, x21			// set up the return data
+	msr	spsr_el1, x22
 	.if	\ret
 	ldr	x1, [sp, #S_X1]			// preserve x0 (syscall return)
-	add	sp, sp, S_X2
 	.else
-	pop	x0, x1
+	ldp	x0, x1, [sp, #16 * 0]
 	.endif
-	pop	x2, x3				// load the rest of the registers
-	pop	x4, x5
-	pop	x6, x7
-	pop	x8, x9
-	msr	elr_el1, x21			// set up the return data
-	msr	spsr_el1, x22
-	.if	\el == 0
-	msr	sp_el0, x23
-	.endif
-	pop	x10, x11
-	pop	x12, x13
-	pop	x14, x15
-	pop	x16, x17
-	pop	x18, x19
-	pop	x20, x21
-	pop	x22, x23
-	pop	x24, x25
-	pop	x26, x27
-	pop	x28, x29
-	ldr	lr, [sp], #S_FRAME_SIZE - S_LR	// load LR and restore SP
+	ldp	x2, x3, [sp, #16 * 1]
+	ldp	x4, x5, [sp, #16 * 2]
+	ldp	x6, x7, [sp, #16 * 3]
+	ldp	x8, x9, [sp, #16 * 4]
+	ldp	x10, x11, [sp, #16 * 5]
+	ldp	x12, x13, [sp, #16 * 6]
+	ldp	x14, x15, [sp, #16 * 7]
+	ldp	x16, x17, [sp, #16 * 8]
+	ldp	x18, x19, [sp, #16 * 9]
+	ldp	x20, x21, [sp, #16 * 10]
+	ldp	x22, x23, [sp, #16 * 11]
+	ldp	x24, x25, [sp, #16 * 12]
+	ldp	x26, x27, [sp, #16 * 13]
+	ldp	x28, x29, [sp, #16 * 14]
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #S_FRAME_SIZE		// restore sp
 	eret					// return to kernel
 	.endm
 
@@ -168,7 +167,8 @@ tsk	.req	x28		// current thread_info
  * Interrupt handling.
  */
 	.macro	irq_handler
-	ldr	x1, handle_arch_irq
+	adrp	x1, handle_arch_irq
+	ldr	x1, [x1, #:lo12:handle_arch_irq]
 	mov	x0, sp
 	blr	x1
 	.endm
@@ -455,8 +455,8 @@ el0_da:
 	bic	x0, x26, #(0xff << 56)
 	mov	x1, x25
 	mov	x2, sp
-	adr	lr, ret_to_user
-	b	do_mem_abort
+	bl	do_mem_abort
+	b	ret_to_user
 el0_ia:
 	/*
 	 * Instruction abort handling
@@ -468,8 +468,8 @@ el0_ia:
 	mov	x0, x26
 	orr	x1, x25, #1 << 24		// use reserved ISS bit for instruction aborts
 	mov	x2, sp
-	adr	lr, ret_to_user
-	b	do_mem_abort
+	bl	do_mem_abort
+	b	ret_to_user
 el0_fpsimd_acc:
 	/*
 	 * Floating Point or Advanced SIMD access
@@ -478,8 +478,8 @@ el0_fpsimd_acc:
 	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
-	adr	lr, ret_to_user
-	b	do_fpsimd_acc
+	bl	do_fpsimd_acc
+	b	ret_to_user
 el0_fpsimd_exc:
 	/*
 	 * Floating Point or Advanced SIMD exception
@@ -488,8 +488,8 @@ el0_fpsimd_exc:
 	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
-	adr	lr, ret_to_user
-	b	do_fpsimd_exc
+	bl	do_fpsimd_exc
+	b	ret_to_user
 el0_sp_pc:
 	/*
 	 * Stack or PC alignment exception handling
@@ -500,8 +500,8 @@ el0_sp_pc:
 	mov	x0, x26
 	mov	x1, x25
 	mov	x2, sp
-	adr	lr, ret_to_user
-	b	do_sp_pc_abort
+	bl	do_sp_pc_abort
+	b	ret_to_user
 el0_undef:
 	/*
 	 * Undefined instruction
@@ -510,8 +510,8 @@ el0_undef:
 	enable_dbg_and_irq
 	ct_user_exit
 	mov	x0, sp
-	adr	lr, ret_to_user
-	b	do_undefinstr
+	bl	do_undefinstr
+	b	ret_to_user
 el0_dbg:
 	/*
 	 * Debug exception handling
@@ -530,8 +530,8 @@ el0_inv:
 	mov	x0, sp
 	mov	x1, #BAD_SYNC
 	mrs	x2, esr_el1
-	adr	lr, ret_to_user
-	b	bad_mode
+	bl	bad_mode
+	b	ret_to_user
 ENDPROC(el0_sync)
 
 	.align	6
@@ -653,14 +653,15 @@ el0_svc_naked:					// compat entry point
 	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall hooks
 	tst	x16, #_TIF_SYSCALL_WORK
 	b.ne	__sys_trace
-	adr	lr, ret_fast_syscall		// return address
 	cmp     scno, sc_nr                     // check upper syscall limit
 	b.hs	ni_sys
 	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table
-	br	x16				// call sys_* routine
+	blr	x16				// call sys_* routine
+	b	ret_fast_syscall
 ni_sys:
 	mov	x0, sp
-	b	do_ni_syscall
+	bl	do_ni_syscall
+	b	ret_fast_syscall
 ENDPROC(el0_svc)
 
 	/*
@@ -668,26 +669,38 @@ ENDPROC(el0_svc)
 	 * switches, and waiting for our parent to respond.
 	 */
 __sys_trace:
-	mov	x0, sp
+	mov	w0, #-1				// set default errno for
+	cmp	scno, x0			// user-issued syscall(-1)
+	b.ne	1f
+	mov	x0, #-ENOSYS
+	str	x0, [sp, #S_X0]
+1:	mov	x0, sp
 	bl	syscall_trace_enter
-	adr	lr, __sys_trace_return		// return address
+	cmp	w0, #-1				// skip the syscall?
+	b.eq	__sys_trace_return_skipped
 	uxtw	scno, w0			// syscall number (possibly new)
 	mov	x1, sp				// pointer to regs
 	cmp	scno, sc_nr			// check upper syscall limit
-	b.hs	ni_sys
+	b.hs	__ni_sys_trace
 	ldp	x0, x1, [sp]			// restore the syscall args
 	ldp	x2, x3, [sp, #S_X2]
 	ldp	x4, x5, [sp, #S_X4]
 	ldp	x6, x7, [sp, #S_X6]
 	ldr	x16, [stbl, scno, lsl #3]	// address in the syscall table
-	br	x16				// call sys_* routine
+	blr	x16				// call sys_* routine
 
 __sys_trace_return:
-	str	x0, [sp]			// save returned x0
+	str	x0, [sp, #S_X0]			// save returned x0
+__sys_trace_return_skipped:
 	mov	x0, sp
 	bl	syscall_trace_exit
 	b	ret_to_user
 
+__ni_sys_trace:
+	mov	x0, sp
+	bl	do_ni_syscall
+	b	__sys_trace_return
+
 /*
  * Special system call wrappers.
  */
@@ -695,6 +708,3 @@ ENTRY(sys_rt_sigreturn_wrapper)
 	mov	x0, sp
 	b	sys_rt_sigreturn
 ENDPROC(sys_rt_sigreturn_wrapper)
-
-ENTRY(handle_arch_irq)
-	.quad	0
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 0a6e4f924df8..8ce88e08c030 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -132,6 +132,8 @@ efi_head:
 #endif
 
 #ifdef CONFIG_EFI
+	.globl	stext_offset
+	.set	stext_offset, stext - efi_head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -155,12 +157,12 @@ optional_header:
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
 	.long	efi_stub_entry - efi_head	// AddressOfEntryPoint
-	.long	stext - efi_head		// BaseOfCode
+	.long	stext_offset			// BaseOfCode
 
 extra_header_fields:
 	.quad	0				// ImageBase
-	.long	0x20				// SectionAlignment
-	.long	0x8				// FileAlignment
+	.long	0x1000				// SectionAlignment
+	.long	PECOFF_FILE_ALIGNMENT		// FileAlignment
 	.short	0				// MajorOperatingSystemVersion
 	.short	0				// MinorOperatingSystemVersion
 	.short	0				// MajorImageVersion
@@ -172,7 +174,7 @@ extra_header_fields:
 	.long	_end - efi_head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
-	.long	stext - efi_head		// SizeOfHeaders
+	.long	stext_offset			// SizeOfHeaders
 	.long	0				// CheckSum
 	.short	0xa				// Subsystem (EFI application)
 	.short	0				// DllCharacteristics
@@ -217,16 +219,24 @@ section_table:
 	.byte	0
 	.byte	0        		// end of 0 padding of section name
 	.long	_end - stext		// VirtualSize
-	.long	stext - efi_head	// VirtualAddress
+	.long	stext_offset		// VirtualAddress
 	.long	_edata - stext		// SizeOfRawData
-	.long	stext - efi_head	// PointerToRawData
+	.long	stext_offset		// PointerToRawData
 
 	.long	0		// PointerToRelocations (0 for executables)
 	.long	0		// PointerToLineNumbers (0 for executables)
 	.short	0		// NumberOfRelocations  (0 for executables)
 	.short	0		// NumberOfLineNumbers  (0 for executables)
 	.long	0xe0500020	// Characteristics (section flags)
-	.align 5
+
+	/*
+	 * EFI will load stext onwards at the 4k section alignment
+	 * described in the PE/COFF header. To ensure that instruction
+	 * sequences using an adrp and a :lo12: immediate will function
+	 * correctly at this alignment, we must ensure that stext is
+	 * placed at a 4k boundary in the Image to begin with.
+	 */
+	.align 12
 #endif
 
 ENTRY(stext)
@@ -238,7 +248,13 @@ ENTRY(stext)
 	mov	x0, x22
 	bl	lookup_processor_type
 	mov	x23, x0				// x23=current cpu_table
-	cbz	x23, __error_p			// invalid processor (x23=0)?
+	/*
+	 * __error_p may end up out of range for cbz if text areas are
+	 * aligned up to section sizes.
+	 */
+	cbnz	x23, 1f				// valid processor (x23 != 0)?
+	b	__error_p			// no: invalid processor
+1:
 	bl	__vet_fdt
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
 	/*
@@ -250,13 +266,214 @@ ENTRY(stext)
 	 */
 	ldr	x27, __switch_data		// address to jump to after
 						// MMU has been enabled
-	adr	lr, __enable_mmu		// return (PIC) address
+	adrp	lr, __enable_mmu		// return (PIC) address
+	add	lr, lr, #:lo12:__enable_mmu
 	ldr	x12, [x23, #CPU_INFO_SETUP]
 	add	x12, x12, x28			// __virt_to_phys
 	br	x12				// initialise processor
 ENDPROC(stext)
 
 /*
+ * Determine validity of the x21 FDT pointer.
+ * The dtb must be 8-byte aligned and live in the first 512M of memory.
+ */
+__vet_fdt:
+	tst	x21, #0x7			// 8-byte aligned?
+	b.ne	1f
+	cmp	x21, x24			// below PHYS_OFFSET (x24)?
+	b.lt	1f
+	mov	x0, #(1 << 29)
+	add	x0, x0, x24			// PHYS_OFFSET + 512MB
+	cmp	x21, x0				// beyond the first 512MB?
+	b.ge	1f
+	ret
+1:
+	mov	x21, #0				// invalid: zero the FDT pointer
+	ret
+ENDPROC(__vet_fdt)
+/*
+ * Macro to create a table entry to the next page.
+ *
+ *	tbl:	page table address
+ *	virt:	virtual address
+ *	shift:	#imm page table shift
+ *	ptrs:	#imm pointers per table page
+ *
+ * Preserves:	virt
+ * Corrupts:	tmp1, tmp2
+ * Returns:	tbl -> next level table page address
+ */
+	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
+	lsr	\tmp1, \virt, #\shift
+	and	\tmp1, \tmp1, #\ptrs - 1	// table index
+	add	\tmp2, \tbl, #PAGE_SIZE
+	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
+	str	\tmp2, [\tbl, \tmp1, lsl #3]
+	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
+	.endm
+
+/*
+ * Macro to populate the PGD (and possibly PUD) for the corresponding
+ * block entry in the next level (tbl) for the given virtual address.
+ *
+ * Preserves:	tbl, next, virt
+ * Corrupts:	tmp1, tmp2
+ */
+	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
+	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS == 3
+	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+#endif
+	.endm
+
+/*
+ * Macro to populate block entries in the page table for the start..end
+ * virtual range (inclusive).
+ *
+ * Preserves:	tbl, flags
+ * Corrupts:	phys, start, end, pstate
+ */
+	.macro	create_block_map, tbl, flags, phys, start, end
+	lsr	\phys, \phys, #BLOCK_SHIFT
+	lsr	\start, \start, #BLOCK_SHIFT
+	and	\start, \start, #PTRS_PER_PTE - 1	// table index
+	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
+	lsr	\end, \end, #BLOCK_SHIFT
+	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
+9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
+	add	\start, \start, #1			// next entry
+	add	\phys, \phys, #BLOCK_SIZE		// next block
+	cmp	\start, \end
+	b.ls	9999b
+	.endm
+
+/*
+ * Set up the initial page tables. We only set up the bare minimum required
+ * to get the kernel running. The following sections are required:
+ *   - identity mapping to enable the MMU (low address, TTBR0)
+ *   - first few MB of the kernel linear mapping to jump to once the MMU has
+ *     been enabled, including the FDT blob (TTBR1)
+ *   - pgd entry for fixed mappings (TTBR1)
+ */
+__create_page_tables:
+	pgtbl	x25, x26, x28			// idmap_pg_dir and swapper_pg_dir addresses
+	mov	x27, lr
+
+	/*
+	 * Invalidate the idmap and swapper page tables to avoid potential
+	 * dirty cache lines being evicted.
+	 */
+	mov	x0, x25
+	add	x1, x26, #SWAPPER_DIR_SIZE
+	bl	__inval_cache_range
+
+	/*
+	 * Clear the idmap and swapper page tables.
+	 */
+	mov	x0, x25
+	add	x6, x26, #SWAPPER_DIR_SIZE
+1:	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	stp	xzr, xzr, [x0], #16
+	cmp	x0, x6
+	b.lo	1b
+
+	ldr	x7, =MM_MMUFLAGS
+
+	/*
+	 * Create the identity mapping.
+	 */
+	mov	x0, x25				// idmap_pg_dir
+	ldr	x3, =KERNEL_START
+	add	x3, x3, x28			// __pa(KERNEL_START)
+	create_pgd_entry x0, x3, x5, x6
+	ldr	x6, =KERNEL_END
+	mov	x5, x3				// __pa(KERNEL_START)
+	add	x6, x6, x28			// __pa(KERNEL_END)
+	create_block_map x0, x7, x3, x5, x6
+
+	/*
+	 * Map the kernel image (starting with PHYS_OFFSET).
+	 */
+	mov	x0, x26				// swapper_pg_dir
+	mov	x5, #PAGE_OFFSET
+	create_pgd_entry x0, x5, x3, x6
+	ldr	x6, =KERNEL_END
+	mov	x3, x24				// phys offset
+	create_block_map x0, x7, x3, x5, x6
+
+	/*
+	 * Map the FDT blob (maximum 2MB; must be within 512MB of
+	 * PHYS_OFFSET).
+	 */
+	mov	x3, x21				// FDT phys address
+	and	x3, x3, #~((1 << 21) - 1)	// 2MB aligned
+	mov	x6, #PAGE_OFFSET
+	sub	x5, x3, x24			// subtract PHYS_OFFSET
+	tst	x5, #~((1 << 29) - 1)		// within 512MB?
+	csel	x21, xzr, x21, ne		// zero the FDT pointer
+	b.ne	1f
+	add	x5, x5, x6			// __va(FDT blob)
+	add	x6, x5, #1 << 21		// 2MB for the FDT blob
+	sub	x6, x6, #1			// inclusive range
+	create_block_map x0, x7, x3, x5, x6
+1:
+	/*
+	 * Since the page tables have been populated with non-cacheable
+	 * accesses (MMU disabled), invalidate the idmap and swapper page
+	 * tables again to remove any speculatively loaded cache lines.
+	 */
+	mov	x0, x25
+	add	x1, x26, #SWAPPER_DIR_SIZE
+	bl	__inval_cache_range
+
+	mov	lr, x27
+	ret
+ENDPROC(__create_page_tables)
+	.ltorg
+
+	.align	3
+	.type	__switch_data, %object
+__switch_data:
+	.quad	__mmap_switched
+	.quad	__bss_start			// x6
+	.quad	__bss_stop			// x7
+	.quad	processor_id			// x4
+	.quad	__fdt_pointer			// x5
+	.quad	memstart_addr			// x6
+	.quad	init_thread_union + THREAD_START_SP // sp
+
+/*
+ * The following fragment of code is executed with the MMU enabled, and uses
+ * absolute addresses; it is not position independent.
+ */
+__mmap_switched:
+	adr	x3, __switch_data + 8
+
+	ldp	x6, x7, [x3], #16
+1:	cmp	x6, x7
+	b.hs	2f
+	str	xzr, [x6], #8			// Clear BSS
+	b	1b
+2:
+	ldp	x4, x5, [x3], #16
+	ldr	x6, [x3], #8
+	ldr	x16, [x3]
+	mov	sp, x16
+	str	x22, [x4]			// Save processor ID
+	str	x21, [x5]			// Save FDT pointer
+	str	x24, [x6]			// Save PHYS_OFFSET
+	mov	x29, #0
+	b	start_kernel
+ENDPROC(__mmap_switched)
+
+/*
+ * End of the early head section; the code that follows is also used for CPU
+ * hotplug and needs the same protections as the text region.
+ */
+	.section ".text","ax"
+/*
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
  *
@@ -331,7 +548,8 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 	msr	vttbr_el2, xzr
 
 	/* Hypervisor stub */
-	adr	x0, __hyp_stub_vectors
+	adrp	x0, __hyp_stub_vectors
+	add	x0, x0, #:lo12:__hyp_stub_vectors
 	msr	vbar_el2, x0
 
 	/* spsr */
@@ -492,183 +710,6 @@ ENDPROC(__calc_phys_offset)
 	.quad	PAGE_OFFSET
 
 /*
- * Macro to create a table entry to the next page.
- *
- *	tbl:	page table address
- *	virt:	virtual address
- *	shift:	#imm page table shift
- *	ptrs:	#imm pointers per table page
- *
- * Preserves:	virt
- * Corrupts:	tmp1, tmp2
- * Returns:	tbl -> next level table page address
- */
-	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
-	lsr	\tmp1, \virt, #\shift
-	and	\tmp1, \tmp1, #\ptrs - 1	// table index
-	add	\tmp2, \tbl, #PAGE_SIZE
-	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
-	str	\tmp2, [\tbl, \tmp1, lsl #3]
-	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
-	.endm
-
-/*
- * Macro to populate the PGD (and possibily PUD) for the corresponding
- * block entry in the next level (tbl) for the given virtual address.
- *
- * Preserves:	tbl, next, virt
- * Corrupts:	tmp1, tmp2
- */
-	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
-	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
-	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
-#endif
-	.endm
-
-/*
- * Macro to populate block entries in the page table for the start..end
- * virtual range (inclusive).
- *
- * Preserves:	tbl, flags
- * Corrupts:	phys, start, end, pstate
- */
-	.macro	create_block_map, tbl, flags, phys, start, end
-	lsr	\phys, \phys, #BLOCK_SHIFT
-	lsr	\start, \start, #BLOCK_SHIFT
-	and	\start, \start, #PTRS_PER_PTE - 1	// table index
-	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
-	lsr	\end, \end, #BLOCK_SHIFT
-	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
-9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
-	add	\start, \start, #1			// next entry
-	add	\phys, \phys, #BLOCK_SIZE		// next block
-	cmp	\start, \end
-	b.ls	9999b
-	.endm
-
-/*
- * Setup the initial page tables. We only setup the barest amount which is
- * required to get the kernel running. The following sections are required:
- *   - identity mapping to enable the MMU (low address, TTBR0)
- *   - first few MB of the kernel linear mapping to jump to once the MMU has
- *     been enabled, including the FDT blob (TTBR1)
- *   - pgd entry for fixed mappings (TTBR1)
- */
-__create_page_tables:
-	pgtbl	x25, x26, x28			// idmap_pg_dir and swapper_pg_dir addresses
-	mov	x27, lr
-
-	/*
-	 * Invalidate the idmap and swapper page tables to avoid potential
-	 * dirty cache lines being evicted.
-	 */
-	mov	x0, x25
-	add	x1, x26, #SWAPPER_DIR_SIZE
-	bl	__inval_cache_range
-
-	/*
-	 * Clear the idmap and swapper page tables.
-	 */
-	mov	x0, x25
-	add	x6, x26, #SWAPPER_DIR_SIZE
-1:	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	stp	xzr, xzr, [x0], #16
-	cmp	x0, x6
-	b.lo	1b
-
-	ldr	x7, =MM_MMUFLAGS
-
-	/*
-	 * Create the identity mapping.
-	 */
-	mov	x0, x25				// idmap_pg_dir
-	ldr	x3, =KERNEL_START
-	add	x3, x3, x28			// __pa(KERNEL_START)
-	create_pgd_entry x0, x3, x5, x6
-	ldr	x6, =KERNEL_END
-	mov	x5, x3				// __pa(KERNEL_START)
-	add	x6, x6, x28			// __pa(KERNEL_END)
-	create_block_map x0, x7, x3, x5, x6
-
-	/*
-	 * Map the kernel image (starting with PHYS_OFFSET).
-	 */
-	mov	x0, x26				// swapper_pg_dir
-	mov	x5, #PAGE_OFFSET
-	create_pgd_entry x0, x5, x3, x6
-	ldr	x6, =KERNEL_END
-	mov	x3, x24				// phys offset
-	create_block_map x0, x7, x3, x5, x6
-
-	/*
-	 * Map the FDT blob (maximum 2MB; must be within 512MB of
-	 * PHYS_OFFSET).
-	 */
-	mov	x3, x21				// FDT phys address
-	and	x3, x3, #~((1 << 21) - 1)	// 2MB aligned
-	mov	x6, #PAGE_OFFSET
-	sub	x5, x3, x24			// subtract PHYS_OFFSET
-	tst	x5, #~((1 << 29) - 1)		// within 512MB?
-	csel	x21, xzr, x21, ne		// zero the FDT pointer
-	b.ne	1f
-	add	x5, x5, x6			// __va(FDT blob)
-	add	x6, x5, #1 << 21		// 2MB for the FDT blob
-	sub	x6, x6, #1			// inclusive range
-	create_block_map x0, x7, x3, x5, x6
-1:
-	/*
-	 * Since the page tables have been populated with non-cacheable
-	 * accesses (MMU disabled), invalidate the idmap and swapper page
-	 * tables again to remove any speculatively loaded cache lines.
-	 */
-	mov	x0, x25
-	add	x1, x26, #SWAPPER_DIR_SIZE
-	bl	__inval_cache_range
-
-	mov	lr, x27
-	ret
-ENDPROC(__create_page_tables)
-	.ltorg
-
-	.align	3
-	.type	__switch_data, %object
-__switch_data:
-	.quad	__mmap_switched
-	.quad	__bss_start			// x6
-	.quad	__bss_stop			// x7
-	.quad	processor_id			// x4
-	.quad	__fdt_pointer			// x5
-	.quad	memstart_addr			// x6
-	.quad	init_thread_union + THREAD_START_SP // sp
-
-/*
- * The following fragment of code is executed with the MMU on in MMU mode, and
- * uses absolute addresses; this is not position independent.
- */
-__mmap_switched:
-	adr	x3, __switch_data + 8
-
-	ldp	x6, x7, [x3], #16
-1:	cmp	x6, x7
-	b.hs	2f
-	str	xzr, [x6], #8			// Clear BSS
-	b	1b
-2:
-	ldp	x4, x5, [x3], #16
-	ldr	x6, [x3], #8
-	ldr	x16, [x3]
-	mov	sp, x16
-	str	x22, [x4]			// Save processor ID
-	str	x21, [x5]			// Save FDT pointer
-	str	x24, [x6]			// Save PHYS_OFFSET
-	mov	x29, #0
-	b	start_kernel
-ENDPROC(__mmap_switched)
-
-/*
  * Exception handling. Something went wrong and we can't proceed. We ought to
  * tell the user, but since we don't have any guarantee that we're even
  * running on the right architecture, we do virtually nothing.
@@ -715,22 +756,3 @@ __lookup_processor_type_data:
 	.quad	.
 	.quad	cpu_table
 	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
-
-/*
- * Determine validity of the x21 FDT pointer.
- * The dtb must be 8-byte aligned and live in the first 512M of memory.
- */
-__vet_fdt:
-	tst	x21, #0x7
-	b.ne	1f
-	cmp	x21, x24
-	b.lt	1f
-	mov	x0, #(1 << 29)
-	add	x0, x0, x24
-	cmp	x21, x0
-	b.ge	1f
-	ret
-1:
-	mov	x21, #0
-	ret
-ENDPROC(__vet_fdt)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 8cd27fedc8b6..7e9327a0986d 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -960,3 +960,29 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
 
 	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
 }
+
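+/*
+ * A Thumb instruction is 32 bits wide when its leading halfword is in the
+ * 0xe800-0xffff range, i.e. when bits [15:11] are 0b11101, 0b11110 or
+ * 0b11111.
+ */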
+bool aarch32_insn_is_wide(u32 insn)
+{
+	return insn >= 0xe800;
+}
+
+/*
+ * Macros/defines for extracting register numbers from an instruction.
+ */
+u32 aarch32_insn_extract_reg_num(u32 insn, int offset)
+{
+	return (insn & (0xf << offset)) >> offset;
+}
+
+#define OPC2_MASK	0x7
+#define OPC2_OFFSET	5
+u32 aarch32_insn_mcr_extract_opc2(u32 insn)
+{
+	return (insn & (OPC2_MASK << OPC2_OFFSET)) >> OPC2_OFFSET;
+}
+
+#define CRM_MASK	0xf
+u32 aarch32_insn_mcr_extract_crm(u32 insn)
+{
+	return insn & CRM_MASK;
+}
diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c
index 7d37ead4d199..354be2a872ae 100644
--- a/arch/arm64/kernel/io.c
+++ b/arch/arm64/kernel/io.c
@@ -25,12 +25,26 @@
  */
 void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
-	unsigned char *t = to;
-	while (count) {
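+	/*
+	 * Byte-copy until both pointers are 8-byte aligned, use 64-bit
+	 * accesses for the bulk of the copy, then byte-copy any tail:
+	 * device memory generally requires naturally aligned accesses.
+	 */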
+	while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
+			 !IS_ALIGNED((unsigned long)to, 8))) {
+		*(u8 *)to = __raw_readb(from);
+		from++;
+		to++;
 		count--;
-		*t = readb(from);
-		t++;
+	}
+
+	while (count >= 8) {
+		*(u64 *)to = __raw_readq(from);
+		from += 8;
+		to += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		*(u8 *)to = __raw_readb(from);
 		from++;
+		to++;
+		count--;
 	}
 }
 EXPORT_SYMBOL(__memcpy_fromio);
@@ -40,12 +54,26 @@ EXPORT_SYMBOL(__memcpy_fromio);
  */
 void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
 {
-	const unsigned char *f = from;
-	while (count) {
+	while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
+			 !IS_ALIGNED((unsigned long)from, 8))) {
+		__raw_writeb(*(volatile u8 *)from, to);
+		from++;
+		to++;
 		count--;
-		writeb(*f, to);
-		f++;
+	}
+
+	while (count >= 8) {
+		__raw_writeq(*(volatile u64 *)from, to);
+		from += 8;
+		to += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		__raw_writeb(*(volatile u8 *)from, to);
+		from++;
 		to++;
+		count--;
 	}
 }
 EXPORT_SYMBOL(__memcpy_toio);
@@ -55,10 +83,28 @@ EXPORT_SYMBOL(__memcpy_toio);
  */
 void __memset_io(volatile void __iomem *dst, int c, size_t count)
 {
-	while (count) {
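+	/* Replicate the fill byte into all eight byte lanes of a u64. */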
+	u64 qc = (u8)c;
+
+	qc |= qc << 8;
+	qc |= qc << 16;
+	qc |= qc << 32;
+
+	while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
+		__raw_writeb(c, dst);
+		dst++;
 		count--;
-		writeb(c, dst);
+	}
+
+	while (count >= 8) {
+		__raw_writeq(qc, dst);
+		dst += 8;
+		count -= 8;
+	}
+
+	while (count) {
+		__raw_writeb(c, dst);
 		dst++;
+		count--;
 	}
 }
 EXPORT_SYMBOL(__memset_io);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 071a6ec13bd8..240b75c0e94f 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -40,6 +40,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	return 0;
 }
 
+void (*handle_arch_irq)(struct pt_regs *) = NULL;
+
 void __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
 {
 	if (handle_arch_irq)
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 263a166291fb..4f1fec7a46db 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -22,9 +22,8 @@
 
 #ifdef HAVE_JUMP_LABEL
 
-static void __arch_jump_label_transform(struct jump_entry *entry,
-					enum jump_label_type type,
-					bool is_static)
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
 {
 	void *addr = (void *)entry->code;
 	u32 insn;
@@ -37,22 +36,18 @@ static void __arch_jump_label_transform(struct jump_entry *entry,
 		insn = aarch64_insn_gen_nop();
 	}
 
-	if (is_static)
-		aarch64_insn_patch_text_nosync(addr, insn);
-	else
-		aarch64_insn_patch_text(&addr, &insn, 1);
-}
-
-void arch_jump_label_transform(struct jump_entry *entry,
-			       enum jump_label_type type)
-{
-	__arch_jump_label_transform(entry, type, false);
+	aarch64_insn_patch_text(&addr, &insn, 1);
 }
 
 void arch_jump_label_transform_static(struct jump_entry *entry,
 				      enum jump_label_type type)
 {
-	__arch_jump_label_transform(entry, type, true);
+	/*
+	 * We use the architected A64 NOP in arch_static_branch, so there's no
+	 * need to patch an identical A64 NOP over the top of it here. The core
+	 * will call arch_jump_label_transform from a module notifier if the
+	 * NOP needs to be replaced by a branch.
+	 */
 }
 
 #endif	/* HAVE_JUMP_LABEL */
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 1eb1cc955139..fd027b101de5 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -26,6 +26,7 @@
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>
 #include <asm/insn.h>
+#include <asm/sections.h>
 
 #define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX
 #define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16
@@ -394,3 +395,20 @@ overflow:
 	       me->name, (int)ELF64_R_TYPE(rel[i].r_info), val);
 	return -ENOEXEC;
 }
+
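+/*
+ * Apply any alternatives recorded in the module's .altinstructions section,
+ * mirroring what apply_alternatives_all() does for the core kernel image.
+ */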
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	const Elf_Shdr *s, *se;
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+		if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
+			apply_alternatives((void *)s->sh_addr, s->sh_size);
+			return 0;
+		}
+	}
+
+	return 0;
+}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index aa29ecb4f800..25a5308744b1 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -169,8 +169,14 @@ armpmu_event_set_period(struct perf_event *event,
 		ret = 1;
 	}
 
-	if (left > (s64)armpmu->max_period)
-		left = armpmu->max_period;
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
 
 	local64_set(&hwc->prev_count, (u64)-left);
 
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8a4ae8e73213..d882b833dbdb 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -27,6 +27,7 @@
 #include <linux/smp.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/seccomp.h>
 #include <linux/security.h>
 #include <linux/init.h>
 #include <linux/signal.h>
@@ -551,6 +552,32 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
 	return ret;
 }
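+
+/*
+ * NT_ARM_SYSTEM_CALL exposes pt_regs->syscallno as a one-int regset so a
+ * tracer can read or rewrite the syscall number (e.g. set it to -1 to
+ * skip the call); the helpers below implement it.
+ */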
 
+static int system_call_get(struct task_struct *target,
+			   const struct user_regset *regset,
+			   unsigned int pos, unsigned int count,
+			   void *kbuf, void __user *ubuf)
+{
+	int syscallno = task_pt_regs(target)->syscallno;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &syscallno, 0, -1);
+}
+
+static int system_call_set(struct task_struct *target,
+			   const struct user_regset *regset,
+			   unsigned int pos, unsigned int count,
+			   const void *kbuf, const void __user *ubuf)
+{
+	int syscallno, ret;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1);
+	if (ret)
+		return ret;
+
+	task_pt_regs(target)->syscallno = syscallno;
+	return ret;
+}
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -559,6 +586,7 @@ enum aarch64_regset {
 	REGSET_HW_BREAK,
 	REGSET_HW_WATCH,
 #endif
+	REGSET_SYSTEM_CALL,
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -608,6 +636,14 @@ static const struct user_regset aarch64_regsets[] = {
 		.set = hw_break_set,
 	},
 #endif
+	[REGSET_SYSTEM_CALL] = {
+		.core_note_type = NT_ARM_SYSTEM_CALL,
+		.n = 1,
+		.size = sizeof(int),
+		.align = sizeof(int),
+		.get = system_call_get,
+		.set = system_call_set,
+	},
 };
 
 static const struct user_regset_view user_aarch64_view = {
@@ -1114,6 +1150,10 @@ static void tracehook_report_syscall(struct pt_regs *regs,
 
 asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
+	/* Do the secure computing check first; failures should be fast. */
+	if (secure_computing() == -1)
+		return -1;
+
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 2437196cc5d4..b80991166754 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,12 +43,14 @@
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/efi.h>
+#include <linux/personality.h>
 
 #include <asm/fixmap.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
 #include <asm/cputable.h>
+#include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -72,13 +74,15 @@ EXPORT_SYMBOL_GPL(elf_hwcap);
 				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
 				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
 				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
-				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV)
+				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+				 COMPAT_HWCAP_LPAE)
 unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
 unsigned int compat_elf_hwcap2 __read_mostly;
 #endif
 
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
 static const char *cpu_name;
-static const char *machine_name;
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -116,12 +120,16 @@ void __init early_print(const char *str, ...)
 
 void __init smp_setup_processor_id(void)
 {
+	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
+	cpu_logical_map(0) = mpidr;
+
 	/*
 	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
 	 * using percpu variable early, for example, lockdep will
 	 * access percpu variable inside lock_release
 	 */
 	set_my_cpu_offset(0);
+	pr_info("Booting Linux on physical CPU 0x%lx\n", (unsigned long)mpidr);
 }
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
@@ -311,7 +319,7 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
 			cpu_relax();
 	}
 
-	machine_name = of_flat_dt_get_machine_name();
+	dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name());
 }
 
 /*
@@ -376,6 +384,7 @@ void __init setup_arch(char **cmdline_p)
 
 	*cmdline_p = boot_command_line;
 
+	early_fixmap_init();
 	early_ioremap_init();
 
 	parse_early_param();
@@ -398,7 +407,6 @@ void __init setup_arch(char **cmdline_p)
 
 	psci_init();
 
-	cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	cpu_read_bootcpu_ops();
 #ifdef CONFIG_SMP
 	smp_init_cpus();
@@ -447,14 +455,50 @@ static const char *hwcap_str[] = {
 	NULL
 };
 
+#ifdef CONFIG_COMPAT
+static const char *compat_hwcap_str[] = {
+	"swp",
+	"half",
+	"thumb",
+	"26bit",
+	"fastmult",
+	"fpa",
+	"vfp",
+	"edsp",
+	"java",
+	"iwmmxt",
+	"crunch",
+	"thumbee",
+	"neon",
+	"vfpv3",
+	"vfpv3d16",
+	"tls",
+	"vfpv4",
+	"idiva",
+	"idivt",
+	"vfpd32",
+	"lpae",
+	"evtstrm"
+};
+
+static const char *compat_hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+#endif /* CONFIG_COMPAT */
+
 static int c_show(struct seq_file *m, void *v)
 {
-	int i;
-
-	seq_printf(m, "Processor\t: %s rev %d (%s)\n",
-		   cpu_name, read_cpuid_id() & 15, ELF_PLATFORM);
+	int i, j;
 
 	for_each_online_cpu(i) {
+		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+		u32 midr = cpuinfo->reg_midr;
+
 		/*
 		 * glibc reads /proc/cpuinfo to determine the number of
 		 * online processors, looking for lines beginning with
@@ -463,24 +507,38 @@ static int c_show(struct seq_file *m, void *v)
 #ifdef CONFIG_SMP
 		seq_printf(m, "processor\t: %d\n", i);
 #endif
-	}
-
-	/* dump out the processor features */
-	seq_puts(m, "Features\t: ");
-
-	for (i = 0; hwcap_str[i]; i++)
-		if (elf_hwcap & (1 << i))
-			seq_printf(m, "%s ", hwcap_str[i]);
-
-	seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24);
-	seq_printf(m, "CPU architecture: AArch64\n");
-	seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15);
-	seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff);
-	seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15);
 
-	seq_puts(m, "\n");
-
-	seq_printf(m, "Hardware\t: %s\n", machine_name);
+		/*
+		 * Dump out the common processor features in a single line.
+		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+		 * rather than attempting to parse this, but there's a body of
+		 * software which does already (at least for 32-bit).
+		 */
+		seq_puts(m, "Features\t:");
+		if (personality(current->personality) == PER_LINUX32) {
+#ifdef CONFIG_COMPAT
+			for (j = 0; compat_hwcap_str[j]; j++)
+				if (compat_elf_hwcap & (1 << j))
+					seq_printf(m, " %s", compat_hwcap_str[j]);
+
+			for (j = 0; compat_hwcap2_str[j]; j++)
+				if (compat_elf_hwcap2 & (1 << j))
+					seq_printf(m, " %s", compat_hwcap2_str[j]);
+#endif /* CONFIG_COMPAT */
+		} else {
+			for (j = 0; hwcap_str[j]; j++)
+				if (elf_hwcap & (1 << j))
+					seq_printf(m, " %s", hwcap_str[j]);
+		}
+		seq_puts(m, "\n");
+
+		seq_printf(m, "CPU implementer\t: 0x%02x\n",
+			   MIDR_IMPLEMENTOR(midr));
+		seq_printf(m, "CPU architecture: 8\n");
+		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+	}
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 1b9ad02837cf..5a1ba6e80d4e 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -186,6 +186,12 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 		err |= __put_user(from->si_uid, &to->si_uid);
 		err |= __put_user((compat_uptr_t)(unsigned long)from->si_ptr, &to->si_ptr);
 		break;
+	case __SI_SYS:
+		err |= __put_user((compat_uptr_t)(unsigned long)
+				from->si_call_addr, &to->si_call_addr);
+		err |= __put_user(from->si_syscall, &to->si_syscall);
+		err |= __put_user(from->si_arch, &to->si_arch);
+		break;
 	default: /* this is just in case for now ... */
 		err |= __put_user(from->si_pid, &to->si_pid);
 		err |= __put_user(from->si_uid, &to->si_uid);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index a564b440416a..ede186cdd452 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -147,14 +147,12 @@ cpu_resume_after_mmu:
 	ret
 ENDPROC(cpu_resume_after_mmu)
 
-	.data
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
 #ifdef CONFIG_SMP
 	mrs	x1, mpidr_el1
-	adr	x4, mpidr_hash_ptr
-	ldr	x5, [x4]
-	add	x8, x4, x5		// x8 = struct mpidr_hash phys address
+	adrp	x8, mpidr_hash
+	add	x8, x8, #:lo12:mpidr_hash	// x8 = struct mpidr_hash phys address
         /* retrieve mpidr_hash members to compute the hash */
 	ldr	x2, [x8, #MPIDR_HASH_MASK]
 	ldp	w3, w4, [x8, #MPIDR_HASH_SHIFTS]
@@ -164,14 +162,15 @@ ENTRY(cpu_resume)
 #else
 	mov	x7, xzr
 #endif
-	adr	x0, sleep_save_sp
+	adrp	x0, sleep_save_sp
+	add	x0, x0, #:lo12:sleep_save_sp
 	ldr	x0, [x0, #SLEEP_SAVE_SP_PHYS]
 	ldr	x0, [x0, x7, lsl #3]
 	/* load sp from context */
 	ldr	x2, [x0, #CPU_CTX_SP]
-	adr	x1, sleep_idmap_phys
+	adrp	x1, sleep_idmap_phys
 	/* load physical address of identity map page table in x1 */
-	ldr	x1, [x1]
+	ldr	x1, [x1, #:lo12:sleep_idmap_phys]
 	mov	sp, x2
 	/*
 	 * cpu_do_resume expects x0 to contain context physical address
@@ -180,26 +179,3 @@ ENTRY(cpu_resume)
 	bl	cpu_do_resume		// PC relative jump, MMU off
 	b	cpu_resume_mmu		// Resume MMU, never returns
 ENDPROC(cpu_resume)
-
-	.align 3
-mpidr_hash_ptr:
-	/*
-	 * offset of mpidr_hash symbol from current location
-	 * used to obtain run-time mpidr_hash address with MMU off
-         */
-	.quad	mpidr_hash - .
-/*
- * physical address of identity mapped page tables
- */
-	.type	sleep_idmap_phys, #object
-ENTRY(sleep_idmap_phys)
-	.quad	0
-/*
- * struct sleep_save_sp {
- *	phys_addr_t *save_ptr_stash;
- *	phys_addr_t save_ptr_stash_phys;
- * };
- */
-	.type	sleep_save_sp, #object
-ENTRY(sleep_save_sp)
-	.space	SLEEP_SAVE_SP_SZ	// struct sleep_save_sp
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b06d1d90ee8c..7ae6ee085261 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -37,6 +37,7 @@
 #include <linux/of.h>
 #include <linux/irq_work.h>
 
+#include <asm/alternative.h>
 #include <asm/atomic.h>
 #include <asm/cacheflush.h>
 #include <asm/cpu.h>
@@ -309,6 +310,7 @@ void cpu_die(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+	apply_alternatives_all();
 }
 
 void __init smp_prepare_boot_cpu(void)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 13ad4dbb1615..3771b72b6569 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -126,8 +126,8 @@ int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	return ret;
 }
 
-extern struct sleep_save_sp sleep_save_sp;
-extern phys_addr_t sleep_idmap_phys;
+struct sleep_save_sp sleep_save_sp;
+phys_addr_t sleep_idmap_phys;
 
 static int __init cpu_suspend_init(void)
 {
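
The two objects are now ordinary C definitions instead of .data entries in
sleep.S; their layout is unchanged from the comment deleted above:

	struct sleep_save_sp {
		phys_addr_t *save_ptr_stash;
		phys_addr_t save_ptr_stash_phys;
	};
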
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index dc47e53e9e28..28c511b06edf 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -28,29 +28,39 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd.h>
 
-static inline void
-do_compat_cache_op(unsigned long start, unsigned long end, int flags)
+static long
+__do_compat_cache_op(unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = current->active_mm;
-	struct vm_area_struct *vma;
+	long ret;
 
-	if (end < start || flags)
-		return;
+	do {
+		unsigned long chunk = min(PAGE_SIZE, end - start);
 
-	down_read(&mm->mmap_sem);
-	vma = find_vma(mm, start);
-	if (vma && vma->vm_start < end) {
-		if (start < vma->vm_start)
-			start = vma->vm_start;
-		if (end > vma->vm_end)
-			end = vma->vm_end;
-		up_read(&mm->mmap_sem);
-		__flush_cache_user_range(start & PAGE_MASK, PAGE_ALIGN(end));
-		return;
-	}
-	up_read(&mm->mmap_sem);
+		if (fatal_signal_pending(current))
+			return 0;
+
+		ret = __flush_cache_user_range(start, start + chunk);
+		if (ret)
+			return ret;
+
+		cond_resched();
+		start += chunk;
+	} while (start < end);
+
+	return 0;
 }
 
+static inline long
+do_compat_cache_op(unsigned long start, unsigned long end, int flags)
+{
+	if (end < start || flags)
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ, start, end - start))
+		return -EFAULT;
+
+	return __do_compat_cache_op(start, end);
+}
+
 /*
  * Handle all unrecognised system calls.
  */
@@ -74,8 +84,7 @@ long compat_arm_syscall(struct pt_regs *regs)
 	 * the specified region).
 	 */
 	case __ARM_NR_compat_cacheflush:
-		do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);
-		return 0;
+		return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);
 
 	case __ARM_NR_compat_set_tls:
 		current->thread.tp_value = regs->regs[0];
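
For reference, this handler is reached from 32-bit userspace through the ARM
private cacheflush syscall; a sketch of a typical caller, e.g. a JIT flushing
freshly emitted code (the number is __ARM_NR_BASE + 2 from the 32-bit ARM
unistd.h, and the third argument must be 0 now that non-zero flags yield
-EINVAL):

	#include <unistd.h>
	#include <sys/syscall.h>

	#define __ARM_NR_cacheflush	0x0f0002

	static int cacheflush(void *start, void *end)
	{
		/* can now fail with EFAULT/EINVAL instead of silently no-op */
		return syscall(__ARM_NR_cacheflush, start, end, 0);
	}
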
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b6ee26b0939a..fcb8f7b42271 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -255,12 +255,15 @@ void store_cpu_topology(unsigned int cpuid)
 		/* Multiprocessor system : Multi-threads per core */
 		cpuid_topo->thread_id  = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
+					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
 	} else {
 		/* Multiprocessor system : Single-thread per core */
 		cpuid_topo->thread_id  = -1;
 		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+		cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
+					 MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 |
+					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16;
 	}
 
 	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
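
To make the packing concrete: on a single-threaded system with MPIDR_EL1 =
0x0000030201 (Aff0 = 1, Aff1 = 2, Aff2 = 3, Aff3 = 0), the code above now
yields core_id = 1 and cluster_id = 2 | (3 << 8) = 0x302, so parts that differ
only at the higher affinity levels no longer collapse into a single cluster.
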
diff --git a/arch/arm64/kernel/trace-events-emulation.h b/arch/arm64/kernel/trace-events-emulation.h
new file mode 100644
index 000000000000..ae1dd598ea65
--- /dev/null
+++ b/arch/arm64/kernel/trace-events-emulation.h
@@ -0,0 +1,35 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM emulation
+
+#if !defined(_TRACE_EMULATION_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EMULATION_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(instruction_emulation,
+
+	TP_PROTO(const char *instr, u64 addr),
+	TP_ARGS(instr, addr),
+
+	TP_STRUCT__entry(
+		__string(instr, instr)
+		__field(u64, addr)
+	),
+
+	TP_fast_assign(
+		__assign_str(instr, instr);
+		__entry->addr = addr;
+	),
+
+	TP_printk("instr=\"%s\" addr=0x%llx", __get_str(instr), __entry->addr)
+);
+
+#endif /* _TRACE_EMULATION_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+
+#define TRACE_INCLUDE_FILE trace-events-emulation
+#include <trace/define_trace.h>
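
Via the standard TRACE_EVENT machinery this header generates a
trace_instruction_emulation() helper. A sketch of a call site, presumably in
the deprecated-instruction handlers (the instruction string is illustrative):

	#define CREATE_TRACE_POINTS	/* in exactly one .c file */
	#include "trace-events-emulation.h"

	static void emulate_and_trace(struct pt_regs *regs)
	{
		trace_instruction_emulation("swp r1, r2, [r3]", regs->pc);
	}
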
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index de1b085e7963..0a801e3743d5 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -259,6 +259,69 @@ void arm64_notify_die(const char *str, struct pt_regs *regs,
 	}
 }
 
+static LIST_HEAD(undef_hook);
+static DEFINE_RAW_SPINLOCK(undef_lock);
+
+void register_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_add(&hook->node, &undef_hook);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+void unregister_undef_hook(struct undef_hook *hook)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_del(&hook->node);
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+}
+
+static int call_undef_hook(struct pt_regs *regs)
+{
+	struct undef_hook *hook;
+	unsigned long flags;
+	u32 instr;
+	int (*fn)(struct pt_regs *regs, u32 instr) = NULL;
+	void __user *pc = (void __user *)instruction_pointer(regs);
+
+	if (!user_mode(regs))
+		return 1;
+
+	if (compat_thumb_mode(regs)) {
+		/* 16-bit Thumb instruction */
+		if (get_user(instr, (u16 __user *)pc))
+			goto exit;
+		instr = le16_to_cpu(instr);
+		if (aarch32_insn_is_wide(instr)) {
+			u32 instr2;
+
+			if (get_user(instr2, (u16 __user *)(pc + 2)))
+				goto exit;
+			instr2 = le16_to_cpu(instr2);
+			instr = (instr << 16) | instr2;
+		}
+	} else {
+		/* 32-bit ARM instruction */
+		if (get_user(instr, (u32 __user *)pc))
+			goto exit;
+		instr = le32_to_cpu(instr);
+	}
+
+	raw_spin_lock_irqsave(&undef_lock, flags);
+	list_for_each_entry(hook, &undef_hook, node)
+		if ((instr & hook->instr_mask) == hook->instr_val &&
+			(regs->pstate & hook->pstate_mask) == hook->pstate_val)
+			fn = hook->fn;
+
+	raw_spin_unlock_irqrestore(&undef_lock, flags);
+exit:
+	return fn ? fn(regs, instr) : 1;
+}
+
 asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 {
 	siginfo_t info;
@@ -268,6 +331,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
 	if (!aarch32_break_handler(regs))
 		return;
 
+	if (call_undef_hook(regs) == 0)
+		return;
+
 	if (show_unhandled_signals && unhandled_signal(current, SIGILL) &&
 	    printk_ratelimit()) {
 		pr_info("%s[%d]: undefined instruction: pc=%p\n",
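
A hedged sketch of how an emulation backend hooks into this path, using the
undef_hook fields consulted by call_undef_hook() above (the mask/value pair
below is illustrative rather than a real deprecated encoding):

	static int example_handler(struct pt_regs *regs, u32 instr)
	{
		/* emulate the instruction, then step past it */
		regs->pc += 4;
		return 0;	/* 0 = handled; non-zero falls through to SIGILL */
	}

	static struct undef_hook example_hook = {
		.instr_mask	= 0x0fb00ff0,
		.instr_val	= 0x01000090,
		.pstate_mask	= COMPAT_PSR_MODE_MASK,
		.pstate_val	= COMPAT_PSR_MODE_USR,
		.fn		= example_handler,
	};

	/* from an initcall */
	register_undef_hook(&example_hook);
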
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index edf8715ba39b..9965ec87cbec 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -11,8 +11,9 @@
 
 #include "image.h"
 
-#define ARM_EXIT_KEEP(x)
-#define ARM_EXIT_DISCARD(x)	x
+/* .exit.text needed in case of alternative patching */
+#define ARM_EXIT_KEEP(x)	x
+#define ARM_EXIT_DISCARD(x)
 
 OUTPUT_ARCH(aarch64)
 ENTRY(_text)
@@ -32,6 +33,22 @@ jiffies = jiffies_64;
 	*(.hyp.text)					\
 	VMLINUX_SYMBOL(__hyp_text_end) = .;
 
+/*
+ * The size of the PE/COFF section that covers the kernel image, which
+ * runs from stext to _edata, must be a round multiple of the PE/COFF
+ * FileAlignment, which we set to its minimum value of 0x200. 'stext'
+ * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned
+ * boundary should be sufficient.
+ */
+PECOFF_FILE_ALIGNMENT = 0x200;
+
+#ifdef CONFIG_EFI
+#define PECOFF_EDATA_PADDING	\
+	.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
+#else
+#define PECOFF_EDATA_PADDING
+#endif
+
 SECTIONS
 {
 	/*
@@ -100,9 +117,21 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 
+	. = ALIGN(4);
+	.altinstructions : {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+	.altinstr_replacement : {
+		*(.altinstr_replacement)
+	}
+
+	. = ALIGN(PAGE_SIZE);
 	_data = .;
 	_sdata = .;
 	RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
+	PECOFF_EDATA_PADDING
 	_edata = .;
 
 	BSS_SECTION(0, 0, 0)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index b72aa9f9215c..fbe909fb0a1a 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -761,10 +761,10 @@
 .macro activate_traps
 	ldr     x2, [x0, #VCPU_HCR_EL2]
 	msr     hcr_el2, x2
-	ldr	x2, =(CPTR_EL2_TTA)
+	mov	x2, #CPTR_EL2_TTA
 	msr	cptr_el2, x2
 
-	ldr	x2, =(1 << 15)	// Trap CP15 Cr=15
+	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
 	msr	hstr_el2, x2
 
 	mrs	x2, mdcr_el2
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index c56179ed2c09..773d37a14039 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -3,3 +3,4 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   ioremap.o mmap.o pgd.o mmu.o \
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 23663837acff..2560e1e1562e 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -17,9 +17,12 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/errno.h>
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <asm/assembler.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
 
 #include "proc-macros.S"
 
@@ -138,9 +141,12 @@ USER(9f, ic	ivau, x4	)		// invalidate I line PoU
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	1b
-9:						// ignore any faulting cache operation
 	dsb	ish
 	isb
+	mov	x0, #0
+	ret
+9:
+	mov	x0, #-EFAULT
 	ret
 ENDPROC(flush_icache_range)
 ENDPROC(__flush_cache_user_range)
@@ -210,7 +216,7 @@ __dma_clean_range:
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x0, x0, x3
-1:	dc	cvac, x0			// clean D / U line
+1:	alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE
 	add	x0, x0, x2
 	cmp	x0, x1
 	b.lo	1b
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
new file mode 100644
index 000000000000..bf69601be546
--- /dev/null
+++ b/arch/arm64/mm/dump.c
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * Derived from x86 and arm implementation:
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+
+#include <asm/fixmap.h>
+#include <asm/pgtable.h>
+
+#define LOWEST_ADDR	(UL(0xffffffffffffffff) << VA_BITS)
+
+struct addr_marker {
+	unsigned long start_address;
+	const char *name;
+};
+
+enum address_markers_idx {
+	VMALLOC_START_NR = 0,
+	VMALLOC_END_NR,
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	VMEMMAP_START_NR,
+	VMEMMAP_END_NR,
+#endif
+	PCI_START_NR,
+	PCI_END_NR,
+	FIXADDR_START_NR,
+	FIXADDR_END_NR,
+	MODULES_START_NR,
+	MODULES_END_NR,
+	KERNEL_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+	{ VMALLOC_START,	"vmalloc() Area" },
+	{ VMALLOC_END,		"vmalloc() End" },
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	{ 0,			"vmemmap start" },
+	{ 0,			"vmemmap end" },
+#endif
+	{ (unsigned long) PCI_IOBASE,		"PCI I/O start" },
+	{ (unsigned long) PCI_IOBASE + SZ_16M,	"PCI I/O end" },
+	{ FIXADDR_START,	"Fixmap start" },
+	{ FIXADDR_TOP,		"Fixmap end" },
+	{ MODULES_VADDR,	"Modules start" },
+	{ MODULES_END,		"Modules end" },
+	{ PAGE_OFFSET,		"Kernel Mapping" },
+	{ -1,			NULL },
+};
+
+struct pg_state {
+	struct seq_file *seq;
+	const struct addr_marker *marker;
+	unsigned long start_address;
+	unsigned level;
+	u64 current_prot;
+};
+
+struct prot_bits {
+	u64		mask;
+	u64		val;
+	const char	*set;
+	const char	*clear;
+};
+
+static const struct prot_bits pte_bits[] = {
+	{
+		.mask	= PTE_USER,
+		.val	= PTE_USER,
+		.set	= "USR",
+		.clear	= "   ",
+	}, {
+		.mask	= PTE_RDONLY,
+		.val	= PTE_RDONLY,
+		.set	= "ro",
+		.clear	= "RW",
+	}, {
+		.mask	= PTE_PXN,
+		.val	= PTE_PXN,
+		.set	= "NX",
+		.clear	= "x ",
+	}, {
+		.mask	= PTE_SHARED,
+		.val	= PTE_SHARED,
+		.set	= "SHD",
+		.clear	= "   ",
+	}, {
+		.mask	= PTE_AF,
+		.val	= PTE_AF,
+		.set	= "AF",
+		.clear	= "  ",
+	}, {
+		.mask	= PTE_NG,
+		.val	= PTE_NG,
+		.set	= "NG",
+		.clear	= "  ",
+	}, {
+		.mask	= PTE_UXN,
+		.val	= PTE_UXN,
+		.set	= "UXN",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRnE),
+		.set	= "DEVICE/nGnRnE",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRE),
+		.set	= "DEVICE/nGnRE",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_DEVICE_GRE),
+		.set	= "DEVICE/GRE",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_NORMAL_NC),
+		.set	= "MEM/NORMAL-NC",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_NORMAL),
+		.set	= "MEM/NORMAL",
+	}
+};
+
+struct pg_level {
+	const struct prot_bits *bits;
+	size_t num;
+	u64 mask;
+};
+
+static struct pg_level pg_level[] = {
+	{
+	}, { /* pgd */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	}, { /* pud */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	}, { /* pmd */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	}, { /* pte */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	},
+};
+
+static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
+			size_t num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++, bits++) {
+		const char *s;
+
+		if ((st->current_prot & bits->mask) == bits->val)
+			s = bits->set;
+		else
+			s = bits->clear;
+
+		if (s)
+			seq_printf(st->seq, " %s", s);
+	}
+}
+
+static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
+				u64 val)
+{
+	static const char units[] = "KMGTPE";
+	u64 prot = val & pg_level[level].mask;
+
+	if (addr < LOWEST_ADDR)
+		return;
+
+	if (!st->level) {
+		st->level = level;
+		st->current_prot = prot;
+		st->start_address = addr;
+		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	} else if (prot != st->current_prot || level != st->level ||
+		   addr >= st->marker[1].start_address) {
+		const char *unit = units;
+		unsigned long delta;
+
+		if (st->current_prot) {
+			seq_printf(st->seq, "0x%16lx-0x%16lx   ",
+				   st->start_address, addr);
+
+			delta = (addr - st->start_address) >> 10;
+			while (!(delta & 1023) && unit[1]) {
+				delta >>= 10;
+				unit++;
+			}
+			seq_printf(st->seq, "%9lu%c", delta, *unit);
+			if (pg_level[st->level].bits)
+				dump_prot(st, pg_level[st->level].bits,
+					  pg_level[st->level].num);
+			seq_puts(st->seq, "\n");
+		}
+
+		if (addr >= st->marker[1].start_address) {
+			st->marker++;
+			seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+		}
+
+		st->start_address = addr;
+		st->current_prot = prot;
+		st->level = level;
+	}
+
+	if (addr >= st->marker[1].start_address) {
+		st->marker++;
+		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	}
+}
+
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		addr = start + i * PAGE_SIZE;
+		note_page(st, addr, 4, pte_val(*pte));
+	}
+}
+
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+		addr = start + i * PMD_SIZE;
+		if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd))
+			note_page(st, addr, 3, pmd_val(*pmd));
+		else
+			walk_pte(st, pmd, addr);
+	}
+}
+
+static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+	pud_t *pud = pud_offset(pgd, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		addr = start + i * PUD_SIZE;
+		if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud))
+			note_page(st, addr, 2, pud_val(*pud));
+		else
+			walk_pmd(st, pud, addr);
+	}
+}
+
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
+{
+	pgd_t *pgd = pgd_offset(mm, 0);
+	unsigned i;
+	unsigned long addr;
+
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+		addr = start + i * PGDIR_SIZE;
+		if (pgd_none(*pgd) || pgd_bad(*pgd))
+			note_page(st, addr, 1, pgd_val(*pgd));
+		else
+			walk_pud(st, pgd, addr);
+	}
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	struct pg_state st = {
+		.seq = m,
+		.marker = address_markers,
+	};
+
+	walk_pgd(&st, &init_mm, LOWEST_ADDR);
+
+	note_page(&st, 0, 0, 0);
+	return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+	.open		= ptdump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int ptdump_init(void)
+{
+	struct dentry *pe;
+	unsigned i, j;
+
+	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+		if (pg_level[i].bits)
+			for (j = 0; j < pg_level[i].num; j++)
+				pg_level[i].mask |= pg_level[i].bits[j].mask;
+
+	address_markers[VMEMMAP_START_NR].start_address =
+				(unsigned long)virt_to_page(PAGE_OFFSET);
+	address_markers[VMEMMAP_END_NR].start_address =
+				(unsigned long)virt_to_page(high_memory);
+
+	pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
+				 &ptdump_fops);
+	return pe ? 0 : -ENOMEM;
+}
+device_initcall(ptdump_init);
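
Piecing the seq_printf() formats together, the new
/sys/kernel/debug/kernel_page_tables file should read roughly like this
(addresses, sizes and attributes invented for illustration):

	---[ vmalloc() Area ]---
	0xffffffc000000000-0xffffffc000200000          2M     RW x  SHD AF   UXN MEM/NORMAL
	---[ Kernel Mapping ]---
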
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 41cb6d3d6075..c11cd27ca8f5 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -380,7 +380,7 @@ static struct fault_info {
 	{ do_bad,		SIGBUS,  0,		"level 1 address size fault"	},
 	{ do_bad,		SIGBUS,  0,		"level 2 address size fault"	},
 	{ do_bad,		SIGBUS,  0,		"level 3 address size fault"	},
-	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"input address range fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 0 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
 	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 494297c698ca..bac492c12fcc 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -39,6 +39,7 @@
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/alternative.h>
 
 #include "mm.h"
 
@@ -325,6 +326,7 @@ void __init mem_init(void)
 void free_initmem(void)
 {
 	free_initmem_default(0);
+	free_alternatives_memory();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index 4a07630a6616..cbb99c8f1e04 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -103,97 +103,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
 }
 EXPORT_SYMBOL(ioremap_cache);
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
-static inline pud_t * __init early_ioremap_pud(unsigned long addr)
-{
-	pgd_t *pgd;
-
-	pgd = pgd_offset_k(addr);
-	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
-
-	return pud_offset(pgd, addr);
-}
-
-static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
-{
-	pud_t *pud = early_ioremap_pud(addr);
-
-	BUG_ON(pud_none(*pud) || pud_bad(*pud));
-
-	return pmd_offset(pud, addr);
-}
-
-static inline pte_t * __init early_ioremap_pte(unsigned long addr)
-{
-	pmd_t *pmd = early_ioremap_pmd(addr);
-
-	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-	return pte_offset_kernel(pmd, addr);
-}
-
+/*
+ * Must be called after early_fixmap_init
+ */
 void __init early_ioremap_init(void)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN);
-
-	pgd = pgd_offset_k(addr);
-	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = pud_offset(pgd, addr);
-	pud_populate(&init_mm, pud, bm_pmd);
-	pmd = pmd_offset(pud, addr);
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
-
-	/*
-	 * The boot-ioremap range spans multiple pmds, for which
-	 * we are not prepared:
-	 */
-	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
-		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
-
-	if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
-		WARN_ON(1);
-		pr_warn("pmd %p != %p\n",
-			pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
-		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
-			fix_to_virt(FIX_BTMAP_BEGIN));
-		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
-			fix_to_virt(FIX_BTMAP_END));
-
-		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
-		pr_warn("FIX_BTMAP_BEGIN:     %d\n",
-			FIX_BTMAP_BEGIN);
-	}
-
 	early_ioremap_setup();
 }
-
-void __init __early_set_fixmap(enum fixed_addresses idx,
-			       phys_addr_t phys, pgprot_t flags)
-{
-	unsigned long addr = __fix_to_virt(idx);
-	pte_t *pte;
-
-	if (idx >= __end_of_fixed_addresses) {
-		BUG();
-		return;
-	}
-
-	pte = early_ioremap_pte(addr);
-
-	if (pgprot_val(flags))
-		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
-	else {
-		pte_clear(&init_mm, addr, pte);
-		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
-	}
-}
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index d519f4f50c8c..50c3351df9c7 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,2 +1 @@
 extern void __init bootmem_init(void);
-extern void __init arm64_swiotlb_init(void);
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 1d73662f00ff..54922d1275b8 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -47,22 +47,14 @@ static int mmap_is_legacy(void)
 	return sysctl_legacy_va_layout;
 }
 
-/*
- * Since get_random_int() returns the same value within a 1 jiffy window, we
- * will almost always get the same randomisation for the stack and mmap
- * region. This will mean the relative distance between stack and mmap will be
- * the same.
- *
- * To avoid this we can shift the randomness by 1 bit.
- */
 static unsigned long mmap_rnd(void)
 {
 	unsigned long rnd = 0;
 
 	if (current->flags & PF_RANDOMIZE)
-		rnd = (long)get_random_int() & (STACK_RND_MASK >> 1);
+		rnd = (long)get_random_int() & STACK_RND_MASK;
 
-	return rnd << (PAGE_SHIFT + 1);
+	return rnd << PAGE_SHIFT;
 }
 
 static unsigned long mmap_base(void)
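
The arithmetic behind the change: with 4 KB pages, STACK_RND_MASK on arm64 is
0x3ffff, so both schemes cover the same 1 GB window (2^17 x 8 KB before,
2^18 x 4 KB after), but the base is now randomized at single-page granularity
and the extra shift that decorrelated mmap from stack randomness is dropped,
presumably because get_random_int() no longer repeats within a jiffy.
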
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index f4f8b500f74c..6032f3e3056a 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,6 +28,7 @@
 #include <linux/io.h>
 
 #include <asm/cputype.h>
+#include <asm/fixmap.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -463,3 +464,96 @@ void vmemmap_free(unsigned long start, unsigned long end)
 {
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
+
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
+#endif
+#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+
+static inline pud_t *fixmap_pud(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+
+	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+
+	return pud_offset(pgd, addr);
+}
+
+static inline pmd_t *fixmap_pmd(unsigned long addr)
+{
+	pud_t *pud = fixmap_pud(addr);
+
+	BUG_ON(pud_none(*pud) || pud_bad(*pud));
+
+	return pmd_offset(pud, addr);
+}
+
+static inline pte_t *fixmap_pte(unsigned long addr)
+{
+	pmd_t *pmd = fixmap_pmd(addr);
+
+	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
+
+	return pte_offset_kernel(pmd, addr);
+}
+
+void __init early_fixmap_init(void)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	unsigned long addr = FIXADDR_START;
+
+	pgd = pgd_offset_k(addr);
+	pgd_populate(&init_mm, pgd, bm_pud);
+	pud = pud_offset(pgd, addr);
+	pud_populate(&init_mm, pud, bm_pmd);
+	pmd = pmd_offset(pud, addr);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
+	/*
+	 * The boot-ioremap range spans multiple pmds, for which
+	 * we are not prepared:
+	 */
+	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+	if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+	     || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		pr_warn("pmd %p != %p, %p\n",
+			pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+			fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
+		pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		pr_warn("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		pr_warn("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		pr_warn("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
+}
+
+void __set_fixmap(enum fixed_addresses idx,
+			       phys_addr_t phys, pgprot_t flags)
+{
+	unsigned long addr = __fix_to_virt(idx);
+	pte_t *pte;
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	pte = fixmap_pte(addr);
+
+	if (pgprot_val(flags)) {
+		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+	} else {
+		pte_clear(&init_mm, addr, pte);
+		flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
+	}
+}
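
Slots are toggled one page at a time through this helper; a sketch of a
caller, assuming the FIXMAP_PAGE_IO protection value from the fixmap headers
(the slot choice and access are illustrative):

	static u32 __init fixmap_peek(phys_addr_t phys)
	{
		void __iomem *va = (void __iomem *)fix_to_virt(FIX_BTMAP_BEGIN);
		u32 val;

		__set_fixmap(FIX_BTMAP_BEGIN, phys, FIXMAP_PAGE_IO);
		val = readl(va);				/* example access */
		__set_fixmap(FIX_BTMAP_BEGIN, 0, __pgprot(0));	/* unmap + TLB flush */

		return val;
	}
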
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 6682b361d3ac..71ca104f97bd 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -35,9 +35,9 @@ static struct kmem_cache *pgd_cache;
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	if (PGD_SIZE == PAGE_SIZE)
-		return (pgd_t *)get_zeroed_page(GFP_KERNEL);
+		return (pgd_t *)__get_free_page(PGALLOC_GFP);
 	else
-		return kmem_cache_zalloc(pgd_cache, GFP_KERNEL);
+		return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
 }
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
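
This relies on the PGALLOC_GFP mask from asm/pgalloc.h; assuming it mirrors
the x86 convention, it expands to roughly the following, so the pgd is still
returned zeroed while gaining the flags used for the other page-table levels:

	/* assumed definition, not part of this hunk */
	#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
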
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 41f1e3e2ea24..edba042b2325 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -60,7 +60,7 @@ struct jit_ctx {
 	const struct bpf_prog *prog;
 	int idx;
 	int tmp_used;
-	int body_offset;
+	int epilogue_offset;
 	int *offset;
 	u32 *image;
 };
@@ -130,8 +130,8 @@ static void jit_fill_hole(void *area, unsigned int size)
 
 static inline int epilogue_offset(const struct jit_ctx *ctx)
 {
-	int to = ctx->offset[ctx->prog->len - 1];
-	int from = ctx->idx - ctx->body_offset;
+	int to = ctx->epilogue_offset;
+	int from = ctx->idx;
 
 	return to - from;
 }
@@ -463,6 +463,8 @@ emit_cond_jmp:
 	}
 	/* function return */
 	case BPF_JMP | BPF_EXIT:
+		/*
+		 * Optimization: when the last instruction is EXIT,
+		 * simply fall through to the epilogue.
+		 */
 		if (i == ctx->prog->len - 1)
 			break;
 		jmp_offset = epilogue_offset(ctx);
@@ -685,11 +687,13 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 
 	/* 1. Initial fake pass to compute ctx->idx. */
 
-	/* Fake pass to fill in ctx->offset. */
+	/* Fake pass to fill in ctx->offset and ctx->tmp_used. */
 	if (build_body(&ctx))
 		goto out;
 
 	build_prologue(&ctx);
+
+	ctx.epilogue_offset = ctx.idx;
 	build_epilogue(&ctx);
 
 	/* Now we know the actual image size. */
@@ -706,7 +710,6 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 
 	build_prologue(&ctx);
 
-	ctx.body_offset = ctx.idx;
 	if (build_body(&ctx)) {
 		bpf_jit_binary_free(header);
 		goto out;
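
In short: after the fake pass, ctx.idx counts prologue plus body instructions,
so snapshotting it into ctx.epilogue_offset pins where the epilogue begins,
and epilogue_offset() reduces to "epilogue start minus current index". The
body_offset field, which existed only to rebase ctx->idx against the offset
table, becomes redundant and is removed.
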
diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index 8aa97817cc8c..99b6ded54849 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -14,7 +14,6 @@
 #define tlb_flush(tlb)	flush_tlb_mm((tlb)->mm)
 
 #include <linux/pagemap.h>
-#include <asm-generic/tlb.h>
 
 #ifdef CONFIG_MMU
 #define tlb_start_vma(tlb, vma)		do { } while (0)
@@ -22,4 +21,6 @@
 #define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
 #endif
 
+#include <asm-generic/tlb.h>
+
 #endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e9a9f60e596d..fc3ee06eab87 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -3,7 +3,6 @@
 #ifdef __KERNEL__
 
 #include <linux/mm.h>
-#include <asm-generic/tlb.h>
 
 #ifdef CONFIG_PPC_BOOK3E
 extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
@@ -14,6 +13,8 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
 }
 #endif /* !CONFIG_PPC_BOOK3E */
 
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
 #ifdef CONFIG_PPC64
 #include <asm/pgalloc-64.h>
 #else
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e2b428b0f7ba..20733fa518ae 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -27,6 +27,7 @@
 
 #define tlb_start_vma(tlb, vma)	do { } while (0)
 #define tlb_end_vma(tlb, vma)	do { } while (0)
+#define __tlb_remove_tlb_entry	__tlb_remove_tlb_entry
 
 extern void tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7e70ae968e5f..6a4a5fcb9730 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -517,8 +517,6 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	for (i = 0; i < num_hugepd; i++, hpdp++)
 		hpdp->pd = 0;
 
-	tlb->need_flush = 1;
-
 #ifdef CONFIG_PPC_FSL_BOOK3E
 	hugepd_free(tlb, hugepte);
 #else