Diffstat (limited to 'arch/arm64')
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h      |    3
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h      |   76
-rw-r--r--  arch/arm64/include/asm/kvm_emulate.h  |    1
-rw-r--r--  arch/arm64/include/asm/kvm_host.h     |   87
-rw-r--r--  arch/arm64/include/asm/kvm_mmio.h     |    1
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h      |    9
-rw-r--r--  arch/arm64/include/asm/sysreg.h       |   21
-rw-r--r--  arch/arm64/kernel/asm-offsets.c       |   40
-rw-r--r--  arch/arm64/kvm/Makefile               |    3
-rw-r--r--  arch/arm64/kvm/guest.c                |   10
-rw-r--r--  arch/arm64/kvm/handle_exit.c          |    4
-rw-r--r--  arch/arm64/kvm/hyp-init.S             |    9
-rw-r--r--  arch/arm64/kvm/hyp.S                  | 1081
-rw-r--r--  arch/arm64/kvm/hyp/Makefile           |   14
-rw-r--r--  arch/arm64/kvm/hyp/debug-sr.c         |  140
-rw-r--r--  arch/arm64/kvm/hyp/entry.S            |  160
-rw-r--r--  arch/arm64/kvm/hyp/fpsimd.S           |   33
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S        |  212
-rw-r--r--  arch/arm64/kvm/hyp/hyp.h              |   90
-rw-r--r--  arch/arm64/kvm/hyp/switch.c           |  175
-rw-r--r--  arch/arm64/kvm/hyp/sysreg-sr.c        |  138
-rw-r--r--  arch/arm64/kvm/hyp/timer-sr.c         |   71
-rw-r--r--  arch/arm64/kvm/hyp/tlb.c              |   80
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v2-sr.c       |   84
-rw-r--r--  arch/arm64/kvm/hyp/vgic-v3-sr.c       |  228
-rw-r--r--  arch/arm64/kvm/sys_regs.c             |   59
-rw-r--r--  arch/arm64/kvm/vgic-v2-switch.S       |  134
-rw-r--r--  arch/arm64/kvm/vgic-v3-switch.S       |  269
28 files changed, 1597 insertions(+), 1635 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 5e6857b6bdc4..738a95f93e49 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -125,6 +125,7 @@
 #define VTCR_EL2_SL0_LVL1	(1 << 6)
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
+#define VTCR_EL2_VS		19
 
 /*
  * We configure the Stage-2 page tables to always restrict the IPA space to be
@@ -169,7 +170,7 @@
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
 #define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (UL(48))
-#define VTTBR_VMID_MASK	  (UL(0xFF) << VTTBR_VMID_SHIFT)
+#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT)
 
 /* Hyp System Trap Register */
 #define HSTR_EL2_T(x)	(1 << x)
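Note: the VMID field is no longer assumed to be 8 bits wide. With the ARMv8.1 VMID16 extension it can be 8 or 16 bits, so VTTBR_VMID_MASK() now takes the width as a parameter (the width comes from kvm_get_vmid_bits() in the kvm_mmu.h hunk below). A minimal userspace sketch, with constants copied from this hunk, that prints the two possible masks:

	#include <stdint.h>
	#include <stdio.h>

	#define VTTBR_VMID_SHIFT 48
	#define VTTBR_VMID_MASK(size) \
		((((uint64_t)1 << (size)) - 1) << VTTBR_VMID_SHIFT)

	int main(void)
	{
		/* 8-bit VMIDs:  00ff000000000000 */
		printf("%016llx\n", (unsigned long long)VTTBR_VMID_MASK(8));
		/* 16-bit VMIDs: ffff000000000000 */
		printf("%016llx\n", (unsigned long long)VTTBR_VMID_MASK(16));
		return 0;
	}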
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 5e377101f919..52b777b7d407 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -20,82 +20,6 @@
 
 #include <asm/virt.h>
 
-/*
- * 0 is reserved as an invalid value.
- * Order *must* be kept in sync with the hyp switch code.
- */
-#define	MPIDR_EL1	1	/* MultiProcessor Affinity Register */
-#define	CSSELR_EL1	2	/* Cache Size Selection Register */
-#define	SCTLR_EL1	3	/* System Control Register */
-#define	ACTLR_EL1	4	/* Auxiliary Control Register */
-#define	CPACR_EL1	5	/* Coprocessor Access Control */
-#define	TTBR0_EL1	6	/* Translation Table Base Register 0 */
-#define	TTBR1_EL1	7	/* Translation Table Base Register 1 */
-#define	TCR_EL1		8	/* Translation Control Register */
-#define	ESR_EL1		9	/* Exception Syndrome Register */
-#define	AFSR0_EL1	10	/* Auxilary Fault Status Register 0 */
-#define	AFSR1_EL1	11	/* Auxilary Fault Status Register 1 */
-#define	FAR_EL1		12	/* Fault Address Register */
-#define	MAIR_EL1	13	/* Memory Attribute Indirection Register */
-#define	VBAR_EL1	14	/* Vector Base Address Register */
-#define	CONTEXTIDR_EL1	15	/* Context ID Register */
-#define	TPIDR_EL0	16	/* Thread ID, User R/W */
-#define	TPIDRRO_EL0	17	/* Thread ID, User R/O */
-#define	TPIDR_EL1	18	/* Thread ID, Privileged */
-#define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
-#define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
-#define	PAR_EL1		21	/* Physical Address Register */
-#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
-#define MDCCINT_EL1	23	/* Monitor Debug Comms Channel Interrupt Enable Reg */
-
-/* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	24	/* Domain Access Control Register */
-#define	IFSR32_EL2	25	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	26	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	27	/* Debug Vector Catch Register */
-#define	NR_SYS_REGS	28
-
-/* 32bit mapping */
-#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
-#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
-#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
-#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
-#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
-#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
-#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
-#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
-#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
-#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
-#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
-#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
-#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
-#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
-#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
-#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
-#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
-#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
-#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
-#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
-#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
-#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
-#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
-#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
-#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
-#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
-#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
-#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
-#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
-
-#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
-#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
-#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
-#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
-#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
-#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
-#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
-
-#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
-
 #define ARM_EXCEPTION_IRQ	  0
 #define ARM_EXCEPTION_TRAP	  1
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 25a40213bd9b..3066328cd86b 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -26,7 +26,6 @@
 
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 #include <asm/cputype.h>
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a35ce7266aac..689d4c95e12f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,7 +25,6 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <asm/kvm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -85,6 +84,86 @@ struct kvm_vcpu_fault_info {
 	u64 hpfar_el2;		/* Hyp IPA Fault Address Register */
 };
 
+/*
+ * 0 is reserved as an invalid value.
+ * Order should be kept in sync with the save/restore code.
+ */
+enum vcpu_sysreg {
+	__INVALID_SYSREG__,
+	MPIDR_EL1,	/* MultiProcessor Affinity Register */
+	CSSELR_EL1,	/* Cache Size Selection Register */
+	SCTLR_EL1,	/* System Control Register */
+	ACTLR_EL1,	/* Auxiliary Control Register */
+	CPACR_EL1,	/* Coprocessor Access Control */
+	TTBR0_EL1,	/* Translation Table Base Register 0 */
+	TTBR1_EL1,	/* Translation Table Base Register 1 */
+	TCR_EL1,	/* Translation Control Register */
+	ESR_EL1,	/* Exception Syndrome Register */
+	AFSR0_EL1,	/* Auxilary Fault Status Register 0 */
+	AFSR1_EL1,	/* Auxilary Fault Status Register 1 */
+	FAR_EL1,	/* Fault Address Register */
+	MAIR_EL1,	/* Memory Attribute Indirection Register */
+	VBAR_EL1,	/* Vector Base Address Register */
+	CONTEXTIDR_EL1,	/* Context ID Register */
+	TPIDR_EL0,	/* Thread ID, User R/W */
+	TPIDRRO_EL0,	/* Thread ID, User R/O */
+	TPIDR_EL1,	/* Thread ID, Privileged */
+	AMAIR_EL1,	/* Aux Memory Attribute Indirection Register */
+	CNTKCTL_EL1,	/* Timer Control Register (EL1) */
+	PAR_EL1,	/* Physical Address Register */
+	MDSCR_EL1,	/* Monitor Debug System Control Register */
+	MDCCINT_EL1,	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+
+	/* 32bit specific registers. Keep them at the end of the range */
+	DACR32_EL2,	/* Domain Access Control Register */
+	IFSR32_EL2,	/* Instruction Fault Status Register */
+	FPEXC32_EL2,	/* Floating-Point Exception Control Register */
+	DBGVCR32_EL2,	/* Debug Vector Catch Register */
+
+	NR_SYS_REGS	/* Nothing after this line! */
+};
+
+/* 32bit mapping */
+#define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
+#define c0_CSSELR	(CSSELR_EL1 * 2)/* Cache Size Selection Register */
+#define c1_SCTLR	(SCTLR_EL1 * 2)	/* System Control Register */
+#define c1_ACTLR	(ACTLR_EL1 * 2)	/* Auxiliary Control Register */
+#define c1_CPACR	(CPACR_EL1 * 2)	/* Coprocessor Access Control */
+#define c2_TTBR0	(TTBR0_EL1 * 2)	/* Translation Table Base Register 0 */
+#define c2_TTBR0_high	(c2_TTBR0 + 1)	/* TTBR0 top 32 bits */
+#define c2_TTBR1	(TTBR1_EL1 * 2)	/* Translation Table Base Register 1 */
+#define c2_TTBR1_high	(c2_TTBR1 + 1)	/* TTBR1 top 32 bits */
+#define c2_TTBCR	(TCR_EL1 * 2)	/* Translation Table Base Control R. */
+#define c3_DACR		(DACR32_EL2 * 2)/* Domain Access Control Register */
+#define c5_DFSR		(ESR_EL1 * 2)	/* Data Fault Status Register */
+#define c5_IFSR		(IFSR32_EL2 * 2)/* Instruction Fault Status Register */
+#define c5_ADFSR	(AFSR0_EL1 * 2)	/* Auxiliary Data Fault Status R */
+#define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
+#define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
+#define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
+#define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
+#define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
+#define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
+#define c13_CID		(CONTEXTIDR_EL1 * 2)	/* Context ID Register */
+#define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
+#define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
+#define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
+#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
+#define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+
+#define cp14_DBGDSCRext	(MDSCR_EL1 * 2)
+#define cp14_DBGBCR0	(DBGBCR0_EL1 * 2)
+#define cp14_DBGBVR0	(DBGBVR0_EL1 * 2)
+#define cp14_DBGBXVR0	(cp14_DBGBVR0 + 1)
+#define cp14_DBGWCR0	(DBGWCR0_EL1 * 2)
+#define cp14_DBGWVR0	(DBGWVR0_EL1 * 2)
+#define cp14_DBGDCCINT	(MDCCINT_EL1 * 2)
+
+#define NR_COPRO_REGS	(NR_SYS_REGS * 2)
+
 struct kvm_cpu_context {
 	struct kvm_regs	gp_regs;
 	union {
@@ -197,6 +276,12 @@ struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
 	u32 halt_wakeup;
+	u32 hvc_exit_stat;
+	u64 wfe_exit_stat;
+	u64 wfi_exit_stat;
+	u64 mmio_exit_user;
+	u64 mmio_exit_kernel;
+	u64 exits;
 };
 
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
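Note: the register indices move out of kvm_asm.h (where the assembly world switch needed them as cpp constants) into a C enum, since the save/restore code is now C and indexes ctxt->sys_regs[] directly. A sketch of the resulting access pattern, assuming the usual vcpu_sys_reg() accessor from this header:

	/* Sketch; assumes:
	 * #define vcpu_sys_reg(v,r) ((v)->arch.ctxt.sys_regs[(r)]) */
	u64 sctlr = vcpu_sys_reg(vcpu, SCTLR_EL1);
	vcpu_sys_reg(vcpu, SCTLR_EL1) = sctlr | (1UL << 0);	/* set SCTLR_EL1.M */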
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index 889c908ee631..fe612a962576 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -19,7 +19,6 @@
 #define __ARM64_KVM_MMIO_H__
 
 #include <linux/kvm_host.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 
 /*
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 61505676d085..0bf8b4320a91 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -20,6 +20,7 @@
 
 #include <asm/page.h>
 #include <asm/memory.h>
+#include <asm/cpufeature.h>
 
 /*
  * As we only have the TTBR0_EL2 register, we cannot express
@@ -158,7 +159,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define PTRS_PER_S2_PGD_SHIFT	(KVM_PHYS_SHIFT - PGDIR_SHIFT)
 #endif
 #define PTRS_PER_S2_PGD		(1 << PTRS_PER_S2_PGD_SHIFT)
-#define S2_PGD_ORDER		get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
 #define kvm_pgd_index(addr)	(((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
 
@@ -302,5 +302,12 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
 	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
 }
 
+static inline unsigned int kvm_get_vmid_bits(void)
+{
+	int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1);
+
+	return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
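Note: kvm_get_vmid_bits() decodes ID_AA64MMFR1_EL1.VMIDBits, where a field value of 2 means 16-bit VMIDs are implemented and anything else falls back to 8. A hedged sketch of how a caller combines it with the reworked mask from kvm_arm.h (the real consumer is the VMID allocator in arch/arm/kvm/arm.c, which caches the width once at init):

	/* Illustration only: derive this CPU's VMID mask. */
	static u64 kvm_vmid_mask(void)
	{
		return VTTBR_VMID_MASK(kvm_get_vmid_bits());
	}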
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d48ab5b41f52..4aeebec3d882 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -20,6 +20,8 @@
 #ifndef __ASM_SYSREG_H
 #define __ASM_SYSREG_H
 
+#include <linux/stringify.h>
+
 #include <asm/opcodes.h>
 
 /*
@@ -208,6 +210,8 @@
 
 #else
 
+#include <linux/types.h>
+
 asm(
 "	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
 "	.equ	__reg_num_x\\num, \\num\n"
@@ -232,6 +236,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set)
 	val |= set;
 	asm volatile("msr sctlr_el1, %0" : : "r" (val));
 }
+
+/*
+ * Unlike read_cpuid, calls to read_sysreg are never expected to be
+ * optimized away or replaced with synthetic values.
+ */
+#define read_sysreg(r) ({					\
+	u64 __val;						\
+	asm volatile("mrs %0, " __stringify(r) : "=r" (__val));	\
+	__val;							\
+})
+
+#define write_sysreg(v, r) do {					\
+	u64 __val = (u64)v;					\
+	asm volatile("msr " __stringify(r) ", %0"		\
+		     : : "r" (__val));				\
+} while (0)
+
 #endif
 
 #endif	/* __ASM_SYSREG_H */
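Note: read_sysreg()/write_sysreg() stringify the register name straight into an mrs/msr instruction, which is what lets the world-switch code further down be written in C instead of hand-scheduled assembly. Typical usage, mirroring patterns in the new hyp/ files:

	u64 mdcr = read_sysreg(mdcr_el2);	/* emits: mrs %0, mdcr_el2 */
	write_sysreg(mdcr & MDCR_EL2_HPMN_MASK, mdcr_el2);	/* emits: msr mdcr_el2, %0 */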
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index bb493d44445f..fffa4ac6c25a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -109,49 +109,11 @@ int main(void)
   DEFINE(CPU_GP_REGS,		offsetof(struct kvm_cpu_context, gp_regs));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_regs, regs));
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
-  DEFINE(CPU_SP_EL1,		offsetof(struct kvm_regs, sp_el1));
-  DEFINE(CPU_ELR_EL1,		offsetof(struct kvm_regs, elr_el1));
-  DEFINE(CPU_SPSR,		offsetof(struct kvm_regs, spsr));
-  DEFINE(CPU_SYSREGS,		offsetof(struct kvm_cpu_context, sys_regs));
+  DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_ESR_EL2,		offsetof(struct kvm_vcpu, arch.fault.esr_el2));
   DEFINE(VCPU_FAR_EL2,		offsetof(struct kvm_vcpu, arch.fault.far_el2));
   DEFINE(VCPU_HPFAR_EL2,	offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
-  DEFINE(VCPU_DEBUG_FLAGS,	offsetof(struct kvm_vcpu, arch.debug_flags));
-  DEFINE(VCPU_DEBUG_PTR,	offsetof(struct kvm_vcpu, arch.debug_ptr));
-  DEFINE(DEBUG_BCR, 		offsetof(struct kvm_guest_debug_arch, dbg_bcr));
-  DEFINE(DEBUG_BVR, 		offsetof(struct kvm_guest_debug_arch, dbg_bvr));
-  DEFINE(DEBUG_WCR, 		offsetof(struct kvm_guest_debug_arch, dbg_wcr));
-  DEFINE(DEBUG_WVR, 		offsetof(struct kvm_guest_debug_arch, dbg_wvr));
-  DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
-  DEFINE(VCPU_MDCR_EL2,	offsetof(struct kvm_vcpu, arch.mdcr_el2));
-  DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
-  DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state));
-  DEFINE(VCPU_TIMER_CNTV_CTL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
-  DEFINE(VCPU_TIMER_CNTV_CVAL,	offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
-  DEFINE(KVM_TIMER_CNTVOFF,	offsetof(struct kvm, arch.timer.cntvoff));
-  DEFINE(KVM_TIMER_ENABLED,	offsetof(struct kvm, arch.timer.enabled));
-  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
-  DEFINE(VCPU_VGIC_CPU,		offsetof(struct kvm_vcpu, arch.vgic_cpu));
-  DEFINE(VGIC_V2_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
-  DEFINE(VGIC_V2_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
-  DEFINE(VGIC_V2_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_misr));
-  DEFINE(VGIC_V2_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v2.vgic_eisr));
-  DEFINE(VGIC_V2_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr));
-  DEFINE(VGIC_V2_CPU_APR,	offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
-  DEFINE(VGIC_V2_CPU_LR,	offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
-  DEFINE(VGIC_V3_CPU_SRE,	offsetof(struct vgic_cpu, vgic_v3.vgic_sre));
-  DEFINE(VGIC_V3_CPU_HCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_hcr));
-  DEFINE(VGIC_V3_CPU_VMCR,	offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr));
-  DEFINE(VGIC_V3_CPU_MISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_misr));
-  DEFINE(VGIC_V3_CPU_EISR,	offsetof(struct vgic_cpu, vgic_v3.vgic_eisr));
-  DEFINE(VGIC_V3_CPU_ELRSR,	offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr));
-  DEFINE(VGIC_V3_CPU_AP0R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r));
-  DEFINE(VGIC_V3_CPU_AP1R,	offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r));
-  DEFINE(VGIC_V3_CPU_LR,	offsetof(struct vgic_cpu, vgic_v3.vgic_lr));
-  DEFINE(VGIC_CPU_NR_LR,	offsetof(struct vgic_cpu, nr_lr));
-  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
-  DEFINE(KVM_VGIC_VCTRL,	offsetof(struct kvm, arch.vgic.vctrl_base));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
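Note: most of these offsets existed only so hyp.S could poke at C structures; with the world switch in C, the surviving entries are the few still referenced from the new entry.S and hyp-entry.S. For reference, DEFINE() comes from include/linux/kbuild.h and works roughly like this (a sketch, not guaranteed to match this tree exactly):

	/* Emits a marker line into the generated assembly, which a
	 * build-time script scrapes into asm-offsets.h, e.g.:
	 *   #define VCPU_FPEXC32_EL2 <offset>   -- consumed by hyp/entry.S */
	#define DEFINE(sym, val) \
		asm volatile("\n->" #sym " %0 " #val : : "i" (val))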
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 1949fe5f5424..caee9ee8e12a 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -10,6 +10,7 @@ KVM=../../../virt/kvm
 ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp/
 
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
@@ -22,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d250160d32bc..fcb778899a38 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -28,13 +28,21 @@
 #include <asm/cputype.h>
 #include <asm/uaccess.h>
 #include <asm/kvm.h>
-#include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
 
 #include "trace.h"
 
+#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
+#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(hvc_exit_stat),
+	VCPU_STAT(wfe_exit_stat),
+	VCPU_STAT(wfi_exit_stat),
+	VCPU_STAT(mmio_exit_user),
+	VCPU_STAT(mmio_exit_kernel),
+	VCPU_STAT(exits),
 	{ NULL }
 };
 
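Note: each VCPU_STAT() row ties a debugfs file name to an offset inside struct kvm_vcpu; the generic code in virt/kvm then reads that field for each vcpu. A hypothetical helper showing what the offset arithmetic amounts to (not the kernel's actual accessor; note hvc_exit_stat is u32 while the other new counters are u64, so a real reader must honour the field width):

	static u64 read_vcpu_stat_u64(struct kvm_vcpu *vcpu,
				      const struct kvm_stats_debugfs_item *item)
	{
		return *(u64 *)((char *)vcpu + item->offset);
	}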
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 15f0477b0d2a..eba89e42f0ed 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -23,6 +23,7 @@
 #include <linux/kvm_host.h>
 
 #include <asm/esr.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
@@ -39,6 +40,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0),
 			    kvm_vcpu_hvc_get_imm(vcpu));
+	vcpu->stat.hvc_exit_stat++;
 
 	ret = kvm_psci_call(vcpu);
 	if (ret < 0) {
@@ -71,9 +73,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
+		vcpu->stat.wfe_exit_stat++;
 		kvm_vcpu_on_spin(vcpu);
 	} else {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
+		vcpu->stat.wfi_exit_stat++;
 		kvm_vcpu_block(vcpu);
 	}
 
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 178ba2248a98..3e568dcd907b 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -94,6 +94,15 @@ __do_hyp_init:
 	 */
 	mrs	x5, ID_AA64MMFR0_EL1
 	bfi	x4, x5, #16, #3
+	/*
+	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in
+	 * VTCR_EL2.
+	 */
+	mrs	x5, ID_AA64MMFR1_EL1
+	ubfx	x5, x5, #5, #1
+	lsl	x5, x5, #VTCR_EL2_VS
+	orr	x4, x4, x5
+
 	msr	vtcr_el2, x4
 
 	mrs	x4, mair_el1
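Note: the ubfx here isolates a single bit rather than the whole field. VMIDBits occupies ID_AA64MMFR1_EL1[7:4] and is architecturally either 0 (8-bit VMIDs) or 2 (16-bit VMIDs), so bit 5 of the raw register is set exactly when 16-bit VMIDs are implemented. In C, the ubfx/lsl/orr sequence amounts to:

	/* Sketch of the assembly above. */
	u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);
	vtcr |= ((mmfr1 >> 5) & 1UL) << VTCR_EL2_VS;	/* VTCR_EL2.VS */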
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 86c289832272..0ccdcbbef3c2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -17,910 +17,7 @@
 
 #include <linux/linkage.h>
 
-#include <asm/alternative.h>
-#include <asm/asm-offsets.h>
 #include <asm/assembler.h>
-#include <asm/cpufeature.h>
-#include <asm/debug-monitors.h>
-#include <asm/esr.h>
-#include <asm/fpsimdmacros.h>
-#include <asm/kvm.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_mmu.h>
-#include <asm/memory.h>
-
-#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
-#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
-#define CPU_SPSR_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_SPSR + 8*x)
-#define CPU_SYSREG_OFFSET(x)	(CPU_SYSREGS + 8*x)
-
-	.text
-	.pushsection	.hyp.text, "ax"
-	.align	PAGE_SHIFT
-
-.macro save_common_regs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	add	x3, x2, #CPU_XREG_OFFSET(19)
-	stp	x19, x20, [x3]
-	stp	x21, x22, [x3, #16]
-	stp	x23, x24, [x3, #32]
-	stp	x25, x26, [x3, #48]
-	stp	x27, x28, [x3, #64]
-	stp	x29, lr, [x3, #80]
-
-	mrs	x19, sp_el0
-	mrs	x20, elr_el2		// pc before entering el2
-	mrs	x21, spsr_el2		// pstate before entering el2
-
-	stp	x19, x20, [x3, #96]
-	str	x21, [x3, #112]
-
-	mrs	x22, sp_el1
-	mrs	x23, elr_el1
-	mrs	x24, spsr_el1
-
-	str	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
-	str	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
-	str	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
-.endm
-
-.macro restore_common_regs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
-	ldr	x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)]
-	ldr	x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)]
-
-	msr	sp_el1, x22
-	msr	elr_el1, x23
-	msr	spsr_el1, x24
-
-	add	x3, x2, #CPU_XREG_OFFSET(31)    // SP_EL0
-	ldp	x19, x20, [x3]
-	ldr	x21, [x3, #16]
-
-	msr	sp_el0, x19
-	msr	elr_el2, x20 		// pc on return from el2
-	msr	spsr_el2, x21 		// pstate on return from el2
-
-	add	x3, x2, #CPU_XREG_OFFSET(19)
-	ldp	x19, x20, [x3]
-	ldp	x21, x22, [x3, #16]
-	ldp	x23, x24, [x3, #32]
-	ldp	x25, x26, [x3, #48]
-	ldp	x27, x28, [x3, #64]
-	ldp	x29, lr, [x3, #80]
-.endm
-
-.macro save_host_regs
-	save_common_regs
-.endm
-
-.macro restore_host_regs
-	restore_common_regs
-.endm
-
-.macro save_fpsimd
-	// x2: cpu context address
-	// x3, x4: tmp regs
-	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	fpsimd_save x3, 4
-.endm
-
-.macro restore_fpsimd
-	// x2: cpu context address
-	// x3, x4: tmp regs
-	add	x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
-	fpsimd_restore x3, 4
-.endm
-
-.macro save_guest_regs
-	// x0 is the vcpu address
-	// x1 is the return code, do not corrupt!
-	// x2 is the cpu context
-	// x3 is a tmp register
-	// Guest's x0-x3 are on the stack
-
-	// Compute base to save registers
-	add	x3, x2, #CPU_XREG_OFFSET(4)
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-	stp	x8, x9, [x3, #32]
-	stp	x10, x11, [x3, #48]
-	stp	x12, x13, [x3, #64]
-	stp	x14, x15, [x3, #80]
-	stp	x16, x17, [x3, #96]
-	str	x18, [x3, #112]
-
-	pop	x6, x7			// x2, x3
-	pop	x4, x5			// x0, x1
-
-	add	x3, x2, #CPU_XREG_OFFSET(0)
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-
-	save_common_regs
-.endm
-
-.macro restore_guest_regs
-	// x0 is the vcpu address.
-	// x2 is the cpu context
-	// x3 is a tmp register
-
-	// Prepare x0-x3 for later restore
-	add	x3, x2, #CPU_XREG_OFFSET(0)
-	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
-	push	x4, x5		// Push x0-x3 on the stack
-	push	x6, x7
-
-	// x4-x18
-	ldp	x4, x5, [x3, #32]
-	ldp	x6, x7, [x3, #48]
-	ldp	x8, x9, [x3, #64]
-	ldp	x10, x11, [x3, #80]
-	ldp	x12, x13, [x3, #96]
-	ldp	x14, x15, [x3, #112]
-	ldp	x16, x17, [x3, #128]
-	ldr	x18, [x3, #144]
-
-	// x19-x29, lr, sp*, elr*, spsr*
-	restore_common_regs
-
-	// Last bits of the 64bit state
-	pop	x2, x3
-	pop	x0, x1
-
-	// Do not touch any register after this!
-.endm
-
-/*
- * Macros to perform system register save/restore.
- *
- * Ordering here is absolutely critical, and must be kept consistent
- * in {save,restore}_sysregs, {save,restore}_guest_32bit_state,
- * and in kvm_asm.h.
- *
- * In other words, don't touch any of these unless you know what
- * you are doing.
- */
-.macro save_sysregs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
-
-	mrs	x4,	vmpidr_el2
-	mrs	x5,	csselr_el1
-	mrs	x6,	sctlr_el1
-	mrs	x7,	actlr_el1
-	mrs	x8,	cpacr_el1
-	mrs	x9,	ttbr0_el1
-	mrs	x10,	ttbr1_el1
-	mrs	x11,	tcr_el1
-	mrs	x12,	esr_el1
-	mrs	x13, 	afsr0_el1
-	mrs	x14,	afsr1_el1
-	mrs	x15,	far_el1
-	mrs	x16,	mair_el1
-	mrs	x17,	vbar_el1
-	mrs	x18,	contextidr_el1
-	mrs	x19,	tpidr_el0
-	mrs	x20,	tpidrro_el0
-	mrs	x21,	tpidr_el1
-	mrs	x22, 	amair_el1
-	mrs	x23, 	cntkctl_el1
-	mrs	x24,	par_el1
-	mrs	x25,	mdscr_el1
-
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-	stp	x8, x9, [x3, #32]
-	stp	x10, x11, [x3, #48]
-	stp	x12, x13, [x3, #64]
-	stp	x14, x15, [x3, #80]
-	stp	x16, x17, [x3, #96]
-	stp	x18, x19, [x3, #112]
-	stp	x20, x21, [x3, #128]
-	stp	x22, x23, [x3, #144]
-	stp	x24, x25, [x3, #160]
-.endm
-
-.macro save_debug type
-	// x4: pointer to register set
-	// x5: number of registers to skip
-	// x6..x22 trashed
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	mrs	x21, \type\()15_el1
-	mrs	x20, \type\()14_el1
-	mrs	x19, \type\()13_el1
-	mrs	x18, \type\()12_el1
-	mrs	x17, \type\()11_el1
-	mrs	x16, \type\()10_el1
-	mrs	x15, \type\()9_el1
-	mrs	x14, \type\()8_el1
-	mrs	x13, \type\()7_el1
-	mrs	x12, \type\()6_el1
-	mrs	x11, \type\()5_el1
-	mrs	x10, \type\()4_el1
-	mrs	x9, \type\()3_el1
-	mrs	x8, \type\()2_el1
-	mrs	x7, \type\()1_el1
-	mrs	x6, \type\()0_el1
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	str	x21, [x4, #(15 * 8)]
-	str	x20, [x4, #(14 * 8)]
-	str	x19, [x4, #(13 * 8)]
-	str	x18, [x4, #(12 * 8)]
-	str	x17, [x4, #(11 * 8)]
-	str	x16, [x4, #(10 * 8)]
-	str	x15, [x4, #(9 * 8)]
-	str	x14, [x4, #(8 * 8)]
-	str	x13, [x4, #(7 * 8)]
-	str	x12, [x4, #(6 * 8)]
-	str	x11, [x4, #(5 * 8)]
-	str	x10, [x4, #(4 * 8)]
-	str	x9, [x4, #(3 * 8)]
-	str	x8, [x4, #(2 * 8)]
-	str	x7, [x4, #(1 * 8)]
-	str	x6, [x4, #(0 * 8)]
-.endm
-
-.macro restore_sysregs
-	// x2: base address for cpu context
-	// x3: tmp register
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1)
-
-	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
-	ldp	x8, x9, [x3, #32]
-	ldp	x10, x11, [x3, #48]
-	ldp	x12, x13, [x3, #64]
-	ldp	x14, x15, [x3, #80]
-	ldp	x16, x17, [x3, #96]
-	ldp	x18, x19, [x3, #112]
-	ldp	x20, x21, [x3, #128]
-	ldp	x22, x23, [x3, #144]
-	ldp	x24, x25, [x3, #160]
-
-	msr	vmpidr_el2,	x4
-	msr	csselr_el1,	x5
-	msr	sctlr_el1,	x6
-	msr	actlr_el1,	x7
-	msr	cpacr_el1,	x8
-	msr	ttbr0_el1,	x9
-	msr	ttbr1_el1,	x10
-	msr	tcr_el1,	x11
-	msr	esr_el1,	x12
-	msr	afsr0_el1,	x13
-	msr	afsr1_el1,	x14
-	msr	far_el1,	x15
-	msr	mair_el1,	x16
-	msr	vbar_el1,	x17
-	msr	contextidr_el1,	x18
-	msr	tpidr_el0,	x19
-	msr	tpidrro_el0,	x20
-	msr	tpidr_el1,	x21
-	msr	amair_el1,	x22
-	msr	cntkctl_el1,	x23
-	msr	par_el1,	x24
-	msr	mdscr_el1,	x25
-.endm
-
-.macro restore_debug type
-	// x4: pointer to register set
-	// x5: number of registers to skip
-	// x6..x22 trashed
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	ldr	x21, [x4, #(15 * 8)]
-	ldr	x20, [x4, #(14 * 8)]
-	ldr	x19, [x4, #(13 * 8)]
-	ldr	x18, [x4, #(12 * 8)]
-	ldr	x17, [x4, #(11 * 8)]
-	ldr	x16, [x4, #(10 * 8)]
-	ldr	x15, [x4, #(9 * 8)]
-	ldr	x14, [x4, #(8 * 8)]
-	ldr	x13, [x4, #(7 * 8)]
-	ldr	x12, [x4, #(6 * 8)]
-	ldr	x11, [x4, #(5 * 8)]
-	ldr	x10, [x4, #(4 * 8)]
-	ldr	x9, [x4, #(3 * 8)]
-	ldr	x8, [x4, #(2 * 8)]
-	ldr	x7, [x4, #(1 * 8)]
-	ldr	x6, [x4, #(0 * 8)]
-
-	adr	x22, 1f
-	add	x22, x22, x5, lsl #2
-	br	x22
-1:
-	msr	\type\()15_el1, x21
-	msr	\type\()14_el1, x20
-	msr	\type\()13_el1, x19
-	msr	\type\()12_el1, x18
-	msr	\type\()11_el1, x17
-	msr	\type\()10_el1, x16
-	msr	\type\()9_el1, x15
-	msr	\type\()8_el1, x14
-	msr	\type\()7_el1, x13
-	msr	\type\()6_el1, x12
-	msr	\type\()5_el1, x11
-	msr	\type\()4_el1, x10
-	msr	\type\()3_el1, x9
-	msr	\type\()2_el1, x8
-	msr	\type\()1_el1, x7
-	msr	\type\()0_el1, x6
-.endm
-
-.macro skip_32bit_state tmp, target
-	// Skip 32bit state if not needed
-	mrs	\tmp, hcr_el2
-	tbnz	\tmp, #HCR_RW_SHIFT, \target
-.endm
-
-.macro skip_tee_state tmp, target
-	// Skip ThumbEE state if not needed
-	mrs	\tmp, id_pfr0_el1
-	tbz	\tmp, #12, \target
-.endm
-
-.macro skip_debug_state tmp, target
-	ldr	\tmp, [x0, #VCPU_DEBUG_FLAGS]
-	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
-.endm
-
-/*
- * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
- */
-.macro skip_fpsimd_state tmp, target
-	mrs	\tmp, cptr_el2
-	tbnz	\tmp, #CPTR_EL2_TFP_SHIFT, \target
-.endm
-
-.macro compute_debug_state target
-	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
-	// is set, we do a full save/restore cycle and disable trapping.
-	add	x25, x0, #VCPU_CONTEXT
-
-	// Check the state of MDSCR_EL1
-	ldr	x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)]
-	and	x26, x25, #DBG_MDSCR_KDE
-	and	x25, x25, #DBG_MDSCR_MDE
-	adds	xzr, x25, x26
-	b.eq	9998f		// Nothing to see there
-
-	// If any interesting bits was set, we must set the flag
-	mov	x26, #KVM_ARM64_DEBUG_DIRTY
-	str	x26, [x0, #VCPU_DEBUG_FLAGS]
-	b	9999f		// Don't skip restore
-
-9998:
-	// Otherwise load the flags from memory in case we recently
-	// trapped
-	skip_debug_state x25, \target
-9999:
-.endm
-
-.macro save_guest_32bit_state
-	skip_32bit_state x3, 1f
-
-	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
-	mrs	x4, spsr_abt
-	mrs	x5, spsr_und
-	mrs	x6, spsr_irq
-	mrs	x7, spsr_fiq
-	stp	x4, x5, [x3]
-	stp	x6, x7, [x3, #16]
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
-	mrs	x4, dacr32_el2
-	mrs	x5, ifsr32_el2
-	stp	x4, x5, [x3]
-
-	skip_fpsimd_state x8, 2f
-	mrs	x6, fpexc32_el2
-	str	x6, [x3, #16]
-2:
-	skip_debug_state x8, 1f
-	mrs	x7, dbgvcr32_el2
-	str	x7, [x3, #24]
-1:
-.endm
-
-.macro restore_guest_32bit_state
-	skip_32bit_state x3, 1f
-
-	add	x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT)
-	ldp	x4, x5, [x3]
-	ldp	x6, x7, [x3, #16]
-	msr	spsr_abt, x4
-	msr	spsr_und, x5
-	msr	spsr_irq, x6
-	msr	spsr_fiq, x7
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
-	ldp	x4, x5, [x3]
-	msr	dacr32_el2, x4
-	msr	ifsr32_el2, x5
-
-	skip_debug_state x8, 1f
-	ldr	x7, [x3, #24]
-	msr	dbgvcr32_el2, x7
-1:
-.endm
-
-.macro activate_traps
-	ldr     x2, [x0, #VCPU_HCR_EL2]
-
-	/*
-	 * We are about to set CPTR_EL2.TFP to trap all floating point
-	 * register accesses to EL2, however, the ARM ARM clearly states that
-	 * traps are only taken to EL2 if the operation would not otherwise
-	 * trap to EL1.  Therefore, always make sure that for 32-bit guests,
-	 * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
-	 */
-	tbnz	x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
-	mov	x3, #(1 << 30)
-	msr	fpexc32_el2, x3
-	isb
-99:
-	msr     hcr_el2, x2
-	mov	x2, #CPTR_EL2_TTA
-	orr     x2, x2, #CPTR_EL2_TFP
-	msr	cptr_el2, x2
-
-	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
-	msr	hstr_el2, x2
-
-	// Monitor Debug Config - see kvm_arm_setup_debug()
-	ldr	x2, [x0, #VCPU_MDCR_EL2]
-	msr	mdcr_el2, x2
-.endm
-
-.macro deactivate_traps
-	mov	x2, #HCR_RW
-	msr	hcr_el2, x2
-	msr	hstr_el2, xzr
-
-	mrs	x2, mdcr_el2
-	and	x2, x2, #MDCR_EL2_HPMN_MASK
-	msr	mdcr_el2, x2
-.endm
-
-.macro activate_vm
-	ldr	x1, [x0, #VCPU_KVM]
-	kern_hyp_va	x1
-	ldr	x2, [x1, #KVM_VTTBR]
-	msr	vttbr_el2, x2
-.endm
-
-.macro deactivate_vm
-	msr	vttbr_el2, xzr
-.endm
-
-/*
- * Call into the vgic backend for state saving
- */
-.macro save_vgic_state
-alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF
-	bl	__save_vgic_v2_state
-alternative_else
-	bl	__save_vgic_v3_state
-alternative_endif
-	mrs	x24, hcr_el2
-	mov	x25, #HCR_INT_OVERRIDE
-	neg	x25, x25
-	and	x24, x24, x25
-	msr	hcr_el2, x24
-.endm
-
-/*
- * Call into the vgic backend for state restoring
- */
-.macro restore_vgic_state
-	mrs	x24, hcr_el2
-	ldr	x25, [x0, #VCPU_IRQ_LINES]
-	orr	x24, x24, #HCR_INT_OVERRIDE
-	orr	x24, x24, x25
-	msr	hcr_el2, x24
-alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF
-	bl	__restore_vgic_v2_state
-alternative_else
-	bl	__restore_vgic_v3_state
-alternative_endif
-.endm
-
-.macro save_timer_state
-	// x0: vcpu pointer
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va x2
-	ldr	w3, [x2, #KVM_TIMER_ENABLED]
-	cbz	w3, 1f
-
-	mrs	x3, cntv_ctl_el0
-	and	x3, x3, #3
-	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
-
-	isb
-
-	mrs	x3, cntv_cval_el0
-	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
-
-1:
-	// Disable the virtual timer
-	msr	cntv_ctl_el0, xzr
-
-	// Allow physical timer/counter access for the host
-	mrs	x2, cnthctl_el2
-	orr	x2, x2, #3
-	msr	cnthctl_el2, x2
-
-	// Clear cntvoff for the host
-	msr	cntvoff_el2, xzr
-.endm
-
-.macro restore_timer_state
-	// x0: vcpu pointer
-	// Disallow physical timer access for the guest
-	// Physical counter access is allowed
-	mrs	x2, cnthctl_el2
-	orr	x2, x2, #1
-	bic	x2, x2, #2
-	msr	cnthctl_el2, x2
-
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va x2
-	ldr	w3, [x2, #KVM_TIMER_ENABLED]
-	cbz	w3, 1f
-
-	ldr	x3, [x2, #KVM_TIMER_CNTVOFF]
-	msr	cntvoff_el2, x3
-	ldr	x2, [x0, #VCPU_TIMER_CNTV_CVAL]
-	msr	cntv_cval_el0, x2
-	isb
-
-	ldr	w2, [x0, #VCPU_TIMER_CNTV_CTL]
-	and	x2, x2, #3
-	msr	cntv_ctl_el0, x2
-1:
-.endm
-
-__save_sysregs:
-	save_sysregs
-	ret
-
-__restore_sysregs:
-	restore_sysregs
-	ret
-
-/* Save debug state */
-__save_debug:
-	// x2: ptr to CPU context
-	// x3: ptr to debug reg struct
-	// x4/x5/x6-22/x24-26: trashed
-
-	mrs	x26, id_aa64dfr0_el1
-	ubfx	x24, x26, #12, #4	// Extract BRPs
-	ubfx	x25, x26, #20, #4	// Extract WRPs
-	mov	w26, #15
-	sub	w24, w26, w24		// How many BPs to skip
-	sub	w25, w26, w25		// How many WPs to skip
-
-	mov	x5, x24
-	add	x4, x3, #DEBUG_BCR
-	save_debug dbgbcr
-	add	x4, x3, #DEBUG_BVR
-	save_debug dbgbvr
-
-	mov	x5, x25
-	add	x4, x3, #DEBUG_WCR
-	save_debug dbgwcr
-	add	x4, x3, #DEBUG_WVR
-	save_debug dbgwvr
-
-	mrs	x21, mdccint_el1
-	str	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
-	ret
-
-/* Restore debug state */
-__restore_debug:
-	// x2: ptr to CPU context
-	// x3: ptr to debug reg struct
-	// x4/x5/x6-22/x24-26: trashed
-
-	mrs	x26, id_aa64dfr0_el1
-	ubfx	x24, x26, #12, #4	// Extract BRPs
-	ubfx	x25, x26, #20, #4	// Extract WRPs
-	mov	w26, #15
-	sub	w24, w26, w24		// How many BPs to skip
-	sub	w25, w26, w25		// How many WPs to skip
-
-	mov	x5, x24
-	add	x4, x3, #DEBUG_BCR
-	restore_debug dbgbcr
-	add	x4, x3, #DEBUG_BVR
-	restore_debug dbgbvr
-
-	mov	x5, x25
-	add	x4, x3, #DEBUG_WCR
-	restore_debug dbgwcr
-	add	x4, x3, #DEBUG_WVR
-	restore_debug dbgwvr
-
-	ldr	x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)]
-	msr	mdccint_el1, x21
-
-	ret
-
-__save_fpsimd:
-	skip_fpsimd_state x3, 1f
-	save_fpsimd
-1:	ret
-
-__restore_fpsimd:
-	skip_fpsimd_state x3, 1f
-	restore_fpsimd
-1:	ret
-
-switch_to_guest_fpsimd:
-	push	x4, lr
-
-	mrs	x2, cptr_el2
-	bic	x2, x2, #CPTR_EL2_TFP
-	msr	cptr_el2, x2
-	isb
-
-	mrs	x0, tpidr_el2
-
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-	bl __save_fpsimd
-
-	add	x2, x0, #VCPU_CONTEXT
-	bl __restore_fpsimd
-
-	skip_32bit_state x3, 1f
-	ldr	x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
-	msr	fpexc32_el2, x4
-1:
-	pop	x4, lr
-	pop	x2, x3
-	pop	x0, x1
-
-	eret
-
-/*
- * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
- *
- * This is the world switch. The first half of the function
- * deals with entering the guest, and anything from __kvm_vcpu_return
- * to the end of the function deals with reentering the host.
- * On the enter path, only x0 (vcpu pointer) must be preserved until
- * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception
- * code) must both be preserved until the epilogue.
- * In both cases, x2 points to the CPU context we're saving/restoring from/to.
- */
-ENTRY(__kvm_vcpu_run)
-	kern_hyp_va	x0
-	msr	tpidr_el2, x0	// Save the vcpu register
-
-	// Host context
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-
-	save_host_regs
-	bl __save_sysregs
-
-	compute_debug_state 1f
-	add	x3, x0, #VCPU_HOST_DEBUG_STATE
-	bl	__save_debug
-1:
-	activate_traps
-	activate_vm
-
-	restore_vgic_state
-	restore_timer_state
-
-	// Guest context
-	add	x2, x0, #VCPU_CONTEXT
-
-	// We must restore the 32-bit state before the sysregs, thanks
-	// to Cortex-A57 erratum #852523.
-	restore_guest_32bit_state
-	bl __restore_sysregs
-
-	skip_debug_state x3, 1f
-	ldr	x3, [x0, #VCPU_DEBUG_PTR]
-	kern_hyp_va x3
-	bl	__restore_debug
-1:
-	restore_guest_regs
-
-	// That's it, no more messing around.
-	eret
-
-__kvm_vcpu_return:
-	// Assume x0 is the vcpu pointer, x1 the return code
-	// Guest's x0-x3 are on the stack
-
-	// Guest context
-	add	x2, x0, #VCPU_CONTEXT
-
-	save_guest_regs
-	bl __save_fpsimd
-	bl __save_sysregs
-
-	skip_debug_state x3, 1f
-	ldr	x3, [x0, #VCPU_DEBUG_PTR]
-	kern_hyp_va x3
-	bl	__save_debug
-1:
-	save_guest_32bit_state
-
-	save_timer_state
-	save_vgic_state
-
-	deactivate_traps
-	deactivate_vm
-
-	// Host context
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-
-	bl __restore_sysregs
-	bl __restore_fpsimd
-	/* Clear FPSIMD and Trace trapping */
-	msr     cptr_el2, xzr
-
-	skip_debug_state x3, 1f
-	// Clear the dirty flag for the next run, as all the state has
-	// already been saved. Note that we nuke the whole 64bit word.
-	// If we ever add more flags, we'll have to be more careful...
-	str	xzr, [x0, #VCPU_DEBUG_FLAGS]
-	add	x3, x0, #VCPU_HOST_DEBUG_STATE
-	bl	__restore_debug
-1:
-	restore_host_regs
-
-	mov	x0, x1
-	ret
-END(__kvm_vcpu_run)
-
-// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
-ENTRY(__kvm_tlb_flush_vmid_ipa)
-	dsb	ishst
-
-	kern_hyp_va	x0
-	ldr	x2, [x0, #KVM_VTTBR]
-	msr	vttbr_el2, x2
-	isb
-
-	/*
-	 * We could do so much better if we had the VA as well.
-	 * Instead, we invalidate Stage-2 for this IPA, and the
-	 * whole of Stage-1. Weep...
-	 */
-	lsr	x1, x1, #12
-	tlbi	ipas2e1is, x1
-	/*
-	 * We have to ensure completion of the invalidation at Stage-2,
-	 * since a table walk on another CPU could refill a TLB with a
-	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
-	 * the Stage-1 invalidation happened first.
-	 */
-	dsb	ish
-	tlbi	vmalle1is
-	dsb	ish
-	isb
-
-	msr	vttbr_el2, xzr
-	ret
-ENDPROC(__kvm_tlb_flush_vmid_ipa)
-
-/**
- * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs
- * @struct kvm *kvm - pointer to kvm structure
- *
- * Invalidates all Stage 1 and 2 TLB entries for current VMID.
- */
-ENTRY(__kvm_tlb_flush_vmid)
-	dsb     ishst
-
-	kern_hyp_va     x0
-	ldr     x2, [x0, #KVM_VTTBR]
-	msr     vttbr_el2, x2
-	isb
-
-	tlbi    vmalls12e1is
-	dsb     ish
-	isb
-
-	msr     vttbr_el2, xzr
-	ret
-ENDPROC(__kvm_tlb_flush_vmid)
-
-ENTRY(__kvm_flush_vm_context)
-	dsb	ishst
-	tlbi	alle1is
-	ic	ialluis
-	dsb	ish
-	ret
-ENDPROC(__kvm_flush_vm_context)
-
-__kvm_hyp_panic:
-	// Stash PAR_EL1 before corrupting it in __restore_sysregs
-	mrs	x0, par_el1
-	push	x0, xzr
-
-	// Guess the context by looking at VTTBR:
-	// If zero, then we're already a host.
-	// Otherwise restore a minimal host context before panicing.
-	mrs	x0, vttbr_el2
-	cbz	x0, 1f
-
-	mrs	x0, tpidr_el2
-
-	deactivate_traps
-	deactivate_vm
-
-	ldr	x2, [x0, #VCPU_HOST_CONTEXT]
-	kern_hyp_va x2
-
-	bl __restore_sysregs
-
-	/*
-	 * Make sure we have a valid host stack, and don't leave junk in the
-	 * frame pointer that will give us a misleading host stack unwinding.
-	 */
-	ldr	x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)]
-	msr	sp_el1, x22
-	mov	x29, xzr
-
-1:	adr	x0, __hyp_panic_str
-	adr	x1, 2f
-	ldp	x2, x3, [x1]
-	sub	x0, x0, x2
-	add	x0, x0, x3
-	mrs	x1, spsr_el2
-	mrs	x2, elr_el2
-	mrs	x3, esr_el2
-	mrs	x4, far_el2
-	mrs	x5, hpfar_el2
-	pop	x6, xzr		// active context PAR_EL1
-	mrs	x7, tpidr_el2
-
-	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
-		      PSR_MODE_EL1h)
-	msr	spsr_el2, lr
-	ldr	lr, =panic
-	msr	elr_el2, lr
-	eret
-
-	.align	3
-2:	.quad	HYP_PAGE_OFFSET
-	.quad	PAGE_OFFSET
-ENDPROC(__kvm_hyp_panic)
-
-__hyp_panic_str:
-	.ascii	"HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0"
-
-	.align	2
 
 /*
  * u64 kvm_call_hyp(void *hypfn, ...);
@@ -934,7 +31,7 @@ __hyp_panic_str:
  * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the
  * function pointer can be passed).  The function being called must be mapped
  * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c).  Return values are
- * passed in r0 and r1.
+ * passed in x0.
  *
  * A function pointer with a value of 0 has a special meaning, and is
  * used to implement __hyp_get_vectors in the same way as in
@@ -944,179 +41,3 @@ ENTRY(kvm_call_hyp)
 	hvc	#0
 	ret
 ENDPROC(kvm_call_hyp)
-
-.macro invalid_vector	label, target
-	.align	2
-\label:
-	b \target
-ENDPROC(\label)
-.endm
-
-	/* None of these should ever happen */
-	invalid_vector	el2t_sync_invalid, __kvm_hyp_panic
-	invalid_vector	el2t_irq_invalid, __kvm_hyp_panic
-	invalid_vector	el2t_fiq_invalid, __kvm_hyp_panic
-	invalid_vector	el2t_error_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_sync_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_irq_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_fiq_invalid, __kvm_hyp_panic
-	invalid_vector	el2h_error_invalid, __kvm_hyp_panic
-	invalid_vector	el1_sync_invalid, __kvm_hyp_panic
-	invalid_vector	el1_irq_invalid, __kvm_hyp_panic
-	invalid_vector	el1_fiq_invalid, __kvm_hyp_panic
-	invalid_vector	el1_error_invalid, __kvm_hyp_panic
-
-el1_sync:					// Guest trapped into EL2
-	push	x0, x1
-	push	x2, x3
-
-	mrs	x1, esr_el2
-	lsr	x2, x1, #ESR_ELx_EC_SHIFT
-
-	cmp	x2, #ESR_ELx_EC_HVC64
-	b.ne	el1_trap
-
-	mrs	x3, vttbr_el2			// If vttbr is valid, the 64bit guest
-	cbnz	x3, el1_trap			// called HVC
-
-	/* Here, we're pretty sure the host called HVC. */
-	pop	x2, x3
-	pop	x0, x1
-
-	/* Check for __hyp_get_vectors */
-	cbnz	x0, 1f
-	mrs	x0, vbar_el2
-	b	2f
-
-1:	push	lr, xzr
-
-	/*
-	 * Compute the function address in EL2, and shuffle the parameters.
-	 */
-	kern_hyp_va	x0
-	mov	lr, x0
-	mov	x0, x1
-	mov	x1, x2
-	mov	x2, x3
-	blr	lr
-
-	pop	lr, xzr
-2:	eret
-
-el1_trap:
-	/*
-	 * x1: ESR
-	 * x2: ESR_EC
-	 */
-
-	/* Guest accessed VFP/SIMD registers, save host, restore Guest */
-	cmp	x2, #ESR_ELx_EC_FP_ASIMD
-	b.eq	switch_to_guest_fpsimd
-
-	cmp	x2, #ESR_ELx_EC_DABT_LOW
-	mov	x0, #ESR_ELx_EC_IABT_LOW
-	ccmp	x2, x0, #4, ne
-	b.ne	1f		// Not an abort we care about
-
-	/* This is an abort. Check for permission fault */
-alternative_if_not ARM64_WORKAROUND_834220
-	and	x2, x1, #ESR_ELx_FSC_TYPE
-	cmp	x2, #FSC_PERM
-	b.ne	1f		// Not a permission fault
-alternative_else
-	nop			// Use the permission fault path to
-	nop			// check for a valid S1 translation,
-	nop			// regardless of the ESR value.
-alternative_endif
-
-	/*
-	 * Check for Stage-1 page table walk, which is guaranteed
-	 * to give a valid HPFAR_EL2.
-	 */
-	tbnz	x1, #7, 1f	// S1PTW is set
-
-	/* Preserve PAR_EL1 */
-	mrs	x3, par_el1
-	push	x3, xzr
-
-	/*
-	 * Permission fault, HPFAR_EL2 is invalid.
-	 * Resolve the IPA the hard way using the guest VA.
-	 * Stage-1 translation already validated the memory access rights.
-	 * As such, we can use the EL1 translation regime, and don't have
-	 * to distinguish between EL0 and EL1 access.
-	 */
-	mrs	x2, far_el2
-	at	s1e1r, x2
-	isb
-
-	/* Read result */
-	mrs	x3, par_el1
-	pop	x0, xzr			// Restore PAR_EL1 from the stack
-	msr	par_el1, x0
-	tbnz	x3, #0, 3f		// Bail out if we failed the translation
-	ubfx	x3, x3, #12, #36	// Extract IPA
-	lsl	x3, x3, #4		// and present it like HPFAR
-	b	2f
-
-1:	mrs	x3, hpfar_el2
-	mrs	x2, far_el2
-
-2:	mrs	x0, tpidr_el2
-	str	w1, [x0, #VCPU_ESR_EL2]
-	str	x2, [x0, #VCPU_FAR_EL2]
-	str	x3, [x0, #VCPU_HPFAR_EL2]
-
-	mov	x1, #ARM_EXCEPTION_TRAP
-	b	__kvm_vcpu_return
-
-	/*
-	 * Translation failed. Just return to the guest and
-	 * let it fault again. Another CPU is probably playing
-	 * behind our back.
-	 */
-3:	pop	x2, x3
-	pop	x0, x1
-
-	eret
-
-el1_irq:
-	push	x0, x1
-	push	x2, x3
-	mrs	x0, tpidr_el2
-	mov	x1, #ARM_EXCEPTION_IRQ
-	b	__kvm_vcpu_return
-
-	.ltorg
-
-	.align 11
-
-ENTRY(__kvm_hyp_vector)
-	ventry	el2t_sync_invalid		// Synchronous EL2t
-	ventry	el2t_irq_invalid		// IRQ EL2t
-	ventry	el2t_fiq_invalid		// FIQ EL2t
-	ventry	el2t_error_invalid		// Error EL2t
-
-	ventry	el2h_sync_invalid		// Synchronous EL2h
-	ventry	el2h_irq_invalid		// IRQ EL2h
-	ventry	el2h_fiq_invalid		// FIQ EL2h
-	ventry	el2h_error_invalid		// Error EL2h
-
-	ventry	el1_sync			// Synchronous 64-bit EL1
-	ventry	el1_irq				// IRQ 64-bit EL1
-	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
-	ventry	el1_error_invalid		// Error 64-bit EL1
-
-	ventry	el1_sync			// Synchronous 32-bit EL1
-	ventry	el1_irq				// IRQ 32-bit EL1
-	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
-	ventry	el1_error_invalid		// Error 32-bit EL1
-ENDPROC(__kvm_hyp_vector)
-
-
-ENTRY(__kvm_get_mdcr_el2)
-	mrs	x0, mdcr_el2
-	ret
-ENDPROC(__kvm_get_mdcr_el2)
-
-	.popsection
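Note: after this deletion, hyp.S retains only the kvm_call_hyp trampoline; everything else moves to arch/arm64/kvm/hyp/. The calling convention documented above is used like this in the existing arm/arm64 KVM code:

	/* The hyp-mode function pointer plus up to three arguments travel
	 * through the HVC; the result comes back in x0. */
	ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);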
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
new file mode 100644
index 000000000000..826032bc3945
--- /dev/null
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for Kernel-based Virtual Machine module, HYP part
+#
+
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
+obj-$(CONFIG_KVM_ARM_HOST) += entry.o
+obj-$(CONFIG_KVM_ARM_HOST) += switch.o
+obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o
+obj-$(CONFIG_KVM_ARM_HOST) += tlb.o
+obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
new file mode 100644
index 000000000000..c9c1e97501a9
--- /dev/null
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+#define read_debug(r,n)		read_sysreg(r##n##_el1)
+#define write_debug(v,r,n)	write_sysreg(v, r##n##_el1)
+
+#define save_debug(ptr,reg,nr)						\
+	switch (nr) {							\
+	case 15:	ptr[15] = read_debug(reg, 15);			\
+	case 14:	ptr[14] = read_debug(reg, 14);			\
+	case 13:	ptr[13] = read_debug(reg, 13);			\
+	case 12:	ptr[12] = read_debug(reg, 12);			\
+	case 11:	ptr[11] = read_debug(reg, 11);			\
+	case 10:	ptr[10] = read_debug(reg, 10);			\
+	case 9:		ptr[9] = read_debug(reg, 9);			\
+	case 8:		ptr[8] = read_debug(reg, 8);			\
+	case 7:		ptr[7] = read_debug(reg, 7);			\
+	case 6:		ptr[6] = read_debug(reg, 6);			\
+	case 5:		ptr[5] = read_debug(reg, 5);			\
+	case 4:		ptr[4] = read_debug(reg, 4);			\
+	case 3:		ptr[3] = read_debug(reg, 3);			\
+	case 2:		ptr[2] = read_debug(reg, 2);			\
+	case 1:		ptr[1] = read_debug(reg, 1);			\
+	default:	ptr[0] = read_debug(reg, 0);			\
+	}
+
+#define restore_debug(ptr,reg,nr)					\
+	switch (nr) {							\
+	case 15:	write_debug(ptr[15], reg, 15);			\
+	case 14:	write_debug(ptr[14], reg, 14);			\
+	case 13:	write_debug(ptr[13], reg, 13);			\
+	case 12:	write_debug(ptr[12], reg, 12);			\
+	case 11:	write_debug(ptr[11], reg, 11);			\
+	case 10:	write_debug(ptr[10], reg, 10);			\
+	case 9:		write_debug(ptr[9], reg, 9);			\
+	case 8:		write_debug(ptr[8], reg, 8);			\
+	case 7:		write_debug(ptr[7], reg, 7);			\
+	case 6:		write_debug(ptr[6], reg, 6);			\
+	case 5:		write_debug(ptr[5], reg, 5);			\
+	case 4:		write_debug(ptr[4], reg, 4);			\
+	case 3:		write_debug(ptr[3], reg, 3);			\
+	case 2:		write_debug(ptr[2], reg, 2);			\
+	case 1:		write_debug(ptr[1], reg, 1);			\
+	default:	write_debug(ptr[0], reg, 0);			\
+	}
+
+void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+				   struct kvm_guest_debug_arch *dbg,
+				   struct kvm_cpu_context *ctxt)
+{
+	u64 aa64dfr0;
+	int brps, wrps;
+
+	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+		return;
+
+	aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
+	brps = (aa64dfr0 >> 12) & 0xf;
+	wrps = (aa64dfr0 >> 20) & 0xf;
+
+	save_debug(dbg->dbg_bcr, dbgbcr, brps);
+	save_debug(dbg->dbg_bvr, dbgbvr, brps);
+	save_debug(dbg->dbg_wcr, dbgwcr, wrps);
+	save_debug(dbg->dbg_wvr, dbgwvr, wrps);
+
+	ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
+}
+
+void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+				      struct kvm_guest_debug_arch *dbg,
+				      struct kvm_cpu_context *ctxt)
+{
+	u64 aa64dfr0;
+	int brps, wrps;
+
+	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+		return;
+
+	aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
+
+	brps = (aa64dfr0 >> 12) & 0xf;
+	wrps = (aa64dfr0 >> 20) & 0xf;
+
+	restore_debug(dbg->dbg_bcr, dbgbcr, brps);
+	restore_debug(dbg->dbg_bvr, dbgbvr, brps);
+	restore_debug(dbg->dbg_wcr, dbgwcr, wrps);
+	restore_debug(dbg->dbg_wvr, dbgwvr, wrps);
+
+	write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
+}
+
+void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+{
+	/* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
+	 * a full save/restore cycle. */
+	if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
+	    (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
+	__debug_save_state(vcpu, &vcpu->arch.host_debug_state,
+			   kern_hyp_va(vcpu->arch.host_cpu_context));
+}
+
+void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
+{
+	__debug_restore_state(vcpu, &vcpu->arch.host_debug_state,
+			      kern_hyp_va(vcpu->arch.host_cpu_context));
+
+	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+		vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+}
+
+static u32 __hyp_text __debug_read_mdcr_el2(void)
+{
+	return read_sysreg(mdcr_el2);
+}
+
+__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void);
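Note: save_debug()/restore_debug() replace the assembly's computed-branch trick (the br x22 sequences deleted from hyp.S) with deliberate switch fall-through: entering at case nr touches registers nr down to 0 in one pass. This works because ID_AA64DFR0_EL1 encodes (number of breakpoints - 1) in bits [15:12] and (number of watchpoints - 1) in bits [23:20], so brps/wrps are already the highest implemented index. A minimal userspace sketch of the idiom:

	/* Fall-through demo: save_down_from(2, slot) fills slots 2, 1, 0. */
	static void save_down_from(int nr, int *slot)
	{
		switch (nr) {
		case 2: slot[2] = 42;	/* fall through */
		case 1: slot[1] = 42;	/* fall through */
		default: slot[0] = 42;
		}
	}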
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
new file mode 100644
index 000000000000..fd0fbe9b7e6a
--- /dev/null
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/fpsimdmacros.h>
+#include <asm/kvm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#define CPU_GP_REG_OFFSET(x)	(CPU_GP_REGS + x)
+#define CPU_XREG_OFFSET(x)	CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x)
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+.macro save_callee_saved_regs ctxt
+	stp	x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+	stp	x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+	stp	x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+	stp	x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+	stp	x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+	stp	x29, lr,  [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+.macro restore_callee_saved_regs ctxt
+	ldp	x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
+	ldp	x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
+	ldp	x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
+	ldp	x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)]
+	ldp	x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)]
+	ldp	x29, lr,  [\ctxt, #CPU_XREG_OFFSET(29)]
+.endm
+
+/*
+ * u64 __guest_enter(struct kvm_vcpu *vcpu,
+ *		     struct kvm_cpu_context *host_ctxt);
+ */
+ENTRY(__guest_enter)
+	// x0: vcpu
+	// x1: host/guest context
+	// x2-x18: clobbered by macros
+
+	// Store the host regs
+	save_callee_saved_regs x1
+
+	// Preserve vcpu & host_ctxt for use at exit time
+	stp	x0, x1, [sp, #-16]!
+
+	add	x1, x0, #VCPU_CONTEXT
+
+	// Prepare x0-x1 for later restore by pushing them onto the stack
+	ldp	x2, x3, [x1, #CPU_XREG_OFFSET(0)]
+	stp	x2, x3, [sp, #-16]!
+
+	// x2-x18
+	ldp	x2, x3,   [x1, #CPU_XREG_OFFSET(2)]
+	ldp	x4, x5,   [x1, #CPU_XREG_OFFSET(4)]
+	ldp	x6, x7,   [x1, #CPU_XREG_OFFSET(6)]
+	ldp	x8, x9,   [x1, #CPU_XREG_OFFSET(8)]
+	ldp	x10, x11, [x1, #CPU_XREG_OFFSET(10)]
+	ldp	x12, x13, [x1, #CPU_XREG_OFFSET(12)]
+	ldp	x14, x15, [x1, #CPU_XREG_OFFSET(14)]
+	ldp	x16, x17, [x1, #CPU_XREG_OFFSET(16)]
+	ldr	x18,      [x1, #CPU_XREG_OFFSET(18)]
+
+	// x19-x29, lr
+	restore_callee_saved_regs x1
+
+	// Last bits of the 64bit state
+	ldp	x0, x1, [sp], #16
+
+	// Do not touch any register after this!
+	eret
+ENDPROC(__guest_enter)
+
+ENTRY(__guest_exit)
+	// x0: vcpu
+	// x1: return code
+	// x2-x3: free
+	// x4-x29,lr: vcpu regs
+	// vcpu x0-x3 on the stack
+
+	add	x2, x0, #VCPU_CONTEXT
+
+	stp	x4, x5,   [x2, #CPU_XREG_OFFSET(4)]
+	stp	x6, x7,   [x2, #CPU_XREG_OFFSET(6)]
+	stp	x8, x9,   [x2, #CPU_XREG_OFFSET(8)]
+	stp	x10, x11, [x2, #CPU_XREG_OFFSET(10)]
+	stp	x12, x13, [x2, #CPU_XREG_OFFSET(12)]
+	stp	x14, x15, [x2, #CPU_XREG_OFFSET(14)]
+	stp	x16, x17, [x2, #CPU_XREG_OFFSET(16)]
+	str	x18,      [x2, #CPU_XREG_OFFSET(18)]
+
+	ldp	x6, x7, [sp], #16	// x2, x3
+	ldp	x4, x5, [sp], #16	// x0, x1
+
+	stp	x4, x5, [x2, #CPU_XREG_OFFSET(0)]
+	stp	x6, x7, [x2, #CPU_XREG_OFFSET(2)]
+
+	save_callee_saved_regs x2
+
+	// Restore vcpu & host_ctxt from the stack
+	// (preserving return code in x1)
+	ldp	x0, x2, [sp], #16
+	// Now restore the host regs
+	restore_callee_saved_regs x2
+
+	mov	x0, x1
+	ret
+ENDPROC(__guest_exit)
+
+ENTRY(__fpsimd_guest_restore)
+	stp	x4, lr, [sp, #-16]!
+
+	mrs	x2, cptr_el2
+	bic	x2, x2, #CPTR_EL2_TFP
+	msr	cptr_el2, x2
+	isb
+
+	mrs	x3, tpidr_el2
+
+	ldr	x0, [x3, #VCPU_HOST_CONTEXT]
+	kern_hyp_va x0
+	add	x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	bl	__fpsimd_save_state
+
+	add	x2, x3, #VCPU_CONTEXT
+	add	x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS)
+	bl	__fpsimd_restore_state
+
+	// Skip restoring fpexc32 for AArch64 guests
+	mrs	x1, hcr_el2
+	tbnz	x1, #HCR_RW_SHIFT, 1f
+	ldr	x4, [x3, #VCPU_FPEXC32_EL2]
+	msr	fpexc32_el2, x4
+1:
+	ldp	x4, lr, [sp], #16
+	ldp	x2, x3, [sp], #16
+	ldp	x0, x1, [sp], #16
+
+	eret
+ENDPROC(__fpsimd_guest_restore)
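Note: __guest_enter saves the host's callee-saved registers into host_ctxt, stashes vcpu and host_ctxt on the hyp stack for __guest_exit, loads the full guest register file and erets; __guest_exit reverses this and hands the exit code back in x0. The C side (hyp/switch.c, added by this same commit) drives it roughly as:

	/* Sketch of the caller; the prototype matches the comment above. */
	u64 __guest_enter(struct kvm_vcpu *vcpu,
			  struct kvm_cpu_context *host_ctxt);

	exit_code = __guest_enter(vcpu, host_ctxt);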
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
new file mode 100644
index 000000000000..da3f22c7f14a
--- /dev/null
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/fpsimdmacros.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+ENTRY(__fpsimd_save_state)
+	fpsimd_save	x0, 1
+	ret
+ENDPROC(__fpsimd_save_state)
+
+ENTRY(__fpsimd_restore_state)
+	fpsimd_restore	x0, 1
+	ret
+ENDPROC(__fpsimd_restore_state)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
new file mode 100644
index 000000000000..93e8d983c0bd
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/alternative.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+	.text
+	.pushsection	.hyp.text, "ax"
+
+.macro	save_x0_to_x3
+	stp	x0, x1, [sp, #-16]!
+	stp	x2, x3, [sp, #-16]!
+.endm
+
+.macro	restore_x0_to_x3
+	ldp	x2, x3, [sp], #16
+	ldp	x0, x1, [sp], #16
+.endm
+
+el1_sync:				// Guest trapped into EL2
+	save_x0_to_x3
+
+	mrs	x1, esr_el2
+	lsr	x2, x1, #ESR_ELx_EC_SHIFT
+
+	cmp	x2, #ESR_ELx_EC_HVC64
+	b.ne	el1_trap
+
+	mrs	x3, vttbr_el2		// If vttbr is valid, the 64bit guest
+	cbnz	x3, el1_trap		// called HVC
+
+	/* Here, we're pretty sure the host called HVC. */
+	restore_x0_to_x3
+
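+	/*
+	 * Host HVC convention, as implied by the code below: x0 == 0
+	 * requests the current vectors (__hyp_get_vectors); otherwise x0
+	 * holds the kernel VA of the EL2 function to call, with x1-x3 as
+	 * its arguments.
+	 */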
+	/* Check for __hyp_get_vectors */
+	cbnz	x0, 1f
+	mrs	x0, vbar_el2
+	b	2f
+
+1:	stp	lr, xzr, [sp, #-16]!
+
+	/*
+	 * Compute the function address in EL2, and shuffle the parameters.
+	 */
+	kern_hyp_va	x0
+	mov	lr, x0
+	mov	x0, x1
+	mov	x1, x2
+	mov	x2, x3
+	blr	lr
+
+	ldp	lr, xzr, [sp], #16
+2:	eret
+
+el1_trap:
+	/*
+	 * x1: ESR
+	 * x2: ESR_EC
+	 */
+
+	/* Guest accessed VFP/SIMD registers, save host, restore guest */
+	cmp	x2, #ESR_ELx_EC_FP_ASIMD
+	b.eq	__fpsimd_guest_restore
+
+	cmp	x2, #ESR_ELx_EC_DABT_LOW
+	mov	x0, #ESR_ELx_EC_IABT_LOW
+	ccmp	x2, x0, #4, ne
+	b.ne	1f		// Not an abort we care about
+
+	/* This is an abort. Check for permission fault */
+alternative_if_not ARM64_WORKAROUND_834220
+	and	x2, x1, #ESR_ELx_FSC_TYPE
+	cmp	x2, #FSC_PERM
+	b.ne	1f		// Not a permission fault
+alternative_else
+	nop			// Use the permission fault path to
+	nop			// check for a valid S1 translation,
+	nop			// regardless of the ESR value.
+alternative_endif
+
+	/*
+	 * Check for Stage-1 page table walk, which is guaranteed
+	 * to give a valid HPFAR_EL2.
+	 */
+	tbnz	x1, #7, 1f	// S1PTW is set
+
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	stp	x3, xzr, [sp, #-16]!
+
+	/*
+	 * Permission fault, HPFAR_EL2 is invalid.
+	 * Resolve the IPA the hard way using the guest VA.
+	 * Stage-1 translation already validated the memory access rights.
+	 * As such, we can use the EL1 translation regime, and don't have
+	 * to distinguish between EL0 and EL1 access.
+	 */
+	mrs	x2, far_el2
+	at	s1e1r, x2
+	isb
+
+	/* Read result */
+	mrs	x3, par_el1
+	ldp	x0, xzr, [sp], #16	// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
+	tbnz	x3, #0, 3f		// Bail out if we failed the translation
+	ubfx	x3, x3, #12, #36	// Extract IPA
+	lsl	x3, x3, #4		// and present it like HPFAR
+	b	2f
+
+1:	mrs	x3, hpfar_el2
+	mrs	x2, far_el2
+
+2:	mrs	x0, tpidr_el2
+	str	w1, [x0, #VCPU_ESR_EL2]
+	str	x2, [x0, #VCPU_FAR_EL2]
+	str	x3, [x0, #VCPU_HPFAR_EL2]
+
+	mov	x1, #ARM_EXCEPTION_TRAP
+	b	__guest_exit
+
+	/*
+	 * Translation failed. Just return to the guest and
+	 * let it fault again. Another CPU is probably playing
+	 * behind our back.
+	 */
+3:	restore_x0_to_x3
+
+	eret
+
+el1_irq:
+	save_x0_to_x3
+	mrs	x0, tpidr_el2
+	mov	x1, #ARM_EXCEPTION_IRQ
+	b	__guest_exit
+
+ENTRY(__hyp_do_panic)
+	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
+		      PSR_MODE_EL1h)
+	msr	spsr_el2, lr
+	ldr	lr, =panic
+	msr	elr_el2, lr
+	eret
+ENDPROC(__hyp_do_panic)
+
+.macro invalid_vector	label, target = __hyp_panic
+	.align	2
+\label:
+	b \target
+ENDPROC(\label)
+.endm
+
+	/* None of these should ever happen */
+	invalid_vector	el2t_sync_invalid
+	invalid_vector	el2t_irq_invalid
+	invalid_vector	el2t_fiq_invalid
+	invalid_vector	el2t_error_invalid
+	invalid_vector	el2h_sync_invalid
+	invalid_vector	el2h_irq_invalid
+	invalid_vector	el2h_fiq_invalid
+	invalid_vector	el2h_error_invalid
+	invalid_vector	el1_sync_invalid
+	invalid_vector	el1_irq_invalid
+	invalid_vector	el1_fiq_invalid
+	invalid_vector	el1_error_invalid
+
+	.ltorg
+
+	.align 11
+
+ENTRY(__kvm_hyp_vector)
+	ventry	el2t_sync_invalid		// Synchronous EL2t
+	ventry	el2t_irq_invalid		// IRQ EL2t
+	ventry	el2t_fiq_invalid		// FIQ EL2t
+	ventry	el2t_error_invalid		// Error EL2t
+
+	ventry	el2h_sync_invalid		// Synchronous EL2h
+	ventry	el2h_irq_invalid		// IRQ EL2h
+	ventry	el2h_fiq_invalid		// FIQ EL2h
+	ventry	el2h_error_invalid		// Error EL2h
+
+	ventry	el1_sync			// Synchronous 64-bit EL1
+	ventry	el1_irq				// IRQ 64-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
+	ventry	el1_error_invalid		// Error 64-bit EL1
+
+	ventry	el1_sync			// Synchronous 32-bit EL1
+	ventry	el1_irq				// IRQ 32-bit EL1
+	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
+	ventry	el1_error_invalid		// Error 32-bit EL1
+ENDPROC(__kvm_hyp_vector)
diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h
new file mode 100644
index 000000000000..fb275178b6af
--- /dev/null
+++ b/arch/arm64/kvm/hyp/hyp.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM64_KVM_HYP_H__
+#define __ARM64_KVM_HYP_H__
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
+
+#define __hyp_text __section(.hyp.text) notrace
+
+#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK)
+#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \
+						      + PAGE_OFFSET)
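+
+/*
+ * kern_hyp_va() masks a kernel VA down into the HYP VA range;
+ * hyp_kern_va() converts a HYP VA back by rebasing it on PAGE_OFFSET.
+ */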
+
+/**
+ * hyp_alternate_select - Generates patchable code sequences that are
+ * used to switch between two implementations of a function, depending
+ * on the availability of a feature.
+ *
+ * @fname: a symbol name that will be defined as a function returning a
+ * function pointer whose type will match @orig and @alt
+ * @orig: a pointer to the default function, as returned by @fname when
+ * @cond doesn't hold
+ * @alt: a pointer to the alternate function, as returned by @fname
+ * when @cond holds
+ * @cond: a CPU feature (as described in asm/cpufeature.h)
+ */
+#define hyp_alternate_select(fname, orig, alt, cond)			\
+typeof(orig) * __hyp_text fname(void)					\
+{									\
+	typeof(alt) *val = orig;					\
+	asm volatile(ALTERNATIVE("nop		\n",			\
+				 "mov	%0, %1	\n",			\
+				 cond)					\
+		     : "+r" (val) : "r" (alt));				\
+	return val;							\
+}
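+
+/*
+ * Usage sketch (see switch.c in this patch): the generated function is
+ * called to pick the implementation, which is then invoked:
+ *
+ *	static hyp_alternate_select(__vgic_call_save_state,
+ *				    __vgic_v2_save_state, __vgic_v3_save_state,
+ *				    ARM64_HAS_SYSREG_GIC_CPUIF);
+ *
+ *	__vgic_call_save_state()(vcpu);
+ */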
+
+void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
+void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+
+void __timer_save_state(struct kvm_vcpu *vcpu);
+void __timer_restore_state(struct kvm_vcpu *vcpu);
+
+void __sysreg_save_state(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
+void __sysreg32_save_state(struct kvm_vcpu *vcpu);
+void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
+
+void __debug_save_state(struct kvm_vcpu *vcpu,
+			struct kvm_guest_debug_arch *dbg,
+			struct kvm_cpu_context *ctxt);
+void __debug_restore_state(struct kvm_vcpu *vcpu,
+			   struct kvm_guest_debug_arch *dbg,
+			   struct kvm_cpu_context *ctxt);
+void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
+void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+
+void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
+void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
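+
+/*
+ * TFP is set on guest entry to trap FP accesses; finding it clear on
+ * exit means the trap fired and the guest now owns the FP/SIMD state.
+ */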
+static inline bool __fpsimd_enabled(void)
+{
+	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
+}
+
+u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
+void __noreturn __hyp_do_panic(unsigned long, ...);
+
+#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
new file mode 100644
index 000000000000..ca8f5a5e2f96
--- /dev/null
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hyp.h"
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	/*
+	 * We are about to set CPTR_EL2.TFP to trap all floating point
+	 * register accesses to EL2. However, the ARM ARM clearly states that
+	 * traps are only taken to EL2 if the operation would not otherwise
+	 * trap to EL1. Therefore, for 32-bit guests, always make sure that
+	 * FPEXC.EN is set when setting the TFP bit, to prevent traps to EL1.
+	 */
+	val = vcpu->arch.hcr_el2;
+	if (!(val & HCR_RW)) {
+		write_sysreg(1 << 30, fpexc32_el2);
+		isb();
+	}
+	write_sysreg(val, hcr_el2);
+	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+	write_sysreg(1 << 15, hstr_el2);
+	write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2);
+	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+	write_sysreg(HCR_RW, hcr_el2);
+	write_sysreg(0, hstr_el2);
+	write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2);
+	write_sysreg(0, cptr_el2);
+}
+
+static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+}
+
+static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+	write_sysreg(0, vttbr_el2);
+}
+
+static hyp_alternate_select(__vgic_call_save_state,
+			    __vgic_v2_save_state, __vgic_v3_save_state,
+			    ARM64_HAS_SYSREG_GIC_CPUIF);
+
+static hyp_alternate_select(__vgic_call_restore_state,
+			    __vgic_v2_restore_state, __vgic_v3_restore_state,
+			    ARM64_HAS_SYSREG_GIC_CPUIF);
+
+static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+{
+	__vgic_call_save_state()(vcpu);
+	write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
+}
+
+static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	val = read_sysreg(hcr_el2);
+	val |= HCR_INT_OVERRIDE;
+	val |= vcpu->arch.irq_lines;
+	write_sysreg(val, hcr_el2);
+
+	__vgic_call_restore_state()(vcpu);
+}
+
+static int __hyp_text __guest_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	bool fp_enabled;
+	u64 exit_code;
+
+	vcpu = kern_hyp_va(vcpu);
+	write_sysreg(vcpu, tpidr_el2);
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	__sysreg_save_state(host_ctxt);
+	__debug_cond_save_host_state(vcpu);
+
+	__activate_traps(vcpu);
+	__activate_vm(vcpu);
+
+	__vgic_restore_state(vcpu);
+	__timer_restore_state(vcpu);
+
+	/*
+	 * We must restore the 32-bit state before the sysregs, thanks
+	 * to Cortex-A57 erratum #852523.
+	 */
+	__sysreg32_restore_state(vcpu);
+	__sysreg_restore_state(guest_ctxt);
+	__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
+
+	/* Jump in the fire! */
+	exit_code = __guest_enter(vcpu, host_ctxt);
+	/* And we're baaack! */
+
+	fp_enabled = __fpsimd_enabled();
+
+	__sysreg_save_state(guest_ctxt);
+	__sysreg32_save_state(vcpu);
+	__timer_save_state(vcpu);
+	__vgic_save_state(vcpu);
+
+	__deactivate_traps(vcpu);
+	__deactivate_vm(vcpu);
+
+	__sysreg_restore_state(host_ctxt);
+
+	if (fp_enabled) {
+		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
+		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+	}
+
+	__debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
+	__debug_cond_restore_host_state(vcpu);
+
+	return exit_code;
+}
+
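+/*
+ * __kvm_vcpu_run() is the symbol the rest of KVM links against; it is
+ * simply an alias for the hyp-resident __guest_run() above.
+ */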
+__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+
+static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+void __hyp_text __noreturn __hyp_panic(void)
+{
+	unsigned long str_va = (unsigned long)__hyp_panic_string;
+	u64 spsr = read_sysreg(spsr_el2);
+	u64 elr = read_sysreg(elr_el2);
+	u64 par = read_sysreg(par_el1);
+
+	if (read_sysreg(vttbr_el2)) {
+		struct kvm_vcpu *vcpu;
+		struct kvm_cpu_context *host_ctxt;
+
+		vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
+		host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+		__deactivate_traps(vcpu);
+		__deactivate_vm(vcpu);
+		__sysreg_restore_state(host_ctxt);
+	}
+
+	/* Call panic for real */
+	__hyp_do_panic(hyp_kern_va(str_va),
+		       spsr,  elr,
+		       read_sysreg(esr_el2),   read_sysreg(far_el2),
+		       read_sysreg(hpfar_el2), par,
+		       (void *)read_sysreg(tpidr_el2));
+
+	unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
new file mode 100644
index 000000000000..425630980229
--- /dev/null
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+/* ctxt is already in the HYP VA space */
+void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+{
+	ctxt->sys_regs[MPIDR_EL1]	= read_sysreg(vmpidr_el2);
+	ctxt->sys_regs[CSSELR_EL1]	= read_sysreg(csselr_el1);
+	ctxt->sys_regs[SCTLR_EL1]	= read_sysreg(sctlr_el1);
+	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);
+	ctxt->sys_regs[CPACR_EL1]	= read_sysreg(cpacr_el1);
+	ctxt->sys_regs[TTBR0_EL1]	= read_sysreg(ttbr0_el1);
+	ctxt->sys_regs[TTBR1_EL1]	= read_sysreg(ttbr1_el1);
+	ctxt->sys_regs[TCR_EL1]		= read_sysreg(tcr_el1);
+	ctxt->sys_regs[ESR_EL1]		= read_sysreg(esr_el1);
+	ctxt->sys_regs[AFSR0_EL1]	= read_sysreg(afsr0_el1);
+	ctxt->sys_regs[AFSR1_EL1]	= read_sysreg(afsr1_el1);
+	ctxt->sys_regs[FAR_EL1]		= read_sysreg(far_el1);
+	ctxt->sys_regs[MAIR_EL1]	= read_sysreg(mair_el1);
+	ctxt->sys_regs[VBAR_EL1]	= read_sysreg(vbar_el1);
+	ctxt->sys_regs[CONTEXTIDR_EL1]	= read_sysreg(contextidr_el1);
+	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);
+	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0);
+	ctxt->sys_regs[TPIDR_EL1]	= read_sysreg(tpidr_el1);
+	ctxt->sys_regs[AMAIR_EL1]	= read_sysreg(amair_el1);
+	ctxt->sys_regs[CNTKCTL_EL1]	= read_sysreg(cntkctl_el1);
+	ctxt->sys_regs[PAR_EL1]		= read_sysreg(par_el1);
+	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);
+
+	ctxt->gp_regs.regs.sp		= read_sysreg(sp_el0);
+	ctxt->gp_regs.regs.pc		= read_sysreg(elr_el2);
+	ctxt->gp_regs.regs.pstate	= read_sysreg(spsr_el2);
+	ctxt->gp_regs.sp_el1		= read_sysreg(sp_el1);
+	ctxt->gp_regs.elr_el1		= read_sysreg(elr_el1);
+	ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1);
+}
+
+void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+{
+	write_sysreg(ctxt->sys_regs[MPIDR_EL1],	  vmpidr_el2);
+	write_sysreg(ctxt->sys_regs[CSSELR_EL1],  csselr_el1);
+	write_sysreg(ctxt->sys_regs[SCTLR_EL1],	  sctlr_el1);
+	write_sysreg(ctxt->sys_regs[ACTLR_EL1],	  actlr_el1);
+	write_sysreg(ctxt->sys_regs[CPACR_EL1],	  cpacr_el1);
+	write_sysreg(ctxt->sys_regs[TTBR0_EL1],	  ttbr0_el1);
+	write_sysreg(ctxt->sys_regs[TTBR1_EL1],	  ttbr1_el1);
+	write_sysreg(ctxt->sys_regs[TCR_EL1],	  tcr_el1);
+	write_sysreg(ctxt->sys_regs[ESR_EL1],	  esr_el1);
+	write_sysreg(ctxt->sys_regs[AFSR0_EL1],	  afsr0_el1);
+	write_sysreg(ctxt->sys_regs[AFSR1_EL1],	  afsr1_el1);
+	write_sysreg(ctxt->sys_regs[FAR_EL1],	  far_el1);
+	write_sysreg(ctxt->sys_regs[MAIR_EL1],	  mair_el1);
+	write_sysreg(ctxt->sys_regs[VBAR_EL1],	  vbar_el1);
+	write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1);
+	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  tpidr_el0);
+	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
+	write_sysreg(ctxt->sys_regs[TPIDR_EL1],	  tpidr_el1);
+	write_sysreg(ctxt->sys_regs[AMAIR_EL1],	  amair_el1);
+	write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1);
+	write_sysreg(ctxt->sys_regs[PAR_EL1],	  par_el1);
+	write_sysreg(ctxt->sys_regs[MDSCR_EL1],	  mdscr_el1);
+
+	write_sysreg(ctxt->gp_regs.regs.sp,	sp_el0);
+	write_sysreg(ctxt->gp_regs.regs.pc,	elr_el2);
+	write_sysreg(ctxt->gp_regs.regs.pstate,	spsr_el2);
+	write_sysreg(ctxt->gp_regs.sp_el1,	sp_el1);
+	write_sysreg(ctxt->gp_regs.elr_el1,	elr_el1);
+	write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1);
+}
+
+void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
+{
+	u64 *spsr, *sysreg;
+
+	if (read_sysreg(hcr_el2) & HCR_RW)
+		return;
+
+	spsr = vcpu->arch.ctxt.gp_regs.spsr;
+	sysreg = vcpu->arch.ctxt.sys_regs;
+
+	spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt);
+	spsr[KVM_SPSR_UND] = read_sysreg(spsr_und);
+	spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq);
+	spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq);
+
+	sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
+	sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
+
+	if (__fpsimd_enabled())
+		sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
+
+	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+		sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
+}
+
+void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
+{
+	u64 *spsr, *sysreg;
+
+	if (read_sysreg(hcr_el2) & HCR_RW)
+		return;
+
+	spsr = vcpu->arch.ctxt.gp_regs.spsr;
+	sysreg = vcpu->arch.ctxt.sys_regs;
+
+	write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt);
+	write_sysreg(spsr[KVM_SPSR_UND], spsr_und);
+	write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq);
+	write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq);
+
+	write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
+	write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
+
+	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
+}
diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c
new file mode 100644
index 000000000000..1051e5d7320f
--- /dev/null
+++ b/arch/arm64/kvm/hyp/timer-sr.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <clocksource/arm_arch_timer.h>
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 val;
+
+	if (kvm->arch.timer.enabled) {
+		timer->cntv_ctl = read_sysreg(cntv_ctl_el0);
+		timer->cntv_cval = read_sysreg(cntv_cval_el0);
+	}
+
+	/* Disable the virtual timer */
+	write_sysreg(0, cntv_ctl_el0);
+
+	/* Allow physical timer/counter access for the host */
+	val = read_sysreg(cnthctl_el2);
+	val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+	write_sysreg(val, cnthctl_el2);
+
+	/* Clear cntvoff for the host */
+	write_sysreg(0, cntvoff_el2);
+}
+
+void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 val;
+
+	/*
+	 * Disallow physical timer access for the guest, but allow
+	 * physical counter access.
+	 */
+	val = read_sysreg(cnthctl_el2);
+	val &= ~CNTHCTL_EL1PCEN;
+	val |= CNTHCTL_EL1PCTEN;
+	write_sysreg(val, cnthctl_el2);
+
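+	/*
+	 * Restore CVAL before re-enabling the timer in CTL; the ISB in
+	 * between makes sure the compare value is in place before the
+	 * enable bit can take effect.
+	 */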
+	if (kvm->arch.timer.enabled) {
+		write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
+		write_sysreg(timer->cntv_cval, cntv_cval_el0);
+		isb();
+		write_sysreg(timer->cntv_ctl, cntv_ctl_el0);
+	}
+}
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
new file mode 100644
index 000000000000..2a7e0d838698
--- /dev/null
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hyp.h"
+
+static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
+{
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	kvm = kern_hyp_va(kvm);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	/*
+	 * We could do so much better if we had the VA as well.
+	 * Instead, we invalidate Stage-2 for this IPA, and the
+	 * whole of Stage-1. Weep...
+	 */
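+	/* The TLBI below operates on the IPA page number, hence the shift */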
+	ipa >>= 12;
+	asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa));
+
+	/*
+	 * We have to ensure completion of the invalidation at Stage-2,
+	 * since a table walk on another CPU could refill a TLB with a
+	 * complete (S1 + S2) walk based on the old Stage-2 mapping if
+	 * the Stage-1 invalidation happened first.
+	 */
+	dsb(ish);
+	asm volatile("tlbi vmalle1is" : : );
+	dsb(ish);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
+__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm,
+							    phys_addr_t ipa);
+
+static void __hyp_text __tlb_flush_vmid(struct kvm *kvm)
+{
+	dsb(ishst);
+
+	/* Switch to requested VMID */
+	kvm = kern_hyp_va(kvm);
+	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+	isb();
+
+	asm volatile("tlbi vmalls12e1is" : : );
+	dsb(ish);
+	isb();
+
+	write_sysreg(0, vttbr_el2);
+}
+
+__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
+static void __hyp_text __tlb_flush_vm_context(void)
+{
+	dsb(ishst);
+	asm volatile("tlbi alle1is	\n"
+		     "ic ialluis	  ": : );
+	dsb(ish);
+}
+
+__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void);
diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c
new file mode 100644
index 000000000000..e71761238cfc
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
+	u32 eisr0, eisr1, elrsr0, elrsr1;
+	int i, nr_lr;
+
+	if (!base)
+		return;
+
+	nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+	cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR);
+	cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR);
+	eisr0  = readl_relaxed(base + GICH_EISR0);
+	elrsr0 = readl_relaxed(base + GICH_ELRSR0);
+	if (unlikely(nr_lr > 32)) {
+		eisr1  = readl_relaxed(base + GICH_EISR1);
+		elrsr1 = readl_relaxed(base + GICH_ELRSR1);
+	} else {
+		eisr1 = elrsr1 = 0;
+	}
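+	/*
+	 * Assemble the 64bit EISR/ELRSR values from the two 32bit MMIO
+	 * halves; the ordering of the halves depends on CPU endianness.
+	 */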
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	cpu_if->vgic_eisr  = ((u64)eisr0 << 32) | eisr1;
+	cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1;
+#else
+	cpu_if->vgic_eisr  = ((u64)eisr1 << 32) | eisr0;
+	cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
+#endif
+	cpu_if->vgic_apr    = readl_relaxed(base + GICH_APR);
+
+	writel_relaxed(0, base + GICH_HCR);
+
+	for (i = 0; i < nr_lr; i++)
+		cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
+}
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
+	int i, nr_lr;
+
+	if (!base)
+		return;
+
+	writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
+	writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR);
+	writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
+
+	nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+	for (i = 0; i < nr_lr; i++)
+		writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
new file mode 100644
index 000000000000..9142e082f5f3
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+
+#include "hyp.h"
+
+#define vtr_to_max_lr_idx(v)		((v) & 0xf)
+#define vtr_to_nr_pri_bits(v)		(((u32)(v) >> 29) + 1)
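+/*
+ * ICH_VTR_EL2.ListRegs (bits [4:0]) holds the index of the last
+ * implemented List Register, and ICH_VTR_EL2.PRIbits (bits [31:29])
+ * the number of priority bits minus one; hence the helpers above.
+ */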
+
+#define read_gicreg(r)							\
+	({								\
+		u64 reg;						\
+		asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg));	\
+		reg;							\
+	})
+
+#define write_gicreg(v,r)						\
+	do {								\
+		u64 __val = (v);					\
+		asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
+	} while (0)
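+
+/*
+ * The mrs_s/msr_s forms are needed because the toolchain may not know
+ * the GIC system registers by name; they encode the access manually.
+ */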
+
+/* vcpu is already in the HYP VA space */
+void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 val;
+	u32 max_lr_idx, nr_pri_bits;
+
+	/*
+	 * Make sure stores to the GIC via the memory mapped interface
+	 * are now visible to the system register interface.
+	 */
+	dsb(st);
+
+	cpu_if->vgic_vmcr  = read_gicreg(ICH_VMCR_EL2);
+	cpu_if->vgic_misr  = read_gicreg(ICH_MISR_EL2);
+	cpu_if->vgic_eisr  = read_gicreg(ICH_EISR_EL2);
+	cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
+
+	write_gicreg(0, ICH_HCR_EL2);
+	val = read_gicreg(ICH_VTR_EL2);
+	max_lr_idx = vtr_to_max_lr_idx(val);
+	nr_pri_bits = vtr_to_nr_pri_bits(val);
+
+	switch (max_lr_idx) {
+	case 15:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
+	case 14:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
+	case 13:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
+	case 12:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
+	case 11:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
+	case 10:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
+	case 9:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
+	case 8:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
+	case 7:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
+	case 6:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
+	case 5:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
+	case 4:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
+	case 3:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
+	case 2:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
+	case 1:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
+	case 0:
+		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
+	}
+
+	switch (nr_pri_bits) {
+	case 7:
+		cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
+		cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
+	case 6:
+		cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
+	default:
+		cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
+	}
+
+	switch (nr_pri_bits) {
+	case 7:
+		cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
+		cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
+	case 6:
+		cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
+	default:
+		cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+	}
+
+	val = read_gicreg(ICC_SRE_EL2);
+	write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+	isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
+	write_gicreg(1, ICC_SRE_EL1);
+}
+
+void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 val;
+	u32 max_lr_idx, nr_pri_bits;
+
+	/*
+	 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
+	 * Group0 interrupt (as generated in GICv2 mode) to be
+	 * delivered as a FIQ to the guest, with potentially fatal
+	 * consequences. So we must make sure that ICC_SRE_EL1 has
+	 * actually been programmed with the value we want before
+	 * starting to mess with the rest of the GIC.
+	 */
+	write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
+	isb();
+
+	write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+	val = read_gicreg(ICH_VTR_EL2);
+	max_lr_idx = vtr_to_max_lr_idx(val);
+	nr_pri_bits = vtr_to_nr_pri_bits(val);
+
+	switch (nr_pri_bits) {
+	case 7:
+		 write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+		 write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+	case 6:
+		 write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+	default:
+		 write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+	}
+
+	switch (nr_pri_bits) {
+	case 7:
+		 write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
+		 write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
+	case 6:
+		 write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
+	default:
+		 write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+	}
+
+	switch (max_lr_idx) {
+	case 15:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
+	case 14:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
+	case 13:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
+	case 12:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
+	case 11:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
+	case 10:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
+	case 9:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
+	case 8:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
+	case 7:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
+	case 6:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
+	case 5:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
+	case 4:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
+	case 3:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
+	case 2:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
+	case 1:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
+	case 0:
+		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
+	}
+
+	/*
+	 * Ensure that the above will have reached the
+	 * (re)distributors. This ensures the guest will read the
+	 * correct values from the memory-mapped interface.
+	 */
+	isb();
+	dsb(sy);
+
+	/*
+	 * Prevent the guest from touching the GIC system registers if
+	 * SRE isn't enabled for GICv3 emulation.
+	 */
+	if (!cpu_if->vgic_sre) {
+		write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+			     ICC_SRE_EL2);
+	}
+}
+
+static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void)
+{
+	return read_gicreg(ICH_VTR_EL2);
+}
+
+__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d2650e84faf2..eec3598b4184 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -29,6 +29,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
@@ -219,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
  * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the
  * hyp.S code switches between host and guest values in future.
  */
-static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
-			      struct sys_reg_params *p,
-			      u64 *dbg_reg)
+static void reg_to_dbg(struct kvm_vcpu *vcpu,
+		       struct sys_reg_params *p,
+		       u64 *dbg_reg)
 {
 	u64 val = p->regval;
 
@@ -234,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu,
 	vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 }
 
-static inline void dbg_to_reg(struct kvm_vcpu *vcpu,
-			      struct sys_reg_params *p,
-			      u64 *dbg_reg)
+static void dbg_to_reg(struct kvm_vcpu *vcpu,
+		       struct sys_reg_params *p,
+		       u64 *dbg_reg)
 {
 	p->regval = *dbg_reg;
 	if (p->is_32bit)
 		p->regval &= 0xffffffffUL;
 }
 
-static inline bool trap_bvr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_bvr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
 
@@ -279,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_bvr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_bvr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
 }
 
-static inline bool trap_bcr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_bcr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
 
@@ -322,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_bcr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_bcr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
 }
 
-static inline bool trap_wvr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_wvr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
 
@@ -365,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_wvr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_wvr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
 }
 
-static inline bool trap_wcr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_wcr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
 
@@ -407,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
-static inline void reset_wcr(struct kvm_vcpu *vcpu,
-			     const struct sys_reg_desc *rd)
+static void reset_wcr(struct kvm_vcpu *vcpu,
+		      const struct sys_reg_desc *rd)
 {
 	vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
 }
@@ -722,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu *vcpu,
  * system is in.
  */
 
-static inline bool trap_xvr(struct kvm_vcpu *vcpu,
-			    struct sys_reg_params *p,
-			    const struct sys_reg_desc *rd)
+static bool trap_xvr(struct kvm_vcpu *vcpu,
+		     struct sys_reg_params *p,
+		     const struct sys_reg_desc *rd)
 {
 	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
 
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
deleted file mode 100644
index 3f000712a85d..000000000000
--- a/arch/arm64/kvm/vgic-v2-switch.S
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic.h>
-
-#include <asm/assembler.h>
-#include <asm/memory.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-#include <asm/kvm_mmu.h>
-
-	.text
-	.pushsection	.hyp.text, "ax"
-
-/*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
- */
-ENTRY(__save_vgic_v2_state)
-__save_vgic_v2_state:
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* Save all interesting registers */
-	ldr	w5, [x2, #GICH_VMCR]
-	ldr	w6, [x2, #GICH_MISR]
-	ldr	w7, [x2, #GICH_EISR0]
-	ldr	w8, [x2, #GICH_EISR1]
-	ldr	w9, [x2, #GICH_ELRSR0]
-	ldr	w10, [x2, #GICH_ELRSR1]
-	ldr	w11, [x2, #GICH_APR]
-CPU_BE(	rev	w5,  w5  )
-CPU_BE(	rev	w6,  w6  )
-CPU_BE(	rev	w7,  w7  )
-CPU_BE(	rev	w8,  w8  )
-CPU_BE(	rev	w9,  w9  )
-CPU_BE(	rev	w10, w10 )
-CPU_BE(	rev	w11, w11 )
-
-	str	w5, [x3, #VGIC_V2_CPU_VMCR]
-	str	w6, [x3, #VGIC_V2_CPU_MISR]
-CPU_LE(	str	w7, [x3, #VGIC_V2_CPU_EISR] )
-CPU_LE(	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
-CPU_LE(	str	w9, [x3, #VGIC_V2_CPU_ELRSR] )
-CPU_LE(	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
-CPU_BE(	str	w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
-CPU_BE(	str	w8, [x3, #VGIC_V2_CPU_EISR] )
-CPU_BE(	str	w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
-CPU_BE(	str	w10, [x3, #VGIC_V2_CPU_ELRSR] )
-	str	w11, [x3, #VGIC_V2_CPU_APR]
-
-	/* Clear GICH_HCR */
-	str	wzr, [x2, #GICH_HCR]
-
-	/* Save list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x2], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x3], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
-	ret
-ENDPROC(__save_vgic_v2_state)
-
-/*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
- */
-ENTRY(__restore_vgic_v2_state)
-__restore_vgic_v2_state:
-	/* Get VGIC VCTRL base into x2 */
-	ldr	x2, [x0, #VCPU_KVM]
-	kern_hyp_va	x2
-	ldr	x2, [x2, #KVM_VGIC_VCTRL]
-	kern_hyp_va	x2
-	cbz	x2, 2f		// disabled
-
-	/* Compute the address of struct vgic_cpu */
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	/* We only restore a minimal set of registers */
-	ldr	w4, [x3, #VGIC_V2_CPU_HCR]
-	ldr	w5, [x3, #VGIC_V2_CPU_VMCR]
-	ldr	w6, [x3, #VGIC_V2_CPU_APR]
-CPU_BE(	rev	w4, w4 )
-CPU_BE(	rev	w5, w5 )
-CPU_BE(	rev	w6, w6 )
-
-	str	w4, [x2, #GICH_HCR]
-	str	w5, [x2, #GICH_VMCR]
-	str	w6, [x2, #GICH_APR]
-
-	/* Restore list registers */
-	add	x2, x2, #GICH_LR0
-	ldr	w4, [x3, #VGIC_CPU_NR_LR]
-	add	x3, x3, #VGIC_V2_CPU_LR
-1:	ldr	w5, [x3], #4
-CPU_BE(	rev	w5, w5 )
-	str	w5, [x2], #4
-	sub	w4, w4, #1
-	cbnz	w4, 1b
-2:
-	ret
-ENDPROC(__restore_vgic_v2_state)
-
-	.popsection
diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S
deleted file mode 100644
index 3c20730ddff5..000000000000
--- a/arch/arm64/kvm/vgic-v3-switch.S
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (C) 2012,2013 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/irqchip/arm-gic-v3.h>
-
-#include <asm/assembler.h>
-#include <asm/memory.h>
-#include <asm/asm-offsets.h>
-#include <asm/kvm.h>
-#include <asm/kvm_asm.h>
-#include <asm/kvm_arm.h>
-
-	.text
-	.pushsection	.hyp.text, "ax"
-
-/*
- * We store LRs in reverse order to let the CPU deal with streaming
- * access. Use this macro to make it look saner...
- */
-#define LR_OFFSET(n)	(VGIC_V3_CPU_LR + (15 - n) * 8)
-
-/*
- * Save the VGIC CPU state into memory
- * x0: Register pointing to VCPU struct
- * Do not corrupt x1!!!
- */
-.macro	save_vgic_v3_state
-	// Compute the address of struct vgic_cpu
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	// Make sure stores to the GIC via the memory mapped interface
-	// are now visible to the system register interface
-	dsb	st
-
-	// Save all interesting registers
-	mrs_s	x5, ICH_VMCR_EL2
-	mrs_s	x6, ICH_MISR_EL2
-	mrs_s	x7, ICH_EISR_EL2
-	mrs_s	x8, ICH_ELSR_EL2
-
-	str	w5, [x3, #VGIC_V3_CPU_VMCR]
-	str	w6, [x3, #VGIC_V3_CPU_MISR]
-	str	w7, [x3, #VGIC_V3_CPU_EISR]
-	str	w8, [x3, #VGIC_V3_CPU_ELRSR]
-
-	msr_s	ICH_HCR_EL2, xzr
-
-	mrs_s	x21, ICH_VTR_EL2
-	mvn	w22, w21
-	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	mrs_s	x20, ICH_LR15_EL2
-	mrs_s	x19, ICH_LR14_EL2
-	mrs_s	x18, ICH_LR13_EL2
-	mrs_s	x17, ICH_LR12_EL2
-	mrs_s	x16, ICH_LR11_EL2
-	mrs_s	x15, ICH_LR10_EL2
-	mrs_s	x14, ICH_LR9_EL2
-	mrs_s	x13, ICH_LR8_EL2
-	mrs_s	x12, ICH_LR7_EL2
-	mrs_s	x11, ICH_LR6_EL2
-	mrs_s	x10, ICH_LR5_EL2
-	mrs_s	x9, ICH_LR4_EL2
-	mrs_s	x8, ICH_LR3_EL2
-	mrs_s	x7, ICH_LR2_EL2
-	mrs_s	x6, ICH_LR1_EL2
-	mrs_s	x5, ICH_LR0_EL2
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	str	x20, [x3, #LR_OFFSET(15)]
-	str	x19, [x3, #LR_OFFSET(14)]
-	str	x18, [x3, #LR_OFFSET(13)]
-	str	x17, [x3, #LR_OFFSET(12)]
-	str	x16, [x3, #LR_OFFSET(11)]
-	str	x15, [x3, #LR_OFFSET(10)]
-	str	x14, [x3, #LR_OFFSET(9)]
-	str	x13, [x3, #LR_OFFSET(8)]
-	str	x12, [x3, #LR_OFFSET(7)]
-	str	x11, [x3, #LR_OFFSET(6)]
-	str	x10, [x3, #LR_OFFSET(5)]
-	str	x9, [x3, #LR_OFFSET(4)]
-	str	x8, [x3, #LR_OFFSET(3)]
-	str	x7, [x3, #LR_OFFSET(2)]
-	str	x6, [x3, #LR_OFFSET(1)]
-	str	x5, [x3, #LR_OFFSET(0)]
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	mrs_s	x20, ICH_AP0R3_EL2
-	str	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	mrs_s	x19, ICH_AP0R2_EL2
-	str	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-6:	mrs_s	x18, ICH_AP0R1_EL2
-	str	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-5:	mrs_s	x17, ICH_AP0R0_EL2
-	str	w17, [x3, #VGIC_V3_CPU_AP0R]
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	mrs_s	x20, ICH_AP1R3_EL2
-	str	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	mrs_s	x19, ICH_AP1R2_EL2
-	str	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-6:	mrs_s	x18, ICH_AP1R1_EL2
-	str	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-5:	mrs_s	x17, ICH_AP1R0_EL2
-	str	w17, [x3, #VGIC_V3_CPU_AP1R]
-
-	// Restore SRE_EL1 access and re-enable SRE at EL1.
-	mrs_s	x5, ICC_SRE_EL2
-	orr	x5, x5, #ICC_SRE_EL2_ENABLE
-	msr_s	ICC_SRE_EL2, x5
-	isb
-	mov	x5, #1
-	msr_s	ICC_SRE_EL1, x5
-.endm
-
-/*
- * Restore the VGIC CPU state from memory
- * x0: Register pointing to VCPU struct
- */
-.macro	restore_vgic_v3_state
-	// Compute the address of struct vgic_cpu
-	add	x3, x0, #VCPU_VGIC_CPU
-
-	// Restore all interesting registers
-	ldr	w4, [x3, #VGIC_V3_CPU_HCR]
-	ldr	w5, [x3, #VGIC_V3_CPU_VMCR]
-	ldr	w25, [x3, #VGIC_V3_CPU_SRE]
-
-	msr_s	ICC_SRE_EL1, x25
-
-	// make sure SRE is valid before writing the other registers
-	isb
-
-	msr_s	ICH_HCR_EL2, x4
-	msr_s	ICH_VMCR_EL2, x5
-
-	mrs_s	x21, ICH_VTR_EL2
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	ldr	w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)]
-	msr_s	ICH_AP1R3_EL2, x20
-	ldr	w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)]
-	msr_s	ICH_AP1R2_EL2, x19
-6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)]
-	msr_s	ICH_AP1R1_EL2, x18
-5:	ldr	w17, [x3, #VGIC_V3_CPU_AP1R]
-	msr_s	ICH_AP1R0_EL2, x17
-
-	tbnz	w21, #29, 6f	// 6 bits
-	tbz	w21, #30, 5f	// 5 bits
-				// 7 bits
-	ldr	w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)]
-	msr_s	ICH_AP0R3_EL2, x20
-	ldr	w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)]
-	msr_s	ICH_AP0R2_EL2, x19
-6:	ldr	w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)]
-	msr_s	ICH_AP0R1_EL2, x18
-5:	ldr	w17, [x3, #VGIC_V3_CPU_AP0R]
-	msr_s	ICH_AP0R0_EL2, x17
-
-	and	w22, w21, #0xf
-	mvn	w22, w21
-	ubfiz	w23, w22, 2, 4	// w23 = (15 - ListRegs) * 4
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	ldr	x20, [x3, #LR_OFFSET(15)]
-	ldr	x19, [x3, #LR_OFFSET(14)]
-	ldr	x18, [x3, #LR_OFFSET(13)]
-	ldr	x17, [x3, #LR_OFFSET(12)]
-	ldr	x16, [x3, #LR_OFFSET(11)]
-	ldr	x15, [x3, #LR_OFFSET(10)]
-	ldr	x14, [x3, #LR_OFFSET(9)]
-	ldr	x13, [x3, #LR_OFFSET(8)]
-	ldr	x12, [x3, #LR_OFFSET(7)]
-	ldr	x11, [x3, #LR_OFFSET(6)]
-	ldr	x10, [x3, #LR_OFFSET(5)]
-	ldr	x9, [x3, #LR_OFFSET(4)]
-	ldr	x8, [x3, #LR_OFFSET(3)]
-	ldr	x7, [x3, #LR_OFFSET(2)]
-	ldr	x6, [x3, #LR_OFFSET(1)]
-	ldr	x5, [x3, #LR_OFFSET(0)]
-
-	adr	x24, 1f
-	add	x24, x24, x23
-	br	x24
-
-1:
-	msr_s	ICH_LR15_EL2, x20
-	msr_s	ICH_LR14_EL2, x19
-	msr_s	ICH_LR13_EL2, x18
-	msr_s	ICH_LR12_EL2, x17
-	msr_s	ICH_LR11_EL2, x16
-	msr_s	ICH_LR10_EL2, x15
-	msr_s	ICH_LR9_EL2,  x14
-	msr_s	ICH_LR8_EL2,  x13
-	msr_s	ICH_LR7_EL2,  x12
-	msr_s	ICH_LR6_EL2,  x11
-	msr_s	ICH_LR5_EL2,  x10
-	msr_s	ICH_LR4_EL2,   x9
-	msr_s	ICH_LR3_EL2,   x8
-	msr_s	ICH_LR2_EL2,   x7
-	msr_s	ICH_LR1_EL2,   x6
-	msr_s	ICH_LR0_EL2,   x5
-
-	// Ensure that the above will have reached the
-	// (re)distributors. This ensure the guest will read
-	// the correct values from the memory-mapped interface.
-	isb
-	dsb	sy
-
-	// Prevent the guest from touching the GIC system registers
-	// if SRE isn't enabled for GICv3 emulation
-	cbnz	x25, 1f
-	mrs_s	x5, ICC_SRE_EL2
-	and	x5, x5, #~ICC_SRE_EL2_ENABLE
-	msr_s	ICC_SRE_EL2, x5
-1:
-.endm
-
-ENTRY(__save_vgic_v3_state)
-	save_vgic_v3_state
-	ret
-ENDPROC(__save_vgic_v3_state)
-
-ENTRY(__restore_vgic_v3_state)
-	restore_vgic_v3_state
-	ret
-ENDPROC(__restore_vgic_v3_state)
-
-ENTRY(__vgic_v3_get_ich_vtr_el2)
-	mrs_s	x0, ICH_VTR_EL2
-	ret
-ENDPROC(__vgic_v3_get_ich_vtr_el2)
-
-	.popsection