diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/boot/tools/build.c | 16 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 19 | ||||
-rw-r--r-- | arch/x86/include/asm/bitops.h | 12 | ||||
-rw-r--r-- | arch/x86/include/asm/ftrace.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/stackprotector.h | 7 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/unwind_orc.c | 23 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 39 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 36 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 41 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 60 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 3 | ||||
-rw-r--r-- | arch/x86/mm/mmio-mod.c | 4 | ||||
-rw-r--r-- | arch/x86/xen/smp_pv.c | 1 |
18 files changed, 199 insertions, 112 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1197b5596d5a..2d3f963fd6f1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -68,6 +68,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 8f8c8e386cea..c8b8c1a8d1fc 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -59,14 +59,14 @@ u8 buf[SETUP_SECT_MAX*512]; #define PECOFF_COMPAT_RESERVE 0x0 #endif -unsigned long efi32_stub_entry; -unsigned long efi64_stub_entry; -unsigned long efi_pe_entry; -unsigned long efi32_pe_entry; -unsigned long kernel_info; -unsigned long startup_64; -unsigned long _ehead; -unsigned long _end; +static unsigned long efi32_stub_entry; +static unsigned long efi64_stub_entry; +static unsigned long efi_pe_entry; +static unsigned long efi32_pe_entry; +static unsigned long kernel_info; +static unsigned long startup_64; +static unsigned long _ehead; +static unsigned long _end; /*----------------------------------------------------------------------*/ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index fd51bac11b46..acf76b466db6 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -226,10 +226,18 @@ static int hv_cpu_die(unsigned int cpu) rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); if (re_ctrl.target_vp == hv_vp_index[cpu]) { - /* Reassign to some other online CPU */ + /* + * Reassign reenlightenment notifications to some other online + * CPU or just disable the feature if there are no online CPUs + * left (happens on hibernation). + */ new_cpu = cpumask_any_but(cpu_online_mask, cpu); - re_ctrl.target_vp = hv_vp_index[new_cpu]; + if (new_cpu < nr_cpu_ids) + re_ctrl.target_vp = hv_vp_index[new_cpu]; + else + re_ctrl.enabled = 0; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); } @@ -293,6 +301,13 @@ static void hv_resume(void) hv_hypercall_pg = hv_hypercall_pg_saved; hv_hypercall_pg_saved = NULL; + + /* + * Reenlightenment notifications are disabled by hv_cpu_die(0), + * reenable them here if hv_reenlightenment_cb was previously set. + */ + if (hv_reenlightenment_cb) + set_hv_tscchange_cb(hv_reenlightenment_cb); } /* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 53f246e9df5a..0367efdc5b7a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -52,9 +52,9 @@ static __always_inline void arch_set_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "orb %1,%0" + asm volatile(LOCK_PREFIX "orb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" (CONST_MASK(nr) & 0xff) + : "iq" (CONST_MASK(nr)) : "memory"); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" @@ -72,9 +72,9 @@ static __always_inline void arch_clear_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "andb %1,%0" + asm volatile(LOCK_PREFIX "andb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" (CONST_MASK(nr) ^ 0xff)); + : "iq" (~CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); @@ -123,9 +123,9 @@ static __always_inline void arch_change_bit(long nr, volatile unsigned long *addr) { if (__builtin_constant_p(nr)) { - asm volatile(LOCK_PREFIX "xorb %1,%0" + asm volatile(LOCK_PREFIX "xorb %b1,%0" : CONST_MASK_ADDR(nr, addr) - : "iq" ((u8)CONST_MASK(nr))); + : "iq" (CONST_MASK(nr))); } else { asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : : RLONG_ADDR(addr), "Ir" (nr) : "memory"); diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 70b96cae5b42..84b9449be080 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,12 @@ struct dyn_arch_ftrace { #ifndef __ASSEMBLY__ +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE) +extern void set_ftrace_ops_ro(void); +#else +static inline void set_ftrace_ops_ro(void) { } +#endif + #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0dea9f122bb9..0a6b35353fc7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -578,6 +578,7 @@ struct kvm_vcpu_arch { unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr8; + u32 host_pkru; u32 pkru; u32 hflags; u64 efer; @@ -1093,8 +1094,6 @@ struct kvm_x86_ops { void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - u64 (*get_dr6)(struct kvm_vcpu *vcpu); - void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); @@ -1449,6 +1448,7 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 91e29b6a86a5..9804a7957f4e 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -55,8 +55,13 @@ /* * Initialize the stackprotector canary value. * - * NOTE: this must only be called from functions that never return, + * NOTE: this must only be called from functions that never return * and it must always be inlined. + * + * In addition, it should be called from a compilation unit for which + * stack protector is disabled. Alternatively, the caller should not end + * with a function call which gets tail-call optimized as that would + * lead to checking a modified canary value. */ static __always_inline void boot_init_stack_canary(void) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 37a0aeaf89e7..b0e641793be4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -407,7 +407,8 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) set_vm_flush_reset_perms(trampoline); - set_memory_ro((unsigned long)trampoline, npages); + if (likely(system_state != SYSTEM_BOOTING)) + set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); return (unsigned long)trampoline; fail: @@ -415,6 +416,32 @@ fail: return 0; } +void set_ftrace_ops_ro(void) +{ + struct ftrace_ops *ops; + unsigned long start_offset; + unsigned long end_offset; + unsigned long npages; + unsigned long size; + + do_for_each_ftrace_op(ops, ftrace_ops_list) { + if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP)) + continue; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + start_offset = (unsigned long)ftrace_regs_caller; + end_offset = (unsigned long)ftrace_regs_caller_end; + } else { + start_offset = (unsigned long)ftrace_caller; + end_offset = (unsigned long)ftrace_epilogue; + } + size = end_offset - start_offset; + size = size + RET_SIZE + sizeof(void *); + npages = DIV_ROUND_UP(size, PAGE_SIZE); + set_memory_ro((unsigned long)ops->trampoline, npages); + } while_for_each_ftrace_op(ops); +} + static unsigned long calc_trampoline_call_offset(bool save_regs) { unsigned long start_offset; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c89e4d9ad28..2f24c334a938 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -266,6 +266,14 @@ static void notrace start_secondary(void *unused) wmb(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + + /* + * Prevent tail call to cpu_startup_entry() because the stack protector + * guard has been changed a couple of function calls up, in + * boot_init_stack_canary() and must not be checked before tail calling + * another function. + */ + prevent_tail_call_optimization(); } /** diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 5b0bd8581fe6..7f969b2d240f 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -320,12 +320,19 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { + struct task_struct *task = state->task; + if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; + if (task != current && state->sp == task->thread.sp) { + struct inactive_task_frame *frame = (void *)task->thread.sp; + return &frame->ret_addr; + } + if (state->sp) return (unsigned long *)state->sp - 1; @@ -617,23 +624,23 @@ EXPORT_SYMBOL_GPL(unwind_next_frame); void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { - if (!orc_init) - goto done; - memset(state, 0, sizeof(*state)); state->task = task; + if (!orc_init) + goto err; + /* * Refuse to unwind the stack of a task while it's executing on another * CPU. This check is racy, but that's ok: the unwinder has other * checks to prevent it from going off the rails. */ if (task_on_another_cpu(task)) - goto done; + goto err; if (regs) { if (user_mode(regs)) - goto done; + goto the_end; state->ip = regs->ip; state->sp = regs->sp; @@ -666,6 +673,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, * generate some kind of backtrace if this happens. */ void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp); + state->error = true; if (get_stack_info(next_page, state->task, &state->stack_info, &state->stack_mask)) return; @@ -691,8 +699,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, return; -done: +err: + state->error = true; +the_end: state->stack_info.type = STACK_TYPE_UNKNOWN; - return; } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index bcefa9d4e57e..54d4b98b49e1 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1427,7 +1427,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, */ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, - vcpu_mask, &hv_vcpu->tlb_flush); + NULL, vcpu_mask, &hv_vcpu->tlb_flush); ret_success: /* We always do full TLB flush, set rep_done = rep_cnt. */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 90a1ca939627..9a2a62e5afeb 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <asm/msr-index.h> +#include <asm/debugreg.h> #include "kvm_emulate.h" #include "trace.h" @@ -267,7 +268,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->save.rsp = nested_vmcb->save.rsp; svm->vmcb->save.rip = nested_vmcb->save.rip; svm->vmcb->save.dr7 = nested_vmcb->save.dr7; - svm->vmcb->save.dr6 = nested_vmcb->save.dr6; + svm->vcpu.arch.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; @@ -482,7 +483,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.rsp = vmcb->save.rsp; nested_vmcb->save.rax = vmcb->save.rax; nested_vmcb->save.dr7 = vmcb->save.dr7; - nested_vmcb->save.dr6 = vmcb->save.dr6; + nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; nested_vmcb->save.cpl = vmcb->save.cpl; nested_vmcb->control.int_ctl = vmcb->control.int_ctl; @@ -606,26 +607,45 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) /* DB exceptions for our internal use must not cause vmexit */ static int nested_svm_intercept_db(struct vcpu_svm *svm) { - unsigned long dr6; + unsigned long dr6 = svm->vmcb->save.dr6; + + /* Always catch it and pass it to userspace if debugging. */ + if (svm->vcpu.guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return NESTED_EXIT_HOST; /* if we're not singlestepping, it's not ours */ if (!svm->nmi_singlestep) - return NESTED_EXIT_DONE; + goto reflected_db; /* if it's not a singlestep exception, it's not ours */ - if (kvm_get_dr(&svm->vcpu, 6, &dr6)) - return NESTED_EXIT_DONE; if (!(dr6 & DR6_BS)) - return NESTED_EXIT_DONE; + goto reflected_db; /* if the guest is singlestepping, it should get the vmexit */ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { disable_nmi_singlestep(svm); - return NESTED_EXIT_DONE; + goto reflected_db; } /* it's ours, the nested hypervisor must not see this one */ return NESTED_EXIT_HOST; + +reflected_db: + /* + * Synchronize guest DR6 here just like in kvm_deliver_exception_payload; + * it will be moved into the nested VMCB by nested_svm_vmexit. Once + * exceptions will be moved to svm_check_nested_events, all this stuff + * will just go away and we could just return NESTED_EXIT_HOST + * unconditionally. db_interception will queue the exception, which + * will be processed by svm_check_nested_events if a nested vmexit is + * required, and we will just use kvm_deliver_exception_payload to copy + * the payload to DR6 before vmexit. + */ + WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT); + svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM); + svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1; + return NESTED_EXIT_DONE; } static int nested_svm_intercept_ioio(struct vcpu_svm *svm) @@ -682,6 +702,9 @@ static int nested_svm_intercept(struct vcpu_svm *svm) if (svm->nested.intercept_exceptions & excp_bits) { if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) vmexit = nested_svm_intercept_db(svm); + else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR && + svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP) + vmexit = NESTED_EXIT_HOST; else vmexit = NESTED_EXIT_DONE; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 38f6aeefeb55..a862c768fd54 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1672,17 +1672,14 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) mark_dirty(svm->vmcb, VMCB_ASID); } -static u64 svm_get_dr6(struct kvm_vcpu *vcpu) +static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) { - return to_svm(vcpu)->vmcb->save.dr6; -} - -static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) -{ - struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb; - svm->vmcb->save.dr6 = value; - mark_dirty(svm->vmcb, VMCB_DR); + if (unlikely(value != vmcb->save.dr6)) { + vmcb->save.dr6 = value; + mark_dirty(vmcb, VMCB_DR); + } } static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) @@ -1693,9 +1690,12 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) get_debugreg(vcpu->arch.db[1], 1); get_debugreg(vcpu->arch.db[2], 2); get_debugreg(vcpu->arch.db[3], 3); - vcpu->arch.dr6 = svm_get_dr6(vcpu); + /* + * We cannot reset svm->vmcb->save.dr6 to DR6_FIXED_1|DR6_RTM here, + * because db_interception might need it. We can do it before vmentry. + */ + vcpu->arch.dr6 = svm->vmcb->save.dr6; vcpu->arch.dr7 = svm->vmcb->save.dr7; - vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; set_dr_intercepts(svm); } @@ -1739,7 +1739,8 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && !svm->nmi_singlestep) { - kvm_queue_exception(&svm->vcpu, DB_VECTOR); + u32 payload = (svm->vmcb->save.dr6 ^ DR6_RTM) & ~DR6_FIXED_1; + kvm_queue_exception_p(&svm->vcpu, DB_VECTOR, payload); return 1; } @@ -3317,6 +3318,15 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.cr2 = vcpu->arch.cr2; + /* + * Run with all-zero DR6 unless needed, so that we can get the exact cause + * of a #DB. + */ + if (unlikely(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + svm_set_dr6(svm, vcpu->arch.dr6); + else + svm_set_dr6(svm, DR6_FIXED_1 | DR6_RTM); + clgi(); kvm_load_guest_xsave_state(vcpu); @@ -3931,8 +3941,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, - .get_dr6 = svm_get_dr6, - .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c2c6335a998c..89c766fad889 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1372,7 +1372,6 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmx_vcpu_pi_load(vcpu, cpu); - vmx->host_pkru = read_pkru(); vmx->host_debugctlmsr = get_debugctlmsr(); } @@ -4677,15 +4676,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; } - kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; + kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); /* fall through */ case BP_VECTOR: @@ -4929,16 +4926,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) * guest debugging itself. */ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { - vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; + vcpu->run->debug.arch.dr6 = DR6_BD | DR6_RTM | DR6_FIXED_1; vcpu->run->debug.arch.dr7 = dr7; vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu); vcpu->run->debug.arch.exception = DB_VECTOR; vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= DR6_BD | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BD); return 1; } } @@ -4969,15 +4964,6 @@ static int handle_dr(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } -static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.dr6; -} - -static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) -{ -} - static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { get_debugreg(vcpu->arch.db[0], 0); @@ -6577,11 +6563,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) kvm_load_guest_xsave_state(vcpu); - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE) && - vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vcpu->arch.pkru); - pt_guest_enter(vmx); if (vcpu_to_pmu(vcpu)->version) @@ -6671,18 +6652,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) pt_guest_exit(vmx); - /* - * eager fpu is enabled if PKEY is supported and CR4 is switched - * back on host, so it is safe to read guest PKRU from current - * XSAVE. - */ - if (static_cpu_has(X86_FEATURE_PKU) && - kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = rdpkru(); - if (vcpu->arch.pkru != vmx->host_pkru) - __write_pkru(vmx->host_pkru); - } - kvm_load_host_xsave_state(vcpu); vmx->nested.nested_run_pending = 0; @@ -7740,8 +7709,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, - .get_dr6 = vmx_get_dr6, - .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d786c7d27ce5..c17e6eb9ad43 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -572,11 +572,12 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) } EXPORT_SYMBOL_GPL(kvm_requeue_exception); -static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, - unsigned long payload) +void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, + unsigned long payload) { kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false); } +EXPORT_SYMBOL_GPL(kvm_queue_exception_p); static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code, unsigned long payload) @@ -836,11 +837,25 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu) vcpu->arch.ia32_xss != host_xss) wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss); } + + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) && + vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.pkru); } EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) { + if (static_cpu_has(X86_FEATURE_PKU) && + (kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || + (vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) { + vcpu->arch.pkru = rdpkru(); + if (vcpu->arch.pkru != vcpu->arch.host_pkru) + __write_pkru(vcpu->arch.host_pkru); + } + if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) { if (vcpu->arch.xcr0 != host_xcr0) @@ -1045,12 +1060,6 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu) } } -static void kvm_update_dr6(struct kvm_vcpu *vcpu) -{ - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) - kvm_x86_ops.set_dr6(vcpu, vcpu->arch.dr6); -} - static void kvm_update_dr7(struct kvm_vcpu *vcpu) { unsigned long dr7; @@ -1090,7 +1099,6 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) if (val & 0xffffffff00000000ULL) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); - kvm_update_dr6(vcpu); break; case 5: /* fall through */ @@ -1126,10 +1134,7 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) case 4: /* fall through */ case 6: - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - *val = vcpu->arch.dr6; - else - *val = kvm_x86_ops.get_dr6(vcpu); + *val = vcpu->arch.dr6; break; case 5: /* fall through */ @@ -3558,6 +3563,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_x86_ops.vcpu_load(vcpu, cpu); + /* Save host pkru register if supported */ + vcpu->arch.host_pkru = read_pkru(); + /* Apply any externally detected TSC adjustments (due to suspend) */ if (unlikely(vcpu->arch.tsc_offset_adjustment)) { adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment); @@ -3751,7 +3759,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, unsigned bank_num = mcg_cap & 0xff, bank; r = -EINVAL; - if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS) + if (!bank_num || bank_num > KVM_MAX_MCE_BANKS) goto out; if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000)) goto out; @@ -4009,7 +4017,6 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); vcpu->arch.dr6 = dbgregs->dr6; - kvm_update_dr6(vcpu); vcpu->arch.dr7 = dbgregs->dr7; kvm_update_dr7(vcpu); @@ -6659,7 +6666,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu); kvm_run->debug.arch.exception = DB_VECTOR; kvm_run->exit_reason = KVM_EXIT_DEBUG; return 0; @@ -6719,9 +6726,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.db); if (dr6 != 0) { - vcpu->arch.dr6 &= ~DR_TRAP_BITS; - vcpu->arch.dr6 |= dr6 | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); *r = 1; return true; } @@ -8042,7 +8047,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, zalloc_cpumask_var(&cpus, GFP_ATOMIC); kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, - vcpu_bitmap, cpus); + NULL, vcpu_bitmap, cpus); free_cpumask_var(cpus); } @@ -8072,6 +8077,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv); */ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) { + struct kvm_vcpu *except; unsigned long old, new, expected; if (!kvm_x86_ops.check_apicv_inhibit_reasons || @@ -8096,7 +8102,17 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit) trace_kvm_apicv_update_request(activate, bit); if (kvm_x86_ops.pre_update_apicv_exec_ctrl) kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate); - kvm_make_all_cpus_request(kvm, KVM_REQ_APICV_UPDATE); + + /* + * Sending request to update APICV for all other vcpus, + * while update the calling vcpu immediately instead of + * waiting for another #VMEXIT to handle the request. + */ + except = kvm_get_running_vcpu(); + kvm_make_all_cpus_request_except(kvm, KVM_REQ_APICV_UPDATE, + except); + if (except) + kvm_vcpu_update_apicv(except); } EXPORT_SYMBOL_GPL(kvm_request_apicv_update); @@ -8420,7 +8436,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops.sync_dirty_debug_regs(vcpu); kvm_update_dr0123(vcpu); - kvm_update_dr6(vcpu); kvm_update_dr7(vcpu); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } @@ -9481,7 +9496,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); kvm_update_dr0123(vcpu); vcpu->arch.dr6 = DR6_INIT; - kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3b289c2f75cd..8b5f73f5e207 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -54,6 +54,7 @@ #include <asm/init.h> #include <asm/uv/uv.h> #include <asm/setup.h> +#include <asm/ftrace.h> #include "mm_internal.h" @@ -1291,6 +1292,8 @@ void mark_rodata_ro(void) all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); + set_ftrace_ops_ro(); + #ifdef CONFIG_CPA_DEBUG printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); set_memory_rw(start, (end-start) >> PAGE_SHIFT); diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 109325d77b3e..43fd19b3f118 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -372,7 +372,7 @@ static void enter_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL && + if (!cpumask_available(downed_cpus) && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { pr_notice("Failed to allocate mask\n"); goto out; @@ -402,7 +402,7 @@ static void leave_uniprocessor(void) int cpu; int err; - if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) + if (!cpumask_available(downed_cpus) || cpumask_weight(downed_cpus) == 0) return; pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 8fb8a50a28b4..f2adb63b2d7c 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -93,6 +93,7 @@ asmlinkage __visible void cpu_bringup_and_idle(void) cpu_bringup(); boot_init_stack_canary(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + prevent_tail_call_optimization(); } void xen_smp_intr_free_pv(unsigned int cpu) |