Merge branch 'kvm-nvmx-and-vm-teardown' into HEAD
The immediate issue being fixed here is a nVMX bug where KVM fails to detect that, after nested VM-Exit, L1 has a pending IRQ (or NMI). However, checking for a pending interrupt accesses the legacy PIC, and x86's kvm_arch_destroy_vm() currently frees the PIC before destroying vCPUs, i.e. checking for IRQs during the forced nested VM-Exit results in a NULL pointer deref. Fixing that teardown ordering is a prerequisite for the nVMX fix.

The remaining patches attempt to bring a bit of sanity to x86's VM teardown code, which has accumulated a lot of cruft over the years. E.g. KVM currently unloads each vCPU's MMUs in a separate operation from destroying vCPUs, all because when guest SMP support was added, KVM had a kludgy MMU teardown flow that broke when a VM had more than one vCPU. And that oddity lived on, for 18 years...

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
commit 361da275e5
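As context for the teardown-ordering problem described in the commit message above, here is a minimal, self-contained C sketch of the general hazard. All names (toy_vm, toy_vcpu, shared_pic, destroy_vm, ...) are hypothetical illustrations, not KVM code: the point is only that a VM-wide device which per-vCPU teardown may still consult has to outlive the vCPUs, otherwise the pending-interrupt check dereferences freed/NULL state.

/*
 * Toy illustration only (hypothetical names, not KVM code): per-vCPU
 * teardown may still query a VM-wide interrupt controller, so the
 * controller must outlive the vCPUs.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_vm;

struct shared_pic {
        bool irq_pending;
};

struct toy_vcpu {
        struct toy_vm *vm;
};

struct toy_vm {
        struct shared_pic *pic;         /* VM-wide, like the emulated PIC */
        struct toy_vcpu vcpu[2];
};

/* Stand-in for the pending-IRQ check done during a forced nested VM-Exit. */
static bool vcpu_has_pending_irq(struct toy_vcpu *vcpu)
{
        return vcpu->vm->pic->irq_pending;      /* NULL deref if pic is gone */
}

static void destroy_vcpu(struct toy_vcpu *vcpu)
{
        if (vcpu_has_pending_irq(vcpu))
                printf("pending IRQ noticed during vCPU teardown\n");
}

static void destroy_vm(struct toy_vm *vm, bool buggy_order)
{
        if (buggy_order) {
                free(vm->pic);          /* BUG: vCPUs may still use the PIC */
                vm->pic = NULL;
        }

        for (int i = 0; i < 2; i++)
                destroy_vcpu(&vm->vcpu[i]);

        if (!buggy_order) {
                free(vm->pic);          /* OK: all vCPUs are already gone */
                vm->pic = NULL;
        }
}

int main(void)
{
        struct toy_vm vm = { .pic = calloc(1, sizeof(struct shared_pic)) };

        vm.pic->irq_pending = true;
        for (int i = 0; i < 2; i++)
                vm.vcpu[i].vm = &vm;

        destroy_vm(&vm, false);         /* pass true to reproduce the crash */
        return 0;
}

With buggy_order set, the per-vCPU check runs after the shared controller has been torn down, mirroring the NULL pointer deref the commit message describes; the series fixes the real issue by ensuring that VM-wide state needed while destroying vCPUs is freed only afterwards.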
arch/arm64/include/asm/kvm_host.h
@@ -1375,8 +1375,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
 	return cpus_have_final_cap(ARM64_SPECTRE_V3A);
 }
 
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-
 void kvm_init_host_debug_data(void);
 void kvm_vcpu_load_debug(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_debug(struct kvm_vcpu *vcpu);
arch/loongarch/include/asm/kvm_host.h
@@ -326,7 +326,6 @@ static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *arch)
 
 /* Misc */
 static inline void kvm_arch_hardware_unsetup(void) {}
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
arch/mips/include/asm/kvm_host.h
@@ -886,7 +886,6 @@ extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
 extern int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 				    struct kvm_mips_interrupt *irq);
 
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
 					 struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
arch/powerpc/include/asm/kvm_host.h
@@ -902,7 +902,6 @@ struct kvm_vcpu_arch {
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
 
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
arch/riscv/include/asm/kvm_host.h
@@ -301,8 +301,6 @@ static inline bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
 	return IS_ENABLED(CONFIG_GUEST_PERF_EVENTS) && !!vcpu;
 }
 
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-
 #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12
 
 void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
arch/s390/include/asm/kvm_host.h
@@ -1056,7 +1056,6 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
 					 struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
arch/x86/kvm/x86.c
@@ -12369,6 +12369,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
+	kvm_clear_async_pf_completion_queue(vcpu);
+	kvm_mmu_unload(vcpu);
+
 	kvmclock_reset(vcpu);
 
 	kvm_x86_call(vcpu_free)(vcpu);
@@ -12762,31 +12765,6 @@ out:
 	return ret;
 }
 
-static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-{
-	vcpu_load(vcpu);
-	kvm_mmu_unload(vcpu);
-	vcpu_put(vcpu);
-}
-
-static void kvm_unload_vcpu_mmus(struct kvm *kvm)
-{
-	unsigned long i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		kvm_clear_async_pf_completion_queue(vcpu);
-		kvm_unload_vcpu_mmu(vcpu);
-	}
-}
-
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-	cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
-	cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
-	kvm_free_pit(kvm);
-}
-
 /**
  * __x86_set_memory_region: Setup KVM internal memory slot
  *
@@ -12865,6 +12843,17 @@ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
 
 void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 {
+	/*
+	 * Stop all background workers and kthreads before destroying vCPUs, as
+	 * iterating over vCPUs in a different task while vCPUs are being freed
+	 * is unsafe, i.e. will lead to use-after-free.  The PIT also needs to
+	 * be stopped before IRQ routing is freed.
+	 */
+	cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
+	cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
+
+	kvm_free_pit(kvm);
+
 	kvm_mmu_pre_destroy_vm(kvm);
 }
 
@@ -12884,7 +12873,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
 		mutex_unlock(&kvm->slots_lock);
 	}
-	kvm_unload_vcpu_mmus(kvm);
 	kvm_destroy_vcpus(kvm);
 	kvm_x86_call(vm_destroy)(kvm);
 	kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
|
@ -1747,7 +1747,6 @@ static inline void kvm_unregister_perf_callbacks(void) {}
|
||||||
|
|
||||||
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
|
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
|
||||||
void kvm_arch_destroy_vm(struct kvm *kvm);
|
void kvm_arch_destroy_vm(struct kvm *kvm);
|
||||||
void kvm_arch_sync_events(struct kvm *kvm);
|
|
||||||
|
|
||||||
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
|
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -489,6 +489,14 @@ void kvm_destroy_vcpus(struct kvm *kvm)
|
||||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||||
kvm_vcpu_destroy(vcpu);
|
kvm_vcpu_destroy(vcpu);
|
||||||
xa_erase(&kvm->vcpu_array, i);
|
xa_erase(&kvm->vcpu_array, i);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Assert that the vCPU isn't visible in any way, to ensure KVM
|
||||||
|
* doesn't trigger a use-after-free if destroying vCPUs results
|
||||||
|
* in VM-wide request, e.g. to flush remote TLBs when tearing
|
||||||
|
* down MMUs, or to mark the VM dead if a KVM_BUG_ON() fires.
|
||||||
|
*/
|
||||||
|
WARN_ON_ONCE(xa_load(&kvm->vcpu_array, i) || kvm_get_vcpu(kvm, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
atomic_set(&kvm->online_vcpus, 0);
|
atomic_set(&kvm->online_vcpus, 0);
|
||||||
|
|
@@ -1263,7 +1271,6 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_destroy_pm_notifier(kvm);
 	kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
 	kvm_destroy_vm_debugfs(kvm);
-	kvm_arch_sync_events(kvm);
 	mutex_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	mutex_unlock(&kvm_lock);