Merge tag 'kvm-x86-irqs-6.17' of https://github.com/kvm-x86/linux into HEAD
KVM IRQ changes for 6.17

 - Rework irqbypass to track/match producers and consumers via an xarray
   instead of a linked list. Using a linked list leads to O(n^2) insertion
   times, which is hugely problematic for use cases that create large numbers
   of VMs. Such use cases typically don't actually use irqbypass, but
   eliminating the pointless registration is a future problem to solve as it
   likely requires new uAPI.

 - Track irqbypass's "token" as "struct eventfd_ctx *" instead of a "void *",
   to avoid making a simple concept unnecessarily difficult to understand.

 - Add CONFIG_KVM_IOAPIC for x86 to allow disabling support for I/O APIC,
   PIC, and PIT emulation at compile time.

 - Drop x86's irq_comm.c, and move a pile of IRQ related code into irq.c.

 - Fix a variety of flaws and bugs in the AVIC device posted IRQ code.

 - Inhibit AVIC if a vCPU's ID is too big (relative to what hardware
   supports) instead of rejecting vCPU creation.

 - Extend enable_ipiv module param support to SVM, by simply leaving
   IsRunning clear in the vCPU's physical ID table entry.

 - Disable IPI virtualization, via enable_ipiv, if the CPU is affected by
   erratum #1235, to allow (safely) enabling AVIC on such CPUs.

 - Dedup x86's device posted IRQ code, as the vast majority of functionality
   can be shared verbatim between SVM and VMX.

 - Harden the device posted IRQ code against bugs and runtime errors.

 - Use vcpu_idx, not vcpu_id, for GA log tag/metadata, to make lookups O(1)
   instead of O(n).

 - Generate GA Log interrupts if and only if the target vCPU is blocking,
   i.e. only if KVM needs a notification in order to wake the vCPU.

 - Decouple device posted IRQs from VFIO device assignment, as binding a VM
   to a VFIO group is not a requirement for enabling device posted IRQs.

 - Clean up and document/comment the irqfd assignment code.

 - Disallow binding multiple irqfds to an eventfd with a priority waiter,
   i.e. ensure an eventfd is bound to at most one irqfd through the entire
   host, and add a selftest to verify eventfd:irqfd bindings are globally
   unique.
commit f02b1bcc73
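The first item above replaces irqbypass's global linked list with an xarray so that producer/consumer registration no longer scans every existing binding. As a rough illustration of that idea only (not the actual virt/lib/irqbypass.c rework), here is a minimal sketch keyed by the shared eventfd_ctx token; the struct layout, helper name, and omission of locking are simplifying assumptions for this example.

```c
/*
 * Illustrative sketch only: pair irqbypass producers and consumers by
 * their eventfd_ctx token via an xarray instead of walking a global
 * linked list on every registration.  Names, layout, and the missing
 * locking are assumptions, not the real irqbypass implementation.
 */
#include <linux/xarray.h>
#include <linux/irqbypass.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct demo_bypass_entry {
	struct irq_bypass_producer *producer;	/* NULL until registered */
	struct irq_bypass_consumer *consumer;	/* NULL until registered */
};

/* Keyed by the eventfd_ctx pointer shared by producer and consumer. */
static DEFINE_XARRAY(demo_bypass_entries);

static int demo_register_consumer(struct eventfd_ctx *token,
				  struct irq_bypass_consumer *consumer)
{
	unsigned long key = (unsigned long)token;
	struct demo_bypass_entry *entry;

	/* O(1) lookup, versus scanning every existing binding. */
	entry = xa_load(&demo_bypass_entries, key);
	if (!entry) {
		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			return -ENOMEM;
		if (xa_insert(&demo_bypass_entries, key, entry, GFP_KERNEL)) {
			kfree(entry);
			return -EBUSY;
		}
	}

	if (entry->consumer)
		return -EBUSY;		/* one consumer per token */

	entry->consumer = consumer;

	/* If the producer registered first, connect the two now. */
	if (entry->producer)
		return consumer->add_producer(consumer, entry->producer);
	return 0;
}
```

Keying the lookup on the eventfd pointer turns each registration into a single xarray lookup/insert, which is the gist of why insertion cost no longer grows quadratically with the number of VMs.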
@@ -2765,19 +2765,15 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
 	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
 }
 
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
-				  struct kvm_kernel_irq_routing_entry *new)
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+				   struct kvm_kernel_irq_routing_entry *old,
+				   struct kvm_kernel_irq_routing_entry *new)
 {
-	if (old->type != KVM_IRQ_ROUTING_MSI ||
-	    new->type != KVM_IRQ_ROUTING_MSI)
-		return true;
+	if (old->type == KVM_IRQ_ROUTING_MSI &&
+	    new->type == KVM_IRQ_ROUTING_MSI &&
+	    !memcmp(&old->msi, &new->msi, sizeof(new->msi)))
+		return;
 
-	return memcmp(&old->msi, &new->msi, sizeof(new->msi));
-}
-
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
-				  uint32_t guest_irq, bool set)
-{
 	/*
 	 * Remapping the vLPI requires taking the its_lock mutex to resolve
 	 * the new translation. We're in spinlock land at this point, so no
@@ -2785,7 +2781,7 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
 	 *
 	 * Unmap the vLPI and fall back to software LPI injection.
 	 */
-	return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
+	return kvm_vgic_v4_unset_forwarding(irqfd->kvm, irqfd->producer->irq);
 }
 
 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
@@ -758,7 +758,7 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
 	if (irq) {
 		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
 			if (irq->hw)
-				WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+				its_unmap_vlpi(ite->irq->host_irq);
 
 			irq->hw = false;
 		}
@@ -527,28 +527,26 @@ static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
 	return NULL;
 }
 
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
+void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
 {
 	struct vgic_irq *irq;
 	unsigned long flags;
-	int ret = 0;
 
 	if (!vgic_supports_direct_msis(kvm))
-		return 0;
+		return;
 
 	irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
 	if (!irq)
-		return 0;
+		return;
 
 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
 	WARN_ON(irq->hw && irq->host_irq != host_irq);
 	if (irq->hw) {
 		atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
 		irq->hw = false;
-		ret = its_unmap_vlpi(host_irq);
+		its_unmap_vlpi(host_irq);
 	}
 
 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 	vgic_put_irq(kvm, irq);
-	return ret;
 }
@@ -26,7 +26,22 @@ enum {
 	IRQ_REMAP_X2APIC_MODE,
 };
 
-struct vcpu_data {
+/*
+ * This is mainly used to communicate information back-and-forth
+ * between SVM and IOMMU for setting up and tearing down posted
+ * interrupt
+ */
+struct amd_iommu_pi_data {
+	u64 vapic_addr;		/* Physical address of the vCPU's vAPIC. */
+	u32 ga_tag;
+	u32 vector;		/* Guest vector of the interrupt */
+	int cpu;
+	bool ga_log_intr;
+	bool is_guest_mode;
+	void *ir_data;
+};
+
+struct intel_iommu_pi_data {
 	u64 pi_desc_addr;	/* Physical address of PI Descriptor */
 	u32 vector;		/* Guest vector of the interrupt */
 };
@@ -112,7 +112,7 @@ KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
 KVM_X86_OP_OPTIONAL(vcpu_blocking)
 KVM_X86_OP_OPTIONAL(vcpu_unblocking)
 KVM_X86_OP_OPTIONAL(pi_update_irte)
-KVM_X86_OP_OPTIONAL(pi_start_assignment)
+KVM_X86_OP_OPTIONAL(pi_start_bypass)
 KVM_X86_OP_OPTIONAL(apicv_pre_state_restore)
 KVM_X86_OP_OPTIONAL(apicv_post_state_restore)
 KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
@@ -297,6 +297,7 @@ enum x86_intercept_stage;
  */
 #define KVM_APIC_PV_EOI_PENDING 1
 
+struct kvm_kernel_irqfd;
 struct kvm_kernel_irq_routing_entry;
 
 /*
@@ -1320,6 +1321,12 @@ enum kvm_apicv_inhibit {
 	 */
 	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
 
+	/*
+	 * AVIC is disabled because the vCPU's APIC ID is beyond the max
+	 * supported by AVIC/x2AVIC, i.e. the vCPU is unaddressable.
+	 */
+	APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG,
+
 	NR_APICV_INHIBIT_REASONS,
 };
 
@@ -1338,7 +1345,8 @@ enum kvm_apicv_inhibit {
 	__APICV_INHIBIT_REASON(IRQWIN), \
 	__APICV_INHIBIT_REASON(PIT_REINJ), \
 	__APICV_INHIBIT_REASON(SEV), \
-	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED), \
+	__APICV_INHIBIT_REASON(PHYSICAL_ID_TOO_BIG)
 
 struct kvm_arch {
 	unsigned long n_used_mmu_pages;
@ -1381,9 +1389,13 @@ struct kvm_arch {
|
||||||
atomic_t noncoherent_dma_count;
|
atomic_t noncoherent_dma_count;
|
||||||
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
|
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
|
||||||
atomic_t assigned_device_count;
|
atomic_t assigned_device_count;
|
||||||
|
unsigned long nr_possible_bypass_irqs;
|
||||||
|
|
||||||
|
#ifdef CONFIG_KVM_IOAPIC
|
||||||
struct kvm_pic *vpic;
|
struct kvm_pic *vpic;
|
||||||
struct kvm_ioapic *vioapic;
|
struct kvm_ioapic *vioapic;
|
||||||
struct kvm_pit *vpit;
|
struct kvm_pit *vpit;
|
||||||
|
#endif
|
||||||
atomic_t vapics_in_nmi_mode;
|
atomic_t vapics_in_nmi_mode;
|
||||||
struct mutex apic_map_lock;
|
struct mutex apic_map_lock;
|
||||||
struct kvm_apic_map __rcu *apic_map;
|
struct kvm_apic_map __rcu *apic_map;
|
||||||
|
|
@ -1403,7 +1415,6 @@ struct kvm_arch {
|
||||||
bool pause_in_guest;
|
bool pause_in_guest;
|
||||||
bool cstate_in_guest;
|
bool cstate_in_guest;
|
||||||
|
|
||||||
unsigned long irq_sources_bitmap;
|
|
||||||
s64 kvmclock_offset;
|
s64 kvmclock_offset;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1432,9 +1443,6 @@ struct kvm_arch {
|
||||||
struct delayed_work kvmclock_update_work;
|
struct delayed_work kvmclock_update_work;
|
||||||
struct delayed_work kvmclock_sync_work;
|
struct delayed_work kvmclock_sync_work;
|
||||||
|
|
||||||
/* reads protected by irq_srcu, writes by irq_lock */
|
|
||||||
struct hlist_head mask_notifier_list;
|
|
||||||
|
|
||||||
#ifdef CONFIG_KVM_HYPERV
|
#ifdef CONFIG_KVM_HYPERV
|
||||||
struct kvm_hv hyperv;
|
struct kvm_hv hyperv;
|
||||||
#endif
|
#endif
|
||||||
|
|
@@ -1853,9 +1861,10 @@ struct kvm_x86_ops {
 	void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
 	void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
 
-	int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
-			      uint32_t guest_irq, bool set);
-	void (*pi_start_assignment)(struct kvm *kvm);
+	int (*pi_update_irte)(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+			      unsigned int host_irq, uint32_t guest_irq,
+			      struct kvm_vcpu *vcpu, u32 vector);
+	void (*pi_start_bypass)(struct kvm *kvm);
 	void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu);
 	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
 	bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
@@ -1950,6 +1959,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_ipiv;
 extern bool __read_mostly enable_device_posted_irqs;
 extern struct kvm_x86_ops kvm_x86_ops;
 
@ -2044,19 +2054,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
|
||||||
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||||
const void *val, int bytes);
|
const void *val, int bytes);
|
||||||
|
|
||||||
struct kvm_irq_mask_notifier {
|
|
||||||
void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
|
|
||||||
int irq;
|
|
||||||
struct hlist_node link;
|
|
||||||
};
|
|
||||||
|
|
||||||
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
|
|
||||||
struct kvm_irq_mask_notifier *kimn);
|
|
||||||
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
|
||||||
struct kvm_irq_mask_notifier *kimn);
|
|
||||||
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
|
|
||||||
bool mask);
|
|
||||||
|
|
||||||
extern bool tdp_enabled;
|
extern bool tdp_enabled;
|
||||||
|
|
||||||
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
|
u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
|
||||||
|
|
@ -2215,9 +2212,6 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state,
|
||||||
return !!(*irq_state);
|
return !!(*irq_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
|
|
||||||
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
|
|
||||||
|
|
||||||
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
|
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
|
||||||
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
|
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
|
||||||
|
|
||||||
|
|
@ -2394,9 +2388,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
|
||||||
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
||||||
struct kvm_vcpu **dest_vcpu);
|
struct kvm_vcpu **dest_vcpu);
|
||||||
|
|
||||||
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm_lapic_irq *irq);
|
|
||||||
|
|
||||||
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
|
static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
|
||||||
{
|
{
|
||||||
/* We can only post Fixed and LowPrio IRQs */
|
/* We can only post Fixed and LowPrio IRQs */
|
||||||
|
|
|
||||||
|
|
@@ -252,16 +252,21 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT			31
 #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK		(1 << 31)
 
+/*
+ * GA_LOG_INTR is a synthetic flag that's never propagated to hardware-visible
+ * tables. GA_LOG_INTR is set if the vCPU needs device posted IRQs to generate
+ * GA log interrupts to wake the vCPU (because it's blocking or about to block).
+ */
+#define AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR		BIT_ULL(61)
+
 #define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	GENMASK_ULL(11, 0)
-#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	(0xFFFFFFFFFFULL << 12)
+#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK	GENMASK_ULL(51, 12)
 #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK		(1ULL << 62)
 #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK		(1ULL << 63)
 #define AVIC_PHYSICAL_ID_TABLE_SIZE_MASK		(0xFFULL)
 
 #define AVIC_DOORBELL_PHYSICAL_ID_MASK			GENMASK_ULL(11, 0)
 
-#define VMCB_AVIC_APIC_BAR_MASK				0xFFFFFFFFFF000ULL
-
 #define AVIC_UNACCEL_ACCESS_WRITE_MASK		1
 #define AVIC_UNACCEL_ACCESS_OFFSET_MASK		0xFF0
 #define AVIC_UNACCEL_ACCESS_VECTOR_MASK		0xFFFFFFFF
@ -290,8 +295,6 @@ enum avic_ipi_failure_cause {
|
||||||
static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
|
static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
|
||||||
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
|
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
|
||||||
|
|
||||||
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
|
|
||||||
|
|
||||||
#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
|
#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
|
||||||
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
|
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
|
||||||
#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
|
#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
|
||||||
|
|
|
||||||
|
|
@@ -166,6 +166,16 @@ config KVM_AMD_SEV
 	  Encrypted State (SEV-ES), and Secure Encrypted Virtualization with
 	  Secure Nested Paging (SEV-SNP) technologies on AMD processors.
 
+config KVM_IOAPIC
+	bool "I/O APIC, PIC, and PIT emulation"
+	default y
+	depends on KVM
+	help
+	  Provides support for KVM to emulate an I/O APIC, PIC, and PIT, i.e.
+	  for full in-kernel APIC emulation.
+
+	  If unsure, say Y.
+
 config KVM_SMM
 	bool "System Management Mode emulation"
 	default y
@@ -5,12 +5,11 @@ ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
 include $(srctree)/virt/kvm/Makefile.kvm
 
-kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
-			   debugfs.o mmu/mmu.o mmu/page_track.o \
-			   mmu/spte.o
+kvm-y			+= x86.o emulate.o irq.o lapic.o cpuid.o pmu.o mtrr.o \
+			   debugfs.o mmu/mmu.o mmu/page_track.o mmu/spte.o
 
 kvm-$(CONFIG_X86_64)	+= mmu/tdp_iter.o mmu/tdp_mmu.o
+kvm-$(CONFIG_KVM_IOAPIC)	+= i8259.o i8254.o ioapic.o
 kvm-$(CONFIG_KVM_HYPERV)	+= hyperv.o
 kvm-$(CONFIG_KVM_XEN)	+= xen.o
 kvm-$(CONFIG_KVM_SMM)	+= smm.o
@@ -497,15 +497,19 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
 	return ret;
 }
 
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
+int kvm_hv_synic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+			 int irq_source_id, int level, bool line_status)
 {
 	struct kvm_vcpu_hv_synic *synic;
 
-	synic = synic_get(kvm, vpidx);
+	if (!level)
+		return -1;
+
+	synic = synic_get(kvm, e->hv_sint.vcpu);
 	if (!synic)
 		return -EINVAL;
 
-	return synic_set_irq(synic, sint);
+	return synic_set_irq(synic, e->hv_sint.sint);
 }
 
 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
@@ -103,7 +103,8 @@ static inline bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu)
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
 
 void kvm_hv_irq_routing_update(struct kvm *kvm);
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
+int kvm_hv_synic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
+			 int irq_source_id, int level, bool line_status);
 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
 int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 
@@ -248,8 +248,8 @@ static void pit_do_work(struct kthread_work *work)
 	if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0))
 		return;
 
-	kvm_set_irq(kvm, pit->irq_source_id, 0, 1, false);
-	kvm_set_irq(kvm, pit->irq_source_id, 0, 0, false);
+	kvm_set_irq(kvm, KVM_PIT_IRQ_SOURCE_ID, 0, 1, false);
+	kvm_set_irq(kvm, KVM_PIT_IRQ_SOURCE_ID, 0, 0, false);
 
 	/*
 	 * Provides NMI watchdog support via Virtual Wire mode.
@ -288,7 +288,7 @@ static inline void kvm_pit_reset_reinject(struct kvm_pit *pit)
|
||||||
atomic_set(&pit->pit_state.irq_ack, 1);
|
atomic_set(&pit->pit_state.irq_ack, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject)
|
static void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject)
|
||||||
{
|
{
|
||||||
struct kvm_kpit_state *ps = &pit->pit_state;
|
struct kvm_kpit_state *ps = &pit->pit_state;
|
||||||
struct kvm *kvm = pit->kvm;
|
struct kvm *kvm = pit->kvm;
|
||||||
|
|
@ -400,8 +400,8 @@ static void pit_load_count(struct kvm_pit *pit, int channel, u32 val)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
|
static void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
|
||||||
int hpet_legacy_start)
|
int hpet_legacy_start)
|
||||||
{
|
{
|
||||||
u8 saved_mode;
|
u8 saved_mode;
|
||||||
|
|
||||||
|
|
@ -649,6 +649,79 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
|
||||||
kvm_pit_reset_reinject(pit);
|
kvm_pit_reset_reinject(pit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
|
||||||
|
{
|
||||||
|
struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
|
||||||
|
|
||||||
|
BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
|
||||||
|
|
||||||
|
mutex_lock(&kps->lock);
|
||||||
|
memcpy(ps, &kps->channels, sizeof(*ps));
|
||||||
|
mutex_unlock(&kps->lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
struct kvm_pit *pit = kvm->arch.vpit;
|
||||||
|
|
||||||
|
mutex_lock(&pit->pit_state.lock);
|
||||||
|
memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
|
||||||
|
mutex_unlock(&pit->pit_state.lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
|
||||||
|
{
|
||||||
|
mutex_lock(&kvm->arch.vpit->pit_state.lock);
|
||||||
|
memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
|
||||||
|
sizeof(ps->channels));
|
||||||
|
ps->flags = kvm->arch.vpit->pit_state.flags;
|
||||||
|
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
|
||||||
|
memset(&ps->reserved, 0, sizeof(ps->reserved));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
|
||||||
|
{
|
||||||
|
int start = 0;
|
||||||
|
int i;
|
||||||
|
u32 prev_legacy, cur_legacy;
|
||||||
|
struct kvm_pit *pit = kvm->arch.vpit;
|
||||||
|
|
||||||
|
mutex_lock(&pit->pit_state.lock);
|
||||||
|
prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
|
||||||
|
cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
|
||||||
|
if (!prev_legacy && cur_legacy)
|
||||||
|
start = 1;
|
||||||
|
memcpy(&pit->pit_state.channels, &ps->channels,
|
||||||
|
sizeof(pit->pit_state.channels));
|
||||||
|
pit->pit_state.flags = ps->flags;
|
||||||
|
for (i = 0; i < 3; i++)
|
||||||
|
kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
|
||||||
|
start && i == 0);
|
||||||
|
mutex_unlock(&pit->pit_state.lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_reinject(struct kvm *kvm, struct kvm_reinject_control *control)
|
||||||
|
{
|
||||||
|
struct kvm_pit *pit = kvm->arch.vpit;
|
||||||
|
|
||||||
|
/* pit->pit_state.lock was overloaded to prevent userspace from getting
|
||||||
|
* an inconsistent state after running multiple KVM_REINJECT_CONTROL
|
||||||
|
* ioctls in parallel. Use a separate lock if that ioctl isn't rare.
|
||||||
|
*/
|
||||||
|
mutex_lock(&pit->pit_state.lock);
|
||||||
|
kvm_pit_set_reinject(pit, control->pit_reinject);
|
||||||
|
mutex_unlock(&pit->pit_state.lock);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static const struct kvm_io_device_ops pit_dev_ops = {
|
static const struct kvm_io_device_ops pit_dev_ops = {
|
||||||
.read = pit_ioport_read,
|
.read = pit_ioport_read,
|
||||||
.write = pit_ioport_write,
|
.write = pit_ioport_write,
|
||||||
|
|
@ -671,10 +744,6 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
|
||||||
if (!pit)
|
if (!pit)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
pit->irq_source_id = kvm_request_irq_source_id(kvm);
|
|
||||||
if (pit->irq_source_id < 0)
|
|
||||||
goto fail_request;
|
|
||||||
|
|
||||||
mutex_init(&pit->pit_state.lock);
|
mutex_init(&pit->pit_state.lock);
|
||||||
|
|
||||||
pid = get_pid(task_tgid(current));
|
pid = get_pid(task_tgid(current));
|
||||||
|
|
@ -726,8 +795,6 @@ fail_register_pit:
|
||||||
kvm_pit_set_reinject(pit, false);
|
kvm_pit_set_reinject(pit, false);
|
||||||
kthread_destroy_worker(pit->worker);
|
kthread_destroy_worker(pit->worker);
|
||||||
fail_kthread:
|
fail_kthread:
|
||||||
kvm_free_irq_source_id(kvm, pit->irq_source_id);
|
|
||||||
fail_request:
|
|
||||||
kfree(pit);
|
kfree(pit);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -744,7 +811,6 @@ void kvm_free_pit(struct kvm *kvm)
|
||||||
kvm_pit_set_reinject(pit, false);
|
kvm_pit_set_reinject(pit, false);
|
||||||
hrtimer_cancel(&pit->pit_state.timer);
|
hrtimer_cancel(&pit->pit_state.timer);
|
||||||
kthread_destroy_worker(pit->worker);
|
kthread_destroy_worker(pit->worker);
|
||||||
kvm_free_irq_source_id(kvm, pit->irq_source_id);
|
|
||||||
kfree(pit);
|
kfree(pit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,11 @@
|
||||||
|
|
||||||
#include <kvm/iodev.h>
|
#include <kvm/iodev.h>
|
||||||
|
|
||||||
|
#include <uapi/asm/kvm.h>
|
||||||
|
|
||||||
|
#include "ioapic.h"
|
||||||
|
|
||||||
|
#ifdef CONFIG_KVM_IOAPIC
|
||||||
struct kvm_kpit_channel_state {
|
struct kvm_kpit_channel_state {
|
||||||
u32 count; /* can be 65536 */
|
u32 count; /* can be 65536 */
|
||||||
u16 latched_count;
|
u16 latched_count;
|
||||||
|
|
@ -42,7 +47,6 @@ struct kvm_pit {
|
||||||
struct kvm_io_device speaker_dev;
|
struct kvm_io_device speaker_dev;
|
||||||
struct kvm *kvm;
|
struct kvm *kvm;
|
||||||
struct kvm_kpit_state pit_state;
|
struct kvm_kpit_state pit_state;
|
||||||
int irq_source_id;
|
|
||||||
struct kvm_irq_mask_notifier mask_notifier;
|
struct kvm_irq_mask_notifier mask_notifier;
|
||||||
struct kthread_worker *worker;
|
struct kthread_worker *worker;
|
||||||
struct kthread_work expired;
|
struct kthread_work expired;
|
||||||
|
|
@ -55,11 +59,14 @@ struct kvm_pit {
|
||||||
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
|
#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
|
||||||
#define KVM_PIT_CHANNEL_MASK 0x3
|
#define KVM_PIT_CHANNEL_MASK 0x3
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps);
|
||||||
|
int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps);
|
||||||
|
int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps);
|
||||||
|
int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps);
|
||||||
|
int kvm_vm_ioctl_reinject(struct kvm *kvm, struct kvm_reinject_control *control);
|
||||||
|
|
||||||
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
|
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags);
|
||||||
void kvm_free_pit(struct kvm *kvm);
|
void kvm_free_pit(struct kvm *kvm);
|
||||||
|
#endif /* CONFIG_KVM_IOAPIC */
|
||||||
void kvm_pit_load_count(struct kvm_pit *pit, int channel, u32 val,
|
|
||||||
int hpet_legacy_start);
|
|
||||||
void kvm_pit_set_reinject(struct kvm_pit *pit, bool reinject);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,8 @@
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/bitops.h>
|
#include <linux/bitops.h>
|
||||||
|
|
||||||
|
#include "ioapic.h"
|
||||||
#include "irq.h"
|
#include "irq.h"
|
||||||
|
|
||||||
#include <linux/kvm_host.h>
|
#include <linux/kvm_host.h>
|
||||||
|
|
@ -185,8 +187,11 @@ void kvm_pic_update_irq(struct kvm_pic *s)
|
||||||
pic_unlock(s);
|
pic_unlock(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
|
int kvm_pic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
|
||||||
|
int irq_source_id, int level, bool line_status)
|
||||||
{
|
{
|
||||||
|
struct kvm_pic *s = kvm->arch.vpic;
|
||||||
|
int irq = e->irqchip.pin;
|
||||||
int ret, irq_level;
|
int ret, irq_level;
|
||||||
|
|
||||||
BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);
|
BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);
|
||||||
|
|
@ -203,16 +208,6 @@ int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_pic_clear_all(struct kvm_pic *s, int irq_source_id)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
pic_lock(s);
|
|
||||||
for (i = 0; i < PIC_NUM_PINS; i++)
|
|
||||||
__clear_bit(irq_source_id, &s->irq_states[i]);
|
|
||||||
pic_unlock(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* acknowledge interrupt 'irq'
|
* acknowledge interrupt 'irq'
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -41,11 +41,11 @@
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/page.h>
|
#include <asm/page.h>
|
||||||
#include <asm/current.h>
|
#include <asm/current.h>
|
||||||
#include <trace/events/kvm.h>
|
|
||||||
|
|
||||||
#include "ioapic.h"
|
#include "ioapic.h"
|
||||||
#include "lapic.h"
|
#include "lapic.h"
|
||||||
#include "irq.h"
|
#include "irq.h"
|
||||||
|
#include "trace.h"
|
||||||
|
|
||||||
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
|
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
|
||||||
bool line_status);
|
bool line_status);
|
||||||
|
|
@ -310,6 +310,42 @@ void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
|
||||||
kvm_make_scan_ioapic_request(kvm);
|
kvm_make_scan_ioapic_request(kvm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||||
|
struct kvm_irq_mask_notifier *kimn)
|
||||||
|
{
|
||||||
|
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||||
|
|
||||||
|
mutex_lock(&kvm->irq_lock);
|
||||||
|
kimn->irq = irq;
|
||||||
|
hlist_add_head_rcu(&kimn->link, &ioapic->mask_notifier_list);
|
||||||
|
mutex_unlock(&kvm->irq_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||||
|
struct kvm_irq_mask_notifier *kimn)
|
||||||
|
{
|
||||||
|
mutex_lock(&kvm->irq_lock);
|
||||||
|
hlist_del_rcu(&kimn->link);
|
||||||
|
mutex_unlock(&kvm->irq_lock);
|
||||||
|
synchronize_srcu(&kvm->irq_srcu);
|
||||||
|
}
|
||||||
|
|
||||||
|
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
|
||||||
|
bool mask)
|
||||||
|
{
|
||||||
|
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||||
|
struct kvm_irq_mask_notifier *kimn;
|
||||||
|
int idx, gsi;
|
||||||
|
|
||||||
|
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||||
|
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
|
||||||
|
if (gsi != -1)
|
||||||
|
hlist_for_each_entry_rcu(kimn, &ioapic->mask_notifier_list, link)
|
||||||
|
if (kimn->irq == gsi)
|
||||||
|
kimn->func(kimn, mask);
|
||||||
|
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||||
|
}
|
||||||
|
|
||||||
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
|
||||||
{
|
{
|
||||||
unsigned index;
|
unsigned index;
|
||||||
|
|
@ -479,9 +515,11 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
|
int kvm_ioapic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
|
||||||
int level, bool line_status)
|
int irq_source_id, int level, bool line_status)
|
||||||
{
|
{
|
||||||
|
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
||||||
|
int irq = e->irqchip.pin;
|
||||||
int ret, irq_level;
|
int ret, irq_level;
|
||||||
|
|
||||||
BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
|
BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
|
||||||
|
|
@ -496,16 +534,6 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
spin_lock(&ioapic->lock);
|
|
||||||
for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++)
|
|
||||||
__clear_bit(irq_source_id, &ioapic->irq_states[i]);
|
|
||||||
spin_unlock(&ioapic->lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
|
static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -718,6 +746,7 @@ int kvm_ioapic_init(struct kvm *kvm)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
spin_lock_init(&ioapic->lock);
|
spin_lock_init(&ioapic->lock);
|
||||||
INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
|
INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work);
|
||||||
|
INIT_HLIST_HEAD(&ioapic->mask_notifier_list);
|
||||||
kvm->arch.vioapic = ioapic;
|
kvm->arch.vioapic = ioapic;
|
||||||
kvm_ioapic_reset(ioapic);
|
kvm_ioapic_reset(ioapic);
|
||||||
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
|
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
|
||||||
|
|
|
||||||
|
|
@ -86,8 +86,24 @@ struct kvm_ioapic {
|
||||||
struct delayed_work eoi_inject;
|
struct delayed_work eoi_inject;
|
||||||
u32 irq_eoi[IOAPIC_NUM_PINS];
|
u32 irq_eoi[IOAPIC_NUM_PINS];
|
||||||
u32 irr_delivered;
|
u32 irr_delivered;
|
||||||
|
|
||||||
|
/* reads protected by irq_srcu, writes by irq_lock */
|
||||||
|
struct hlist_head mask_notifier_list;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct kvm_irq_mask_notifier {
|
||||||
|
void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
|
||||||
|
int irq;
|
||||||
|
struct hlist_node link;
|
||||||
|
};
|
||||||
|
|
||||||
|
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||||
|
struct kvm_irq_mask_notifier *kimn);
|
||||||
|
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||||
|
struct kvm_irq_mask_notifier *kimn);
|
||||||
|
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
|
||||||
|
bool mask);
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
#define ASSERT(x) \
|
#define ASSERT(x) \
|
||||||
do { \
|
do { \
|
||||||
|
|
@ -103,7 +119,7 @@ do { \
|
||||||
|
|
||||||
static inline int ioapic_in_kernel(struct kvm *kvm)
|
static inline int ioapic_in_kernel(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
return irqchip_kernel(kvm);
|
return irqchip_full(kvm);
|
||||||
}
|
}
|
||||||
|
|
||||||
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
|
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
|
||||||
|
|
@ -111,9 +127,9 @@ void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
|
||||||
int trigger_mode);
|
int trigger_mode);
|
||||||
int kvm_ioapic_init(struct kvm *kvm);
|
int kvm_ioapic_init(struct kvm *kvm);
|
||||||
void kvm_ioapic_destroy(struct kvm *kvm);
|
void kvm_ioapic_destroy(struct kvm *kvm);
|
||||||
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
|
int kvm_ioapic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
|
||||||
int level, bool line_status);
|
int irq_source_id, int level, bool line_status);
|
||||||
void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
|
|
||||||
void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||||
void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
|
||||||
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
|
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
|
||||||
|
|
|
||||||
|
|
@ -11,9 +11,12 @@
|
||||||
|
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
#include <linux/kvm_host.h>
|
#include <linux/kvm_host.h>
|
||||||
|
#include <linux/kvm_irqfd.h>
|
||||||
|
|
||||||
|
#include "hyperv.h"
|
||||||
|
#include "ioapic.h"
|
||||||
#include "irq.h"
|
#include "irq.h"
|
||||||
#include "i8254.h"
|
#include "trace.h"
|
||||||
#include "x86.h"
|
#include "x86.h"
|
||||||
#include "xen.h"
|
#include "xen.h"
|
||||||
|
|
||||||
|
|
@ -41,6 +44,14 @@ static int pending_userspace_extint(struct kvm_vcpu *v)
|
||||||
return v->arch.pending_external_vector != -1;
|
return v->arch.pending_external_vector != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int get_userspace_extint(struct kvm_vcpu *vcpu)
|
||||||
|
{
|
||||||
|
int vector = vcpu->arch.pending_external_vector;
|
||||||
|
|
||||||
|
vcpu->arch.pending_external_vector = -1;
|
||||||
|
return vector;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check if there is pending interrupt from
|
* check if there is pending interrupt from
|
||||||
* non-APIC source without intack.
|
* non-APIC source without intack.
|
||||||
|
|
@ -67,10 +78,13 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v)
|
||||||
if (!kvm_apic_accept_pic_intr(v))
|
if (!kvm_apic_accept_pic_intr(v))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (irqchip_split(v->kvm))
|
#ifdef CONFIG_KVM_IOAPIC
|
||||||
return pending_userspace_extint(v);
|
if (pic_in_kernel(v->kvm))
|
||||||
else
|
|
||||||
return v->kvm->arch.vpic->output;
|
return v->kvm->arch.vpic->output;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
WARN_ON_ONCE(!irqchip_split(v->kvm));
|
||||||
|
return pending_userspace_extint(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -126,13 +140,13 @@ int kvm_cpu_get_extint(struct kvm_vcpu *v)
|
||||||
return v->kvm->arch.xen.upcall_vector;
|
return v->kvm->arch.xen.upcall_vector;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (irqchip_split(v->kvm)) {
|
#ifdef CONFIG_KVM_IOAPIC
|
||||||
int vector = v->arch.pending_external_vector;
|
if (pic_in_kernel(v->kvm))
|
||||||
|
|
||||||
v->arch.pending_external_vector = -1;
|
|
||||||
return vector;
|
|
||||||
} else
|
|
||||||
return kvm_pic_read_irq(v->kvm); /* PIC */
|
return kvm_pic_read_irq(v->kvm); /* PIC */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
WARN_ON_ONCE(!irqchip_split(v->kvm));
|
||||||
|
return get_userspace_extint(v);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(kvm_cpu_get_extint);
|
EXPORT_SYMBOL_GPL(kvm_cpu_get_extint);
|
||||||
|
|
||||||
|
|
@ -163,7 +177,9 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
|
||||||
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
|
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
__kvm_migrate_apic_timer(vcpu);
|
__kvm_migrate_apic_timer(vcpu);
|
||||||
|
#ifdef CONFIG_KVM_IOAPIC
|
||||||
__kvm_migrate_pit_timer(vcpu);
|
__kvm_migrate_pit_timer(vcpu);
|
||||||
|
#endif
|
||||||
kvm_x86_call(migrate_timers)(vcpu);
|
kvm_x86_call(migrate_timers)(vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -171,10 +187,539 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
|
||||||
{
|
{
|
||||||
bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE;
|
bool resample = args->flags & KVM_IRQFD_FLAG_RESAMPLE;
|
||||||
|
|
||||||
return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm);
|
return resample ? irqchip_full(kvm) : irqchip_in_kernel(kvm);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
|
bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
|
||||||
{
|
{
|
||||||
return irqchip_in_kernel(kvm);
|
return irqchip_in_kernel(kvm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
||||||
|
struct kvm_lapic_irq *irq, struct dest_map *dest_map)
|
||||||
|
{
|
||||||
|
int r = -1;
|
||||||
|
struct kvm_vcpu *vcpu, *lowest = NULL;
|
||||||
|
unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
|
||||||
|
unsigned int dest_vcpus = 0;
|
||||||
|
|
||||||
|
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
|
||||||
|
return r;
|
||||||
|
|
||||||
|
if (irq->dest_mode == APIC_DEST_PHYSICAL &&
|
||||||
|
irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
|
||||||
|
pr_info("apic: phys broadcast and lowest prio\n");
|
||||||
|
irq->delivery_mode = APIC_DM_FIXED;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
|
||||||
|
|
||||||
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||||
|
if (!kvm_apic_present(vcpu))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
|
||||||
|
irq->dest_id, irq->dest_mode))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!kvm_lowest_prio_delivery(irq)) {
|
||||||
|
if (r < 0)
|
||||||
|
r = 0;
|
||||||
|
r += kvm_apic_set_irq(vcpu, irq, dest_map);
|
||||||
|
} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
|
||||||
|
if (!kvm_vector_hashing_enabled()) {
|
||||||
|
if (!lowest)
|
||||||
|
lowest = vcpu;
|
||||||
|
else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
|
||||||
|
lowest = vcpu;
|
||||||
|
} else {
|
||||||
|
__set_bit(i, dest_vcpu_bitmap);
|
||||||
|
dest_vcpus++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dest_vcpus != 0) {
|
||||||
|
int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
|
||||||
|
dest_vcpu_bitmap, KVM_MAX_VCPUS);
|
||||||
|
|
||||||
|
lowest = kvm_get_vcpu(kvm, idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lowest)
|
||||||
|
r = kvm_apic_set_irq(lowest, irq, dest_map);
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void kvm_msi_to_lapic_irq(struct kvm *kvm,
|
||||||
|
struct kvm_kernel_irq_routing_entry *e,
|
||||||
|
struct kvm_lapic_irq *irq)
|
||||||
|
{
|
||||||
|
struct msi_msg msg = { .address_lo = e->msi.address_lo,
|
||||||
|
.address_hi = e->msi.address_hi,
|
||||||
|
.data = e->msi.data };
|
||||||
|
|
||||||
|
trace_kvm_msi_set_irq(msg.address_lo | (kvm->arch.x2apic_format ?
|
||||||
|
(u64)msg.address_hi << 32 : 0), msg.data);
|
||||||
|
|
||||||
|
irq->dest_id = x86_msi_msg_get_destid(&msg, kvm->arch.x2apic_format);
|
||||||
|
irq->vector = msg.arch_data.vector;
|
||||||
|
irq->dest_mode = kvm_lapic_irq_dest_mode(msg.arch_addr_lo.dest_mode_logical);
|
||||||
|
irq->trig_mode = msg.arch_data.is_level;
|
||||||
|
irq->delivery_mode = msg.arch_data.delivery_mode << 8;
|
||||||
|
irq->msi_redir_hint = msg.arch_addr_lo.redirect_hint;
|
||||||
|
irq->level = 1;
|
||||||
|
irq->shorthand = APIC_DEST_NOSHORT;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool kvm_msi_route_invalid(struct kvm *kvm,
|
||||||
|
struct kvm_kernel_irq_routing_entry *e)
|
||||||
|
{
|
||||||
|
return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
||||||
|
struct kvm *kvm, int irq_source_id, int level, bool line_status)
|
||||||
|
{
|
||||||
|
struct kvm_lapic_irq irq;
|
||||||
|
|
||||||
|
if (kvm_msi_route_invalid(kvm, e))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (!level)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
kvm_msi_to_lapic_irq(kvm, e, &irq);
|
||||||
|
|
||||||
|
return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
||||||
|
struct kvm *kvm, int irq_source_id, int level,
|
||||||
|
bool line_status)
|
||||||
|
{
|
||||||
|
struct kvm_lapic_irq irq;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
switch (e->type) {
|
||||||
|
#ifdef CONFIG_KVM_HYPERV
|
||||||
|
case KVM_IRQ_ROUTING_HV_SINT:
|
||||||
|
return kvm_hv_synic_set_irq(e, kvm, irq_source_id, level,
|
||||||
|
line_status);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case KVM_IRQ_ROUTING_MSI:
|
||||||
|
if (kvm_msi_route_invalid(kvm, e))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
kvm_msi_to_lapic_irq(kvm, e, &irq);
|
||||||
|
|
||||||
|
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
|
||||||
|
return r;
|
||||||
|
break;
|
||||||
|
|
||||||
|
#ifdef CONFIG_KVM_XEN
|
||||||
|
case KVM_IRQ_ROUTING_XEN_EVTCHN:
|
||||||
|
if (!level)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
return kvm_xen_set_evtchn_fast(&e->xen_evtchn, kvm);
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -EWOULDBLOCK;
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
|
||||||
|
bool line_status)
|
||||||
|
{
|
||||||
|
if (!irqchip_in_kernel(kvm))
|
||||||
|
return -ENXIO;
|
||||||
|
|
||||||
|
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
|
||||||
|
irq_event->irq, irq_event->level,
|
||||||
|
line_status);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool kvm_arch_can_set_irq_routing(struct kvm *kvm)
|
||||||
|
{
|
||||||
|
return irqchip_in_kernel(kvm);
|
||||||
|
}
|
||||||
|
|
||||||
|
int kvm_set_routing_entry(struct kvm *kvm,
|
||||||
|
struct kvm_kernel_irq_routing_entry *e,
|
||||||
|
const struct kvm_irq_routing_entry *ue)
|
||||||
|
{
|
||||||
|
/* We can't check irqchip_in_kernel() here as some callers are
|
||||||
|
* currently initializing the irqchip. Other callers should therefore
|
||||||
|
* check kvm_arch_can_set_irq_routing() before calling this function.
|
||||||
|
*/
|
||||||
|
switch (ue->type) {
|
||||||
|
#ifdef CONFIG_KVM_IOAPIC
|
||||||
|
case KVM_IRQ_ROUTING_IRQCHIP:
|
||||||
|
if (irqchip_split(kvm))
|
||||||
|
return -EINVAL;
|
||||||
|
e->irqchip.pin = ue->u.irqchip.pin;
|
||||||
|
switch (ue->u.irqchip.irqchip) {
|
||||||
|
case KVM_IRQCHIP_PIC_SLAVE:
|
||||||
|
e->irqchip.pin += PIC_NUM_PINS / 2;
|
||||||
|
fallthrough;
|
||||||
|
case KVM_IRQCHIP_PIC_MASTER:
|
||||||
|
if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
|
||||||
|
return -EINVAL;
|
||||||
|
e->set = kvm_pic_set_irq;
|
||||||
|
break;
|
||||||
|
case KVM_IRQCHIP_IOAPIC:
|
||||||
|
if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
|
||||||
|
return -EINVAL;
|
||||||
|
e->set = kvm_ioapic_set_irq;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
e->irqchip.irqchip = ue->u.irqchip.irqchip;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
case KVM_IRQ_ROUTING_MSI:
|
||||||
|
e->set = kvm_set_msi;
|
||||||
|
e->msi.address_lo = ue->u.msi.address_lo;
|
||||||
|
e->msi.address_hi = ue->u.msi.address_hi;
|
||||||
|
e->msi.data = ue->u.msi.data;
|
||||||
|
|
||||||
|
if (kvm_msi_route_invalid(kvm, e))
|
||||||
|
return -EINVAL;
|
||||||
|
break;
|
||||||
|
#ifdef CONFIG_KVM_HYPERV
|
||||||
|
case KVM_IRQ_ROUTING_HV_SINT:
|
||||||
|
e->set = kvm_hv_synic_set_irq;
|
||||||
|
e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
|
||||||
|
e->hv_sint.sint = ue->u.hv_sint.sint;
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_KVM_XEN
|
||||||
|
case KVM_IRQ_ROUTING_XEN_EVTCHN:
|
||||||
|
return kvm_xen_setup_evtchn(kvm, e, ue);
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
||||||
|
struct kvm_vcpu **dest_vcpu)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
unsigned long i;
|
||||||
|
struct kvm_vcpu *vcpu;
|
||||||
|
|
||||||
|
if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||||
|
if (!kvm_apic_present(vcpu))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
|
||||||
|
irq->dest_id, irq->dest_mode))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (++r == 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
*dest_vcpu = vcpu;
|
||||||
|
}
|
||||||
|
|
||||||
|
return r == 1;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
|
||||||
|
|
||||||
|
void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
|
||||||
|
u8 vector, unsigned long *ioapic_handled_vectors)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Intercept EOI if the vCPU is the target of the new IRQ routing, or
|
||||||
|
* the vCPU has a pending IRQ from the old routing, i.e. if the vCPU
|
||||||
|
* may receive a level-triggered IRQ in the future, or already received
|
||||||
|
* level-triggered IRQ. The EOI needs to be intercepted and forwarded
|
||||||
|
* to I/O APIC emulation so that the IRQ can be de-asserted.
|
||||||
|
*/
|
||||||
|
if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode)) {
|
||||||
|
__set_bit(vector, ioapic_handled_vectors);
|
||||||
|
} else if (kvm_apic_pending_eoi(vcpu, vector)) {
|
||||||
|
__set_bit(vector, ioapic_handled_vectors);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Track the highest pending EOI for which the vCPU is NOT the
|
||||||
|
* target in the new routing. Only the EOI for the IRQ that is
|
||||||
|
* in-flight (for the old routing) needs to be intercepted, any
|
||||||
|
* future IRQs that arrive on this vCPU will be coincidental to
|
||||||
|
* the level-triggered routing and don't need to be intercepted.
|
||||||
|
*/
|
||||||
|
if ((int)vector > vcpu->arch.highest_stale_pending_ioapic_eoi)
|
||||||
|
vcpu->arch.highest_stale_pending_ioapic_eoi = vector;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
|
||||||
|
ulong *ioapic_handled_vectors)
|
||||||
|
{
|
||||||
|
struct kvm *kvm = vcpu->kvm;
|
||||||
|
struct kvm_kernel_irq_routing_entry *entry;
|
||||||
|
struct kvm_irq_routing_table *table;
|
||||||
|
u32 i, nr_ioapic_pins;
|
||||||
|
int idx;
|
||||||
|
|
||||||
|
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||||
|
table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
||||||
|
nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
|
||||||
|
kvm->arch.nr_reserved_ioapic_pins);
|
||||||
|
for (i = 0; i < nr_ioapic_pins; ++i) {
|
||||||
|
hlist_for_each_entry(entry, &table->map[i], link) {
|
||||||
|
struct kvm_lapic_irq irq;
|
||||||
|
|
||||||
|
if (entry->type != KVM_IRQ_ROUTING_MSI)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
kvm_msi_to_lapic_irq(vcpu->kvm, entry, &irq);
|
||||||
|
|
||||||
|
if (!irq.trig_mode)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
kvm_scan_ioapic_irq(vcpu, irq.dest_id, irq.dest_mode,
|
||||||
|
irq.vector, ioapic_handled_vectors);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void kvm_arch_irq_routing_update(struct kvm *kvm)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_KVM_HYPERV
|
||||||
|
kvm_hv_irq_routing_update(kvm);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (irqchip_split(kvm))
|
||||||
|
kvm_make_scan_ioapic_request(kvm);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int kvm_pi_update_irte(struct kvm_kernel_irqfd *irqfd,
|
||||||
|
struct kvm_kernel_irq_routing_entry *entry)
|
||||||
|
{
|
||||||
|
unsigned int host_irq = irqfd->producer->irq;
|
||||||
|
struct kvm *kvm = irqfd->kvm;
|
||||||
|
struct kvm_vcpu *vcpu = NULL;
|
||||||
|
struct kvm_lapic_irq irq;
|
||||||
|
int r;
|
||||||
|
|
||||||
|
	if (WARN_ON_ONCE(!irqchip_in_kernel(kvm) || !kvm_arch_has_irq_bypass()))
		return -EINVAL;

	if (entry && entry->type == KVM_IRQ_ROUTING_MSI) {
		kvm_msi_to_lapic_irq(kvm, entry, &irq);

		/*
		 * Force remapped mode if hardware doesn't support posting the
		 * virtual interrupt to a vCPU. Only IRQs are postable (NMIs,
		 * SMIs, etc. are not), and neither AMD nor Intel IOMMUs support
		 * posting multicast/broadcast IRQs. If the interrupt can't be
		 * posted, the device MSI needs to be routed to the host so that
		 * the guest's desired interrupt can be synthesized by KVM.
		 *
		 * This means that KVM can only post lowest-priority interrupts
		 * if they have a single CPU as the destination, e.g. only if
		 * the guest has affined the interrupt to a single vCPU.
		 */
		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
		    !kvm_irq_is_postable(&irq))
			vcpu = NULL;
	}

	if (!irqfd->irq_bypass_vcpu && !vcpu)
		return 0;

	r = kvm_x86_call(pi_update_irte)(irqfd, irqfd->kvm, host_irq, irqfd->gsi,
					 vcpu, irq.vector);
	if (r) {
		WARN_ON_ONCE(irqfd->irq_bypass_vcpu && !vcpu);
		irqfd->irq_bypass_vcpu = NULL;
		return r;
	}

	irqfd->irq_bypass_vcpu = vcpu;

	trace_kvm_pi_irte_update(host_irq, vcpu, irqfd->gsi, irq.vector, !!vcpu);
	return 0;
}

int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
				     struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);
	struct kvm *kvm = irqfd->kvm;
	int ret = 0;

	kvm_arch_start_assignment(irqfd->kvm);

	spin_lock_irq(&kvm->irqfds.lock);
	irqfd->producer = prod;

	if (!kvm->arch.nr_possible_bypass_irqs++)
		kvm_x86_call(pi_start_bypass)(kvm);

	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
		ret = kvm_pi_update_irte(irqfd, &irqfd->irq_entry);
		if (ret) {
			kvm->arch.nr_possible_bypass_irqs--;
			kvm_arch_end_assignment(irqfd->kvm);
		}
	}
	spin_unlock_irq(&kvm->irqfds.lock);

	return ret;
}

void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
				      struct irq_bypass_producer *prod)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(cons, struct kvm_kernel_irqfd, consumer);
	struct kvm *kvm = irqfd->kvm;
	int ret;

	WARN_ON(irqfd->producer != prod);

	/*
	 * If the producer of an IRQ that is currently being posted to a vCPU
	 * is unregistered, change the associated IRTE back to remapped mode as
	 * the IRQ has been released (or repurposed) by the device driver, i.e.
	 * KVM must relinquish control of the IRTE.
	 */
	spin_lock_irq(&kvm->irqfds.lock);

	if (irqfd->irq_entry.type == KVM_IRQ_ROUTING_MSI) {
		ret = kvm_pi_update_irte(irqfd, NULL);
		if (ret)
			pr_info("irq bypass consumer (eventfd %p) unregistration fails: %d\n",
				irqfd->consumer.eventfd, ret);
	}
	irqfd->producer = NULL;

	kvm->arch.nr_possible_bypass_irqs--;

	spin_unlock_irq(&kvm->irqfds.lock);

	kvm_arch_end_assignment(irqfd->kvm);
}

void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
				   struct kvm_kernel_irq_routing_entry *old,
				   struct kvm_kernel_irq_routing_entry *new)
{
	if (new->type != KVM_IRQ_ROUTING_MSI &&
	    old->type != KVM_IRQ_ROUTING_MSI)
		return;

	if (old->type == KVM_IRQ_ROUTING_MSI &&
	    new->type == KVM_IRQ_ROUTING_MSI &&
	    !memcmp(&old->msi, &new->msi, sizeof(new->msi)))
		return;

	kvm_pi_update_irte(irqfd, new);
}

#ifdef CONFIG_KVM_IOAPIC
#define IOAPIC_ROUTING_ENTRY(irq) \
	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)

#define PIC_ROUTING_ENTRY(irq) \
	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
#define ROUTING_ENTRY2(irq) \
	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)

static const struct kvm_irq_routing_entry default_routing[] = {
	ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
	ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
	ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
	ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
	ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
	ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
	ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
	ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
	ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
	ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
	ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
	ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
};

int kvm_setup_default_ioapic_and_pic_routing(struct kvm *kvm)
{
	return kvm_set_irq_routing(kvm, default_routing,
				   ARRAY_SIZE(default_routing), 0);
}

int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	struct kvm_pic *pic = kvm->arch.vpic;
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&chip->chip.pic, &pic->pics[0],
			sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&chip->chip.pic, &pic->pics[1],
			sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		kvm_get_ioapic(kvm, &chip->chip.ioapic);
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	struct kvm_pic *pic = kvm->arch.vpic;
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		spin_lock(&pic->lock);
		memcpy(&pic->pics[0], &chip->chip.pic,
			sizeof(struct kvm_pic_state));
		spin_unlock(&pic->lock);
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		spin_lock(&pic->lock);
		memcpy(&pic->pics[1], &chip->chip.pic,
			sizeof(struct kvm_pic_state));
		spin_unlock(&pic->lock);
		break;
	case KVM_IRQCHIP_IOAPIC:
		kvm_set_ioapic(kvm, &chip->chip.ioapic);
		break;
	default:
		r = -EINVAL;
		break;
	}
	kvm_pic_update_irq(pic);
	return r;
}
#endif
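For readers unfamiliar with the designated-initializer macros above, here is an illustrative expansion of a single ROUTING_ENTRY2() element; it is not part of the patch, and the concrete values follow directly from the SELECT_PIC() definition (GSI 3 maps to the master PIC because 3 < 8):

	/* Illustrative expansion only, not part of the patch: ROUTING_ENTRY2(3) */
	{ .gsi = 3, .type = KVM_IRQ_ROUTING_IRQCHIP,
	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = 3 } },
	{ .gsi = 3, .type = KVM_IRQ_ROUTING_IRQCHIP,
	  .u.irqchip = { .irqchip = KVM_IRQCHIP_PIC_MASTER, .pin = 3 } },

GSIs 0-15 therefore get two routing entries each (I/O APIC plus PIC), while GSIs 16-23 get an I/O APIC entry only, matching the default_routing table above.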
@ -18,6 +18,8 @@
#include <kvm/iodev.h>
#include "lapic.h"

#ifdef CONFIG_KVM_IOAPIC

#define PIC_NUM_PINS 16
#define SELECT_PIC(irq) \
	((irq) < 8 ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE)

@ -63,6 +65,34 @@ int kvm_pic_init(struct kvm *kvm);
void kvm_pic_destroy(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);

int kvm_pic_set_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
		    int irq_source_id, int level, bool line_status);

int kvm_setup_default_ioapic_and_pic_routing(struct kvm *kvm);

int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip);
int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip);

static inline int irqchip_full(struct kvm *kvm)
{
	int mode = kvm->arch.irqchip_mode;

	/* Matches smp_wmb() when setting irqchip_mode */
	smp_rmb();
	return mode == KVM_IRQCHIP_KERNEL;
}
#else /* CONFIG_KVM_IOAPIC */
static __always_inline int irqchip_full(struct kvm *kvm)
{
	return false;
}
#endif

static inline int pic_in_kernel(struct kvm *kvm)
{
	return irqchip_full(kvm);
}

static inline int irqchip_split(struct kvm *kvm)
{

@ -73,20 +103,6 @@ static inline int irqchip_split(struct kvm *kvm)
	return mode == KVM_IRQCHIP_SPLIT;
}

static inline int irqchip_kernel(struct kvm *kvm)
{
	int mode = kvm->arch.irqchip_mode;

	/* Matches smp_wmb() when setting irqchip_mode */
	smp_rmb();
	return mode == KVM_IRQCHIP_KERNEL;
}

static inline int pic_in_kernel(struct kvm *kvm)
{
	return irqchip_kernel(kvm);
}

static inline int irqchip_in_kernel(struct kvm *kvm)
{
	int mode = kvm->arch.irqchip_mode;

@ -105,7 +121,6 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);

int apic_has_pending_timer(struct kvm_vcpu *vcpu);

int kvm_setup_default_irq_routing(struct kvm *kvm);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
			     struct kvm_lapic_irq *irq,
			     struct dest_map *dest_map);
@ -1,469 +0,0 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-only
|
|
||||||
/*
|
|
||||||
* irq_comm.c: Common API for in kernel interrupt controller
|
|
||||||
* Copyright (c) 2007, Intel Corporation.
|
|
||||||
*
|
|
||||||
* Authors:
|
|
||||||
* Yaozu (Eddie) Dong <Eddie.dong@intel.com>
|
|
||||||
*
|
|
||||||
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
|
||||||
*/
|
|
||||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
||||||
|
|
||||||
#include <linux/kvm_host.h>
|
|
||||||
#include <linux/slab.h>
|
|
||||||
#include <linux/export.h>
|
|
||||||
#include <linux/rculist.h>
|
|
||||||
|
|
||||||
#include <trace/events/kvm.h>
|
|
||||||
|
|
||||||
#include "irq.h"
|
|
||||||
|
|
||||||
#include "ioapic.h"
|
|
||||||
|
|
||||||
#include "lapic.h"
|
|
||||||
|
|
||||||
#include "hyperv.h"
|
|
||||||
#include "x86.h"
|
|
||||||
#include "xen.h"
|
|
||||||
|
|
||||||
static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm *kvm, int irq_source_id, int level,
|
|
||||||
bool line_status)
|
|
||||||
{
|
|
||||||
struct kvm_pic *pic = kvm->arch.vpic;
|
|
||||||
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm *kvm, int irq_source_id, int level,
|
|
||||||
bool line_status)
|
|
||||||
{
|
|
||||||
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
|
|
||||||
return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
|
|
||||||
line_status);
|
|
||||||
}
|
|
||||||
|
|
||||||
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
|
|
||||||
struct kvm_lapic_irq *irq, struct dest_map *dest_map)
|
|
||||||
{
|
|
||||||
int r = -1;
|
|
||||||
struct kvm_vcpu *vcpu, *lowest = NULL;
|
|
||||||
unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
|
|
||||||
unsigned int dest_vcpus = 0;
|
|
||||||
|
|
||||||
if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
|
|
||||||
return r;
|
|
||||||
|
|
||||||
if (irq->dest_mode == APIC_DEST_PHYSICAL &&
|
|
||||||
irq->dest_id == 0xff && kvm_lowest_prio_delivery(irq)) {
|
|
||||||
pr_info("apic: phys broadcast and lowest prio\n");
|
|
||||||
irq->delivery_mode = APIC_DM_FIXED;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
|
|
||||||
|
|
||||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
||||||
if (!kvm_apic_present(vcpu))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
|
|
||||||
irq->dest_id, irq->dest_mode))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!kvm_lowest_prio_delivery(irq)) {
|
|
||||||
if (r < 0)
|
|
||||||
r = 0;
|
|
||||||
r += kvm_apic_set_irq(vcpu, irq, dest_map);
|
|
||||||
} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
|
|
||||||
if (!kvm_vector_hashing_enabled()) {
|
|
||||||
if (!lowest)
|
|
||||||
lowest = vcpu;
|
|
||||||
else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
|
|
||||||
lowest = vcpu;
|
|
||||||
} else {
|
|
||||||
__set_bit(i, dest_vcpu_bitmap);
|
|
||||||
dest_vcpus++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dest_vcpus != 0) {
|
|
||||||
int idx = kvm_vector_to_index(irq->vector, dest_vcpus,
|
|
||||||
dest_vcpu_bitmap, KVM_MAX_VCPUS);
|
|
||||||
|
|
||||||
lowest = kvm_get_vcpu(kvm, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (lowest)
|
|
||||||
r = kvm_apic_set_irq(lowest, irq, dest_map);
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm_lapic_irq *irq)
|
|
||||||
{
|
|
||||||
struct msi_msg msg = { .address_lo = e->msi.address_lo,
|
|
||||||
.address_hi = e->msi.address_hi,
|
|
||||||
.data = e->msi.data };
|
|
||||||
|
|
||||||
trace_kvm_msi_set_irq(msg.address_lo | (kvm->arch.x2apic_format ?
|
|
||||||
(u64)msg.address_hi << 32 : 0), msg.data);
|
|
||||||
|
|
||||||
irq->dest_id = x86_msi_msg_get_destid(&msg, kvm->arch.x2apic_format);
|
|
||||||
irq->vector = msg.arch_data.vector;
|
|
||||||
irq->dest_mode = kvm_lapic_irq_dest_mode(msg.arch_addr_lo.dest_mode_logical);
|
|
||||||
irq->trig_mode = msg.arch_data.is_level;
|
|
||||||
irq->delivery_mode = msg.arch_data.delivery_mode << 8;
|
|
||||||
irq->msi_redir_hint = msg.arch_addr_lo.redirect_hint;
|
|
||||||
irq->level = 1;
|
|
||||||
irq->shorthand = APIC_DEST_NOSHORT;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
|
|
||||||
|
|
||||||
static inline bool kvm_msi_route_invalid(struct kvm *kvm,
|
|
||||||
struct kvm_kernel_irq_routing_entry *e)
|
|
||||||
{
|
|
||||||
return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
|
|
||||||
}
|
|
||||||
|
|
||||||
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm *kvm, int irq_source_id, int level, bool line_status)
|
|
||||||
{
|
|
||||||
struct kvm_lapic_irq irq;
|
|
||||||
|
|
||||||
if (kvm_msi_route_invalid(kvm, e))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
if (!level)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
kvm_set_msi_irq(kvm, e, &irq);
|
|
||||||
|
|
||||||
return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_KVM_HYPERV
|
|
||||||
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm *kvm, int irq_source_id, int level,
|
|
||||||
bool line_status)
|
|
||||||
{
|
|
||||||
if (!level)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct kvm *kvm, int irq_source_id, int level,
|
|
||||||
bool line_status)
|
|
||||||
{
|
|
||||||
struct kvm_lapic_irq irq;
|
|
||||||
int r;
|
|
||||||
|
|
||||||
switch (e->type) {
|
|
||||||
#ifdef CONFIG_KVM_HYPERV
|
|
||||||
case KVM_IRQ_ROUTING_HV_SINT:
|
|
||||||
return kvm_hv_set_sint(e, kvm, irq_source_id, level,
|
|
||||||
line_status);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
case KVM_IRQ_ROUTING_MSI:
|
|
||||||
if (kvm_msi_route_invalid(kvm, e))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
kvm_set_msi_irq(kvm, e, &irq);
|
|
||||||
|
|
||||||
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
|
|
||||||
return r;
|
|
||||||
break;
|
|
||||||
|
|
||||||
#ifdef CONFIG_KVM_XEN
|
|
||||||
case KVM_IRQ_ROUTING_XEN_EVTCHN:
|
|
||||||
if (!level)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
return kvm_xen_set_evtchn_fast(&e->xen_evtchn, kvm);
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -EWOULDBLOCK;
|
|
||||||
}
|
|
||||||
|
|
||||||
int kvm_request_irq_source_id(struct kvm *kvm)
|
|
||||||
{
|
|
||||||
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
|
|
||||||
int irq_source_id;
|
|
||||||
|
|
||||||
mutex_lock(&kvm->irq_lock);
|
|
||||||
irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG);
|
|
||||||
|
|
||||||
if (irq_source_id >= BITS_PER_LONG) {
|
|
||||||
pr_warn("exhausted allocatable IRQ sources!\n");
|
|
||||||
irq_source_id = -EFAULT;
|
|
||||||
goto unlock;
|
|
||||||
}
|
|
||||||
|
|
||||||
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
|
|
||||||
ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
|
|
||||||
set_bit(irq_source_id, bitmap);
|
|
||||||
unlock:
|
|
||||||
mutex_unlock(&kvm->irq_lock);
|
|
||||||
|
|
||||||
return irq_source_id;
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
|
|
||||||
{
|
|
||||||
ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
|
|
||||||
ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
|
|
||||||
|
|
||||||
mutex_lock(&kvm->irq_lock);
|
|
||||||
if (irq_source_id < 0 ||
|
|
||||||
irq_source_id >= BITS_PER_LONG) {
|
|
||||||
pr_err("IRQ source ID out of range!\n");
|
|
||||||
goto unlock;
|
|
||||||
}
|
|
||||||
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
|
|
||||||
if (!irqchip_kernel(kvm))
|
|
||||||
goto unlock;
|
|
||||||
|
|
||||||
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
|
|
||||||
kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
|
|
||||||
unlock:
|
|
||||||
mutex_unlock(&kvm->irq_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
|
|
||||||
struct kvm_irq_mask_notifier *kimn)
|
|
||||||
{
|
|
||||||
mutex_lock(&kvm->irq_lock);
|
|
||||||
kimn->irq = irq;
|
|
||||||
hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
|
|
||||||
mutex_unlock(&kvm->irq_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
|
||||||
struct kvm_irq_mask_notifier *kimn)
|
|
||||||
{
|
|
||||||
mutex_lock(&kvm->irq_lock);
|
|
||||||
hlist_del_rcu(&kimn->link);
|
|
||||||
mutex_unlock(&kvm->irq_lock);
|
|
||||||
synchronize_srcu(&kvm->irq_srcu);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
|
|
||||||
bool mask)
|
|
||||||
{
|
|
||||||
struct kvm_irq_mask_notifier *kimn;
|
|
||||||
int idx, gsi;
|
|
||||||
|
|
||||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
|
||||||
gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
|
|
||||||
if (gsi != -1)
|
|
||||||
hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
|
|
||||||
if (kimn->irq == gsi)
|
|
||||||
kimn->func(kimn, mask);
|
|
||||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kvm_arch_can_set_irq_routing(struct kvm *kvm)
|
|
||||||
{
|
|
||||||
return irqchip_in_kernel(kvm);
|
|
||||||
}
|
|
||||||
|
|
||||||
int kvm_set_routing_entry(struct kvm *kvm,
|
|
||||||
struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
const struct kvm_irq_routing_entry *ue)
|
|
||||||
{
|
|
||||||
/* We can't check irqchip_in_kernel() here as some callers are
|
|
||||||
* currently initializing the irqchip. Other callers should therefore
|
|
||||||
* check kvm_arch_can_set_irq_routing() before calling this function.
|
|
||||||
*/
|
|
||||||
switch (ue->type) {
|
|
||||||
case KVM_IRQ_ROUTING_IRQCHIP:
|
|
||||||
if (irqchip_split(kvm))
|
|
||||||
return -EINVAL;
|
|
||||||
e->irqchip.pin = ue->u.irqchip.pin;
|
|
||||||
switch (ue->u.irqchip.irqchip) {
|
|
||||||
case KVM_IRQCHIP_PIC_SLAVE:
|
|
||||||
e->irqchip.pin += PIC_NUM_PINS / 2;
|
|
||||||
fallthrough;
|
|
||||||
case KVM_IRQCHIP_PIC_MASTER:
|
|
||||||
if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
|
|
||||||
return -EINVAL;
|
|
||||||
e->set = kvm_set_pic_irq;
|
|
||||||
break;
|
|
||||||
case KVM_IRQCHIP_IOAPIC:
|
|
||||||
if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
|
|
||||||
return -EINVAL;
|
|
||||||
e->set = kvm_set_ioapic_irq;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
e->irqchip.irqchip = ue->u.irqchip.irqchip;
|
|
||||||
break;
|
|
||||||
case KVM_IRQ_ROUTING_MSI:
|
|
||||||
e->set = kvm_set_msi;
|
|
||||||
e->msi.address_lo = ue->u.msi.address_lo;
|
|
||||||
e->msi.address_hi = ue->u.msi.address_hi;
|
|
||||||
e->msi.data = ue->u.msi.data;
|
|
||||||
|
|
||||||
if (kvm_msi_route_invalid(kvm, e))
|
|
||||||
return -EINVAL;
|
|
||||||
break;
|
|
||||||
#ifdef CONFIG_KVM_HYPERV
|
|
||||||
case KVM_IRQ_ROUTING_HV_SINT:
|
|
||||||
e->set = kvm_hv_set_sint;
|
|
||||||
e->hv_sint.vcpu = ue->u.hv_sint.vcpu;
|
|
||||||
e->hv_sint.sint = ue->u.hv_sint.sint;
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_KVM_XEN
|
|
||||||
case KVM_IRQ_ROUTING_XEN_EVTCHN:
|
|
||||||
return kvm_xen_setup_evtchn(kvm, e, ue);
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
|
|
||||||
struct kvm_vcpu **dest_vcpu)
|
|
||||||
{
|
|
||||||
int r = 0;
|
|
||||||
unsigned long i;
|
|
||||||
struct kvm_vcpu *vcpu;
|
|
||||||
|
|
||||||
if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
||||||
if (!kvm_apic_present(vcpu))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
|
|
||||||
irq->dest_id, irq->dest_mode))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (++r == 2)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
*dest_vcpu = vcpu;
|
|
||||||
}
|
|
||||||
|
|
||||||
return r == 1;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
|
|
||||||
|
|
||||||
#define IOAPIC_ROUTING_ENTRY(irq) \
|
|
||||||
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
|
|
||||||
.u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
|
|
||||||
#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
|
|
||||||
|
|
||||||
#define PIC_ROUTING_ENTRY(irq) \
|
|
||||||
{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \
|
|
||||||
.u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
|
|
||||||
#define ROUTING_ENTRY2(irq) \
|
|
||||||
IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
|
|
||||||
|
|
||||||
static const struct kvm_irq_routing_entry default_routing[] = {
|
|
||||||
ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
|
|
||||||
ROUTING_ENTRY2(2), ROUTING_ENTRY2(3),
|
|
||||||
ROUTING_ENTRY2(4), ROUTING_ENTRY2(5),
|
|
||||||
ROUTING_ENTRY2(6), ROUTING_ENTRY2(7),
|
|
||||||
ROUTING_ENTRY2(8), ROUTING_ENTRY2(9),
|
|
||||||
ROUTING_ENTRY2(10), ROUTING_ENTRY2(11),
|
|
||||||
ROUTING_ENTRY2(12), ROUTING_ENTRY2(13),
|
|
||||||
ROUTING_ENTRY2(14), ROUTING_ENTRY2(15),
|
|
||||||
ROUTING_ENTRY1(16), ROUTING_ENTRY1(17),
|
|
||||||
ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
|
|
||||||
ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
|
|
||||||
ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
|
|
||||||
};
|
|
||||||
|
|
||||||
int kvm_setup_default_irq_routing(struct kvm *kvm)
|
|
||||||
{
|
|
||||||
return kvm_set_irq_routing(kvm, default_routing,
|
|
||||||
ARRAY_SIZE(default_routing), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_arch_post_irq_routing_update(struct kvm *kvm)
|
|
||||||
{
|
|
||||||
if (!irqchip_split(kvm))
|
|
||||||
return;
|
|
||||||
kvm_make_scan_ioapic_request(kvm);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
|
|
||||||
u8 vector, unsigned long *ioapic_handled_vectors)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Intercept EOI if the vCPU is the target of the new IRQ routing, or
|
|
||||||
* the vCPU has a pending IRQ from the old routing, i.e. if the vCPU
|
|
||||||
* may receive a level-triggered IRQ in the future, or already received
|
|
||||||
* level-triggered IRQ. The EOI needs to be intercepted and forwarded
|
|
||||||
* to I/O APIC emulation so that the IRQ can be de-asserted.
|
|
||||||
*/
|
|
||||||
if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode)) {
|
|
||||||
__set_bit(vector, ioapic_handled_vectors);
|
|
||||||
} else if (kvm_apic_pending_eoi(vcpu, vector)) {
|
|
||||||
__set_bit(vector, ioapic_handled_vectors);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Track the highest pending EOI for which the vCPU is NOT the
|
|
||||||
* target in the new routing. Only the EOI for the IRQ that is
|
|
||||||
* in-flight (for the old routing) needs to be intercepted, any
|
|
||||||
* future IRQs that arrive on this vCPU will be coincidental to
|
|
||||||
* the level-triggered routing and don't need to be intercepted.
|
|
||||||
*/
|
|
||||||
if ((int)vector > vcpu->arch.highest_stale_pending_ioapic_eoi)
|
|
||||||
vcpu->arch.highest_stale_pending_ioapic_eoi = vector;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
|
|
||||||
ulong *ioapic_handled_vectors)
|
|
||||||
{
|
|
||||||
struct kvm *kvm = vcpu->kvm;
|
|
||||||
struct kvm_kernel_irq_routing_entry *entry;
|
|
||||||
struct kvm_irq_routing_table *table;
|
|
||||||
u32 i, nr_ioapic_pins;
|
|
||||||
int idx;
|
|
||||||
|
|
||||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
|
||||||
table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
|
||||||
nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
|
|
||||||
kvm->arch.nr_reserved_ioapic_pins);
|
|
||||||
for (i = 0; i < nr_ioapic_pins; ++i) {
|
|
||||||
hlist_for_each_entry(entry, &table->map[i], link) {
|
|
||||||
struct kvm_lapic_irq irq;
|
|
||||||
|
|
||||||
if (entry->type != KVM_IRQ_ROUTING_MSI)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
kvm_set_msi_irq(vcpu->kvm, entry, &irq);
|
|
||||||
|
|
||||||
if (!irq.trig_mode)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
kvm_scan_ioapic_irq(vcpu, irq.dest_id, irq.dest_mode,
|
|
||||||
irq.vector, ioapic_handled_vectors);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void kvm_arch_irq_routing_update(struct kvm *kvm)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_KVM_HYPERV
|
|
||||||
kvm_hv_irq_routing_update(kvm);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
@ -1455,7 +1455,7 @@ static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
	int trigger_mode;
	int __maybe_unused trigger_mode;

	/* Eoi the ioapic only if the ioapic doesn't own the vector. */
	if (!kvm_ioapic_handles_vector(apic, vector))

@ -1476,12 +1476,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
		return;
	}

#ifdef CONFIG_KVM_IOAPIC
	if (apic_test_vector(vector, apic->regs + APIC_TMR))
		trigger_mode = IOAPIC_LEVEL_TRIG;
	else
		trigger_mode = IOAPIC_EDGE_TRIG;

	kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
#endif
}

static int apic_set_eoi(struct kvm_lapic *apic)

@ -3146,8 +3148,11 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
		kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
	}
	kvm_make_request(KVM_REQ_EVENT, vcpu);

#ifdef CONFIG_KVM_IOAPIC
	if (ioapic_in_kernel(vcpu->kvm))
		kvm_rtc_eoi_tracking_restore_one(vcpu);
#endif

	vcpu->arch.apic_arb_prio = 0;
@ -18,6 +18,7 @@
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>
#include <linux/kvm_irqfd.h>

#include <asm/irq_remapping.h>
#include <asm/msr.h>

@ -29,36 +30,39 @@
#include "svm.h"

/*
 * Encode the arbitrary VM ID and the vCPU's default APIC ID, i.e the vCPU ID,
 * Encode the arbitrary VM ID and the vCPU's _index_ into the GATag so that
 * into the GATag so that KVM can retrieve the correct vCPU from a GALog entry
 * KVM can retrieve the correct vCPU from a GALog entry if an interrupt can't
 * if an interrupt can't be delivered, e.g. because the vCPU isn't running.
 * be delivered, e.g. because the vCPU isn't running. Use the vCPU's index
 * instead of its ID (a.k.a. its default APIC ID), as KVM is guaranteed a fast
 * lookup on the index, where as vCPUs whose index doesn't match their ID need
 * to walk the entire xarray of vCPUs in the worst case scenario.
 *
 * For the vCPU ID, use however many bits are currently allowed for the max
 * For the vCPU index, use however many bits are currently allowed for the max
 * guest physical APIC ID (limited by the size of the physical ID table), and
 * use whatever bits remain to assign arbitrary AVIC IDs to VMs. Note, the
 * size of the GATag is defined by hardware (32 bits), but is an opaque value
 * as far as hardware is concerned.
 */
#define AVIC_VCPU_ID_MASK AVIC_PHYSICAL_MAX_INDEX_MASK
#define AVIC_VCPU_IDX_MASK AVIC_PHYSICAL_MAX_INDEX_MASK

#define AVIC_VM_ID_SHIFT HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
#define AVIC_VM_ID_MASK (GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)

#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
#define AVIC_GATAG_TO_VCPUIDX(x) (x & AVIC_VCPU_IDX_MASK)

#define __AVIC_GATAG(vm_id, vcpu_id) ((((vm_id) & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
#define __AVIC_GATAG(vm_id, vcpu_idx) ((((vm_id) & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
	((vcpu_id) & AVIC_VCPU_ID_MASK))
	((vcpu_idx) & AVIC_VCPU_IDX_MASK))
#define AVIC_GATAG(vm_id, vcpu_id) \
#define AVIC_GATAG(vm_id, vcpu_idx) \
({ \
	u32 ga_tag = __AVIC_GATAG(vm_id, vcpu_id); \
	u32 ga_tag = __AVIC_GATAG(vm_id, vcpu_idx); \
	\
	WARN_ON_ONCE(AVIC_GATAG_TO_VCPUID(ga_tag) != (vcpu_id)); \
	WARN_ON_ONCE(AVIC_GATAG_TO_VCPUIDX(ga_tag) != (vcpu_idx)); \
	WARN_ON_ONCE(AVIC_GATAG_TO_VMID(ga_tag) != (vm_id)); \
	ga_tag; \
})

static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_IDX_MASK) == -1u);

static bool force_avic;
module_param_unsafe(force_avic, bool, 0444);

@ -75,14 +79,6 @@ static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
bool x2avic_enabled;

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;

@ -147,16 +143,16 @@ int avic_ga_log_notifier(u32 ga_tag)
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
	u32 vcpu_idx = AVIC_GATAG_TO_VCPUIDX(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
	pr_debug("SVM: %s: vm_id=%#x, vcpu_idx=%#x\n", __func__, vm_id, vcpu_idx);
	trace_kvm_avic_ga_log(vm_id, vcpu_id);
	trace_kvm_avic_ga_log(vm_id, vcpu_idx);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		vcpu = kvm_get_vcpu(&kvm_svm->kvm, vcpu_idx);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
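A rough sketch of the encode/decode pairing the new macros and avic_ga_log_notifier() rely on; it is not part of the patch, and the local variables kvm and vcpu are hypothetical. The GATag programmed into an IRTE carries the VM ID plus the vCPU index, and the GA log handler reverses it, so the final vCPU lookup is O(1) by index rather than an O(n) search by APIC ID:

	/* Sketch only, not part of the patch; 'kvm' and 'vcpu' are hypothetical. */
	u32 tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, vcpu->vcpu_idx);

	/* ...later, when hardware logs an undeliverable interrupt for 'tag'... */
	u32 vm_id = AVIC_GATAG_TO_VMID(tag);	/* used to find the owning VM, as in avic_ga_log_notifier() */
	u32 vcpu_idx = AVIC_GATAG_TO_VCPUIDX(tag);
	struct kvm_vcpu *target = kvm_get_vcpu(kvm, vcpu_idx);	/* O(1) lookup by index */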
@ -180,10 +176,8 @@ void avic_vm_destroy(struct kvm *kvm)
|
||||||
if (!enable_apicv)
|
if (!enable_apicv)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (kvm_svm->avic_logical_id_table_page)
|
free_page((unsigned long)kvm_svm->avic_logical_id_table);
|
||||||
__free_page(kvm_svm->avic_logical_id_table_page);
|
free_page((unsigned long)kvm_svm->avic_physical_id_table);
|
||||||
if (kvm_svm->avic_physical_id_table_page)
|
|
||||||
__free_page(kvm_svm->avic_physical_id_table_page);
|
|
||||||
|
|
||||||
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
|
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
|
||||||
hash_del(&kvm_svm->hnode);
|
hash_del(&kvm_svm->hnode);
|
||||||
|
|
@ -196,27 +190,19 @@ int avic_vm_init(struct kvm *kvm)
|
||||||
int err = -ENOMEM;
|
int err = -ENOMEM;
|
||||||
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
|
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
|
||||||
struct kvm_svm *k2;
|
struct kvm_svm *k2;
|
||||||
struct page *p_page;
|
|
||||||
struct page *l_page;
|
|
||||||
u32 vm_id;
|
u32 vm_id;
|
||||||
|
|
||||||
if (!enable_apicv)
|
if (!enable_apicv)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* Allocating physical APIC ID table (4KB) */
|
kvm_svm->avic_physical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
|
||||||
p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
if (!kvm_svm->avic_physical_id_table)
|
||||||
if (!p_page)
|
|
||||||
goto free_avic;
|
goto free_avic;
|
||||||
|
|
||||||
kvm_svm->avic_physical_id_table_page = p_page;
|
kvm_svm->avic_logical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
|
||||||
|
if (!kvm_svm->avic_logical_id_table)
|
||||||
/* Allocating logical APIC ID table (4KB) */
|
|
||||||
l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
|
||||||
if (!l_page)
|
|
||||||
goto free_avic;
|
goto free_avic;
|
||||||
|
|
||||||
kvm_svm->avic_logical_id_table_page = l_page;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
|
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
|
||||||
again:
|
again:
|
||||||
vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
|
vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
|
||||||
|
|
@ -242,17 +228,19 @@ free_avic:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static phys_addr_t avic_get_backing_page_address(struct vcpu_svm *svm)
|
||||||
|
{
|
||||||
|
return __sme_set(__pa(svm->vcpu.arch.apic->regs));
|
||||||
|
}
|
||||||
|
|
||||||
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
|
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
|
||||||
{
|
{
|
||||||
struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
|
struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
|
||||||
phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
|
|
||||||
phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
|
|
||||||
phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
|
|
||||||
|
|
||||||
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
|
vmcb->control.avic_backing_page = avic_get_backing_page_address(svm);
|
||||||
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
|
vmcb->control.avic_logical_id = __sme_set(__pa(kvm_svm->avic_logical_id_table));
|
||||||
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
|
vmcb->control.avic_physical_id = __sme_set(__pa(kvm_svm->avic_physical_id_table));
|
||||||
vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;
|
vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE;
|
||||||
|
|
||||||
if (kvm_apicv_activated(svm->vcpu.kvm))
|
if (kvm_apicv_activated(svm->vcpu.kvm))
|
||||||
avic_activate_vmcb(svm);
|
avic_activate_vmcb(svm);
|
||||||
|
|
@ -260,32 +248,31 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
|
||||||
avic_deactivate_vmcb(svm);
|
avic_deactivate_vmcb(svm);
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
|
|
||||||
unsigned int index)
|
|
||||||
{
|
|
||||||
u64 *avic_physical_id_table;
|
|
||||||
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
|
|
||||||
|
|
||||||
if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
|
|
||||||
(index > X2AVIC_MAX_PHYSICAL_ID))
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
|
|
||||||
|
|
||||||
return &avic_physical_id_table[index];
|
|
||||||
}
|
|
||||||
|
|
||||||
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
||||||
{
|
{
|
||||||
u64 *entry, new_entry;
|
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
|
||||||
int id = vcpu->vcpu_id;
|
|
||||||
struct vcpu_svm *svm = to_svm(vcpu);
|
struct vcpu_svm *svm = to_svm(vcpu);
|
||||||
|
u32 id = vcpu->vcpu_id;
|
||||||
|
u64 new_entry;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Inhibit AVIC if the vCPU ID is bigger than what is supported by AVIC
|
||||||
|
* hardware. Immediately clear apicv_active, i.e. don't wait until the
|
||||||
|
* KVM_REQ_APICV_UPDATE request is processed on the first KVM_RUN, as
|
||||||
|
* avic_vcpu_load() expects to be called if and only if the vCPU has
|
||||||
|
* fully initialized AVIC.
|
||||||
|
*/
|
||||||
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
|
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
|
||||||
(id > X2AVIC_MAX_PHYSICAL_ID))
|
(id > X2AVIC_MAX_PHYSICAL_ID)) {
|
||||||
return -EINVAL;
|
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
|
||||||
|
vcpu->arch.apic->apicv_active = false;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!vcpu->arch.apic->regs)
|
BUILD_BUG_ON((AVIC_MAX_PHYSICAL_ID + 1) * sizeof(new_entry) > PAGE_SIZE ||
|
||||||
|
(X2AVIC_MAX_PHYSICAL_ID + 1) * sizeof(new_entry) > PAGE_SIZE);
|
||||||
|
|
||||||
|
if (WARN_ON_ONCE(!vcpu->arch.apic->regs))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (kvm_apicv_activated(vcpu->kvm)) {
|
if (kvm_apicv_activated(vcpu->kvm)) {
|
||||||
|
|
@ -302,19 +289,21 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);
|
/* Note, fls64() returns the bit position, +1. */
|
||||||
|
BUILD_BUG_ON(__PHYSICAL_MASK_SHIFT >
|
||||||
|
fls64(AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK));
|
||||||
|
|
||||||
/* Setting AVIC backing page address in the phy APIC ID table */
|
/* Setting AVIC backing page address in the phy APIC ID table */
|
||||||
entry = avic_get_physical_id_entry(vcpu, id);
|
new_entry = avic_get_backing_page_address(svm) |
|
||||||
if (!entry)
|
AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
|
||||||
return -EINVAL;
|
svm->avic_physical_id_entry = new_entry;
|
||||||
|
|
||||||
new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
|
/*
|
||||||
AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
|
* Initialize the real table, as vCPUs must have a valid entry in order
|
||||||
AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
|
* for broadcast IPIs to function correctly (broadcast IPIs ignore
|
||||||
WRITE_ONCE(*entry, new_entry);
|
* invalid entries, i.e. aren't guaranteed to generate a VM-Exit).
|
||||||
|
*/
|
||||||
svm->avic_physical_id_cache = entry;
|
WRITE_ONCE(kvm_svm->avic_physical_id_table[id], new_entry);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -448,7 +437,7 @@ static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source
|
||||||
if (apic_x2apic_mode(source))
|
if (apic_x2apic_mode(source))
|
||||||
avic_logical_id_table = NULL;
|
avic_logical_id_table = NULL;
|
||||||
else
|
else
|
||||||
avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
|
avic_logical_id_table = kvm_svm->avic_logical_id_table;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* AVIC is inhibited if vCPUs aren't mapped 1:1 with logical
|
* AVIC is inhibited if vCPUs aren't mapped 1:1 with logical
|
||||||
|
|
@ -550,7 +539,6 @@ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
|
||||||
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
|
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
|
||||||
{
|
{
|
||||||
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
|
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
|
||||||
u32 *logical_apic_id_table;
|
|
||||||
u32 cluster, index;
|
u32 cluster, index;
|
||||||
|
|
||||||
ldr = GET_APIC_LOGICAL_ID(ldr);
|
ldr = GET_APIC_LOGICAL_ID(ldr);
|
||||||
|
|
@ -571,9 +559,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
|
||||||
return NULL;
|
return NULL;
|
||||||
index += (cluster << 2);
|
index += (cluster << 2);
|
||||||
|
|
||||||
logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
|
return &kvm_svm->avic_logical_id_table[index];
|
||||||
|
|
||||||
return &logical_apic_id_table[index];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
|
static void avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
|
||||||
|
|
@ -722,6 +708,9 @@ int avic_init_vcpu(struct vcpu_svm *svm)
|
||||||
int ret;
|
int ret;
|
||||||
struct kvm_vcpu *vcpu = &svm->vcpu;
|
struct kvm_vcpu *vcpu = &svm->vcpu;
|
||||||
|
|
||||||
|
INIT_LIST_HEAD(&svm->ir_list);
|
||||||
|
spin_lock_init(&svm->ir_list_lock);
|
||||||
|
|
||||||
if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
|
if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
|
@ -729,8 +718,6 @@ int avic_init_vcpu(struct vcpu_svm *svm)
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&svm->ir_list);
|
|
||||||
spin_lock_init(&svm->ir_list_lock);
|
|
||||||
svm->dfr_reg = APIC_DFR_FLAT;
|
svm->dfr_reg = APIC_DFR_FLAT;
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -742,316 +729,161 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
|
||||||
avic_handle_ldr_update(vcpu);
|
avic_handle_ldr_update(vcpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
|
static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
struct kvm_vcpu *vcpu = irqfd->irq_bypass_vcpu;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct amd_svm_iommu_ir *ir;
|
|
||||||
struct vcpu_svm *svm = to_svm(vcpu);
|
|
||||||
|
|
||||||
if (!kvm_arch_has_assigned_device(vcpu->kvm))
|
if (!vcpu)
|
||||||
return 0;
|
return;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags);
|
||||||
|
list_del(&irqfd->vcpu_list);
|
||||||
|
spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
|
||||||
|
unsigned int host_irq, uint32_t guest_irq,
|
||||||
|
struct kvm_vcpu *vcpu, u32 vector)
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
* Here, we go through the per-vcpu ir_list to update all existing
|
* If the IRQ was affined to a different vCPU, remove the IRTE metadata
|
||||||
* interrupt remapping table entry targeting this vcpu.
|
* from the *previous* vCPU's list.
|
||||||
*/
|
*/
|
||||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
svm_ir_list_del(irqfd);
|
||||||
|
|
||||||
if (list_empty(&svm->ir_list))
|
if (vcpu) {
|
||||||
goto out;
|
/*
|
||||||
|
* Try to enable guest_mode in IRTE, unless AVIC is inhibited,
|
||||||
|
* in which case configure the IRTE for legacy mode, but track
|
||||||
|
* the IRTE metadata so that it can be converted to guest mode
|
||||||
|
* if AVIC is enabled/uninhibited in the future.
|
||||||
|
*/
|
||||||
|
struct amd_iommu_pi_data pi_data = {
|
||||||
|
.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
|
||||||
|
vcpu->vcpu_idx),
|
||||||
|
.is_guest_mode = kvm_vcpu_apicv_active(vcpu),
|
||||||
|
.vapic_addr = avic_get_backing_page_address(to_svm(vcpu)),
|
||||||
|
.vector = vector,
|
||||||
|
};
|
||||||
|
struct vcpu_svm *svm = to_svm(vcpu);
|
||||||
|
u64 entry;
|
||||||
|
int ret;
|
||||||
|
|
||||||
list_for_each_entry(ir, &svm->ir_list, node) {
|
/*
|
||||||
if (activate)
|
* Prevent the vCPU from being scheduled out or migrated until
|
||||||
ret = amd_iommu_activate_guest_mode(ir->data);
|
* the IRTE is updated and its metadata has been added to the
|
||||||
else
|
* list of IRQs being posted to the vCPU, to ensure the IRTE
|
||||||
ret = amd_iommu_deactivate_guest_mode(ir->data);
|
* isn't programmed with stale pCPU/IsRunning information.
|
||||||
|
*/
|
||||||
|
guard(spinlock_irqsave)(&svm->ir_list_lock);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update the target pCPU for IOMMU doorbells if the vCPU is
|
||||||
|
* running. If the vCPU is NOT running, i.e. is blocking or
|
||||||
|
* scheduled out, KVM will update the pCPU info when the vCPU
|
||||||
|
* is awakened and/or scheduled in. See also avic_vcpu_load().
|
||||||
|
*/
|
||||||
|
entry = svm->avic_physical_id_entry;
|
||||||
|
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) {
|
||||||
|
pi_data.cpu = entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
|
||||||
|
} else {
|
||||||
|
pi_data.cpu = -1;
|
||||||
|
pi_data.ga_log_intr = entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = irq_set_vcpu_affinity(host_irq, &pi_data);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
return ret;
|
||||||
}
|
|
||||||
out:
|
|
||||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
/*
|
||||||
{
|
* Revert to legacy mode if the IOMMU didn't provide metadata
|
||||||
unsigned long flags;
|
* for the IRTE, which KVM needs to keep the IRTE up-to-date,
|
||||||
struct amd_svm_iommu_ir *cur;
|
* e.g. if the vCPU is migrated or AVIC is disabled.
|
||||||
|
*/
|
||||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
if (WARN_ON_ONCE(!pi_data.ir_data)) {
|
||||||
list_for_each_entry(cur, &svm->ir_list, node) {
|
irq_set_vcpu_affinity(host_irq, NULL);
|
||||||
if (cur->data != pi->ir_data)
|
return -EIO;
|
||||||
continue;
|
|
||||||
list_del(&cur->node);
|
|
||||||
kfree(cur);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
unsigned long flags;
|
|
||||||
struct amd_svm_iommu_ir *ir;
|
|
||||||
u64 entry;
|
|
||||||
|
|
||||||
if (WARN_ON_ONCE(!pi->ir_data))
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* In some cases, the existing irte is updated and re-set,
|
|
||||||
* so we need to check here if it's already been * added
|
|
||||||
* to the ir_list.
|
|
||||||
*/
|
|
||||||
if (pi->prev_ga_tag) {
|
|
||||||
struct kvm *kvm = svm->vcpu.kvm;
|
|
||||||
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
|
|
||||||
struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
|
|
||||||
struct vcpu_svm *prev_svm;
|
|
||||||
|
|
||||||
if (!prev_vcpu) {
|
|
||||||
ret = -EINVAL;
|
|
||||||
goto out;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prev_svm = to_svm(prev_vcpu);
|
irqfd->irq_bypass_data = pi_data.ir_data;
|
||||||
svm_ir_list_del(prev_svm, pi);
|
list_add(&irqfd->vcpu_list, &svm->ir_list);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
return irq_set_vcpu_affinity(host_irq, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
enum avic_vcpu_action {
|
||||||
* Allocating new amd_iommu_pi_data, which will get
|
/*
|
||||||
* add to the per-vcpu ir_list.
|
* There is no need to differentiate between activate and deactivate,
|
||||||
|
* as KVM only refreshes AVIC state when the vCPU is scheduled in and
|
||||||
|
* isn't blocking, i.e. the pCPU must always be (in)valid when AVIC is
|
||||||
|
* being (de)activated.
|
||||||
*/
|
*/
|
||||||
ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT);
|
AVIC_TOGGLE_ON_OFF = BIT(0),
|
||||||
if (!ir) {
|
AVIC_ACTIVATE = AVIC_TOGGLE_ON_OFF,
|
||||||
ret = -ENOMEM;
|
AVIC_DEACTIVATE = AVIC_TOGGLE_ON_OFF,
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
ir->data = pi->ir_data;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&svm->ir_list_lock, flags);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update the target pCPU for IOMMU doorbells if the vCPU is running.
|
* No unique action is required to deal with a vCPU that stops/starts
|
||||||
* If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
|
* running. A vCPU that starts running by definition stops blocking as
|
||||||
* will update the pCPU info when the vCPU awkened and/or scheduled in.
|
* well, and a vCPU that stops running can't have been blocking, i.e.
|
||||||
* See also avic_vcpu_load().
|
* doesn't need to toggle GALogIntr.
|
||||||
*/
|
*/
|
||||||
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
AVIC_START_RUNNING = 0,
|
||||||
if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
|
AVIC_STOP_RUNNING = 0,
|
||||||
amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
|
|
||||||
true, pi->ir_data);
|
|
||||||
|
|
||||||
list_add(&ir->node, &svm->ir_list);
|
/*
|
||||||
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
* When a vCPU starts blocking, KVM needs to set the GALogIntr flag
|
||||||
out:
|
* int all associated IRTEs so that KVM can wake the vCPU if an IRQ is
|
||||||
return ret;
|
* sent to the vCPU.
|
||||||
}
|
*/
|
||||||
|
AVIC_START_BLOCKING = BIT(1),
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
static void avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu,
|
||||||
* Note:
|
enum avic_vcpu_action action)
|
||||||
* The HW cannot support posting multicast/broadcast
|
|
||||||
* interrupts to a vCPU. So, we still use legacy interrupt
|
|
||||||
* remapping for these kind of interrupts.
|
|
||||||
*
|
|
||||||
* For lowest-priority interrupts, we only support
|
|
||||||
* those with single CPU as the destination, e.g. user
|
|
||||||
* configures the interrupts via /proc/irq or uses
|
|
||||||
* irqbalance to make the interrupts single-CPU.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
|
||||||
struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
|
|
||||||
{
|
{
|
||||||
struct kvm_lapic_irq irq;
|
bool ga_log_intr = (action & AVIC_START_BLOCKING);
|
||||||
struct kvm_vcpu *vcpu = NULL;
|
|
||||||
|
|
||||||
kvm_set_msi_irq(kvm, e, &irq);
|
|
||||||
|
|
||||||
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
|
|
||||||
!kvm_irq_is_postable(&irq)) {
|
|
||||||
pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
|
|
||||||
__func__, irq.vector);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
|
|
||||||
irq.vector);
|
|
||||||
*svm = to_svm(vcpu);
|
|
||||||
vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
|
|
||||||
vcpu_info->vector = irq.vector;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* avic_pi_update_irte - set IRTE for Posted-Interrupts
|
|
||||||
*
|
|
||||||
* @kvm: kvm
|
|
||||||
* @host_irq: host irq of the interrupt
|
|
||||||
* @guest_irq: gsi of the interrupt
|
|
||||||
* @set: set or unset PI
|
|
||||||
* returns 0 on success, < 0 on failure
|
|
||||||
*/
|
|
||||||
int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
|
|
||||||
uint32_t guest_irq, bool set)
|
|
||||||
{
|
|
||||||
struct kvm_kernel_irq_routing_entry *e;
|
|
||||||
struct kvm_irq_routing_table *irq_rt;
|
|
||||||
bool enable_remapped_mode = true;
|
|
||||||
int idx, ret = 0;
|
|
||||||
|
|
||||||
if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
|
|
||||||
__func__, host_irq, guest_irq, set);
|
|
||||||
|
|
||||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
|
||||||
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
|
||||||
|
|
||||||
if (guest_irq >= irq_rt->nr_rt_entries ||
|
|
||||||
hlist_empty(&irq_rt->map[guest_irq])) {
|
|
||||||
pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
|
|
||||||
guest_irq, irq_rt->nr_rt_entries);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
|
|
||||||
struct vcpu_data vcpu_info;
|
|
||||||
struct vcpu_svm *svm = NULL;
|
|
||||||
|
|
||||||
if (e->type != KVM_IRQ_ROUTING_MSI)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Here, we setup with legacy mode in the following cases:
|
|
||||||
* 1. When cannot target interrupt to a specific vcpu.
|
|
||||||
* 2. Unsetting posted interrupt.
|
|
-	 *	3. APIC virtualization is disabled for the vcpu.
-	 *	4. IRQ has incompatible delivery mode (SMI, INIT, etc)
-	 */
-	if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
-	    kvm_vcpu_apicv_active(&svm->vcpu)) {
-		struct amd_iommu_pi_data pi;
-
-		enable_remapped_mode = false;
-
-		/* Try to enable guest_mode in IRTE */
-		pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
-				    AVIC_HPA_MASK);
-		pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
-				       svm->vcpu.vcpu_id);
-		pi.is_guest_mode = true;
-		pi.vcpu_data = &vcpu_info;
-		ret = irq_set_vcpu_affinity(host_irq, &pi);
-
-		/**
-		 * Here, we successfully setting up vcpu affinity in
-		 * IOMMU guest mode. Now, we need to store the posted
-		 * interrupt information in a per-vcpu ir_list so that
-		 * we can reference to them directly when we update vcpu
-		 * scheduling information in IOMMU irte.
-		 */
-		if (!ret && pi.is_guest_mode)
-			svm_ir_list_add(svm, &pi);
-	}
-
-	if (!ret && svm) {
-		trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
-					 e->gsi, vcpu_info.vector,
-					 vcpu_info.pi_desc_addr, set);
-	}
-
-	if (ret < 0) {
-		pr_err("%s: failed to update PI IRTE\n", __func__);
-		goto out;
-	}
-	}
-
-	ret = 0;
-	if (enable_remapped_mode) {
-		/* Use legacy mode in IRTE */
-		struct amd_iommu_pi_data pi;
-
-		/**
-		 * Here, pi is used to:
-		 * - Tell IOMMU to use legacy mode for this interrupt.
-		 * - Retrieve ga_tag of prior interrupt remapping data.
-		 */
-		pi.prev_ga_tag = 0;
-		pi.is_guest_mode = false;
-		ret = irq_set_vcpu_affinity(host_irq, &pi);
-
-		/**
-		 * Check if the posted interrupt was previously
-		 * setup with the guest_mode by checking if the ga_tag
-		 * was cached. If so, we need to clean up the per-vcpu
-		 * ir_list.
-		 */
-		if (!ret && pi.prev_ga_tag) {
-			int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
-			struct kvm_vcpu *vcpu;
-
-			vcpu = kvm_get_vcpu_by_id(kvm, id);
-			if (vcpu)
-				svm_ir_list_del(to_svm(vcpu), &pi);
-		}
-	}
-out:
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-	return ret;
-}

-static inline int
-avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
-{
-	int ret = 0;
-	struct amd_svm_iommu_ir *ir;
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_kernel_irqfd *irqfd;

 	lockdep_assert_held(&svm->ir_list_lock);

-	if (!kvm_arch_has_assigned_device(vcpu->kvm))
-		return 0;
-
 	/*
 	 * Here, we go through the per-vcpu ir_list to update all existing
 	 * interrupt remapping table entry targeting this vcpu.
 	 */
 	if (list_empty(&svm->ir_list))
-		return 0;
+		return;

-	list_for_each_entry(ir, &svm->ir_list, node) {
-		ret = amd_iommu_update_ga(cpu, r, ir->data);
-		if (ret)
-			return ret;
+	list_for_each_entry(irqfd, &svm->ir_list, vcpu_list) {
+		void *data = irqfd->irq_bypass_data;
+
+		if (!(action & AVIC_TOGGLE_ON_OFF))
+			WARN_ON_ONCE(amd_iommu_update_ga(data, cpu, ga_log_intr));
+		else if (cpu >= 0)
+			WARN_ON_ONCE(amd_iommu_activate_guest_mode(data, cpu, ga_log_intr));
+		else
+			WARN_ON_ONCE(amd_iommu_deactivate_guest_mode(data));
 	}
-	return 0;
 }

-void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
+			     enum avic_vcpu_action action)
 {
-	u64 entry;
+	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	int h_physical_id = kvm_cpu_get_apicid(cpu);
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned long flags;
+	u64 entry;

 	lockdep_assert_preemption_disabled();

 	if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
 		return;

-	/*
-	 * No need to update anything if the vCPU is blocking, i.e. if the vCPU
-	 * is being scheduled in after being preempted.  The CPU entries in the
-	 * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
-	 * If the vCPU was migrated, its new CPU value will be stuffed when the
-	 * vCPU unblocks.
-	 */
-	if (kvm_vcpu_is_blocking(vcpu))
+	if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE))
 		return;

 	/*
@@ -1063,38 +895,57 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	 */
 	spin_lock_irqsave(&svm->ir_list_lock, flags);

-	entry = READ_ONCE(*(svm->avic_physical_id_cache));
+	entry = svm->avic_physical_id_entry;
 	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

-	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
+	entry &= ~(AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK |
+		   AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);
 	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
 	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

-	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
-	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+	svm->avic_physical_id_entry = entry;
+
+	/*
+	 * If IPI virtualization is disabled, clear IsRunning when updating the
+	 * actual Physical ID table, so that the CPU never sees IsRunning=1.
+	 * Keep the APIC ID up-to-date in the entry to minimize the chances of
+	 * things going sideways if hardware peeks at the ID.
+	 */
+	if (!enable_ipiv)
+		entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
+
+	WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
+
+	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action);

 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
 }

-void avic_vcpu_put(struct kvm_vcpu *vcpu)
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	u64 entry;
+	/*
+	 * No need to update anything if the vCPU is blocking, i.e. if the vCPU
+	 * is being scheduled in after being preempted.  The CPU entries in the
+	 * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
+	 * If the vCPU was migrated, its new CPU value will be stuffed when the
+	 * vCPU unblocks.
+	 */
+	if (kvm_vcpu_is_blocking(vcpu))
+		return;
+
+	__avic_vcpu_load(vcpu, cpu, AVIC_START_RUNNING);
+}
+
+static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action)
+{
+	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	struct vcpu_svm *svm = to_svm(vcpu);
 	unsigned long flags;
+	u64 entry = svm->avic_physical_id_entry;

 	lockdep_assert_preemption_disabled();

-	/*
-	 * Note, reading the Physical ID entry outside of ir_list_lock is safe
-	 * as only the pCPU that has loaded (or is loading) the vCPU is allowed
-	 * to modify the entry, and preemption is disabled.  I.e. the vCPU
-	 * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
-	 * recursively.
-	 */
-	entry = READ_ONCE(*(svm->avic_physical_id_cache));
-
-	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
-	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
+	if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE))
 		return;

 	/*
@@ -1107,13 +958,62 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	 */
 	spin_lock_irqsave(&svm->ir_list_lock, flags);

-	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
+	avic_update_iommu_vcpu_affinity(vcpu, -1, action);

+	WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR);
+
+	/*
+	 * Keep the previous APIC ID in the entry so that a rogue doorbell from
+	 * hardware is at least restricted to a CPU associated with the vCPU.
+	 */
 	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
-	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+
+	if (enable_ipiv)
+		WRITE_ONCE(kvm_svm->avic_physical_id_table[vcpu->vcpu_id], entry);
+
+	/*
+	 * Note!  Don't set AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR in the table as
+	 * it's a synthetic flag that usurps an unused should-be-zero bit.
+	 */
+	if (action & AVIC_START_BLOCKING)
+		entry |= AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR;
+
+	svm->avic_physical_id_entry = entry;
+
 	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+}
+
+void avic_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Note, reading the Physical ID entry outside of ir_list_lock is safe
+	 * as only the pCPU that has loaded (or is loading) the vCPU is allowed
+	 * to modify the entry, and preemption is disabled.  I.e. the vCPU
+	 * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
+	 * recursively.
+	 */
+	u64 entry = to_svm(vcpu)->avic_physical_id_entry;
+
+	/*
+	 * Nothing to do if IsRunning == '0' due to vCPU blocking, i.e. if the
+	 * vCPU is preempted while its in the process of blocking.  WARN if the
+	 * vCPU wasn't running and isn't blocking, KVM shouldn't attempt to put
+	 * the AVIC if it wasn't previously loaded.
+	 */
+	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) {
+		if (WARN_ON_ONCE(!kvm_vcpu_is_blocking(vcpu)))
+			return;
+
+		/*
+		 * The vCPU was preempted while blocking, ensure its IRTEs are
+		 * configured to generate GA Log Interrupts.
+		 */
+		if (!(WARN_ON_ONCE(!(entry & AVIC_PHYSICAL_ID_ENTRY_GA_LOG_INTR))))
+			return;
+	}
+
+	__avic_vcpu_put(vcpu, kvm_vcpu_is_blocking(vcpu) ? AVIC_START_BLOCKING :
+							   AVIC_STOP_RUNNING);
 }

 void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)

@@ -1142,19 +1042,18 @@ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)

 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
-	bool activated = kvm_vcpu_apicv_active(vcpu);
-
 	if (!enable_apicv)
 		return;

+	/* APICv should only be toggled on/off while the vCPU is running. */
+	WARN_ON_ONCE(kvm_vcpu_is_blocking(vcpu));
+
 	avic_refresh_virtual_apic_mode(vcpu);

-	if (activated)
-		avic_vcpu_load(vcpu, vcpu->cpu);
+	if (kvm_vcpu_apicv_active(vcpu))
+		__avic_vcpu_load(vcpu, vcpu->cpu, AVIC_ACTIVATE);
 	else
-		avic_vcpu_put(vcpu);
-
-	avic_set_pi_irte_mode(vcpu, activated);
+		__avic_vcpu_put(vcpu, AVIC_DEACTIVATE);
 }

 void avic_vcpu_blocking(struct kvm_vcpu *vcpu)

@@ -1162,20 +1061,25 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
 	if (!kvm_vcpu_apicv_active(vcpu))
 		return;

 	/*
-	 * Unload the AVIC when the vCPU is about to block, _before_
-	 * the vCPU actually blocks.
+	 * Unload the AVIC when the vCPU is about to block, _before_ the vCPU
+	 * actually blocks.
 	 *
-	 * Any IRQs that arrive before IsRunning=0 will not cause an
-	 * incomplete IPI vmexit on the source, therefore vIRR will also
-	 * be checked by kvm_vcpu_check_block() before blocking.  The
-	 * memory barrier implicit in set_current_state orders writing
-	 * IsRunning=0 before reading the vIRR.  The processor needs a
-	 * matching memory barrier on interrupt delivery between writing
-	 * IRR and reading IsRunning; the lack of this barrier might be
-	 * the cause of errata #1235).
+	 * Note, any IRQs that arrive before IsRunning=0 will not cause an
+	 * incomplete IPI vmexit on the source; kvm_vcpu_check_block() handles
+	 * this by checking vIRR one last time before blocking.  The memory
+	 * barrier implicit in set_current_state orders writing IsRunning=0
+	 * before reading the vIRR.  The processor needs a matching memory
+	 * barrier on interrupt delivery between writing IRR and reading
+	 * IsRunning; the lack of this barrier might be the cause of errata #1235).
+	 *
+	 * Clear IsRunning=0 even if guest IRQs are disabled, i.e. even if KVM
+	 * doesn't need to detect events for scheduling purposes.  The doorbell
+	 * used to signal running vCPUs cannot be blocked, i.e. will perturb the
+	 * CPU and cause noisy neighbor problems if the VM is sending interrupts
+	 * to the vCPU while it's scheduled out.
 	 */
-	avic_vcpu_put(vcpu);
+	__avic_vcpu_put(vcpu, AVIC_START_BLOCKING);
 }

 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)

@@ -1228,6 +1132,14 @@ bool avic_hardware_setup(void)
 	if (x2avic_enabled)
 		pr_info("x2AVIC enabled\n");

+	/*
+	 * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
+	 * due to erratum 1235, which results in missed VM-Exits on the sender
+	 * and thus missed wake events for blocking vCPUs due to the CPU
+	 * failing to see a software update to clear IsRunning.
+	 */
+	enable_ipiv = enable_ipiv && boot_cpu_data.x86 != 0x17;
+
 	amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);

 	return true;
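As an aside, here is a minimal sketch of how the Physical APIC ID table entry bits manipulated in the AVIC hunks above fit together. The helper name is hypothetical and is not part of the patch; the masks are the kernel's existing AVIC definitions used verbatim above.

/* Illustrative sketch only: pack a host APIC ID and IsRunning into an entry. */
static inline u64 example_avic_physid_entry(u64 entry, int h_physical_id, bool is_running)
{
	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
	if (is_running)
		entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	else
		entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	return entry;
}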
@@ -232,6 +232,7 @@ module_param(tsc_scaling, int, 0444);
  */
 static bool avic;
 module_param(avic, bool, 0444);
+module_param(enable_ipiv, bool, 0444);

 module_param(enable_device_posted_irqs, bool, 0444);

@@ -1490,6 +1491,8 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);

+	WARN_ON_ONCE(!list_empty(&svm->ir_list));
+
 	svm_leave_nested(vcpu);
 	svm_free_nested(svm);

@@ -5581,6 +5584,7 @@ static __init int svm_hardware_setup(void)
 	enable_apicv = avic = avic && avic_hardware_setup();

 	if (!enable_apicv) {
+		enable_ipiv = false;
 		svm_x86_ops.vcpu_blocking = NULL;
 		svm_x86_ops.vcpu_unblocking = NULL;
 		svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;

@@ -123,8 +123,8 @@ struct kvm_svm {

 	/* Struct members for AVIC */
 	u32 avic_vm_id;
-	struct page *avic_logical_id_table_page;
-	struct page *avic_physical_id_table_page;
+	u32 *avic_logical_id_table;
+	u64 *avic_physical_id_table;
 	struct hlist_node hnode;

 	struct kvm_sev_info sev_info;

@@ -306,14 +306,22 @@ struct vcpu_svm {

 	u32 ldr_reg;
 	u32 dfr_reg;
-	struct page *avic_backing_page;
-	u64 *avic_physical_id_cache;
+
+	/* This is essentially a shadow of the vCPU's actual entry in the
+	 * Physical ID table that is programmed into the VMCB, i.e. that is
+	 * seen by the CPU.  If IPI virtualization is disabled, IsRunning is
+	 * only ever set in the shadow, i.e. is never propagated to the "real"
+	 * table, so that hardware never sees IsRunning=1.
+	 */
+	u64 avic_physical_id_entry;

 	/*
-	 * Per-vcpu list of struct amd_svm_iommu_ir:
-	 * This is used mainly to store interrupt remapping information used
-	 * when update the vcpu affinity. This avoids the need to scan for
-	 * IRTE and try to match ga_tag in the IOMMU driver.
+	 * Per-vCPU list of irqfds that are eligible to post IRQs directly to
+	 * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass).  The list
+	 * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the
+	 * target pCPU), when AVIC is toggled on/off (to (de)activate bypass),
+	 * and if the irqfd becomes ineligible for posting (to put the IRTE
+	 * back into remapped mode).
 	 */
 	struct list_head ir_list;
 	spinlock_t ir_list_lock;

@@ -721,7 +729,8 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
 	BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |	\
 	BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |	\
 	BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) |	\
-	BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED)	\
+	BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED) |	\
+	BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG)	\
 )

 bool avic_hardware_setup(void);

@@ -736,8 +745,9 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void avic_vcpu_put(struct kvm_vcpu *vcpu);
 void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
-int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
-			uint32_t guest_irq, bool set);
+int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+			unsigned int host_irq, uint32_t guest_irq,
+			struct kvm_vcpu *vcpu, u32 vector);
 void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void avic_ring_doorbell(struct kvm_vcpu *vcpu);

@@ -260,6 +260,86 @@ TRACE_EVENT(kvm_cpuid,
 		  __entry->used_max_basic ? ", used max basic" : "")
 );

+#define kvm_deliver_mode	\
+	{0x0, "Fixed"},		\
+	{0x1, "LowPrio"},	\
+	{0x2, "SMI"},		\
+	{0x3, "Res3"},		\
+	{0x4, "NMI"},		\
+	{0x5, "INIT"},		\
+	{0x6, "SIPI"},		\
+	{0x7, "ExtINT"}
+
+#ifdef CONFIG_KVM_IOAPIC
+TRACE_EVENT(kvm_ioapic_set_irq,
+	    TP_PROTO(__u64 e, int pin, bool coalesced),
+	    TP_ARGS(e, pin, coalesced),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		e		)
+		__field(	int,		pin		)
+		__field(	bool,		coalesced	)
+	),
+
+	TP_fast_assign(
+		__entry->e		= e;
+		__entry->pin		= pin;
+		__entry->coalesced	= coalesced;
+	),
+
+	TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s",
+		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
+		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->e & (1<<11)) ? "logical" : "physical",
+		  (__entry->e & (1<<15)) ? "level" : "edge",
+		  (__entry->e & (1<<16)) ? "|masked" : "",
+		  __entry->coalesced ? " (coalesced)" : "")
+);
+
+TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
+	    TP_PROTO(__u64 e),
+	    TP_ARGS(e),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		e	)
+	),
+
+	TP_fast_assign(
+		__entry->e	= e;
+	),
+
+	TP_printk("dst %x vec %u (%s|%s|%s%s)",
+		  (u8)(__entry->e >> 56), (u8)__entry->e,
+		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->e & (1<<11)) ? "logical" : "physical",
+		  (__entry->e & (1<<15)) ? "level" : "edge",
+		  (__entry->e & (1<<16)) ? "|masked" : "")
+);
+#endif
+
+TRACE_EVENT(kvm_msi_set_irq,
+	    TP_PROTO(__u64 address, __u64 data),
+	    TP_ARGS(address, data),
+
+	TP_STRUCT__entry(
+		__field(	__u64,		address	)
+		__field(	__u64,		data	)
+	),
+
+	TP_fast_assign(
+		__entry->address	= address;
+		__entry->data		= data;
+	),
+
+	TP_printk("dst %llx vec %u (%s|%s|%s%s)",
+		  (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00),
+		  (u8)__entry->data,
+		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
+		  (__entry->address & (1<<2)) ? "logical" : "physical",
+		  (__entry->data & (1<<15)) ? "level" : "edge",
+		  (__entry->address & (1<<3)) ? "|rh" : "")
+);
+
 #define AREG(x) { APIC_##x, "APIC_" #x }

 #define kvm_trace_symbol_apic			\

@@ -1096,37 +1176,32 @@ TRACE_EVENT(kvm_smm_transition,
  * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC.
  */
 TRACE_EVENT(kvm_pi_irte_update,
-	TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
-		 unsigned int gsi, unsigned int gvec,
-		 u64 pi_desc_addr, bool set),
-	TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set),
+	TP_PROTO(unsigned int host_irq, struct kvm_vcpu *vcpu,
+		 unsigned int gsi, unsigned int gvec, bool set),
+	TP_ARGS(host_irq, vcpu, gsi, gvec, set),

	TP_STRUCT__entry(
		__field(	unsigned int,	host_irq	)
-		__field(	unsigned int,	vcpu_id		)
+		__field(	int,		vcpu_id		)
		__field(	unsigned int,	gsi		)
		__field(	unsigned int,	gvec		)
-		__field(	u64,		pi_desc_addr	)
		__field(	bool,		set		)
	),

	TP_fast_assign(
		__entry->host_irq	= host_irq;
-		__entry->vcpu_id	= vcpu_id;
+		__entry->vcpu_id	= vcpu ? vcpu->vcpu_id : -1;
		__entry->gsi		= gsi;
		__entry->gvec		= gvec;
-		__entry->pi_desc_addr	= pi_desc_addr;
		__entry->set		= set;
	),

-	TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
-		  "gvec: 0x%x, pi_desc_addr: 0x%llx",
+	TP_printk("PI is %s for irq %u, vcpu %d, gsi: 0x%x, gvec: 0x%x",
		  __entry->set ? "enabled and being updated" : "disabled",
		  __entry->host_irq,
		  __entry->vcpu_id,
		  __entry->gsi,
-		  __entry->gvec,
-		  __entry->pi_desc_addr)
+		  __entry->gvec)
 );

 /*
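The kvm_deliver_mode table added above keys the printed name off bits 10:8 of the routing entry or MSI data. A hedged, standalone sketch of that same decode follows; the helper is hypothetical and only restates what the __print_symbolic() calls in the new tracepoints do.

/* Illustrative only: name the APIC delivery mode the new tracepoints print. */
static inline const char *example_deliver_mode_name(u64 msi_data)
{
	static const char * const names[] = {
		"Fixed", "LowPrio", "SMI", "Res3", "NMI", "INIT", "SIPI", "ExtINT",
	};

	return names[(msi_data >> 8) & 0x7];
}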
@@ -15,7 +15,6 @@ extern bool __read_mostly enable_ept;
 extern bool __read_mostly enable_unrestricted_guest;
 extern bool __read_mostly enable_ept_ad_bits;
 extern bool __read_mostly enable_pml;
-extern bool __read_mostly enable_ipiv;
 extern int __read_mostly pt_mode;

 #define PT_MODE_SYSTEM		0

@@ -1014,7 +1014,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.nested_ops = &vmx_nested_ops,

 	.pi_update_irte = vmx_pi_update_irte,
-	.pi_start_assignment = vmx_pi_start_assignment,
+	.pi_start_bypass = vmx_pi_start_bypass,

 #ifdef CONFIG_X86_64
 	.set_hv_timer = vt_op(set_hv_timer),

@@ -2,6 +2,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

 #include <linux/kvm_host.h>
+#include <linux/kvm_irqfd.h>

 #include <asm/irq_remapping.h>
 #include <asm/cpu.h>

@@ -72,13 +73,10 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	/*
 	 * If the vCPU wasn't on the wakeup list and wasn't migrated, then the
 	 * full update can be skipped as neither the vector nor the destination
-	 * needs to be changed.
+	 * needs to be changed.  Clear SN even if there is no assigned device,
+	 * again for simplicity.
 	 */
 	if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR && vcpu->cpu == cpu) {
-		/*
-		 * Clear SN if it was set due to being preempted.  Again, do
-		 * this even if there is no assigned device for simplicity.
-		 */
 		if (pi_test_and_clear_sn(pi_desc))
 			goto after_clear_sn;
 		return;

@@ -148,8 +146,13 @@ after_clear_sn:

 static bool vmx_can_use_vtd_pi(struct kvm *kvm)
 {
+	/*
+	 * Note, reading the number of possible bypass IRQs can race with a
+	 * bypass IRQ being attached to the VM.  vmx_pi_start_bypass() ensures
+	 * blockng vCPUs will see an elevated count or get KVM_REQ_UNBLOCK.
+	 */
 	return irqchip_in_kernel(kvm) && kvm_arch_has_irq_bypass() &&
-	       kvm_arch_has_assigned_device(kvm);
+	       READ_ONCE(kvm->arch.nr_possible_bypass_irqs);
 }

 /*

@@ -224,17 +227,23 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 	if (!vmx_needs_pi_wakeup(vcpu))
 		return;

-	if (kvm_vcpu_is_blocking(vcpu) &&
+	/*
+	 * If the vCPU is blocking with IRQs enabled and ISN'T being preempted,
+	 * enable the wakeup handler so that notification IRQ wakes the vCPU as
+	 * expected.  There is no need to enable the wakeup handler if the vCPU
+	 * is preempted between setting its wait state and manually scheduling
+	 * out, as the task is still runnable, i.e. doesn't need a wake event
+	 * from KVM to be scheduled in.
+	 *
+	 * If the wakeup handler isn't being enabled, Suppress Notifications as
+	 * the cost of propagating PIR.IRR to PID.ON is negligible compared to
+	 * the cost of a spurious IRQ, and vCPU put/load is a slow path.
+	 */
+	if (!vcpu->preempted && kvm_vcpu_is_blocking(vcpu) &&
 	    ((is_td_vcpu(vcpu) && tdx_interrupt_allowed(vcpu)) ||
 	     (!is_td_vcpu(vcpu) && !vmx_interrupt_blocked(vcpu))))
 		pi_enable_wakeup_handler(vcpu);
-
-	/*
-	 * Set SN when the vCPU is preempted.  Note, the vCPU can both be seen
-	 * as blocking and preempted, e.g. if it's preempted between setting
-	 * its wait state and manually scheduling out.
-	 */
-	if (vcpu->preempted)
+	else
 		pi_set_sn(pi_desc);
 }

@@ -281,99 +290,30 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)

 /*
- * Bail out of the block loop if the VM has an assigned
- * device, but the blocking vCPU didn't reconfigure the
- * PI.NV to the wakeup vector, i.e. the assigned device
- * came along after the initial check in vmx_vcpu_pi_put().
+ * Kick all vCPUs when the first possible bypass IRQ is attached to a VM, as
+ * blocking vCPUs may scheduled out without reconfiguring PID.NV to the wakeup
+ * vector, i.e. if the bypass IRQ came along after vmx_vcpu_pi_put().
  */
-void vmx_pi_start_assignment(struct kvm *kvm)
+void vmx_pi_start_bypass(struct kvm *kvm)
 {
-	if (!kvm_arch_has_irq_bypass())
+	if (WARN_ON_ONCE(!vmx_can_use_vtd_pi(kvm)))
 		return;

 	kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
 }

-/*
- * vmx_pi_update_irte - set IRTE for Posted-Interrupts
- *
- * @kvm: kvm
- * @host_irq: host irq of the interrupt
- * @guest_irq: gsi of the interrupt
- * @set: set or unset PI
- * returns 0 on success, < 0 on failure
- */
-int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
-		       uint32_t guest_irq, bool set)
+int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+		       unsigned int host_irq, uint32_t guest_irq,
+		       struct kvm_vcpu *vcpu, u32 vector)
 {
-	struct kvm_kernel_irq_routing_entry *e;
-	struct kvm_irq_routing_table *irq_rt;
-	bool enable_remapped_mode = true;
-	struct kvm_lapic_irq irq;
-	struct kvm_vcpu *vcpu;
-	struct vcpu_data vcpu_info;
-	int idx, ret = 0;
-
-	if (!vmx_can_use_vtd_pi(kvm))
-		return 0;
-
-	idx = srcu_read_lock(&kvm->irq_srcu);
-	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-	if (guest_irq >= irq_rt->nr_rt_entries ||
-	    hlist_empty(&irq_rt->map[guest_irq])) {
-		pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
-			     guest_irq, irq_rt->nr_rt_entries);
-		goto out;
-	}
-
-	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
-		if (e->type != KVM_IRQ_ROUTING_MSI)
-			continue;
-		/*
-		 * VT-d PI cannot support posting multicast/broadcast
-		 * interrupts to a vCPU, we still use interrupt remapping
-		 * for these kind of interrupts.
-		 *
-		 * For lowest-priority interrupts, we only support
-		 * those with single CPU as the destination, e.g. user
-		 * configures the interrupts via /proc/irq or uses
-		 * irqbalance to make the interrupts single-CPU.
-		 *
-		 * We will support full lowest-priority interrupt later.
-		 *
-		 * In addition, we can only inject generic interrupts using
-		 * the PI mechanism, refuse to route others through it.
-		 */
-
-		kvm_set_msi_irq(kvm, e, &irq);
-		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
-		    !kvm_irq_is_postable(&irq))
-			continue;
-
-		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
-		vcpu_info.vector = irq.vector;
-
-		trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
-					 vcpu_info.vector, vcpu_info.pi_desc_addr, set);
-
-		if (!set)
-			continue;
-
-		enable_remapped_mode = false;
-
-		ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
-		if (ret < 0) {
-			printk(KERN_INFO "%s: failed to update PI IRTE\n",
-			       __func__);
-			goto out;
-		}
-	}
-
-	if (enable_remapped_mode)
-		ret = irq_set_vcpu_affinity(host_irq, NULL);
-
-	ret = 0;
-out:
-	srcu_read_unlock(&kvm->irq_srcu, idx);
-	return ret;
+	if (vcpu) {
+		struct intel_iommu_pi_data pi_data = {
+			.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)),
+			.vector = vector,
+		};
+
+		return irq_set_vcpu_affinity(host_irq, &pi_data);
+	} else {
+		return irq_set_vcpu_affinity(host_irq, NULL);
+	}
 }
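With the reworked prototype above, the vcpu argument is what selects between posting and falling back to remapped mode. A hedged sketch of the two call shapes; the wrapper function is hypothetical, only the vmx_pi_update_irte() calls reflect the patch.

/* Illustrative only: post to a single target vCPU, or fall back to remapping. */
static int example_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
			       unsigned int host_irq, uint32_t guest_irq,
			       struct kvm_vcpu *target, u32 vector)
{
	if (target)
		return vmx_pi_update_irte(irqfd, kvm, host_irq, guest_irq,
					  target, vector);

	return vmx_pi_update_irte(irqfd, kvm, host_irq, guest_irq, NULL, 0);
}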
@@ -3,6 +3,9 @@
 #define __KVM_X86_VMX_POSTED_INTR_H

 #include <linux/bitmap.h>
+#include <linux/find.h>
+#include <linux/kvm_host.h>

 #include <asm/posted_intr.h>

 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);

@@ -11,9 +14,10 @@ void pi_wakeup_handler(void);
 void __init pi_init_cpu(int cpu);
 void pi_apicv_pre_state_restore(struct kvm_vcpu *vcpu);
 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
-int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
-		       uint32_t guest_irq, bool set);
-void vmx_pi_start_assignment(struct kvm *kvm);
+int vmx_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
+		       unsigned int host_irq, uint32_t guest_irq,
+		       struct kvm_vcpu *vcpu, u32 vector);
+void vmx_pi_start_bypass(struct kvm *kvm);

 static inline int pi_find_highest_vector(struct pi_desc *pi_desc)
 {

@@ -113,8 +113,6 @@ static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, 0444);

 module_param(enable_apicv, bool, 0444);

-bool __read_mostly enable_ipiv = true;
 module_param(enable_ipiv, bool, 0444);

 module_param(enable_device_posted_irqs, bool, 0444);

@@ -226,6 +226,9 @@ EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 bool __read_mostly enable_apicv = true;
 EXPORT_SYMBOL_GPL(enable_apicv);

+bool __read_mostly enable_ipiv = true;
+EXPORT_SYMBOL_GPL(enable_ipiv);
+
 bool __read_mostly enable_device_posted_irqs = true;
 EXPORT_SYMBOL_GPL(enable_device_posted_irqs);

@@ -4634,17 +4637,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
	case KVM_CAP_EXT_CPUID:
	case KVM_CAP_EXT_EMUL_CPUID:
	case KVM_CAP_CLOCKSOURCE:
+#ifdef CONFIG_KVM_IOAPIC
	case KVM_CAP_PIT:
+	case KVM_CAP_PIT2:
+	case KVM_CAP_PIT_STATE2:
+	case KVM_CAP_REINJECT_CONTROL:
+#endif
	case KVM_CAP_NOP_IO_DELAY:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_SYNC_MMU:
	case KVM_CAP_USER_NMI:
-	case KVM_CAP_REINJECT_CONTROL:
	case KVM_CAP_IRQ_INJECT_STATUS:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_IOEVENTFD_NO_LENGTH:
-	case KVM_CAP_PIT2:
-	case KVM_CAP_PIT_STATE2:
	case KVM_CAP_SET_IDENTITY_MAP_ADDR:
	case KVM_CAP_VCPU_EVENTS:
 #ifdef CONFIG_KVM_HYPERV

@@ -6401,135 +6407,6 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
	return 0;
 }

-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-	struct kvm_pic *pic = kvm->arch.vpic;
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_PIC_MASTER:
-		memcpy(&chip->chip.pic, &pic->pics[0],
-			sizeof(struct kvm_pic_state));
-		break;
-	case KVM_IRQCHIP_PIC_SLAVE:
-		memcpy(&chip->chip.pic, &pic->pics[1],
-			sizeof(struct kvm_pic_state));
-		break;
-	case KVM_IRQCHIP_IOAPIC:
-		kvm_get_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-	struct kvm_pic *pic = kvm->arch.vpic;
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_PIC_MASTER:
-		spin_lock(&pic->lock);
-		memcpy(&pic->pics[0], &chip->chip.pic,
-			sizeof(struct kvm_pic_state));
-		spin_unlock(&pic->lock);
-		break;
-	case KVM_IRQCHIP_PIC_SLAVE:
-		spin_lock(&pic->lock);
-		memcpy(&pic->pics[1], &chip->chip.pic,
-			sizeof(struct kvm_pic_state));
-		spin_unlock(&pic->lock);
-		break;
-	case KVM_IRQCHIP_IOAPIC:
-		kvm_set_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	kvm_pic_update_irq(pic);
-	return r;
-}
-
-static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
-{
-	struct kvm_kpit_state *kps = &kvm->arch.vpit->pit_state;
-
-	BUILD_BUG_ON(sizeof(*ps) != sizeof(kps->channels));
-
-	mutex_lock(&kps->lock);
-	memcpy(ps, &kps->channels, sizeof(*ps));
-	mutex_unlock(&kps->lock);
-	return 0;
-}
-
-static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
-{
-	int i;
-	struct kvm_pit *pit = kvm->arch.vpit;
-
-	mutex_lock(&pit->pit_state.lock);
-	memcpy(&pit->pit_state.channels, ps, sizeof(*ps));
-	for (i = 0; i < 3; i++)
-		kvm_pit_load_count(pit, i, ps->channels[i].count, 0);
-	mutex_unlock(&pit->pit_state.lock);
-	return 0;
-}
-
-static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
-{
-	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
-		sizeof(ps->channels));
-	ps->flags = kvm->arch.vpit->pit_state.flags;
-	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-	memset(&ps->reserved, 0, sizeof(ps->reserved));
-	return 0;
-}
-
-static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
-{
-	int start = 0;
-	int i;
-	u32 prev_legacy, cur_legacy;
-	struct kvm_pit *pit = kvm->arch.vpit;
-
-	mutex_lock(&pit->pit_state.lock);
-	prev_legacy = pit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
-	cur_legacy = ps->flags & KVM_PIT_FLAGS_HPET_LEGACY;
-	if (!prev_legacy && cur_legacy)
-		start = 1;
-	memcpy(&pit->pit_state.channels, &ps->channels,
-	       sizeof(pit->pit_state.channels));
-	pit->pit_state.flags = ps->flags;
-	for (i = 0; i < 3; i++)
-		kvm_pit_load_count(pit, i, pit->pit_state.channels[i].count,
-				   start && i == 0);
-	mutex_unlock(&pit->pit_state.lock);
-	return 0;
-}
-
-static int kvm_vm_ioctl_reinject(struct kvm *kvm,
-				 struct kvm_reinject_control *control)
-{
-	struct kvm_pit *pit = kvm->arch.vpit;
-
-	/* pit->pit_state.lock was overloaded to prevent userspace from getting
-	 * an inconsistent state after running multiple KVM_REINJECT_CONTROL
-	 * ioctls in parallel.  Use a separate lock if that ioctl isn't rare.
-	 */
-	mutex_lock(&pit->pit_state.lock);
-	kvm_pit_set_reinject(pit, control->pit_reinject);
-	mutex_unlock(&pit->pit_state.lock);
-
-	return 0;
-}
-
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {

@@ -6549,18 +6426,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
	kvm_vcpu_kick(vcpu);
 }

-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
-			  bool line_status)
-{
-	if (!irqchip_in_kernel(kvm))
-		return -ENXIO;
-
-	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level,
-					line_status);
-	return 0;
-}
-
 int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap)
 {

@@ -7072,9 +6937,11 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -ENOTTY;

+#ifdef CONFIG_KVM_IOAPIC
	/*
	 * This union makes it completely explicit to gcc-3.x
-	 * that these two variables' stack usage should be
+	 * that these three variables' stack usage should be
	 * combined, not added together.
	 */
	union {

@@ -7082,6 +6949,7 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
		struct kvm_pit_state2 ps2;
		struct kvm_pit_config pit_config;
	} u;
+#endif

	switch (ioctl) {
	case KVM_SET_TSS_ADDR:

@@ -7105,6 +6973,7 @@ set_identity_unlock:
	case KVM_SET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
		break;
+#ifdef CONFIG_KVM_IOAPIC
	case KVM_CREATE_IRQCHIP: {
		mutex_lock(&kvm->lock);

@@ -7126,7 +6995,7 @@ set_identity_unlock:
			goto create_irqchip_unlock;
		}

-		r = kvm_setup_default_irq_routing(kvm);
+		r = kvm_setup_default_ioapic_and_pic_routing(kvm);
		if (r) {
			kvm_ioapic_destroy(kvm);
			kvm_pic_destroy(kvm);

@@ -7174,7 +7043,7 @@ set_identity_unlock:
		}

		r = -ENXIO;
-		if (!irqchip_kernel(kvm))
+		if (!irqchip_full(kvm))
			goto get_irqchip_out;
		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
		if (r)

@@ -7198,7 +7067,7 @@ set_identity_unlock:
		}

		r = -ENXIO;
-		if (!irqchip_kernel(kvm))
+		if (!irqchip_full(kvm))
			goto set_irqchip_out;
		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
 set_irqchip_out:

@@ -7271,6 +7140,7 @@ set_pit2_out:
		r = kvm_vm_ioctl_reinject(kvm, &control);
		break;
	}
+#endif
	case KVM_SET_BOOT_CPU_ID:
		r = 0;
		mutex_lock(&kvm->lock);

@@ -10730,8 +10600,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)

	if (irqchip_split(vcpu->kvm))
		kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
+#ifdef CONFIG_KVM_IOAPIC
	else if (ioapic_in_kernel(vcpu->kvm))
		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+#endif

	if (is_guest_mode(vcpu))
		vcpu->arch.load_eoi_exitmap_pending = true;

@@ -12801,15 +12673,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
	if (ret)
		goto out_uninit_mmu;

-	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
	atomic_set(&kvm->arch.noncoherent_dma_count, 0);

-	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
-	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
-		&kvm->arch.irq_sources_bitmap);
-
	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
	mutex_init(&kvm->arch.apic_map_lock);
	seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock);

@@ -12940,7 +12805,9 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
	cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
	cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);

+#ifdef CONFIG_KVM_IOAPIC
	kvm_free_pit(kvm);
+#endif

	kvm_mmu_pre_destroy_vm(kvm);
	static_call_cond(kvm_x86_vm_pre_destroy)(kvm);

@@ -12964,8 +12831,10 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
	}
	kvm_destroy_vcpus(kvm);
	kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
+#ifdef CONFIG_KVM_IOAPIC
	kvm_pic_destroy(kvm);
	kvm_ioapic_destroy(kvm);
+#endif
	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
	kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
	kvm_mmu_uninit_vm(kvm);

@@ -13577,8 +13446,7 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)

 void kvm_arch_start_assignment(struct kvm *kvm)
 {
-	if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
-		kvm_x86_call(pi_start_assignment)(kvm);
+	atomic_inc(&kvm->arch.assigned_device_count);
 }
 EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);

@@ -13629,77 +13497,6 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);

-int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
-				      struct irq_bypass_producer *prod)
-{
-	struct kvm_kernel_irqfd *irqfd =
-		container_of(cons, struct kvm_kernel_irqfd, consumer);
-	struct kvm *kvm = irqfd->kvm;
-	int ret;
-
-	kvm_arch_start_assignment(irqfd->kvm);
-
-	spin_lock_irq(&kvm->irqfds.lock);
-	irqfd->producer = prod;
-
-	ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
-					   prod->irq, irqfd->gsi, 1);
-	if (ret)
-		kvm_arch_end_assignment(irqfd->kvm);
-
-	spin_unlock_irq(&kvm->irqfds.lock);
-
-	return ret;
-}
-
-void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
-				      struct irq_bypass_producer *prod)
-{
-	int ret;
-	struct kvm_kernel_irqfd *irqfd =
-		container_of(cons, struct kvm_kernel_irqfd, consumer);
-	struct kvm *kvm = irqfd->kvm;
-
-	WARN_ON(irqfd->producer != prod);
-
-	/*
-	 * When producer of consumer is unregistered, we change back to
-	 * remapped mode, so we can re-use the current implementation
-	 * when the irq is masked/disabled or the consumer side (KVM
-	 * int this case doesn't want to receive the interrupts.
-	 */
-	spin_lock_irq(&kvm->irqfds.lock);
-	irqfd->producer = NULL;
-
-	ret = kvm_x86_call(pi_update_irte)(irqfd->kvm,
-					   prod->irq, irqfd->gsi, 0);
-	if (ret)
-		printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
-		       " fails: %d\n", irqfd->consumer.token, ret);
-
-	spin_unlock_irq(&kvm->irqfds.lock);
-
-	kvm_arch_end_assignment(irqfd->kvm);
-}
-
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
-				  uint32_t guest_irq, bool set)
-{
-	return kvm_x86_call(pi_update_irte)(kvm, host_irq, guest_irq, set);
-}
-
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
-				  struct kvm_kernel_irq_routing_entry *new)
-{
-	if (old->type != KVM_IRQ_ROUTING_MSI ||
-	    new->type != KVM_IRQ_ROUTING_MSI)
-		return true;
-
-	return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
-}
-
 bool kvm_vector_hashing_enabled(void)
 {
	return vector_hashing;

@@ -14099,7 +13896,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window_update);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);

@@ -368,6 +368,14 @@ static void mshv_irqfd_queue_proc(struct file *file, wait_queue_head_t *wqh,
		container_of(polltbl, struct mshv_irqfd, irqfd_polltbl);

	irqfd->irqfd_wqh = wqh;

+	/*
+	 * TODO: Ensure there isn't already an exclusive, priority waiter, e.g.
+	 * that the irqfd isn't already bound to another partition.  Only the
+	 * first exclusive waiter encountered will be notified, and
+	 * add_wait_queue_priority() doesn't enforce exclusivity.
+	 */
+	irqfd->irqfd_wait.flags |= WQ_FLAG_EXCLUSIVE;
	add_wait_queue_priority(wqh, &irqfd->irqfd_wait);
 }
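The mshv hunk above combines WQ_FLAG_EXCLUSIVE with add_wait_queue_priority() so that at most one waiter is notified per event. A hedged sketch of that registration pattern in isolation; the function and its arguments are hypothetical, the waitqueue APIs are the existing kernel ones.

/* Illustrative only: register an exclusive, priority waiter on a waitqueue. */
static void example_register_priority_waiter(wait_queue_head_t *wqh,
					     wait_queue_entry_t *wait,
					     wait_queue_func_t wake_fn)
{
	init_waitqueue_func_entry(wait, wake_fn);
	wait->flags |= WQ_FLAG_EXCLUSIVE;	/* only the first priority waiter is woken */
	add_wait_queue_priority(wqh, wait);
}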
@@ -1054,7 +1054,6 @@ struct irq_2_irte {
 };

 struct amd_ir_data {
-	u32 cached_ga_tag;
	struct amd_iommu *iommu;
	struct irq_2_irte irq_2_irte;
	struct msi_msg msi_entry;
@@ -3804,13 +3804,70 @@ static const struct irq_domain_ops amd_ir_domain_ops = {
	.deactivate = irq_remapping_deactivate,
 };

-int amd_iommu_activate_guest_mode(void *data)
+static void __amd_iommu_update_ga(struct irte_ga *entry, int cpu,
+				  bool ga_log_intr)
+{
+	if (cpu >= 0) {
+		entry->lo.fields_vapic.destination =
+					APICID_TO_IRTE_DEST_LO(cpu);
+		entry->hi.fields.destination =
+					APICID_TO_IRTE_DEST_HI(cpu);
+		entry->lo.fields_vapic.is_run = true;
+		entry->lo.fields_vapic.ga_log_intr = false;
+	} else {
+		entry->lo.fields_vapic.is_run = false;
+		entry->lo.fields_vapic.ga_log_intr = ga_log_intr;
+	}
+}
+
+/*
+ * Update the pCPU information for an IRTE that is configured to post IRQs to
+ * a vCPU, without issuing an IOMMU invalidation for the IRTE.
+ *
+ * If the vCPU is associated with a pCPU (@cpu >= 0), configure the Destination
+ * with the pCPU's APIC ID, set IsRun, and clear GALogIntr.  If the vCPU isn't
+ * associated with a pCPU (@cpu < 0), clear IsRun and set/clear GALogIntr based
+ * on input from the caller (e.g. KVM only requests GALogIntr when the vCPU is
+ * blocking and requires a notification wake event).  I.e. treat vCPUs that are
+ * associated with a pCPU as running.  This API is intended to be used when a
+ * vCPU is scheduled in/out (or stops running for any reason), to do a fast
+ * update of IsRun, GALogIntr, and (conditionally) Destination.
+ *
+ * Per the IOMMU spec, the Destination, IsRun, and GATag fields are not cached
+ * and thus don't require an invalidation to ensure the IOMMU consumes fresh
+ * information.
+ */
+int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr)
+{
+	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+	if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+		return -EINVAL;
+
+	if (!entry || !entry->lo.fields_vapic.guest_mode)
+		return 0;
+
+	if (!ir_data->iommu)
+		return -ENODEV;
+
+	__amd_iommu_update_ga(entry, cpu, ga_log_intr);
+
+	return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
+				ir_data->irq_2_irte.index, entry);
+}
+EXPORT_SYMBOL(amd_iommu_update_ga);
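A hedged usage sketch for the helper above; the caller is hypothetical, but the argument meanings follow the comment block in the patch and mirror how the AVIC hunks earlier invoke it.

/* Illustrative only: fast-path IRTE update when a vCPU is scheduled in or out. */
static void example_vcpu_sched_update(void *irq_bypass_data, int pcpu, bool vcpu_is_blocking)
{
	/*
	 * pcpu >= 0: the vCPU runs on that pCPU, so IsRun is set and GALogIntr
	 * is cleared.  pcpu < 0: IsRun is cleared and GALogIntr is set only
	 * when the (blocking) vCPU needs a wake event.
	 */
	WARN_ON_ONCE(amd_iommu_update_ga(irq_bypass_data, pcpu, vcpu_is_blocking));
}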
+int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr)
 {
	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
	u64 valid;

-	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
+	if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+		return -EINVAL;
+
+	if (!entry)
		return 0;

	valid = entry->lo.fields_vapic.valid;

@@ -3820,11 +3877,12 @@ int amd_iommu_activate_guest_mode(void *data)

	entry->lo.fields_vapic.valid       = valid;
	entry->lo.fields_vapic.guest_mode  = 1;
-	entry->lo.fields_vapic.ga_log_intr = 1;
	entry->hi.fields.ga_root_ptr       = ir_data->ga_root_ptr;
	entry->hi.fields.vector            = ir_data->ga_vector;
	entry->lo.fields_vapic.ga_tag      = ir_data->ga_tag;

+	__amd_iommu_update_ga(entry, cpu, ga_log_intr);
+
	return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
			      ir_data->irq_2_irte.index, entry);
 }

@@ -3837,8 +3895,10 @@ int amd_iommu_deactivate_guest_mode(void *data)
	struct irq_cfg *cfg = ir_data->cfg;
	u64 valid;

-	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
-	    !entry || !entry->lo.fields_vapic.guest_mode)
+	if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+		return -EINVAL;
+
+	if (!entry || !entry->lo.fields_vapic.guest_mode)
		return 0;

	valid = entry->lo.fields_remap.valid;

@@ -3860,11 +3920,10 @@ int amd_iommu_deactivate_guest_mode(void *data)
 }
 EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);

-static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *info)
 {
	int ret;
-	struct amd_iommu_pi_data *pi_data = vcpu_info;
-	struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
+	struct amd_iommu_pi_data *pi_data = info;
	struct amd_ir_data *ir_data = data->chip_data;
	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
	struct iommu_dev_data *dev_data;

@@ -3885,25 +3944,20 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
		return -EINVAL;

	ir_data->cfg = irqd_cfg(data);
-	pi_data->ir_data = ir_data;

-	pi_data->prev_ga_tag = ir_data->cached_ga_tag;
-	if (pi_data->is_guest_mode) {
-		ir_data->ga_root_ptr = (pi_data->base >> 12);
-		ir_data->ga_vector = vcpu_pi_info->vector;
+	if (pi_data) {
+		pi_data->ir_data = ir_data;
+
+		ir_data->ga_root_ptr = (pi_data->vapic_addr >> 12);
+		ir_data->ga_vector = pi_data->vector;
		ir_data->ga_tag = pi_data->ga_tag;
-		ret = amd_iommu_activate_guest_mode(ir_data);
-		if (!ret)
-			ir_data->cached_ga_tag = pi_data->ga_tag;
+		if (pi_data->is_guest_mode)
+			ret = amd_iommu_activate_guest_mode(ir_data, pi_data->cpu,
+							    pi_data->ga_log_intr);
|
else
|
||||||
|
ret = amd_iommu_deactivate_guest_mode(ir_data);
|
||||||
} else {
|
} else {
|
||||||
ret = amd_iommu_deactivate_guest_mode(ir_data);
|
ret = amd_iommu_deactivate_guest_mode(ir_data);
|
||||||
|
|
||||||
/*
|
|
||||||
* This communicates the ga_tag back to the caller
|
|
||||||
* so that it can do all the necessary clean up.
|
|
||||||
*/
|
|
||||||
if (!ret)
|
|
||||||
ir_data->cached_ga_tag = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -3995,29 +4049,4 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int amd_iommu_update_ga(int cpu, bool is_run, void *data)
|
|
||||||
{
|
|
||||||
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
|
|
||||||
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
|
|
||||||
|
|
||||||
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
|
|
||||||
!entry || !entry->lo.fields_vapic.guest_mode)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (!ir_data->iommu)
|
|
||||||
return -ENODEV;
|
|
||||||
|
|
||||||
if (cpu >= 0) {
|
|
||||||
entry->lo.fields_vapic.destination =
|
|
||||||
APICID_TO_IRTE_DEST_LO(cpu);
|
|
||||||
entry->hi.fields.destination =
|
|
||||||
APICID_TO_IRTE_DEST_HI(cpu);
|
|
||||||
}
|
|
||||||
entry->lo.fields_vapic.is_run = is_run;
|
|
||||||
|
|
||||||
return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
|
|
||||||
ir_data->irq_2_irte.index, entry);
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(amd_iommu_update_ga);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
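Illustrative sketch (not part of the diff): how a hypervisor-side caller might drive the reworked amd_iommu_update_ga() on vCPU sched-in/sched-out. The wrapper name and the is_blocking parameter are assumptions for illustration; only the amd_iommu_update_ga() signature comes from the change above.

    #include <linux/amd-iommu.h>

    /* Hypothetical helper; @ir_data is the opaque cookie the IOMMU hands back to KVM. */
    static void example_vcpu_update_iommu(void *ir_data, int cpu, bool is_blocking)
    {
        /*
         * cpu >= 0: the vCPU is running on that pCPU, so Destination is rewritten,
         * IsRun is set, and GALogIntr is cleared.  cpu < 0: the vCPU is not running;
         * request a GA log interrupt only when the vCPU is blocking and needs a wake.
         */
        WARN_ON_ONCE(amd_iommu_update_ga(ir_data, cpu, is_blocking));
    }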
@@ -1244,10 +1244,10 @@ static void intel_ir_compose_msi_msg(struct irq_data *irq_data,
 static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
 {
 	struct intel_ir_data *ir_data = data->chip_data;
-	struct vcpu_data *vcpu_pi_info = info;
+	struct intel_iommu_pi_data *pi_data = info;
 
 	/* stop posting interrupts, back to the default mode */
-	if (!vcpu_pi_info) {
+	if (!pi_data) {
 		__intel_ir_reconfigure_irte(data, true);
 	} else {
 		struct irte irte_pi;
@@ -1265,10 +1265,10 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
 		/* Update the posted mode fields */
 		irte_pi.p_pst = 1;
 		irte_pi.p_urgent = 0;
-		irte_pi.p_vector = vcpu_pi_info->vector;
-		irte_pi.pda_l = (vcpu_pi_info->pi_desc_addr >>
+		irte_pi.p_vector = pi_data->vector;
+		irte_pi.pda_l = (pi_data->pi_desc_addr >>
 				(32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
-		irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
+		irte_pi.pda_h = (pi_data->pi_desc_addr >> 32) &
 				~(-1UL << PDA_HIGH_BIT);
 
 		ir_data->irq_2_iommu.posted_vcpu = true;
@@ -342,10 +342,10 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map)
 	return irq_set_vcpu_affinity(irq, &info);
 }
 
-int its_unmap_vlpi(int irq)
+void its_unmap_vlpi(int irq)
 {
 	irq_clear_status_flags(irq, IRQ_DISABLE_UNLAZY);
-	return irq_set_vcpu_affinity(irq, NULL);
+	WARN_ON_ONCE(irq_set_vcpu_affinity(irq, NULL));
 }
 
 int its_prop_update_vlpi(int irq, u8 config, bool inv)
@@ -505,15 +505,11 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 	if (ret)
 		goto out_put_eventfd_ctx;
 
-	ctx->producer.token = trigger;
-	ctx->producer.irq = irq;
-	ret = irq_bypass_register_producer(&ctx->producer);
+	ret = irq_bypass_register_producer(&ctx->producer, trigger, irq);
 	if (unlikely(ret)) {
 		dev_info(&pdev->dev,
-		"irq bypass producer (token %p) registration fails: %d\n",
-		ctx->producer.token, ret);
+		"irq bypass producer (eventfd %p) registration fails: %d\n",
+		trigger, ret);
 
-		ctx->producer.token = NULL;
 	}
 	ctx->trigger = trigger;
@@ -212,11 +212,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 	if (!vq->call_ctx.ctx)
 		return;
 
-	vq->call_ctx.producer.irq = irq;
-	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
+	ret = irq_bypass_register_producer(&vq->call_ctx.producer,
+					   vq->call_ctx.ctx, irq);
 	if (unlikely(ret))
-		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
-			 qid, vq->call_ctx.producer.token, ret);
+		dev_info(&v->dev, "vq %u, irq bypass producer (eventfd %p) registration fails, ret = %d\n",
+			 qid, vq->call_ctx.ctx, ret);
 }
 
 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
@@ -712,7 +712,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 			if (ops->get_status(vdpa) &
 			    VIRTIO_CONFIG_S_DRIVER_OK)
 				vhost_vdpa_unsetup_vq_irq(v, idx);
-			vq->call_ctx.producer.token = NULL;
 		}
 		break;
 	}
@@ -753,7 +752,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		cb.callback = vhost_vdpa_virtqueue_cb;
 		cb.private = vq;
 		cb.trigger = vq->call_ctx.ctx;
-		vq->call_ctx.producer.token = vq->call_ctx.ctx;
 		if (ops->get_status(vdpa) &
 		    VIRTIO_CONFIG_S_DRIVER_OK)
 			vhost_vdpa_setup_vq_irq(v, idx);
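Illustrative sketch (not part of the diff): the producer side of the reworked registration, as a driver like VFIO or vhost-vdpa would now use it. The example_* names are hypothetical; only the irq_bypass_register_producer()/irq_bypass_unregister_producer() signatures come from the changes above.

    #include <linux/irqbypass.h>
    #include <linux/eventfd.h>

    static struct irq_bypass_producer example_producer;

    /* Register the device's trigger eventfd and host IRQ as a bypass producer. */
    static int example_wire_producer(struct eventfd_ctx *trigger, int irq)
    {
        /* On success, the manager pairs us with any consumer bound to @trigger. */
        return irq_bypass_register_producer(&example_producer, trigger, irq);
    }

    static void example_unwire_producer(void)
    {
        irq_bypass_unregister_producer(&example_producer);
    }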
@@ -434,7 +434,7 @@ struct kvm_kernel_irq_routing_entry;
 int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
 			       struct kvm_kernel_irq_routing_entry *irq_entry);
 
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq);
+void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq);
 
 int vgic_v4_load(struct kvm_vcpu *vcpu);
 void vgic_v4_commit(struct kvm_vcpu *vcpu);
@@ -12,20 +12,6 @@
 
 struct amd_iommu;
 
-/*
- * This is mainly used to communicate information back-and-forth
- * between SVM and IOMMU for setting up and tearing down posted
- * interrupt
- */
-struct amd_iommu_pi_data {
-	u32 ga_tag;
-	u32 prev_ga_tag;
-	u64 base;
-	bool is_guest_mode;
-	struct vcpu_data *vcpu_data;
-	void *ir_data;
-};
-
 #ifdef CONFIG_AMD_IOMMU
 
 struct task_struct;
@@ -44,10 +30,8 @@ static inline void amd_iommu_detect(void) { }
 /* IOMMU AVIC Function */
 extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
 
-extern int
-amd_iommu_update_ga(int cpu, bool is_run, void *data);
-
-extern int amd_iommu_activate_guest_mode(void *data);
+extern int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr);
+extern int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr);
 extern int amd_iommu_deactivate_guest_mode(void *data);
 
 #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
@@ -58,13 +42,12 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
 	return 0;
 }
 
-static inline int
-amd_iommu_update_ga(int cpu, bool is_run, void *data)
+static inline int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr)
 {
 	return 0;
 }
 
-static inline int amd_iommu_activate_guest_mode(void *data)
+static inline int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr)
 {
 	return 0;
 }
@@ -10,6 +10,7 @@
 
 #include <linux/list.h>
 
+struct eventfd_ctx;
 struct irq_bypass_consumer;
 
 /*
@@ -18,20 +19,22 @@ struct irq_bypass_consumer;
  * The IRQ bypass manager is a simple set of lists and callbacks that allows
  * IRQ producers (ex. physical interrupt sources) to be matched to IRQ
  * consumers (ex. virtualization hardware that allows IRQ bypass or offload)
- * via a shared token (ex. eventfd_ctx).  Producers and consumers register
- * independently.  When a token match is found, the optional @stop callback
- * will be called for each participant.  The pair will then be connected via
- * the @add_* callbacks, and finally the optional @start callback will allow
- * any final coordination.  When either participant is unregistered, the
- * process is repeated using the @del_* callbacks in place of the @add_*
- * callbacks.  Match tokens must be unique per producer/consumer, 1:N pairings
- * are not supported.
+ * via a shared eventfd_ctx.  Producers and consumers register independently.
+ * When a producer and consumer are paired, i.e. an eventfd match is found, the
+ * optional @stop callback will be called for each participant.  The pair will
+ * then be connected via the @add_* callbacks, and finally the optional @start
+ * callback will allow any final coordination.  When either participant is
+ * unregistered, the process is repeated using the @del_* callbacks in place of
+ * the @add_* callbacks.  eventfds must be unique per producer/consumer, 1:N
+ * pairings are not supported.
  */
 
+struct irq_bypass_consumer;
+
 /**
  * struct irq_bypass_producer - IRQ bypass producer definition
- * @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer (non-NULL)
+ * @eventfd: eventfd context used to match producers and consumers
+ * @consumer: The connected consumer (NULL if no connection)
  * @irq: Linux IRQ number for the producer device
  * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
  * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
@@ -43,8 +46,8 @@ struct irq_bypass_consumer;
  * for a physical device assigned to a VM.
  */
 struct irq_bypass_producer {
-	struct list_head node;
-	void *token;
+	struct eventfd_ctx *eventfd;
+	struct irq_bypass_consumer *consumer;
 	int irq;
 	int (*add_consumer)(struct irq_bypass_producer *,
 			    struct irq_bypass_consumer *);
@@ -56,8 +59,8 @@ struct irq_bypass_producer {
 
 /**
  * struct irq_bypass_consumer - IRQ bypass consumer definition
- * @node: IRQ bypass manager private list management
- * @token: opaque token to match between producer and consumer (non-NULL)
+ * @eventfd: eventfd context used to match producers and consumers
+ * @producer: The connected producer (NULL if no connection)
  * @add_producer: Connect the IRQ consumer to an IRQ producer
 * @del_producer: Disconnect the IRQ consumer from an IRQ producer
 * @stop: Perform any quiesce operations necessary prior to add/del (optional)
@@ -69,8 +72,9 @@ struct irq_bypass_producer {
 * portions of the interrupt handling to the VM.
 */
 struct irq_bypass_consumer {
-	struct list_head node;
-	void *token;
+	struct eventfd_ctx *eventfd;
+	struct irq_bypass_producer *producer;
+
 	int (*add_producer)(struct irq_bypass_consumer *,
 			    struct irq_bypass_producer *);
 	void (*del_producer)(struct irq_bypass_consumer *,
@@ -79,9 +83,11 @@ struct irq_bypass_consumer {
 	void (*start)(struct irq_bypass_consumer *);
 };
 
-int irq_bypass_register_producer(struct irq_bypass_producer *);
-void irq_bypass_unregister_producer(struct irq_bypass_producer *);
-int irq_bypass_register_consumer(struct irq_bypass_consumer *);
-void irq_bypass_unregister_consumer(struct irq_bypass_consumer *);
+int irq_bypass_register_producer(struct irq_bypass_producer *producer,
+				 struct eventfd_ctx *eventfd, int irq);
+void irq_bypass_unregister_producer(struct irq_bypass_producer *producer);
+int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer,
+				 struct eventfd_ctx *eventfd);
+void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer);
 
 #endif /* IRQBYPASS_H */
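Illustrative sketch (not part of the diff): a minimal consumer using the new eventfd-keyed API declared above. All example_* names are made up; the callback and registration signatures are taken from the header as changed here.

    #include <linux/irqbypass.h>
    #include <linux/eventfd.h>

    static int example_add_producer(struct irq_bypass_consumer *cons,
                                    struct irq_bypass_producer *prod)
    {
        /* Wire prod->irq into the virtualization hardware here. */
        return 0;
    }

    static void example_del_producer(struct irq_bypass_consumer *cons,
                                     struct irq_bypass_producer *prod)
    {
        /* Tear down the posted-interrupt mapping for prod->irq here. */
    }

    static struct irq_bypass_consumer example_consumer = {
        .add_producer = example_add_producer,
        .del_producer = example_del_producer,
    };

    /* @eventfd obtained elsewhere, e.g. via eventfd_ctx_fdget() on the irqfd's fd. */
    static int example_register_consumer(struct eventfd_ctx *eventfd)
    {
        return irq_bypass_register_consumer(&example_consumer, eventfd);
    }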
@@ -146,7 +146,7 @@ int its_commit_vpe(struct its_vpe *vpe);
 int its_invall_vpe(struct its_vpe *vpe);
 int its_map_vlpi(int irq, struct its_vlpi_map *map);
 int its_get_vlpi(int irq, struct its_vlpi_map *map);
-int its_unmap_vlpi(int irq);
+void its_unmap_vlpi(int irq);
 int its_prop_update_vlpi(int irq, u8 config, bool inv);
 int its_prop_update_vsgi(int irq, u8 priority, bool group);
@@ -190,6 +190,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
+#define KVM_PIT_IRQ_SOURCE_ID			2
 
 extern struct mutex kvm_lock;
 extern struct list_head vm_list;
@@ -1022,16 +1023,12 @@ void kvm_unlock_all_vcpus(struct kvm *kvm);
 void vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
-#ifdef __KVM_HAVE_IOAPIC
+#ifdef CONFIG_KVM_IOAPIC
 void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
-void kvm_arch_post_irq_routing_update(struct kvm *kvm);
 #else
 static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
 {
 }
-static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
-{
-}
 #endif
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
@@ -1788,8 +1785,6 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
 				   struct kvm_irq_ack_notifier *kian);
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 				     struct kvm_irq_ack_notifier *kian);
-int kvm_request_irq_source_id(struct kvm *kvm);
-void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
 
 /*
@@ -2406,6 +2401,8 @@ struct kvm_vcpu *kvm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
+struct kvm_kernel_irqfd;
+
 bool kvm_arch_has_irq_bypass(void);
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
 				   struct irq_bypass_producer *);
@@ -2413,10 +2410,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
 				   struct irq_bypass_producer *);
 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
-int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
-				  uint32_t guest_irq, bool set);
-bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
-				  struct kvm_kernel_irq_routing_entry *);
+void kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+				   struct kvm_kernel_irq_routing_entry *old,
+				   struct kvm_kernel_irq_routing_entry *new);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 
 #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
@@ -55,10 +55,13 @@ struct kvm_kernel_irqfd {
 	/* Used for setup/shutdown */
 	struct eventfd_ctx *eventfd;
 	struct list_head list;
-	poll_table pt;
 	struct work_struct shutdown;
 	struct irq_bypass_consumer consumer;
 	struct irq_bypass_producer *producer;
+
+	struct kvm_vcpu *irq_bypass_vcpu;
+	struct list_head vcpu_list;
+	void *irq_bypass_data;
 };
 
 #endif /* __LINUX_KVM_IRQFD_H */
@@ -164,6 +164,8 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head,
+					     struct wait_queue_entry *wq_entry);
 extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
 static inline void __add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
@@ -82,95 +82,15 @@ TRACE_EVENT(kvm_set_irq,
 	TP_printk("gsi %u level %d source %d",
 		  __entry->gsi, __entry->level, __entry->irq_source_id)
 );
-#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */
 
-#if defined(__KVM_HAVE_IOAPIC)
+#ifdef CONFIG_KVM_IOAPIC
-#define kvm_deliver_mode		\
-	{0x0, "Fixed"},			\
-	{0x1, "LowPrio"},		\
-	{0x2, "SMI"},			\
-	{0x3, "Res3"},			\
-	{0x4, "NMI"},			\
-	{0x5, "INIT"},			\
-	{0x6, "SIPI"},			\
-	{0x7, "ExtINT"}
-
-TRACE_EVENT(kvm_ioapic_set_irq,
-	    TP_PROTO(__u64 e, int pin, bool coalesced),
-	    TP_ARGS(e, pin, coalesced),
-
-	TP_STRUCT__entry(
-		__field(__u64, e)
-		__field(int, pin)
-		__field(bool, coalesced)
-	),
-
-	TP_fast_assign(
-		__entry->e = e;
-		__entry->pin = pin;
-		__entry->coalesced = coalesced;
-	),
-
-	TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s",
-		  __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e,
-		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
-		  (__entry->e & (1<<11)) ? "logical" : "physical",
-		  (__entry->e & (1<<15)) ? "level" : "edge",
-		  (__entry->e & (1<<16)) ? "|masked" : "",
-		  __entry->coalesced ? " (coalesced)" : "")
-);
-
-TRACE_EVENT(kvm_ioapic_delayed_eoi_inj,
-	    TP_PROTO(__u64 e),
-	    TP_ARGS(e),
-
-	TP_STRUCT__entry(
-		__field(__u64, e)
-	),
-
-	TP_fast_assign(
-		__entry->e = e;
-	),
-
-	TP_printk("dst %x vec %u (%s|%s|%s%s)",
-		  (u8)(__entry->e >> 56), (u8)__entry->e,
-		  __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode),
-		  (__entry->e & (1<<11)) ? "logical" : "physical",
-		  (__entry->e & (1<<15)) ? "level" : "edge",
-		  (__entry->e & (1<<16)) ? "|masked" : "")
-);
-
-TRACE_EVENT(kvm_msi_set_irq,
-	    TP_PROTO(__u64 address, __u64 data),
-	    TP_ARGS(address, data),
-
-	TP_STRUCT__entry(
-		__field(__u64, address)
-		__field(__u64, data)
-	),
-
-	TP_fast_assign(
-		__entry->address = address;
-		__entry->data = data;
-	),
-
-	TP_printk("dst %llx vec %u (%s|%s|%s%s)",
-		  (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00),
-		  (u8)__entry->data,
-		  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
-		  (__entry->address & (1<<2)) ? "logical" : "physical",
-		  (__entry->data & (1<<15)) ? "level" : "edge",
-		  (__entry->address & (1<<3)) ? "|rh" : "")
-);
-
 #define kvm_irqchips						\
 	{KVM_IRQCHIP_PIC_MASTER,	"PIC master"},		\
 	{KVM_IRQCHIP_PIC_SLAVE,		"PIC slave"},		\
 	{KVM_IRQCHIP_IOAPIC,		"IOAPIC"}
 
-#endif /* defined(__KVM_HAVE_IOAPIC) */
-
-#if defined(CONFIG_HAVE_KVM_IRQCHIP)
+#endif /* CONFIG_KVM_IOAPIC */
 
 #ifdef kvm_irqchips
 #define kvm_ack_irq_string "irqchip %s pin %u"
@@ -40,13 +40,31 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
 
-	wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
+	wq_entry->flags |= WQ_FLAG_PRIORITY;
 	spin_lock_irqsave(&wq_head->lock, flags);
 	__add_wait_queue(wq_head, wq_entry);
 	spin_unlock_irqrestore(&wq_head->lock, flags);
 }
 EXPORT_SYMBOL_GPL(add_wait_queue_priority);
 
+int add_wait_queue_priority_exclusive(struct wait_queue_head *wq_head,
+				      struct wait_queue_entry *wq_entry)
+{
+	struct list_head *head = &wq_head->head;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
+
+	guard(spinlock_irqsave)(&wq_head->lock);
+
+	if (!list_empty(head) &&
+	    (list_first_entry(head, typeof(*wq_entry), entry)->flags & WQ_FLAG_PRIORITY))
+		return -EBUSY;
+
+	list_add(&wq_entry->entry, head);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(add_wait_queue_priority_exclusive);
+
 void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
@@ -64,7 +82,7 @@ EXPORT_SYMBOL(remove_wait_queue);
 * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
 * the list and any non-exclusive tasks will be woken first. A priority task
 * may be at the head of the list, and can consume the event without any other
- * tasks being woken.
+ * tasks being woken if it's also an exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
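Illustrative sketch (not part of the diff): how a waiter can use the new helper to claim sole priority-waiter status, which is the property KVM relies on to enforce a 1:1 eventfd:irqfd binding. The example_claim_* wrapper is hypothetical; the kernel APIs used are real.

    #include <linux/wait.h>

    static int example_claim_priority_waiter(struct wait_queue_head *wqh,
                                             struct wait_queue_entry *wait,
                                             wait_queue_func_t func)
    {
        init_waitqueue_func_entry(wait, func);

        /*
         * Returns -EBUSY if another priority waiter is already at the head of
         * the queue, i.e. if someone else already "owns" exclusive delivery.
         */
        return add_wait_queue_priority_exclusive(wqh, wait);
    }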
@@ -59,6 +59,7 @@ TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
 TEST_GEN_PROGS_COMMON = demand_paging_test
 TEST_GEN_PROGS_COMMON += dirty_log_test
 TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += irqfd_test
 TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
 TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
 TEST_GEN_PROGS_COMMON += kvm_page_table_test
@@ -620,18 +620,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
 	 * that no actual interrupt was injected for those cases.
 	 */
 
-	for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-		fd[f] = eventfd(0, 0);
-		TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
-	}
+	for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+		fd[f] = kvm_new_eventfd();
 
 	for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
-		struct kvm_irqfd irqfd = {
-			.fd = fd[f],
-			.gsi = i - MIN_SPI,
-		};
 		assert(i <= (uint64_t)UINT_MAX);
-		vm_ioctl(vm, KVM_IRQFD, &irqfd);
+		kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]);
 	}
 
 	for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
@@ -1,5 +1,6 @@
 CONFIG_KVM=y
 CONFIG_KVM_INTEL=y
 CONFIG_KVM_AMD=y
+CONFIG_EVENTFD=y
 CONFIG_USERFAULTFD=y
 CONFIG_IDLE_PAGE_TRACKING=y
@@ -18,6 +18,7 @@
 #include <asm/atomic.h>
 #include <asm/kvm.h>
 
+#include <sys/eventfd.h>
 #include <sys/ioctl.h>
 
 #include "kvm_util_arch.h"
@@ -502,6 +503,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm)
 	return fd;
 }
 
+static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+			      uint32_t flags)
+{
+	struct kvm_irqfd irqfd = {
+		.fd = eventfd,
+		.gsi = gsi,
+		.flags = flags,
+		.resamplefd = -1,
+	};
+
+	return __vm_ioctl(vm, KVM_IRQFD, &irqfd);
+}
+
+static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd,
+			     uint32_t flags)
+{
+	int ret = __kvm_irqfd(vm, gsi, eventfd, flags);
+
+	TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm);
+}
+
+static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+	kvm_irqfd(vm, gsi, eventfd, 0);
+}
+
+static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd)
+{
+	kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+static inline int kvm_new_eventfd(void)
+{
+	int fd = eventfd(0, 0);
+
+	TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("eventfd()", fd));
+	return fd;
+}
+
 static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
 {
 	ssize_t ret;
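Illustrative sketch (not part of the diff): using the new selftest helpers end to end. The GSI number and the wire_one_irqfd() wrapper are arbitrary choices for the example.

    #include <unistd.h>

    #include "kvm_util.h"

    static void wire_one_irqfd(struct kvm_vm *vm)
    {
        int fd = kvm_new_eventfd();

        kvm_assign_irqfd(vm, 24, fd);     /* bind the eventfd to GSI 24 */
        /* ... signal the eventfd from a device model or another thread ... */
        kvm_deassign_irqfd(vm, 24, fd);   /* unbind before closing */
        close(fd);
    }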
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+
+static struct kvm_vm *vm1;
+static struct kvm_vm *vm2;
+static int __eventfd;
+static bool done;
+
+/*
+ * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when
+ * assigning (the API isn't symmetrical).  Abuse the oddity and use a per-task
+ * GSI base to avoid false failures due to cross-task de-assign, i.e. so that
+ * the secondary doesn't de-assign the primary's eventfd and cause assign to
+ * unexpectedly succeed on the primary.
+ */
+#define GSI_BASE_PRIMARY	0x20
+#define GSI_BASE_SECONDARY	0x30
+
+static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd)
+{
+	int r, i;
+
+	/*
+	 * The secondary task can encounter EBADF since the primary can close
+	 * the eventfd at any time.  And because the primary can recreate the
+	 * eventfd, at the same fd in the file table, the secondary can also
+	 * encounter "unexpected" success, e.g. if the close+recreate happens
+	 * between the first and second assignments.  The secondary's role is
+	 * mostly to antagonize KVM, not to detect bugs.
+	 */
+	for (i = 0; i < 2; i++) {
+		r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0);
+		TEST_ASSERT(!r || errno == EBUSY || errno == EBADF,
+			    "Wanted success, EBUSY, or EBADF, r = %d, errno = %d",
+			    r, errno);
+
+		/* De-assign should succeed unless the eventfd was closed. */
+		r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+		TEST_ASSERT(!r || errno == EBADF,
+			    "De-assign should succeed unless the fd was closed");
+	}
+}
+
+static void *secondary_irqfd_juggler(void *ign)
+{
+	while (!READ_ONCE(done)) {
+		juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd));
+		juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd));
+	}
+
+	return NULL;
+}
+
+static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd)
+{
+	int r1, r2;
+
+	/*
+	 * At least one of the assigns should fail.  KVM disallows assigning a
+	 * single eventfd to multiple GSIs (or VMs), so it's possible that both
+	 * assignments can fail, too.
+	 */
+	r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0);
+	TEST_ASSERT(!r1 || errno == EBUSY,
+		    "Wanted success or EBUSY, r = %d, errno = %d", r1, errno);
+
+	r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0);
+	TEST_ASSERT(r1 || (r2 && errno == EBUSY),
+		    "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d",
+		    r1, r2, errno);
+
+	/*
+	 * De-assign should always succeed, even if the corresponding assign
+	 * failed.
+	 */
+	kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+	kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN);
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t racing_thread;
+	int r, i;
+
+	/* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */
+	vm1 = vm_create(1);
+	vm2 = vm_create(1);
+
+	WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+	kvm_irqfd(vm1, 10, __eventfd, 0);
+
+	r = __kvm_irqfd(vm1, 11, __eventfd, 0);
+	TEST_ASSERT(r && errno == EBUSY,
+		    "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+	r = __kvm_irqfd(vm2, 12, __eventfd, 0);
+	TEST_ASSERT(r && errno == EBUSY,
+		    "Wanted EBUSY, r = %d, errno = %d", r, errno);
+
+	/*
+	 * De-assign all eventfds, along with multiple eventfds that were never
+	 * assigned.  KVM's ABI is that de-assign is allowed so long as the
+	 * eventfd itself is valid.
+	 */
+	kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+	kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+	kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+	kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+	kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN);
+
+	close(__eventfd);
+
+	pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2);
+
+	for (i = 0; i < 10000; i++) {
+		WRITE_ONCE(__eventfd, kvm_new_eventfd());
+
+		juggle_eventfd_primary(vm1, __eventfd);
+		juggle_eventfd_primary(vm2, __eventfd);
+		close(__eventfd);
+	}
+
+	WRITE_ONCE(done, true);
+	pthread_join(racing_thread, NULL);
+}
@@ -1716,7 +1716,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
 /* Create an interrupt controller chip for the specified VM. */
 void vm_create_irqchip(struct kvm_vm *vm)
 {
-	vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+	int r;
+
+	/*
+	 * Allocate a fully in-kernel IRQ chip by default, but fall back to a
+	 * split model (x86 only) if that fails (KVM x86 allows compiling out
+	 * support for KVM_CREATE_IRQCHIP).
+	 */
+	r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+	if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP))
+		vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24);
+	else
+		TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm);
 
 	vm->has_irqchip = true;
 }
@@ -547,15 +547,9 @@ int main(int argc, char *argv[])
 	int irq_fd[2] = { -1, -1 };
 
 	if (do_eventfd_tests) {
-		irq_fd[0] = eventfd(0, 0);
-		irq_fd[1] = eventfd(0, 0);
+		irq_fd[0] = kvm_new_eventfd();
+		irq_fd[1] = kvm_new_eventfd();
 
-		/* Unexpected, but not a KVM failure */
-		if (irq_fd[0] == -1 || irq_fd[1] == -1)
-			do_evtchn_tests = do_eventfd_tests = false;
-	}
-
-	if (do_eventfd_tests) {
 		irq_routes.info.nr = 2;
 
 		irq_routes.entries[0].gsi = 32;
@@ -572,15 +566,8 @@ int main(int argc, char *argv[])
 
 		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
 
-		struct kvm_irqfd ifd = { };
-
-		ifd.fd = irq_fd[0];
-		ifd.gsi = 32;
-		vm_ioctl(vm, KVM_IRQFD, &ifd);
-
-		ifd.fd = irq_fd[1];
-		ifd.gsi = 33;
-		vm_ioctl(vm, KVM_IRQFD, &ifd);
+		kvm_assign_irqfd(vm, 32, irq_fd[0]);
+		kvm_assign_irqfd(vm, 33, irq_fd[1]);
 
 		struct sigaction sa = { };
 		sa.sa_handler = handle_alrm;
@@ -204,6 +204,11 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 	int ret = 0;
 
 	if (flags & EPOLLIN) {
+		/*
+		 * WARNING: Do NOT take irqfds.lock in any path except EPOLLHUP,
+		 * as KVM holds irqfds.lock when registering the irqfd with the
+		 * eventfd.
+		 */
 		u64 cnt;
 		eventfd_ctx_do_read(irqfd->eventfd, &cnt);
 
@@ -225,6 +230,11 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 		/* The eventfd is closing, detach from KVM */
 		unsigned long iflags;
 
+		/*
+		 * Taking irqfds.lock is safe here, as KVM holds a reference to
+		 * the eventfd when registering the irqfd, i.e. this path can't
+		 * be reached while kvm_irqfd_add() is running.
+		 */
 		spin_lock_irqsave(&kvm->irqfds.lock, iflags);
 
 		/*
@@ -245,22 +255,14 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
 	return ret;
 }
 
-static void
-irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
-			poll_table *pt)
-{
-	struct kvm_kernel_irqfd *irqfd =
-		container_of(pt, struct kvm_kernel_irqfd, pt);
-	add_wait_queue_priority(wqh, &irqfd->wait);
-}
-
-/* Must be called under irqfds.lock */
 static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
 	int n_entries;
 
+	lockdep_assert_held(&kvm->irqfds.lock);
+
 	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
@@ -274,6 +276,63 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
+struct kvm_irqfd_pt {
+	struct kvm_kernel_irqfd *irqfd;
+	struct kvm *kvm;
+	poll_table pt;
+	int ret;
+};
+
+static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
+			       poll_table *pt)
+{
+	struct kvm_irqfd_pt *p = container_of(pt, struct kvm_irqfd_pt, pt);
+	struct kvm_kernel_irqfd *irqfd = p->irqfd;
+	struct kvm *kvm = p->kvm;
+
+	/*
+	 * Note, irqfds.lock protects the irqfd's irq_entry, i.e. its routing,
+	 * and irqfds.items.  It does NOT protect registering with the eventfd.
+	 */
+	spin_lock_irq(&kvm->irqfds.lock);
+
+	/*
+	 * Initialize the routing information prior to adding the irqfd to the
+	 * eventfd's waitqueue, as irqfd_wakeup() can be invoked as soon as the
+	 * irqfd is registered.
+	 */
+	irqfd_update(kvm, irqfd);
+
+	/*
+	 * Add the irqfd as a priority waiter on the eventfd, with a custom
+	 * wake-up handler, so that KVM *and only KVM* is notified whenever the
+	 * underlying eventfd is signaled.
+	 */
+	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+
+	/*
+	 * Temporarily lie to lockdep about holding irqfds.lock to avoid a
+	 * false positive regarding potential deadlock with irqfd_wakeup()
+	 * (see irqfd_wakeup() for details).
+	 *
+	 * Adding to the wait queue will fail if there is already a priority
+	 * waiter, i.e. if the eventfd is associated with another irqfd (in any
+	 * VM).  Note, kvm_irqfd_deassign() waits for all in-flight shutdown
+	 * jobs to complete, i.e. ensures the irqfd has been removed from the
+	 * eventfd's waitqueue before returning to userspace.
+	 */
+	spin_release(&kvm->irqfds.lock.dep_map, _RET_IP_);
+	p->ret = add_wait_queue_priority_exclusive(wqh, &irqfd->wait);
+	spin_acquire(&kvm->irqfds.lock.dep_map, 0, 0, _RET_IP_);
+	if (p->ret)
+		goto out;
+
+	list_add_tail(&irqfd->list, &kvm->irqfds.items);
+
+out:
+	spin_unlock_irq(&kvm->irqfds.lock);
+}
+
 #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
 void __attribute__((weak)) kvm_arch_irq_bypass_stop(
 				struct irq_bypass_consumer *cons)
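Illustrative sketch (not part of the diff): the generic vfs_poll()+poll_table trick that kvm_irqfd_register() builds on, reduced to its skeleton. struct example_pt and the callback are hypothetical names; init_poll_funcptr() and vfs_poll() are the real kernel APIs.

    #include <linux/poll.h>
    #include <linux/fs.h>

    struct example_pt {
        poll_table pt;
        int ret;
    };

    /* Invoked synchronously from vfs_poll() with the file's waitqueue in hand. */
    static void example_register_cb(struct file *file, wait_queue_head_t *wqh,
                                    poll_table *pt)
    {
        struct example_pt *p = container_of(pt, struct example_pt, pt);

        /* Add a custom waiter to @wqh here; record success/failure for the caller. */
        p->ret = 0;
    }

    static __poll_t example_poll(struct file *f)
    {
        struct example_pt p = { .ret = 0 };

        init_poll_funcptr(&p.pt, example_register_cb);
        return vfs_poll(f, &p.pt);
    }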
@@ -285,26 +344,20 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
 {
 }
 
-int __attribute__((weak)) kvm_arch_update_irqfd_routing(
-				struct kvm *kvm, unsigned int host_irq,
-				uint32_t guest_irq, bool set)
+void __weak kvm_arch_update_irqfd_routing(struct kvm_kernel_irqfd *irqfd,
+					  struct kvm_kernel_irq_routing_entry *old,
+					  struct kvm_kernel_irq_routing_entry *new)
 {
-	return 0;
-}
-
-bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
-				struct kvm_kernel_irq_routing_entry *old,
-				struct kvm_kernel_irq_routing_entry *new)
-{
-	return true;
 }
 #endif
 
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct kvm_kernel_irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd;
 	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
+	struct kvm_irqfd_pt irqfd_pt;
 	int ret;
 	__poll_t events;
 	int idx;
@@ -390,57 +443,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	}
 
 	/*
-	 * Install our own custom wake-up handling so we are notified via
-	 * a callback whenever someone signals the underlying eventfd
+	 * Set the irqfd routing and add it to KVM's list before registering
+	 * the irqfd with the eventfd, so that the routing information is valid
+	 * and stays valid, e.g. if there are GSI routing changes, prior to
+	 * making the irqfd visible, i.e. before it might be signaled.
+	 *
+	 * Note, holding SRCU ensures a stable read of routing information, and
+	 * also prevents irqfd_shutdown() from freeing the irqfd before it's
+	 * fully initialized.
 	 */
-	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
-	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
-
-	spin_lock_irq(&kvm->irqfds.lock);
-
-	ret = 0;
-	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
-		if (irqfd->eventfd != tmp->eventfd)
-			continue;
-		/* This fd is used for another irq already. */
-		ret = -EBUSY;
-		spin_unlock_irq(&kvm->irqfds.lock);
-		goto fail;
-	}
-
 	idx = srcu_read_lock(&kvm->irq_srcu);
-	irqfd_update(kvm, irqfd);
-
-	list_add_tail(&irqfd->list, &kvm->irqfds.items);
-
-	spin_unlock_irq(&kvm->irqfds.lock);
 
 	/*
-	 * Check if there was an event already pending on the eventfd
-	 * before we registered, and trigger it as if we didn't miss it.
+	 * Register the irqfd with the eventfd by polling on the eventfd, and
+	 * simultaneously add the irqfd to KVM's list.  If there was an event
+	 * pending on the eventfd prior to registering, manually trigger IRQ
+	 * injection.
 	 */
-	events = vfs_poll(fd_file(f), &irqfd->pt);
+	irqfd_pt.irqfd = irqfd;
+	irqfd_pt.kvm = kvm;
+	init_poll_funcptr(&irqfd_pt.pt, kvm_irqfd_register);
+
+	events = vfs_poll(fd_file(f), &irqfd_pt.pt);
+
+	ret = irqfd_pt.ret;
+	if (ret)
+		goto fail_poll;
 
 	if (events & EPOLLIN)
 		schedule_work(&irqfd->inject);
 
 #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
 	if (kvm_arch_has_irq_bypass()) {
-		irqfd->consumer.token = (void *)irqfd->eventfd;
 		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
 		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
 		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
 		irqfd->consumer.start = kvm_arch_irq_bypass_start;
-		ret = irq_bypass_register_consumer(&irqfd->consumer);
+		ret = irq_bypass_register_consumer(&irqfd->consumer, irqfd->eventfd);
 		if (ret)
-			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
-				irqfd->consumer.token, ret);
+			pr_info("irq bypass consumer (eventfd %p) registration fails: %d\n",
+				irqfd->eventfd, ret);
 	}
 #endif
 
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return 0;
 
+fail_poll:
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 fail:
 	if (irqfd->resampler)
 		irqfd_resampler_shutdown(irqfd);
@@ -617,13 +667,8 @@ void kvm_irq_routing_update(struct kvm *kvm)
 		irqfd_update(kvm, irqfd);
 
 #if IS_ENABLED(CONFIG_HAVE_KVM_IRQ_BYPASS)
-		if (irqfd->producer &&
-		    kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
-			int ret = kvm_arch_update_irqfd_routing(
-					irqfd->kvm, irqfd->producer->irq,
-					irqfd->gsi, 1);
-			WARN_ON(ret);
-		}
+		if (irqfd->producer)
+			kvm_arch_update_irqfd_routing(irqfd, &old, &irqfd->irq_entry);
 #endif
 	}
@@ -222,8 +222,6 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_arch_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
-	kvm_arch_post_irq_routing_update(kvm);
-
 	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
@ -22,8 +22,8 @@
|
||||||
MODULE_LICENSE("GPL v2");
|
MODULE_LICENSE("GPL v2");
|
||||||
MODULE_DESCRIPTION("IRQ bypass manager utility module");
|
MODULE_DESCRIPTION("IRQ bypass manager utility module");
|
||||||
|
|
||||||
static LIST_HEAD(producers);
|
static DEFINE_XARRAY(producers);
|
||||||
static LIST_HEAD(consumers);
|
static DEFINE_XARRAY(consumers);
|
||||||
static DEFINE_MUTEX(lock);
|
static DEFINE_MUTEX(lock);
|
||||||
|
|
||||||
/* @lock must be held when calling connect */
|
/* @lock must be held when calling connect */
|
||||||
|
|
@@ -51,6 +51,10 @@ static int __connect(struct irq_bypass_producer *prod,
 	if (prod->start)
 		prod->start(prod);
 
+	if (!ret) {
+		prod->consumer = cons;
+		cons->producer = prod;
+	}
 	return ret;
 }
 
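Note (illustrative sketch only): the lines added to __connect() record the producer/consumer pairing only when the connection actually succeeded. The payoff shows up in the unregister paths in the later hunks, which can reach the peer through the back-pointer instead of searching the opposite registry, roughly in this direction of use:

static void example_teardown(struct irq_bypass_producer *producer)
{
	/* Follow the back-pointer recorded by __connect() on success. */
	if (producer->consumer)
		__disconnect(producer, producer->consumer);
}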
@@ -72,56 +76,49 @@ static void __disconnect(struct irq_bypass_producer *prod,
 		cons->start(cons);
 	if (prod->start)
 		prod->start(prod);
+
+	prod->consumer = NULL;
+	cons->producer = NULL;
 }
 
 /**
  * irq_bypass_register_producer - register IRQ bypass producer
  * @producer: pointer to producer structure
+ * @eventfd: pointer to the eventfd context associated with the producer
+ * @irq: Linux IRQ number of the underlying producer device
  *
- * Add the provided IRQ producer to the list of producers and connect
- * with any matching token found on the IRQ consumers list.
+ * Add the provided IRQ producer to the set of producers and connect with the
+ * consumer with a matching eventfd, if one exists.
  */
-int irq_bypass_register_producer(struct irq_bypass_producer *producer)
+int irq_bypass_register_producer(struct irq_bypass_producer *producer,
+				 struct eventfd_ctx *eventfd, int irq)
 {
-	struct irq_bypass_producer *tmp;
+	unsigned long index = (unsigned long)eventfd;
 	struct irq_bypass_consumer *consumer;
 	int ret;
 
-	if (!producer->token)
+	if (WARN_ON_ONCE(producer->eventfd))
 		return -EINVAL;
 
-	might_sleep();
+	producer->irq = irq;
 
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
+	guard(mutex)(&lock);
 
-	mutex_lock(&lock);
+	ret = xa_insert(&producers, index, producer, GFP_KERNEL);
+	if (ret)
+		return ret;
 
-	list_for_each_entry(tmp, &producers, node) {
-		if (tmp->token == producer->token) {
-			ret = -EBUSY;
-			goto out_err;
+	consumer = xa_load(&consumers, index);
+	if (consumer) {
+		ret = __connect(producer, consumer);
+		if (ret) {
+			WARN_ON_ONCE(xa_erase(&producers, index) != producer);
+			return ret;
 		}
 	}
 
-	list_for_each_entry(consumer, &consumers, node) {
-		if (consumer->token == producer->token) {
-			ret = __connect(producer, consumer);
-			if (ret)
-				goto out_err;
-			break;
-		}
-	}
-
-	list_add(&producer->node, &producers);
-
-	mutex_unlock(&lock);
-
+	producer->eventfd = eventfd;
 	return 0;
-out_err:
-	mutex_unlock(&lock);
-	module_put(THIS_MODULE);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(irq_bypass_register_producer);
 
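Note (hedged usage sketch, not from the patch): a producer-side caller now hands the eventfd context and the Linux IRQ number straight to the registration call, instead of pre-populating the token (and, with this change, the irq) fields on the structure. The my_* names below are invented for illustration:

static struct irq_bypass_producer my_producer;	/* illustration only */

static int my_enable_bypass(struct eventfd_ctx *trigger, int irq)
{
	int ret = irq_bypass_register_producer(&my_producer, trigger, irq);

	if (ret)
		pr_info("irq bypass producer registration failed: %d\n", ret);
	return ret;
}

static void my_disable_bypass(void)
{
	/* Safe even if the registration above failed. */
	irq_bypass_unregister_producer(&my_producer);
}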
@@ -129,95 +126,65 @@ EXPORT_SYMBOL_GPL(irq_bypass_register_producer);
  * irq_bypass_unregister_producer - unregister IRQ bypass producer
  * @producer: pointer to producer structure
  *
- * Remove a previously registered IRQ producer from the list of producers
- * and disconnect it from any connected IRQ consumer.
+ * Remove a previously registered IRQ producer (note, it's safe to call this
+ * even if registration was unsuccessful).  Disconnect from the associated
+ * consumer, if one exists.
  */
 void irq_bypass_unregister_producer(struct irq_bypass_producer *producer)
 {
-	struct irq_bypass_producer *tmp;
-	struct irq_bypass_consumer *consumer;
+	unsigned long index = (unsigned long)producer->eventfd;
 
-	if (!producer->token)
+	if (!producer->eventfd)
 		return;
 
-	might_sleep();
+	guard(mutex)(&lock);
 
-	if (!try_module_get(THIS_MODULE))
-		return; /* nothing in the list anyway */
+	if (producer->consumer)
+		__disconnect(producer, producer->consumer);
 
-	mutex_lock(&lock);
-
-	list_for_each_entry(tmp, &producers, node) {
-		if (tmp->token != producer->token)
-			continue;
-
-		list_for_each_entry(consumer, &consumers, node) {
-			if (consumer->token == producer->token) {
-				__disconnect(producer, consumer);
-				break;
-			}
-		}
-
-		list_del(&producer->node);
-		module_put(THIS_MODULE);
-		break;
-	}
-
-	mutex_unlock(&lock);
-
-	module_put(THIS_MODULE);
+	WARN_ON_ONCE(xa_erase(&producers, index) != producer);
+	producer->eventfd = NULL;
 }
 EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer);
 
 /**
  * irq_bypass_register_consumer - register IRQ bypass consumer
  * @consumer: pointer to consumer structure
+ * @eventfd: pointer to the eventfd context associated with the consumer
  *
- * Add the provided IRQ consumer to the list of consumers and connect
- * with any matching token found on the IRQ producer list.
+ * Add the provided IRQ consumer to the set of consumers and connect with the
+ * producer with a matching eventfd, if one exists.
  */
-int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
+int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer,
+				 struct eventfd_ctx *eventfd)
 {
-	struct irq_bypass_consumer *tmp;
+	unsigned long index = (unsigned long)eventfd;
 	struct irq_bypass_producer *producer;
 	int ret;
 
-	if (!consumer->token ||
-	    !consumer->add_producer || !consumer->del_producer)
+	if (WARN_ON_ONCE(consumer->eventfd))
 		return -EINVAL;
 
-	might_sleep();
+	if (!consumer->add_producer || !consumer->del_producer)
+		return -EINVAL;
 
-	if (!try_module_get(THIS_MODULE))
-		return -ENODEV;
+	guard(mutex)(&lock);
 
-	mutex_lock(&lock);
+	ret = xa_insert(&consumers, index, consumer, GFP_KERNEL);
+	if (ret)
+		return ret;
 
-	list_for_each_entry(tmp, &consumers, node) {
-		if (tmp->token == consumer->token || tmp == consumer) {
-			ret = -EBUSY;
-			goto out_err;
+	producer = xa_load(&producers, index);
+	if (producer) {
+		ret = __connect(producer, consumer);
+		if (ret) {
+			WARN_ON_ONCE(xa_erase(&consumers, index) != consumer);
+			return ret;
 		}
 	}
 
-	list_for_each_entry(producer, &producers, node) {
-		if (producer->token == consumer->token) {
-			ret = __connect(producer, consumer);
-			if (ret)
-				goto out_err;
-			break;
-		}
-	}
-
-	list_add(&consumer->node, &consumers);
-
-	mutex_unlock(&lock);
-
+	consumer->eventfd = eventfd;
 	return 0;
-out_err:
-	mutex_unlock(&lock);
-	module_put(THIS_MODULE);
-	return ret;
 }
 EXPORT_SYMBOL_GPL(irq_bypass_register_consumer);
 
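Note (illustration with invented names): the mutex_lock()/mutex_unlock() pairs and the out_err: unwind labels disappear because guard(mutex)(&lock) uses the kernel's scope-based cleanup machinery from linux/cleanup.h: the mutex is released automatically when execution leaves the enclosing scope, so the early "return ret;" statements need no explicit unlock. A minimal sketch:

static DEFINE_MUTEX(example_lock);

static int example_locked_op(bool busy)
{
	guard(mutex)(&example_lock);	/* unlocked automatically on return */

	if (busy)
		return -EBUSY;		/* no explicit mutex_unlock() needed */

	return 0;
}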
@@ -225,42 +192,23 @@ EXPORT_SYMBOL_GPL(irq_bypass_register_consumer);
  * irq_bypass_unregister_consumer - unregister IRQ bypass consumer
  * @consumer: pointer to consumer structure
  *
- * Remove a previously registered IRQ consumer from the list of consumers
- * and disconnect it from any connected IRQ producer.
+ * Remove a previously registered IRQ consumer (note, it's safe to call this
+ * even if registration was unsuccessful).  Disconnect from the associated
+ * producer, if one exists.
  */
 void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
 {
-	struct irq_bypass_consumer *tmp;
-	struct irq_bypass_producer *producer;
+	unsigned long index = (unsigned long)consumer->eventfd;
 
-	if (!consumer->token)
+	if (!consumer->eventfd)
 		return;
 
-	might_sleep();
+	guard(mutex)(&lock);
 
-	if (!try_module_get(THIS_MODULE))
-		return; /* nothing in the list anyway */
+	if (consumer->producer)
+		__disconnect(consumer->producer, consumer);
 
-	mutex_lock(&lock);
-
-	list_for_each_entry(tmp, &consumers, node) {
-		if (tmp != consumer)
-			continue;
-
-		list_for_each_entry(producer, &producers, node) {
-			if (producer->token == consumer->token) {
-				__disconnect(producer, consumer);
-				break;
-			}
-		}
-
-		list_del(&consumer->node);
-		module_put(THIS_MODULE);
-		break;
-	}
-
-	mutex_unlock(&lock);
-
-	module_put(THIS_MODULE);
+	WARN_ON_ONCE(xa_erase(&consumers, index) != consumer);
+	consumer->eventfd = NULL;
 }
 EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer);
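Note (illustrative sketch with an invented example_consumer): the reworded kernel-doc above is backed by the code, since ->eventfd is assigned only after registration fully succeeds and is cleared again on unregistration, the unregister helpers degrade to a no-op for a structure that never registered or has already been unregistered:

static struct irq_bypass_consumer example_consumer;	/* illustration only */

static void example_cleanup(void)
{
	/* No-op if registration never happened or failed... */
	irq_bypass_unregister_consumer(&example_consumer);
	/* ...and calling it a second time is equally harmless. */
	irq_bypass_unregister_consumer(&example_consumer);
}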