KVM/riscv changes for 7.1

- Fix steal time shared memory alignment checks
  - Fix vector context allocation leak
  - Fix array out-of-bounds in pmu_ctr_read() and pmu_fw_ctr_read_hi()
  - Fix double-free of sdata in kvm_pmu_clear_snapshot_area()
  - Fix integer overflow in kvm_pmu_validate_counter_mask()
  - Fix shift-out-of-bounds in make_xfence_request()
  - Fix lost write protection on huge pages during dirty logging
  - Split huge pages during fault handling for dirty logging
  - Skip CSR restore if VCPU is reloaded on the same core
  - Implement kvm_arch_has_default_irqchip() for KVM selftests
  - Factored-out ISA checks into separate sources
  - Added hideleg to struct kvm_vcpu_config
  - Factored-out VCPU config into separate sources
  - Support configuration of per-VM HGATP mode from KVM user space
 -----BEGIN PGP SIGNATURE-----
 
 iQIyBAABCgAdFiEEZdn75s5e6LHDQ+f/rUjsVaLHLAcFAmnbi74ACgkQrUjsVaLH
 LAejGw/3XRqexEOrxJ74GvAylGtGkQRuTw003mhIBIyshosYw6PiOSHEuAu7TQLc
 N074wt0QSfwbmQmeNaa0q3gIqb7Sp6gC0Eidurv1zHXuKSaAGbppnKD0VOZibtm6
 CK4+HQqBXFxf2mMeJSQX7+EOWNO+rf2jfw80c3SiKTnPE8mtb8Xfn3G6Zw22UmBZ
 gOrDrW4IijNKNkrBItb8V1IJgsfFIdUY+1Il2n1MSRuuqQL+tJcmHWXEPs7GdfMf
 9siV7asCfhKdf6xDys/Px42DBgQxLASG72Q8X2cESCxkO1kDplJZAt1AitAGfzXL
 bk20uAWikO0j7/Su93pWDOwMxRqb8c9dIMrnyRsCpmx1ovWA/odEnkrhslS1lafN
 hpUr6DWmTrE+2I6PW9UgBRAbleMomtK411fLhZh28PSyvp42sxG8841PwUNd/hFP
 lfcFO/ksJS7HFxQK5RaGZaMUuSMsgtZAu5P7zGGZm9uQh60eaHRF1vFcA+c5GAHe
 hibOegHjdYMkcLFqbVJDegU6a5+kohO3I8R21WXItp+VQjGvezOWuTDJtcR+jPAJ
 RNe4R1QeRgbsIwRuP9+fm6QnLeTPmBfxkCPFS1FvzrimLWskcQxqPWUq+eDvNDp+
 EdnE1KPjPsTykWq2baiG70+0xnetHP4ZGIrWwOQNOREJPEGOqg==
 =LbnE
 -----END PGP SIGNATURE-----

Merge tag 'kvm-riscv-7.1-1' of https://github.com/kvm-riscv/linux into HEAD

KVM/riscv changes for 7.1

 - Fix steal time shared memory alignment checks
 - Fix vector context allocation leak
 - Fix array out-of-bounds in pmu_ctr_read() and pmu_fw_ctr_read_hi()
 - Fix double-free of sdata in kvm_pmu_clear_snapshot_area()
 - Fix integer overflow in kvm_pmu_validate_counter_mask()
 - Fix shift-out-of-bounds in make_xfence_request()
 - Fix lost write protection on huge pages during dirty logging
 - Split huge pages during fault handling for dirty logging
 - Skip CSR restore if VCPU is reloaded on the same core
 - Implement kvm_arch_has_default_irqchip() for KVM selftests
 - Factored-out ISA checks into separate sources
 - Added hideleg to struct kvm_vcpu_config
 - Factored-out VCPU config into separate sources
 - Support configuration of per-VM HGATP mode from KVM user space
master
Paolo Bonzini 2026-04-13 11:42:26 +02:00
commit d880d2a9c6
27 changed files with 956 additions and 501 deletions

View File

@ -15,6 +15,7 @@ struct kvm_gstage {
#define KVM_GSTAGE_FLAGS_LOCAL BIT(0)
unsigned long vmid;
pgd_t *pgd;
unsigned long pgd_levels;
};
struct kvm_gstage_mapping {
@ -29,16 +30,22 @@ struct kvm_gstage_mapping {
#define kvm_riscv_gstage_index_bits 10
#endif
extern unsigned long kvm_riscv_gstage_mode;
extern unsigned long kvm_riscv_gstage_pgd_levels;
extern unsigned long kvm_riscv_gstage_max_pgd_levels;
#define kvm_riscv_gstage_pgd_xbits 2
#define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits))
#define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \
(kvm_riscv_gstage_pgd_levels * \
kvm_riscv_gstage_index_bits) + \
kvm_riscv_gstage_pgd_xbits)
#define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits))
/*
 * Number of guest physical address bits covered by a G-stage page table
 * that has @pgd_levels levels: the page offset bits, the index bits of
 * every level, and the extra index bits of the root table.
 */
static inline unsigned long kvm_riscv_gstage_gpa_bits(unsigned long pgd_levels)
{
	unsigned long index_bits = pgd_levels * kvm_riscv_gstage_index_bits;

	return HGATP_PAGE_SHIFT + index_bits + kvm_riscv_gstage_pgd_xbits;
}
/*
 * Size of the guest physical address space addressable through a G-stage
 * page table with @pgd_levels levels, i.e. 2^kvm_riscv_gstage_gpa_bits().
 */
static inline gpa_t kvm_riscv_gstage_gpa_size(unsigned long pgd_levels)
{
	unsigned long gpa_bits = kvm_riscv_gstage_gpa_bits(pgd_levels);

	return BIT_ULL(gpa_bits);
}
bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level);
@ -53,6 +60,10 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
bool page_rdonly, bool page_exec,
struct kvm_gstage_mapping *out_map);
int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
gpa_t addr, u32 target_level, bool flush);
enum kvm_riscv_gstage_op {
GSTAGE_OP_NOP = 0, /* Nothing */
GSTAGE_OP_CLEAR, /* Clear/Unmap */
@ -69,4 +80,30 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
void kvm_riscv_gstage_mode_detect(void);
/*
 * Translate a G-stage page table depth into the matching HGATP mode value.
 *
 * 2, 3, 4 and 5 levels map to Sv32x4, Sv39x4, Sv48x4 and Sv57x4. Any other
 * depth raises a one-time warning and yields HGATP_MODE_OFF.
 */
static inline unsigned long kvm_riscv_gstage_mode(unsigned long pgd_levels)
{
	if (pgd_levels == 2)
		return HGATP_MODE_SV32X4;
	if (pgd_levels == 3)
		return HGATP_MODE_SV39X4;
	if (pgd_levels == 4)
		return HGATP_MODE_SV48X4;
	if (pgd_levels == 5)
		return HGATP_MODE_SV57X4;

	WARN_ON_ONCE(1);
	return HGATP_MODE_OFF;
}
static inline void kvm_riscv_gstage_init(struct kvm_gstage *gstage, struct kvm *kvm)
{
gstage->kvm = kvm;
gstage->flags = 0;
gstage->vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage->pgd = kvm->arch.pgd;
gstage->pgd_levels = kvm->arch.pgd_levels;
}
#endif

View File

@ -18,6 +18,7 @@
#include <asm/ptrace.h>
#include <asm/kvm_tlb.h>
#include <asm/kvm_vmid.h>
#include <asm/kvm_vcpu_config.h>
#include <asm/kvm_vcpu_fp.h>
#include <asm/kvm_vcpu_insn.h>
#include <asm/kvm_vcpu_sbi.h>
@ -47,18 +48,6 @@
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
BIT(EXC_INST_ILLEGAL) | \
BIT(EXC_BREAKPOINT) | \
BIT(EXC_SYSCALL) | \
BIT(EXC_INST_PAGE_FAULT) | \
BIT(EXC_LOAD_PAGE_FAULT) | \
BIT(EXC_STORE_PAGE_FAULT))
#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
BIT(IRQ_VS_TIMER) | \
BIT(IRQ_VS_EXT))
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@ -94,6 +83,7 @@ struct kvm_arch {
/* G-stage page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
unsigned long pgd_levels;
/* Guest Timer */
struct kvm_guest_timer timer;
@ -167,12 +157,6 @@ struct kvm_vcpu_csr {
unsigned long senvcfg;
};
struct kvm_vcpu_config {
u64 henvcfg;
u64 hstateen0;
unsigned long hedeleg;
};
struct kvm_vcpu_smstateen_csr {
unsigned long sstateen0;
};
@ -273,6 +257,9 @@ struct kvm_vcpu_arch {
/* 'static' configurations which are set only once */
struct kvm_vcpu_config cfg;
/* Indicates modified guest CSRs */
bool csr_dirty;
/* SBI steal-time accounting */
struct {
gpa_t shmem;

View File

@ -0,0 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2026 Qualcomm Technologies, Inc.
*/
#ifndef __KVM_RISCV_ISA_H
#define __KVM_RISCV_ISA_H
#include <linux/types.h>
/*
 * Reverse lookup: return the KVM ISA extension ID whose table entry equals
 * @base_ext, or KVM_RISCV_ISA_EXT_MAX when no entry matches.
 */
unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext);
/*
 * Check a KVM ISA extension ID against the host.
 *
 * Returns 0 when @ext is a known KVM ISA extension ID and the corresponding
 * host extension is available, -ENOENT otherwise. On success, and only if
 * @base_ext is non-NULL, the mapped extension ID is stored in *@base_ext.
 */
int __kvm_riscv_isa_check_host(unsigned long ext, unsigned long *base_ext);
/*
 * Convenience wrapper taking the extension name suffix (e.g. SSAIA).
 * Note the return convention: 0 means available, non-zero means not.
 */
#define kvm_riscv_isa_check_host(ext) \
__kvm_riscv_isa_check_host(KVM_RISCV_ISA_EXT_##ext, NULL)
/* Return true if KVM ISA extension @ext is permitted to be enabled. */
bool kvm_riscv_isa_enable_allowed(unsigned long ext);
/* Return true if KVM ISA extension @ext is permitted to be disabled. */
bool kvm_riscv_isa_disable_allowed(unsigned long ext);
#endif

View File

@ -0,0 +1,25 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2026 Qualcomm Technologies, Inc.
*/
#ifndef __KVM_VCPU_RISCV_CONFIG_H
#define __KVM_VCPU_RISCV_CONFIG_H
#include <linux/types.h>
/* Forward declaration: this header only needs pointers to struct kvm_vcpu. */
struct kvm_vcpu;
/*
 * Per-VCPU configuration values.
 * NOTE(review): the field names presumably mirror the henvcfg, hstateen0,
 * hedeleg and hideleg CSRs - confirm against the implementation.
 */
struct kvm_vcpu_config {
u64 henvcfg;
u64 hstateen0;
unsigned long hedeleg;
unsigned long hideleg;
};
/*
 * VCPU configuration helpers.
 * NOTE(review): the implementations are not visible from this header; the
 * names suggest hooks for VCPU init, guest-debug changes, first run and
 * VCPU load respectively - confirm against the implementation.
 */
void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu);
#endif

View File

@ -110,6 +110,10 @@ struct kvm_riscv_timer {
__u64 state;
};
/* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
/*
* ISA extension IDs specific to KVM. This is not the same as the host ISA
* extension IDs as that is internal to the host and should not be exposed
@ -238,10 +242,6 @@ struct kvm_riscv_sbi_fwft {
struct kvm_riscv_sbi_fwft_feature pointer_masking;
};
/* Possible states for kvm_riscv_timer */
#define KVM_RISCV_TIMER_STATE_OFF 0
#define KVM_RISCV_TIMER_STATE_ON 1
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
#define KVM_REG_RISCV_TYPE_SHIFT 24

View File

@ -15,11 +15,13 @@ kvm-y += aia_aplic.o
kvm-y += aia_device.o
kvm-y += aia_imsic.o
kvm-y += gstage.o
kvm-y += isa.o
kvm-y += main.o
kvm-y += mmu.o
kvm-y += nacl.o
kvm-y += tlb.o
kvm-y += vcpu.o
kvm-y += vcpu_config.o
kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
kvm-y += vcpu_insn.o

View File

@ -11,7 +11,7 @@
#include <linux/irqchip/riscv-imsic.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <linux/cpufeature.h>
#include <asm/kvm_isa.h>
static int aia_create(struct kvm_device *dev, u32 type)
{
@ -23,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type)
if (irqchip_in_kernel(kvm))
return -EEXIST;
if (!riscv_isa_extension_available(NULL, SSAIA))
if (kvm_riscv_isa_check_host(SSAIA))
return -ENODEV;
ret = -EBUSY;

View File

@ -12,22 +12,21 @@
#include <asm/kvm_gstage.h>
#ifdef CONFIG_64BIT
unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4;
unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3;
unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3;
#else
unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4;
unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2;
unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2;
#endif
#define gstage_pte_leaf(__ptep) \
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage,
gpa_t addr, u32 level)
{
unsigned long mask;
unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level);
if (level == (kvm_riscv_gstage_pgd_levels - 1))
if (level == gstage->pgd_levels - 1)
mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1;
else
mask = PTRS_PER_PTE - 1;
@ -40,12 +39,13 @@ static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}
static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size,
u32 *out_level)
{
u32 i;
unsigned long psz = 1UL << 12;
for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) {
for (i = 0; i < gstage->pgd_levels; i++) {
if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) {
*out_level = i;
return 0;
@ -55,21 +55,23 @@ static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
return -EINVAL;
}
static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level,
unsigned long *out_pgorder)
{
if (kvm_riscv_gstage_pgd_levels < level)
if (gstage->pgd_levels < level)
return -EINVAL;
*out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits);
return 0;
}
static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level,
unsigned long *out_pgsize)
{
int rc;
unsigned long page_order = PAGE_SHIFT;
rc = gstage_level_to_page_order(level, &page_order);
rc = gstage_level_to_page_order(gstage, level, &page_order);
if (rc)
return rc;
@ -81,11 +83,11 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
pte_t *ptep;
u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
u32 current_level = gstage->pgd_levels - 1;
*ptep_level = current_level;
ptep = (pte_t *)gstage->pgd;
ptep = &ptep[gstage_pte_index(addr, current_level)];
ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
while (ptep && pte_val(ptep_get(ptep))) {
if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
@ -97,7 +99,7 @@ bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr,
current_level--;
*ptep_level = current_level;
ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
ptep = &ptep[gstage_pte_index(addr, current_level)];
ptep = &ptep[gstage_pte_index(gstage, addr, current_level)];
} else {
ptep = NULL;
}
@ -110,7 +112,7 @@ static void gstage_tlb_flush(struct kvm_gstage *gstage, u32 level, gpa_t addr)
{
unsigned long order = PAGE_SHIFT;
if (gstage_level_to_page_order(level, &order))
if (gstage_level_to_page_order(gstage, level, &order))
return;
addr &= ~(BIT(order) - 1);
@ -125,9 +127,9 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
const struct kvm_gstage_mapping *map)
{
u32 current_level = kvm_riscv_gstage_pgd_levels - 1;
u32 current_level = gstage->pgd_levels - 1;
pte_t *next_ptep = (pte_t *)gstage->pgd;
pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
if (current_level < map->level)
return -EINVAL;
@ -151,7 +153,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
}
current_level--;
ptep = &next_ptep[gstage_pte_index(map->addr, current_level)];
ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)];
}
if (pte_val(*ptep) != pte_val(map->pte)) {
@ -163,19 +165,38 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage,
return 0;
}
/*
 * Change the protection bits of the PTE at @ptep to @prot in place.
 *
 * Nothing happens when the PTE already carries exactly @prot. Otherwise the
 * PTE is rebuilt from its current PFN with @prot, marked dirty, written
 * back, and the TLB is flushed for @addr at @level.
 */
static void kvm_riscv_gstage_update_pte_prot(struct kvm_gstage *gstage, u32 level,
					     gpa_t addr, pte_t *ptep, pgprot_t prot)
{
	unsigned long cur_prot = pgprot_val(pte_pgprot(ptep_get(ptep)));

	if (cur_prot != pgprot_val(prot)) {
		pte_t updated = pte_mkdirty(pfn_pte(pte_pfn(ptep_get(ptep)), prot));

		set_pte(ptep, updated);
		gstage_tlb_flush(gstage, level, addr);
	}
}
int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa, unsigned long page_size,
bool page_rdonly, bool page_exec,
struct kvm_gstage_mapping *out_map)
{
bool found_leaf;
u32 ptep_level;
pgprot_t prot;
pte_t *ptep;
int ret;
out_map->addr = gpa;
out_map->level = 0;
ret = gstage_page_size_to_level(page_size, &out_map->level);
ret = gstage_page_size_to_level(gstage, page_size, &out_map->level);
if (ret)
return ret;
@ -203,12 +224,119 @@ int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage,
else
prot = PAGE_WRITE;
}
found_leaf = kvm_riscv_gstage_get_leaf(gstage, gpa, &ptep, &ptep_level);
if (found_leaf) {
/*
* ptep_level is the current gstage mapping level of addr, out_map->level
* is the required mapping level during fault handling.
*
* 1) ptep_level > out_map->level
* This happens when dirty logging is enabled and huge pages are used.
* KVM must track the pages at 4K level, and split the huge mapping
* into 4K mappings.
*
* 2) ptep_level < out_map->level
* This happens when dirty logging is disabled and huge pages are used.
* The gstage is split into 4K mappings, but the out_map level is now
* back to the huge page level. Ignore the out_map level this time, and
* just update the pte prot here. Otherwise, we would fall back to mapping
* the gstage at huge page level in `kvm_riscv_gstage_set_pte`, with the
* overhead of freeing the page tables (not supported yet), which would hurt
* the vCPUs' performance.
*
* It is better to recover the huge page mapping in the ioctl context when
* disabling dirty logging.
*
* 3) ptep_level == out_map->level
* We already have the ptep; just update the pte prot if the pfn has not changed.
* There is no need to invoke `kvm_riscv_gstage_set_pte` again.
*/
if (ptep_level > out_map->level) {
kvm_riscv_gstage_split_huge(gstage, pcache, gpa,
out_map->level, true);
} else if (ALIGN_DOWN(PFN_PHYS(pte_pfn(ptep_get(ptep))), page_size) == hpa) {
kvm_riscv_gstage_update_pte_prot(gstage, ptep_level, gpa, ptep, prot);
return 0;
}
}
out_map->pte = pfn_pte(PFN_DOWN(hpa), prot);
out_map->pte = pte_mkdirty(out_map->pte);
return kvm_riscv_gstage_set_pte(gstage, pcache, out_map);
}
/*
 * Compute the raw PTE value for slot @index of the table that replaces a
 * huge mapping.
 *
 * @huge_pte already holds the base address and attributes of the huge page,
 * so the child value is obtained by OR-ing in the PFN offset of the
 * @index-th child, where each child spans @child_page_size bytes.
 */
static inline unsigned long make_child_pte(unsigned long huge_pte, int index,
					   unsigned long child_page_size)
{
	/* Number of base pages spanned by one child mapping. */
	unsigned long pfns_per_child = child_page_size / PAGE_SIZE;
	unsigned long pfn_delta = index * pfns_per_child;

	return huge_pte | pte_val(pfn_pte(pfn_delta, __pgprot(0)));
}
/*
 * Split the huge mapping that covers @addr into next-level mappings, level
 * by level, until the walk reaches @target_level.
 *
 * The walk starts at the root table. Non-leaf entries are followed
 * downwards. Every leaf found above @target_level is replaced by a page
 * table taken from @pcache whose PTRS_PER_PTE entries are derived from the
 * huge PTE with make_child_pte(). The walk stops at the first empty entry.
 * When @flush is true the TLB is flushed for @addr after each split.
 *
 * Returns 0 on success (also when nothing is mapped at @addr), -ENOMEM if
 * @pcache is NULL or yields no page, or the error reported by
 * gstage_level_to_page_size().
 */
int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage,
				struct kvm_mmu_memory_cache *pcache,
				gpa_t addr, u32 target_level, bool flush)
{
	pte_t *table = (pte_t *)gstage->pgd;
	unsigned long parent_val, child_size;
	pte_t *entry;
	u32 level;
	int idx, rc;

	if (!pcache)
		return -ENOMEM;

	for (level = gstage->pgd_levels - 1; level > target_level; level--) {
		entry = (pte_t *)&table[gstage_pte_index(gstage, addr, level)];
		if (!pte_val(ptep_get(entry)))
			break;

		/* Not a leaf: descend into the next-level table. */
		if (!gstage_pte_leaf(entry)) {
			table = (pte_t *)gstage_pte_page_vaddr(ptep_get(entry));
			continue;
		}

		parent_val = pte_val(ptep_get(entry));

		rc = gstage_level_to_page_size(gstage, level - 1, &child_size);
		if (rc)
			return rc;

		table = kvm_mmu_memory_cache_alloc(pcache);
		if (!table)
			return -ENOMEM;

		/* Populate the new table before linking it into the parent. */
		for (idx = 0; idx < PTRS_PER_PTE; idx++)
			set_pte((pte_t *)&table[idx],
				__pte(make_child_pte(parent_val, idx, child_size)));

		set_pte(entry, pfn_pte(PFN_DOWN(__pa(table)),
				       __pgprot(_PAGE_TABLE)));

		if (flush)
			gstage_tlb_flush(gstage, level, addr);
	}

	return 0;
}
void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
pte_t *ptep, u32 ptep_level, enum kvm_riscv_gstage_op op)
{
@ -217,7 +345,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
u32 next_ptep_level;
unsigned long next_page_size, page_size;
ret = gstage_level_to_page_size(ptep_level, &page_size);
ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
return;
@ -229,7 +357,7 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr,
if (ptep_level && !gstage_pte_leaf(ptep)) {
next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep));
next_ptep_level = ptep_level - 1;
ret = gstage_level_to_page_size(next_ptep_level, &next_page_size);
ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size);
if (ret)
return;
@ -263,7 +391,7 @@ void kvm_riscv_gstage_unmap_range(struct kvm_gstage *gstage,
while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
ret = gstage_level_to_page_size(ptep_level, &page_size);
ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;
@ -297,17 +425,16 @@ void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end
while (addr < end) {
found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level);
ret = gstage_level_to_page_size(ptep_level, &page_size);
ret = gstage_level_to_page_size(gstage, ptep_level, &page_size);
if (ret)
break;
if (!found_leaf)
goto next;
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
kvm_riscv_gstage_op_pte(gstage, addr, ptep,
ptep_level, GSTAGE_OP_WP);
addr = ALIGN_DOWN(addr, page_size);
kvm_riscv_gstage_op_pte(gstage, addr, ptep,
ptep_level, GSTAGE_OP_WP);
next:
addr += page_size;
}
@ -319,39 +446,34 @@ void __init kvm_riscv_gstage_mode_detect(void)
/* Try Sv57x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV57X4;
kvm_riscv_gstage_pgd_levels = 5;
kvm_riscv_gstage_max_pgd_levels = 5;
goto done;
}
/* Try Sv48x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV48X4;
kvm_riscv_gstage_pgd_levels = 4;
kvm_riscv_gstage_max_pgd_levels = 4;
goto done;
}
/* Try Sv39x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV39X4;
kvm_riscv_gstage_pgd_levels = 3;
kvm_riscv_gstage_max_pgd_levels = 3;
goto done;
}
#else /* CONFIG_32BIT */
/* Try Sv32x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) {
kvm_riscv_gstage_mode = HGATP_MODE_SV32X4;
kvm_riscv_gstage_pgd_levels = 2;
kvm_riscv_gstage_max_pgd_levels = 2;
goto done;
}
#endif
/* KVM depends on !HGATP_MODE_OFF */
kvm_riscv_gstage_mode = HGATP_MODE_OFF;
kvm_riscv_gstage_pgd_levels = 0;
kvm_riscv_gstage_max_pgd_levels = 0;
done:
csr_write(CSR_HGATP, 0);

253
arch/riscv/kvm/isa.c Normal file
View File

@ -0,0 +1,253 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2026 Qualcomm Technologies, Inc.
*/
#include <linux/cpufeature.h>
#include <linux/errno.h>
#include <linux/kvm_host.h>
#include <linux/nospec.h>
#include <linux/pgtable.h>
#include <asm/kvm_isa.h>
#include <asm/vector.h>
/*
 * Designated initializer for one table slot: index KVM_RISCV_ISA_EXT_<ext>
 * holds RISCV_ISA_EXT_<ext>. Only usable when both IDs share the same
 * suffix; the single letter extensions below use lower-case RISCV_ISA_EXT_
 * names and are therefore spelled out by hand.
 */
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
/* Mapping between KVM ISA Extension ID & guest ISA extension ID */
/*
 * The array length is determined by the highest designated index, and any
 * index without an initializer is implicitly zero.
 */
static const unsigned long kvm_isa_ext_arr[] = {
/* Single letter extensions (alphabetically sorted) */
[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
/* Multi letter extensions (alphabetically sorted) */
KVM_ISA_EXT_ARR(SMNPM),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSNPM),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVADE),
KVM_ISA_EXT_ARR(SVADU),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
KVM_ISA_EXT_ARR(ZALASR),
KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZBC),
KVM_ISA_EXT_ARR(ZBKB),
KVM_ISA_EXT_ARR(ZBKC),
KVM_ISA_EXT_ARR(ZBKX),
KVM_ISA_EXT_ARR(ZBS),
KVM_ISA_EXT_ARR(ZCA),
KVM_ISA_EXT_ARR(ZCB),
KVM_ISA_EXT_ARR(ZCD),
KVM_ISA_EXT_ARR(ZCF),
KVM_ISA_EXT_ARR(ZCLSD),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
KVM_ISA_EXT_ARR(ZFBFMIN),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOP),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
KVM_ISA_EXT_ARR(ZICOND),
KVM_ISA_EXT_ARR(ZICSR),
KVM_ISA_EXT_ARR(ZIFENCEI),
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
KVM_ISA_EXT_ARR(ZILSD),
KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
KVM_ISA_EXT_ARR(ZKNH),
KVM_ISA_EXT_ARR(ZKR),
KVM_ISA_EXT_ARR(ZKSED),
KVM_ISA_EXT_ARR(ZKSH),
KVM_ISA_EXT_ARR(ZKT),
KVM_ISA_EXT_ARR(ZTSO),
KVM_ISA_EXT_ARR(ZVBB),
KVM_ISA_EXT_ARR(ZVBC),
KVM_ISA_EXT_ARR(ZVFBFMIN),
KVM_ISA_EXT_ARR(ZVFBFWMA),
KVM_ISA_EXT_ARR(ZVFH),
KVM_ISA_EXT_ARR(ZVFHMIN),
KVM_ISA_EXT_ARR(ZVKB),
KVM_ISA_EXT_ARR(ZVKG),
KVM_ISA_EXT_ARR(ZVKNED),
KVM_ISA_EXT_ARR(ZVKNHA),
KVM_ISA_EXT_ARR(ZVKNHB),
KVM_ISA_EXT_ARR(ZVKSED),
KVM_ISA_EXT_ARR(ZVKSH),
KVM_ISA_EXT_ARR(ZVKT),
};
/*
 * Reverse lookup in kvm_isa_ext_arr[].
 *
 * Returns the lowest KVM ISA extension ID whose table entry equals
 * @base_ext, or KVM_RISCV_ISA_EXT_MAX when no entry matches.
 */
unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext)
{
	unsigned long i;

	/*
	 * The table is sized by its highest designated index, so it can be
	 * shorter than KVM_RISCV_ISA_EXT_MAX. Bound the scan by both limits,
	 * as __kvm_riscv_isa_check_host() does, to avoid reading past the
	 * end of the array.
	 */
	for (i = 0; i < KVM_RISCV_ISA_EXT_MAX &&
		    i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
		if (kvm_isa_ext_arr[i] == base_ext)
			return i;
	}

	return KVM_RISCV_ISA_EXT_MAX;
}
/*
 * Check whether the host provides what is needed for KVM ISA extension
 * @kvm_ext.
 *
 * Returns -ENOENT when @kvm_ext is outside the known ID range or the
 * required host extension is unavailable, 0 otherwise. On success the
 * table entry for @kvm_ext is stored in *@base_ext if @base_ext is non-NULL.
 */
int __kvm_riscv_isa_check_host(unsigned long kvm_ext, unsigned long *base_ext)
{
	unsigned long guest_ext, host_ext;

	if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX ||
	    kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr))
		return -ENOENT;

	/* Clamp the index under speculation before using it on the table. */
	kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr));
	guest_ext = kvm_isa_ext_arr[kvm_ext];

	/*
	 * Pointer masking effective in (H)S-mode is provided by the
	 * Smnpm extension, so that extension is reported to the guest,
	 * even though the CSR bits for configuring VS-mode pointer
	 * masking on the host side are part of the Ssnpm extension.
	 */
	if (guest_ext == RISCV_ISA_EXT_SMNPM)
		host_ext = RISCV_ISA_EXT_SSNPM;
	else
		host_ext = guest_ext;

	if (!__riscv_isa_extension_available(NULL, host_ext))
		return -ENOENT;

	if (base_ext)
		*base_ext = guest_ext;

	return 0;
}
/*
 * Decide whether KVM ISA extension @ext may be enabled.
 *
 * Every extension is allowed except for the special cases handled below.
 */
bool kvm_riscv_isa_enable_allowed(unsigned long ext)
{
	/* The H extension can never be enabled. */
	if (ext == KVM_RISCV_ISA_EXT_H)
		return false;

	/* Sscofpmf depends on interrupt filtering defined in ssaia */
	if (ext == KVM_RISCV_ISA_EXT_SSCOFPMF)
		return !kvm_riscv_isa_check_host(SSAIA);

	/*
	 * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
	 * Guest OS can use Svadu only when host OS enable Svadu.
	 */
	if (ext == KVM_RISCV_ISA_EXT_SVADU)
		return arch_has_hw_pte_young();

	if (ext == KVM_RISCV_ISA_EXT_V)
		return riscv_v_vstate_ctrl_user_allowed();

	return true;
}
/*
 * Decide whether KVM ISA extension @ext may be disabled.
 *
 * Returns false for every extension in the fall-through list below, a
 * host-dependent answer for SSAIA and SVADE, and true for anything else.
 */
bool kvm_riscv_isa_disable_allowed(unsigned long ext)
{
switch (ext) {
/* Extensions which don't have any mechanism to disable */
case KVM_RISCV_ISA_EXT_A:
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
/* There is no architectural config bit to disable sscofpmf completely */
case KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_RISCV_ISA_EXT_SSNPM:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_RISCV_ISA_EXT_SVVPTC:
case KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_RISCV_ISA_EXT_ZABHA:
case KVM_RISCV_ISA_EXT_ZACAS:
case KVM_RISCV_ISA_EXT_ZALASR:
case KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_RISCV_ISA_EXT_ZBA:
case KVM_RISCV_ISA_EXT_ZBB:
case KVM_RISCV_ISA_EXT_ZBC:
case KVM_RISCV_ISA_EXT_ZBKB:
case KVM_RISCV_ISA_EXT_ZBKC:
case KVM_RISCV_ISA_EXT_ZBKX:
case KVM_RISCV_ISA_EXT_ZBS:
case KVM_RISCV_ISA_EXT_ZCA:
case KVM_RISCV_ISA_EXT_ZCB:
case KVM_RISCV_ISA_EXT_ZCD:
case KVM_RISCV_ISA_EXT_ZCF:
case KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_RISCV_ISA_EXT_ZFA:
case KVM_RISCV_ISA_EXT_ZFBFMIN:
case KVM_RISCV_ISA_EXT_ZFH:
case KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_RISCV_ISA_EXT_ZICBOP:
case KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_RISCV_ISA_EXT_ZICNTR:
case KVM_RISCV_ISA_EXT_ZICOND:
case KVM_RISCV_ISA_EXT_ZICSR:
case KVM_RISCV_ISA_EXT_ZIFENCEI:
case KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_RISCV_ISA_EXT_ZIHPM:
case KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_RISCV_ISA_EXT_ZKND:
case KVM_RISCV_ISA_EXT_ZKNE:
case KVM_RISCV_ISA_EXT_ZKNH:
case KVM_RISCV_ISA_EXT_ZKR:
case KVM_RISCV_ISA_EXT_ZKSED:
case KVM_RISCV_ISA_EXT_ZKSH:
case KVM_RISCV_ISA_EXT_ZKT:
case KVM_RISCV_ISA_EXT_ZTSO:
case KVM_RISCV_ISA_EXT_ZVBB:
case KVM_RISCV_ISA_EXT_ZVBC:
case KVM_RISCV_ISA_EXT_ZVFBFMIN:
case KVM_RISCV_ISA_EXT_ZVFBFWMA:
case KVM_RISCV_ISA_EXT_ZVFH:
case KVM_RISCV_ISA_EXT_ZVFHMIN:
case KVM_RISCV_ISA_EXT_ZVKB:
case KVM_RISCV_ISA_EXT_ZVKG:
case KVM_RISCV_ISA_EXT_ZVKNED:
case KVM_RISCV_ISA_EXT_ZVKNHA:
case KVM_RISCV_ISA_EXT_ZVKNHB:
case KVM_RISCV_ISA_EXT_ZVKSED:
case KVM_RISCV_ISA_EXT_ZVKSH:
case KVM_RISCV_ISA_EXT_ZVKT:
return false;
/* Extensions which can be disabled using Smstateen */
case KVM_RISCV_ISA_EXT_SSAIA:
return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN);
case KVM_RISCV_ISA_EXT_SVADE:
/*
* The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
* Svade can't be disabled unless we support Svadu.
*/
return arch_has_hw_pte_young();
default:
break;
}
/* Everything not listed above may be disabled. */
return true;
}

View File

@ -41,8 +41,8 @@ int kvm_arch_enable_virtualization_cpu(void)
if (rc)
return rc;
csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
csr_write(CSR_HEDELEG, 0);
csr_write(CSR_HIDELEG, 0);
/* VS should access only the time counter directly. Everything else should trap */
csr_write(CSR_HCOUNTEREN, 0x02);
@ -105,17 +105,17 @@ static int __init riscv_kvm_init(void)
return rc;
kvm_riscv_gstage_mode_detect();
switch (kvm_riscv_gstage_mode) {
case HGATP_MODE_SV32X4:
switch (kvm_riscv_gstage_max_pgd_levels) {
case 2:
str = "Sv32x4";
break;
case HGATP_MODE_SV39X4:
case 3:
str = "Sv39x4";
break;
case HGATP_MODE_SV48X4:
case 4:
str = "Sv48x4";
break;
case HGATP_MODE_SV57X4:
case 5:
str = "Sv57x4";
break;
default:
@ -164,7 +164,7 @@ static int __init riscv_kvm_init(void)
(rc) ? slist : "no features");
}
kvm_info("using %s G-stage page table format\n", str);
kvm_info("highest G-stage page table mode is %s\n", str);
kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());

View File

@ -24,10 +24,7 @@ static void mmu_wp_memory_region(struct kvm *kvm, int slot)
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
struct kvm_gstage gstage;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
spin_lock(&kvm->mmu_lock);
kvm_riscv_gstage_wp_range(&gstage, start, end);
@ -49,10 +46,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
struct kvm_gstage_mapping map;
struct kvm_gstage gstage;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(hpa);
@ -67,7 +61,7 @@ int kvm_riscv_mmu_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
if (!writable)
map.pte = pte_wrprotect(map.pte);
ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels);
ret = kvm_mmu_topup_memory_cache(&pcache, kvm->arch.pgd_levels);
if (ret)
goto out;
@ -89,10 +83,7 @@ void kvm_riscv_mmu_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size)
{
struct kvm_gstage gstage;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
spin_lock(&kvm->mmu_lock);
kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false);
@ -109,10 +100,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
struct kvm_gstage gstage;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
kvm_riscv_gstage_wp_range(&gstage, start, end);
}
@ -141,10 +129,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
struct kvm_gstage gstage;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
spin_lock(&kvm->mmu_lock);
kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false);
@ -186,7 +171,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* space addressable by the KVM guest GPA space.
*/
if ((new->base_gfn + new->npages) >=
(kvm_riscv_gstage_gpa_size >> PAGE_SHIFT))
kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels) >> PAGE_SHIFT)
return -EFAULT;
hva = new->userspace_addr;
@ -250,10 +235,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
if (!kvm->arch.pgd)
return false;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
mmu_locked = spin_trylock(&kvm->mmu_lock);
kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT,
(range->end - range->start) << PAGE_SHIFT,
@ -275,10 +257,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
@ -298,10 +277,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
@ -463,16 +439,13 @@ int kvm_riscv_mmu_map(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
struct kvm_gstage gstage;
struct page *page;
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_init(&gstage, kvm);
/* Setup initial state of output mapping */
memset(out_map, 0, sizeof(*out_map));
/* We need minimum second+third level pages */
ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels);
ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels);
if (ret) {
kvm_err("Failed to topup G-stage cache\n");
return ret;
@ -575,6 +548,7 @@ int kvm_riscv_mmu_alloc_pgd(struct kvm *kvm)
return -ENOMEM;
kvm->arch.pgd = page_to_virt(pgd_page);
kvm->arch.pgd_phys = page_to_phys(pgd_page);
kvm->arch.pgd_levels = kvm_riscv_gstage_max_pgd_levels;
return 0;
}
@ -586,14 +560,13 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
gstage.kvm = kvm;
gstage.flags = 0;
gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid);
gstage.pgd = kvm->arch.pgd;
kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false);
kvm_riscv_gstage_init(&gstage, kvm);
kvm_riscv_gstage_unmap_range(&gstage, 0UL,
kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels), false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
kvm->arch.pgd_levels = 0;
}
spin_unlock(&kvm->mmu_lock);
@ -603,11 +576,12 @@ void kvm_riscv_mmu_free_pgd(struct kvm *kvm)
void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu)
{
unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT;
struct kvm_arch *k = &vcpu->kvm->arch;
struct kvm_arch *ka = &vcpu->kvm->arch;
unsigned long hgatp = kvm_riscv_gstage_mode(ka->pgd_levels)
<< HGATP_MODE_SHIFT;
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
hgatp |= (READ_ONCE(ka->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
hgatp |= (ka->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
ncsr_write(CSR_HGATP, hgatp);

View File

@ -338,7 +338,8 @@ static void make_xfence_request(struct kvm *kvm,
bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (hbase != -1UL) {
if (vcpu->vcpu_id < hbase)
if (vcpu->vcpu_id < hbase ||
vcpu->vcpu_id >= hbase + BITS_PER_LONG)
continue;
if (!(hmask & (1UL << (vcpu->vcpu_id - hbase))))
continue;

View File

@ -24,6 +24,8 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu);
const struct kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
@ -133,10 +135,12 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT;
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
/* Setup VCPU config */
kvm_riscv_vcpu_config_init(vcpu);
/* Setup ISA features available to VCPU */
kvm_riscv_vcpu_setup_isa(vcpu);
@ -529,57 +533,41 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
if (dbg->control & KVM_GUESTDBG_ENABLE) {
if (dbg->control & KVM_GUESTDBG_ENABLE)
vcpu->guest_debug = dbg->control;
vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
} else {
else
vcpu->guest_debug = 0;
vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
}
return 0;
}
/*
 * Populate the VCPU config fields (henvcfg, hstateen0, hedeleg) from the
 * ISA extension bitmap in vcpu->arch.isa and from vcpu->guest_debug.
 */
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_config *config = &vcpu->arch.cfg;
	const unsigned long *guest_isa = vcpu->arch.isa;

	/* henvcfg: OR in the enable bits matching each available extension */
	if (riscv_isa_extension_available(guest_isa, SVPBMT))
		config->henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(guest_isa, SSTC))
		config->henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(guest_isa, ZICBOM))
		config->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(guest_isa, ZICBOZ))
		config->henvcfg |= ENVCFG_CBZE;

	/* ADUE is set only when SVADU is available and SVADE is not */
	if (riscv_isa_extension_available(guest_isa, SVADU)) {
		if (!riscv_isa_extension_available(guest_isa, SVADE))
			config->henvcfg |= ENVCFG_ADUE;
	}

	/* hstateen0 is touched only when SMSTATEEN is reported as present */
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
		config->hstateen0 |= SMSTATEEN0_HSENVCFG;

		if (riscv_isa_extension_available(guest_isa, SSAIA))
			config->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
					     SMSTATEEN0_AIA |
					     SMSTATEEN0_AIA_ISEL;

		if (riscv_isa_extension_available(guest_isa, SMSTATEEN))
			config->hstateen0 |= SMSTATEEN0_SSTATEEN0;
	}

	/* Clear the breakpoint bit in hedeleg when guest debug is enabled */
	if (vcpu->guest_debug)
		config->hedeleg &= ~BIT(EXC_BREAKPOINT);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
void *nsh;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
/*
* If VCPU is being reloaded on the same physical CPU and no
* other KVM VCPU has run on this CPU since it was last put,
* we can skip the expensive CSR and HGATP writes.
*
* Note: If a new CSR is added to this fast-path skip block,
* make sure that 'csr_dirty' is set to true in any
* ioctl (e.g., KVM_SET_ONE_REG) that modifies it.
*/
if (vcpu != __this_cpu_read(kvm_former_vcpu))
__this_cpu_write(kvm_former_vcpu, vcpu);
else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty)
goto csr_restore_done;
vcpu->arch.csr_dirty = false;
/*
* Load VCPU config CSRs before other CSRs because
* the read/write behaviour of certain CSRs change
* based on VCPU config CSRs.
*/
kvm_riscv_vcpu_config_load(vcpu);
if (kvm_riscv_nacl_sync_csr_available()) {
nsh = nacl_shmem();
@ -590,17 +578,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
if (IS_ENABLED(CONFIG_32BIT))
nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
if (IS_ENABLED(CONFIG_32BIT))
nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
}
} else {
csr_write(CSR_VSSTATUS, csr->vsstatus);
csr_write(CSR_VSIE, csr->vsie);
@ -609,21 +588,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_VSEPC, csr->vsepc);
csr_write(CSR_VSCAUSE, csr->vscause);
csr_write(CSR_VSTVAL, csr->vstval);
csr_write(CSR_HEDELEG, cfg->hedeleg);
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
csr_write(CSR_HENVCFG, cfg->henvcfg);
if (IS_ENABLED(CONFIG_32BIT))
csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
csr_write(CSR_HSTATEEN0, cfg->hstateen0);
if (IS_ENABLED(CONFIG_32BIT))
csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
}
}
kvm_riscv_mmu_update_hgatp(vcpu);
kvm_riscv_vcpu_aia_load(vcpu, cpu);
csr_restore_done:
kvm_riscv_vcpu_timer_restore(vcpu);
kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context);
@ -633,8 +606,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context,
vcpu->arch.isa);
kvm_riscv_vcpu_aia_load(vcpu, cpu);
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
vcpu->cpu = cpu;
@ -750,28 +721,22 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *
{
struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
(cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0,
smcsr->sstateen0);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0);
}
static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
(cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0,
vcpu->arch.host_sstateen0);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0);
}
/*
@ -868,7 +833,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
if (!vcpu->arch.ran_atleast_once)
kvm_riscv_vcpu_setup_config(vcpu);
kvm_riscv_vcpu_config_ran_once(vcpu);
/* Mark this VCPU ran at least once */
vcpu->arch.ran_atleast_once = true;

View File

@ -0,0 +1,103 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2026 Qualcomm Technologies, Inc.
*/
#include <linux/kvm_host.h>
#include <asm/kvm_nacl.h>
/*
 * Default value assigned to cfg.hedeleg by kvm_riscv_vcpu_config_init():
 * one bit set per exception cause listed below.
 */
#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
BIT(EXC_INST_ILLEGAL) | \
BIT(EXC_BREAKPOINT) | \
BIT(EXC_SYSCALL) | \
BIT(EXC_INST_PAGE_FAULT) | \
BIT(EXC_LOAD_PAGE_FAULT) | \
BIT(EXC_STORE_PAGE_FAULT))
/*
 * Default value assigned to cfg.hideleg by kvm_riscv_vcpu_config_init():
 * the VS-level soft, timer and external interrupt bits.
 */
#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
BIT(IRQ_VS_TIMER) | \
BIT(IRQ_VS_EXT))
void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu)
{
vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT;
vcpu->arch.cfg.hideleg = KVM_HIDELEG_DEFAULT;
}
/*
 * Re-sync the breakpoint bit of cfg.hedeleg with vcpu->guest_debug:
 * the bit is set when guest debug is off and cleared when it is on.
 */
void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_config *config = &vcpu->arch.cfg;

	if (!vcpu->guest_debug)
		config->hedeleg |= BIT(EXC_BREAKPOINT);
	else
		config->hedeleg &= ~BIT(EXC_BREAKPOINT);

	/* Flag the CSR state as modified so it gets written out again */
	vcpu->arch.csr_dirty = true;
}
/*
 * Fill in the VCPU config (henvcfg, hstateen0, hedeleg) based on the ISA
 * extensions set in vcpu->arch.isa and on vcpu->guest_debug.
 */
void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_config *config = &vcpu->arch.cfg;
	const unsigned long *guest_isa = vcpu->arch.isa;

	/* henvcfg enable bits, one test per extension */
	if (riscv_isa_extension_available(guest_isa, SVPBMT))
		config->henvcfg |= ENVCFG_PBMTE;

	if (riscv_isa_extension_available(guest_isa, SSTC))
		config->henvcfg |= ENVCFG_STCE;

	if (riscv_isa_extension_available(guest_isa, ZICBOM))
		config->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);

	if (riscv_isa_extension_available(guest_isa, ZICBOZ))
		config->henvcfg |= ENVCFG_CBZE;

	/* ADUE requires SVADU to be available and SVADE to be absent */
	if (riscv_isa_extension_available(guest_isa, SVADU) &&
	    !riscv_isa_extension_available(guest_isa, SVADE))
		config->henvcfg |= ENVCFG_ADUE;

	/* Clear the breakpoint bit in hedeleg when guest debug is enabled */
	if (vcpu->guest_debug)
		config->hedeleg &= ~BIT(EXC_BREAKPOINT);

	/* Nothing left to do unless SMSTATEEN is reported as present */
	if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN))
		return;

	config->hstateen0 |= SMSTATEEN0_HSENVCFG;

	if (riscv_isa_extension_available(guest_isa, SSAIA))
		config->hstateen0 |= SMSTATEEN0_AIA_IMSIC |
				     SMSTATEEN0_AIA |
				     SMSTATEEN0_AIA_ISEL;

	if (riscv_isa_extension_available(guest_isa, SMSTATEEN))
		config->hstateen0 |= SMSTATEEN0_SSTATEEN0;
}
/*
 * Write the VCPU config values (hedeleg, hideleg, henvcfg and, when
 * SMSTATEEN is present, hstateen0) to their CSRs. The writes go through
 * the NACL shared memory when kvm_riscv_nacl_sync_csr_available() is
 * true, and through csr_write() otherwise.
 */
void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_config *config = &vcpu->arch.cfg;
	void *shmem;

	if (!kvm_riscv_nacl_sync_csr_available()) {
		csr_write(CSR_HEDELEG, config->hedeleg);
		csr_write(CSR_HIDELEG, config->hideleg);
		csr_write(CSR_HENVCFG, config->henvcfg);
		/* On 32-bit the upper half goes to the separate "H" CSR */
		if (IS_ENABLED(CONFIG_32BIT))
			csr_write(CSR_HENVCFGH, config->henvcfg >> 32);
		if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
			csr_write(CSR_HSTATEEN0, config->hstateen0);
			if (IS_ENABLED(CONFIG_32BIT))
				csr_write(CSR_HSTATEEN0H, config->hstateen0 >> 32);
		}
		return;
	}

	shmem = nacl_shmem();
	nacl_csr_write(shmem, CSR_HEDELEG, config->hedeleg);
	nacl_csr_write(shmem, CSR_HIDELEG, config->hideleg);
	nacl_csr_write(shmem, CSR_HENVCFG, config->henvcfg);
	/* On 32-bit the upper half goes to the separate "H" CSR */
	if (IS_ENABLED(CONFIG_32BIT))
		nacl_csr_write(shmem, CSR_HENVCFGH, config->henvcfg >> 32);
	if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
		nacl_csr_write(shmem, CSR_HSTATEEN0, config->hstateen0);
		if (IS_ENABLED(CONFIG_32BIT))
			nacl_csr_write(shmem, CSR_HSTATEEN0H, config->hstateen0 >> 32);
	}
}

View File

@ -13,6 +13,7 @@
#include <linux/nospec.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/kvm_isa.h>
#ifdef CONFIG_FPU
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
@ -60,17 +61,17 @@ void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx)
{
/* No need to check host sstatus as it can be modified outside */
if (riscv_isa_extension_available(NULL, d))
if (!kvm_riscv_isa_check_host(D))
__kvm_riscv_fp_d_save(cntx);
else if (riscv_isa_extension_available(NULL, f))
else if (!kvm_riscv_isa_check_host(F))
__kvm_riscv_fp_f_save(cntx);
}
void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx)
{
if (riscv_isa_extension_available(NULL, d))
if (!kvm_riscv_isa_check_host(D))
__kvm_riscv_fp_d_restore(cntx);
else if (riscv_isa_extension_available(NULL, f))
else if (!kvm_riscv_isa_check_host(F))
__kvm_riscv_fp_f_restore(cntx);
}
#endif

View File

@ -15,259 +15,19 @@
#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_vector.h>
#include <asm/pgtable.h>
#include <asm/vector.h>
#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
/*
 * Designated-initializer helper: KVM_ISA_EXT_ARR(FOO) expands to
 * [KVM_RISCV_ISA_EXT_FOO] = RISCV_ISA_EXT_FOO.
 */
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
/*
 * Mapping between KVM ISA Extension ID & guest ISA extension ID.
 *
 * The array is indexed by KVM_RISCV_ISA_EXT_* and each entry holds the
 * corresponding RISCV_ISA_EXT_* value. Entries are set with designated
 * initializers, so any index not listed here is zero-initialized.
 */
static const unsigned long kvm_isa_ext_arr[] = {
/* Single letter extensions (alphabetically sorted) */
[KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
[KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
[KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
[KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
[KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
[KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
/* Multi letter extensions (alphabetically sorted) */
KVM_ISA_EXT_ARR(SMNPM),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSNPM),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVADE),
KVM_ISA_EXT_ARR(SVADU),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
KVM_ISA_EXT_ARR(ZALASR),
KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZBC),
KVM_ISA_EXT_ARR(ZBKB),
KVM_ISA_EXT_ARR(ZBKC),
KVM_ISA_EXT_ARR(ZBKX),
KVM_ISA_EXT_ARR(ZBS),
KVM_ISA_EXT_ARR(ZCA),
KVM_ISA_EXT_ARR(ZCB),
KVM_ISA_EXT_ARR(ZCD),
KVM_ISA_EXT_ARR(ZCF),
KVM_ISA_EXT_ARR(ZCLSD),
KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
KVM_ISA_EXT_ARR(ZFBFMIN),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOP),
KVM_ISA_EXT_ARR(ZICBOZ),
KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
KVM_ISA_EXT_ARR(ZICOND),
KVM_ISA_EXT_ARR(ZICSR),
KVM_ISA_EXT_ARR(ZIFENCEI),
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
KVM_ISA_EXT_ARR(ZILSD),
KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
KVM_ISA_EXT_ARR(ZKNH),
KVM_ISA_EXT_ARR(ZKR),
KVM_ISA_EXT_ARR(ZKSED),
KVM_ISA_EXT_ARR(ZKSH),
KVM_ISA_EXT_ARR(ZKT),
KVM_ISA_EXT_ARR(ZTSO),
KVM_ISA_EXT_ARR(ZVBB),
KVM_ISA_EXT_ARR(ZVBC),
KVM_ISA_EXT_ARR(ZVFBFMIN),
KVM_ISA_EXT_ARR(ZVFBFWMA),
KVM_ISA_EXT_ARR(ZVFH),
KVM_ISA_EXT_ARR(ZVFHMIN),
KVM_ISA_EXT_ARR(ZVKB),
KVM_ISA_EXT_ARR(ZVKG),
KVM_ISA_EXT_ARR(ZVKNED),
KVM_ISA_EXT_ARR(ZVKNHA),
KVM_ISA_EXT_ARR(ZVKNHB),
KVM_ISA_EXT_ARR(ZVKSED),
KVM_ISA_EXT_ARR(ZVKSH),
KVM_ISA_EXT_ARR(ZVKT),
};
/*
 * Reverse lookup in kvm_isa_ext_arr: return the lowest KVM ISA extension
 * ID whose table entry equals @base_ext, or KVM_RISCV_ISA_EXT_MAX when no
 * entry matches.
 */
static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
{
	unsigned long kvm_ext = 0;

	while (kvm_ext < KVM_RISCV_ISA_EXT_MAX &&
	       kvm_isa_ext_arr[kvm_ext] != base_ext)
		kvm_ext++;

	return kvm_ext;
}
/*
 * Translate the KVM ISA extension ID @kvm_ext into the guest ISA
 * extension ID (stored in *@guest_ext) and check that the matching host
 * extension is available.
 *
 * Returns 0 on success, or -ENOENT when @kvm_ext is out of range or the
 * host extension is not available. *@guest_ext is written whenever
 * @kvm_ext is in range, even if the availability check then fails.
 */
static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *guest_ext)
{
	unsigned long host_ext;

	if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX ||
	    kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr))
		return -ENOENT;

	/* Clamp the index before it is used for the table lookup */
	kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr));
	*guest_ext = kvm_isa_ext_arr[kvm_ext];

	/*
	 * Smnpm is what gets reported to the guest for pointer masking
	 * effective in (H)S-mode, but the CSR bits that configure VS-mode
	 * pointer masking on the host side belong to Ssnpm, so that is the
	 * extension checked on the host. Every other extension is checked
	 * under its own ID.
	 */
	host_ext = (*guest_ext == RISCV_ISA_EXT_SMNPM) ?
		   RISCV_ISA_EXT_SSNPM : *guest_ext;

	return __riscv_isa_extension_available(NULL, host_ext) ? 0 : -ENOENT;
}
/*
 * Report whether the KVM ISA extension ID @ext may be enabled.
 * Only the IDs tested below are restricted; every other ID is allowed.
 */
static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
{
	/* H can never be enabled */
	if (ext == KVM_RISCV_ISA_EXT_H)
		return false;

	/* Sscofpmf depends on interrupt filtering defined in ssaia */
	if (ext == KVM_RISCV_ISA_EXT_SSCOFPMF)
		return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);

	/*
	 * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero, so
	 * the guest OS can use Svadu only when the host OS enables Svadu.
	 */
	if (ext == KVM_RISCV_ISA_EXT_SVADU)
		return arch_has_hw_pte_young();

	if (ext == KVM_RISCV_ISA_EXT_V)
		return riscv_v_vstate_ctrl_user_allowed();

	return true;
}
/*
 * Report whether the KVM ISA extension ID @ext may be disabled.
 * Returns false for every ID in the fallthrough case list below, a
 * host-dependent answer for SSAIA and SVADE, and true for any other ID.
 */
static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
{
switch (ext) {
/* Extensions which don't have any mechanism to disable */
case KVM_RISCV_ISA_EXT_A:
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
/* There is no architectural config bit to disable sscofpmf completely */
case KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_RISCV_ISA_EXT_SSNPM:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_RISCV_ISA_EXT_SVVPTC:
case KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_RISCV_ISA_EXT_ZABHA:
case KVM_RISCV_ISA_EXT_ZACAS:
case KVM_RISCV_ISA_EXT_ZALASR:
case KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_RISCV_ISA_EXT_ZBA:
case KVM_RISCV_ISA_EXT_ZBB:
case KVM_RISCV_ISA_EXT_ZBC:
case KVM_RISCV_ISA_EXT_ZBKB:
case KVM_RISCV_ISA_EXT_ZBKC:
case KVM_RISCV_ISA_EXT_ZBKX:
case KVM_RISCV_ISA_EXT_ZBS:
case KVM_RISCV_ISA_EXT_ZCA:
case KVM_RISCV_ISA_EXT_ZCB:
case KVM_RISCV_ISA_EXT_ZCD:
case KVM_RISCV_ISA_EXT_ZCF:
case KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_RISCV_ISA_EXT_ZFA:
case KVM_RISCV_ISA_EXT_ZFBFMIN:
case KVM_RISCV_ISA_EXT_ZFH:
case KVM_RISCV_ISA_EXT_ZFHMIN:
case KVM_RISCV_ISA_EXT_ZICBOP:
case KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_RISCV_ISA_EXT_ZICNTR:
case KVM_RISCV_ISA_EXT_ZICOND:
case KVM_RISCV_ISA_EXT_ZICSR:
case KVM_RISCV_ISA_EXT_ZIFENCEI:
case KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_RISCV_ISA_EXT_ZIHPM:
case KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_RISCV_ISA_EXT_ZKND:
case KVM_RISCV_ISA_EXT_ZKNE:
case KVM_RISCV_ISA_EXT_ZKNH:
case KVM_RISCV_ISA_EXT_ZKR:
case KVM_RISCV_ISA_EXT_ZKSED:
case KVM_RISCV_ISA_EXT_ZKSH:
case KVM_RISCV_ISA_EXT_ZKT:
case KVM_RISCV_ISA_EXT_ZTSO:
case KVM_RISCV_ISA_EXT_ZVBB:
case KVM_RISCV_ISA_EXT_ZVBC:
case KVM_RISCV_ISA_EXT_ZVFBFMIN:
case KVM_RISCV_ISA_EXT_ZVFBFWMA:
case KVM_RISCV_ISA_EXT_ZVFH:
case KVM_RISCV_ISA_EXT_ZVFHMIN:
case KVM_RISCV_ISA_EXT_ZVKB:
case KVM_RISCV_ISA_EXT_ZVKG:
case KVM_RISCV_ISA_EXT_ZVKNED:
case KVM_RISCV_ISA_EXT_ZVKNHA:
case KVM_RISCV_ISA_EXT_ZVKNHB:
case KVM_RISCV_ISA_EXT_ZVKSED:
case KVM_RISCV_ISA_EXT_ZVKSH:
case KVM_RISCV_ISA_EXT_ZVKT:
return false;
/* Extensions which can be disabled using Smstateen */
case KVM_RISCV_ISA_EXT_SSAIA:
return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN);
case KVM_RISCV_ISA_EXT_SVADE:
/*
 * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
 * Svade can't be disabled unless we support Svadu.
 */
return arch_has_hw_pte_young();
default:
break;
}
/* Any extension not listed above may be disabled */
return true;
}
void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
{
unsigned long guest_ext, i;
for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
if (__kvm_riscv_isa_check_host(i, &guest_ext))
continue;
if (kvm_riscv_vcpu_isa_enable_allowed(i))
if (kvm_riscv_isa_enable_allowed(i))
set_bit(guest_ext, vcpu->arch.isa);
}
}
@ -290,17 +50,17 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
break;
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
if (!riscv_isa_extension_available(NULL, ZICBOM))
if (kvm_riscv_isa_check_host(ZICBOM))
return -ENOENT;
reg_val = riscv_cbom_block_size;
break;
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
if (!riscv_isa_extension_available(NULL, ZICBOZ))
if (kvm_riscv_isa_check_host(ZICBOZ))
return -ENOENT;
reg_val = riscv_cboz_block_size;
break;
case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
if (!riscv_isa_extension_available(NULL, ZICBOP))
if (kvm_riscv_isa_check_host(ZICBOP))
return -ENOENT;
reg_val = riscv_cbop_block_size;
break;
@ -361,15 +121,15 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
if (!vcpu->arch.ran_atleast_once) {
/* Ignore the enable/disable request for certain extensions */
for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
isa_ext = kvm_riscv_base2isa_ext(i);
if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
reg_val &= ~BIT(i);
continue;
}
if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
if (!kvm_riscv_isa_enable_allowed(isa_ext))
if (reg_val & BIT(i))
reg_val &= ~BIT(i);
if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
if (!kvm_riscv_isa_disable_allowed(isa_ext))
if (!(reg_val & BIT(i)))
reg_val |= BIT(i);
}
@ -384,19 +144,19 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
}
break;
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
if (!riscv_isa_extension_available(NULL, ZICBOM))
if (kvm_riscv_isa_check_host(ZICBOM))
return -ENOENT;
if (reg_val != riscv_cbom_block_size)
return -EINVAL;
break;
case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
if (!riscv_isa_extension_available(NULL, ZICBOZ))
if (kvm_riscv_isa_check_host(ZICBOZ))
return -ENOENT;
if (reg_val != riscv_cboz_block_size)
return -EINVAL;
break;
case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size):
if (!riscv_isa_extension_available(NULL, ZICBOP))
if (kvm_riscv_isa_check_host(ZICBOP))
return -ENOENT;
if (reg_val != riscv_cbop_block_size)
return -EINVAL;
@ -670,6 +430,8 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
if (rc)
return rc;
vcpu->arch.csr_dirty = true;
return 0;
}
@ -680,7 +442,7 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
unsigned long guest_ext;
int ret;
ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext);
ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext);
if (ret)
return ret;
@ -698,7 +460,7 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
unsigned long guest_ext;
int ret;
ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext);
ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext);
if (ret)
return ret;
@ -711,10 +473,10 @@ static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
* extension can be disabled
*/
if (reg_val == 1 &&
kvm_riscv_vcpu_isa_enable_allowed(reg_num))
kvm_riscv_isa_enable_allowed(reg_num))
set_bit(guest_ext, vcpu->arch.isa);
else if (!reg_val &&
kvm_riscv_vcpu_isa_disable_allowed(reg_num))
kvm_riscv_isa_disable_allowed(reg_num))
clear_bit(guest_ext, vcpu->arch.isa);
else
return -EINVAL;
@ -857,13 +619,13 @@ static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
* was not available.
*/
if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
!riscv_isa_extension_available(NULL, ZICBOM))
kvm_riscv_isa_check_host(ZICBOM))
continue;
else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
!riscv_isa_extension_available(NULL, ZICBOZ))
kvm_riscv_isa_check_host(ZICBOZ))
continue;
else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) &&
!riscv_isa_extension_available(NULL, ZICBOP))
kvm_riscv_isa_check_host(ZICBOP))
continue;
size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
@ -1084,7 +846,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext))
if (__kvm_riscv_isa_check_host(i, &guest_ext))
continue;
if (uindices) {

View File

@ -7,16 +7,17 @@
*/
#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/nospec.h>
#include <linux/perf/riscv_pmu.h>
#include <asm/csr.h>
#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
#include <asm/sbi.h>
#include <linux/bitops.h>
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
@ -226,7 +227,14 @@ static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
return -EINVAL;
if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
return -EINVAL;
fevent_code = get_event_code(pmc->event_idx);
if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
"Invalid firmware event code: %d\n", fevent_code))
return -EINVAL;
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
*out_val = pmc->counter_val >> 32;
@ -251,7 +259,14 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID)
return -EINVAL;
fevent_code = get_event_code(pmc->event_idx);
if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX,
"Invalid firmware event code: %d\n", fevent_code))
return -EINVAL;
pmc->counter_val = kvpmu->fw_event[fevent_code].value;
} else if (pmc->perf_event) {
pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
@ -266,8 +281,10 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
unsigned long ctr_mask)
{
/* Make sure the we have a valid counter mask requested from the caller */
if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
unsigned long num_ctrs = kvm_pmu_num_counters(kvpmu);
/* Make sure we have a valid counter mask requested from the caller */
if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs))
return -EINVAL;
return 0;
@ -427,11 +444,12 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
saddr = saddr_low;
if (saddr_high != 0) {
if (IS_ENABLED(CONFIG_32BIT))
if (IS_ENABLED(CONFIG_32BIT)) {
saddr |= ((gpa_t)saddr_high << 32);
else
} else {
sbiret = SBI_ERR_INVALID_ADDRESS;
goto out;
goto out;
}
}
kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
@ -441,6 +459,7 @@ int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long s
/* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */
if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
kfree(kvpmu->sdata);
kvpmu->sdata = NULL;
sbiret = SBI_ERR_INVALID_ADDRESS;
goto out;
}
@ -827,7 +846,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
* filtering is available in the host. Otherwise, guest will always count
* events while the execution is in hypervisor mode.
*/
if (!riscv_isa_extension_available(NULL, SSCOFPMF))
if (kvm_riscv_isa_check_host(SSCOFPMF))
return;
ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);

View File

@ -181,6 +181,7 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
unsigned long reg_size, const void *reg_val)
{
unsigned long value;
gpa_t new_shmem = INVALID_GPA;
if (reg_size != sizeof(unsigned long))
return -EINVAL;
@ -191,18 +192,18 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
if (IS_ENABLED(CONFIG_32BIT)) {
gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem);
vcpu->arch.sta.shmem = value;
vcpu->arch.sta.shmem |= hi << 32;
new_shmem = value;
new_shmem |= hi << 32;
} else {
vcpu->arch.sta.shmem = value;
new_shmem = value;
}
break;
case KVM_REG_RISCV_SBI_STA_REG(shmem_hi):
if (IS_ENABLED(CONFIG_32BIT)) {
gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem);
vcpu->arch.sta.shmem = ((gpa_t)value << 32);
vcpu->arch.sta.shmem |= lo;
new_shmem = ((gpa_t)value << 32);
new_shmem |= lo;
} else if (value != 0) {
return -EINVAL;
}
@ -211,6 +212,11 @@ static int kvm_sbi_ext_sta_set_reg(struct kvm_vcpu *vcpu, unsigned long reg_num,
return -ENOENT;
}
if (new_shmem != INVALID_GPA && !IS_ALIGNED(new_shmem, 64))
return -EINVAL;
vcpu->arch.sta.shmem = new_shmem;
return 0;
}

View File

@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <clocksource/timer-riscv.h>
#include <asm/delay.h>
#include <asm/kvm_isa.h>
#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_timer.h>
@ -253,7 +254,7 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
t->next_set = false;
/* Enable sstc for every vcpu if available in hardware */
if (riscv_isa_extension_available(NULL, SSTC)) {
if (!kvm_riscv_isa_check_host(SSTC)) {
t->sstc_enabled = true;
hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);

View File

@ -12,6 +12,7 @@
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/kvm_isa.h>
#include <asm/kvm_vcpu_vector.h>
#include <asm/vector.h>
@ -63,13 +64,13 @@ void kvm_riscv_vcpu_guest_vector_restore(struct kvm_cpu_context *cntx,
void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx)
{
/* No need to check host sstatus as it can be modified outside */
if (riscv_isa_extension_available(NULL, v))
if (!kvm_riscv_isa_check_host(V))
__kvm_riscv_vector_save(cntx);
}
void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx)
{
if (riscv_isa_extension_available(NULL, v))
if (!kvm_riscv_isa_check_host(V))
__kvm_riscv_vector_restore(cntx);
}
@ -80,8 +81,11 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu)
return -ENOMEM;
vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
if (!vcpu->arch.host_context.vector.datap)
if (!vcpu->arch.host_context.vector.datap) {
kfree(vcpu->arch.guest_context.vector.datap);
vcpu->arch.guest_context.vector.datap = NULL;
return -ENOMEM;
}
return 0;
}
@ -127,6 +131,7 @@ static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
} else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
if (reg_size != vlenb)
return -EINVAL;
WARN_ON(!cntx->vector.datap);
*reg_addr = cntx->vector.datap +
(reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
} else {

View File

@ -199,7 +199,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_USER_MEM_SLOTS;
break;
case KVM_CAP_VM_GPA_BITS:
r = kvm_riscv_gstage_gpa_bits;
if (!kvm)
r = kvm_riscv_gstage_gpa_bits(kvm_riscv_gstage_max_pgd_levels);
else
r = kvm_riscv_gstage_gpa_bits(kvm->arch.pgd_levels);
break;
default:
r = 0;
@ -211,12 +214,52 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
if (cap->flags)
return -EINVAL;
switch (cap->cap) {
case KVM_CAP_RISCV_MP_STATE_RESET:
if (cap->flags)
return -EINVAL;
kvm->arch.mp_state_reset = true;
return 0;
case KVM_CAP_VM_GPA_BITS: {
unsigned long gpa_bits = cap->args[0];
unsigned long new_levels;
int r = 0;
/* Decide target pgd levels from requested gpa_bits */
#ifdef CONFIG_64BIT
if (gpa_bits <= 41)
new_levels = 3; /* Sv39x4 */
else if (gpa_bits <= 50)
new_levels = 4; /* Sv48x4 */
else if (gpa_bits <= 59)
new_levels = 5; /* Sv57x4 */
else
return -EINVAL;
#else
/* 32-bit: only Sv32x4 */
if (gpa_bits <= 34)
new_levels = 2;
else
return -EINVAL;
#endif
if (new_levels > kvm_riscv_gstage_max_pgd_levels)
return -EINVAL;
/* Follow KVM's lock ordering: kvm->lock -> kvm->slots_lock. */
mutex_lock(&kvm->lock);
mutex_lock(&kvm->slots_lock);
if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm))
r = -EBUSY;
else
kvm->arch.pgd_levels = new_levels;
mutex_unlock(&kvm->slots_lock);
mutex_unlock(&kvm->lock);
return r;
}
default:
return -EINVAL;
}

View File

@ -26,7 +26,8 @@ static DEFINE_SPINLOCK(vmid_lock);
void __init kvm_riscv_gstage_vmid_detect(void)
{
/* Figure-out number of VMID bits in HW */
csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID);
csr_write(CSR_HGATP, (kvm_riscv_gstage_mode(kvm_riscv_gstage_max_pgd_levels) <<
HGATP_MODE_SHIFT) | HGATP_VMID);
vmid_bits = csr_read(CSR_HGATP);
vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT;
vmid_bits = fls_long(vmid_bits);

View File

@ -17,4 +17,6 @@
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
/* All-ones 64-bit value; by its name, a marker for "no valid guest physical address". */
#define INVALID_GPA (~(uint64_t)0)
#endif /* SELFTEST_KVM_UTIL_TYPES_H */

View File

@ -97,6 +97,43 @@ enum sbi_pmu_hw_generic_events_t {
SBI_PMU_HW_GENERAL_MAX,
};
/*
 * SBI PMU firmware event codes.
 *
 * Every code is assigned explicitly (0 through 21). SBI_PMU_FW_MAX takes the
 * implicit next value (22), so it equals the number of codes defined here.
 */
enum sbi_pmu_fw_generic_events_t {
	SBI_PMU_FW_MISALIGNED_LOAD		= 0,
	SBI_PMU_FW_MISALIGNED_STORE		= 1,
	SBI_PMU_FW_ACCESS_LOAD			= 2,
	SBI_PMU_FW_ACCESS_STORE			= 3,
	SBI_PMU_FW_ILLEGAL_INSN			= 4,
	SBI_PMU_FW_SET_TIMER			= 5,
	SBI_PMU_FW_IPI_SENT			= 6,
	SBI_PMU_FW_IPI_RCVD			= 7,
	SBI_PMU_FW_FENCE_I_SENT			= 8,
	SBI_PMU_FW_FENCE_I_RCVD			= 9,
	SBI_PMU_FW_SFENCE_VMA_SENT		= 10,
	SBI_PMU_FW_SFENCE_VMA_RCVD		= 11,
	SBI_PMU_FW_SFENCE_VMA_ASID_SENT		= 12,
	SBI_PMU_FW_SFENCE_VMA_ASID_RCVD		= 13,

	SBI_PMU_FW_HFENCE_GVMA_SENT		= 14,
	SBI_PMU_FW_HFENCE_GVMA_RCVD		= 15,
	SBI_PMU_FW_HFENCE_GVMA_VMID_SENT	= 16,
	SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD	= 17,

	SBI_PMU_FW_HFENCE_VVMA_SENT		= 18,
	SBI_PMU_FW_HFENCE_VVMA_RCVD		= 19,
	SBI_PMU_FW_HFENCE_VVMA_ASID_SENT	= 20,
	SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD	= 21,

	/* Keep last: one past the highest code above. */
	SBI_PMU_FW_MAX,
};
/*
 * SBI PMU event types.
 *
 * The values are not contiguous: 0x0-0x3 are followed directly by 0xf for
 * firmware events.
 */
enum sbi_pmu_event_type {
	SBI_PMU_EVENT_TYPE_HW		= 0x0,
	SBI_PMU_EVENT_TYPE_CACHE	= 0x1,
	SBI_PMU_EVENT_TYPE_RAW		= 0x2,
	SBI_PMU_EVENT_TYPE_RAW_V2	= 0x3,
	SBI_PMU_EVENT_TYPE_FW		= 0xf,
};
/* SBI PMU counter types */
enum sbi_pmu_ctr_type {
SBI_PMU_CTR_TYPE_HW = 0x0,

View File

@ -566,3 +566,8 @@ unsigned long riscv64_get_satp_mode(void)
return val;
}
/*
 * Tell the caller whether a default irqchip is available.
 *
 * Return: true when kvm_check_cap(KVM_CAP_IRQCHIP) yields a non-zero value,
 * false otherwise.
 */
bool kvm_arch_has_default_irqchip(void)
{
	return kvm_check_cap(KVM_CAP_IRQCHIP) != 0;
}

View File

@ -436,6 +436,7 @@ static void test_pmu_basic_sanity(void)
struct sbiret ret;
int num_counters = 0, i;
union sbi_pmu_ctr_info ctrinfo;
unsigned long fw_eidx;
probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
GUEST_ASSERT(probe && out_val == 1);
@ -461,7 +462,24 @@ static void test_pmu_basic_sanity(void)
pmu_csr_read_num(ctrinfo.csr);
GUEST_ASSERT(illegal_handler_invoked);
} else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
read_fw_counter(i, ctrinfo);
/* Read without configure should fail */
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
i, 0, 0, 0, 0, 0);
GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM);
/*
* Try to configure with a common firmware event.
* If configuration succeeds, verify we can read it.
*/
fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) |
SBI_PMU_FW_ACCESS_LOAD;
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH,
i, 1, 0, fw_eidx, 0, 0);
if (ret.error == 0) {
GUEST_ASSERT(ret.value == i);
read_fw_counter(i, ctrinfo);
}
}
}

View File

@ -69,16 +69,10 @@ static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
{
int ret;
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vcpu->vm, st_gva[i]);
ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
(ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
@ -99,6 +93,21 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
st->pad[8], st->pad[9], st->pad[10]);
}
/*
 * UAPI check for the steal-time MSR, run on a throwaway single-vCPU VM.
 *
 * Writes MSR_KVM_STEAL_TIME with ST_GPA_BASE OR'ed with
 * KVM_STEAL_RESERVED_MASK and asserts that _vcpu_set_msr() returns 0, which
 * this test treats as the write having been refused.
 */
static void check_steal_time_uapi(void)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
	int ret;

	ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
			    (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK);
	TEST_ASSERT(ret == 0, "Bad GPA didn't fail");

	/* The VM exists only for this check. */
	kvm_vm_free(vm);
}
#elif defined(__aarch64__)
/* PV_TIME_ST must have 64-byte alignment */
@ -170,7 +179,6 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
{
struct kvm_vm *vm = vcpu->vm;
uint64_t st_ipa;
int ret;
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
@ -178,21 +186,12 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
.addr = (uint64_t)&st_ipa,
};
vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
st_ipa = (ulong)st_gva[i] | 1;
ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
st_ipa = (ulong)st_gva[i];
vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
@ -205,6 +204,36 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
ksft_print_msg(" st_time: %ld\n", st->st_time);
}
/*
 * UAPI check for the KVM_ARM_VCPU_PVTIME_IPA device attribute, run on a
 * throwaway single-vCPU VM. Three steps, in order:
 *
 *  1. ST_GPA_BASE | 1 must be rejected with EINVAL.
 *  2. ST_GPA_BASE itself must be accepted (vcpu_ioctl() is the asserting
 *     variant).
 *  3. Setting the attribute again must be rejected with EEXIST.
 */
static void check_steal_time_uapi(void)
{
	uint64_t st_ipa;
	/* dev.addr refers to st_ipa, so updating st_ipa changes what each ioctl is handed. */
	struct kvm_device_attr dev = {
		.group = KVM_ARM_VCPU_PVTIME_CTRL,
		.attr = KVM_ARM_VCPU_PVTIME_IPA,
		.addr = (uint64_t)&st_ipa,
	};
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
	int ret;

	vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);

	/* Step 1: low bit set. */
	st_ipa = (ulong)ST_GPA_BASE | 1;
	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
	TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");

	/* Step 2: the plain base address. */
	st_ipa = (ulong)ST_GPA_BASE;
	vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);

	/* Step 3: same value a second time. */
	ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
	TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");

	kvm_vm_free(vm);
}
#elif defined(__riscv)
/* SBI STA shmem must have 64-byte alignment */
@ -301,6 +330,41 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
pr_info("\n");
}
static void check_steal_time_uapi(void)
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
struct kvm_one_reg reg;
uint64_t shmem;
int ret;
vm = vm_create_with_one_vcpu(&vcpu, NULL);
reg.id = KVM_REG_RISCV |
KVM_REG_SIZE_ULONG |
KVM_REG_RISCV_SBI_STATE |
KVM_REG_RISCV_SBI_STA |
KVM_REG_RISCV_SBI_STA_REG(shmem_lo);
reg.addr = (uint64_t)&shmem;
shmem = ST_GPA_BASE + 1;
ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"misaligned STA shmem returns -EINVAL");
shmem = ST_GPA_BASE;
ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
TEST_ASSERT(ret == 0,
"aligned STA shmem succeeds");
shmem = INVALID_GPA;
ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
TEST_ASSERT(ret == 0,
"all-ones for STA shmem succeeds");
kvm_vm_free(vm);
}
#elif defined(__loongarch__)
/* steal_time must have 64-byte alignment */
@ -465,6 +529,8 @@ int main(int ac, char **av)
TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
ksft_set_plan(NR_VCPUS);
check_steal_time_uapi();
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
steal_time_init(vcpus[i], i);