KVM/arm64 updates for 6.18

- Add support for FF-A 1.2 as the secure memory conduit for pKVM,
   allowing more registers to be used as part of the message payload.
 
 - Change the way pKVM allocates its VM handles, making sure that the
   privileged hypervisor is never tricked into using uninitialised
   data.
 
 - Speed up MMIO range registration by avoiding unnecessary RCU
   synchronisation, which results in VMs starting much more quickly.
 
 - Add the dump of the instruction stream when panicking in the EL2
   payload, just like the rest of the kernel has always done. This will
   hopefully help debugging non-VHE setups.
 
 - Add 52bit PA support to the stage-1 page-table walker, and make use
   of it to populate the fault level reported to the guest on failing
   to translate a stage-1 walk.
 
 - Add NV support to the GICv3-on-GICv5 emulation code, ensuring
   feature parity for guests, irrespective of the host platform.
 
 - Fix some really ugly architecture problems when dealing with debug
   in a nested VM. This has some bad performance impacts, but is at
   least correct.
 
 - Add enough infrastructure to be able to disable EL2 features and
   give effective values to the EL2 control registers. This then allows
   a bunch of features to be turned off, which helps cross-host
   migration.
 
 - Large rework of the selftest infrastructure to allow most tests to
   transparently run at EL2. This is the first step towards enabling
   NV testing.
 
 - Various fixes and improvements all over the map, including one BE
   fix, just in time for the removal of the feature.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmjVhSsACgkQI9DQutE9
 ekOSIg//YdbA/zo17GrLnJnlGdUnS0e3/357n3e5Lypx1UFRDTmNacpVPw4VG/jt
 eVpQn7AgYwyvKfCq46eD+hBBqNv1XTn4DeXttv7CmVqhCRythsEvDkTBWSt7oYUZ
 xfYXCMKNhqUElH4AbYYx3y7nb2E9/KVGr+NBn6Vf5c14OZ3MGVc/fyp4jM1ih5dR
 kcV2onAYohlIGvFEyZMBtJ+jkYkqfIbfxfqCL0RAET0aEBFcmM1aXybWZj47hlLM
 f2j+E6cFQ0ZzUt+3pFhT75wo43lHGtIFDjVd60uishyU+NXTVvqRmXDTRU4k546W
 18HHX1yijbzuXIatVhVRo2hIq3jKU37T9wtj46BejbDHRdAPENEyN/Qopm7rNS+X
 mCwOT7He6KR+H4rU6nFaTcsS7bNRCvIbZP9i9zb6NElbvXu5QnM8BUQsYFCDUa/n
 xtbtQlckbo/7zeoUsBDrGj2XmCf0d45FTHb7fdWOYEmMSmJhXYpUKdM4JcLyhKoQ
 DD0ox2S+pt2lwNw3XOSABdES0KJxCvDASAMIgn2h2sGpY8FxsBcVW/BufXopdafG
 UeInxWaILp2iCDM4tH2GLjKqlvMAOwcA+mAEZToXypxlJAnYA6J1pXCF8WEaM6+D
 BGrLli8Zd8JRs87byq6K7tp8oLNzZdliJp73j5jfOHTJA4MnvFI=
 =iAL3
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 updates for 6.18

- Add support for FF-A 1.2 as the secure memory conduit for pKVM,
  allowing more registers to be used as part of the message payload.

- Change the way pKVM allocates its VM handles, making sure that the
  privileged hypervisor is never tricked into using uninitialised
  data.

- Speed up MMIO range registration by avoiding unnecessary RCU
  synchronisation, which results in VMs starting much more quickly.

- Add the dump of the instruction stream when panicking in the EL2
  payload, just like the rest of the kernel has always done. This will
  hopefully help debugging non-VHE setups.

- Add 52bit PA support to the stage-1 page-table walker, and make use
  of it to populate the fault level reported to the guest on failing
  to translate a stage-1 walk.

- Add NV support to the GICv3-on-GICv5 emulation code, ensuring
  feature parity for guests, irrespective of the host platform.

- Fix some really ugly architecture problems when dealing with debug
  in a nested VM. This has some bad performance impacts, but is at
  least correct.

- Add enough infrastructure to be able to disable EL2 features and
  give effective values to the EL2 control registers. This then allows
  a bunch of features to be turned off, which helps cross-host
  migration.

- Large rework of the selftest infrastructure to allow most tests to
  transparently run at EL2. This is the first step towards enabling
  NV testing.

- Various fixes and improvements all over the map, including one BE
  fix, just in time for the removal of the feature.
Paolo Bonzini 2025-09-30 13:23:28 -04:00
commit 924ebaefce
72 changed files with 1696 additions and 688 deletions

@ -81,6 +81,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,

@ -220,6 +220,20 @@ static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
static inline bool vcpu_el2_amo_is_set(const struct kvm_vcpu *vcpu)
{
/*
* DDI0487L.b Known Issue D22105
*
* When executing at EL2 and HCR_EL2.{E2H,TGE} = {1, 0} it is
* IMPLEMENTATION DEFINED whether the effective value of HCR_EL2.AMO
* is the value programmed or 1.
*
* Make the implementation choice of treating the effective value as 1,
* since we cannot subsequently catch changes to TGE or AMO that would
* otherwise lead to the SError becoming deliverable.
*/
if (vcpu_is_el2(vcpu) && vcpu_el2_e2h_is_set(vcpu) && !vcpu_el2_tge_is_set(vcpu))
return true;
return ctxt_sys_reg(&vcpu->arch.ctxt, HCR_EL2) & HCR_AMO;
}
@ -511,21 +525,29 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
if (vcpu_mode_is_32bit(vcpu)) {
*vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT;
} else {
u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
enum vcpu_sysreg r;
u64 sctlr;
r = vcpu_has_nv(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
sctlr = vcpu_read_sys_reg(vcpu, r);
sctlr |= SCTLR_ELx_EE;
vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
vcpu_write_sys_reg(vcpu, sctlr, r);
}
}
static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
{
enum vcpu_sysreg r;
u64 bit;
if (vcpu_mode_is_32bit(vcpu))
return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT);
if (vcpu_mode_priv(vcpu))
return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE);
else
return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E);
r = is_hyp_ctxt(vcpu) ? SCTLR_EL2 : SCTLR_EL1;
bit = vcpu_mode_priv(vcpu) ? SCTLR_ELx_EE : SCTLR_EL1_E0E;
return vcpu_read_sys_reg(vcpu, r) & bit;
}
static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,

@ -252,7 +252,8 @@ struct kvm_protected_vm {
pkvm_handle_t handle;
struct kvm_hyp_memcache teardown_mc;
struct kvm_hyp_memcache stage2_teardown_mc;
bool enabled;
bool is_protected;
bool is_created;
};
struct kvm_mpidr_data {
@ -1442,7 +1443,7 @@ struct kvm *kvm_arch_alloc_vm(void);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled)
#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected)
#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm)

@ -83,6 +83,8 @@ extern void check_nested_vcpu_requests(struct kvm_vcpu *vcpu);
extern void kvm_nested_flush_hwstate(struct kvm_vcpu *vcpu);
extern void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu);
extern void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu);
struct kvm_s2_trans {
phys_addr_t output;
unsigned long block_size;
@ -265,7 +267,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
return base;
}
static inline unsigned int ps_to_output_size(unsigned int ps)
static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit)
{
switch (ps) {
case 0: return 32;
@ -273,7 +275,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps)
case 2: return 40;
case 3: return 42;
case 4: return 44;
case 5:
case 5: return 48;
case 6: if (pa52bit)
return 52;
fallthrough;
default:
return 48;
}
@ -285,13 +290,28 @@ enum trans_regime {
TR_EL2,
};
struct s1_walk_info;
struct s1_walk_context {
struct s1_walk_info *wi;
u64 table_ipa;
int level;
};
struct s1_walk_filter {
int (*fn)(struct s1_walk_context *, void *);
void *priv;
};
struct s1_walk_info {
struct s1_walk_filter *filter;
u64 baddr;
enum trans_regime regime;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int txsz;
int sl;
u8 sh;
bool as_el0;
bool hpd;
bool e0poe;
@ -299,6 +319,7 @@ struct s1_walk_info {
bool pan;
bool be;
bool s2;
bool pa52bit;
};
struct s1_walk_result {
@ -334,6 +355,8 @@ struct s1_walk_result {
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va);
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa,
int *level);
/* VNCR management */
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);

@ -18,6 +18,7 @@
int pkvm_init_host_vm(struct kvm *kvm);
int pkvm_create_hyp_vm(struct kvm *kvm);
bool pkvm_hyp_vm_is_created(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);

@ -36,6 +36,7 @@ int kasan_brk_handler(struct pt_regs *regs, unsigned long esr);
int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr);
int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
void dump_kernel_instr(unsigned long kaddr);
/*
* Move regs->pc to next instruction and do necessary setup before it

@ -94,6 +94,8 @@
#define VNCR_PMSICR_EL1 0x838
#define VNCR_PMSIRR_EL1 0x840
#define VNCR_PMSLATFR_EL1 0x848
#define VNCR_PMSNEVFR_EL1 0x850
#define VNCR_PMSDSFR_EL1 0x858
#define VNCR_TRFCR_EL1 0x880
#define VNCR_MPAM1_EL1 0x900
#define VNCR_MPAMHCR_EL2 0x930

@ -2539,6 +2539,15 @@ test_has_mpam_hcr(const struct arm64_cpu_capabilities *entry, int scope)
return idr & MPAMIDR_EL1_HAS_HCR;
}
static bool
test_has_gicv5_legacy(const struct arm64_cpu_capabilities *entry, int scope)
{
if (!this_cpu_has_cap(ARM64_HAS_GICV5_CPUIF))
return false;
return !!(read_sysreg_s(SYS_ICC_IDR0_EL1) & ICC_IDR0_EL1_GCIE_LEGACY);
}
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.capability = ARM64_ALWAYS_BOOT,
@ -3156,6 +3165,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, GCIE, IMP)
},
{
.desc = "GICv5 Legacy vCPU interface",
.type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
.capability = ARM64_HAS_GICV5_LEGACY,
.matches = test_has_gicv5_legacy,
},
{},
};

@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
/* Static key indicating whether GICv3 has GICv2 compatibility */
KVM_NVHE_ALIAS(vgic_v3_has_v2_compat);
/* Static key which is set if CNTVOFF_EL2 is unusable */
KVM_NVHE_ALIAS(broken_cntvoff_key);

@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
int show_unhandled_signals = 0;
static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
void dump_kernel_instr(unsigned long kaddr)
{
unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
if (user_mode(regs))
if (!is_ttbr1_addr(kaddr))
return;
for (i = -4; i < 1; i++) {
unsigned int val, bad;
bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@ -169,7 +168,7 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
printk("%sCode: %s\n", lvl, str);
printk(KERN_EMERG "Code: %s\n", str);
}
#define S_SMP " SMP"
@ -178,6 +177,7 @@ static int __die(const char *str, long err, struct pt_regs *regs)
{
static int die_counter;
int ret;
unsigned long addr = instruction_pointer(regs);
pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n",
str, err, ++die_counter);
@ -190,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs)
print_modules();
show_regs(regs);
dump_kernel_instr(KERN_EMERG, regs);
if (user_mode(regs))
return ret;
dump_kernel_instr(addr);
return ret;
}

@ -170,10 +170,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
ret = pkvm_init_host_vm(kvm);
if (ret)
goto err_unshare_kvm;
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) {
ret = -ENOMEM;
goto err_unshare_kvm;
@ -184,6 +180,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto err_free_cpumask;
if (is_protected_kvm_enabled()) {
/*
* If any failures occur after this is successful, make sure to
* call __pkvm_unreserve_vm to unreserve the VM in hyp.
*/
ret = pkvm_init_host_vm(kvm);
if (ret)
goto err_free_cpumask;
}
kvm_vgic_early_init(kvm);
kvm_timer_init_vm(kvm);
@ -2317,8 +2323,9 @@ static int __init init_subsystems(void)
}
if (kvm_mode == KVM_MODE_NV &&
!(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) {
kvm_err("NV support requires GICv3, giving up\n");
!(vgic_present && (kvm_vgic_global_state.type == VGIC_V3 ||
kvm_vgic_global_state.has_gcie_v3_compat))) {
kvm_err("NV support requires GICv3 or GICv5 with legacy support, giving up\n");
err = -EINVAL;
goto out;
}

@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi)
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
if (wi->pa52bit)
return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}
static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
switch (BIT(wi->pgshift)) {
case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
return false;
return ((wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_PS_MASK, tcr) :
FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
case SZ_16K:
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
return false;
break;
case SZ_4K:
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
return false;
break;
}
return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
}
static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
u64 addr;
if (!wi->pa52bit)
return desc & GENMASK_ULL(47, wi->pgshift);
switch (BIT(wi->pgshift)) {
case SZ_4K:
case SZ_16K:
addr = desc & GENMASK_ULL(49, wi->pgshift);
addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
break;
case SZ_64K:
default: /* IMPDEF: treat any other value as 64k */
addr = desc & GENMASK_ULL(47, wi->pgshift);
addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
break;
}
return addr;
}
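/*
 * Editorial illustration, not part of the patch: with a 64K granule,
 * OA[51:48] are carried in descriptor bits [15:12] (KVM_PTE_ADDR_51_48),
 * so, ignoring the attribute bits, a descriptor of 0x000012345678a000
 * decodes to the 52-bit output address 0x000a123456780000. With 4K/16K
 * granules and LPA2, descriptor bits [9:8] (KVM_PTE_ADDR_51_50_LPA2)
 * supply OA[51:50] instead, and bits [49:pgshift] are used as-is.
 */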
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
@ -50,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
}
}
static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
if (regime == TR_EL10) {
if (vcpu_has_nv(vcpu) &&
!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
return 0;
return vcpu_read_sys_reg(vcpu, TCR2_EL1);
}
return vcpu_read_sys_reg(vcpu, TCR2_EL2);
}
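/*
 * Editorial note: this helper centralises the HCRX_EL2.TCR2En check,
 * so callers such as s1pie_enabled() and compute_s1poe() below simply
 * see an all-zero TCR2 when the EL1 regime has it disabled, rather
 * than open-coding the gating at each site.
 */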
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
if (!kvm_has_s1pie(vcpu->kvm))
return false;
switch (regime) {
case TR_EL2:
case TR_EL20:
return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
case TR_EL10:
return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
default:
BUG();
}
/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}
static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
@ -76,23 +129,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
return;
}
switch (wi->regime) {
case TR_EL2:
case TR_EL20:
val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
wi->poe = val & TCR2_EL2_POE;
wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
break;
case TR_EL10:
if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
wi->poe = wi->e0poe = false;
return;
}
val = effective_tcr2(vcpu, wi->regime);
val = __vcpu_sys_reg(vcpu, TCR2_EL1);
wi->poe = val & TCR2_EL1_POE;
wi->e0poe = val & TCR2_EL1_E0POE;
}
/* Abuse TCR2_EL1_* for EL2 */
wi->poe = val & TCR2_EL1_POE;
wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
@ -102,14 +143,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
unsigned int stride, x;
bool va55, tbi, lva;
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
va55 = va & BIT(55);
if (wi->regime == TR_EL2 && va55)
goto addrsz;
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
if (vcpu_has_nv(vcpu)) {
hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
} else {
WARN_ON_ONCE(wi->regime != TR_EL10);
wi->s2 = false;
hcr = 0;
}
switch (wi->regime) {
case TR_EL10:
@ -131,6 +174,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
BUG();
}
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55 && wi->regime != TR_EL2) {
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG1_MASK, tcr);
switch (tg << TCR_TG1_SHIFT) {
case TCR_TG1_4K:
wi->pgshift = 12; break;
case TCR_TG1_16K:
wi->pgshift = 14; break;
case TCR_TG1_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
} else {
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG0_MASK, tcr);
switch (tg << TCR_TG0_SHIFT) {
case TCR_TG0_4K:
wi->pgshift = 12; break;
case TCR_TG0_16K:
wi->pgshift = 14; break;
case TCR_TG0_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
}
wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
ia_bits = get_ia_size(wi);
/* AArch64.S1StartLevel() */
stride = wi->pgshift - 3;
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
if (wi->regime == TR_EL2 && va55)
goto addrsz;
tbi = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_TBI, tcr) :
(va55 ?
@ -140,6 +223,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (!tbi && (u64)sign_extend64(va, 55) != va)
goto addrsz;
wi->sh = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
(va55 ?
FIELD_GET(TCR_SH1_MASK, tcr) :
FIELD_GET(TCR_SH0_MASK, tcr)));
va = (u64)sign_extend64(va, 55);
/* Let's put the MMU disabled case aside immediately */
@ -194,53 +283,20 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
/* R_BVXDG */
wi->hpd |= (wi->poe || wi->e0poe);
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55) {
wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG1_MASK, tcr);
switch (tg << TCR_TG1_SHIFT) {
case TCR_TG1_4K:
wi->pgshift = 12; break;
case TCR_TG1_16K:
wi->pgshift = 14; break;
case TCR_TG1_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
} else {
wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
tg = FIELD_GET(TCR_TG0_MASK, tcr);
switch (tg << TCR_TG0_SHIFT) {
case TCR_TG0_4K:
wi->pgshift = 12; break;
case TCR_TG0_16K:
wi->pgshift = 14; break;
case TCR_TG0_64K:
default: /* IMPDEF: treat any other value as 64k */
wi->pgshift = 16; break;
}
}
/* R_PLCGL, R_YXNYW */
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
if (wi->txsz > 39)
goto transfault_l0;
goto transfault;
} else {
if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
goto transfault_l0;
goto transfault;
}
/* R_GTJBY, R_SXWGM */
switch (BIT(wi->pgshift)) {
case SZ_4K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
break;
case SZ_16K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
lva = wi->pa52bit;
break;
case SZ_64K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
@ -248,38 +304,42 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
}
if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
goto transfault_l0;
ia_bits = get_ia_size(wi);
goto transfault;
/* R_YYVYV, I_THCZK */
if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
(va55 && va < GENMASK(63, ia_bits)))
goto transfault_l0;
goto transfault;
/* I_ZFSYQ */
if (wi->regime != TR_EL2 &&
(tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
goto transfault_l0;
goto transfault;
/* R_BNDVG and following statements */
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
goto transfault_l0;
/* AArch64.S1StartLevel() */
stride = wi->pgshift - 3;
wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
goto transfault;
ps = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
/* Compute minimal alignment */
x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
wi->baddr = ttbr & TTBRx_EL1_BADDR;
if (wi->pa52bit) {
/*
* Force the alignment to 64 bytes for top-level tables
* smaller than 8 entries, since TTBR.BADDR[5:2] are used to
* store bits [51:48] of the first level of lookup.
*/
x = max(x, 6);
wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
}
/* R_VPBBF */
if (check_output_size(wi->baddr, wi))
@ -289,12 +349,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return 0;
addrsz: /* Address Size Fault level 0 */
addrsz:
/*
* Address Size Fault level 0 to indicate it comes from TTBR.
* yes, this is an oddity.
*/
fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
return -EFAULT;
transfault_l0: /* Translation Fault level 0 */
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
transfault:
/* Translation Fault on start level */
fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
return -EFAULT;
}
@ -339,6 +404,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
ipa = kvm_s2_trans_output(&s2_trans);
}
if (wi->filter) {
ret = wi->filter->fn(&(struct s1_walk_context)
{
.wi = wi,
.table_ipa = baddr,
.level = level,
}, wi->filter->priv);
if (ret)
return ret;
}
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
@ -369,7 +445,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
}
baddr = desc & GENMASK_ULL(47, wi->pgshift);
baddr = desc_to_oa(wi, desc);
/* Check for out-of-range OA */
if (check_output_size(baddr, wi))
@ -386,11 +462,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
switch (BIT(wi->pgshift)) {
case SZ_4K:
valid_block = level == 1 || level == 2;
valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
break;
case SZ_16K:
case SZ_64K:
valid_block = level == 2;
valid_block = level == 2 || (wi->pa52bit && level == 1);
break;
}
@ -398,7 +474,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
goto transfault;
}
if (check_output_size(desc & GENMASK(47, va_bottom), wi))
baddr = desc_to_oa(wi, desc);
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
if (!(desc & PTE_AF)) {
@ -411,7 +488,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->failed = false;
wr->level = level;
wr->desc = desc;
wr->pa = desc & GENMASK(47, va_bottom);
wr->pa = baddr & GENMASK(52, va_bottom);
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
@ -640,21 +717,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2)
#define ATTR_OSH 0b10
#define ATTR_ISH 0b11
static u8 compute_sh(u8 attr, u64 desc)
static u8 compute_final_sh(u8 attr, u8 sh)
{
u8 sh;
/* Any form of device, as well as NC has SH[1:0]=0b10 */
if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
return ATTR_OSH;
sh = FIELD_GET(PTE_SHARED, desc);
if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
sh = ATTR_NSH;
return sh;
}
static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
u8 attr)
{
u8 sh;
/*
* non-52bit and LPA have their basic shareability described in the
* descriptor. LPA2 gets it from the corresponding field in TCR,
* conveniently recorded in the walk info.
*/
if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
else
sh = wi->sh;
return compute_final_sh(attr, sh);
}
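/*
 * Editorial note: the TCR-sourced value is needed because LPA2
 * (TCR_ELx.DS == 1 with 4K/16K granules) repurposes descriptor bits
 * [9:8], which normally hold SH[1:0], to carry OA[51:50]; shareability
 * then comes from TCR_ELx.SH0/SH1, which setup_s1_walk() records in
 * wi->sh.
 */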
static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
@ -668,7 +760,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh)
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
struct kvm_s2_trans *tr)
{
u8 s1_parattr, s2_memattr, final_attr;
u8 s1_parattr, s2_memattr, final_attr, s2_sh;
u64 par;
/* If S2 has failed to translate, report the damage */
@ -741,17 +833,19 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
!MEMATTR_IS_DEVICE(final_attr))
final_attr = MEMATTR(NC, NC);
s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
par |= tr->output & GENMASK(47, 12);
par |= FIELD_PREP(SYS_PAR_EL1_SH,
combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
compute_sh(final_attr, tr->desc)));
compute_final_sh(final_attr, s2_sh)));
return par;
}
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
enum trans_regime regime)
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr)
{
u64 par;
@ -764,9 +858,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
} else if (wr->level == S1_MMU_DISABLED) {
/* MMU off or HCR_EL2.DC == 1 */
par = SYS_PAR_EL1_NSE;
par |= wr->pa & GENMASK_ULL(47, 12);
par |= wr->pa & SYS_PAR_EL1_PA;
if (regime == TR_EL10 &&
if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
MEMATTR(WbRaWa, WbRaWa));
@ -781,14 +875,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
par = SYS_PAR_EL1_NSE;
mair = (regime == TR_EL10 ?
mair = (wi->regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, MAIR_EL1) :
vcpu_read_sys_reg(vcpu, MAIR_EL2));
mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
mair &= 0xff;
sctlr = (regime == TR_EL10 ?
sctlr = (wi->regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
vcpu_read_sys_reg(vcpu, SCTLR_EL2));
@ -797,9 +891,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
mair = MEMATTR(NC, NC);
par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
par |= wr->pa & GENMASK_ULL(47, 12);
par |= wr->pa & SYS_PAR_EL1_PA;
sh = compute_sh(mair, wr->desc);
sh = compute_s1_sh(wi, wr, mair);
par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
}
@ -873,7 +967,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
break;
case TR_EL10:
wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
break;
}
@ -1186,7 +1280,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
return compute_par_s1(vcpu, &wr, wi.regime);
return compute_par_s1(vcpu, &wi, &wr);
}
/*
@ -1202,7 +1296,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct mmu_config config;
struct kvm_s2_mmu *mmu;
bool fail;
bool fail, mmu_cs;
u64 par;
par = SYS_PAR_EL1_F;
@ -1218,8 +1312,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
* If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
* the right one (as we trapped from vEL2). If not, save the
* full MMU context.
*
* We are also guaranteed to be in the correct context if
* we're not in a nested VM.
*/
if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
mmu_cs = (vcpu_has_nv(vcpu) &&
!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
if (!mmu_cs)
goto skip_mmu_switch;
/*
@ -1287,7 +1386,7 @@ skip_mmu_switch:
write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
if (mmu_cs)
__mmu_config_restore(&config);
return par;
@ -1470,3 +1569,68 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return 0;
}
struct desc_match {
u64 ipa;
int level;
};
static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
{
struct desc_match *dm = priv;
u64 ipa = dm->ipa;
/* Use S1 granule alignment */
ipa &= GENMASK(51, ctxt->wi->pgshift);
/* Not the IPA we're looking for? Continue. */
if (ipa != ctxt->table_ipa)
return 0;
/* Note the level and interrupt the walk */
dm->level = ctxt->level;
return -EINTR;
}
int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
struct desc_match dm = {
.ipa = ipa,
};
struct s1_walk_info wi = {
.filter = &(struct s1_walk_filter){
.fn = match_s1_desc,
.priv = &dm,
},
.regime = TR_EL10,
.as_el0 = false,
.pan = false,
};
struct s1_walk_result wr = {};
int ret;
ret = setup_s1_walk(vcpu, &wi, &wr, va);
if (ret)
return ret;
/* We really expect the S1 MMU to be on here... */
if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
*level = 0;
return 0;
}
/* Walk the guest's PT, looking for a match along the way */
ret = walk_s1(vcpu, &wi, &wr, va);
switch (ret) {
case -EINTR:
/* We interrupted the walk on a match, return the level */
*level = dm.level;
return 0;
case 0:
/* The walk completed, we failed to find the entry */
return -ENOENT;
default:
/* Any other error... */
return ret;
}
}
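/*
 * Editorial note: the filter hook effectively turns walk_s1() into a
 * search primitive. match_s1_desc() compares each table IPA visited
 * during the walk against the target and aborts with -EINTR on a hit,
 * which __kvm_find_s1_desc_level() translates into the level at which
 * the guest's stage-1 tables reference that IPA.
 */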

@ -7,12 +7,22 @@
#include <linux/kvm_host.h>
#include <asm/sysreg.h>
/*
* Describes the dependencies between a set of bits (or the negation
* of a set of RES0 bits) and a feature. The flags indicate how the
* data is interpreted.
*/
struct reg_bits_to_feat_map {
u64 bits;
union {
u64 bits;
u64 *res0p;
};
#define NEVER_FGU BIT(0) /* Can trap, but never UNDEF */
#define CALL_FUNC BIT(1) /* Needs to evaluate tons of crap */
#define FIXED_VALUE BIT(2) /* RAZ/WI or RAO/WI in KVM */
#define RES0_POINTER BIT(3) /* Pointer to RES0 value instead of bits */
unsigned long flags;
union {
@ -28,9 +38,27 @@ struct reg_bits_to_feat_map {
};
};
#define __NEEDS_FEAT_3(m, f, id, fld, lim) \
/*
* Describes the dependencies for a given register:
*
* @feat_map describes the dependency for the whole register. If the
* features the register depends on are not present, the whole
* register is effectively RES0.
*
* @bit_feat_map describes the dependencies for a set of bits in that
* register. If the features these bits depend on are not present, the
* bits are effectively RES0.
*/
struct reg_feat_map_desc {
const char *name;
const struct reg_bits_to_feat_map feat_map;
const struct reg_bits_to_feat_map *bit_feat_map;
const unsigned int bit_feat_map_sz;
};
#define __NEEDS_FEAT_3(m, f, w, id, fld, lim) \
{ \
.bits = (m), \
.w = (m), \
.flags = (f), \
.regidx = IDREG_IDX(SYS_ ## id), \
.shift = id ##_## fld ## _SHIFT, \
@ -39,28 +67,63 @@ struct reg_bits_to_feat_map {
.lo_lim = id ##_## fld ##_## lim \
}
#define __NEEDS_FEAT_2(m, f, fun, dummy) \
#define __NEEDS_FEAT_2(m, f, w, fun, dummy) \
{ \
.bits = (m), \
.w = (m), \
.flags = (f) | CALL_FUNC, \
.fval = (fun), \
}
#define __NEEDS_FEAT_1(m, f, fun) \
#define __NEEDS_FEAT_1(m, f, w, fun) \
{ \
.bits = (m), \
.w = (m), \
.flags = (f) | CALL_FUNC, \
.match = (fun), \
}
#define __NEEDS_FEAT_FLAG(m, f, w, ...) \
CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, w, __VA_ARGS__)
#define NEEDS_FEAT_FLAG(m, f, ...) \
CONCATENATE(__NEEDS_FEAT_, COUNT_ARGS(__VA_ARGS__))(m, f, __VA_ARGS__)
__NEEDS_FEAT_FLAG(m, f, bits, __VA_ARGS__)
#define NEEDS_FEAT_FIXED(m, ...) \
NEEDS_FEAT_FLAG(m, FIXED_VALUE, __VA_ARGS__, 0)
__NEEDS_FEAT_FLAG(m, FIXED_VALUE, bits, __VA_ARGS__, 0)
#define NEEDS_FEAT_RES0(p, ...) \
__NEEDS_FEAT_FLAG(p, RES0_POINTER, res0p, __VA_ARGS__)
/*
* Declare the dependency between a set of bits and a set of features,
* generating a struct reg_bits_to_feat_map.
*/
#define NEEDS_FEAT(m, ...) NEEDS_FEAT_FLAG(m, 0, __VA_ARGS__)
/*
* Declare the dependency between a non-FGT register, a set of
* features, and the set of individual bits it contains. This generates
* a struct reg_feat_map_desc.
*/
#define DECLARE_FEAT_MAP(n, r, m, f) \
struct reg_feat_map_desc n = { \
.name = #r, \
.feat_map = NEEDS_FEAT(~r##_RES0, f), \
.bit_feat_map = m, \
.bit_feat_map_sz = ARRAY_SIZE(m), \
}
/*
* Specialised version of the above for FGT registers that have their
* RES0 masks described as struct fgt_masks.
*/
#define DECLARE_FEAT_MAP_FGT(n, msk, m, f) \
struct reg_feat_map_desc n = { \
.name = #msk, \
.feat_map = NEEDS_FEAT_RES0(&msk.res0, f),\
.bit_feat_map = m, \
.bit_feat_map_sz = ARRAY_SIZE(m), \
}
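/*
 * Editorial illustration, not part of the patch: a declaration further
 * down such as
 *
 *   static const DECLARE_FEAT_MAP_FGT(hfgrtr_desc, hfgrtr_masks,
 *                                     hfgrtr_feat_map, FEAT_FGT);
 *
 * expands (roughly) to
 *
 *   static const struct reg_feat_map_desc hfgrtr_desc = {
 *           .name            = "hfgrtr_masks",
 *           .feat_map        = NEEDS_FEAT_RES0(&hfgrtr_masks.res0, FEAT_FGT),
 *           .bit_feat_map    = hfgrtr_feat_map,
 *           .bit_feat_map_sz = ARRAY_SIZE(hfgrtr_feat_map),
 *   };
 *
 * i.e. the whole-register dependency is the negation of the fgt_masks
 * RES0 value, while the per-bit dependencies come from the existing
 * *_feat_map table.
 */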
#define FEAT_SPE ID_AA64DFR0_EL1, PMSVer, IMP
#define FEAT_SPE_FnE ID_AA64DFR0_EL1, PMSVer, V1P2
#define FEAT_BRBE ID_AA64DFR0_EL1, BRBE, IMP
@ -73,6 +136,7 @@ struct reg_bits_to_feat_map {
#define FEAT_AA32EL0 ID_AA64PFR0_EL1, EL0, AARCH32
#define FEAT_AA32EL1 ID_AA64PFR0_EL1, EL1, AARCH32
#define FEAT_AA64EL1 ID_AA64PFR0_EL1, EL1, IMP
#define FEAT_AA64EL2 ID_AA64PFR0_EL1, EL2, IMP
#define FEAT_AA64EL3 ID_AA64PFR0_EL1, EL3, IMP
#define FEAT_AIE ID_AA64MMFR3_EL1, AIE, IMP
#define FEAT_S2POE ID_AA64MMFR3_EL1, S2POE, IMP
@ -131,7 +195,6 @@ struct reg_bits_to_feat_map {
#define FEAT_SPMU ID_AA64DFR1_EL1, SPMU, IMP
#define FEAT_SPE_nVM ID_AA64DFR2_EL1, SPE_nVM, IMP
#define FEAT_STEP2 ID_AA64DFR2_EL1, STEP, IMP
#define FEAT_SYSREG128 ID_AA64ISAR2_EL1, SYSREG_128, IMP
#define FEAT_CPA2 ID_AA64ISAR3_EL1, CPA, CPA2
#define FEAT_ASID2 ID_AA64MMFR4_EL1, ASID2, IMP
#define FEAT_MEC ID_AA64MMFR3_EL1, MEC, IMP
@ -143,7 +206,6 @@ struct reg_bits_to_feat_map {
#define FEAT_LSMAOC ID_AA64MMFR2_EL1, LSM, IMP
#define FEAT_MixedEnd ID_AA64MMFR0_EL1, BIGEND, IMP
#define FEAT_MixedEndEL0 ID_AA64MMFR0_EL1, BIGENDEL0, IMP
#define FEAT_MTE2 ID_AA64PFR1_EL1, MTE, MTE2
#define FEAT_MTE_ASYNC ID_AA64PFR1_EL1, MTE_frac, ASYNC
#define FEAT_MTE_STORE_ONLY ID_AA64PFR2_EL1, MTESTOREONLY, IMP
#define FEAT_PAN ID_AA64MMFR1_EL1, PAN, IMP
@ -151,7 +213,9 @@ struct reg_bits_to_feat_map {
#define FEAT_SSBS ID_AA64PFR1_EL1, SSBS, IMP
#define FEAT_TIDCP1 ID_AA64MMFR1_EL1, TIDCP1, IMP
#define FEAT_FGT ID_AA64MMFR0_EL1, FGT, IMP
#define FEAT_FGT2 ID_AA64MMFR0_EL1, FGT, FGT2
#define FEAT_MTPMU ID_AA64DFR0_EL1, MTPMU, IMP
#define FEAT_HCX ID_AA64MMFR1_EL1, HCX, IMP
static bool not_feat_aa64el3(struct kvm *kvm)
{
@ -397,6 +461,10 @@ static const struct reg_bits_to_feat_map hfgrtr_feat_map[] = {
NEVER_FGU, FEAT_AA64EL1),
};
static const DECLARE_FEAT_MAP_FGT(hfgrtr_desc, hfgrtr_masks,
hfgrtr_feat_map, FEAT_FGT);
static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = {
NEEDS_FEAT(HFGWTR_EL2_nAMAIR2_EL1 |
HFGWTR_EL2_nMAIR2_EL1,
@ -461,6 +529,9 @@ static const struct reg_bits_to_feat_map hfgwtr_feat_map[] = {
NEVER_FGU, FEAT_AA64EL1),
};
static const DECLARE_FEAT_MAP_FGT(hfgwtr_desc, hfgwtr_masks,
hfgwtr_feat_map, FEAT_FGT);
static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = {
NEEDS_FEAT(HDFGRTR_EL2_PMBIDR_EL1 |
HDFGRTR_EL2_PMSLATFR_EL1 |
@ -528,6 +599,9 @@ static const struct reg_bits_to_feat_map hdfgrtr_feat_map[] = {
NEVER_FGU, FEAT_AA64EL1)
};
static const DECLARE_FEAT_MAP_FGT(hdfgrtr_desc, hdfgrtr_masks,
hdfgrtr_feat_map, FEAT_FGT);
static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = {
NEEDS_FEAT(HDFGWTR_EL2_PMSLATFR_EL1 |
HDFGWTR_EL2_PMSIRR_EL1 |
@ -588,6 +662,8 @@ static const struct reg_bits_to_feat_map hdfgwtr_feat_map[] = {
NEEDS_FEAT(HDFGWTR_EL2_TRFCR_EL1, FEAT_TRF),
};
static const DECLARE_FEAT_MAP_FGT(hdfgwtr_desc, hdfgwtr_masks,
hdfgwtr_feat_map, FEAT_FGT);
static const struct reg_bits_to_feat_map hfgitr_feat_map[] = {
NEEDS_FEAT(HFGITR_EL2_PSBCSYNC, FEAT_SPEv1p5),
@ -662,6 +738,9 @@ static const struct reg_bits_to_feat_map hfgitr_feat_map[] = {
NEVER_FGU, FEAT_AA64EL1),
};
static const DECLARE_FEAT_MAP_FGT(hfgitr_desc, hfgitr_masks,
hfgitr_feat_map, FEAT_FGT);
static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = {
NEEDS_FEAT(HAFGRTR_EL2_AMEVTYPER115_EL0 |
HAFGRTR_EL2_AMEVTYPER114_EL0 |
@ -704,11 +783,17 @@ static const struct reg_bits_to_feat_map hafgrtr_feat_map[] = {
FEAT_AMUv1),
};
static const DECLARE_FEAT_MAP_FGT(hafgrtr_desc, hafgrtr_masks,
hafgrtr_feat_map, FEAT_FGT);
static const struct reg_bits_to_feat_map hfgitr2_feat_map[] = {
NEEDS_FEAT(HFGITR2_EL2_nDCCIVAPS, FEAT_PoPS),
NEEDS_FEAT(HFGITR2_EL2_TSBCSYNC, FEAT_TRBEv1p1)
};
static const DECLARE_FEAT_MAP_FGT(hfgitr2_desc, hfgitr2_masks,
hfgitr2_feat_map, FEAT_FGT2);
static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = {
NEEDS_FEAT(HFGRTR2_EL2_nPFAR_EL1, FEAT_PFAR),
NEEDS_FEAT(HFGRTR2_EL2_nERXGSR_EL1, FEAT_RASv2),
@ -728,6 +813,9 @@ static const struct reg_bits_to_feat_map hfgrtr2_feat_map[] = {
NEEDS_FEAT(HFGRTR2_EL2_nRCWSMASK_EL1, FEAT_THE),
};
static const DECLARE_FEAT_MAP_FGT(hfgrtr2_desc, hfgrtr2_masks,
hfgrtr2_feat_map, FEAT_FGT2);
static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = {
NEEDS_FEAT(HFGWTR2_EL2_nPFAR_EL1, FEAT_PFAR),
NEEDS_FEAT(HFGWTR2_EL2_nACTLRALIAS_EL1 |
@ -746,6 +834,9 @@ static const struct reg_bits_to_feat_map hfgwtr2_feat_map[] = {
NEEDS_FEAT(HFGWTR2_EL2_nRCWSMASK_EL1, FEAT_THE),
};
static const DECLARE_FEAT_MAP_FGT(hfgwtr2_desc, hfgwtr2_masks,
hfgwtr2_feat_map, FEAT_FGT2);
static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = {
NEEDS_FEAT(HDFGRTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9),
NEEDS_FEAT(HDFGRTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss),
@ -776,6 +867,9 @@ static const struct reg_bits_to_feat_map hdfgrtr2_feat_map[] = {
NEEDS_FEAT(HDFGRTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam),
};
static const DECLARE_FEAT_MAP_FGT(hdfgrtr2_desc, hdfgrtr2_masks,
hdfgrtr2_feat_map, FEAT_FGT2);
static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = {
NEEDS_FEAT(HDFGWTR2_EL2_nMDSELR_EL1, FEAT_Debugv8p9),
NEEDS_FEAT(HDFGWTR2_EL2_nPMECR_EL1, feat_ebep_pmuv3_ss),
@ -804,6 +898,10 @@ static const struct reg_bits_to_feat_map hdfgwtr2_feat_map[] = {
NEEDS_FEAT(HDFGWTR2_EL2_nTRBMPAM_EL1, feat_trbe_mpam),
};
static const DECLARE_FEAT_MAP_FGT(hdfgwtr2_desc, hdfgwtr2_masks,
hdfgwtr2_feat_map, FEAT_FGT2);
static const struct reg_bits_to_feat_map hcrx_feat_map[] = {
NEEDS_FEAT(HCRX_EL2_PACMEn, feat_pauth_lr),
NEEDS_FEAT(HCRX_EL2_EnFPM, FEAT_FPMR),
@ -833,6 +931,10 @@ static const struct reg_bits_to_feat_map hcrx_feat_map[] = {
NEEDS_FEAT(HCRX_EL2_EnAS0, FEAT_LS64_ACCDATA),
};
static const DECLARE_FEAT_MAP(hcrx_desc, __HCRX_EL2,
hcrx_feat_map, FEAT_HCX);
static const struct reg_bits_to_feat_map hcr_feat_map[] = {
NEEDS_FEAT(HCR_EL2_TID0, FEAT_AA32EL0),
NEEDS_FEAT_FIXED(HCR_EL2_RW, compute_hcr_rw),
@ -904,6 +1006,9 @@ static const struct reg_bits_to_feat_map hcr_feat_map[] = {
NEEDS_FEAT_FIXED(HCR_EL2_E2H, compute_hcr_e2h),
};
static const DECLARE_FEAT_MAP(hcr_desc, HCR_EL2,
hcr_feat_map, FEAT_AA64EL2);
static const struct reg_bits_to_feat_map sctlr2_feat_map[] = {
NEEDS_FEAT(SCTLR2_EL1_NMEA |
SCTLR2_EL1_EASE,
@ -921,6 +1026,9 @@ static const struct reg_bits_to_feat_map sctlr2_feat_map[] = {
FEAT_CPA2),
};
static const DECLARE_FEAT_MAP(sctlr2_desc, SCTLR2_EL1,
sctlr2_feat_map, FEAT_SCTLR2);
static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
NEEDS_FEAT(TCR2_EL2_FNG1 |
TCR2_EL2_FNG0 |
@ -943,6 +1051,9 @@ static const struct reg_bits_to_feat_map tcr2_el2_feat_map[] = {
NEEDS_FEAT(TCR2_EL2_PIE, FEAT_S1PIE),
};
static const DECLARE_FEAT_MAP(tcr2_el2_desc, TCR2_EL2,
tcr2_el2_feat_map, FEAT_TCR2);
static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
NEEDS_FEAT(SCTLR_EL1_CP15BEN |
SCTLR_EL1_ITD |
@ -1017,6 +1128,9 @@ static const struct reg_bits_to_feat_map sctlr_el1_feat_map[] = {
FEAT_AA64EL1),
};
static const DECLARE_FEAT_MAP(sctlr_el1_desc, SCTLR_EL1,
sctlr_el1_feat_map, FEAT_AA64EL1);
static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = {
NEEDS_FEAT(MDCR_EL2_EBWE, FEAT_Debugv8p9),
NEEDS_FEAT(MDCR_EL2_TDOSA, FEAT_DoubleLock),
@ -1048,6 +1162,9 @@ static const struct reg_bits_to_feat_map mdcr_el2_feat_map[] = {
FEAT_AA64EL1),
};
static const DECLARE_FEAT_MAP(mdcr_el2_desc, MDCR_EL2,
mdcr_el2_feat_map, FEAT_AA64EL2);
static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
int map_size, u64 res0, const char *str)
{
@ -1061,32 +1178,36 @@ static void __init check_feat_map(const struct reg_bits_to_feat_map *map,
str, mask ^ ~res0);
}
static u64 reg_feat_map_bits(const struct reg_bits_to_feat_map *map)
{
return map->flags & RES0_POINTER ? ~(*map->res0p) : map->bits;
}
static void __init check_reg_desc(const struct reg_feat_map_desc *r)
{
check_feat_map(r->bit_feat_map, r->bit_feat_map_sz,
~reg_feat_map_bits(&r->feat_map), r->name);
}
void __init check_feature_map(void)
{
check_feat_map(hfgrtr_feat_map, ARRAY_SIZE(hfgrtr_feat_map),
hfgrtr_masks.res0, hfgrtr_masks.str);
check_feat_map(hfgwtr_feat_map, ARRAY_SIZE(hfgwtr_feat_map),
hfgwtr_masks.res0, hfgwtr_masks.str);
check_feat_map(hfgitr_feat_map, ARRAY_SIZE(hfgitr_feat_map),
hfgitr_masks.res0, hfgitr_masks.str);
check_feat_map(hdfgrtr_feat_map, ARRAY_SIZE(hdfgrtr_feat_map),
hdfgrtr_masks.res0, hdfgrtr_masks.str);
check_feat_map(hdfgwtr_feat_map, ARRAY_SIZE(hdfgwtr_feat_map),
hdfgwtr_masks.res0, hdfgwtr_masks.str);
check_feat_map(hafgrtr_feat_map, ARRAY_SIZE(hafgrtr_feat_map),
hafgrtr_masks.res0, hafgrtr_masks.str);
check_feat_map(hcrx_feat_map, ARRAY_SIZE(hcrx_feat_map),
__HCRX_EL2_RES0, "HCRX_EL2");
check_feat_map(hcr_feat_map, ARRAY_SIZE(hcr_feat_map),
HCR_EL2_RES0, "HCR_EL2");
check_feat_map(sctlr2_feat_map, ARRAY_SIZE(sctlr2_feat_map),
SCTLR2_EL1_RES0, "SCTLR2_EL1");
check_feat_map(tcr2_el2_feat_map, ARRAY_SIZE(tcr2_el2_feat_map),
TCR2_EL2_RES0, "TCR2_EL2");
check_feat_map(sctlr_el1_feat_map, ARRAY_SIZE(sctlr_el1_feat_map),
SCTLR_EL1_RES0, "SCTLR_EL1");
check_feat_map(mdcr_el2_feat_map, ARRAY_SIZE(mdcr_el2_feat_map),
MDCR_EL2_RES0, "MDCR_EL2");
check_reg_desc(&hfgrtr_desc);
check_reg_desc(&hfgwtr_desc);
check_reg_desc(&hfgitr_desc);
check_reg_desc(&hdfgrtr_desc);
check_reg_desc(&hdfgwtr_desc);
check_reg_desc(&hafgrtr_desc);
check_reg_desc(&hfgrtr2_desc);
check_reg_desc(&hfgwtr2_desc);
check_reg_desc(&hfgitr2_desc);
check_reg_desc(&hdfgrtr2_desc);
check_reg_desc(&hdfgwtr2_desc);
check_reg_desc(&hcrx_desc);
check_reg_desc(&hcr_desc);
check_reg_desc(&sctlr2_desc);
check_reg_desc(&tcr2_el2_desc);
check_reg_desc(&sctlr_el1_desc);
check_reg_desc(&mdcr_el2_desc);
}
static bool idreg_feat_match(struct kvm *kvm, const struct reg_bits_to_feat_map *map)
@ -1129,7 +1250,7 @@ static u64 __compute_fixed_bits(struct kvm *kvm,
match = idreg_feat_match(kvm, &map[i]);
if (!match || (map[i].flags & FIXED_VALUE))
val |= map[i].bits;
val |= reg_feat_map_bits(&map[i]);
}
return val;
@ -1145,15 +1266,36 @@ static u64 compute_res0_bits(struct kvm *kvm,
require, exclude | FIXED_VALUE);
}
static u64 compute_fixed_bits(struct kvm *kvm,
const struct reg_bits_to_feat_map *map,
int map_size,
u64 *fixed_bits,
unsigned long require,
unsigned long exclude)
static u64 compute_reg_res0_bits(struct kvm *kvm,
const struct reg_feat_map_desc *r,
unsigned long require, unsigned long exclude)
{
return __compute_fixed_bits(kvm, map, map_size, fixed_bits,
require | FIXED_VALUE, exclude);
u64 res0;
res0 = compute_res0_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz,
require, exclude);
/*
* If computing FGUs, don't take RES0 or register existence
* into account -- we're not computing bits for the register
* itself.
*/
if (!(exclude & NEVER_FGU)) {
res0 |= compute_res0_bits(kvm, &r->feat_map, 1, require, exclude);
res0 |= ~reg_feat_map_bits(&r->feat_map);
}
return res0;
}
static u64 compute_reg_fixed_bits(struct kvm *kvm,
const struct reg_feat_map_desc *r,
u64 *fixed_bits, unsigned long require,
unsigned long exclude)
{
return __compute_fixed_bits(kvm, r->bit_feat_map, r->bit_feat_map_sz,
fixed_bits, require | FIXED_VALUE, exclude);
}
void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
@ -1162,51 +1304,40 @@ void compute_fgu(struct kvm *kvm, enum fgt_group_id fgt)
switch (fgt) {
case HFGRTR_GROUP:
val |= compute_res0_bits(kvm, hfgrtr_feat_map,
ARRAY_SIZE(hfgrtr_feat_map),
0, NEVER_FGU);
val |= compute_res0_bits(kvm, hfgwtr_feat_map,
ARRAY_SIZE(hfgwtr_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hfgrtr_desc,
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hfgwtr_desc,
0, NEVER_FGU);
break;
case HFGITR_GROUP:
val |= compute_res0_bits(kvm, hfgitr_feat_map,
ARRAY_SIZE(hfgitr_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hfgitr_desc,
0, NEVER_FGU);
break;
case HDFGRTR_GROUP:
val |= compute_res0_bits(kvm, hdfgrtr_feat_map,
ARRAY_SIZE(hdfgrtr_feat_map),
0, NEVER_FGU);
val |= compute_res0_bits(kvm, hdfgwtr_feat_map,
ARRAY_SIZE(hdfgwtr_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hdfgrtr_desc,
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hdfgwtr_desc,
0, NEVER_FGU);
break;
case HAFGRTR_GROUP:
val |= compute_res0_bits(kvm, hafgrtr_feat_map,
ARRAY_SIZE(hafgrtr_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hafgrtr_desc,
0, NEVER_FGU);
break;
case HFGRTR2_GROUP:
val |= compute_res0_bits(kvm, hfgrtr2_feat_map,
ARRAY_SIZE(hfgrtr2_feat_map),
0, NEVER_FGU);
val |= compute_res0_bits(kvm, hfgwtr2_feat_map,
ARRAY_SIZE(hfgwtr2_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hfgrtr2_desc,
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hfgwtr2_desc,
0, NEVER_FGU);
break;
case HFGITR2_GROUP:
val |= compute_res0_bits(kvm, hfgitr2_feat_map,
ARRAY_SIZE(hfgitr2_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hfgitr2_desc,
0, NEVER_FGU);
break;
case HDFGRTR2_GROUP:
val |= compute_res0_bits(kvm, hdfgrtr2_feat_map,
ARRAY_SIZE(hdfgrtr2_feat_map),
0, NEVER_FGU);
val |= compute_res0_bits(kvm, hdfgwtr2_feat_map,
ARRAY_SIZE(hdfgwtr2_feat_map),
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hdfgrtr2_desc,
0, NEVER_FGU);
val |= compute_reg_res0_bits(kvm, &hdfgwtr2_desc,
0, NEVER_FGU);
break;
default:
BUG();
@ -1221,109 +1352,74 @@ void get_reg_fixed_bits(struct kvm *kvm, enum vcpu_sysreg reg, u64 *res0, u64 *r
switch (reg) {
case HFGRTR_EL2:
*res0 = compute_res0_bits(kvm, hfgrtr_feat_map,
ARRAY_SIZE(hfgrtr_feat_map), 0, 0);
*res0 |= hfgrtr_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hfgrtr_desc, 0, 0);
*res1 = HFGRTR_EL2_RES1;
break;
case HFGWTR_EL2:
*res0 = compute_res0_bits(kvm, hfgwtr_feat_map,
ARRAY_SIZE(hfgwtr_feat_map), 0, 0);
*res0 |= hfgwtr_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hfgwtr_desc, 0, 0);
*res1 = HFGWTR_EL2_RES1;
break;
case HFGITR_EL2:
*res0 = compute_res0_bits(kvm, hfgitr_feat_map,
ARRAY_SIZE(hfgitr_feat_map), 0, 0);
*res0 |= hfgitr_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hfgitr_desc, 0, 0);
*res1 = HFGITR_EL2_RES1;
break;
case HDFGRTR_EL2:
*res0 = compute_res0_bits(kvm, hdfgrtr_feat_map,
ARRAY_SIZE(hdfgrtr_feat_map), 0, 0);
*res0 |= hdfgrtr_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hdfgrtr_desc, 0, 0);
*res1 = HDFGRTR_EL2_RES1;
break;
case HDFGWTR_EL2:
*res0 = compute_res0_bits(kvm, hdfgwtr_feat_map,
ARRAY_SIZE(hdfgwtr_feat_map), 0, 0);
*res0 |= hdfgwtr_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hdfgwtr_desc, 0, 0);
*res1 = HDFGWTR_EL2_RES1;
break;
case HAFGRTR_EL2:
*res0 = compute_res0_bits(kvm, hafgrtr_feat_map,
ARRAY_SIZE(hafgrtr_feat_map), 0, 0);
*res0 |= hafgrtr_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hafgrtr_desc, 0, 0);
*res1 = HAFGRTR_EL2_RES1;
break;
case HFGRTR2_EL2:
*res0 = compute_res0_bits(kvm, hfgrtr2_feat_map,
ARRAY_SIZE(hfgrtr2_feat_map), 0, 0);
*res0 |= hfgrtr2_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hfgrtr2_desc, 0, 0);
*res1 = HFGRTR2_EL2_RES1;
break;
case HFGWTR2_EL2:
*res0 = compute_res0_bits(kvm, hfgwtr2_feat_map,
ARRAY_SIZE(hfgwtr2_feat_map), 0, 0);
*res0 |= hfgwtr2_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hfgwtr2_desc, 0, 0);
*res1 = HFGWTR2_EL2_RES1;
break;
case HFGITR2_EL2:
*res0 = compute_res0_bits(kvm, hfgitr2_feat_map,
ARRAY_SIZE(hfgitr2_feat_map), 0, 0);
*res0 |= hfgitr2_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hfgitr2_desc, 0, 0);
*res1 = HFGITR2_EL2_RES1;
break;
case HDFGRTR2_EL2:
*res0 = compute_res0_bits(kvm, hdfgrtr2_feat_map,
ARRAY_SIZE(hdfgrtr2_feat_map), 0, 0);
*res0 |= hdfgrtr2_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hdfgrtr2_desc, 0, 0);
*res1 = HDFGRTR2_EL2_RES1;
break;
case HDFGWTR2_EL2:
*res0 = compute_res0_bits(kvm, hdfgwtr2_feat_map,
ARRAY_SIZE(hdfgwtr2_feat_map), 0, 0);
*res0 |= hdfgwtr2_masks.res0;
*res0 = compute_reg_res0_bits(kvm, &hdfgwtr2_desc, 0, 0);
*res1 = HDFGWTR2_EL2_RES1;
break;
case HCRX_EL2:
*res0 = compute_res0_bits(kvm, hcrx_feat_map,
ARRAY_SIZE(hcrx_feat_map), 0, 0);
*res0 |= __HCRX_EL2_RES0;
*res0 = compute_reg_res0_bits(kvm, &hcrx_desc, 0, 0);
*res1 = __HCRX_EL2_RES1;
break;
case HCR_EL2:
mask = compute_fixed_bits(kvm, hcr_feat_map,
ARRAY_SIZE(hcr_feat_map), &fixed,
0, 0);
*res0 = compute_res0_bits(kvm, hcr_feat_map,
ARRAY_SIZE(hcr_feat_map), 0, 0);
*res0 |= HCR_EL2_RES0 | (mask & ~fixed);
mask = compute_reg_fixed_bits(kvm, &hcr_desc, &fixed, 0, 0);
*res0 = compute_reg_res0_bits(kvm, &hcr_desc, 0, 0);
*res0 |= (mask & ~fixed);
*res1 = HCR_EL2_RES1 | (mask & fixed);
break;
case SCTLR2_EL1:
case SCTLR2_EL2:
*res0 = compute_res0_bits(kvm, sctlr2_feat_map,
ARRAY_SIZE(sctlr2_feat_map), 0, 0);
*res0 |= SCTLR2_EL1_RES0;
*res0 = compute_reg_res0_bits(kvm, &sctlr2_desc, 0, 0);
*res1 = SCTLR2_EL1_RES1;
break;
case TCR2_EL2:
*res0 = compute_res0_bits(kvm, tcr2_el2_feat_map,
ARRAY_SIZE(tcr2_el2_feat_map), 0, 0);
*res0 |= TCR2_EL2_RES0;
*res0 = compute_reg_res0_bits(kvm, &tcr2_el2_desc, 0, 0);
*res1 = TCR2_EL2_RES1;
break;
case SCTLR_EL1:
*res0 = compute_res0_bits(kvm, sctlr_el1_feat_map,
ARRAY_SIZE(sctlr_el1_feat_map), 0, 0);
*res0 |= SCTLR_EL1_RES0;
*res0 = compute_reg_res0_bits(kvm, &sctlr_el1_desc, 0, 0);
*res1 = SCTLR_EL1_RES1;
break;
case MDCR_EL2:
*res0 = compute_res0_bits(kvm, mdcr_el2_feat_map,
ARRAY_SIZE(mdcr_el2_feat_map), 0, 0);
*res0 |= MDCR_EL2_RES0;
*res0 = compute_reg_res0_bits(kvm, &mdcr_el2_desc, 0, 0);
*res1 = MDCR_EL2_RES1;
break;
default:

@ -56,6 +56,9 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
if (!kvm_guest_owns_debug_regs(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
if (vcpu_has_nv(vcpu))
kvm_nested_setup_mdcr_el2(vcpu);
/* Write MDCR_EL2 directly if we're already at EL2 */
if (has_vhe())
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
@ -243,29 +246,29 @@ void kvm_debug_handle_oslar(struct kvm_vcpu *vcpu, u64 val)
preempt_enable();
}
static bool skip_trbe_access(bool skip_condition)
{
return (WARN_ON_ONCE(preemptible()) || skip_condition ||
is_protected_kvm_enabled() || !is_kvm_arm_initialised());
}
void kvm_enable_trbe(void)
{
if (has_vhe() || is_protected_kvm_enabled() ||
WARN_ON_ONCE(preemptible()))
return;
host_data_set_flag(TRBE_ENABLED);
if (!skip_trbe_access(has_vhe()))
host_data_set_flag(TRBE_ENABLED);
}
EXPORT_SYMBOL_GPL(kvm_enable_trbe);
void kvm_disable_trbe(void)
{
if (has_vhe() || is_protected_kvm_enabled() ||
WARN_ON_ONCE(preemptible()))
return;
host_data_clear_flag(TRBE_ENABLED);
if (!skip_trbe_access(has_vhe()))
host_data_clear_flag(TRBE_ENABLED);
}
EXPORT_SYMBOL_GPL(kvm_disable_trbe);
void kvm_tracing_set_el1_configuration(u64 trfcr_while_in_guest)
{
if (is_protected_kvm_enabled() || WARN_ON_ONCE(preemptible()))
if (skip_trbe_access(false))
return;
if (has_vhe()) {

@ -1185,6 +1185,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_PMSIRR_EL1, CGT_MDCR_TPMS),
SR_TRAP(SYS_PMSLATFR_EL1, CGT_MDCR_TPMS),
SR_TRAP(SYS_PMSNEVFR_EL1, CGT_MDCR_TPMS),
SR_TRAP(SYS_PMSDSFR_EL1, CGT_MDCR_TPMS),
SR_TRAP(SYS_TRFCR_EL1, CGT_MDCR_TTRF),
SR_TRAP(SYS_TRBBASER_EL1, CGT_MDCR_E2TB),
SR_TRAP(SYS_TRBLIMITR_EL1, CGT_MDCR_E2TB),

@ -559,6 +559,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
/* Dump the nVHE hypervisor backtrace */
kvm_nvhe_dump_backtrace(hyp_offset);
/* Dump the faulting instruction */
dump_kernel_instr(panic_addr + kaslr_offset());
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're

@ -29,7 +29,7 @@ struct pkvm_hyp_vcpu {
};
/*
* Holds the relevant data for running a protected vm.
* Holds the relevant data for running a vm in protected mode.
*/
struct pkvm_hyp_vm {
struct kvm kvm;
@ -67,6 +67,8 @@ static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm)
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_reserve_vm(void);
void __pkvm_unreserve_vm(pkvm_handle_t handle);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,

@ -12,7 +12,8 @@
#include <asm/kvm_host.h>
#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
#define DECLARE_REG(type, name, ctxt, reg) \
#define DECLARE_REG(type, name, ctxt, reg) \
__always_unused int ___check_reg_ ## reg; \
type name = (type)cpu_reg(ctxt, (reg))
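/*
 * Editorial note: the dummy ___check_reg_<reg> declaration presumably
 * makes two DECLARE_REG() invocations that name the same register
 * index within one handler collide at compile time (redefinition of
 * the same block-scope variable), catching copy-paste mistakes.
 */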
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */

@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
hyp-obj-y += ../../../kernel/smccc-call.o
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
hyp-obj-y += $(lib-objs)

@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
static bool has_version_negotiated;
static hyp_spinlock_t version_lock;
static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
{
*res = (struct arm_smccc_res) {
*res = (struct arm_smccc_1_2_regs) {
.a0 = FFA_ERROR,
.a2 = ffa_errno,
};
}
static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
{
if (ret == FFA_RET_SUCCESS) {
*res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
.a2 = prop };
*res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
.a2 = prop };
} else {
ffa_to_smccc_error(res, ret);
}
}
static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
{
ffa_to_smccc_res_prop(res, ret, 0);
}
static void ffa_set_retval(struct kvm_cpu_context *ctxt,
struct arm_smccc_res *res)
struct arm_smccc_1_2_regs *res)
{
cpu_reg(ctxt, 0) = res->a0;
cpu_reg(ctxt, 1) = res->a1;
cpu_reg(ctxt, 2) = res->a2;
cpu_reg(ctxt, 3) = res->a3;
cpu_reg(ctxt, 4) = res->a4;
cpu_reg(ctxt, 5) = res->a5;
cpu_reg(ctxt, 6) = res->a6;
cpu_reg(ctxt, 7) = res->a7;
/*
* DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
*
* In FF-A 1.2, we cannot rely on the function ID sent by the caller to
* detect 32-bit calls because the CPU cycle management interfaces (e.g.
* FFA_MSG_WAIT, FFA_RUN) are 32-bit only but can have 64-bit responses.
*
* FF-A 1.3 introduces 64-bit variants of the CPU cycle management
* interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests
* complete with SMC32 direct responses, which *should* allow us to use the
* function ID sent by the caller to determine whether to return x8-x17.
*
* Note that we also cannot rely on function IDs in the response.
*
* Given the above, assume SMC64 and send back x0-x17 unconditionally,
* matching what the passthrough code (__kvm_hyp_host_forward_smc) does.
*/
cpu_reg(ctxt, 8) = res->a8;
cpu_reg(ctxt, 9) = res->a9;
cpu_reg(ctxt, 10) = res->a10;
cpu_reg(ctxt, 11) = res->a11;
cpu_reg(ctxt, 12) = res->a12;
cpu_reg(ctxt, 13) = res->a13;
cpu_reg(ctxt, 14) = res->a14;
cpu_reg(ctxt, 15) = res->a15;
cpu_reg(ctxt, 16) = res->a16;
cpu_reg(ctxt, 17) = res->a17;
}
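/*
 * Editorial note: struct arm_smccc_1_2_regs carries a0-a17 (struct
 * arm_smccc_res only has a0-a3), so the larger FF-A 1.2 payloads can
 * be propagated in full; the arm_smccc_1_2_smc() calls below are the
 * matching conduit on the issuing side.
 */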
static bool is_ffa_call(u64 func_id)
@ -113,82 +145,92 @@ static bool is_ffa_call(u64 func_id)
static int ffa_map_hyp_buffers(u64 ffa_page_count)
{
struct arm_smccc_res res;
struct arm_smccc_1_2_regs res;
arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP,
hyp_virt_to_phys(hyp_buffers.tx),
hyp_virt_to_phys(hyp_buffers.rx),
ffa_page_count,
0, 0, 0, 0,
&res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_FN64_RXTX_MAP,
.a1 = hyp_virt_to_phys(hyp_buffers.tx),
.a2 = hyp_virt_to_phys(hyp_buffers.rx),
.a3 = ffa_page_count,
}, &res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static int ffa_unmap_hyp_buffers(void)
{
struct arm_smccc_res res;
struct arm_smccc_1_2_regs res;
arm_smccc_1_1_smc(FFA_RXTX_UNMAP,
HOST_FFA_ID,
0, 0, 0, 0, 0, 0,
&res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_RXTX_UNMAP,
.a1 = HOST_FFA_ID,
}, &res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo,
static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 fraglen, u32 endpoint_id)
{
arm_smccc_1_1_smc(FFA_MEM_FRAG_TX,
handle_lo, handle_hi, fraglen, endpoint_id,
0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_MEM_FRAG_TX,
.a1 = handle_lo,
.a2 = handle_hi,
.a3 = fraglen,
.a4 = endpoint_id,
}, res);
}
static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo,
static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 fragoff)
{
arm_smccc_1_1_smc(FFA_MEM_FRAG_RX,
handle_lo, handle_hi, fragoff, HOST_FFA_ID,
0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_MEM_FRAG_RX,
.a1 = handle_lo,
.a2 = handle_hi,
.a3 = fragoff,
.a4 = HOST_FFA_ID,
}, res);
}
static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len,
static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
u32 fraglen)
{
arm_smccc_1_1_smc(func_id, len, fraglen,
0, 0, 0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = func_id,
.a1 = len,
.a2 = fraglen,
}, res);
}
static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo,
static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 flags)
{
arm_smccc_1_1_smc(FFA_MEM_RECLAIM,
handle_lo, handle_hi, flags,
0, 0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_MEM_RECLAIM,
.a1 = handle_lo,
.a2 = handle_hi,
.a3 = flags,
}, res);
}
static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
{
arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ,
len, len,
0, 0, 0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_FN64_MEM_RETRIEVE_REQ,
.a1 = len,
.a2 = len,
}, res);
}
static void ffa_rx_release(struct arm_smccc_res *res)
static void ffa_rx_release(struct arm_smccc_1_2_regs *res)
{
arm_smccc_1_1_smc(FFA_RX_RELEASE,
0, 0,
0, 0, 0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_RX_RELEASE,
}, res);
}
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(phys_addr_t, tx, ctxt, 1);
@ -267,7 +309,7 @@ err_unmap:
goto out_unlock;
}
static void do_ffa_rxtx_unmap(struct arm_smccc_res *res,
static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
@ -368,7 +410,7 @@ static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
return ret;
}
static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
@ -427,7 +469,7 @@ out:
}
static void __do_ffa_mem_xfer(const u64 func_id,
struct arm_smccc_res *res,
struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
@ -521,7 +563,7 @@ err_unshare:
__do_ffa_mem_xfer((fid), (res), (ctxt)); \
} while (0);
static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
@ -628,13 +670,26 @@ static bool ffa_call_supported(u64 func_id)
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
case FFA_MEM_RETRIEVE_REQ:
/* Optional notification interfaces added in FF-A 1.1 */
case FFA_NOTIFICATION_BITMAP_CREATE:
case FFA_NOTIFICATION_BITMAP_DESTROY:
case FFA_NOTIFICATION_BIND:
case FFA_NOTIFICATION_UNBIND:
case FFA_NOTIFICATION_SET:
case FFA_NOTIFICATION_GET:
case FFA_NOTIFICATION_INFO_GET:
/* Optional interfaces added in FF-A 1.2 */
case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */
case FFA_MSG_SEND_DIRECT_RESP2: /* Optional per 7.5.1 */
case FFA_CONSOLE_LOG: /* Optional per 13.1: not in Table 13.1 */
case FFA_PARTITION_INFO_GET_REGS: /* Optional for virtual instances per 13.1 */
return false;
}
return true;
}
static bool do_ffa_features(struct arm_smccc_res *res,
static bool do_ffa_features(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
@ -666,21 +721,25 @@ out_handled:
static int hyp_ffa_post_init(void)
{
size_t min_rxtx_sz;
struct arm_smccc_res res;
struct arm_smccc_1_2_regs res;
arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
.a0 = FFA_ID_GET,
}, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
if (res.a2 != HOST_FFA_ID)
return -EINVAL;
arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
0, 0, 0, 0, 0, 0, &res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
.a0 = FFA_FEATURES,
.a1 = FFA_FN64_RXTX_MAP,
}, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
switch (res.a2) {
switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) {
case FFA_FEAT_RXTX_MIN_SZ_4K:
min_rxtx_sz = SZ_4K;
break;
@ -700,7 +759,7 @@ static int hyp_ffa_post_init(void)
return 0;
}
static void do_ffa_version(struct arm_smccc_res *res,
static void do_ffa_version(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, ffa_req_version, ctxt, 1);
@ -712,7 +771,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
hyp_spin_lock(&version_lock);
if (has_version_negotiated) {
res->a0 = hyp_ffa_version;
if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version))
res->a0 = FFA_RET_NOT_SUPPORTED;
else
res->a0 = hyp_ffa_version;
goto unlock;
}
@ -721,9 +783,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
* first if TEE supports it.
*/
if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) {
arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0,
0, 0, 0, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_VERSION,
.a1 = ffa_req_version,
}, res);
if (res->a0 == FFA_RET_NOT_SUPPORTED)
goto unlock;
@ -740,7 +803,7 @@ unlock:
hyp_spin_unlock(&version_lock);
}
static void do_ffa_part_get(struct arm_smccc_res *res,
static void do_ffa_part_get(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, uuid0, ctxt, 1);
@ -756,9 +819,14 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
goto out_unlock;
}
arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1,
uuid2, uuid3, flags, 0, 0,
res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_PARTITION_INFO_GET,
.a1 = uuid0,
.a2 = uuid1,
.a3 = uuid2,
.a4 = uuid3,
.a5 = flags,
}, res);
if (res->a0 != FFA_SUCCESS)
goto out_unlock;
@ -791,7 +859,7 @@ out_unlock:
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
struct arm_smccc_res res;
struct arm_smccc_1_2_regs res;
/*
* There's no way we can tell what a non-standard SMC call might
@ -860,13 +928,16 @@ out_handled:
int hyp_ffa_init(void *pages)
{
struct arm_smccc_res res;
struct arm_smccc_1_2_regs res;
void *tx, *rx;
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
return 0;
arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res);
arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
.a0 = FFA_VERSION,
.a1 = FFA_VERSION_1_2,
}, &res);
if (res.a0 == FFA_RET_NOT_SUPPORTED)
return 0;
@ -886,10 +957,10 @@ int hyp_ffa_init(void *pages)
if (FFA_MAJOR_VERSION(res.a0) != 1)
return -EOPNOTSUPP;
if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1))
if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_2))
hyp_ffa_version = res.a0;
else
hyp_ffa_version = FFA_VERSION_1_1;
hyp_ffa_version = FFA_VERSION_1_2;
tx = pages;
pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;

View File

@ -546,6 +546,18 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt)
{
cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm();
}
static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
__pkvm_unreserve_vm(handle);
}
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
@ -606,6 +618,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_reserve_vm),
HANDLE_FUNC(__pkvm_unreserve_vm),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),

View File

@ -1010,9 +1010,12 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip
return ret;
if (!kvm_pte_valid(pte))
return -ENOENT;
if (kvm_granule_size(level) != size)
if (size && kvm_granule_size(level) != size)
return -E2BIG;
if (!size)
size = kvm_granule_size(level);
state = guest_get_page_state(pte, ipa);
if (state != PKVM_PAGE_SHARED_BORROWED)
return -EPERM;
@ -1100,7 +1103,7 @@ int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_
if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
assert_host_shared_guest(vm, ipa, PAGE_SIZE);
assert_host_shared_guest(vm, ipa, 0);
guest_lock_component(vm);
ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);
@ -1156,7 +1159,7 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
if (pkvm_hyp_vm_is_protected(vm))
return -EPERM;
assert_host_shared_guest(vm, ipa, PAGE_SIZE);
assert_host_shared_guest(vm, ipa, 0);
guest_lock_component(vm);
kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
guest_unlock_component(vm);

View File

@ -23,8 +23,8 @@ unsigned int kvm_arm_vmid_bits;
unsigned int kvm_host_sve_max_vl;
/*
* The currently loaded hyp vCPU for each physical CPU. Used only when
* protected KVM is enabled, but for both protected and non-protected VMs.
* The currently loaded hyp vCPU for each physical CPU. Used in protected mode
* for both protected and non-protected VMs.
*/
static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
@ -135,7 +135,7 @@ static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
/* Protected KVM does not support AArch32 guests. */
/* No AArch32 support for protected guests. */
if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
return -EINVAL;
@ -192,6 +192,11 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
*/
#define HANDLE_OFFSET 0x1000
/*
* Marks a reserved but not yet used entry in the VM table.
*/
#define RESERVED_ENTRY ((void *)0xa110ca7ed)
static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
return handle - HANDLE_OFFSET;
@ -210,8 +215,8 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
DEFINE_HYP_SPINLOCK(vm_table_lock);
/*
* The table of VM entries for protected VMs in hyp.
* Allocated at hyp initialization and setup.
* A table that tracks all VMs in protected mode.
* Allocated during hyp initialization and setup.
*/
static struct pkvm_hyp_vm **vm_table;
@ -231,6 +236,10 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
if (unlikely(idx >= KVM_MAX_PVMS))
return NULL;
/* A reserved entry doesn't represent an initialized VM. */
if (unlikely(vm_table[idx] == RESERVED_ENTRY))
return NULL;
return vm_table[idx];
}
@ -401,14 +410,26 @@ static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
}
static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
unsigned int nr_vcpus)
unsigned int nr_vcpus, pkvm_handle_t handle)
{
struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
int idx = vm_handle_to_idx(handle);
hyp_vm->kvm.arch.pkvm.handle = handle;
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected);
hyp_vm->kvm.arch.pkvm.is_created = true;
hyp_vm->kvm.arch.flags = 0;
pkvm_init_features_from_host(hyp_vm, host_kvm);
/* VMID 0 is reserved for the host */
atomic64_set(&mmu->vmid.id, idx + 1);
mmu->vtcr = host_mmu.arch.mmu.vtcr;
mmu->arch = &hyp_vm->kvm.arch;
mmu->pgt = &hyp_vm->pgt;
}
static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
@ -480,7 +501,7 @@ done:
return ret;
}
static int find_free_vm_table_entry(struct kvm *host_kvm)
static int find_free_vm_table_entry(void)
{
int i;
@ -493,15 +514,13 @@ static int find_free_vm_table_entry(struct kvm *host_kvm)
}
/*
* Allocate a VM table entry and insert a pointer to the new vm.
* Reserve a VM table entry.
*
* Return a unique handle to the protected VM on success,
* Return a unique handle to the VM on success,
* negative error code on failure.
*/
static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
struct pkvm_hyp_vm *hyp_vm)
static int allocate_vm_table_entry(void)
{
struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
int idx;
hyp_assert_lock_held(&vm_table_lock);
@ -514,20 +533,57 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
if (unlikely(!vm_table))
return -EINVAL;
idx = find_free_vm_table_entry(host_kvm);
if (idx < 0)
idx = find_free_vm_table_entry();
if (unlikely(idx < 0))
return idx;
hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);
vm_table[idx] = RESERVED_ENTRY;
/* VMID 0 is reserved for the host */
atomic64_set(&mmu->vmid.id, idx + 1);
return idx;
}
mmu->arch = &hyp_vm->kvm.arch;
mmu->pgt = &hyp_vm->pgt;
static int __insert_vm_table_entry(pkvm_handle_t handle,
struct pkvm_hyp_vm *hyp_vm)
{
unsigned int idx;
hyp_assert_lock_held(&vm_table_lock);
/*
* Initializing protected state might have failed, yet a malicious
* host could trigger this function. Thus, ensure that 'vm_table'
* exists.
*/
if (unlikely(!vm_table))
return -EINVAL;
idx = vm_handle_to_idx(handle);
if (unlikely(idx >= KVM_MAX_PVMS))
return -EINVAL;
if (unlikely(vm_table[idx] != RESERVED_ENTRY))
return -EINVAL;
vm_table[idx] = hyp_vm;
return hyp_vm->kvm.arch.pkvm.handle;
return 0;
}
/*
* Insert a pointer to the initialized VM into the VM table.
*
* Return 0 on success, or negative error code on failure.
*/
static int insert_vm_table_entry(pkvm_handle_t handle,
struct pkvm_hyp_vm *hyp_vm)
{
int ret;
hyp_spin_lock(&vm_table_lock);
ret = __insert_vm_table_entry(handle, hyp_vm);
hyp_spin_unlock(&vm_table_lock);
return ret;
}
/*
@ -594,10 +650,45 @@ static void unmap_donated_memory_noclear(void *va, size_t size)
}
/*
* Initialize the hypervisor copy of the protected VM state using the
* memory donated by the host.
* Reserves an entry in the hypervisor for a new VM in protected mode.
*
* Unmaps the donated memory from the host at stage 2.
* Return a unique handle to the VM on success, negative error code on failure.
*/
int __pkvm_reserve_vm(void)
{
int ret;
hyp_spin_lock(&vm_table_lock);
ret = allocate_vm_table_entry();
hyp_spin_unlock(&vm_table_lock);
if (ret < 0)
return ret;
return idx_to_vm_handle(ret);
}
/*
* Removes a reserved entry, but only if it hasn't been used yet.
* Otherwise, the VM needs to be destroyed.
*/
void __pkvm_unreserve_vm(pkvm_handle_t handle)
{
unsigned int idx = vm_handle_to_idx(handle);
if (unlikely(!vm_table))
return;
hyp_spin_lock(&vm_table_lock);
if (likely(idx < KVM_MAX_PVMS && vm_table[idx] == RESERVED_ENTRY))
remove_vm_table_entry(handle);
hyp_spin_unlock(&vm_table_lock);
}
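Taken together with __pkvm_init_vm() below, the reserve/unreserve pair gives the host a two-step creation flow: grab a handle up front, then either publish the fully initialised VM or hand the reservation back if setup fails. A rough host-side sketch of that flow (illustrative only; it mirrors pkvm_init_host_vm() and __pkvm_destroy_hyp_vm() later in this diff, with error paths trimmed):

/* Illustrative only: the intended host-side handle lifecycle. */
static int example_vm_lifecycle(struct kvm *kvm, void *hyp_vm, void *pgd)
{
	int ret;

	ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm);
	if (ret < 0)
		return ret;
	kvm->arch.pkvm.handle = ret;	/* reserved, but not yet published */

	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd);
	if (ret) {
		/* Never published: give the reservation back to the hypervisor. */
		kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle);
		kvm->arch.pkvm.handle = 0;
	}

	return ret;
}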
/*
* Initialize the hypervisor copy of the VM state using host-donated memory.
*
* Unmap the donated memory from the host at stage 2.
*
* host_kvm: A pointer to the host's struct kvm.
* vm_hva: The host va of the area being donated for the VM state.
@ -606,8 +697,7 @@ static void unmap_donated_memory_noclear(void *va, size_t size)
* the VM. Must be page aligned. Its size is implied by the VM's
* VTCR.
*
* Return a unique handle to the protected VM on success,
* negative error code on failure.
* Return 0 on success, negative error code on failure.
*/
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva)
@ -615,6 +705,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
struct pkvm_hyp_vm *hyp_vm = NULL;
size_t vm_size, pgd_size;
unsigned int nr_vcpus;
pkvm_handle_t handle;
void *pgd = NULL;
int ret;
@ -628,6 +719,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
goto err_unpin_kvm;
}
handle = READ_ONCE(host_kvm->arch.pkvm.handle);
if (unlikely(handle < HANDLE_OFFSET)) {
ret = -EINVAL;
goto err_unpin_kvm;
}
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
@ -641,24 +738,19 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
if (!pgd)
goto err_remove_mappings;
init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);
hyp_spin_lock(&vm_table_lock);
ret = insert_vm_table_entry(host_kvm, hyp_vm);
if (ret < 0)
goto err_unlock;
init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle);
ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
if (ret)
goto err_remove_vm_table_entry;
hyp_spin_unlock(&vm_table_lock);
goto err_remove_mappings;
return hyp_vm->kvm.arch.pkvm.handle;
/* Must be called last since this publishes the VM. */
ret = insert_vm_table_entry(handle, hyp_vm);
if (ret)
goto err_remove_mappings;
return 0;
err_remove_vm_table_entry:
remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
err_unlock:
hyp_spin_unlock(&vm_table_lock);
err_remove_mappings:
unmap_donated_memory(hyp_vm, vm_size);
unmap_donated_memory(pgd, pgd_size);
@ -668,10 +760,9 @@ err_unpin_kvm:
}
/*
* Initialize the hypervisor copy of the protected vCPU state using the
* memory donated by the host.
* Initialize the hypervisor copy of the vCPU state using host-donated memory.
*
* handle: The handle for the protected vm.
* handle: The hypervisor handle for the vm.
* host_vcpu: A pointer to the corresponding host vcpu.
* vcpu_hva: The host va of the area being donated for the vcpu state.
* Must be page aligned. The size of the area must be equal to

View File

@ -192,6 +192,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum pkvm_page_state state;
struct hyp_page *page;
phys_addr_t phys;
enum kvm_pgtable_prot prot;
if (!kvm_pte_valid(ctx->old))
return 0;
@ -210,11 +211,18 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
* configured in the hypervisor stage-1, and make sure to propagate them
* to the hyp_vmemmap state.
*/
state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
prot = kvm_pgtable_hyp_pte_prot(ctx->old);
state = pkvm_getstate(prot);
switch (state) {
case PKVM_PAGE_OWNED:
set_hyp_state(page, PKVM_PAGE_OWNED);
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
/* hyp text is RO in the host stage-2 so it can be inspected on panic. */
if (prot == PAGE_HYP_EXEC) {
set_host_state(page, PKVM_NOPAGE);
return host_stage2_idmap_locked(phys, PAGE_SIZE, KVM_PGTABLE_PROT_R);
} else {
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
}
case PKVM_PAGE_SHARED_OWNED:
set_hyp_state(page, PKVM_PAGE_SHARED_OWNED);
set_host_state(page, PKVM_PAGE_SHARED_BORROWED);

View File

@ -295,12 +295,8 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
}
/*
* GICv5 BET0 FEAT_GCIE_LEGACY doesn't include ICC_SRE_EL2. This is due
* to be relaxed in a future spec release, at which point this in
* condition can be dropped.
*/
if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
/* Only disable SRE if the host implements the GICv2 interface */
if (static_branch_unlikely(&vgic_v3_has_v2_compat)) {
/*
* Prevent the guest from touching the ICC_SRE_EL1 system
* register. Note that this may not have any effect, as
@ -329,19 +325,16 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
/*
* Can be dropped in the future when GICv5 spec is relaxed. See comment
* above.
*/
if (!cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF)) {
/* Only restore SRE if the host implements the GICv2 interface */
if (static_branch_unlikely(&vgic_v3_has_v2_compat)) {
val = read_gicreg(ICC_SRE_EL2);
write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
}
if (!cpu_if->vgic_sre) {
/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
isb();
write_gicreg(1, ICC_SRE_EL1);
if (!cpu_if->vgic_sre) {
/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
isb();
write_gicreg(1, ICC_SRE_EL1);
}
}
/*

View File

@ -95,6 +95,13 @@ static u64 __compute_hcr(struct kvm_vcpu *vcpu)
/* Force NV2 in case the guest is forgetful... */
guest_hcr |= HCR_NV2;
}
/*
* Exclude the guest's TWED configuration if it hasn't set TWE
* to avoid potentially delaying traps for the host.
*/
if (!(guest_hcr & HCR_TWE))
guest_hcr &= ~(HCR_EL2_TWEDEn | HCR_EL2_TWEDEL);
}
BUG_ON(host_data_test_flag(VCPU_IN_HYP_CONTEXT) &&

View File

@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
u64 esr = 0;
u64 esr = 0, fsc;
int level;
/*
* If injecting an abort from a failed S1PTW, rewalk the S1 PTs to
* find the failing level. If we can't find it, assume the error was
* transient and restart without changing the state.
*/
if (kvm_vcpu_abt_iss1tw(vcpu)) {
u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu);
int ret;
if (hpfar == INVALID_GPA)
return;
ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level);
if (ret)
return;
WARN_ON_ONCE(level < -1 || level > 3);
fsc = ESR_ELx_FSC_SEA_TTW(level);
} else {
fsc = ESR_ELx_FSC_EXTABT;
}
/* This delight is brought to you by FEAT_DoubleFault2. */
if (effective_sctlr2_ease(vcpu))
@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
esr |= ESR_ELx_FSC_EXTABT;
esr |= fsc;
vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));

View File

@ -1431,11 +1431,8 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
* able to see the page's tags and therefore they must be initialised first. If
* PG_mte_tagged is set, tags have already been initialised.
*
* The race in the test/set of the PG_mte_tagged flag is handled by:
* - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
* racing to santise the same page
* - mmap_lock protects between a VM faulting a page in and the VMM performing
* an mprotect() to add VM_MTE
* Must be called with kvm->mmu_lock held to ensure the memory remains mapped
* while the tags are zeroed.
*/
static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
unsigned long size)
@ -1775,7 +1772,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* cache maintenance.
*/
if (!kvm_supports_cacheable_pfnmap())
return -EFAULT;
ret = -EFAULT;
} else {
/*
* If the page was identified as device early by looking at
@ -1798,7 +1795,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
if (exec_fault && s2_force_noncacheable)
return -ENOEXEC;
ret = -ENOEXEC;
if (ret) {
kvm_release_page_unused(page);
return ret;
}
if (nested)
adjust_nested_fault_perms(nested, &prot, &writable);

View File

@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@ -1491,9 +1491,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
case SYS_ID_AA64PFR1_EL1:
/* Only support BTI, SSBS, CSV2_frac */
val &= (ID_AA64PFR1_EL1_BT |
ID_AA64PFR1_EL1_SSBS |
ID_AA64PFR1_EL1_CSV2_frac);
val &= ~(ID_AA64PFR1_EL1_PFAR |
ID_AA64PFR1_EL1_MTEX |
ID_AA64PFR1_EL1_THE |
ID_AA64PFR1_EL1_GCS |
ID_AA64PFR1_EL1_MTE_frac |
ID_AA64PFR1_EL1_NMI |
ID_AA64PFR1_EL1_SME |
ID_AA64PFR1_EL1_RES0 |
ID_AA64PFR1_EL1_MPAM_frac |
ID_AA64PFR1_EL1_MTE);
break;
case SYS_ID_AA64MMFR0_EL1:
@ -1546,12 +1553,11 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
break;
case SYS_ID_AA64MMFR1_EL1:
val &= (ID_AA64MMFR1_EL1_HCX |
ID_AA64MMFR1_EL1_PAN |
ID_AA64MMFR1_EL1_LO |
ID_AA64MMFR1_EL1_HPDS |
ID_AA64MMFR1_EL1_VH |
ID_AA64MMFR1_EL1_VMIDBits);
val &= ~(ID_AA64MMFR1_EL1_CMOW |
ID_AA64MMFR1_EL1_nTLBPA |
ID_AA64MMFR1_EL1_ETS |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_HAFDBS);
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
val &= ~ID_AA64MMFR1_EL1_VH;
@ -1593,14 +1599,22 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
case SYS_ID_AA64DFR0_EL1:
/* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
val &= (ID_AA64DFR0_EL1_PMUVer |
ID_AA64DFR0_EL1_WRPs |
ID_AA64DFR0_EL1_BRPs |
ID_AA64DFR0_EL1_DebugVer|
ID_AA64DFR0_EL1_HPMN0);
val &= ~(ID_AA64DFR0_EL1_ExtTrcBuff |
ID_AA64DFR0_EL1_BRBE |
ID_AA64DFR0_EL1_MTPMU |
ID_AA64DFR0_EL1_TraceBuffer |
ID_AA64DFR0_EL1_TraceFilt |
ID_AA64DFR0_EL1_PMSVer |
ID_AA64DFR0_EL1_CTX_CMPs |
ID_AA64DFR0_EL1_SEBEP |
ID_AA64DFR0_EL1_PMSS |
ID_AA64DFR0_EL1_TraceVer);
/* Cap Debug to ARMv8.1 */
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE);
/*
* FEAT_Debugv8p9 requires support for extended breakpoints /
* watchpoints.
*/
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, V8P8);
break;
}
@ -1825,3 +1839,33 @@ void kvm_nested_sync_hwstate(struct kvm_vcpu *vcpu)
if (unlikely(vcpu_test_and_clear_flag(vcpu, NESTED_SERROR_PENDING)))
kvm_inject_serror_esr(vcpu, vcpu_get_vsesr(vcpu));
}
/*
* KVM unconditionally sets most of these traps anyway but use an allowlist
* to document the guest hypervisor traps that may take precedence and guard
* against future changes to the non-nested trap configuration.
*/
#define NV_MDCR_GUEST_INCLUDE (MDCR_EL2_TDE | \
MDCR_EL2_TDA | \
MDCR_EL2_TDRA | \
MDCR_EL2_TTRF | \
MDCR_EL2_TPMS | \
MDCR_EL2_TPM | \
MDCR_EL2_TPMCR | \
MDCR_EL2_TDCC | \
MDCR_EL2_TDOSA)
void kvm_nested_setup_mdcr_el2(struct kvm_vcpu *vcpu)
{
u64 guest_mdcr = __vcpu_sys_reg(vcpu, MDCR_EL2);
/*
* In yet another example where FEAT_NV2 is fscking broken, accesses
* to MDSCR_EL1 are redirected to the VNCR despite having an effect
* at EL2. Use a big hammer to apply sanity.
*/
if (is_hyp_ctxt(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
else
vcpu->arch.mdcr_el2 |= (guest_mdcr & NV_MDCR_GUEST_INCLUDE);
}

View File

@ -85,16 +85,23 @@ void __init kvm_hyp_reserve(void)
hyp_mem_base);
}
static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
{
if (host_kvm->arch.pkvm.handle) {
if (pkvm_hyp_vm_is_created(kvm)) {
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
host_kvm->arch.pkvm.handle));
kvm->arch.pkvm.handle));
} else if (kvm->arch.pkvm.handle) {
/*
* The VM could have been reserved but hyp initialization has
* failed. Make sure to unreserve it.
*/
kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle);
}
host_kvm->arch.pkvm.handle = 0;
free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
kvm->arch.pkvm.handle = 0;
kvm->arch.pkvm.is_created = false;
free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
}
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
@ -129,16 +136,16 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
*
* Return 0 on success, negative error code on failure.
*/
static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
static int __pkvm_create_hyp_vm(struct kvm *kvm)
{
size_t pgd_sz, hyp_vm_sz;
void *pgd, *hyp_vm;
int ret;
if (host_kvm->created_vcpus < 1)
if (kvm->created_vcpus < 1)
return -EINVAL;
pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr);
/*
* The PGD pages will be reclaimed using a hyp_memcache which implies
@ -152,7 +159,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
/* Allocate memory to donate to hyp for vm and vcpu pointers. */
hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
size_mul(sizeof(void *),
host_kvm->created_vcpus)));
kvm->created_vcpus)));
hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
if (!hyp_vm) {
ret = -ENOMEM;
@ -160,12 +167,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
}
/* Donate the VM memory to hyp and let hyp initialize it. */
ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
if (ret < 0)
ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd);
if (ret)
goto free_vm;
host_kvm->arch.pkvm.handle = ret;
host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
kvm->arch.pkvm.is_created = true;
kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
return 0;
@ -176,14 +183,19 @@ free_pgd:
return ret;
}
int pkvm_create_hyp_vm(struct kvm *host_kvm)
bool pkvm_hyp_vm_is_created(struct kvm *kvm)
{
return READ_ONCE(kvm->arch.pkvm.is_created);
}
int pkvm_create_hyp_vm(struct kvm *kvm)
{
int ret = 0;
mutex_lock(&host_kvm->arch.config_lock);
if (!host_kvm->arch.pkvm.handle)
ret = __pkvm_create_hyp_vm(host_kvm);
mutex_unlock(&host_kvm->arch.config_lock);
mutex_lock(&kvm->arch.config_lock);
if (!pkvm_hyp_vm_is_created(kvm))
ret = __pkvm_create_hyp_vm(kvm);
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@ -200,15 +212,31 @@ int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
return ret;
}
void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
void pkvm_destroy_hyp_vm(struct kvm *kvm)
{
mutex_lock(&host_kvm->arch.config_lock);
__pkvm_destroy_hyp_vm(host_kvm);
mutex_unlock(&host_kvm->arch.config_lock);
mutex_lock(&kvm->arch.config_lock);
__pkvm_destroy_hyp_vm(kvm);
mutex_unlock(&kvm->arch.config_lock);
}
int pkvm_init_host_vm(struct kvm *host_kvm)
int pkvm_init_host_vm(struct kvm *kvm)
{
int ret;
if (pkvm_hyp_vm_is_created(kvm))
return -EINVAL;
/* VM is already reserved, no need to proceed. */
if (kvm->arch.pkvm.handle)
return 0;
/* Reserve the VM in hyp and obtain a hyp handle for the VM. */
ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm);
if (ret < 0)
return ret;
kvm->arch.pkvm.handle = ret;
return 0;
}

View File

@ -32,23 +32,23 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.set = " ",
.clear = "F",
}, {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | PTE_VALID,
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.set = "R",
.clear = " ",
}, {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | PTE_VALID,
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.set = "W",
.clear = " ",
}, {
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN | PTE_VALID,
.val = PTE_VALID,
.set = " ",
.clear = "X",
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.set = "NX",
.clear = "x ",
}, {
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF | PTE_VALID,
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.set = "AF",
.clear = " ",
}, {

View File

@ -1757,7 +1757,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
val &= ~ID_AA64ISAR2_EL1_WFxT;
break;
case SYS_ID_AA64ISAR3_EL1:
val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_FAMINMAX;
val &= ID_AA64ISAR3_EL1_FPRCVT | ID_AA64ISAR3_EL1_LSFE |
ID_AA64ISAR3_EL1_FAMINMAX;
break;
case SYS_ID_AA64MMFR2_EL1:
val &= ~ID_AA64MMFR2_EL1_CCIDX_MASK;
@ -1997,6 +1998,26 @@ static u64 sanitise_id_aa64dfr0_el1(const struct kvm_vcpu *vcpu, u64 val)
return val;
}
/*
* Older versions of KVM erroneously claim support for FEAT_DoubleLock on
* NV-enabled VMs running on hardware that doesn't implement it. Silently
* ignore the incorrect value if it is consistent with the bug.
*/
static bool ignore_feat_doublelock(struct kvm_vcpu *vcpu, u64 val)
{
u8 host, user;
if (!vcpu_has_nv(vcpu))
return false;
host = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock,
read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1));
user = SYS_FIELD_GET(ID_AA64DFR0_EL1, DoubleLock, val);
return host == ID_AA64DFR0_EL1_DoubleLock_NI &&
user == ID_AA64DFR0_EL1_DoubleLock_IMP;
}
static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd,
u64 val)
@ -2028,6 +2049,11 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
if (debugver < ID_AA64DFR0_EL1_DebugVer_IMP)
return -EINVAL;
if (ignore_feat_doublelock(vcpu, val)) {
val &= ~ID_AA64DFR0_EL1_DoubleLock;
val |= SYS_FIELD_PREP_ENUM(ID_AA64DFR0_EL1, DoubleLock, NI);
}
return set_id_reg(vcpu, rd, val);
}
@ -2148,16 +2174,29 @@ static int set_id_aa64pfr1_el1(struct kvm_vcpu *vcpu,
return set_id_reg(vcpu, rd, user_val);
}
/*
* Allow userspace to de-feature a stage-2 translation granule but prevent it
* from claiming the impossible.
*/
#define tgran2_val_allowed(tg, safe, user) \
({ \
u8 __s = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, safe); \
u8 __u = SYS_FIELD_GET(ID_AA64MMFR0_EL1, tg, user); \
\
__s == __u || __u == ID_AA64MMFR0_EL1_##tg##_NI; \
})
static int set_id_aa64mmfr0_el1(struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd, u64 user_val)
{
u64 sanitized_val = kvm_read_sanitised_id_reg(vcpu, rd);
u64 tgran2_mask = ID_AA64MMFR0_EL1_TGRAN4_2_MASK |
ID_AA64MMFR0_EL1_TGRAN16_2_MASK |
ID_AA64MMFR0_EL1_TGRAN64_2_MASK;
if (vcpu_has_nv(vcpu) &&
((sanitized_val & tgran2_mask) != (user_val & tgran2_mask)))
if (!vcpu_has_nv(vcpu))
return set_id_reg(vcpu, rd, user_val);
if (!tgran2_val_allowed(TGRAN4_2, sanitized_val, user_val) ||
!tgran2_val_allowed(TGRAN16_2, sanitized_val, user_val) ||
!tgran2_val_allowed(TGRAN64_2, sanitized_val, user_val))
return -EINVAL;
return set_id_reg(vcpu, rd, user_val);
@ -3141,6 +3180,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64ISAR2_EL1_APA3 |
ID_AA64ISAR2_EL1_GPA3)),
ID_WRITABLE(ID_AA64ISAR3_EL1, (ID_AA64ISAR3_EL1_FPRCVT |
ID_AA64ISAR3_EL1_LSFE |
ID_AA64ISAR3_EL1_FAMINMAX)),
ID_UNALLOCATED(6,4),
ID_UNALLOCATED(6,5),
@ -3152,8 +3192,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
~(ID_AA64MMFR0_EL1_RES0 |
ID_AA64MMFR0_EL1_ASIDBITS)),
ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 |
ID_AA64MMFR1_EL1_HCX |
ID_AA64MMFR1_EL1_TWED |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_VH |
ID_AA64MMFR1_EL1_VMIDBits)),
@ -3238,6 +3276,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMBLIMITR_EL1), undef_access },
{ SYS_DESC(SYS_PMBPTR_EL1), undef_access },
{ SYS_DESC(SYS_PMBSR_EL1), undef_access },
{ SYS_DESC(SYS_PMSDSFR_EL1), undef_access },
/* PMBIDR_EL1 is not trapped */
{ PMU_SYS_REG(PMINTENSET_EL1),

View File

@ -554,7 +554,6 @@ int vgic_lazy_init(struct kvm *kvm)
* Also map the virtual CPU interface into the VM.
* v2 calls vgic_init() if not already done.
* v3 and derivatives return an error if the VGIC is not initialized.
* vgic_ready() returns true if this function has succeeded.
*/
int kvm_vgic_map_resources(struct kvm *kvm)
{
@ -563,12 +562,12 @@ int kvm_vgic_map_resources(struct kvm *kvm)
gpa_t dist_base;
int ret = 0;
if (likely(vgic_ready(kvm)))
if (likely(smp_load_acquire(&dist->ready)))
return 0;
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
if (dist->ready)
goto out;
if (!irqchip_in_kernel(kvm))
@ -594,14 +593,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
goto out_slots;
}
/*
* kvm_io_bus_register_dev() guarantees all readers see the new MMIO
* registration before returning through synchronize_srcu(), which also
* implies a full memory barrier. As such, marking the distributor as
* 'ready' here is guaranteed to be ordered after all vCPUs having seen
* a completely configured distributor.
*/
dist->ready = true;
smp_store_release(&dist->ready, true);
goto out_slots;
out:
mutex_unlock(&kvm->arch.config_lock);

View File

@ -588,6 +588,7 @@ int vgic_v3_map_resources(struct kvm *kvm)
}
DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat);
static int __init early_group0_trap_cfg(char *buf)
{
@ -697,6 +698,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (kvm_vgic_global_state.vcpu_base == 0)
kvm_info("disabling GICv2 emulation\n");
/*
* Flip the static branch if the HW supports v2, even if we're
* not using it (such as in protected mode).
*/
if (has_v2)
static_branch_enable(&vgic_v3_has_v2_compat);
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;

View File

@ -15,7 +15,7 @@ int vgic_v5_probe(const struct gic_kvm_info *info)
u64 ich_vtr_el2;
int ret;
if (!info->has_gcie_v3_compat)
if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
return -ENODEV;
kvm_vgic_global_state.type = VGIC_V5;

View File

@ -37,6 +37,7 @@ HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
HAS_GICV3_CPUIF
HAS_GICV5_CPUIF
HAS_GICV5_LEGACY
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_HCR_NV1

View File

@ -5785,6 +5785,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
/*
* Ensure that any updates to kvm->buses[] observed by the
* previous instruction (emulated or otherwise) are also
* visible to the instruction KVM is about to emulate.
*/
smp_rmb();
if (!kvm_emulate_instruction(vcpu, 0))
return 0;

View File

@ -1062,16 +1062,9 @@ static void gicv5_set_cpuif_idbits(void)
#ifdef CONFIG_KVM
static struct gic_kvm_info gic_v5_kvm_info __initdata;
static bool __init gicv5_cpuif_has_gcie_legacy(void)
{
u64 idr0 = read_sysreg_s(SYS_ICC_IDR0_EL1);
return !!FIELD_GET(ICC_IDR0_EL1_GCIE_LEGACY, idr0);
}
static void __init gic_of_setup_kvm_info(struct device_node *node)
{
gic_v5_kvm_info.type = GIC_V5;
gic_v5_kvm_info.has_gcie_v3_compat = gicv5_cpuif_has_gcie_legacy();
/* GIC Virtual CPU interface maintenance interrupt */
gic_v5_kvm_info.no_maint_irq_mask = false;

View File

@ -378,6 +378,7 @@ struct vgic_cpu {
extern struct static_key_false vgic_v2_cpuif_trap;
extern struct static_key_false vgic_v3_cpuif_trap;
extern struct static_key_false vgic_v3_has_v2_compat;
int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr);
void kvm_vgic_early_init(struct kvm *kvm);
@ -409,7 +410,6 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) ((k)->arch.vgic.initialized)
#define vgic_ready(k) ((k)->arch.vgic.ready)
#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))

View File

@ -128,6 +128,7 @@
#define FFA_FEAT_RXTX_MIN_SZ_4K 0
#define FFA_FEAT_RXTX_MIN_SZ_64K 1
#define FFA_FEAT_RXTX_MIN_SZ_16K 2
#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0)
/* FFA Bus/Device/Driver related */
struct ffa_device {

View File

@ -36,8 +36,6 @@ struct gic_kvm_info {
bool has_v4_1;
/* Deactivation impared, subpar stuff */
bool no_hw_deactivation;
/* v3 compat support (GICv5 hosts, only) */
bool has_gcie_v3_compat;
};
#ifdef CONFIG_KVM

View File

@ -207,6 +207,7 @@ struct kvm_io_range {
struct kvm_io_bus {
int dev_count;
int ioeventfd_count;
struct rcu_head rcu;
struct kvm_io_range range[];
};
@ -967,11 +968,15 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
}
/*
* Get a bus reference under the update-side lock. No long-term SRCU reader
* references are permitted, to avoid stale reads vs concurrent IO
* registrations.
*/
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
return rcu_dereference_protected(kvm->buses[idx],
lockdep_is_held(&kvm->slots_lock));
}
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)

View File

@ -156,6 +156,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/external_aborts

View File

@ -165,10 +165,8 @@ static void guest_code(void)
static void test_init_timer_irq(struct kvm_vm *vm)
{
/* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
ptimer_irq = vcpu_get_ptimer_irq(vcpus[0]);
vtimer_irq = vcpu_get_vtimer_irq(vcpus[0]);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@ -176,14 +174,14 @@ static void test_init_timer_irq(struct kvm_vm *vm)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
static int gic_fd;
struct kvm_vm *test_vm_create(void)
{
struct kvm_vm *vm;
unsigned int i;
int nr_vcpus = test_args.nr_vcpus;
TEST_REQUIRE(kvm_supports_vgic_v3());
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
vm_init_descriptor_tables(vm);
@ -204,8 +202,6 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_descriptor_tables(vcpus[i]);
test_init_timer_irq(vm);
gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
@ -215,6 +211,5 @@ struct kvm_vm *test_vm_create(void)
void test_vm_cleanup(struct kvm_vm *vm)
{
close(gic_fd);
kvm_vm_free(vm);
}

View File

@ -924,10 +924,8 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
ptimer_irq = vcpu_get_ptimer_irq(vcpu);
vtimer_irq = vcpu_get_vtimer_irq(vcpu);
sync_global_to_guest(vm, ptimer_irq);
sync_global_to_guest(vm, vtimer_irq);
@ -935,8 +933,6 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
}
static int gic_fd;
static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
enum arch_timer timer)
{
@ -951,8 +947,6 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
vcpu_args_set(*vcpu, 1, timer);
test_init_timer_irq(*vm, *vcpu);
gic_fd = vgic_v3_setup(*vm, 1, 64);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
sync_global_to_guest(*vm, test_args);
sync_global_to_guest(*vm, CVAL_MAX);
@ -961,7 +955,6 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
static void test_vm_cleanup(struct kvm_vm *vm)
{
close(gic_fd);
kvm_vm_free(vm);
}
@ -1042,6 +1035,8 @@ int main(int argc, char *argv[])
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
TEST_REQUIRE(kvm_supports_vgic_v3());
if (!parse_args(argc, argv))
exit(KSFT_SKIP);

View File

@ -250,6 +250,47 @@ static void test_serror(void)
kvm_vm_free(vm);
}
static void expect_sea_s1ptw_handler(struct ex_regs *regs)
{
u64 esr = read_sysreg(esr_el1);
GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
GUEST_DONE();
}
static noinline void test_s1ptw_abort_guest(void)
{
extern char test_s1ptw_abort_insn;
WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
asm volatile("test_s1ptw_abort_insn:\n\t"
"ldr x0, [%0]\n\t"
: : "r" (MMIO_ADDR) : "x0", "memory");
GUEST_FAIL("Load on S1PTW abort should not retire");
}
static void test_s1ptw_abort(void)
{
struct kvm_vcpu *vcpu;
u64 *ptep, bad_pa;
struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
expect_sea_s1ptw_handler);
ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
bad_pa = BIT(vm->pa_bits) - vm->page_size;
*ptep &= ~GENMASK(47, 12);
*ptep |= bad_pa;
vcpu_run_expect_done(vcpu);
kvm_vm_free(vm);
}
static void test_serror_emulated_guest(void)
{
GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
@ -327,4 +368,5 @@ int main(void)
test_serror_masked();
test_serror_emulated();
test_mmio_ease();
test_s1ptw_abort();
}

View File

@ -0,0 +1,71 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* hello_el2 - Basic KVM selftest for VM running at EL2 with E2H=RES1
*
* Copyright 2025 Google LLC
*/
#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
#include "ucall.h"
#include <asm/sysreg.h>
static void guest_code(void)
{
u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1);
u64 mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1);
u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1);
u8 e2h0 = SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4);
GUEST_ASSERT_EQ(get_current_el(), 2);
GUEST_ASSERT(read_sysreg(hcr_el2) & HCR_EL2_E2H);
GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1),
ID_AA64MMFR1_EL1_VH_IMP);
/*
* Traps of the complete ID register space are IMPDEF without FEAT_FGT,
* which is really annoying to deal with when KVM describes E2H as RES1.
*
* If the implementation doesn't honor the trap then expect the register
* to return all zeros.
*/
if (e2h0 == ID_AA64MMFR4_EL1_E2H0_IMP)
GUEST_ASSERT_EQ(SYS_FIELD_GET(ID_AA64MMFR0_EL1, FGT, mmfr0),
ID_AA64MMFR0_EL1_FGT_NI);
else
GUEST_ASSERT_EQ(e2h0, ID_AA64MMFR4_EL1_E2H0_NI_NV1);
GUEST_DONE();
}
int main(void)
{
struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
vm = vm_create(1);
kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
break;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
break;
default:
TEST_FAIL("Unhandled ucall: %ld\n", uc.cmd);
}
kvm_vm_free(vm);
return 0;
}

View File

@ -108,7 +108,7 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info)
for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
memset(&res, 0, sizeof(res));
smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
do_smccc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
switch (stage) {
case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:

View File

@ -25,7 +25,7 @@ static void guest_code(void)
{
struct arm_smccc_res res = {};
smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
do_smccc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res);
__GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 &&
res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 &&

View File

@ -163,6 +163,8 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
uint64_t pfr0;
test_disable_default_vgic();
vm = vm_create_with_one_vcpu(&vcpu, NULL);
pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
__TEST_REQUIRE(FIELD_GET(ID_AA64PFR0_EL1_GIC, pfr0),

View File

@ -27,7 +27,7 @@ static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
{
struct arm_smccc_res res;
smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
do_smccc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
0, 0, 0, 0, &res);
return res.a0;
@ -38,7 +38,7 @@ static uint64_t psci_affinity_info(uint64_t target_affinity,
{
struct arm_smccc_res res;
smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
do_smccc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
0, 0, 0, 0, 0, &res);
return res.a0;
@ -48,7 +48,7 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
{
struct arm_smccc_res res;
smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
do_smccc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
0, 0, 0, 0, 0, &res);
return res.a0;
@ -58,7 +58,7 @@ static uint64_t psci_system_off2(uint64_t type, uint64_t cookie)
{
struct arm_smccc_res res;
smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
do_smccc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@ -67,7 +67,7 @@ static uint64_t psci_features(uint32_t func_id)
{
struct arm_smccc_res res;
smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
do_smccc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}
@ -89,12 +89,13 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
vm = vm_create(2);
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*source = aarch64_vcpu_add(vm, 0, &init, guest_code);
*target = aarch64_vcpu_add(vm, 1, &init, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
return vm;
}

View File

@ -15,8 +15,6 @@
#include "test_util.h"
#include <linux/bitfield.h>
bool have_cap_arm_mte;
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
@ -125,6 +123,13 @@ static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
REG_FTR_END,
};
static const struct reg_ftr_bits ftr_id_aa64isar3_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FPRCVT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, LSFE, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR3_EL1, FAMINMAX, 0),
REG_FTR_END,
};
static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
@ -165,7 +170,9 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HCX, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TWED, 0),
REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
@ -221,6 +228,7 @@ static struct test_feature_reg test_regs[] = {
TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
TEST_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3_el1),
TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1),
TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
@ -239,6 +247,7 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ISAR3_EL1);
GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
@ -568,7 +577,9 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
uint64_t mte_frac;
int idx, err;
if (!have_cap_arm_mte) {
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
if (!mte) {
ksft_test_result_skip("MTE capability not supported, nothing to test\n");
return;
}
@ -593,9 +604,6 @@ static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
* from unsupported (0xF) to supported (0).
*
*/
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
mte = FIELD_GET(ID_AA64PFR1_EL1_MTE, val);
mte_frac = FIELD_GET(ID_AA64PFR1_EL1_MTE_frac, val);
if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
@ -750,28 +758,23 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
ksft_test_result_pass("%s\n", __func__);
}
void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) {
vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
have_cap_arm_mte = true;
}
}
int main(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
bool aarch64_only;
uint64_t val, el0;
int test_cnt;
int test_cnt, i, j;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
test_wants_mte();
vm = vm_create(1);
vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
vcpu = vm_vcpu_add(vm, 0, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
/* Check for AARCH64 only system */
val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
@ -780,13 +783,10 @@ int main(void)
ksft_print_header();
test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
test_cnt = 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST;
for (i = 0; i < ARRAY_SIZE(test_regs); i++)
for (j = 0; test_regs[i].ftr_bits[j].type != FTR_END; j++)
test_cnt++;
ksft_set_plan(test_cnt);

View File

@ -22,8 +22,20 @@ enum smccc_conduit {
SMC_INSN,
};
static bool test_runs_at_el2(void)
{
struct kvm_vm *vm = vm_create(1);
struct kvm_vcpu_init init;
kvm_get_default_vcpu_target(vm, &init);
kvm_vm_free(vm);
return init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
}
#define for_each_conduit(conduit) \
for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
for (conduit = test_runs_at_el2() ? SMC_INSN : HVC_INSN; \
conduit <= SMC_INSN; conduit++)
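With the loop now starting at SMC_INSN when the test itself runs at EL2 (an HVC issued at virtual EL2 is taken by the guest's own EL2 vectors rather than reaching KVM), a trivial sketch of how the reworked macro behaves (illustrative only, not part of the test):

/* Illustrative only: visits SMC alone at EL2, HVC then SMC otherwise. */
static int example_count_conduits(void)
{
	enum smccc_conduit conduit;
	int n = 0;

	for_each_conduit(conduit)
		n++;

	return n;
}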
static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
{
@ -64,7 +76,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
struct kvm_vm *vm;
vm = vm_create(1);
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
kvm_get_default_vcpu_target(vm, &init);
/*
* Enable in-kernel emulation of PSCI to ensure that calls are denied
@ -73,6 +85,7 @@ static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
*vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
kvm_arch_vm_finalize_vcpus(vm);
return vm;
}

View File

@ -994,6 +994,8 @@ int main(int ac, char **av)
int pa_bits;
int cnt_impl = 0;
test_disable_default_vgic();
pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
max_phys_size = 1ULL << pa_bits;

View File

@ -752,7 +752,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
vcpu_args_set(vcpu, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
guest_irq_handlers[args.eoi_split][args.level_sensitive]);
@ -802,6 +801,9 @@ int main(int argc, char **argv)
int opt;
bool eoi_split = false;
TEST_REQUIRE(kvm_supports_vgic_v3());
test_disable_default_vgic();
while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
switch (opt) {
case 'n':

View File

@ -27,7 +27,7 @@ static vm_paddr_t gpa_base;
static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int gic_fd, its_fd;
static int its_fd;
static struct test_data {
bool request_vcpus_stop;
@ -214,9 +214,6 @@ static void setup_test_data(void)
static void setup_gic(void)
{
gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
its_fd = vgic_its_setup(vm);
}
@ -355,7 +352,6 @@ static void setup_vm(void)
static void destroy_vm(void)
{
close(its_fd);
close(gic_fd);
kvm_vm_free(vm);
free(vcpus);
}
@ -374,6 +370,8 @@ int main(int argc, char **argv)
u32 nr_threads;
int c;
TEST_REQUIRE(kvm_supports_vgic_v3());
while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
switch (c) {
case 'v':

View File

@ -28,7 +28,6 @@
struct vpmu_vm {
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
int gic_fd;
};
static struct vpmu_vm vpmu_vm;
@ -45,11 +44,6 @@ static uint64_t get_pmcr_n(uint64_t pmcr)
return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
}
static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
{
u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
}
static uint64_t get_counters_mask(uint64_t n)
{
uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
@ -415,10 +409,6 @@ static void create_vpmu_vm(void *guest_code)
.attr = KVM_ARM_VCPU_PMU_V3_IRQ,
.addr = (uint64_t)&irq,
};
struct kvm_device_attr init_attr = {
.group = KVM_ARM_VCPU_PMU_V3_CTRL,
.attr = KVM_ARM_VCPU_PMU_V3_INIT,
};
/* The test creates the vpmu_vm multiple times. Ensure a clean state */
memset(&vpmu_vm, 0, sizeof(vpmu_vm));
@ -431,13 +421,12 @@ static void create_vpmu_vm(void *guest_code)
}
/* Create vCPU with PMUv3 */
vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
vcpu_init_descriptor_tables(vpmu_vm.vcpu);
vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
__TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
"Failed to create vgic-v3, skipping");
kvm_arch_vm_finalize_vcpus(vpmu_vm.vm);
/* Make sure that PMUv3 support is indicated in the ID register */
dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
@ -446,14 +435,11 @@ static void create_vpmu_vm(void *guest_code)
pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
"Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
/* Initialize vPMU */
vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
}
static void destroy_vpmu_vm(void)
{
close(vpmu_vm.gic_fd);
kvm_vm_free(vpmu_vm.vm);
}
@ -475,33 +461,28 @@ static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
}
}
static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
static void test_create_vpmu_vm_with_nr_counters(unsigned int nr_counters, bool expect_fail)
{
struct kvm_vcpu *vcpu;
uint64_t pmcr, pmcr_orig;
unsigned int prev;
int ret;
create_vpmu_vm(guest_code);
vcpu = vpmu_vm.vcpu;
pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
pmcr = pmcr_orig;
prev = get_pmcr_n(vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0)));
/*
* Setting a larger value of PMCR.N should not modify the field, and
* return a success.
*/
set_pmcr_n(&pmcr, pmcr_n);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
ret = __vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters);
if (expect_fail)
TEST_ASSERT(pmcr_orig == pmcr,
"PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
pmcr, pmcr_n);
TEST_ASSERT(ret && errno == EINVAL,
"Setting more PMU counters (%u) than available (%u) unexpectedly succeeded",
nr_counters, prev);
else
TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
"Failed to update PMCR.N to %lu (received: %lu)",
pmcr_n, get_pmcr_n(pmcr));
TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL, KVM_ARM_VCPU_PMU_V3_INIT, NULL);
}
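For reference, the attribute flow exercised above boils down to the following sketch (the counter value is purely illustrative): cap the number of implemented counters first, then initialise the PMU.

unsigned int nr_counters = 4;	/* illustrative value, not taken from the test */

vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
		     KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS, &nr_counters);
vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
		     KVM_ARM_VCPU_PMU_V3_INIT, NULL);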
/*
@ -516,11 +497,11 @@ static void run_access_test(uint64_t pmcr_n)
pr_debug("Test with pmcr_n %lu\n", pmcr_n);
test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
/* Save the initial sp to restore it later when re-running the guest */
sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1));
run_vcpu(vcpu, pmcr_n);
@ -528,11 +509,11 @@ static void run_access_test(uint64_t pmcr_n)
* Reset and re-initialize the vCPU, and run the guest code again to
* check if PMCR_EL0.N is preserved.
*/
vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
kvm_get_default_vcpu_target(vpmu_vm.vm, &init);
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
aarch64_vcpu_setup(vcpu, &init);
vcpu_init_descriptor_tables(vcpu);
vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), sp);
vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
run_vcpu(vcpu, pmcr_n);
@ -557,7 +538,7 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
uint64_t set_reg_id, clr_reg_id, reg_val;
uint64_t valid_counters_mask, max_counters_mask;
test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
test_create_vpmu_vm_with_nr_counters(pmcr_n, false);
vcpu = vpmu_vm.vcpu;
valid_counters_mask = get_counters_mask(pmcr_n);
@ -611,7 +592,7 @@ static void run_error_test(uint64_t pmcr_n)
{
pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
test_create_vpmu_vm_with_nr_counters(pmcr_n, true);
destroy_vpmu_vm();
}
@ -629,11 +610,25 @@ static uint64_t get_pmcr_n_limit(void)
return get_pmcr_n(pmcr);
}
static bool kvm_supports_nr_counters_attr(void)
{
bool supported;
create_vpmu_vm(NULL);
supported = !__vcpu_has_device_attr(vpmu_vm.vcpu, KVM_ARM_VCPU_PMU_V3_CTRL,
KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS);
destroy_vpmu_vm();
return supported;
}
int main(void)
{
uint64_t i, pmcr_n;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
TEST_REQUIRE(kvm_supports_vgic_v3());
TEST_REQUIRE(kvm_supports_nr_counters_attr());
pmcr_n = get_pmcr_n_limit();
for (i = 0; i <= pmcr_n; i++) {


@ -20,38 +20,6 @@
#include "guest_modes.h"
#include "ucall_common.h"
#ifdef __aarch64__
#include "arm64/vgic.h"
static int gic_fd;
static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
{
/*
* The test can still run even if hardware does not support GICv3, as it
* is only an optimization to reduce guest exits.
*/
gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
}
static void arch_cleanup_vm(struct kvm_vm *vm)
{
if (gic_fd > 0)
close(gic_fd);
}
#else /* __aarch64__ */
static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
{
}
static void arch_cleanup_vm(struct kvm_vm *vm)
{
}
#endif
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
@ -166,8 +134,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
dirty_log_manual_caps);
arch_setup_vm(vm, nr_vcpus);
/* Start the iterations */
iteration = 0;
host_quit = false;
@ -285,7 +251,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
memstress_free_bitmaps(bitmaps, p->slots);
arch_cleanup_vm(vm);
memstress_destroy_vm(vm);
}


@ -585,6 +585,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
log_mode_create_vm_done(vm);
*vcpu = vm_vcpu_add(vm, 0, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
return vm;
}


@ -116,10 +116,13 @@ void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
}
#ifdef __aarch64__
static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
static void prepare_vcpu_init(struct kvm_vm *vm, struct vcpu_reg_list *c,
struct kvm_vcpu_init *init)
{
struct vcpu_reg_sublist *s;
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, init);
for_each_sublist(c, s)
if (s->capability)
init->features[s->feature / 32] |= 1 << (s->feature % 32);
@ -127,10 +130,10 @@ static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *ini
static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
{
struct kvm_vcpu_init init = { .target = -1, };
struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
prepare_vcpu_init(c, &init);
prepare_vcpu_init(vm, c, &init);
vcpu = __vm_vcpu_add(vm, 0);
aarch64_vcpu_setup(vcpu, &init);


@ -155,4 +155,28 @@ static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
timer_set_tval(timer, msec_to_cycles(msec));
}
static inline u32 vcpu_get_vtimer_irq(struct kvm_vcpu *vcpu)
{
u32 intid;
u64 attr;
attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HVTIMER :
KVM_ARM_VCPU_TIMER_IRQ_VTIMER;
vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
return intid;
}
static inline u32 vcpu_get_ptimer_irq(struct kvm_vcpu *vcpu)
{
u32 intid;
u64 attr;
attr = vcpu_has_el2(vcpu) ? KVM_ARM_VCPU_TIMER_IRQ_HPTIMER :
KVM_ARM_VCPU_TIMER_IRQ_PTIMER;
vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, attr, &intid);
return intid;
}
#endif /* SELFTEST_KVM_ARCH_TIMER_H */
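A minimal usage sketch for the new helpers (host side; variable names are illustrative): query the IRQ numbers KVM assigned to the timers matching the vCPU's exception level, then hand them to the guest.

u32 vintid = vcpu_get_vtimer_irq(vcpu);
u32 pintid = vcpu_get_ptimer_irq(vcpu);

/* The guest receives the INTIDs as its first two args and enables the matching PPIs. */
vcpu_args_set(vcpu, 2, vintid, pintid);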


@ -2,6 +2,9 @@
#ifndef SELFTEST_KVM_UTIL_ARCH_H
#define SELFTEST_KVM_UTIL_ARCH_H
struct kvm_vm_arch {};
struct kvm_vm_arch {
bool has_gic;
int gic_fd;
};
#endif // SELFTEST_KVM_UTIL_ARCH_H


@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
static inline void cpu_relax(void)
@ -300,4 +301,77 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
/* Execute a Wait For Interrupt instruction. */
void wfi(void);
void test_wants_mte(void);
void test_disable_default_vgic(void);
bool vm_supports_el2(struct kvm_vm *vm);
static bool vcpu_has_el2(struct kvm_vcpu *vcpu)
{
return vcpu->init.features[0] & BIT(KVM_ARM_VCPU_HAS_EL2);
}
#define MAPPED_EL2_SYSREG(el2, el1) \
case SYS_##el1: \
if (vcpu_has_el2(vcpu)) \
alias = SYS_##el2; \
break
static __always_inline u64 ctxt_reg_alias(struct kvm_vcpu *vcpu, u32 encoding)
{
u32 alias = encoding;
BUILD_BUG_ON(!__builtin_constant_p(encoding));
switch (encoding) {
MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1);
MAPPED_EL2_SYSREG(CPTR_EL2, CPACR_EL1);
MAPPED_EL2_SYSREG(TTBR0_EL2, TTBR0_EL1);
MAPPED_EL2_SYSREG(TTBR1_EL2, TTBR1_EL1);
MAPPED_EL2_SYSREG(TCR_EL2, TCR_EL1);
MAPPED_EL2_SYSREG(VBAR_EL2, VBAR_EL1);
MAPPED_EL2_SYSREG(AFSR0_EL2, AFSR0_EL1);
MAPPED_EL2_SYSREG(AFSR1_EL2, AFSR1_EL1);
MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1);
MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1);
MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1);
MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1);
MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1);
MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1);
MAPPED_EL2_SYSREG(POR_EL2, POR_EL1);
MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1);
MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1);
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1);
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1);
MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1);
MAPPED_EL2_SYSREG(SCTLR2_EL2, SCTLR2_EL1);
MAPPED_EL2_SYSREG(CNTHCTL_EL2, CNTKCTL_EL1);
case SYS_SP_EL1:
if (!vcpu_has_el2(vcpu))
return ARM64_CORE_REG(sp_el1);
alias = SYS_SP_EL2;
break;
default:
BUILD_BUG();
}
return KVM_ARM64_SYS_REG(alias);
}
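As an illustration of what the alias buys a test, the same source line works for a vEL1 vCPU and for a vCPU with EL2, resolving either to the EL1 register or to its E2H-mapped EL2 counterpart:

/* SCTLR_EL1 on a vEL1 vCPU, SCTLR_EL2 on a vCPU with EL2. */
u64 sctlr = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1));

/* SP is special-cased: the sp_el1 core register for vEL1, SYS_SP_EL2 otherwise. */
u64 sp = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1));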
void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init);
static inline unsigned int get_current_el(void)
{
return (read_sysreg(CurrentEL) >> 2) & 0x3;
}
#define do_smccc(...) \
do { \
if (get_current_el() == 2) \
smccc_smc(__VA_ARGS__); \
else \
smccc_hvc(__VA_ARGS__); \
} while (0)
#endif /* SELFTEST_KVM_PROCESSOR_H */
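Sketch of a do_smccc() call site (assuming the PSCI constants from <linux/psci.h> are available): guest code that may run at either EL1 or vEL2 can issue a PSCI version query without caring about the conduit.

struct arm_smccc_res res;

/* HVC when the guest runs at EL1, SMC when it runs at (v)EL2. */
do_smccc(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);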


@ -16,6 +16,9 @@
((uint64_t)(flags) << 12) | \
index)
bool kvm_supports_vgic_v3(void);
int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
void __vgic_v3_init(int fd);
int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
#define VGIC_MAX_RESERVED 1023


@ -63,6 +63,9 @@ struct kvm_vcpu {
struct kvm_run *run;
#ifdef __x86_64__
struct kvm_cpuid2 *cpuid;
#endif
#ifdef __aarch64__
struct kvm_vcpu_init init;
#endif
struct kvm_binary_stats stats;
struct kvm_dirty_gfn *dirty_gfns;
@ -1257,7 +1260,9 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
*/
void kvm_selftest_arch_init(void);
void kvm_arch_vm_post_create(struct kvm_vm *vm);
void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus);
void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm);
void kvm_arch_vm_release(struct kvm_vm *vm);
bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);


@ -12,6 +12,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "ucall_common.h"
#include "vgic.h"
#include <linux/bitfield.h>
#include <linux/sizes.h>
@ -185,7 +186,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
_virt_pg_map(vm, vaddr, paddr, attr_idx);
}
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
uint64_t *ptep;
@ -195,17 +196,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 0)
return ptep;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 1)
break;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
if (level == 2)
break;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
@ -223,6 +230,11 @@ unmapped_gva:
exit(EXIT_FAILURE);
}
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
return virt_get_pte_hva_at_level(vm, gva, 3);
}
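For example (levels numbered from 0, assuming the default 4-level configuration and some mapped gva), a test can now inspect an intermediate descriptor as well as the leaf entry:

uint64_t *pud = virt_get_pte_hva_at_level(vm, gva, 1);	/* level-1 descriptor */
uint64_t *pte = virt_get_pte_hva(vm, gva);		/* leaf entry, i.e. level 3 */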
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep = virt_get_pte_hva(vm, gva);
@ -266,31 +278,49 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
bool vm_supports_el2(struct kvm_vm *vm)
{
const char *value = getenv("NV");
if (value && *value == '0')
return false;
return vm_check_cap(vm, KVM_CAP_ARM_EL2) && vm->arch.has_gic;
}
void kvm_get_default_vcpu_target(struct kvm_vm *vm, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init preferred = {};
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
if (vm_supports_el2(vm))
preferred.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
*init = preferred;
}
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
if (!init)
if (!init) {
kvm_get_default_vcpu_target(vm, &default_init);
init = &default_init;
if (init->target == -1) {
struct kvm_vcpu_init preferred;
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
init->target = preferred.target;
}
vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
vcpu->init = *init;
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_CPACR_EL1), 3 << 20);
sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
sctlr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1));
tcr_el1 = vcpu_get_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1));
/* Configure base granule size */
switch (vm->mode) {
@ -357,11 +387,17 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
if (use_lpa2_pte_format(vm))
tcr_el1 |= TCR_DS;
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TCR_EL1), tcr_el1);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
if (!vcpu_has_el2(vcpu))
return;
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2),
HCR_EL2_RW | HCR_EL2_TGE | HCR_EL2_E2H);
}
void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
@ -395,7 +431,7 @@ static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
aarch64_vcpu_setup(vcpu, init);
vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_SP_EL1), stack_vaddr + stack_size);
return vcpu;
}
@ -465,7 +501,7 @@ void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
{
extern char vectors;
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
vcpu_set_reg(vcpu, ctxt_reg_alias(vcpu, SYS_VBAR_EL1), (uint64_t)&vectors);
}
void route_exception(struct ex_regs *regs, int vector)
@ -653,3 +689,39 @@ void wfi(void)
{
asm volatile("wfi");
}
static bool request_mte;
static bool request_vgic = true;
void test_wants_mte(void)
{
request_mte = true;
}
void test_disable_default_vgic(void)
{
request_vgic = false;
}
void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
if (request_mte && vm_check_cap(vm, KVM_CAP_ARM_MTE))
vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
if (request_vgic && kvm_supports_vgic_v3()) {
vm->arch.gic_fd = __vgic_v3_setup(vm, nr_vcpus, 64);
vm->arch.has_gic = true;
}
}
void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
{
if (vm->arch.has_gic)
__vgic_v3_init(vm->arch.gic_fd);
}
void kvm_arch_vm_release(struct kvm_vm *vm)
{
if (vm->arch.has_gic)
close(vm->arch.gic_fd);
}
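Putting the three hooks together, a test that adds its vCPUs by hand follows roughly this sequence (a sketch; __vm_create_with_vcpus() already performs the finalize step):

struct kvm_vm *vm = vm_create(1);	/* kvm_arch_vm_post_create() creates the default vGIC */

/* ... add vCPUs, e.g. via aarch64_vcpu_add() ... */

kvm_arch_vm_finalize_vcpus(vm);		/* initialises the vGIC once all vCPUs exist */

/* ... run the test ... */

kvm_vm_free(vm);			/* the release path closes the cached GIC fd */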


@ -15,6 +15,17 @@
#include "gic.h"
#include "gic_v3.h"
bool kvm_supports_vgic_v3(void)
{
struct kvm_vm *vm = vm_create_barebones();
int r;
r = __kvm_test_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
kvm_vm_free(vm);
return !r;
}
/*
* vGIC-v3 default host setup
*
@ -30,24 +41,11 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
int __vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
{
int gic_fd;
uint64_t attr;
struct list_head *iter;
unsigned int nr_gic_pages, nr_vcpus_created = 0;
TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be zero");
/*
* Make sure that the caller is in fact calling this
* function after all the vCPUs are added.
*/
list_for_each(iter, &vm->vcpus)
nr_vcpus_created++;
TEST_ASSERT(nr_vcpus == nr_vcpus_created,
"Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
nr_vcpus, nr_vcpus_created);
unsigned int nr_gic_pages;
/* Distributor setup */
gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
@ -56,9 +54,6 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
attr = GICD_BASE_GPA;
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
@ -73,12 +68,41 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
return gic_fd;
}
void __vgic_v3_init(int fd)
{
kvm_device_attr_set(fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
}
int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
{
unsigned int nr_vcpus_created = 0;
struct list_head *iter;
int fd;
TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be zero");
/*
* Make sure that the caller is in fact calling this
* function after all the vCPUs are added.
*/
list_for_each(iter, &vm->vcpus)
nr_vcpus_created++;
TEST_ASSERT(nr_vcpus == nr_vcpus_created,
"Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
nr_vcpus, nr_vcpus_created);
fd = __vgic_v3_setup(vm, nr_vcpus, nr_irqs);
if (fd < 0)
return fd;
__vgic_v3_init(fd);
return fd;
}
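In short, library code splits the two phases as sketched below, while vgic_v3_setup() keeps the old all-in-one behaviour for tests that call it directly:

int fd = __vgic_v3_setup(vm, nr_vcpus, 64);	/* distributor/redistributor setup, no init */

if (fd >= 0) {
	/* ... create the vCPUs ... */
	__vgic_v3_init(fd);			/* KVM_DEV_ARM_VGIC_CTRL_INIT, after all vCPUs exist */
}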
/* should only work for level sensitive interrupts */
int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
{


@ -517,7 +517,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
guest_rng = new_guest_random_state(guest_random_seed);
sync_global_to_guest(vm, guest_rng);
kvm_arch_vm_post_create(vm);
kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
return vm;
}
@ -555,6 +555,7 @@ struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
for (i = 0; i < nr_vcpus; ++i)
vcpus[i] = vm_vcpu_add(vm, i, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
return vm;
}
@ -805,6 +806,8 @@ void kvm_vm_release(struct kvm_vm *vmp)
/* Free cached stats metadata and close FD */
kvm_stats_release(&vmp->stats);
kvm_arch_vm_release(vmp);
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
@ -2330,7 +2333,15 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
TEST_FAIL("Unable to find stat '%s'", name);
}
__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
__weak void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
}
__weak void kvm_arch_vm_finalize_vcpus(struct kvm_vm *vm)
{
}
__weak void kvm_arch_vm_release(struct kvm_vm *vm)
{
}


@ -625,7 +625,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
REPORT_GUEST_ASSERT(uc);
}
void kvm_arch_vm_post_create(struct kvm_vm *vm)
void kvm_arch_vm_post_create(struct kvm_vm *vm, unsigned int nr_vcpus)
{
int r;


@ -145,7 +145,7 @@ static void finish_vm_setup(struct kvm_vm *vm)
slot0 = memslot2region(vm, 0);
ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
kvm_arch_vm_post_create(vm);
kvm_arch_vm_post_create(vm, 0);
}
static struct kvm_vm *create_vm_two_memslots(void)


@ -118,7 +118,7 @@ static int64_t smccc(uint32_t func, uint64_t arg)
{
struct arm_smccc_res res;
smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
do_smccc(func, arg, 0, 0, 0, 0, 0, 0, &res);
return res.a0;
}


@ -1103,6 +1103,14 @@ void __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
}
/* Called only on cleanup and destruction paths when there are no users. */
static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm,
enum kvm_bus idx)
{
return rcu_dereference_protected(kvm->buses[idx],
!refcount_read(&kvm->users_count));
}
static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
{
struct kvm *kvm = kvm_arch_alloc_vm();
@ -1228,7 +1236,7 @@ out_err_no_disable:
out_err_no_arch_destroy_vm:
WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
kfree(kvm_get_bus_for_destruction(kvm, i));
kvm_free_irq_routing(kvm);
out_err_no_irq_routing:
cleanup_srcu_struct(&kvm->irq_srcu);
@ -1276,7 +1284,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
struct kvm_io_bus *bus = kvm_get_bus_for_destruction(kvm, i);
if (bus)
kvm_io_bus_destroy(bus);
@ -1312,6 +1320,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
}
cleanup_srcu_struct(&kvm->irq_srcu);
srcu_barrier(&kvm->srcu);
cleanup_srcu_struct(&kvm->srcu);
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
xa_destroy(&kvm->mem_attr_array);
@ -5845,6 +5854,18 @@ static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
return -EOPNOTSUPP;
}
static struct kvm_io_bus *kvm_get_bus_srcu(struct kvm *kvm, enum kvm_bus idx)
{
/*
* Ensure that any updates to kvm_buses[] observed by the previous vCPU
* machine instruction are also visible to the vCPU machine instruction
* that triggered this call.
*/
smp_mb__after_srcu_read_lock();
return srcu_dereference(kvm->buses[idx], &kvm->srcu);
}
int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
@ -5857,7 +5878,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx);
if (!bus)
return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
@ -5876,7 +5897,7 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
.len = len,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx);
if (!bus)
return -ENOMEM;
@ -5926,7 +5947,7 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx);
if (!bus)
return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
@ -5934,6 +5955,13 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
}
EXPORT_SYMBOL_GPL(kvm_io_bus_read);
static void __free_bus(struct rcu_head *rcu)
{
struct kvm_io_bus *bus = container_of(rcu, struct kvm_io_bus, rcu);
kfree(bus);
}
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev)
{
@ -5972,8 +6000,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
memcpy(new_bus->range + i + 1, bus->range + i,
(bus->dev_count - i) * sizeof(struct kvm_io_range));
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
call_srcu(&kvm->srcu, &bus->rcu, __free_bus);
return 0;
}
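The registration path above no longer blocks; it relies on readers holding kvm->srcu while they dereference the bus, so the old array is only freed once those readers drain. Schematically, a reader looks like this (mirroring kvm_io_bus_get_dev() below):

idx = srcu_read_lock(&kvm->srcu);
bus = kvm_get_bus_srcu(kvm, bus_idx);
/* ... look up and dispatch to the matching device ... */
srcu_read_unlock(&kvm->srcu, idx);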
@ -6035,7 +6062,7 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
bus = kvm_get_bus_srcu(kvm, bus_idx);
if (!bus)
goto out_unlock;