// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, Microsoft Corporation.
 *
 * Author:
 *   Roman Kisel <romank@linux.microsoft.com>
 *   Saurabh Sengar <ssengar@linux.microsoft.com>
 *   Naman Jain <namjain@linux.microsoft.com>
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/anon_inodes.h>
#include <linux/cpuhotplug.h>
#include <linux/count_zeros.h>
#include <linux/entry-virt.h>
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <asm/debugreg.h>
#include <asm/mshyperv.h>
#include <trace/events/ipi.h>
#include <uapi/asm/mtrr.h>
#include <uapi/linux/mshv.h>
#include <hyperv/hvhdk.h>

#include "../../kernel/fpu/legacy.h"
#include "mshv.h"
#include "mshv_vtl.h"
#include "hyperv_vmbus.h"

MODULE_AUTHOR("Microsoft");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VTL Driver");

#define MSHV_ENTRY_REASON_LOWER_VTL_CALL	0x1
#define MSHV_ENTRY_REASON_INTERRUPT		0x2
#define MSHV_ENTRY_REASON_INTERCEPT		0x3

#define MSHV_REAL_OFF_SHIFT	16
#define MSHV_PG_OFF_CPU_MASK	(BIT_ULL(MSHV_REAL_OFF_SHIFT) - 1)
#define MSHV_RUN_PAGE_OFFSET	0
#define MSHV_REG_PAGE_OFFSET	1
#define VTL2_VMBUS_SINT_INDEX	7

static struct device *mem_dev;

static struct tasklet_struct msg_dpc;
static wait_queue_head_t fd_wait_queue;
static bool has_message;
static struct eventfd_ctx *flag_eventfds[HV_EVENT_FLAGS_COUNT];
static DEFINE_MUTEX(flag_lock);
static bool __read_mostly mshv_has_reg_page;

/* hvcall code is of type u16, allocate a bitmap of size (1 << 16) to accommodate it */
#define MAX_BITMAP_SIZE	((U16_MAX + 1) / 8)

struct mshv_vtl_hvcall_fd {
	u8 allow_bitmap[MAX_BITMAP_SIZE];
	bool allow_map_initialized;
	/*
	 * Used to protect hvcall setup in IOCTLs
	 */
	struct mutex init_mutex;
	struct miscdevice *dev;
};

struct mshv_vtl_poll_file {
	struct file *file;
	wait_queue_entry_t wait;
	wait_queue_head_t *wqh;
	poll_table pt;
	int cpu;
};

struct mshv_vtl {
	struct device *module_dev;
	u64 id;
};

struct mshv_vtl_per_cpu {
	struct mshv_vtl_run *run;
	struct page *reg_page;
};

/* SYNIC_OVERLAY_PAGE_MSR - internal, identical to hv_synic_simp */
union hv_synic_overlay_page_msr {
	u64 as_uint64;
	struct {
		u64 enabled: 1;
		u64 reserved: 11;
		u64 pfn: 52;
	} __packed;
};

static struct mutex mshv_vtl_poll_file_lock;
static union hv_register_vsm_page_offsets mshv_vsm_page_offsets;
static union hv_register_vsm_capabilities mshv_vsm_capabilities;

static DEFINE_PER_CPU(struct mshv_vtl_poll_file, mshv_vtl_poll_file);
static DEFINE_PER_CPU(unsigned long long, num_vtl0_transitions);
static DEFINE_PER_CPU(struct mshv_vtl_per_cpu, mshv_vtl_per_cpu);

static const union hv_input_vtl input_vtl_zero;
static const union hv_input_vtl input_vtl_normal = {
	.use_target_vtl = 1,
};

static const struct file_operations mshv_vtl_fops;

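/*
 * Create a new VTL object backed by an anonymous inode and return a file
 * descriptor for it.  The fd is reserved before the file is created so a
 * failure can be unwound without touching the caller's fd table.
 */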
static long
mshv_ioctl_create_vtl(void __user *user_arg, struct device *module_dev)
{
	struct mshv_vtl *vtl;
	struct file *file;
	int fd;

	vtl = kzalloc(sizeof(*vtl), GFP_KERNEL);
	if (!vtl)
		return -ENOMEM;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		kfree(vtl);
		return fd;
	}
	file = anon_inode_getfile("mshv_vtl", &mshv_vtl_fops,
				  vtl, O_RDWR);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		kfree(vtl);
		return PTR_ERR(file);
	}
	vtl->module_dev = module_dev;
	fd_install(fd, file);

	return fd;
}

static long
mshv_ioctl_check_extension(void __user *user_arg)
{
	u32 arg;

	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

	switch (arg) {
	case MSHV_CAP_CORE_API_STABLE:
		return 0;
	case MSHV_CAP_REGISTER_PAGE:
		return mshv_has_reg_page;
	case MSHV_CAP_VTL_RETURN_ACTION:
		return mshv_vsm_capabilities.return_action_available;
	case MSHV_CAP_DR6_SHARED:
		return mshv_vsm_capabilities.dr6_shared;
	}

	return -EOPNOTSUPP;
}

static long
mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	struct miscdevice *misc = filp->private_data;

	switch (ioctl) {
	case MSHV_CHECK_EXTENSION:
		return mshv_ioctl_check_extension((void __user *)arg);
	case MSHV_CREATE_VTL:
		return mshv_ioctl_create_vtl((void __user *)arg, misc->this_device);
	}

	return -ENOTTY;
}

static const struct file_operations mshv_dev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = mshv_dev_ioctl,
	.llseek = noop_llseek,
};

static struct miscdevice mshv_dev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "mshv",
	.fops = &mshv_dev_fops,
	.mode = 0600,
};

static struct mshv_vtl_run *mshv_vtl_this_run(void)
{
	return *this_cpu_ptr(&mshv_vtl_per_cpu.run);
}

static struct mshv_vtl_run *mshv_vtl_cpu_run(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.run, cpu);
}

static struct page *mshv_vtl_cpu_reg_page(int cpu)
{
	return *per_cpu_ptr(&mshv_vtl_per_cpu.reg_page, cpu);
}

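/*
 * Allocate this CPU's register page and ask the hypervisor to use it as the
 * REG_PAGE overlay for VTL0, so that commonly accessed VTL0 register state
 * can be read and written without a hypercall.  On failure the CPU simply
 * runs without a register page.
 */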
static void mshv_vtl_configure_reg_page(struct mshv_vtl_per_cpu *per_cpu)
{
	struct hv_register_assoc reg_assoc = {};
	union hv_synic_overlay_page_msr overlay = {};
	struct page *reg_page;

	reg_page = alloc_page(GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL);
	if (!reg_page) {
		WARN(1, "failed to allocate register page\n");
		return;
	}

	overlay.enabled = 1;
	overlay.pfn = page_to_hvpfn(reg_page);
	reg_assoc.name = HV_X64_REGISTER_REG_PAGE;
	reg_assoc.value.reg64 = overlay.as_uint64;

	if (hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				     1, input_vtl_zero, &reg_assoc)) {
		WARN(1, "failed to setup register page\n");
		__free_page(reg_page);
		return;
	}

	per_cpu->reg_page = reg_page;
	mshv_has_reg_page = true;
}

static void mshv_vtl_synic_enable_regs(unsigned int cpu)
{
	union hv_synic_sint sint;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = false;
	sint.auto_eoi = hv_recommend_using_aeoi();

	/* Enable intercepts */
	if (!mshv_vsm_capabilities.intercept_page_available)
		hv_set_msr(HV_MSR_SINT0 + HV_SYNIC_INTERCEPTION_SINT_INDEX,
			   sint.as_uint64);

	/* VTL2 Host VSP SINT is (un)masked when the user mode requests that */
}

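/*
 * Read the VSM code page offsets and VSM capabilities from the hypervisor.
 * The offsets are needed for the VTL return sequence; the capabilities gate
 * optional features such as the register page, return actions and DR6
 * sharing.
 */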
static int mshv_vtl_get_vsm_regs(void)
{
	struct hv_register_assoc registers[2];
	int ret, count = 2;

	registers[0].name = HV_REGISTER_VSM_CODE_PAGE_OFFSETS;
	registers[1].name = HV_REGISTER_VSM_CAPABILITIES;

	ret = hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
				       count, input_vtl_zero, registers);
	if (ret)
		return ret;

	mshv_vsm_page_offsets.as_uint64 = registers[0].value.reg64;
	mshv_vsm_capabilities.as_uint64 = registers[1].value.reg64;

	return ret;
}

static int mshv_vtl_configure_vsm_partition(struct device *dev)
{
	union hv_register_vsm_partition_config config;
	struct hv_register_assoc reg_assoc;

	config.as_uint64 = 0;
	config.default_vtl_protection_mask = HV_MAP_GPA_PERMISSIONS_MASK;
	config.enable_vtl_protection = 1;
	config.zero_memory_on_reset = 1;
	config.intercept_vp_startup = 1;
	config.intercept_cpuid_unimplemented = 1;

	if (mshv_vsm_capabilities.intercept_page_available) {
		dev_dbg(dev, "using intercept page\n");
		config.intercept_page = 1;
	}

	reg_assoc.name = HV_REGISTER_VSM_PARTITION_CONFIG;
	reg_assoc.value.reg64 = config.as_uint64;

	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_zero, &reg_assoc);
}

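/*
 * VMBus interrupt handler for VTL2.  Messages pending on the VTL2 host VSP
 * SINT are handed to the user-mode relay (the mshv_sint device), event flags
 * are forwarded to the eventfds registered for them, and everything else
 * falls through to the regular vmbus_isr().
 */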
static void mshv_vtl_vmbus_isr(void)
{
	struct hv_per_cpu_context *per_cpu;
	struct hv_message *msg;
	u32 message_type;
	union hv_synic_event_flags *event_flags;
	struct eventfd_ctx *eventfd;
	u16 i;

	per_cpu = this_cpu_ptr(hv_context.cpu_context);
	if (smp_processor_id() == 0) {
		msg = (struct hv_message *)per_cpu->hyp_synic_message_page + VTL2_VMBUS_SINT_INDEX;
		message_type = READ_ONCE(msg->header.message_type);
		if (message_type != HVMSG_NONE)
			tasklet_schedule(&msg_dpc);
	}

	event_flags = (union hv_synic_event_flags *)per_cpu->hyp_synic_event_page +
			VTL2_VMBUS_SINT_INDEX;
	for_each_set_bit(i, event_flags->flags, HV_EVENT_FLAGS_COUNT) {
		if (!sync_test_and_clear_bit(i, event_flags->flags))
			continue;
		rcu_read_lock();
		eventfd = READ_ONCE(flag_eventfds[i]);
		if (eventfd)
			eventfd_signal(eventfd);
		rcu_read_unlock();
	}

	vmbus_isr();
}

static int mshv_vtl_alloc_context(unsigned int cpu)
{
	struct mshv_vtl_per_cpu *per_cpu = this_cpu_ptr(&mshv_vtl_per_cpu);

	per_cpu->run = (struct mshv_vtl_run *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	if (!per_cpu->run)
		return -ENOMEM;

	if (mshv_vsm_capabilities.intercept_page_available)
		mshv_vtl_configure_reg_page(per_cpu);

	mshv_vtl_synic_enable_regs(cpu);

	return 0;
}

static int mshv_vtl_cpuhp_online;

static int hv_vtl_setup_synic(void)
{
	int ret;

	/* Use our isr to first filter out packets destined for userspace */
	hv_setup_vmbus_handler(mshv_vtl_vmbus_isr);

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vtl:online",
				mshv_vtl_alloc_context, NULL);
	if (ret < 0) {
		hv_setup_vmbus_handler(vmbus_isr);
		return ret;
	}

	mshv_vtl_cpuhp_online = ret;

	return 0;
}

static void hv_vtl_remove_synic(void)
{
	cpuhp_remove_state(mshv_vtl_cpuhp_online);
	hv_setup_vmbus_handler(vmbus_isr);
}

static int vtl_get_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_get_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

static int vtl_set_vp_register(struct hv_register_assoc *reg)
{
	return hv_call_set_vp_registers(HV_VP_INDEX_SELF, HV_PARTITION_ID_SELF,
					1, input_vtl_normal, reg);
}

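/*
 * Register a VTL0 memory range as device-managed memory so that struct pages
 * exist for it and it can later be mapped into a VTL2 user-mode process via
 * /dev/mshv_vtl_low.
 */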
static int mshv_vtl_ioctl_add_vtl0_mem(struct mshv_vtl *vtl, void __user *arg)
{
	struct mshv_vtl_ram_disposition vtl0_mem;
	struct dev_pagemap *pgmap;
	void *addr;

	if (copy_from_user(&vtl0_mem, arg, sizeof(vtl0_mem)))
		return -EFAULT;
	/* vtl0_mem.last_pfn is excluded in the pagemap range for VTL0 as per design */
	if (vtl0_mem.last_pfn <= vtl0_mem.start_pfn) {
		dev_err(vtl->module_dev, "range start pfn (%llx) > end pfn (%llx)\n",
			vtl0_mem.start_pfn, vtl0_mem.last_pfn);
		return -EFAULT;
	}

	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return -ENOMEM;

	pgmap->ranges[0].start = PFN_PHYS(vtl0_mem.start_pfn);
	pgmap->ranges[0].end = PFN_PHYS(vtl0_mem.last_pfn) - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_GENERIC;

	/*
	 * Determine the highest page order that can be used for the given memory range.
	 * This works best when the range is aligned; i.e. both the start and the length.
	 */
	pgmap->vmemmap_shift = count_trailing_zeros(vtl0_mem.start_pfn | vtl0_mem.last_pfn);
	dev_dbg(vtl->module_dev,
		"Add VTL0 memory: start: 0x%llx, end_pfn: 0x%llx, page order: %lu\n",
		vtl0_mem.start_pfn, vtl0_mem.last_pfn, pgmap->vmemmap_shift);

	addr = devm_memremap_pages(mem_dev, pgmap);
	if (IS_ERR(addr)) {
		dev_err(vtl->module_dev, "devm_memremap_pages error: %ld\n", PTR_ERR(addr));
		kfree(pgmap);
		return -EFAULT;
	}

	/*
	 * Don't free pgmap, since it has to stick around until the memory
	 * is unmapped, which will never happen as there is no scenario
	 * where VTL0 can be released/shutdown without bringing down VTL2.
	 */
	return 0;
}

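/*
 * Ask the given CPU to abandon its current lower-VTL run and return to user
 * space.  The cancel flag is set and, for a remote CPU, a reschedule IPI is
 * sent so the flag is noticed promptly.
 */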
static void mshv_vtl_cancel(int cpu)
{
	int here = get_cpu();

	if (here != cpu) {
		if (!xchg_relaxed(&mshv_vtl_cpu_run(cpu)->cancel, 1))
			smp_send_reschedule(cpu);
	} else {
		WRITE_ONCE(mshv_vtl_this_run()->cancel, 1);
	}
	put_cpu();
}

static int mshv_vtl_poll_file_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct mshv_vtl_poll_file *poll_file = container_of(wait, struct mshv_vtl_poll_file, wait);

	mshv_vtl_cancel(poll_file->cpu);

	return 0;
}

static void mshv_vtl_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
{
	struct mshv_vtl_poll_file *poll_file = container_of(pt, struct mshv_vtl_poll_file, pt);

	WARN_ON(poll_file->wqh);
	poll_file->wqh = wqh;
	add_wait_queue(wqh, &poll_file->wait);
}

static int mshv_vtl_ioctl_set_poll_file(struct mshv_vtl_set_poll_file __user *user_input)
{
	struct file *file, *old_file;
	struct mshv_vtl_poll_file *poll_file;
	struct mshv_vtl_set_poll_file input;

	if (copy_from_user(&input, user_input, sizeof(input)))
		return -EFAULT;

	if (input.cpu >= num_possible_cpus() || !cpu_online(input.cpu))
		return -EINVAL;
	/*
	 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
	 * CPU is expected to remain online after above cpu_online() check.
	 */

	file = NULL;
	file = fget(input.fd);
	if (!file)
		return -EBADFD;

	poll_file = per_cpu_ptr(&mshv_vtl_poll_file, READ_ONCE(input.cpu));
	if (!poll_file)
		return -EINVAL;

	mutex_lock(&mshv_vtl_poll_file_lock);

	if (poll_file->wqh)
		remove_wait_queue(poll_file->wqh, &poll_file->wait);
	poll_file->wqh = NULL;

	old_file = poll_file->file;
	poll_file->file = file;
	poll_file->cpu = input.cpu;

	if (file) {
		init_waitqueue_func_entry(&poll_file->wait, mshv_vtl_poll_file_wake);
		init_poll_funcptr(&poll_file->pt, mshv_vtl_ptable_queue_proc);
		vfs_poll(file, &poll_file->pt);
	}

	mutex_unlock(&mshv_vtl_poll_file_lock);

	if (old_file)
		fput(old_file);

	return 0;
}

/* Static table mapping register names to their corresponding actions */
static const struct {
	enum hv_register_name reg_name;
	int debug_reg_num;	/* -1 if not a debug register */
	u32 msr_addr;		/* 0 if not an MSR */
} reg_table[] = {
	/* Debug registers */
	{HV_X64_REGISTER_DR0, 0, 0},
	{HV_X64_REGISTER_DR1, 1, 0},
	{HV_X64_REGISTER_DR2, 2, 0},
	{HV_X64_REGISTER_DR3, 3, 0},
	{HV_X64_REGISTER_DR6, 6, 0},
	/* MTRR MSRs */
	{HV_X64_REGISTER_MSR_MTRR_CAP, -1, MSR_MTRRcap},
	{HV_X64_REGISTER_MSR_MTRR_DEF_TYPE, -1, MSR_MTRRdefType},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE0, -1, MTRRphysBase_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE1, -1, MTRRphysBase_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE2, -1, MTRRphysBase_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE3, -1, MTRRphysBase_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE4, -1, MTRRphysBase_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE5, -1, MTRRphysBase_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE6, -1, MTRRphysBase_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE7, -1, MTRRphysBase_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE8, -1, MTRRphysBase_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASE9, -1, MTRRphysBase_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEA, -1, MTRRphysBase_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEB, -1, MTRRphysBase_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEC, -1, MTRRphysBase_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASED, -1, MTRRphysBase_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEE, -1, MTRRphysBase_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_BASEF, -1, MTRRphysBase_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK0, -1, MTRRphysMask_MSR(0)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK1, -1, MTRRphysMask_MSR(1)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK2, -1, MTRRphysMask_MSR(2)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK3, -1, MTRRphysMask_MSR(3)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK4, -1, MTRRphysMask_MSR(4)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK5, -1, MTRRphysMask_MSR(5)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK6, -1, MTRRphysMask_MSR(6)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK7, -1, MTRRphysMask_MSR(7)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK8, -1, MTRRphysMask_MSR(8)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASK9, -1, MTRRphysMask_MSR(9)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKA, -1, MTRRphysMask_MSR(0xa)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKB, -1, MTRRphysMask_MSR(0xb)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKC, -1, MTRRphysMask_MSR(0xc)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKD, -1, MTRRphysMask_MSR(0xd)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKE, -1, MTRRphysMask_MSR(0xe)},
	{HV_X64_REGISTER_MSR_MTRR_PHYS_MASKF, -1, MTRRphysMask_MSR(0xf)},
	{HV_X64_REGISTER_MSR_MTRR_FIX64K00000, -1, MSR_MTRRfix64K_00000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16K80000, -1, MSR_MTRRfix16K_80000},
	{HV_X64_REGISTER_MSR_MTRR_FIX16KA0000, -1, MSR_MTRRfix16K_A0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC0000, -1, MSR_MTRRfix4K_C0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KC8000, -1, MSR_MTRRfix4K_C8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD0000, -1, MSR_MTRRfix4K_D0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KD8000, -1, MSR_MTRRfix4K_D8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE0000, -1, MSR_MTRRfix4K_E0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KE8000, -1, MSR_MTRRfix4K_E8000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF0000, -1, MSR_MTRRfix4K_F0000},
	{HV_X64_REGISTER_MSR_MTRR_FIX4KF8000, -1, MSR_MTRRfix4K_F8000},
};

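/*
 * Fast path for the get/set register ioctls: debug registers and MTRR MSRs
 * listed in reg_table are accessed directly on this CPU.  Returns 0 when the
 * register was handled here, non-zero when the caller must fall back to a
 * hypercall (for example DR6 when it is not shared between VTLs).
 */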
static int mshv_vtl_get_set_reg(struct hv_register_assoc *regs, bool set)
{
	u64 *reg64;
	enum hv_register_name gpr_name;
	int i;

	gpr_name = regs->name;
	reg64 = &regs->value.reg64;

	/* Search for the register in the table */
	for (i = 0; i < ARRAY_SIZE(reg_table); i++) {
		if (reg_table[i].reg_name != gpr_name)
			continue;
		if (reg_table[i].debug_reg_num != -1) {
			/* Handle debug registers */
			if (gpr_name == HV_X64_REGISTER_DR6 &&
			    !mshv_vsm_capabilities.dr6_shared)
				goto hypercall;
			if (set)
				native_set_debugreg(reg_table[i].debug_reg_num, *reg64);
			else
				*reg64 = native_get_debugreg(reg_table[i].debug_reg_num);
		} else {
			/* Handle MSRs */
			if (set)
				wrmsrl(reg_table[i].msr_addr, *reg64);
			else
				rdmsrl(reg_table[i].msr_addr, *reg64);
		}
		return 0;
	}

hypercall:
	return 1;
}

static void mshv_vtl_return(struct mshv_vtl_cpu_context *vtl0)
{
	struct hv_vp_assist_page *hvp;

	hvp = hv_vp_assist_page[smp_processor_id()];

	/*
	 * Process any signal events set directly in the run page.
	 */
	if (mshv_vsm_capabilities.return_action_available) {
		u32 offset = READ_ONCE(mshv_vtl_this_run()->vtl_ret_action_size);

		WRITE_ONCE(mshv_vtl_this_run()->vtl_ret_action_size, 0);

		/*
		 * Hypervisor will take care of clearing out the actions
		 * set in the assist page.
		 */
		memcpy(hvp->vtl_ret_actions,
		       mshv_vtl_this_run()->vtl_ret_actions,
		       min_t(u32, offset, sizeof(hvp->vtl_ret_actions)));
	}

	mshv_vtl_return_call(vtl0);
}

static bool mshv_vtl_process_intercept(void)
{
	struct hv_per_cpu_context *mshv_cpu;
	void *synic_message_page;
	struct hv_message *msg;
	u32 message_type;

	mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	synic_message_page = mshv_cpu->hyp_synic_message_page;
	if (unlikely(!synic_message_page))
		return true;

	msg = (struct hv_message *)synic_message_page + HV_SYNIC_INTERCEPTION_SINT_INDEX;
	message_type = READ_ONCE(msg->header.message_type);
	if (message_type == HVMSG_NONE)
		return true;

	memcpy(mshv_vtl_this_run()->exit_message, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);

	return false;
}

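/*
 * Switch this CPU to the lower VTL and keep re-entering it until an
 * intercept or interrupt must be handled by user space.  Pending work and
 * cancellation requests are checked before every entry, so the ioctl returns
 * -EINTR instead of blocking indefinitely.
 */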
static int mshv_vtl_ioctl_return_to_lower_vtl(void)
{
	preempt_disable();
	for (;;) {
		unsigned long irq_flags;
		struct hv_vp_assist_page *hvp;
		int ret;

		if (__xfer_to_guest_mode_work_pending()) {
			preempt_enable();
			ret = xfer_to_guest_mode_handle_work();
			if (ret)
				return ret;
			preempt_disable();
		}

		local_irq_save(irq_flags);
		if (READ_ONCE(mshv_vtl_this_run()->cancel)) {
			local_irq_restore(irq_flags);
			preempt_enable();
			return -EINTR;
		}

		mshv_vtl_return(&mshv_vtl_this_run()->cpu_context);
		local_irq_restore(irq_flags);

		hvp = hv_vp_assist_page[smp_processor_id()];
		this_cpu_inc(num_vtl0_transitions);
		switch (hvp->vtl_entry_reason) {
		case MSHV_ENTRY_REASON_INTERRUPT:
			if (!mshv_vsm_capabilities.intercept_page_available &&
			    likely(!mshv_vtl_process_intercept()))
				goto done;
			break;

		case MSHV_ENTRY_REASON_INTERCEPT:
			WARN_ON(!mshv_vsm_capabilities.intercept_page_available);
			memcpy(mshv_vtl_this_run()->exit_message, hvp->intercept_message,
			       sizeof(hvp->intercept_message));
			goto done;

		default:
			panic("unknown entry reason: %d", hvp->vtl_entry_reason);
		}
	}

done:
	preempt_enable();

	return 0;
}

static long
mshv_vtl_ioctl_get_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr,
			   sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, false);
	if (!ret)
		goto copy_args; /* No need for a hypercall */
	ret = vtl_get_vp_register(&reg);
	if (ret)
		return ret;

copy_args:
	if (copy_to_user((void __user *)args.regs_ptr, &reg, sizeof(reg)))
		ret = -EFAULT;

	return ret;
}

static long
mshv_vtl_ioctl_set_regs(void __user *user_args)
{
	struct mshv_vp_registers args;
	struct hv_register_assoc reg;
	long ret;

	if (copy_from_user(&args, user_args, sizeof(args)))
		return -EFAULT;

	/* This IOCTL supports processing only one register at a time. */
	if (args.count != 1)
		return -EINVAL;

	if (copy_from_user(&reg, (void __user *)args.regs_ptr, sizeof(reg)))
		return -EFAULT;

	ret = mshv_vtl_get_set_reg(&reg, true);
	if (!ret)
		return ret; /* No need for a hypercall */
	ret = vtl_set_vp_register(&reg);

	return ret;
}

static long
mshv_vtl_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
	long ret;
	struct mshv_vtl *vtl = filp->private_data;

	switch (ioctl) {
	case MSHV_SET_POLL_FILE:
		ret = mshv_vtl_ioctl_set_poll_file((struct mshv_vtl_set_poll_file __user *)arg);
		break;
	case MSHV_GET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_get_regs((void __user *)arg);
		break;
	case MSHV_SET_VP_REGISTERS:
		ret = mshv_vtl_ioctl_set_regs((void __user *)arg);
		break;
	case MSHV_RETURN_TO_LOWER_VTL:
		ret = mshv_vtl_ioctl_return_to_lower_vtl();
		break;
	case MSHV_ADD_VTL0_MEMORY:
		ret = mshv_vtl_ioctl_add_vtl0_mem(vtl, (void __user *)arg);
		break;
	default:
		dev_err(vtl->module_dev, "invalid vtl ioctl: %#x\n", ioctl);
		ret = -ENOTTY;
	}

	return ret;
}

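/*
 * mmap() fault handler for the per-VTL fd.  The page offset encodes the
 * target CPU in its low 16 bits and the page kind (run page or register
 * page) in the remaining bits.
 *
 * Illustrative user-space mapping of CPU 3's run page (an assumed snippet,
 * not part of this file; user space mirrors the offset constants above):
 *
 *	off_t off = ((off_t)MSHV_RUN_PAGE_OFFSET << MSHV_REAL_OFF_SHIFT | 3) *
 *		    PAGE_SIZE;
 *	run = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   vtl_fd, off);
 */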
static vm_fault_t mshv_vtl_fault(struct vm_fault *vmf)
{
	struct page *page;
	int cpu = vmf->pgoff & MSHV_PG_OFF_CPU_MASK;
	int real_off = vmf->pgoff >> MSHV_REAL_OFF_SHIFT;

	if (!cpu_online(cpu))
		return VM_FAULT_SIGBUS;
	/*
	 * CPU Hotplug is not supported in VTL2 in OpenHCL, where this kernel driver exists.
	 * CPU is expected to remain online after above cpu_online() check.
	 */

	if (real_off == MSHV_RUN_PAGE_OFFSET) {
		page = virt_to_page(mshv_vtl_cpu_run(cpu));
	} else if (real_off == MSHV_REG_PAGE_OFFSET) {
		if (!mshv_has_reg_page)
			return VM_FAULT_SIGBUS;
		page = mshv_vtl_cpu_reg_page(cpu);
	} else {
		return VM_FAULT_NOPAGE;
	}

	get_page(page);
	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct mshv_vtl_vm_ops = {
	.fault = mshv_vtl_fault,
};

static int mshv_vtl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_vm_ops;

	return 0;
}

static int mshv_vtl_release(struct inode *inode, struct file *filp)
{
	struct mshv_vtl *vtl = filp->private_data;

	kfree(vtl);

	return 0;
}

static const struct file_operations mshv_vtl_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = mshv_vtl_ioctl,
	.release = mshv_vtl_release,
	.mmap = mshv_vtl_mmap,
};

static void mshv_vtl_synic_mask_vmbus_sint(const u8 *mask)
{
	union hv_synic_sint sint;

	sint.as_uint64 = 0;
	sint.vector = HYPERVISOR_CALLBACK_VECTOR;
	sint.masked = (*mask != 0);
	sint.auto_eoi = hv_recommend_using_aeoi();

	hv_set_msr(HV_MSR_SINT0 + VTL2_VMBUS_SINT_INDEX,
		   sint.as_uint64);

	if (!sint.masked)
		pr_debug("%s: Unmasking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
	else
		pr_debug("%s: Masking VTL2 VMBUS SINT on VP %d\n", __func__, smp_processor_id());
}

static void mshv_vtl_read_remote(void *buffer)
{
	struct hv_per_cpu_context *mshv_cpu = this_cpu_ptr(hv_context.cpu_context);
	struct hv_message *msg = (struct hv_message *)mshv_cpu->hyp_synic_message_page +
					VTL2_VMBUS_SINT_INDEX;
	u32 message_type = READ_ONCE(msg->header.message_type);

	WRITE_ONCE(has_message, false);
	if (message_type == HVMSG_NONE)
		return;

	memcpy(buffer, msg, sizeof(*msg));
	vmbus_signal_eom(msg, message_type);
}

static bool vtl_synic_mask_vmbus_sint_masked = true;

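/*
 * read() on /dev/mshv_sint returns one SynIC message at a time, fetched on
 * the VMBus connect CPU.  It blocks (unless O_NONBLOCK) until a message
 * arrives and returns 0 (EOF) once the SINT has been masked via
 * MSHV_SINT_PAUSE_MESSAGE_STREAM.
 */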
static ssize_t mshv_vtl_sint_read(struct file *filp, char __user *arg, size_t size, loff_t *offset)
{
	struct hv_message msg = {};
	int ret;

	if (size < sizeof(msg))
		return -EINVAL;

	for (;;) {
		smp_call_function_single(VMBUS_CONNECT_CPU, mshv_vtl_read_remote, &msg, true);
		if (msg.header.message_type != HVMSG_NONE)
			break;

		if (READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
			return 0; /* EOF */

		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;

		ret = wait_event_interruptible(fd_wait_queue,
					       READ_ONCE(has_message) ||
					       READ_ONCE(vtl_synic_mask_vmbus_sint_masked));
		if (ret)
			return ret;
	}

	if (copy_to_user(arg, &msg, sizeof(msg)))
		return -EFAULT;

	return sizeof(msg);
}

static __poll_t mshv_vtl_sint_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask = 0;

	poll_wait(filp, &fd_wait_queue, wait);
	if (READ_ONCE(has_message) || READ_ONCE(vtl_synic_mask_vmbus_sint_masked))
		mask |= EPOLLIN | EPOLLRDNORM;

	return mask;
}

static void mshv_vtl_sint_on_msg_dpc(unsigned long data)
{
	WRITE_ONCE(has_message, true);
	wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);
}

static int mshv_vtl_sint_ioctl_post_msg(struct mshv_vtl_sint_post_msg __user *arg)
{
	struct mshv_vtl_sint_post_msg message;
	u8 payload[HV_MESSAGE_PAYLOAD_BYTE_COUNT];

	if (copy_from_user(&message, arg, sizeof(message)))
		return -EFAULT;
	if (message.payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EINVAL;
	if (copy_from_user(payload, (void __user *)message.payload_ptr,
			   message.payload_size))
		return -EFAULT;

	return hv_post_message((union hv_connection_id)message.connection_id,
			       message.message_type, (void *)payload,
			       message.payload_size);
}

static int mshv_vtl_sint_ioctl_signal_event(struct mshv_vtl_signal_event __user *arg)
{
	u64 input, status;
	struct mshv_vtl_signal_event signal_event;

	if (copy_from_user(&signal_event, arg, sizeof(signal_event)))
		return -EFAULT;

	input = signal_event.connection_id | ((u64)signal_event.flag << 32);

	status = hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input);

	return hv_result_to_errno(status);
}

static int mshv_vtl_sint_ioctl_set_eventfd(struct mshv_vtl_set_eventfd __user *arg)
{
	struct mshv_vtl_set_eventfd set_eventfd;
	struct eventfd_ctx *eventfd, *old_eventfd;

	if (copy_from_user(&set_eventfd, arg, sizeof(set_eventfd)))
		return -EFAULT;
	if (set_eventfd.flag >= HV_EVENT_FLAGS_COUNT)
		return -EINVAL;

	eventfd = NULL;
	if (set_eventfd.fd >= 0) {
		eventfd = eventfd_ctx_fdget(set_eventfd.fd);
		if (IS_ERR(eventfd))
			return PTR_ERR(eventfd);
	}

	guard(mutex)(&flag_lock);
	old_eventfd = READ_ONCE(flag_eventfds[set_eventfd.flag]);
	WRITE_ONCE(flag_eventfds[set_eventfd.flag], eventfd);

	if (old_eventfd) {
		synchronize_rcu();
		eventfd_ctx_put(old_eventfd);
	}

	return 0;
}

static int mshv_vtl_sint_ioctl_pause_msg_stream(struct mshv_sint_mask __user *arg)
{
	static DEFINE_MUTEX(vtl2_vmbus_sint_mask_mutex);
	struct mshv_sint_mask mask;

	if (copy_from_user(&mask, arg, sizeof(mask)))
		return -EFAULT;
	guard(mutex)(&vtl2_vmbus_sint_mask_mutex);
	on_each_cpu((smp_call_func_t)mshv_vtl_synic_mask_vmbus_sint, &mask.mask, 1);
	WRITE_ONCE(vtl_synic_mask_vmbus_sint_masked, mask.mask != 0);
	if (mask.mask)
		wake_up_interruptible_poll(&fd_wait_queue, EPOLLIN);

	return 0;
}

static long mshv_vtl_sint_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case MSHV_SINT_POST_MESSAGE:
		return mshv_vtl_sint_ioctl_post_msg((struct mshv_vtl_sint_post_msg __user *)arg);
	case MSHV_SINT_SIGNAL_EVENT:
		return mshv_vtl_sint_ioctl_signal_event((struct mshv_vtl_signal_event __user *)arg);
	case MSHV_SINT_SET_EVENTFD:
		return mshv_vtl_sint_ioctl_set_eventfd((struct mshv_vtl_set_eventfd __user *)arg);
	case MSHV_SINT_PAUSE_MESSAGE_STREAM:
		return mshv_vtl_sint_ioctl_pause_msg_stream((struct mshv_sint_mask __user *)arg);
	default:
		return -ENOIOCTLCMD;
	}
}

static const struct file_operations mshv_vtl_sint_ops = {
	.owner = THIS_MODULE,
	.read = mshv_vtl_sint_read,
	.poll = mshv_vtl_sint_poll,
	.unlocked_ioctl = mshv_vtl_sint_ioctl,
};

static struct miscdevice mshv_vtl_sint_dev = {
	.name = "mshv_sint",
	.fops = &mshv_vtl_sint_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_hvcall_dev_open(struct inode *node, struct file *f)
{
	struct miscdevice *dev = f->private_data;
	struct mshv_vtl_hvcall_fd *fd;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	fd = vzalloc(sizeof(*fd));
	if (!fd)
		return -ENOMEM;
	fd->dev = dev;
	f->private_data = fd;
	mutex_init(&fd->init_mutex);

	return 0;
}

static int mshv_vtl_hvcall_dev_release(struct inode *node, struct file *f)
{
	struct mshv_vtl_hvcall_fd *fd;

	fd = f->private_data;
	if (fd) {
		vfree(fd);
		f->private_data = NULL;
	}

	return 0;
}

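/*
 * Install the hypercall allow bitmap for an open /dev/mshv_hvcall fd.  Each
 * bit corresponds to a 16-bit hypercall call code, and the map can be set
 * only once per fd.
 *
 * Illustrative use (an assumed user-space flow, not part of this file): to
 * permit only HVCALL_GET_VP_REGISTERS (call code 0x50 per the TLFS), user
 * space sets bit 0x50 in its bitmap, points mshv_vtl_hvcall_setup at it, and
 * issues MSHV_HVCALL_SETUP before any MSHV_HVCALL.
 */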
static int mshv_vtl_hvcall_do_setup(struct mshv_vtl_hvcall_fd *fd,
				    struct mshv_vtl_hvcall_setup __user *hvcall_setup_user)
{
	struct mshv_vtl_hvcall_setup hvcall_setup;

	guard(mutex)(&fd->init_mutex);

	if (fd->allow_map_initialized) {
		dev_err(fd->dev->this_device,
			"Hypercall allow map has already been set, pid %d\n",
			current->pid);
		return -EINVAL;
	}

	if (copy_from_user(&hvcall_setup, hvcall_setup_user,
			   sizeof(struct mshv_vtl_hvcall_setup))) {
		return -EFAULT;
	}
	if (hvcall_setup.bitmap_array_size > ARRAY_SIZE(fd->allow_bitmap))
		return -EINVAL;

	if (copy_from_user(&fd->allow_bitmap,
			   (void __user *)hvcall_setup.allow_bitmap_ptr,
			   hvcall_setup.bitmap_array_size)) {
		return -EFAULT;
	}

	dev_info(fd->dev->this_device, "Hypercall allow map has been set, pid %d\n",
		 current->pid);
	fd->allow_map_initialized = true;
	return 0;
}

static bool mshv_vtl_hvcall_is_allowed(struct mshv_vtl_hvcall_fd *fd, u16 call_code)
{
	return test_bit(call_code, (unsigned long *)fd->allow_bitmap);
}

static int mshv_vtl_hvcall_call(struct mshv_vtl_hvcall_fd *fd,
				struct mshv_vtl_hvcall __user *hvcall_user)
{
	struct mshv_vtl_hvcall hvcall;
	void *in, *out;
	int ret;

	if (copy_from_user(&hvcall, hvcall_user, sizeof(struct mshv_vtl_hvcall)))
		return -EFAULT;
	if (hvcall.input_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;
	if (hvcall.output_size > HV_HYP_PAGE_SIZE)
		return -EINVAL;

	/*
	 * By default, all hypercalls are not allowed.
	 * The user mode code has to set up the allow bitmap once.
	 */

	if (!mshv_vtl_hvcall_is_allowed(fd, hvcall.control & 0xFFFF)) {
		dev_err(fd->dev->this_device,
			"Hypercall with control data %#llx isn't allowed\n",
			hvcall.control);
		return -EPERM;
	}

	/*
	 * This may create a problem for Confidential VM (CVM) usecase where we need to use
	 * Hyper-V driver allocated per-cpu input and output pages (hyperv_pcpu_input_arg and
	 * hyperv_pcpu_output_arg) for making a hypervisor call.
	 *
	 * TODO: Take care of this when CVM support is added.
	 */
	in = (void *)__get_free_page(GFP_KERNEL);
	out = (void *)__get_free_page(GFP_KERNEL);
	if (!in || !out) {
		ret = -ENOMEM;
		goto free_pages;
	}

	if (copy_from_user(in, (void __user *)hvcall.input_ptr, hvcall.input_size)) {
		ret = -EFAULT;
		goto free_pages;
	}

	hvcall.status = hv_do_hypercall(hvcall.control, in, out);

	if (copy_to_user((void __user *)hvcall.output_ptr, out, hvcall.output_size)) {
		ret = -EFAULT;
		goto free_pages;
	}
	ret = put_user(hvcall.status, &hvcall_user->status);
free_pages:
	free_page((unsigned long)in);
	free_page((unsigned long)out);

	return ret;
}

static long mshv_vtl_hvcall_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
	struct mshv_vtl_hvcall_fd *fd = f->private_data;

	switch (cmd) {
	case MSHV_HVCALL_SETUP:
		return mshv_vtl_hvcall_do_setup(fd, (struct mshv_vtl_hvcall_setup __user *)arg);
	case MSHV_HVCALL:
		return mshv_vtl_hvcall_call(fd, (struct mshv_vtl_hvcall __user *)arg);
	default:
		break;
	}

	return -ENOIOCTLCMD;
}

static const struct file_operations mshv_vtl_hvcall_dev_file_ops = {
	.owner = THIS_MODULE,
	.open = mshv_vtl_hvcall_dev_open,
	.release = mshv_vtl_hvcall_dev_release,
	.unlocked_ioctl = mshv_vtl_hvcall_dev_ioctl,
};

static struct miscdevice mshv_vtl_hvcall_dev = {
	.name = "mshv_hvcall",
	.nodename = "mshv_hvcall",
	.fops = &mshv_vtl_hvcall_dev_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int mshv_vtl_low_open(struct inode *inodep, struct file *filp)
{
	pid_t pid = task_pid_vnr(current);
	uid_t uid = current_uid().val;
	int ret = 0;

	pr_debug("%s: Opening VTL low, task group %d, uid %d\n", __func__, pid, uid);

	if (capable(CAP_SYS_ADMIN)) {
		filp->private_data = inodep;
	} else {
		pr_err("%s: VTL low open failed: CAP_SYS_ADMIN required. task group %d, uid %d",
		       __func__, pid, uid);
		ret = -EPERM;
	}

	return ret;
}

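/*
 * Check that a huge mapping of the given size would be fully contained in
 * the VMA and naturally aligned, i.e. the virtual address and the file
 * offset agree modulo the mapping size.  On success, *pfn is rounded down to
 * the start of the aligned block.
 */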
static bool can_fault(struct vm_fault *vmf, unsigned long size, unsigned long *pfn)
{
	unsigned long mask = size - 1;
	unsigned long start = vmf->address & ~mask;
	unsigned long end = start + size;
	bool is_valid;

	is_valid = (vmf->address & mask) == ((vmf->pgoff << PAGE_SHIFT) & mask) &&
		   start >= vmf->vma->vm_start &&
		   end <= vmf->vma->vm_end;

	if (is_valid)
		*pfn = vmf->pgoff & ~(mask >> PAGE_SHIFT);

	return is_valid;
}

static vm_fault_t mshv_vtl_low_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	unsigned long pfn = vmf->pgoff;
	vm_fault_t ret = VM_FAULT_FALLBACK;

	switch (order) {
	case 0:
		return vmf_insert_mixed(vmf->vma, vmf->address, pfn);

	case PMD_ORDER:
		if (can_fault(vmf, PMD_SIZE, &pfn))
			ret = vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	case PUD_ORDER:
		if (can_fault(vmf, PUD_SIZE, &pfn))
			ret = vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
		return ret;

	default:
		return VM_FAULT_SIGBUS;
	}
}

static vm_fault_t mshv_vtl_low_fault(struct vm_fault *vmf)
{
	return mshv_vtl_low_huge_fault(vmf, 0);
}

static const struct vm_operations_struct mshv_vtl_low_vm_ops = {
	.fault = mshv_vtl_low_fault,
	.huge_fault = mshv_vtl_low_huge_fault,
};

static int mshv_vtl_low_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &mshv_vtl_low_vm_ops;
	vm_flags_set(vma, VM_HUGEPAGE | VM_MIXEDMAP);

	return 0;
}

static const struct file_operations mshv_vtl_low_file_ops = {
	.owner = THIS_MODULE,
	.open = mshv_vtl_low_open,
	.mmap = mshv_vtl_low_mmap,
};

static struct miscdevice mshv_vtl_low = {
	.name = "mshv_vtl_low",
	.nodename = "mshv_vtl_low",
	.fops = &mshv_vtl_low_file_ops,
	.mode = 0600,
	.minor = MISC_DYNAMIC_MINOR,
};

static int __init mshv_vtl_init(void)
{
	int ret;
	struct device *dev;

	/*
	 * This creates /dev/mshv which provides functionality to create VTLs and partitions.
	 */
	ret = misc_register(&mshv_dev);
	if (ret) {
		pr_err("mshv device register failed: %d\n", ret);
		return ret;
	}
	dev = mshv_dev.this_device;

	tasklet_init(&msg_dpc, mshv_vtl_sint_on_msg_dpc, 0);
	init_waitqueue_head(&fd_wait_queue);

	if (mshv_vtl_get_vsm_regs()) {
		dev_emerg(dev, "Unable to get VSM capabilities !!\n");
		ret = -ENODEV;
		goto free_dev;
	}
	if (mshv_vtl_configure_vsm_partition(dev)) {
		dev_emerg(dev, "VSM configuration failed !!\n");
		ret = -ENODEV;
		goto free_dev;
	}

	mshv_vtl_return_call_init(mshv_vsm_page_offsets.vtl_return_offset);
	ret = hv_vtl_setup_synic();
	if (ret)
		goto free_dev;

	/*
	 * mshv_sint device adds VMBus relay ioctl support.
	 * This provides a channel for VTL0 to communicate with VTL2.
	 */
	ret = misc_register(&mshv_vtl_sint_dev);
	if (ret)
		goto free_synic;

	/*
	 * mshv_hvcall device adds interface to enable userspace for direct hypercalls support.
	 */
	ret = misc_register(&mshv_vtl_hvcall_dev);
	if (ret)
		goto free_sint;

	/*
	 * mshv_vtl_low device is used to map VTL0 address space to a user-mode process in VTL2.
	 * It implements mmap() to allow a user-mode process in VTL2 to map to the address of VTL0.
	 */
	ret = misc_register(&mshv_vtl_low);
	if (ret)
		goto free_hvcall;

	/*
	 * "mshv vtl mem dev" device is later used to setup VTL0 memory.
	 */
	mem_dev = kzalloc(sizeof(*mem_dev), GFP_KERNEL);
	if (!mem_dev) {
		ret = -ENOMEM;
		goto free_low;
	}

	mutex_init(&mshv_vtl_poll_file_lock);

	device_initialize(mem_dev);
	dev_set_name(mem_dev, "mshv vtl mem dev");
	ret = device_add(mem_dev);
	if (ret) {
		dev_err(dev, "mshv vtl mem dev add: %d\n", ret);
		goto free_mem;
	}

	return 0;

free_mem:
	kfree(mem_dev);
free_low:
	misc_deregister(&mshv_vtl_low);
free_hvcall:
	misc_deregister(&mshv_vtl_hvcall_dev);
free_sint:
	misc_deregister(&mshv_vtl_sint_dev);
free_synic:
	hv_vtl_remove_synic();
free_dev:
	misc_deregister(&mshv_dev);

	return ret;
}

static void __exit mshv_vtl_exit(void)
{
	device_del(mem_dev);
	kfree(mem_dev);
	misc_deregister(&mshv_vtl_low);
	misc_deregister(&mshv_vtl_hvcall_dev);
	misc_deregister(&mshv_vtl_sint_dev);
	hv_vtl_remove_synic();
	misc_deregister(&mshv_dev);
}

module_init(mshv_vtl_init);
module_exit(mshv_vtl_exit);