kvm驱动学习笔记

kvm驱动调用大致流程
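open("/dev/kvm") 得到 kvm_fd → ioctl(kvm_fd, KVM_CREATE_VM) 得到 vm_fd → ioctl(vm_fd, KVM_CREATE_VCPU) 得到 vcpu_fd → mmap(vcpu_fd) 映射 kvm_run 共享区 → ioctl(vcpu_fd, KVM_RUN)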
kvm内核源码框架设计

1、kvm设备文件操作接口

cpp 复制代码
/*
 * File operations installed on the /dev/kvm character device.
 *
 * Every ioctl issued on the device node is dispatched by kvm_dev_ioctl().
 */
static struct file_operations kvm_chardev_ops = {
    /* Seek requests are handled by noop_llseek. */
    .llseek         = noop_llseek,
    /* Native ioctl entry point for the device node. */
    .unlocked_ioctl = kvm_dev_ioctl,
    /*
     * KVM_COMPAT() is a macro defined outside this excerpt; presumably it
     * fills in the compat ioctl handler - confirm against its definition.
     * Kept last so that any member it sets still takes precedence.
     */
    KVM_COMPAT(kvm_dev_ioctl),
};
/dev/kvm支持的命令
/*
 * kvm_dev_ioctl - ioctl dispatcher for the /dev/kvm device node.
 * @filp:  file the request arrived on; only forwarded to the arch hook.
 * @ioctl: request code.
 * @arg:   raw request argument.
 *
 * Returns the result of the selected handler. KVM_GET_API_VERSION and
 * KVM_GET_VCPU_MMAP_SIZE return -EINVAL when @arg is non-zero, the
 * KVM_TRACE_* requests return -EOPNOTSUPP, and any request not listed here
 * is passed to kvm_arch_dev_ioctl().
 */
static long kvm_dev_ioctl(struct file *filp,
              unsigned int ioctl, unsigned long arg)
{
    long mmap_bytes;

    switch (ioctl) {
    case KVM_GET_API_VERSION:
        /* This request carries no argument. */
        if (arg)
            return -EINVAL;
        return KVM_API_VERSION;

    case KVM_CREATE_VM:
        return kvm_dev_ioctl_create_vm(arg);

    case KVM_CHECK_EXTENSION:
        /* Device-level query: no VM is passed to the generic checker. */
        return kvm_vm_ioctl_check_extension_generic(NULL, arg);

    case KVM_GET_VCPU_MMAP_SIZE:
        /* This request carries no argument. */
        if (arg)
            return -EINVAL;
        /* Sum up the pages that make up the per-vcpu mmap region. */
        mmap_bytes = PAGE_SIZE;     /* struct kvm_run */
#ifdef CONFIG_X86
        mmap_bytes += PAGE_SIZE;    /* pio data page */
#endif
#ifdef CONFIG_KVM_MMIO
        mmap_bytes += PAGE_SIZE;    /* coalesced mmio ring page */
#endif
        return mmap_bytes;

    case KVM_TRACE_ENABLE:
    case KVM_TRACE_PAUSE:
    case KVM_TRACE_DISABLE:
        return -EOPNOTSUPP;

    default:
        return kvm_arch_dev_ioctl(filp, ioctl, arg);
    }
}

2、虚拟机操作接口

cpp 复制代码
/*
 * File operations installed on a VM file descriptor.
 *
 * ioctls on the VM fd are dispatched by kvm_vm_ioctl(); closing the last
 * reference to the file invokes kvm_vm_release().
 */
static struct file_operations kvm_vm_fops = {
    /* Seek requests are handled by noop_llseek. */
    .llseek         = noop_llseek,
    /* Native ioctl entry point for the VM fd. */
    .unlocked_ioctl = kvm_vm_ioctl,
    /* Called when the file is released. */
    .release        = kvm_vm_release,
    /*
     * KVM_COMPAT() is a macro defined outside this excerpt; presumably it
     * wires kvm_vm_compat_ioctl in as the compat ioctl handler - confirm.
     * Kept last so that any member it sets still takes precedence.
     */
    KVM_COMPAT(kvm_vm_compat_ioctl),
};

vm支持的命令

cpp 复制代码
/*
 * kvm_vm_ioctl - ioctl dispatcher for a VM file descriptor.
 * @filp:  VM file; filp->private_data is used as the struct kvm.
 * @ioctl: request code.
 * @arg:   raw argument. Most cases treat it as a user pointer (argp);
 *         KVM_CREATE_VCPU and KVM_CHECK_EXTENSION pass it on as an integer.
 *
 * Returns -EIO when the caller's mm differs from kvm->mm or when
 * kvm->vm_bugged is set. Otherwise returns the selected handler's result;
 * cases that copy a structure from or to user space return -EFAULT when
 * that copy fails. Requests not listed here go to kvm_arch_vm_ioctl().
 *
 * Convention used throughout: r is set to the error code *before* each
 * step that can fail, so a bare "goto out" returns the right value.
 */
static long kvm_vm_ioctl(struct file *filp,
               unsigned int ioctl, unsigned long arg)
{
    struct kvm *kvm = filp->private_data;
    void __user *argp = (void __user *)arg;
    int r;

    /* Refuse callers with a different mm, and VMs flagged vm_bugged. */
    if (kvm->mm != current->mm || kvm->vm_bugged)
        return -EIO;
    switch (ioctl) {
    case KVM_CREATE_VCPU:
        /* arg is passed through as a plain integer, not a pointer. */
        r = kvm_vm_ioctl_create_vcpu(kvm, arg);
        break;
    case KVM_ENABLE_CAP: {
        struct kvm_enable_cap cap;
        r = -EFAULT;
        if (copy_from_user(&cap, argp, sizeof(cap)))
            goto out;
        r = kvm_vm_ioctl_enable_cap_generic(kvm, &cap);
        break;
    }
    case KVM_SET_USER_MEMORY_REGION: {
        struct kvm_userspace_memory_region kvm_userspace_mem;
        r = -EFAULT;
        if (copy_from_user(&kvm_userspace_mem, argp,
                        sizeof(kvm_userspace_mem)))
            goto out;
        r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
        break;
    }
    case KVM_GET_DIRTY_LOG: {
        struct kvm_dirty_log log;
        r = -EFAULT;
        if (copy_from_user(&log, argp, sizeof(log)))
            goto out;
        r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
        break;
    }
#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
    case KVM_CLEAR_DIRTY_LOG: {
        struct kvm_clear_dirty_log log;
        r = -EFAULT;
        if (copy_from_user(&log, argp, sizeof(log)))
            goto out;
        r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
        break;
    }
#endif
#ifdef CONFIG_KVM_MMIO
    case KVM_REGISTER_COALESCED_MMIO: {
        struct kvm_coalesced_mmio_zone zone;
        r = -EFAULT;
        if (copy_from_user(&zone, argp, sizeof(zone)))
            goto out;
        r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
        break;
    }
    case KVM_UNREGISTER_COALESCED_MMIO: {
        struct kvm_coalesced_mmio_zone zone;
        r = -EFAULT;
        if (copy_from_user(&zone, argp, sizeof(zone)))
            goto out;
        r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
        break;
    }
#endif
    case KVM_IRQFD: {
        struct kvm_irqfd data;
        r = -EFAULT;
        if (copy_from_user(&data, argp, sizeof(data)))
            goto out;
        r = kvm_irqfd(kvm, &data);
        break;
    }
    case KVM_IOEVENTFD: {
        struct kvm_ioeventfd data;
        r = -EFAULT;
        if (copy_from_user(&data, argp, sizeof(data)))
            goto out;
        r = kvm_ioeventfd(kvm, &data);
        break;
    }
#ifdef CONFIG_HAVE_KVM_MSI
    case KVM_SIGNAL_MSI: {
        struct kvm_msi msi;
        r = -EFAULT;
        if (copy_from_user(&msi, argp, sizeof(msi)))
            goto out;
        r = kvm_send_userspace_msi(kvm, &msi);
        break;
    }
#endif
#ifdef __KVM_HAVE_IRQ_LINE
    /* Both requests share one body; they differ only in the copy-back. */
    case KVM_IRQ_LINE_STATUS:
    case KVM_IRQ_LINE: {
        struct kvm_irq_level irq_event;
        r = -EFAULT;
        if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
            goto out;
        /* The last argument tells the handler which variant was requested. */
        r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
                    ioctl == KVM_IRQ_LINE_STATUS);
        if (r)
            goto out;
        r = -EFAULT;
        /* Only the _STATUS variant writes irq_event back to user space. */
        if (ioctl == KVM_IRQ_LINE_STATUS) {
            if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
                goto out;
        }
        r = 0;
        break;
    }
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
    case KVM_SET_GSI_ROUTING: {
        struct kvm_irq_routing routing;
        struct kvm_irq_routing __user *urouting;
        /* Stays NULL when routing.nr is 0; freed at out_free_irq_routing. */
        struct kvm_irq_routing_entry *entries = NULL;
        r = -EFAULT;
        if (copy_from_user(&routing, argp, sizeof(routing)))
            goto out;
        /* All three validation failures below return -EINVAL. */
        r = -EINVAL;
        if (!kvm_arch_can_set_irq_routing(kvm))
            goto out;
        if (routing.nr > KVM_MAX_IRQ_ROUTES)
            goto out;
        if (routing.flags)
            goto out;
        if (routing.nr) {
            r = -ENOMEM;
            entries = vmalloc(array_size(sizeof(*entries),
                             routing.nr));
            if (!entries)
                goto out;
            r = -EFAULT;
            /* The entry array is read from the entries member of the user struct. */
            urouting = argp;
            if (copy_from_user(entries, urouting->entries,
                       routing.nr * sizeof(*entries)))
                goto out_free_irq_routing;
        }
        /* routing.flags is necessarily 0 here (non-zero was rejected above). */
        r = kvm_set_irq_routing(kvm, entries, routing.nr,
                    routing.flags);
        /* Reached on success and on copy failure; entries may be NULL. */
out_free_irq_routing:
        vfree(entries);
        break;
    }
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
    case KVM_CREATE_DEVICE: {
        struct kvm_create_device cd;
        r = -EFAULT;
        if (copy_from_user(&cd, argp, sizeof(cd)))
            goto out;
        r = kvm_ioctl_create_device(kvm, &cd);
        if (r)
            goto out;
        r = -EFAULT;
        /* cd is written back to user space after a successful create. */
        if (copy_to_user(argp, &cd, sizeof(cd)))
            goto out;
        r = 0;
        break;
    }
    case KVM_CHECK_EXTENSION:
        /* Same generic checker as the /dev/kvm path, but with this VM. */
        r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
        break;
    default:
        /* Anything not handled above is left to the architecture code. */
        r = kvm_arch_vm_ioctl(filp, ioctl, arg);
    }
out:
    return r;
}

3、vcpu操作接口

cpp 复制代码
/*
 * File operations installed on a vcpu file descriptor.
 *
 * ioctls are dispatched by kvm_vcpu_ioctl(), mmap() requests are handled
 * by kvm_vcpu_mmap(), and releasing the file invokes kvm_vcpu_release().
 */
static struct file_operations kvm_vcpu_fops = {
    /* Seek requests are handled by noop_llseek. */
    .llseek         = noop_llseek,
    /* mmap() on the vcpu fd. */
    .mmap           = kvm_vcpu_mmap,
    /* Native ioctl entry point for the vcpu fd. */
    .unlocked_ioctl = kvm_vcpu_ioctl,
    /* Called when the file is released. */
    .release        = kvm_vcpu_release,
    /*
     * KVM_COMPAT() is a macro defined outside this excerpt; presumably it
     * wires kvm_vcpu_compat_ioctl in as the compat ioctl handler - confirm.
     * Kept last so that any member it sets still takes precedence.
     */
    KVM_COMPAT(kvm_vcpu_compat_ioctl),
};

vcpu支持的命令

cpp 复制代码
/*
 * kvm_vcpu_ioctl - ioctl dispatcher for a vcpu file descriptor.
 * @filp:  vcpu file; filp->private_data is used as the struct kvm_vcpu.
 * @ioctl: request code.
 * @arg:   raw argument; treated as a user pointer (argp) by most cases and
 *         required to be 0 for KVM_RUN.
 *
 * Early exits (taken before vcpu->mutex is acquired):
 *   -EIO     caller's mm differs from the VM's mm, or vm_bugged is set
 *   -EINVAL  the ioctl type field is not KVMIO
 *   any value other than -ENOIOCTLCMD from kvm_arch_vcpu_async_ioctl()
 *   -EINTR   mutex_lock_killable() returned non-zero
 *
 * After that every path leaves through "out", which unlocks the mutex and
 * frees fpu / kvm_sregs. r is set to the error code before each step that
 * can fail, so a bare "goto out" returns the right value.
 */
static long kvm_vcpu_ioctl(struct file *filp,
               unsigned int ioctl, unsigned long arg)
{
    struct kvm_vcpu *vcpu = filp->private_data;
    void __user *argp = (void __user *)arg;
    int r;
    /*
     * Function scope (rather than per-case) so that the common exit can
     * kfree() them; both start as NULL.
     */
    struct kvm_fpu *fpu = NULL;
    struct kvm_sregs *kvm_sregs = NULL;
    if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
        return -EIO;
    if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
        return -EINVAL;
    /*
     * Some architectures have vcpu ioctls that are asynchronous to vcpu
     * execution; mutex_lock() would break them.
     */
    r = kvm_arch_vcpu_async_ioctl(filp, ioctl, arg);
    /* -ENOIOCTLCMD means the async hook did not handle this request. */
    if (r != -ENOIOCTLCMD)
        return r;
    if (mutex_lock_killable(&vcpu->mutex))
        return -EINTR;
    switch (ioctl) {
    case KVM_RUN: {
        struct pid *oldpid;
        /* KVM_RUN takes no argument. */
        r = -EINVAL;
        if (arg)
            goto out;
        oldpid = rcu_access_pointer(vcpu->pid);
        if (unlikely(oldpid != task_pid(current))) {
            /* The thread running this VCPU changed. */
            struct pid *newpid;
            r = kvm_arch_vcpu_run_pid_change(vcpu);
            if (r)
                break;
            /* Publish the new pid, then drop the reference to the old one. */
            newpid = get_task_pid(current, PIDTYPE_PID);
            rcu_assign_pointer(vcpu->pid, newpid);
            if (oldpid)
                synchronize_rcu();
            put_pid(oldpid);
        }
        r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
        trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
        break;
    }
    case KVM_GET_REGS: {
        /* Case-local buffer, freed at out_free1 rather than at "out". */
        struct kvm_regs *kvm_regs;
        r = -ENOMEM;
        kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL_ACCOUNT);
        if (!kvm_regs)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
        if (r)
            goto out_free1;
        r = -EFAULT;
        if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
            goto out_free1;
        r = 0;
out_free1:
        kfree(kvm_regs);
        break;
    }
    case KVM_SET_REGS: {
        struct kvm_regs *kvm_regs;
        /*
         * NOTE(review): this value is never returned; r is overwritten on
         * both paths below (PTR_ERR() or the set_regs result).
         */
        r = -ENOMEM;
        kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
        if (IS_ERR(kvm_regs)) {
            r = PTR_ERR(kvm_regs);
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
        kfree(kvm_regs);
        break;
    }
    case KVM_GET_SREGS: {
        /* kvm_sregs is released by the kfree() at the common exit. */
        kvm_sregs = kzalloc(sizeof(struct kvm_sregs),
                    GFP_KERNEL_ACCOUNT);
        r = -ENOMEM;
        if (!kvm_sregs)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_SREGS: {
        kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
        if (IS_ERR(kvm_sregs)) {
            r = PTR_ERR(kvm_sregs);
            /* Reset so the exit path does not kfree() an error pointer. */
            kvm_sregs = NULL;
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
        break;
    }
    case KVM_GET_MP_STATE: {
        struct kvm_mp_state mp_state;
        r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_MP_STATE: {
        struct kvm_mp_state mp_state;
        r = -EFAULT;
        if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
            goto out;
        r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
        break;
    }
    case KVM_TRANSLATE: {
        /* In/out request: tr is read from user space and written back. */
        struct kvm_translation tr;
        r = -EFAULT;
        if (copy_from_user(&tr, argp, sizeof(tr)))
            goto out;
        r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, &tr, sizeof(tr)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_GUEST_DEBUG: {
        struct kvm_guest_debug dbg;
        r = -EFAULT;
        if (copy_from_user(&dbg, argp, sizeof(dbg)))
            goto out;
        r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
        break;
    }
    case KVM_SET_SIGNAL_MASK: {
        struct kvm_signal_mask __user *sigmask_arg = argp;
        struct kvm_signal_mask kvm_sigmask;
        sigset_t sigset, *p;
        /* A NULL argp leaves p NULL, which is what the handler then receives. */
        p = NULL;
        if (argp) {
            r = -EFAULT;
            if (copy_from_user(&kvm_sigmask, argp,
                       sizeof(kvm_sigmask)))
                goto out;
            /* The user-supplied length must equal sizeof(sigset) exactly. */
            r = -EINVAL;
            if (kvm_sigmask.len != sizeof(sigset))
                goto out;
            r = -EFAULT;
            if (copy_from_user(&sigset, sigmask_arg->sigset,
                       sizeof(sigset)))
                goto out;
            p = &sigset;
        }
        r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
        break;
    }
    case KVM_GET_FPU: {
        /* fpu is released by the kfree() at the common exit. */
        fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL_ACCOUNT);
        r = -ENOMEM;
        if (!fpu)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_FPU: {
        fpu = memdup_user(argp, sizeof(*fpu));
        if (IS_ERR(fpu)) {
            r = PTR_ERR(fpu);
            /* Reset so the exit path does not kfree() an error pointer. */
            fpu = NULL;
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
        break;
    }
    default:
        /* Anything not handled above is left to the architecture code. */
        r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
    }
out:
    /* Common exit: drop the vcpu mutex, then free the shared buffers. */
    mutex_unlock(&vcpu->mutex);
    kfree(fpu);
    kfree(kvm_sregs);
    return r;
}

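内存映射:用户态对 vcpu fd 调用 mmap(),把内核中该 vcpu 的共享区域(第一页为 struct kvm_run;x86 上还包含 pio 数据页,开启 CONFIG_KVM_MMIO 时还包含 coalesced mmio ring 页)映射到 QEMU 的用户态地址空间(HVA,即宿主机虚拟地址)。映射长度通过对 /dev/kvm 执行 KVM_GET_VCPU_MMAP_SIZE 获得。内核侧的 mmap 处理函数如下: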

cpp 复制代码
/*
 * kvm_vcpu_mmap - mmap handler for a vcpu file descriptor.
 * @file: vcpu file being mapped (not examined here).
 * @vma:  the mapping being set up.
 *
 * Only installs kvm_vcpu_vm_ops on the VMA and reports success; nothing is
 * mapped at this point. Presumably the pages are supplied later by the
 * handlers in kvm_vcpu_vm_ops (defined outside this excerpt) - confirm.
 *
 * Always returns 0.
 */
static int kvm_vcpu_mmap(struct file *file,
                         struct vm_area_struct *vma)
{
    vma->vm_ops = &kvm_vcpu_vm_ops;

    return 0;
}

QEMU 用户态调用示例(注意:这里的 kvm_vm_ioctl()/kvm_ioctl() 是 QEMU 对 ioctl(2) 的封装,与上文内核中的同名函数 kvm_vm_ioctl 不是同一个函数):

cpp 复制代码
/*
 * kvm_init_vcpu - create the KVM vcpu backing @env and map its shared area.
 * @env: CPU state; cpu_index is used as the vcpu id, and kvm_fd / kvm_run
 *       are filled in by this function.
 *
 * Returns 0 on success and -1 on any failure.
 *
 * NOTE(review): every failure is reported as -1, so the underlying error
 * value is not forwarded to the caller. If a step after vcpu creation
 * fails, env->kvm_fd still holds the open vcpu fd (it is not closed here),
 * and on mmap failure env->kvm_run is left holding MAP_FAILED.
 */
int kvm_init_vcpu(CPUState *env)
{
    KVMState *kvm = kvm_state;
    long map_len;
    int vcpu_fd;

    /* Step 1: create the vcpu; the ioctl result is the vcpu fd. */
    vcpu_fd = kvm_vm_ioctl(kvm, KVM_CREATE_VCPU, env->cpu_index);
    if (vcpu_fd < 0)
        goto fail;
    env->kvm_fd = vcpu_fd;

    /*
     * Step 2: ask KVM how many bytes of the vcpu fd must be mapped. The
     * region starts with struct kvm_run and may include further pages, so
     * the size has to be queried rather than assumed.
     */
    map_len = kvm_ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (map_len < 0)
        goto fail;

    /* Step 3: mmap(2) on the vcpu fd; this reaches the kernel's kvm_vcpu_mmap. */
    env->kvm_run = mmap(NULL, map_len,
                        PROT_READ | PROT_WRITE,
                        MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED)
        goto fail;

    return 0;

fail:
    return -1;
}
相关推荐
ggaofeng3 天前
如何通过uboot加载硬盘
linux·qemu·uboot
ScilogyHunter4 天前
QEMU完全指南
linux·qemu
冰山一脚201311 天前
qemu的cpu加速器分析笔记
qemu
longji1 个月前
win11 使用 QEMU11 模拟器跑龙芯系统(debian13,openKylin,openEuler,uos,Loongnix)
qemu·龙芯模拟器
冰山一脚20131 个月前
qemu的板级初始化笔记(以i4ffx为例)
qemu
冰山一脚20131 个月前
CPU的L1、L2、L3缓存笔记
qemu
Shining05962 个月前
QEMU 编译开发环境搭建
人工智能·语言模型·自然语言处理·云原生·qemu·vllm·华为昇腾
高铭杰2 个月前
Postgresql热迁移pgbench持续读写零中断
postgresql·qemu·neon