【QEMU系统分析之启动篇(十一)】

系列文章目录

第十一章 QEMU系统仿真的加速器初始化分析



前言

本文以 QEMU 8.2.2 为例,分析其作为系统仿真工具的启动过程,并为读者展示各种 QEMU 系统仿真的启动配置实例。
本文读者需要具备一定的 QEMU 系统仿真使用经验,并对 C 语言编程有一定了解。


一、QEMU是什么?

QEMU 是一个通用且开源的机器模拟器和虚拟机。
其官方主页是:https://www.qemu.org/


二、QEMU系统仿真的启动分析

1.系统仿真的初始化代码

QEMU 作为系统仿真工具,其入口代码在 system/main.c 文件中,初始化函数 qemu_init() 的实现在 system/vl.c 文件中,在完成 QEMU 虚拟机配置项处理和应用后,系统将配置虚拟机的加速器。本篇文章将完成以下代码部分的分析。

2.主循环数据初始化

这部分代码在 system/vl.c 文件中,实现如下:

void qemu_init(int argc, char **argv)
{
...
    /*
     * Note: uses machine properties such as kernel-irqchip, must run
     * after qemu_apply_machine_options.
     */
    configure_accelerators(argv[0]);
    phase_advance(PHASE_ACCEL_CREATED);
...
}

代码首先通过函数 qemu_disable_default_devices() 将所有默认设备的设置复位。

configure_accelerators()

此函数在 /system/vl.c 文件中,定义如下:

static void configure_accelerators(const char *progname)
{
    bool init_failed = false;

    qemu_opts_foreach(qemu_find_opts("icount"),
                      do_configure_icount, NULL, &error_fatal);

    if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
        char **accel_list, **tmp;

        if (accelerators == NULL) {
            /* Select the default accelerator */
            bool have_tcg = accel_find("tcg");
            bool have_kvm = accel_find("kvm");

            if (have_tcg && have_kvm) {
                if (g_str_has_suffix(progname, "kvm")) {
                    /* If the program name ends with "kvm", we prefer KVM */
                    accelerators = "kvm:tcg";
                } else {
                    accelerators = "tcg:kvm";
                }
            } else if (have_kvm) {
                accelerators = "kvm";
            } else if (have_tcg) {
                accelerators = "tcg";
            } else {
                error_report("No accelerator selected and"
                             " no default accelerator available");
                exit(1);
            }
        }
        accel_list = g_strsplit(accelerators, ":", 0);

        for (tmp = accel_list; *tmp; tmp++) {
            /*
             * Filter invalid accelerators here, to prevent obscenities
             * such as "-machine accel=tcg,,thread=single".
             */
            if (accel_find(*tmp)) {
                qemu_opts_parse_noisily(qemu_find_opts("accel"), *tmp, true);
            } else {
                init_failed = true;
                error_report("invalid accelerator %s", *tmp);
            }
        }
        g_strfreev(accel_list);
    } else {
        if (accelerators != NULL) {
            error_report("The -accel and \"-machine accel=\" options are incompatible");
            exit(1);
        }
    }

    if (!qemu_opts_foreach(qemu_find_opts("accel"),
                           do_configure_accelerator, &init_failed, &error_fatal)) {
        if (!init_failed) {
            error_report("no accelerator found");
        }
        exit(1);
    }

    if (init_failed && !qtest_chrdev) {
        error_report("falling back to %s", current_accel_name());
    }

    if (icount_enabled() && !tcg_enabled()) {
        error_report("-icount is not allowed with hardware virtualization");
        exit(1);
    }
}


函数 do_configure_icount() 定义如下:

static int do_configure_icount(void *opaque, QemuOpts *opts, Error **errp)
{
    icount_configure(opts, errp);
    return 0;
}

而函数 icount_configure() 在 TCG 加速器中定义如下:

void icount_configure(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        icount_enable_precise();
        return;
    }

    icount_enable_adaptive();

    /*
     * 125MIPS seems a reasonable initial guess at the guest speed.
     * It will be corrected fairly quickly anyway.
     */
    timers_state.icount_time_shift = 3;

    /*
     * Have both realtime and virtual time triggers for speed adjustment.
     * The realtime trigger catches emulated time passing too slowly,
     * the virtual time trigger catches emulated time passing too fast.
     * Realtime triggers occur even when idle, so use them less frequently
     * than VM triggers.
     */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}

如果没有设定加速器,默认查找是否支持 TCG 或 KVM 加速器,如果有则配置默认加速器,实现代码如下:

        if (accelerators == NULL) {
            /* Select the default accelerator */
            bool have_tcg = accel_find("tcg");
            bool have_kvm = accel_find("kvm");

            if (have_tcg && have_kvm) {
                if (g_str_has_suffix(progname, "kvm")) {
                    /* If the program name ends with "kvm", we prefer KVM */
                    accelerators = "kvm:tcg";
                } else {
                    accelerators = "tcg:kvm";
                }
            } else if (have_kvm) {
                accelerators = "kvm";
            } else if (have_tcg) {
                accelerators = "tcg";
            } else {
                error_report("No accelerator selected and"
                             " no default accelerator available");
                exit(1);
            }
        }

函数 do_configure_accelerator() 定义如下:

static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
{
    bool *p_init_failed = opaque;
    const char *acc = qemu_opt_get(opts, "accel");
    AccelClass *ac = accel_find(acc);
    AccelState *accel;
    int ret;
    bool qtest_with_kvm;

    if (!acc) {
        error_setg(errp, QERR_MISSING_PARAMETER, "accel");
        goto bad;
    }

    qtest_with_kvm = g_str_equal(acc, "kvm") && qtest_chrdev != NULL;

    if (!ac) {
        if (!qtest_with_kvm) {
            error_report("invalid accelerator %s", acc);
        }
        goto bad;
    }
    accel = ACCEL(object_new_with_class(OBJECT_CLASS(ac)));
    object_apply_compat_props(OBJECT(accel));
    qemu_opt_foreach(opts, accelerator_set_property,
                     accel,
                     &error_fatal);
    /*
     * If legacy -singlestep option is set, honour it for TCG and
     * silently ignore for any other accelerator (which is how this
     * option has always behaved).
     */
    if (opt_one_insn_per_tb) {
        /*
         * This will always succeed for TCG, and we want to ignore
         * the error from trying to set a nonexistent property
         * on any other accelerator.
         */
        object_property_set_bool(OBJECT(accel), "one-insn-per-tb", true, NULL);
    }
    ret = accel_init_machine(accel, current_machine);
    if (ret < 0) {
        if (!qtest_with_kvm || ret != -ENOENT) {
            error_report("failed to initialize %s: %s", acc, strerror(-ret));
        }
        goto bad;
    }

    return 1;

bad:
    *p_init_failed = true;
    return 0;
}

函数 accel_find() 定义如下:

/* Lookup AccelClass from opt_name. Returns NULL if not found */
AccelClass *accel_find(const char *opt_name)
{
    char *class_name = g_strdup_printf(ACCEL_CLASS_NAME("%s"), opt_name);
    AccelClass *ac = ACCEL_CLASS(module_object_class_by_name(class_name));
    g_free(class_name);
    return ac;
}

数据结构 struct AccelClass 定义如下:

struct AccelState {
    /*< private >*/
    Object parent_obj;
};

typedef struct AccelClass {
    /*< private >*/
    ObjectClass parent_class;
    /*< public >*/

    const char *name;
    int (*init_machine)(MachineState *ms);
#ifndef CONFIG_USER_ONLY
    void (*setup_post)(MachineState *ms, AccelState *accel);
    bool (*has_memory)(MachineState *ms, AddressSpace *as,
                       hwaddr start_addr, hwaddr size);
#endif
    bool (*cpu_common_realize)(CPUState *cpu, Error **errp);
    void (*cpu_common_unrealize)(CPUState *cpu);

    /* gdbstub related hooks */
    int (*gdbstub_supported_sstep_flags)(void);

    bool *allowed;
    /*
     * Array of global properties that would be applied when specific
     * accelerator is chosen. It works like MachineClass.compat_props
     * but it's for accelerators not machines. Accelerator-provided
     * global properties may be overridden by machine-type
     * compat_props or user-provided global properties.
     */
    GPtrArray *compat_props;
} AccelClass;

函数 accel_init_machine() 定义如下:

int accel_init_machine(AccelState *accel, MachineState *ms)
{
    AccelClass *acc = ACCEL_GET_CLASS(accel);
    int ret;
    ms->accelerator = accel;
    *(acc->allowed) = true;
    ret = acc->init_machine(ms);
    if (ret < 0) {
        ms->accelerator = NULL;
        *(acc->allowed) = false;
        object_unref(OBJECT(accel));
    } else {
        object_set_accelerator_compat_props(acc->compat_props);
    }
    return ret;
}

通过 accel_init_machine() 将目标机器和加速器关联在一起。

加速器的 init_machine() 函数根据每个加速器不同,有不同的实现,WHPX 的定义如下:

/*
 * Partition support
 */

static int whpx_accel_init(MachineState *ms)
{
    struct whpx_state *whpx;
    int ret;
    HRESULT hr;
    WHV_CAPABILITY whpx_cap;
    UINT32 whpx_cap_size;
    WHV_PARTITION_PROPERTY prop;
    UINT32 cpuidExitList[] = {1, 0x80000001};
    WHV_CAPABILITY_FEATURES features = {0};

    whpx = &whpx_global;

    if (!init_whp_dispatch()) {
        ret = -ENOSYS;
        goto error;
    }

    whpx->mem_quota = ms->ram_size;

    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeHypervisorPresent, &whpx_cap,
        sizeof(whpx_cap), &whpx_cap_size);
    if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
        error_report("WHPX: No accelerator found, hr=%08lx", hr);
        ret = -ENOSPC;
        goto error;
    }

    hr = whp_dispatch.WHvGetCapability(
        WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to create partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Query the XSAVE capability of the partition. Any error here is not
     * considered fatal.
     */
    hr = whp_dispatch.WHvGetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeProcessorXsaveFeatures,
        &whpx_xsave_cap,
        sizeof(whpx_xsave_cap),
        &whpx_cap_size);

    /*
     * Windows version which don't support this property will return with the
     * specific error code.
     */
    if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) {
        error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr);
    }

    if (!whpx_has_xsave()) {
        printf("WHPX: Partition is not XSAVE capable\n");
    }

    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ProcessorCount = ms->smp.cpus;
    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeProcessorCount,
        &prop,
        sizeof(WHV_PARTITION_PROPERTY));

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set partition processor count to %u,"
                     " hr=%08lx", prop.ProcessorCount, hr);
        ret = -EINVAL;
        goto error;
    }

    /*
     * Error out if WHP doesn't support apic emulation and user is requiring
     * it.
     */
    if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
            !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
        error_report("WHPX: kernel irqchip requested, but unavailable. "
            "Try without kernel-irqchip or with kernel-irqchip=off");
        ret = -EINVAL;
        goto error;
    }

    if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
        whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
        WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
            WHvX64LocalApicEmulationModeXApic;
        printf("WHPX: setting APIC emulation mode in the hypervisor\n");
        hr = whp_dispatch.WHvSetPartitionProperty(
            whpx->partition,
            WHvPartitionPropertyCodeLocalApicEmulationMode,
            &mode,
            sizeof(mode));
        if (FAILED(hr)) {
            error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
            if (whpx->kernel_irqchip_required) {
                error_report("WHPX: kernel irqchip requested, but unavailable");
                ret = -EINVAL;
                goto error;
            }
        } else {
            whpx->apic_in_platform = true;
        }
    }

    /* Register for MSR and CPUID exits */
    memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
    prop.ExtendedVmExits.X64MsrExit = 1;
    prop.ExtendedVmExits.X64CpuidExit = 1;
    prop.ExtendedVmExits.ExceptionExit = 1;
    if (whpx_apic_in_platform()) {
        prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
    }

    hr = whp_dispatch.WHvSetPartitionProperty(
            whpx->partition,
            WHvPartitionPropertyCodeExtendedVmExits,
            &prop,
            sizeof(WHV_PARTITION_PROPERTY));
    if (FAILED(hr)) {
        error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetPartitionProperty(
        whpx->partition,
        WHvPartitionPropertyCodeCpuidExitList,
        cpuidExitList,
        RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
                     hr);
        ret = -EINVAL;
        goto error;
    }

    /*
     * We do not want to intercept any exceptions from the guest,
     * until we actually start debugging with gdb.
     */
    whpx->exception_exit_bitmap = -1;
    hr = whpx_set_exception_exit_bitmap(0);

    if (FAILED(hr)) {
        error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    hr = whp_dispatch.WHvSetupPartition(whpx->partition);
    if (FAILED(hr)) {
        error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
        ret = -EINVAL;
        goto error;
    }

    whpx_memory_init();

    printf("Windows Hypervisor Platform accelerator is operational\n");
    return 0;

error:

    if (NULL != whpx->partition) {
        whp_dispatch.WHvDeletePartition(whpx->partition);
        whpx->partition = NULL;
    }

    return ret;
}

函数 init_whp_dispatch() 在 /target/i386/whpx/whpx-all.c 文件中,定义如下:

bool init_whp_dispatch(void)
{
    if (whp_dispatch_initialized) {
        return true;
    }

    if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
        goto error;
    }

    if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
        goto error;
    }

    assert(load_whp_dispatch_fns(&hWinHvPlatform,
        WINHV_PLATFORM_FNS_SUPPLEMENTAL));
    whp_dispatch_initialized = true;

    return true;
error:
    if (hWinHvPlatform) {
        FreeLibrary(hWinHvPlatform);
    }

    if (hWinHvEmulation) {
        FreeLibrary(hWinHvEmulation);
    }

    return false;
}

函数 load_whp_dispatch_fns() 定义如下:

/*
 * Load the functions from the given library, using the given handle. If a
 * handle is provided, it is used, otherwise the library is opened. The
 * handle will be updated on return with the opened one.
 */
static bool load_whp_dispatch_fns(HMODULE *handle,
    WHPFunctionList function_list)
{
    HMODULE hLib = *handle;

    #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
    #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \

    #define WHP_LOAD_FIELD(return_type, function_name, signature) \
        whp_dispatch.function_name = \
            (function_name ## _t)GetProcAddress(hLib, #function_name); \
        if (!whp_dispatch.function_name) { \
            error_report("Could not load function %s", #function_name); \
            goto error; \
        } \

    #define WHP_LOAD_LIB(lib_name, handle_lib) \
    if (!handle_lib) { \
        handle_lib = LoadLibrary(lib_name); \
        if (!handle_lib) { \
            error_report("Could not load library %s.", lib_name); \
            goto error; \
        } \
    } \

    switch (function_list) {
    case WINHV_PLATFORM_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_EMULATION_FNS_DEFAULT:
        WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
        LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
        break;

    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
        break;
    }

    *handle = hLib;
    return true;

error:
    if (hLib) {
        FreeLibrary(hLib);
    }

    return false;
}

WHPX 提供的函数类型列表如下:

typedef enum WHPFunctionList {
    WINHV_PLATFORM_FNS_DEFAULT,
    WINHV_EMULATION_FNS_DEFAULT,
    WINHV_PLATFORM_FNS_SUPPLEMENTAL
} WHPFunctionList;

phase_advance(PHASE_ACCEL_CREATED)

接下来,将机器状态置为 PHASE_ACCEL_CREATED,实现代码如下:

    phase_advance(PHASE_ACCEL_CREATED);

函数 phase_advance() 定义如下:

void phase_advance(MachineInitPhase phase)
{
    assert(machine_phase == phase - 1);
    machine_phase = phase;
}

机器状态 machine_phase 的数据类型定义如下:

typedef enum MachineInitPhase {
    /* current_machine is NULL.  */
    PHASE_NO_MACHINE,

    /* current_machine is not NULL, but current_machine->accel is NULL.  */
    PHASE_MACHINE_CREATED,

    /*
     * current_machine->accel is not NULL, but the machine properties have
     * not been validated and machine_class->init has not yet been called.
     */
    PHASE_ACCEL_CREATED,

    /*
     * machine_class->init has been called, thus creating any embedded
     * devices and validating machine properties.  Devices created at
     * this time are considered to be cold-plugged.
     */
    PHASE_MACHINE_INITIALIZED,

    /*
     * QEMU is ready to start CPUs and devices created at this time
     * are considered to be hot-plugged.  The monitor is not restricted
     * to "preconfig" commands.
     */
    PHASE_MACHINE_READY,
} MachineInitPhase;

至此,完成了对象字典中与虚拟机相关的加速器的配置项处理和应用。


总结

以上分析了 QEMU 系统仿真在启动过程中,QEMU 系统仿真对目标虚拟机的加速器配置项所做的处理和应用。

  • 22
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值