diff options
Diffstat (limited to 'hw/i386')
-rw-r--r-- | hw/i386/kvm/clock.c | 142 | ||||
-rw-r--r-- | hw/i386/multiboot.c | 20 | ||||
-rw-r--r-- | hw/i386/pc.c | 68 | ||||
-rw-r--r-- | hw/i386/pc_piix.c | 2 | ||||
-rw-r--r-- | hw/i386/pc_q35.c | 39 |
5 files changed, 223 insertions, 48 deletions
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c index 0f75dd3..ef9d560 100644 --- a/hw/i386/kvm/clock.c +++ b/hw/i386/kvm/clock.c @@ -36,6 +36,13 @@ typedef struct KVMClockState { uint64_t clock; bool clock_valid; + + /* whether machine type supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether the 'clock' value was obtained in a host with + * reliable KVM_GET_CLOCK */ + bool clock_is_reliable; } KVMClockState; struct pvclock_vcpu_time_info { @@ -81,6 +88,60 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s) return nsec + time.system_time; } +static void kvm_update_clock(KVMClockState *s) +{ + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + s->clock = data.clock; + + /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns + * essentially CLOCK_MONOTONIC plus a guest-specific adjustment. This + * can drift from the TSC-based value that is computed by the guest, + * so we need to go through kvmclock_current_nsec(). If + * kvm_has_adjust_clock_stable() is true, and the flags contain + * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value + * and kvmclock_current_nsec() is not necessary. + * + * Here, however, we need not check KVM_CLOCK_TSC_STABLE. This is because: + * + * - if the host has disabled the kvmclock master clock, the guest already + * has protection against time going backwards. This "safety net" is only + * absent when kvmclock is stable; + * + * - therefore, we can replace a check like + * + * if last KVM_GET_CLOCK was not reliable then + * read from memory + * + * with + * + * if last KVM_GET_CLOCK was not reliable && masterclock is enabled + * read from memory + * + * However: + * + * - if kvm_has_adjust_clock_stable() returns false, the left side is + * always true (KVM_GET_CLOCK is never reliable), and the right side is + * unknown (because we don't have data.flags). We must assume it's true + * and read from memory. + * + * - if kvm_has_adjust_clock_stable() returns true, the result of the && + * is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable) + * + * So we can just use this instead: + * + * if !kvm_has_adjust_clock_stable() then + * read from memory + */ + s->clock_is_reliable = kvm_has_adjust_clock_stable(); +} + static void kvmclock_vm_state_change(void *opaque, int running, RunState state) { @@ -91,15 +152,21 @@ static void kvmclock_vm_state_change(void *opaque, int running, if (running) { struct kvm_clock_data data = {}; - uint64_t time_at_migration = kvmclock_current_nsec(s); - - s->clock_valid = false; - /* We can't rely on the migrated clock value, just discard it */ - if (time_at_migration) { - s->clock = time_at_migration; + /* + * If the host where s->clock was read did not support reliable + * KVM_GET_CLOCK, read kvmclock value from memory. + */ + if (!s->clock_is_reliable) { + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); + /* We can't rely on the saved clock value, just discard it */ + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; + } } + s->clock_valid = false; + data.clock = s->clock; ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); if (ret < 0) { @@ -120,8 +187,6 @@ static void kvmclock_vm_state_change(void *opaque, int running, } } } else { - struct kvm_clock_data data; - int ret; if (s->clock_valid) { return; @@ -129,13 +194,7 @@ static void kvmclock_vm_state_change(void *opaque, int running, kvm_synchronize_all_tsc(); - ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); - if (ret < 0) { - fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); - abort(); - } - s->clock = data.clock; - + kvm_update_clock(s); /* * If the VM is stopped, declare the clock state valid to * avoid re-reading it on next vmsave (which would return @@ -149,25 +208,78 @@ static void kvmclock_realize(DeviceState *dev, Error **errp) { KVMClockState *s = KVM_CLOCK(dev); + kvm_update_clock(s); + qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); } +static bool kvmclock_clock_is_reliable_needed(void *opaque) +{ + KVMClockState *s = opaque; + + return s->mach_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/clock_is_reliable", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_clock_is_reliable_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(clock_is_reliable, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +/* + * When migrating, read the clock just before migration, + * so that the guest clock counts during the events + * between: + * + * * vm_stop() + * * + * * pre_save() + * + * This reduces kvmclock difference on migration from 5s + * to 0.1s (when max_downtime == 5s), because sending the + * final pages of memory (which happens between vm_stop() + * and pre_save()) takes max_downtime. + */ +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + + kvm_update_clock(s); +} + static const VMStateDescription kvmclock_vmsd = { .name = "kvmclock", .version_id = 1, .minimum_version_id = 1, + .pre_save = kvmclock_pre_save, .fields = (VMStateField[]) { VMSTATE_UINT64(clock, KVMClockState), VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL } }; +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + static void kvmclock_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = kvmclock_realize; dc->vmsd = &kvmclock_vmsd; + dc->props = kvmclock_properties; } static const TypeInfo kvmclock_info = { diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c index 387caa6..f13e231 100644 --- a/hw/i386/multiboot.c +++ b/hw/i386/multiboot.c @@ -109,7 +109,7 @@ static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline) hwaddr p = s->offset_cmdlines; char *b = (char *)s->mb_buf + p; - get_opt_value(b, strlen(cmdline) + 1, cmdline); + memcpy(b, cmdline, strlen(cmdline) + 1); s->offset_cmdlines += strlen(b) + 1; return s->mb_buf_phys + p; } @@ -287,7 +287,8 @@ int load_multiboot(FWCfgState *fw_cfg, mbs.offset_bootloader = mbs.offset_cmdlines + cmdline_len; if (initrd_filename) { - char *next_initrd, not_last; + const char *next_initrd; + char not_last, tmpbuf[strlen(initrd_filename) + 1]; mbs.offset_mods = mbs.mb_buf_size; @@ -296,25 +297,24 @@ int load_multiboot(FWCfgState *fw_cfg, int mb_mod_length; uint32_t offs = mbs.mb_buf_size; - next_initrd = (char *)get_opt_value(NULL, 0, initrd_filename); + next_initrd = get_opt_value(tmpbuf, sizeof(tmpbuf), initrd_filename); not_last = *next_initrd; - *next_initrd = '\0'; /* if a space comes after the module filename, treat everything after that as parameters */ - hwaddr c = mb_add_cmdline(&mbs, initrd_filename); - if ((next_space = strchr(initrd_filename, ' '))) + hwaddr c = mb_add_cmdline(&mbs, tmpbuf); + if ((next_space = strchr(tmpbuf, ' '))) *next_space = '\0'; - mb_debug("multiboot loading module: %s\n", initrd_filename); - mb_mod_length = get_image_size(initrd_filename); + mb_debug("multiboot loading module: %s\n", tmpbuf); + mb_mod_length = get_image_size(tmpbuf); if (mb_mod_length < 0) { - fprintf(stderr, "Failed to open file '%s'\n", initrd_filename); + fprintf(stderr, "Failed to open file '%s'\n", tmpbuf); exit(1); } mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_mod_length + mbs.mb_buf_size); mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size); - load_image(initrd_filename, (unsigned char *)mbs.mb_buf + offs); + load_image(tmpbuf, (unsigned char *)mbs.mb_buf + offs); mb_add_mod(&mbs, mbs.mb_buf_phys + offs, mbs.mb_buf_phys + offs + mb_mod_length, c); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index a9e64a8..25e8586 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -400,13 +400,13 @@ static void pc_cmos_init_late(void *opaque) int i, trans; val = 0; - if (ide_get_geometry(arg->idebus[0], 0, - &cylinders, &heads, §ors) >= 0) { + if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 0, + &cylinders, &heads, §ors) >= 0) { cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors); val |= 0xf0; } - if (ide_get_geometry(arg->idebus[0], 1, - &cylinders, &heads, §ors) >= 0) { + if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 1, + &cylinders, &heads, §ors) >= 0) { cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors); val |= 0x0f; } @@ -418,7 +418,8 @@ static void pc_cmos_init_late(void *opaque) geometry. It is always such that: 1 <= sects <= 63, 1 <= heads <= 16, 1 <= cylinders <= 16383. The BIOS geometry can be different if a translation is done. */ - if (ide_get_geometry(arg->idebus[i / 2], i % 2, + if (arg->idebus[i / 2] && + ide_get_geometry(arg->idebus[i / 2], i % 2, &cylinders, &heads, §ors) >= 0) { trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1; assert((trans & ~3) == 0); @@ -1535,6 +1536,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, ISADevice **rtc_state, bool create_fdctrl, bool no_vmport, + bool has_pit, uint32_t hpet_irqs) { int i; @@ -1588,7 +1590,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi, qemu_register_boot_set(pc_boot_set, *rtc_state); - if (!xen_enabled()) { + if (!xen_enabled() && has_pit) { if (kvm_pit_in_kernel()) { pit = kvm_pit_init(isa_bus, 0x40); } else { @@ -2158,6 +2160,48 @@ static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp) pcms->acpi_nvdimm_state.is_enabled = value; } +static bool pc_machine_get_smbus(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->smbus; +} + +static void pc_machine_set_smbus(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->smbus = value; +} + +static bool pc_machine_get_sata(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->sata; +} + +static void pc_machine_set_sata(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->sata = value; +} + +static bool pc_machine_get_pit(Object *obj, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + return pcms->pit; +} + +static void pc_machine_set_pit(Object *obj, bool value, Error **errp) +{ + PCMachineState *pcms = PC_MACHINE(obj); + + pcms->pit = value; +} + static void pc_machine_initfn(Object *obj) { PCMachineState *pcms = PC_MACHINE(obj); @@ -2169,6 +2213,9 @@ static void pc_machine_initfn(Object *obj) pcms->acpi_nvdimm_state.is_enabled = false; /* acpi build is enabled by default if machine supports it */ pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build; + pcms->smbus = true; + pcms->sata = true; + pcms->pit = true; } static void pc_machine_reset(void) @@ -2329,6 +2376,15 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_SMBUS, + pc_machine_get_smbus, pc_machine_set_smbus, &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_SATA, + pc_machine_get_sata, pc_machine_set_sata, &error_abort); + + object_class_property_add_bool(oc, PC_MACHINE_PIT, + pc_machine_get_pit, pc_machine_set_pit, &error_abort); } static const TypeInfo pc_machine_info = { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a54a468..5e1adbe 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -235,7 +235,7 @@ static void pc_init1(MachineState *machine, /* init basic PC hardware */ pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true, - (pcms->vmport != ON_OFF_AUTO_ON), 0x4); + (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit, 0x4); pc_nic_init(isa_bus, pci_bus); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index b40d19e..d042fe0 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -227,32 +227,39 @@ static void pc_q35_init(MachineState *machine) /* init basic PC hardware */ pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy, - (pcms->vmport != ON_OFF_AUTO_ON), 0xff0104); + (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit, + 0xff0104); /* connect pm stuff to lpc */ ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pcms)); - /* ahci and SATA device, for q35 1 ahci controller is built-in */ - ahci = pci_create_simple_multifunction(host_bus, - PCI_DEVFN(ICH9_SATA1_DEV, - ICH9_SATA1_FUNC), - true, "ich9-ahci"); - idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); - idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); - g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports); - ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); - ahci_ide_create_devs(ahci, hd); + if (pcms->sata) { + /* ahci and SATA device, for q35 1 ahci controller is built-in */ + ahci = pci_create_simple_multifunction(host_bus, + PCI_DEVFN(ICH9_SATA1_DEV, + ICH9_SATA1_FUNC), + true, "ich9-ahci"); + idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); + idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); + g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports); + ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports); + ahci_ide_create_devs(ahci, hd); + } else { + idebus[0] = idebus[1] = NULL; + } if (machine_usb(machine)) { /* Should we create 6 UHCI according to ich9 spec? */ ehci_create_ich9_with_companions(host_bus, 0x1d); } - /* TODO: Populate SPD eeprom data. */ - smbus_eeprom_init(ich9_smb_init(host_bus, - PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), - 0xb100), - 8, NULL, 0); + if (pcms->smbus) { + /* TODO: Populate SPD eeprom data. */ + smbus_eeprom_init(ich9_smb_init(host_bus, + PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), + 0xb100), + 8, NULL, 0); + } pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state); |