aboutsummaryrefslogtreecommitdiff
path: root/hw/i386
diff options
context:
space:
mode:
Diffstat (limited to 'hw/i386')
-rw-r--r--hw/i386/kvm/clock.c142
-rw-r--r--hw/i386/multiboot.c20
-rw-r--r--hw/i386/pc.c68
-rw-r--r--hw/i386/pc_piix.c2
-rw-r--r--hw/i386/pc_q35.c39
5 files changed, 223 insertions, 48 deletions
diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 0f75dd3..ef9d560 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -36,6 +36,13 @@ typedef struct KVMClockState {
uint64_t clock;
bool clock_valid;
+
+ /* whether machine type supports reliable KVM_GET_CLOCK */
+ bool mach_use_reliable_get_clock;
+
+ /* whether the 'clock' value was obtained in a host with
+ * reliable KVM_GET_CLOCK */
+ bool clock_is_reliable;
} KVMClockState;
struct pvclock_vcpu_time_info {
@@ -81,6 +88,60 @@ static uint64_t kvmclock_current_nsec(KVMClockState *s)
return nsec + time.system_time;
}
+static void kvm_update_clock(KVMClockState *s)
+{
+ struct kvm_clock_data data;
+ int ret;
+
+ ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+ abort();
+ }
+ s->clock = data.clock;
+
+ /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns
+ * essentially CLOCK_MONOTONIC plus a guest-specific adjustment. This
+ * can drift from the TSC-based value that is computed by the guest,
+ * so we need to go through kvmclock_current_nsec(). If
+ * kvm_has_adjust_clock_stable() is true, and the flags contain
+ * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value
+ * and kvmclock_current_nsec() is not necessary.
+ *
+ * Here, however, we need not check KVM_CLOCK_TSC_STABLE. This is because:
+ *
+ * - if the host has disabled the kvmclock master clock, the guest already
+ * has protection against time going backwards. This "safety net" is only
+ * absent when kvmclock is stable;
+ *
+ * - therefore, we can replace a check like
+ *
+ * if last KVM_GET_CLOCK was not reliable then
+ * read from memory
+ *
+ * with
+ *
+ * if last KVM_GET_CLOCK was not reliable && masterclock is enabled
+ * read from memory
+ *
+ * However:
+ *
+ * - if kvm_has_adjust_clock_stable() returns false, the left side is
+ * always true (KVM_GET_CLOCK is never reliable), and the right side is
+ * unknown (because we don't have data.flags). We must assume it's true
+ * and read from memory.
+ *
+ * - if kvm_has_adjust_clock_stable() returns true, the result of the &&
+ * is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable)
+ *
+ * So we can just use this instead:
+ *
+ * if !kvm_has_adjust_clock_stable() then
+ * read from memory
+ */
+ s->clock_is_reliable = kvm_has_adjust_clock_stable();
+}
+
static void kvmclock_vm_state_change(void *opaque, int running,
RunState state)
{
@@ -91,15 +152,21 @@ static void kvmclock_vm_state_change(void *opaque, int running,
if (running) {
struct kvm_clock_data data = {};
- uint64_t time_at_migration = kvmclock_current_nsec(s);
-
- s->clock_valid = false;
- /* We can't rely on the migrated clock value, just discard it */
- if (time_at_migration) {
- s->clock = time_at_migration;
+ /*
+ * If the host where s->clock was read did not support reliable
+ * KVM_GET_CLOCK, read kvmclock value from memory.
+ */
+ if (!s->clock_is_reliable) {
+ uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
+ /* We can't rely on the saved clock value, just discard it */
+ if (pvclock_via_mem) {
+ s->clock = pvclock_via_mem;
+ }
}
+ s->clock_valid = false;
+
data.clock = s->clock;
ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
if (ret < 0) {
@@ -120,8 +187,6 @@ static void kvmclock_vm_state_change(void *opaque, int running,
}
}
} else {
- struct kvm_clock_data data;
- int ret;
if (s->clock_valid) {
return;
@@ -129,13 +194,7 @@ static void kvmclock_vm_state_change(void *opaque, int running,
kvm_synchronize_all_tsc();
- ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
- if (ret < 0) {
- fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
- abort();
- }
- s->clock = data.clock;
-
+ kvm_update_clock(s);
/*
* If the VM is stopped, declare the clock state valid to
* avoid re-reading it on next vmsave (which would return
@@ -149,25 +208,78 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
{
KVMClockState *s = KVM_CLOCK(dev);
+ kvm_update_clock(s);
+
qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
}
+static bool kvmclock_clock_is_reliable_needed(void *opaque)
+{
+ KVMClockState *s = opaque;
+
+ return s->mach_use_reliable_get_clock;
+}
+
+static const VMStateDescription kvmclock_reliable_get_clock = {
+ .name = "kvmclock/clock_is_reliable",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = kvmclock_clock_is_reliable_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(clock_is_reliable, KVMClockState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * When migrating, read the clock just before migration,
+ * so that the guest clock counts during the events
+ * between:
+ *
+ * * vm_stop()
+ * *
+ * * pre_save()
+ *
+ * This reduces kvmclock difference on migration from 5s
+ * to 0.1s (when max_downtime == 5s), because sending the
+ * final pages of memory (which happens between vm_stop()
+ * and pre_save()) takes max_downtime.
+ */
+static void kvmclock_pre_save(void *opaque)
+{
+ KVMClockState *s = opaque;
+
+ kvm_update_clock(s);
+}
+
static const VMStateDescription kvmclock_vmsd = {
.name = "kvmclock",
.version_id = 1,
.minimum_version_id = 1,
+ .pre_save = kvmclock_pre_save,
.fields = (VMStateField[]) {
VMSTATE_UINT64(clock, KVMClockState),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &kvmclock_reliable_get_clock,
+ NULL
}
};
+static Property kvmclock_properties[] = {
+ DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState,
+ mach_use_reliable_get_clock, true),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
static void kvmclock_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = kvmclock_realize;
dc->vmsd = &kvmclock_vmsd;
+ dc->props = kvmclock_properties;
}
static const TypeInfo kvmclock_info = {
diff --git a/hw/i386/multiboot.c b/hw/i386/multiboot.c
index 387caa6..f13e231 100644
--- a/hw/i386/multiboot.c
+++ b/hw/i386/multiboot.c
@@ -109,7 +109,7 @@ static uint32_t mb_add_cmdline(MultibootState *s, const char *cmdline)
hwaddr p = s->offset_cmdlines;
char *b = (char *)s->mb_buf + p;
- get_opt_value(b, strlen(cmdline) + 1, cmdline);
+ memcpy(b, cmdline, strlen(cmdline) + 1);
s->offset_cmdlines += strlen(b) + 1;
return s->mb_buf_phys + p;
}
@@ -287,7 +287,8 @@ int load_multiboot(FWCfgState *fw_cfg,
mbs.offset_bootloader = mbs.offset_cmdlines + cmdline_len;
if (initrd_filename) {
- char *next_initrd, not_last;
+ const char *next_initrd;
+ char not_last, tmpbuf[strlen(initrd_filename) + 1];
mbs.offset_mods = mbs.mb_buf_size;
@@ -296,25 +297,24 @@ int load_multiboot(FWCfgState *fw_cfg,
int mb_mod_length;
uint32_t offs = mbs.mb_buf_size;
- next_initrd = (char *)get_opt_value(NULL, 0, initrd_filename);
+ next_initrd = get_opt_value(tmpbuf, sizeof(tmpbuf), initrd_filename);
not_last = *next_initrd;
- *next_initrd = '\0';
/* if a space comes after the module filename, treat everything
after that as parameters */
- hwaddr c = mb_add_cmdline(&mbs, initrd_filename);
- if ((next_space = strchr(initrd_filename, ' ')))
+ hwaddr c = mb_add_cmdline(&mbs, tmpbuf);
+ if ((next_space = strchr(tmpbuf, ' ')))
*next_space = '\0';
- mb_debug("multiboot loading module: %s\n", initrd_filename);
- mb_mod_length = get_image_size(initrd_filename);
+ mb_debug("multiboot loading module: %s\n", tmpbuf);
+ mb_mod_length = get_image_size(tmpbuf);
if (mb_mod_length < 0) {
- fprintf(stderr, "Failed to open file '%s'\n", initrd_filename);
+ fprintf(stderr, "Failed to open file '%s'\n", tmpbuf);
exit(1);
}
mbs.mb_buf_size = TARGET_PAGE_ALIGN(mb_mod_length + mbs.mb_buf_size);
mbs.mb_buf = g_realloc(mbs.mb_buf, mbs.mb_buf_size);
- load_image(initrd_filename, (unsigned char *)mbs.mb_buf + offs);
+ load_image(tmpbuf, (unsigned char *)mbs.mb_buf + offs);
mb_add_mod(&mbs, mbs.mb_buf_phys + offs,
mbs.mb_buf_phys + offs + mb_mod_length, c);
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index a9e64a8..25e8586 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -400,13 +400,13 @@ static void pc_cmos_init_late(void *opaque)
int i, trans;
val = 0;
- if (ide_get_geometry(arg->idebus[0], 0,
- &cylinders, &heads, &sectors) >= 0) {
+ if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 0,
+ &cylinders, &heads, &sectors) >= 0) {
cmos_init_hd(s, 0x19, 0x1b, cylinders, heads, sectors);
val |= 0xf0;
}
- if (ide_get_geometry(arg->idebus[0], 1,
- &cylinders, &heads, &sectors) >= 0) {
+ if (arg->idebus[0] && ide_get_geometry(arg->idebus[0], 1,
+ &cylinders, &heads, &sectors) >= 0) {
cmos_init_hd(s, 0x1a, 0x24, cylinders, heads, sectors);
val |= 0x0f;
}
@@ -418,7 +418,8 @@ static void pc_cmos_init_late(void *opaque)
geometry. It is always such that: 1 <= sects <= 63, 1
<= heads <= 16, 1 <= cylinders <= 16383. The BIOS
geometry can be different if a translation is done. */
- if (ide_get_geometry(arg->idebus[i / 2], i % 2,
+ if (arg->idebus[i / 2] &&
+ ide_get_geometry(arg->idebus[i / 2], i % 2,
&cylinders, &heads, &sectors) >= 0) {
trans = ide_get_bios_chs_trans(arg->idebus[i / 2], i % 2) - 1;
assert((trans & ~3) == 0);
@@ -1535,6 +1536,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
ISADevice **rtc_state,
bool create_fdctrl,
bool no_vmport,
+ bool has_pit,
uint32_t hpet_irqs)
{
int i;
@@ -1588,7 +1590,7 @@ void pc_basic_device_init(ISABus *isa_bus, qemu_irq *gsi,
qemu_register_boot_set(pc_boot_set, *rtc_state);
- if (!xen_enabled()) {
+ if (!xen_enabled() && has_pit) {
if (kvm_pit_in_kernel()) {
pit = kvm_pit_init(isa_bus, 0x40);
} else {
@@ -2158,6 +2160,48 @@ static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
pcms->acpi_nvdimm_state.is_enabled = value;
}
+static bool pc_machine_get_smbus(Object *obj, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ return pcms->smbus;
+}
+
+static void pc_machine_set_smbus(Object *obj, bool value, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ pcms->smbus = value;
+}
+
+static bool pc_machine_get_sata(Object *obj, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ return pcms->sata;
+}
+
+static void pc_machine_set_sata(Object *obj, bool value, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ pcms->sata = value;
+}
+
+static bool pc_machine_get_pit(Object *obj, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ return pcms->pit;
+}
+
+static void pc_machine_set_pit(Object *obj, bool value, Error **errp)
+{
+ PCMachineState *pcms = PC_MACHINE(obj);
+
+ pcms->pit = value;
+}
+
static void pc_machine_initfn(Object *obj)
{
PCMachineState *pcms = PC_MACHINE(obj);
@@ -2169,6 +2213,9 @@ static void pc_machine_initfn(Object *obj)
pcms->acpi_nvdimm_state.is_enabled = false;
/* acpi build is enabled by default if machine supports it */
pcms->acpi_build_enabled = PC_MACHINE_GET_CLASS(pcms)->has_acpi_build;
+ pcms->smbus = true;
+ pcms->sata = true;
+ pcms->pit = true;
}
static void pc_machine_reset(void)
@@ -2329,6 +2376,15 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
+
+ object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
+ pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);
+
+ object_class_property_add_bool(oc, PC_MACHINE_SATA,
+ pc_machine_get_sata, pc_machine_set_sata, &error_abort);
+
+ object_class_property_add_bool(oc, PC_MACHINE_PIT,
+ pc_machine_get_pit, pc_machine_set_pit, &error_abort);
}
static const TypeInfo pc_machine_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a54a468..5e1adbe 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -235,7 +235,7 @@ static void pc_init1(MachineState *machine,
/* init basic PC hardware */
pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, true,
- (pcms->vmport != ON_OFF_AUTO_ON), 0x4);
+ (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit, 0x4);
pc_nic_init(isa_bus, pci_bus);
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index b40d19e..d042fe0 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -227,32 +227,39 @@ static void pc_q35_init(MachineState *machine)
/* init basic PC hardware */
pc_basic_device_init(isa_bus, pcms->gsi, &rtc_state, !mc->no_floppy,
- (pcms->vmport != ON_OFF_AUTO_ON), 0xff0104);
+ (pcms->vmport != ON_OFF_AUTO_ON), pcms->pit,
+ 0xff0104);
/* connect pm stuff to lpc */
ich9_lpc_pm_init(lpc, pc_machine_is_smm_enabled(pcms));
- /* ahci and SATA device, for q35 1 ahci controller is built-in */
- ahci = pci_create_simple_multifunction(host_bus,
- PCI_DEVFN(ICH9_SATA1_DEV,
- ICH9_SATA1_FUNC),
- true, "ich9-ahci");
- idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0");
- idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1");
- g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports);
- ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
- ahci_ide_create_devs(ahci, hd);
+ if (pcms->sata) {
+ /* ahci and SATA device, for q35 1 ahci controller is built-in */
+ ahci = pci_create_simple_multifunction(host_bus,
+ PCI_DEVFN(ICH9_SATA1_DEV,
+ ICH9_SATA1_FUNC),
+ true, "ich9-ahci");
+ idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0");
+ idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1");
+ g_assert(MAX_SATA_PORTS == ICH_AHCI(ahci)->ahci.ports);
+ ide_drive_get(hd, ICH_AHCI(ahci)->ahci.ports);
+ ahci_ide_create_devs(ahci, hd);
+ } else {
+ idebus[0] = idebus[1] = NULL;
+ }
if (machine_usb(machine)) {
/* Should we create 6 UHCI according to ich9 spec? */
ehci_create_ich9_with_companions(host_bus, 0x1d);
}
- /* TODO: Populate SPD eeprom data. */
- smbus_eeprom_init(ich9_smb_init(host_bus,
- PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC),
- 0xb100),
- 8, NULL, 0);
+ if (pcms->smbus) {
+ /* TODO: Populate SPD eeprom data. */
+ smbus_eeprom_init(ich9_smb_init(host_bus,
+ PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC),
+ 0xb100),
+ 8, NULL, 0);
+ }
pc_cmos_init(pcms, idebus[0], idebus[1], rtc_state);