From bb43d3839c29b17a2f5c122114cd4ca978065a18 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 16 Dec 2013 10:11:28 +0100 Subject: piix: gigabyte alignment for ram Map 3G (i440fx) of memory below 4G, so the RAM pieces are nicely aligned to gigabyte borders. Keep old memory layout for (a) old machine types and (b) in case all memory fits below 4G and thus we don't have to split RAM into pieces in the first place. The latter makes sure this change doesn't take away memory from 32bit guests. So, with i440fx and up to 3.5 GB of memory, all of it will be mapped below 4G. With more than 3.5 GB of memory 3 GB will be mapped below 4G and the remaining amount will be mapped above 4G. Signed-off-by: Gerd Hoffmann Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 4e0dae7..acb9445 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -61,6 +61,7 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; static bool has_pci_info; static bool has_acpi_build = true; static bool smbios_type1_defaults = true; +static bool gigabyte_align = true; /* PC hardware initialisation */ static void pc_init1(QEMUMachineInitArgs *args, @@ -107,8 +108,9 @@ static void pc_init1(QEMUMachineInitArgs *args, } if (args->ram_size >= 0xe0000000) { - above_4g_mem_size = args->ram_size - 0xe0000000; - below_4g_mem_size = 0xe0000000; + ram_addr_t lowmem = gigabyte_align ?
0xc0000000 : 0xe0000000; + above_4g_mem_size = args->ram_size - lowmem; + below_4g_mem_size = lowmem; } else { above_4g_mem_size = 0; below_4g_mem_size = args->ram_size; @@ -245,6 +247,7 @@ static void pc_init_pci(QEMUMachineInitArgs *args) static void pc_compat_1_7(QEMUMachineInitArgs *args) { smbios_type1_defaults = false; + gigabyte_align = false; } static void pc_compat_1_6(QEMUMachineInitArgs *args) -- cgit v1.1 From ecdbfceb0f20a3ef784bf522ed7264660aa3d150 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 16 Dec 2013 13:54:19 +0200 Subject: pc_piix: document gigabyte_align Document the logic behind the below/above 4G split. Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'hw') diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index acb9445..832e20c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -61,6 +61,10 @@ static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; static bool has_pci_info; static bool has_acpi_build = true; static bool smbios_type1_defaults = true; +/* Make sure that guest addresses aligned at 1Gbyte boundaries get mapped to + * host addresses aligned at 1Gbyte boundaries. This way we can use 1GByte + * pages in the host. + */ static bool gigabyte_align = true; /* PC hardware initialisation */ @@ -107,6 +111,13 @@ static void pc_init1(QEMUMachineInitArgs *args, kvmclock_create(); } + /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory). + * If it doesn't, we need to split it in chunks below and above 4G. + * In any case, try to make sure that guest addresses aligned at + * 1G boundaries get mapped to host addresses aligned at 1G boundaries. + * For old machine types, use whatever split we used historically to avoid + * breaking migration. + */ if (args->ram_size >= 0xe0000000) { ram_addr_t lowmem = gigabyte_align ? 
0xc0000000 : 0xe0000000; above_4g_mem_size = args->ram_size - lowmem; -- cgit v1.1 From 637a5acb46b36a25b506ba6545e9a53350585b03 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Thu, 28 Nov 2013 00:52:52 +0100 Subject: hw/i386/pc_sysfw: support two flash drives This patch allows the user to usefully specify -drive file=img_1,if=pflash,format=raw,readonly \ -drive file=img_2,if=pflash,format=raw on the command line. The flash images will be mapped under 4G in their reverse unit order -- that is, with their base addresses progressing downwards, in increasing unit order. (The unit number increases with command line order if not explicitly specified.) This accommodates the following use case: suppose that OVMF is split in two parts, a writeable host file for non-volatile variable storage, and a read-only part for bootstrap and decompressible executable code. The binary code part would be read-only, centrally managed on the host system, and passed in as unit 0. The variable store would be writeable, VM-specific, and passed in as unit 1. 00000000ffe00000-00000000ffe1ffff (prio 0, R-): system.flash1 00000000ffe20000-00000000ffffffff (prio 0, R-): system.flash0 (If the guest tries to write to the flash range that is backed by the read-only drive, pflash_update() is never called; various flash programming/erase errors are returned to the guest instead. See the callers of pflash_update(), and the initialization of "pfl->ro", in "hw/block/pflash_cfi01.c".) Signed-off-by: Laszlo Ersek Reviewed-by: Markus Armbruster Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/pc_sysfw.c | 105 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 19 deletions(-) (limited to 'hw') diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index e917c83..75a7ebba 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -72,35 +72,102 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, memory_region_set_readonly(isa_bios, true); } -static void pc_system_flash_init(MemoryRegion *rom_memory, - DriveInfo *pflash_drv) +#define FLASH_MAP_UNIT_MAX 2 + +/* We don't have a theoretically justifiable exact lower bound on the base + * address of any flash mapping. In practice, the IO-APIC MMIO range is + * [0xFEE00000..0xFEE01000[ -- see IO_APIC_DEFAULT_ADDRESS --, leaving free + * only 18MB-4KB below 4G. For now, restrict the cumulative mapping to 8MB in + * size. + */ +#define FLASH_MAP_BASE_MIN ((hwaddr)(0x100000000ULL - 8*1024*1024)) + +/* This function maps flash drives from 4G downward, in order of their unit + * numbers. The mapping starts at unit#0, with unit number increments of 1, and + * stops before the first missing flash drive, or before + * unit#FLASH_MAP_UNIT_MAX, whichever is reached first. + * + * Addressing within one flash drive is of course not reversed. + * + * An error message is printed and the process exits if: + * - the size of the backing file for a flash drive is non-positive, or not a + * multiple of the required sector size, or + * - the current mapping's base address would fall below FLASH_MAP_BASE_MIN. + * + * The drive with unit#0 (if available) is mapped at the highest address, and + * it is passed to pc_isa_bios_init(). Merging several drives for isa-bios is + * not supported. 
+ */ +static void pc_system_flash_init(MemoryRegion *rom_memory) { + int unit; + DriveInfo *pflash_drv; BlockDriverState *bdrv; int64_t size; - hwaddr phys_addr; + char *fatal_errmsg = NULL; + hwaddr phys_addr = 0x100000000ULL; int sector_bits, sector_size; pflash_t *system_flash; MemoryRegion *flash_mem; + char name[64]; - bdrv = pflash_drv->bdrv; - size = bdrv_getlength(pflash_drv->bdrv); sector_bits = 12; sector_size = 1 << sector_bits; - if ((size % sector_size) != 0) { - fprintf(stderr, - "qemu: PC system firmware (pflash) must be a multiple of 0x%x\n", - sector_size); - exit(1); + for (unit = 0; + (unit < FLASH_MAP_UNIT_MAX && + (pflash_drv = drive_get(IF_PFLASH, 0, unit)) != NULL); + ++unit) { + bdrv = pflash_drv->bdrv; + size = bdrv_getlength(bdrv); + if (size < 0) { + fatal_errmsg = g_strdup_printf("failed to get backing file size"); + } else if (size == 0) { + fatal_errmsg = g_strdup_printf("PC system firmware (pflash) " + "cannot have zero size"); + } else if ((size % sector_size) != 0) { + fatal_errmsg = g_strdup_printf("PC system firmware (pflash) " + "must be a multiple of 0x%x", sector_size); + } else if (phys_addr < size || phys_addr - size < FLASH_MAP_BASE_MIN) { + fatal_errmsg = g_strdup_printf("oversized backing file, pflash " + "segments cannot be mapped under " + TARGET_FMT_plx, FLASH_MAP_BASE_MIN); + } + if (fatal_errmsg != NULL) { + Location loc; + + /* push a new, "none" location on the location stack; overwrite its + * contents with the location saved in the option; print the error + * (includes location); pop the top + */ + loc_push_none(&loc); + if (pflash_drv->opts != NULL) { + qemu_opts_loc_restore(pflash_drv->opts); + } + error_report("%s", fatal_errmsg); + loc_pop(&loc); + g_free(fatal_errmsg); + exit(1); + } + + phys_addr -= size; + + /* pflash_cfi01_register() creates a deep copy of the name */ + snprintf(name, sizeof name, "system.flash%d", unit); + system_flash = pflash_cfi01_register(phys_addr, NULL /* qdev */, name, + size, 
bdrv, sector_size, + size >> sector_bits, + 1 /* width */, + 0x0000 /* id0 */, + 0x0000 /* id1 */, + 0x0000 /* id2 */, + 0x0000 /* id3 */, + 0 /* be */); + if (unit == 0) { + flash_mem = pflash_cfi01_get_memory(system_flash); + pc_isa_bios_init(rom_memory, flash_mem, size); + } } - - phys_addr = 0x100000000ULL - size; - system_flash = pflash_cfi01_register(phys_addr, NULL, "system.flash", size, - bdrv, sector_size, size >> sector_bits, - 1, 0x0000, 0x0000, 0x0000, 0x0000, 0); - flash_mem = pflash_cfi01_get_memory(system_flash); - - pc_isa_bios_init(rom_memory, flash_mem, size); } static void old_pc_system_rom_init(MemoryRegion *rom_memory, bool isapc_ram_fw) @@ -181,5 +248,5 @@ void pc_system_firmware_init(MemoryRegion *rom_memory, bool isapc_ram_fw) exit(1); } - pc_system_flash_init(rom_memory, pflash_drv); + pc_system_flash_init(rom_memory); } -- cgit v1.1 From e4f308bbf9f360ee2af5b94b87aef170d8f20dc4 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 13 Dec 2013 17:22:06 +0100 Subject: acpi: piix4: remove not needed GPE0 mask Hardcoded GPE0 mask isn't really needed, since GPE0_STS is initialized with all bits cleared and only QEMU itself can set bits there (i.e. guest can only clear bits in it). So guest can't trigger SCI by setting _STS & _EN bits and there is no reason to mask out unsupported _STS bits since they shouldn't be set by QEMU in the first place. Signed-off-by: Igor Mammedov Signed-off-by: Michael S.
Tsirkin --- hw/acpi/piix4.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 93849c8..b4caeab1 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -122,8 +122,7 @@ static void pm_update_sci(PIIX4PMState *s) ACPI_BITMASK_POWER_BUTTON_ENABLE | ACPI_BITMASK_GLOBAL_LOCK_ENABLE | ACPI_BITMASK_TIMER_ENABLE)) != 0) || - (((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) & - (PIIX4_PCI_HOTPLUG_STATUS | PIIX4_CPU_HOTPLUG_STATUS)) != 0); + ((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) != 0); qemu_set_irq(s->irq, sci_level); /* schedule a timer interruption if needed */ -- cgit v1.1 From 063135032808700a5a6b0b4a781f31252da2e762 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 13 Dec 2013 17:22:07 +0100 Subject: acpi: factor out common pm_update_sci() into acpi core ... and rename it into acpi_update_sci() since it changes SCI on only on PM registers status. Signed-off-by: Igor Mammedov Signed-off-by: Michael S. Tsirkin --- hw/acpi/core.c | 18 ++++++++++++++++++ hw/acpi/ich9.c | 23 ++--------------------- hw/acpi/piix4.c | 26 ++++---------------------- 3 files changed, 24 insertions(+), 43 deletions(-) (limited to 'hw') diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 58308a3..79414b4 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -662,3 +662,21 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr) return val; } + +void acpi_update_sci(ACPIREGS *regs, qemu_irq irq) +{ + int sci_level, pm1a_sts; + + pm1a_sts = acpi_pm1_evt_get_sts(regs); + + sci_level = ((pm1a_sts & + regs->pm1.evt.en & ACPI_BITMASK_PM1_COMMON_ENABLED) != 0) || + ((regs->gpe.sts[0] & regs->gpe.en[0]) != 0); + + qemu_set_irq(irq, sci_level); + + /* schedule a timer interruption if needed */ + acpi_pm_tmr_update(regs, + (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && + !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS)); +} diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 7e0429e..dcdef7c 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ 
-44,29 +44,10 @@ do { printf("%s "fmt, __func__, ## __VA_ARGS__); } while (0) #define ICH9_DEBUG(fmt, ...) do { } while (0) #endif -static void pm_update_sci(ICH9LPCPMRegs *pm) -{ - int sci_level, pm1a_sts; - - pm1a_sts = acpi_pm1_evt_get_sts(&pm->acpi_regs); - - sci_level = (((pm1a_sts & pm->acpi_regs.pm1.evt.en) & - (ACPI_BITMASK_RT_CLOCK_ENABLE | - ACPI_BITMASK_POWER_BUTTON_ENABLE | - ACPI_BITMASK_GLOBAL_LOCK_ENABLE | - ACPI_BITMASK_TIMER_ENABLE)) != 0); - qemu_set_irq(pm->irq, sci_level); - - /* schedule a timer interruption if needed */ - acpi_pm_tmr_update(&pm->acpi_regs, - (pm->acpi_regs.pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && - !(pm1a_sts & ACPI_BITMASK_TIMER_STATUS)); -} - static void ich9_pm_update_sci_fn(ACPIREGS *regs) { ICH9LPCPMRegs *pm = container_of(regs, ICH9LPCPMRegs, acpi_regs); - pm_update_sci(pm); + acpi_update_sci(&pm->acpi_regs, pm->irq); } static uint64_t ich9_gpe_readb(void *opaque, hwaddr addr, unsigned width) @@ -193,7 +174,7 @@ static void pm_reset(void *opaque) pm->smi_en |= ICH9_PMIO_SMI_EN_APMC_EN; } - pm_update_sci(pm); + acpi_update_sci(&pm->acpi_regs, pm->irq); } static void pm_powerdown_req(Notifier *n, void *opaque) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index b4caeab1..b6b97ce 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -112,28 +112,10 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent, #define ACPI_ENABLE 0xf1 #define ACPI_DISABLE 0xf0 -static void pm_update_sci(PIIX4PMState *s) -{ - int sci_level, pmsts; - - pmsts = acpi_pm1_evt_get_sts(&s->ar); - sci_level = (((pmsts & s->ar.pm1.evt.en) & - (ACPI_BITMASK_RT_CLOCK_ENABLE | - ACPI_BITMASK_POWER_BUTTON_ENABLE | - ACPI_BITMASK_GLOBAL_LOCK_ENABLE | - ACPI_BITMASK_TIMER_ENABLE)) != 0) || - ((s->ar.gpe.sts[0] & s->ar.gpe.en[0]) != 0); - - qemu_set_irq(s->irq, sci_level); - /* schedule a timer interruption if needed */ - acpi_pm_tmr_update(&s->ar, (s->ar.pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) && - !(pmsts & ACPI_BITMASK_TIMER_STATUS)); -} - 
static void pm_tmr_timer(ACPIREGS *ar) { PIIX4PMState *s = container_of(ar, PIIX4PMState, ar); - pm_update_sci(s); + acpi_update_sci(&s->ar, s->irq); } static void apm_ctrl_changed(uint32_t val, void *arg) @@ -577,7 +559,7 @@ static void gpe_writeb(void *opaque, hwaddr addr, uint64_t val, PIIX4PMState *s = opaque; acpi_gpe_ioport_writeb(&s->ar, addr, val); - pm_update_sci(s); + acpi_update_sci(&s->ar, s->irq); PIIX4_DPRINTF("gpe write %" HWADDR_PRIx " <== %" PRIu64 "\n", addr, val); } @@ -693,7 +675,7 @@ static void piix4_cpu_hotplug_req(PIIX4PMState *s, CPUState *cpu, } else { g->sts[cpu_id / 8] &= ~(1 << (cpu_id % 8)); } - pm_update_sci(s); + acpi_update_sci(&s->ar, s->irq); } static void piix4_cpu_added_req(Notifier *n, void *opaque) @@ -767,7 +749,7 @@ static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, disable_device(s, slot); } - pm_update_sci(s); + acpi_update_sci(&s->ar, s->irq); return 0; } -- cgit v1.1 From 2c047956f3b27048883350c071bcd33ef8331d13 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 13 Dec 2013 17:22:10 +0100 Subject: acpi: ich9: allow guest to clear SCI raised by GPE It fixes IRQ storm since guest isn't able to lower SCI IRQ after it has been handled when it clears GPE event. Signed-off-by: Igor Mammedov Signed-off-by: Michael S. Tsirkin --- hw/acpi/ich9.c | 1 + 1 file changed, 1 insertion(+) (limited to 'hw') diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index dcdef7c..30f0df8 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -61,6 +61,7 @@ static void ich9_gpe_writeb(void *opaque, hwaddr addr, uint64_t val, { ICH9LPCPMRegs *pm = opaque; acpi_gpe_ioport_writeb(&pm->acpi_regs, addr, val); + acpi_update_sci(&pm->acpi_regs, pm->irq); } static const MemoryRegionOps ich9_gpe_ops = { -- cgit v1.1 From c1a1af87d8d5dce93328bbe8c3db70ff29275069 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 13 Dec 2013 17:22:12 +0100 Subject: ACPI: Q35 DSDT: fix CPU hotplug GPE0.2 handler Fix bogus CPU hotplug GPE handler.
Make Q35 CPU hotplug GPE handler match PIIX4 one, since CPU hotplug event is triggered by GPE0.2 register. Signed-off-by: Igor Mammedov Signed-off-by: Michael S. Tsirkin --- hw/i386/q35-acpi-dsdt.dsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/i386/q35-acpi-dsdt.dsl b/hw/i386/q35-acpi-dsdt.dsl index 575c5d7..7934a9d 100644 --- a/hw/i386/q35-acpi-dsdt.dsl +++ b/hw/i386/q35-acpi-dsdt.dsl @@ -417,11 +417,11 @@ DefinitionBlock ( Method(_L00) { } Method(_L01) { + } + Method(_E02) { // CPU hotplug event \_SB.PRSC() } - Method(_L02) { - } Method(_L03) { } Method(_L04) { -- cgit v1.1 From aef52ee87f324fb03e0dcd88a84bdd50c8339a5f Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Fri, 13 Dec 2013 17:22:13 +0100 Subject: ACPI/DSDT-CPU: cleanup bogus comment Signed-off-by: Igor Mammedov Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-dsdt-cpu-hotplug.dsl | 1 - 1 file changed, 1 deletion(-) (limited to 'hw') diff --git a/hw/i386/acpi-dsdt-cpu-hotplug.dsl b/hw/i386/acpi-dsdt-cpu-hotplug.dsl index c96ac42..995b415 100644 --- a/hw/i386/acpi-dsdt-cpu-hotplug.dsl +++ b/hw/i386/acpi-dsdt-cpu-hotplug.dsl @@ -52,7 +52,6 @@ Scope(\_SB) { Sleep(200) } - /* CPU hotplug notify method */ OperationRegion(PRST, SystemIO, 0xaf00, 32) Field(PRST, ByteAcc, NoLock, Preserve) { PRS, 256 -- cgit v1.1 From 81e3e75b6461c53724fe7c7918bc54468fcdaf9d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Dec 2013 17:54:24 +0100 Subject: pci: do not export pci_bus_reset qbus_reset_all can be used instead. There is no semantic change because pcibus_reset returns 1 and takes care of the device tree traversal. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 8 ++------ hw/pci/pci_bridge.c | 2 +- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 82c11ec..83ea0a0 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -212,8 +212,9 @@ void pci_device_reset(PCIDevice *dev) * Trigger pci bus reset under a given bus. * To be called on RST# assert. */ -void pci_bus_reset(PCIBus *bus) +static int pcibus_reset(BusState *qbus) { + PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus); int i; for (i = 0; i < bus->nirq; i++) { @@ -224,11 +225,6 @@ void pci_bus_reset(PCIBus *bus) pci_device_reset(bus->devices[i]); } } -} - -static int pcibus_reset(BusState *qbus) -{ - pci_bus_reset(DO_UPCAST(PCIBus, qbus, qbus)); /* topology traverse is done by pci_bus_reset(). Tell qbus/qdev walker not to traverse the tree */ diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index f72872e..098c50a 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -268,7 +268,7 @@ void pci_bridge_write_config(PCIDevice *d, newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL); if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) { /* Trigger hot reset on 0->1 transition. */ - pci_bus_reset(&s->sec_bus); + qbus_reset_all(&s->sec_bus.qbus); } } -- cgit v1.1 From 9bdbbfc3a04c28dc43af5afffb32066623cb0022 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Dec 2013 17:54:25 +0100 Subject: pci: clean up resetting of IRQs pci_device_reset will deassert the INTX pins, and this will make the irq_count array all-zeroes. Check that this is the case, and remove the existing loop which might even unsync irq_count and irq_state. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. 
Tsirkin --- hw/pci/pci.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 83ea0a0..131e599 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -217,15 +217,16 @@ static int pcibus_reset(BusState *qbus) PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus); int i; - for (i = 0; i < bus->nirq; i++) { - bus->irq_count[i] = 0; - } for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { if (bus->devices[i]) { pci_device_reset(bus->devices[i]); } } + for (i = 0; i < bus->nirq; i++) { + assert(bus->irq_count[i] == 0); + } + /* topology traverse is done by pci_bus_reset(). Tell qbus/qdev walker not to traverse the tree */ return 1; -- cgit v1.1 From 0293214b8c5bf56a095d0a39c5821c9da66dd566 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Dec 2013 17:54:26 +0100 Subject: qdev: allow both pre- and post-order visits in qdev walking functions Resetting should be done in post-order, not pre-order. However, qdev_walk_children and qbus_walk_children do not allow this. Fix it by adding two extra arguments to the functions. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S.
Tsirkin --- hw/core/qdev.c | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) (limited to 'hw') diff --git a/hw/core/qdev.c b/hw/core/qdev.c index e374a93..5ddf1aa 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -240,12 +240,12 @@ static int qbus_reset_one(BusState *bus, void *opaque) void qdev_reset_all(DeviceState *dev) { - qdev_walk_children(dev, qdev_reset_one, qbus_reset_one, NULL); + qdev_walk_children(dev, qdev_reset_one, qbus_reset_one, NULL, NULL, NULL); } void qbus_reset_all(BusState *bus) { - qbus_walk_children(bus, qdev_reset_one, qbus_reset_one, NULL); + qbus_walk_children(bus, qdev_reset_one, qbus_reset_one, NULL, NULL, NULL); } void qbus_reset_all_fn(void *opaque) @@ -337,49 +337,70 @@ BusState *qdev_get_child_bus(DeviceState *dev, const char *name) return NULL; } -int qbus_walk_children(BusState *bus, qdev_walkerfn *devfn, - qbus_walkerfn *busfn, void *opaque) +int qbus_walk_children(BusState *bus, + qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn, + qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn, + void *opaque) { BusChild *kid; int err; - if (busfn) { - err = busfn(bus, opaque); + if (pre_busfn) { + err = pre_busfn(bus, opaque); if (err) { return err; } } QTAILQ_FOREACH(kid, &bus->children, sibling) { - err = qdev_walk_children(kid->child, devfn, busfn, opaque); + err = qdev_walk_children(kid->child, + pre_devfn, pre_busfn, + post_devfn, post_busfn, opaque); if (err < 0) { return err; } } + if (post_busfn) { + err = post_busfn(bus, opaque); + if (err) { + return err; + } + } + return 0; } -int qdev_walk_children(DeviceState *dev, qdev_walkerfn *devfn, - qbus_walkerfn *busfn, void *opaque) +int qdev_walk_children(DeviceState *dev, + qdev_walkerfn *pre_devfn, qbus_walkerfn *pre_busfn, + qdev_walkerfn *post_devfn, qbus_walkerfn *post_busfn, + void *opaque) { BusState *bus; int err; - if (devfn) { - err = devfn(dev, opaque); + if (pre_devfn) { + err = pre_devfn(dev, opaque); if (err) { 
return err; } } QLIST_FOREACH(bus, &dev->child_bus, sibling) { - err = qbus_walk_children(bus, devfn, busfn, opaque); + err = qbus_walk_children(bus, pre_devfn, pre_busfn, + post_devfn, post_busfn, opaque); if (err < 0) { return err; } } + if (post_devfn) { + err = post_devfn(dev, opaque); + if (err) { + return err; + } + } + return 0; } -- cgit v1.1 From dcc209314afdaeec42f1e2a7bbf37eec3ace23de Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Dec 2013 17:54:27 +0100 Subject: qdev: switch reset to post-order Post-order is the only sensible direction for the reset signals. For example, suppose pre-order is used and the parent has some data structures that cache children state (for example a list of active requests). When the reset method is invoked on the parent, these caches could be in any state. If post-order is used, on the other hand, these will be in a known state when the reset method is invoked on the parent. This change means that it is no longer possible to block the visit of the devices, so the callback is changed to return void. This is not a problem, because PCI was returning 1 exactly in order to achieve the same ordering that this patch implements. PCI can then rely on the qdev core having sent a "reset signal" (whatever that means) to the device, and only do the PCI-specific initialization with pci_do_device_reset. MST: fixed up virtio-ccw Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. 
Tsirkin --- hw/core/qdev.c | 6 +++--- hw/pci/pci.c | 31 ++++++++++++++++--------------- hw/s390x/virtio-ccw.c | 5 +---- 3 files changed, 20 insertions(+), 22 deletions(-) (limited to 'hw') diff --git a/hw/core/qdev.c b/hw/core/qdev.c index 5ddf1aa..d2ffe35 100644 --- a/hw/core/qdev.c +++ b/hw/core/qdev.c @@ -233,19 +233,19 @@ static int qbus_reset_one(BusState *bus, void *opaque) { BusClass *bc = BUS_GET_CLASS(bus); if (bc->reset) { - return bc->reset(bus); + bc->reset(bus); } return 0; } void qdev_reset_all(DeviceState *dev) { - qdev_walk_children(dev, qdev_reset_one, qbus_reset_one, NULL, NULL, NULL); + qdev_walk_children(dev, NULL, NULL, qdev_reset_one, qbus_reset_one, NULL); } void qbus_reset_all(BusState *bus) { - qbus_walk_children(bus, qdev_reset_one, qbus_reset_one, NULL, NULL, NULL); + qbus_walk_children(bus, NULL, NULL, qdev_reset_one, qbus_reset_one, NULL); } void qbus_reset_all_fn(void *opaque) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 131e599..aa2a395 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -46,7 +46,7 @@ static void pcibus_dev_print(Monitor *mon, DeviceState *dev, int indent); static char *pcibus_get_dev_path(DeviceState *dev); static char *pcibus_get_fw_dev_path(DeviceState *dev); -static int pcibus_reset(BusState *qbus); +static void pcibus_reset(BusState *qbus); static void pci_bus_finalize(Object *obj); static Property pci_props[] = { @@ -167,16 +167,10 @@ void pci_device_deassert_intx(PCIDevice *dev) } } -/* - * This function is called on #RST and FLR. - * FLR if PCI_EXP_DEVCTL_BCR_FLR is set - */ -void pci_device_reset(PCIDevice *dev) +static void pci_do_device_reset(PCIDevice *dev) { int r; - qdev_reset_all(&dev->qdev); - dev->irq_state = 0; pci_update_irq_status(dev); pci_device_deassert_intx(dev); @@ -209,27 +203,34 @@ void pci_device_reset(PCIDevice *dev) } /* + * This function is called on #RST and FLR. 
+ * FLR if PCI_EXP_DEVCTL_BCR_FLR is set + */ +void pci_device_reset(PCIDevice *dev) +{ + qdev_reset_all(&dev->qdev); + pci_do_device_reset(dev); +} + +/* * Trigger pci bus reset under a given bus. - * To be called on RST# assert. + * Called via qbus_reset_all on RST# assert, after the devices + * have been reset qdev_reset_all-ed already. */ -static int pcibus_reset(BusState *qbus) +static void pcibus_reset(BusState *qbus) { PCIBus *bus = DO_UPCAST(PCIBus, qbus, qbus); int i; for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { if (bus->devices[i]) { - pci_device_reset(bus->devices[i]); + pci_do_device_reset(bus->devices[i]); } } for (i = 0; i < bus->nirq; i++) { assert(bus->irq_count[i] == 0); } - - /* topology traverse is done by pci_bus_reset(). - Tell qbus/qdev walker not to traverse the tree */ - return 1; } static void pci_host_bus_register(PCIBus *bus, DeviceState *parent) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index ecc80ec..b79f04e 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -30,13 +30,10 @@ static void virtio_ccw_bus_new(VirtioBusState *bus, size_t bus_size, VirtioCcwDevice *dev); -static int virtual_css_bus_reset(BusState *qbus) +static void virtual_css_bus_reset(BusState *qbus) { /* This should actually be modelled via the generic css */ css_reset(); - - /* we dont traverse ourself, return 0 */ - return 0; } -- cgit v1.1 From ddaaefb4dd427d6d2e41c1cfbe0cd8d8e8d6aad9 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Sat, 21 Dec 2013 03:02:50 +0100 Subject: piix: fix 32bit pci hole Make the 32bit pci hole start at end of ram, so all possible address space is covered. We used to try and make addresses aligned so they are easier to cover with MTRRs, but since they are cosmetic on KVM, this is probably not worth worrying about. Of course the firmware can use less than that. Leaving space unused is no problem, mapping pci bars outside the hole causes problems though. 
Signed-off-by: Gerd Hoffmann Signed-off-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 1 + hw/pci-host/piix.c | 11 ++--------- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'hw') diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 832e20c..2766414 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -170,6 +170,7 @@ static void pc_init1(QEMUMachineInitArgs *args, if (pci_enabled) { pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, &isa_bus, gsi, system_memory, system_io, args->ram_size, + below_4g_mem_size, above_4g_mem_size, pci_memory, ram_memory); } else { diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 63be7f6..4229d09 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -311,6 +311,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, ram_addr_t ram_size, + ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *pci_address_space, MemoryRegion *ram_memory) @@ -340,15 +341,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, f->ram_memory = ram_memory; i440fx = I440FX_PCI_HOST_BRIDGE(dev); - /* Set PCI window size the way seabios has always done it. */ - /* Power of 2 so bios can cover it with a single MTRR */ - if (ram_size <= 0x80000000) { - i440fx->pci_info.w32.begin = 0x80000000; - } else if (ram_size <= 0xc0000000) { - i440fx->pci_info.w32.begin = 0xc0000000; - } else { - i440fx->pci_info.w32.begin = 0xe0000000; - } + i440fx->pci_info.w32.begin = below_4g_mem_size; /* setup pci memory mapping */ pc_pci_as_mapping_init(OBJECT(f), f->system_memory, -- cgit v1.1 From 83d0704734955bf1aa7697af7be2a50e11a80a42 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 20 Dec 2013 19:48:51 +0100 Subject: virtio: add back call to virtio_bus_device_unplugged This got lost in a rebase. Reported-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'hw') diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 144b9ca..a001e66 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1172,6 +1172,8 @@ static void virtio_device_unrealize(DeviceState *dev, Error **errp) VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev); Error *err = NULL; + virtio_bus_device_unplugged(vdev); + if (vdc->unrealize != NULL) { vdc->unrealize(dev, &err); if (err != NULL) { -- cgit v1.1