diff options
Diffstat (limited to 'hw/i386/pc.c')
-rw-r--r-- | hw/i386/pc.c | 221 |
1 files changed, 191 insertions, 30 deletions
diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1690b19..3889ecc 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -54,6 +54,7 @@ #include "sysemu/qtest.h" #include "kvm_i386.h" #include "hw/xen/xen.h" +#include "hw/xen/start_info.h" #include "ui/qemu-spice.h" #include "exec/memory.h" #include "exec/address-spaces.h" @@ -75,6 +76,7 @@ #include "hw/usb.h" #include "hw/i386/intel_iommu.h" #include "hw/net/ne2000-isa.h" +#include "standard-headers/asm-x86/bootparam.h" /* debug PC/ISA interrupts */ //#define DEBUG_IRQ @@ -110,6 +112,9 @@ static struct e820_entry *e820_table; static unsigned e820_entries; struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX}; +/* Physical Address of PVH entry point read from kernel ELF NOTE */ +static size_t pvh_start_addr; + GlobalProperty pc_compat_3_1[] = { { "intel-iommu", "dma-drain", "off" }, { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" }, @@ -1055,13 +1060,6 @@ static long get_file_size(FILE *f) return size; } -/* setup_data types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 - struct setup_data { uint64_t next; uint32_t type; @@ -1069,6 +1067,109 @@ struct setup_data { uint8_t data[0]; } __attribute__((packed)); + +/* + * The entry point into the kernel for PVH boot is different from + * the native entry point. The PVH entry is defined by the x86/HVM + * direct boot ABI and is available in an ELFNOTE in the kernel binary. + * + * This function is passed to load_elf() when it is called from + * load_elfboot() which then additionally checks for an ELF Note of + * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to + * parse the PVH entry address from the ELF Note. + * + * Due to trickery in elf_opts.h, load_elf() is actually available as + * load_elf32() or load_elf64() and this routine needs to be able + * to deal with being called as 32 or 64 bit. + * + * The address of the PVH entry point is saved to the 'pvh_start_addr' + * global variable. (although the entry point is 32-bit, the kernel + * binary can be either 32-bit or 64-bit). + */ +static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +{ + size_t *elf_note_data_addr; + + /* Check if ELF Note header passed in is valid */ + if (arg1 == NULL) { + return 0; + } + + if (is64) { + struct elf64_note *nhdr64 = (struct elf64_note *)arg1; + uint64_t nhdr_size64 = sizeof(struct elf64_note); + uint64_t phdr_align = *(uint64_t *)arg2; + uint64_t nhdr_namesz = nhdr64->n_namesz; + + elf_note_data_addr = + ((void *)nhdr64) + nhdr_size64 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + } else { + struct elf32_note *nhdr32 = (struct elf32_note *)arg1; + uint32_t nhdr_size32 = sizeof(struct elf32_note); + uint32_t phdr_align = *(uint32_t *)arg2; + uint32_t nhdr_namesz = nhdr32->n_namesz; + + elf_note_data_addr = + ((void *)nhdr32) + nhdr_size32 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + } + + pvh_start_addr = *elf_note_data_addr; + + return pvh_start_addr; +} + +static bool load_elfboot(const char *kernel_filename, + int kernel_file_size, + uint8_t *header, + size_t pvh_xen_start_addr, + FWCfgState *fw_cfg) +{ + uint32_t flags = 0; + uint32_t mh_load_addr = 0; + uint32_t elf_kernel_size = 0; + uint64_t elf_entry; + uint64_t elf_low, elf_high; + int kernel_size; + + if (ldl_p(header) != 0x464c457f) { + return false; /* no elfboot */ + } + + bool elf_is64 = header[EI_CLASS] == ELFCLASS64; + flags = elf_is64 ? + ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; + + if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ + error_report("elfboot unsupported flags = %x", flags); + exit(1); + } + + uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; + kernel_size = load_elf(kernel_filename, read_pvh_start_addr, + NULL, &elf_note_type, &elf_entry, + &elf_low, &elf_high, 0, I386_ELF_MACHINE, + 0, 0); + + if (kernel_size < 0) { + error_report("Error while loading elf kernel"); + exit(1); + } + mh_load_addr = elf_low; + elf_kernel_size = elf_high - elf_low; + + if (pvh_start_addr == 0) { + error_report("Error loading uncompressed kernel without PVH ELF Note"); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); + + return true; +} + static void load_linux(PCMachineState *pcms, FWCfgState *fw_cfg) { @@ -1108,6 +1209,59 @@ static void load_linux(PCMachineState *pcms, if (ldl_p(header+0x202) == 0x53726448) { protocol = lduw_p(header+0x206); } else { + /* + * Check if the file is an uncompressed kernel file (ELF) and load it, + * saving the PVH entry point used by the x86/HVM direct boot ABI. + * If load_elfboot() is successful, populate the fw_cfg info. + */ + if (pcmc->pvh_enabled && + load_elfboot(kernel_filename, kernel_size, + header, pvh_start_addr, fw_cfg)) { + fclose(f); + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, + header, sizeof(header)); + + /* load initrd */ + if (initrd_filename) { + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + if (!g_file_get_contents(initrd_filename, &initrd_data, + &initrd_size, &gerr)) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + + initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1; + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." + "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, + initrd_size); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "pvh.bin"; + nb_option_roms++; + + return; + } /* This looks like a multiboot kernel. If it is, let's stop treating it like a Linux kernel. */ if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename, @@ -1145,7 +1299,26 @@ static void load_linux(PCMachineState *pcms, #endif /* highest address for loading the initrd */ - if (protocol >= 0x203) { + if (protocol >= 0x20c && + lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { + /* + * Linux has supported initrd up to 4 GB for a very long time (2007, + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), + * though it only sets initrd_max to 2 GB to "work around bootloader + * bugs". Luckily, QEMU firmware(which does something like bootloader) + * has supported this. + * + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can + * be loaded into any address. + * + * In addition, initrd_max is uint32_t simply because QEMU doesn't + * support the 64-bit boot protocol (specifically the ext_ramdisk_image + * field). + * + * Therefore here just limit initrd_max to UINT32_MAX simply as well. + */ + initrd_max = UINT32_MAX; + } else if (protocol >= 0x203) { initrd_max = ldl_p(header+0x22c); } else { initrd_max = 0x37ffffff; @@ -1557,6 +1730,7 @@ void xen_load_linux(PCMachineState *pcms) for (i = 0; i < nb_option_roms; i++) { assert(!strcmp(option_rom[i].name, "linuxboot.bin") || !strcmp(option_rom[i].name, "linuxboot_dma.bin") || + !strcmp(option_rom[i].name, "pvh.bin") || !strcmp(option_rom[i].name, "multiboot.bin")); rom_add_option(option_rom[i].name, option_rom[i].bootindex); } @@ -1948,7 +2122,6 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, static void pc_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); @@ -1962,8 +2135,7 @@ static void pc_memory_plug(HotplugHandler *hotplug_dev, nvdimm_plug(&pcms->acpi_nvdimm_state); } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); + hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort); out: error_propagate(errp, local_err); } @@ -1971,7 +2143,6 @@ out: static void pc_memory_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { - HotplugHandlerClass *hhc; Error *local_err = NULL; PCMachineState *pcms = PC_MACHINE(hotplug_dev); @@ -1992,9 +2163,8 @@ static void pc_memory_unplug_request(HotplugHandler *hotplug_dev, goto out; } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, + &local_err); out: error_propagate(errp, local_err); } @@ -2003,12 +2173,9 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { PCMachineState *pcms = PC_MACHINE(hotplug_dev); - HotplugHandlerClass *hhc; Error *local_err = NULL; - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { goto out; } @@ -2050,14 +2217,12 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { CPUArchId *found_cpu; - HotplugHandlerClass *hhc; Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); if (pcms->acpi_dev) { - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); + hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { goto out; } @@ -2081,7 +2246,6 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { int idx = -1; - HotplugHandlerClass *hhc; Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); @@ -2098,9 +2262,8 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, goto out; } - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, + &local_err); if (local_err) { goto out; } @@ -2114,14 +2277,11 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { CPUArchId *found_cpu; - HotplugHandlerClass *hhc; Error *local_err = NULL; X86CPU *cpu = X86_CPU(dev); PCMachineState *pcms = PC_MACHINE(hotplug_dev); - hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev); - hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); - + hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err); if (local_err) { goto out; } @@ -2623,6 +2783,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->acpi_data_size = 0x20000 + 0x8000; pcmc->save_tsc_khz = true; pcmc->linuxboot_dma_enabled = true; + pcmc->pvh_enabled = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = pc_get_hotplug_handler; mc->cpu_index_to_instance_props = pc_cpu_index_to_props; |