aboutsummaryrefslogtreecommitdiff
path: root/hw/i386/pc.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/i386/pc.c')
-rw-r--r--hw/i386/pc.c221
1 files changed, 191 insertions, 30 deletions
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 1690b19..3889ecc 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -54,6 +54,7 @@
#include "sysemu/qtest.h"
#include "kvm_i386.h"
#include "hw/xen/xen.h"
+#include "hw/xen/start_info.h"
#include "ui/qemu-spice.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
@@ -75,6 +76,7 @@
#include "hw/usb.h"
#include "hw/i386/intel_iommu.h"
#include "hw/net/ne2000-isa.h"
+#include "standard-headers/asm-x86/bootparam.h"
/* debug PC/ISA interrupts */
//#define DEBUG_IRQ
@@ -110,6 +112,9 @@ static struct e820_entry *e820_table;
static unsigned e820_entries;
struct hpet_fw_config hpet_cfg = {.count = UINT8_MAX};
+/* Physical Address of PVH entry point read from kernel ELF NOTE */
+static size_t pvh_start_addr;
+
GlobalProperty pc_compat_3_1[] = {
{ "intel-iommu", "dma-drain", "off" },
{ "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "off" },
@@ -1055,13 +1060,6 @@ static long get_file_size(FILE *f)
return size;
}
-/* setup_data types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-#define SETUP_PCI 3
-#define SETUP_EFI 4
-
struct setup_data {
uint64_t next;
uint32_t type;
@@ -1069,6 +1067,109 @@ struct setup_data {
uint8_t data[0];
} __attribute__((packed));
+
+/*
+ * The entry point into the kernel for PVH boot is different from
+ * the native entry point. The PVH entry is defined by the x86/HVM
+ * direct boot ABI and is available in an ELFNOTE in the kernel binary.
+ *
+ * This function is passed to load_elf() when it is called from
+ * load_elfboot() which then additionally checks for an ELF Note of
+ * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to
+ * parse the PVH entry address from the ELF Note.
+ *
+ * Due to trickery in elf_opts.h, load_elf() is actually available as
+ * load_elf32() or load_elf64() and this routine needs to be able
+ * to deal with being called as 32 or 64 bit.
+ *
+ * The address of the PVH entry point is saved to the 'pvh_start_addr'
+ * global variable. (although the entry point is 32-bit, the kernel
+ * binary can be either 32-bit or 64-bit).
+ */
+static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64)
+{
+ size_t *elf_note_data_addr;
+
+ /* Check if ELF Note header passed in is valid */
+ if (arg1 == NULL) {
+ return 0;
+ }
+
+ if (is64) {
+ struct elf64_note *nhdr64 = (struct elf64_note *)arg1;
+ uint64_t nhdr_size64 = sizeof(struct elf64_note);
+ uint64_t phdr_align = *(uint64_t *)arg2;
+ uint64_t nhdr_namesz = nhdr64->n_namesz;
+
+ elf_note_data_addr =
+ ((void *)nhdr64) + nhdr_size64 +
+ QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
+ } else {
+ struct elf32_note *nhdr32 = (struct elf32_note *)arg1;
+ uint32_t nhdr_size32 = sizeof(struct elf32_note);
+ uint32_t phdr_align = *(uint32_t *)arg2;
+ uint32_t nhdr_namesz = nhdr32->n_namesz;
+
+ elf_note_data_addr =
+ ((void *)nhdr32) + nhdr_size32 +
+ QEMU_ALIGN_UP(nhdr_namesz, phdr_align);
+ }
+
+ pvh_start_addr = *elf_note_data_addr;
+
+ return pvh_start_addr;
+}
+
+static bool load_elfboot(const char *kernel_filename,
+ int kernel_file_size,
+ uint8_t *header,
+ size_t pvh_xen_start_addr,
+ FWCfgState *fw_cfg)
+{
+ uint32_t flags = 0;
+ uint32_t mh_load_addr = 0;
+ uint32_t elf_kernel_size = 0;
+ uint64_t elf_entry;
+ uint64_t elf_low, elf_high;
+ int kernel_size;
+
+ if (ldl_p(header) != 0x464c457f) {
+ return false; /* no elfboot */
+ }
+
+ bool elf_is64 = header[EI_CLASS] == ELFCLASS64;
+ flags = elf_is64 ?
+ ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags;
+
+ if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */
+ error_report("elfboot unsupported flags = %x", flags);
+ exit(1);
+ }
+
+ uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY;
+ kernel_size = load_elf(kernel_filename, read_pvh_start_addr,
+ NULL, &elf_note_type, &elf_entry,
+ &elf_low, &elf_high, 0, I386_ELF_MACHINE,
+ 0, 0);
+
+ if (kernel_size < 0) {
+ error_report("Error while loading elf kernel");
+ exit(1);
+ }
+ mh_load_addr = elf_low;
+ elf_kernel_size = elf_high - elf_low;
+
+ if (pvh_start_addr == 0) {
+ error_report("Error loading uncompressed kernel without PVH ELF Note");
+ exit(1);
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size);
+
+ return true;
+}
+
static void load_linux(PCMachineState *pcms,
FWCfgState *fw_cfg)
{
@@ -1108,6 +1209,59 @@ static void load_linux(PCMachineState *pcms,
if (ldl_p(header+0x202) == 0x53726448) {
protocol = lduw_p(header+0x206);
} else {
+ /*
+ * Check if the file is an uncompressed kernel file (ELF) and load it,
+ * saving the PVH entry point used by the x86/HVM direct boot ABI.
+ * If load_elfboot() is successful, populate the fw_cfg info.
+ */
+ if (pcmc->pvh_enabled &&
+ load_elfboot(kernel_filename, kernel_size,
+ header, pvh_start_addr, fw_cfg)) {
+ fclose(f);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
+ strlen(kernel_cmdline) + 1);
+ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header));
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA,
+ header, sizeof(header));
+
+ /* load initrd */
+ if (initrd_filename) {
+ gsize initrd_size;
+ gchar *initrd_data;
+ GError *gerr = NULL;
+
+ if (!g_file_get_contents(initrd_filename, &initrd_data,
+ &initrd_size, &gerr)) {
+ fprintf(stderr, "qemu: error reading initrd %s: %s\n",
+ initrd_filename, gerr->message);
+ exit(1);
+ }
+
+ initrd_max = pcms->below_4g_mem_size - pcmc->acpi_data_size - 1;
+ if (initrd_size >= initrd_max) {
+ fprintf(stderr, "qemu: initrd is too large, cannot support."
+ "(max: %"PRIu32", need %"PRId64")\n",
+ initrd_max, (uint64_t)initrd_size);
+ exit(1);
+ }
+
+ initrd_addr = (initrd_max - initrd_size) & ~4095;
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data,
+ initrd_size);
+ }
+
+ option_rom[nb_option_roms].bootindex = 0;
+ option_rom[nb_option_roms].name = "pvh.bin";
+ nb_option_roms++;
+
+ return;
+ }
/* This looks like a multiboot kernel. If it is, let's stop
treating it like a Linux kernel. */
if (load_multiboot(fw_cfg, f, kernel_filename, initrd_filename,
@@ -1145,7 +1299,26 @@ static void load_linux(PCMachineState *pcms,
#endif
/* highest address for loading the initrd */
- if (protocol >= 0x203) {
+ if (protocol >= 0x20c &&
+ lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) {
+ /*
+ * Linux has supported initrd up to 4 GB for a very long time (2007,
+ * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013),
+ * though it only sets initrd_max to 2 GB to "work around bootloader
+ * bugs". Luckily, QEMU firmware(which does something like bootloader)
+ * has supported this.
+ *
+ * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can
+ * be loaded into any address.
+ *
+ * In addition, initrd_max is uint32_t simply because QEMU doesn't
+ * support the 64-bit boot protocol (specifically the ext_ramdisk_image
+ * field).
+ *
+ * Therefore here just limit initrd_max to UINT32_MAX simply as well.
+ */
+ initrd_max = UINT32_MAX;
+ } else if (protocol >= 0x203) {
initrd_max = ldl_p(header+0x22c);
} else {
initrd_max = 0x37ffffff;
@@ -1557,6 +1730,7 @@ void xen_load_linux(PCMachineState *pcms)
for (i = 0; i < nb_option_roms; i++) {
assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
!strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
+ !strcmp(option_rom[i].name, "pvh.bin") ||
!strcmp(option_rom[i].name, "multiboot.bin"));
rom_add_option(option_rom[i].name, option_rom[i].bootindex);
}
@@ -1948,7 +2122,6 @@ static void pc_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
static void pc_memory_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
@@ -1962,8 +2135,7 @@ static void pc_memory_plug(HotplugHandler *hotplug_dev,
nvdimm_plug(&pcms->acpi_nvdimm_state);
}
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
+ hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &error_abort);
out:
error_propagate(errp, local_err);
}
@@ -1971,7 +2143,6 @@ out:
static void pc_memory_unplug_request(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
@@ -1992,9 +2163,8 @@ static void pc_memory_unplug_request(HotplugHandler *hotplug_dev,
goto out;
}
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev,
+ &local_err);
out:
error_propagate(errp, local_err);
}
@@ -2003,12 +2173,9 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
if (local_err) {
goto out;
}
@@ -2050,14 +2217,12 @@ static void pc_cpu_plug(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
CPUArchId *found_cpu;
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
X86CPU *cpu = X86_CPU(dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
if (pcms->acpi_dev) {
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
+ hotplug_handler_plug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
if (local_err) {
goto out;
}
@@ -2081,7 +2246,6 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
int idx = -1;
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
X86CPU *cpu = X86_CPU(dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
@@ -2098,9 +2262,8 @@ static void pc_cpu_unplug_request_cb(HotplugHandler *hotplug_dev,
goto out;
}
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug_request(HOTPLUG_HANDLER(pcms->acpi_dev), dev,
+ &local_err);
if (local_err) {
goto out;
}
@@ -2114,14 +2277,11 @@ static void pc_cpu_unplug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
CPUArchId *found_cpu;
- HotplugHandlerClass *hhc;
Error *local_err = NULL;
X86CPU *cpu = X86_CPU(dev);
PCMachineState *pcms = PC_MACHINE(hotplug_dev);
- hhc = HOTPLUG_HANDLER_GET_CLASS(pcms->acpi_dev);
- hhc->unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
-
+ hotplug_handler_unplug(HOTPLUG_HANDLER(pcms->acpi_dev), dev, &local_err);
if (local_err) {
goto out;
}
@@ -2623,6 +2783,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
pcmc->acpi_data_size = 0x20000 + 0x8000;
pcmc->save_tsc_khz = true;
pcmc->linuxboot_dma_enabled = true;
+ pcmc->pvh_enabled = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotplug_handler;
mc->cpu_index_to_instance_props = pc_cpu_index_to_props;