Diffstat (limited to 'hw')
57 files changed, 2434 insertions, 844 deletions
diff --git a/hw/acpi/aml-build-stub.c b/hw/acpi/aml-build-stub.c index 8d8ad1a..89a8fec 100644 --- a/hw/acpi/aml-build-stub.c +++ b/hw/acpi/aml-build-stub.c @@ -26,6 +26,16 @@ void aml_append(Aml *parent_ctx, Aml *child) { } +Aml *aml_return(Aml *val) +{ + return NULL; +} + +Aml *aml_method(const char *name, int arg_count, AmlSerializeFlag sflag) +{ + return NULL; +} + Aml *aml_resource_template(void) { return NULL; diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index e6bfac9..42feb4d 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -2070,7 +2070,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, acpi_table_end(linker, &table); } -/* build rev1/rev3/rev5.1 FADT */ +/* build rev1/rev3/rev5.1/rev6.0 FADT */ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, const char *oem_id, const char *oem_table_id) { @@ -2193,8 +2193,15 @@ void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, /* SLEEP_STATUS_REG */ build_append_gas_from_struct(tbl, &f->sleep_sts); - /* TODO: extra fields need to be added to support revisions above rev5 */ - assert(f->rev == 5); + if (f->rev == 5) { + goto done; + } + + /* Hypervisor Vendor Identity */ + build_append_padded_str(tbl, "QEMU", 8, '\0'); + + /* TODO: extra fields need to be added to support revisions above rev6 */ + assert(f->rev == 6); done: acpi_table_end(linker, &table); diff --git a/hw/acpi/erst.c b/hw/acpi/erst.c index df856b2..aefcc03 100644 --- a/hw/acpi/erst.c +++ b/hw/acpi/erst.c @@ -635,7 +635,7 @@ static unsigned read_erst_record(ERSTDeviceState *s) if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { rc = STATUS_FAILED; } - if ((s->record_offset + record_length) > exchange_length) { + if (record_length > exchange_length - s->record_offset) { rc = STATUS_FAILED; } /* If all is ok, copy the record to the exchange buffer */ @@ -684,7 +684,7 @@ static unsigned write_erst_record(ERSTDeviceState *s) if (record_length < UEFI_CPER_RECORD_MIN_SIZE) { return STATUS_FAILED; } - if ((s->record_offset + record_length) > exchange_length) { + if (record_length > exchange_length - s->record_offset) { return STATUS_FAILED; } @@ -716,7 +716,7 @@ static unsigned write_erst_record(ERSTDeviceState *s) if (nvram) { /* Write the record into the slot */ memcpy(nvram, exchange, record_length); - memset(nvram + record_length, exchange_length - record_length, 0xFF); + memset(nvram + record_length, 0xFF, exchange_length - record_length); /* If a new record, increment the record_count */ if (!record_found) { uint32_t record_count; diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 31e46df..a3b25a9 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -922,6 +922,7 @@ void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, #define NVDIMM_DSM_RFIT_STATUS "RSTA" #define NVDIMM_QEMU_RSVD_UUID "648B9CF2-CDA1-4312-8AD9-49C4AF32BD62" +#define NVDIMM_DEVICE_DSM_UUID "4309AC30-0D11-11E4-9191-0800200C9A66" static void nvdimm_build_common_dsm(Aml *dev, NVDIMMState *nvdimm_state) @@ -1029,15 +1030,14 @@ static void nvdimm_build_common_dsm(Aml *dev, /* UUID for QEMU internal use */), expected_uuid)); aml_append(elsectx, ifctx); elsectx2 = aml_else(); - aml_append(elsectx2, aml_store( - aml_touuid("4309AC30-0D11-11E4-9191-0800200C9A66") + aml_append(elsectx2, aml_store(aml_touuid(NVDIMM_DEVICE_DSM_UUID) /* UUID for NVDIMM Devices */, expected_uuid)); aml_append(elsectx, elsectx2); aml_append(method, elsectx); uuid_invalid = aml_lnot(aml_equal(uuid, expected_uuid)); - unsupport = 
aml_if(aml_or(unpatched, uuid_invalid, NULL)); + unsupport = aml_if(aml_lor(unpatched, uuid_invalid)); /* * function 0 is called to inquire what functions are supported by @@ -1069,10 +1069,9 @@ static void nvdimm_build_common_dsm(Aml *dev, * in the DSM Spec. */ pckg = aml_arg(3); - ifctx = aml_if(aml_and(aml_equal(aml_object_type(pckg), + ifctx = aml_if(aml_land(aml_equal(aml_object_type(pckg), aml_int(4 /* Package */)) /* It is a Package? */, - aml_equal(aml_sizeof(pckg), aml_int(1)) /* 1 element? */, - NULL)); + aml_equal(aml_sizeof(pckg), aml_int(1)) /* 1 element? */)); pckg_index = aml_local(2); pckg_buf = aml_local(3); @@ -1244,6 +1243,7 @@ static void nvdimm_build_fit(Aml *dev) static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots) { uint32_t slot; + Aml *method, *pkg, *field, *com_call; for (slot = 0; slot < ram_slots; slot++) { uint32_t handle = nvdimm_slot_to_handle(slot); @@ -1261,6 +1261,100 @@ static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots) */ aml_append(nvdimm_dev, aml_name_decl("_ADR", aml_int(handle))); + /* + * ACPI v6.4: Section 6.5.10 NVDIMM Label Methods + */ + /* _LSI */ + method = aml_method("_LSI", 0, AML_SERIALIZED); + com_call = aml_call5(NVDIMM_COMMON_DSM, + aml_touuid(NVDIMM_DEVICE_DSM_UUID), + aml_int(1), aml_int(4), aml_int(0), + aml_int(handle)); + aml_append(method, aml_store(com_call, aml_local(0))); + + aml_append(method, aml_create_dword_field(aml_local(0), + aml_int(0), "STTS")); + aml_append(method, aml_create_dword_field(aml_local(0), aml_int(4), + "SLSA")); + aml_append(method, aml_create_dword_field(aml_local(0), aml_int(8), + "MAXT")); + + pkg = aml_package(3); + aml_append(pkg, aml_name("STTS")); + aml_append(pkg, aml_name("SLSA")); + aml_append(pkg, aml_name("MAXT")); + aml_append(method, aml_store(pkg, aml_local(1))); + aml_append(method, aml_return(aml_local(1))); + + aml_append(nvdimm_dev, method); + + /* _LSR */ + method = aml_method("_LSR", 2, AML_SERIALIZED); + aml_append(method, aml_name_decl("INPT", aml_buffer(8, NULL))); + + aml_append(method, aml_create_dword_field(aml_name("INPT"), + aml_int(0), "OFST")); + aml_append(method, aml_create_dword_field(aml_name("INPT"), + aml_int(4), "LEN")); + aml_append(method, aml_store(aml_arg(0), aml_name("OFST"))); + aml_append(method, aml_store(aml_arg(1), aml_name("LEN"))); + + pkg = aml_package(1); + aml_append(pkg, aml_name("INPT")); + aml_append(method, aml_store(pkg, aml_local(0))); + + com_call = aml_call5(NVDIMM_COMMON_DSM, + aml_touuid(NVDIMM_DEVICE_DSM_UUID), + aml_int(1), aml_int(5), aml_local(0), + aml_int(handle)); + aml_append(method, aml_store(com_call, aml_local(3))); + field = aml_create_dword_field(aml_local(3), aml_int(0), "STTS"); + aml_append(method, field); + field = aml_create_field(aml_local(3), aml_int(32), + aml_shiftleft(aml_name("LEN"), aml_int(3)), + "LDAT"); + aml_append(method, field); + aml_append(method, aml_name_decl("LSA", aml_buffer(0, NULL))); + aml_append(method, aml_to_buffer(aml_name("LDAT"), aml_name("LSA"))); + + pkg = aml_package(2); + aml_append(pkg, aml_name("STTS")); + aml_append(pkg, aml_name("LSA")); + + aml_append(method, aml_store(pkg, aml_local(1))); + aml_append(method, aml_return(aml_local(1))); + + aml_append(nvdimm_dev, method); + + /* _LSW */ + method = aml_method("_LSW", 3, AML_SERIALIZED); + aml_append(method, aml_store(aml_arg(2), aml_local(2))); + aml_append(method, aml_name_decl("INPT", aml_buffer(8, NULL))); + field = aml_create_dword_field(aml_name("INPT"), + aml_int(0), "OFST"); + 
aml_append(method, field); + field = aml_create_dword_field(aml_name("INPT"), + aml_int(4), "TLEN"); + aml_append(method, field); + aml_append(method, aml_store(aml_arg(0), aml_name("OFST"))); + aml_append(method, aml_store(aml_arg(1), aml_name("TLEN"))); + + aml_append(method, aml_concatenate(aml_name("INPT"), aml_local(2), + aml_name("INPT"))); + pkg = aml_package(1); + aml_append(pkg, aml_name("INPT")); + aml_append(method, aml_store(pkg, aml_local(0))); + com_call = aml_call5(NVDIMM_COMMON_DSM, + aml_touuid(NVDIMM_DEVICE_DSM_UUID), + aml_int(1), aml_int(6), aml_local(0), + aml_int(handle)); + aml_append(method, aml_store(com_call, aml_local(3))); + field = aml_create_dword_field(aml_local(3), aml_int(0), "STTS"); + aml_append(method, field); + aml_append(method, aml_return(aml_name("STTS"))); + + aml_append(nvdimm_dev, method); + nvdimm_build_device_dsm(nvdimm_dev, handle); aml_append(root_dev, nvdimm_dev); } diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index 15fa79a..17fcde8 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -30,6 +30,7 @@ config ARM_VIRT select ACPI_VIOT select VIRTIO_MEM_SUPPORTED select ACPI_CXL + select ACPI_HMAT config CHEETAH bool diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 13c6e3e..4156111 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -42,6 +42,7 @@ #include "hw/acpi/memory_hotplug.h" #include "hw/acpi/generic_event_device.h" #include "hw/acpi/tpm.h" +#include "hw/acpi/hmat.h" #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bus.h" @@ -685,7 +686,7 @@ build_dbg2(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) }; /* - * ACPI spec, Revision 5.1 Errata A + * ACPI spec, Revision 6.0 Errata A * 5.2.12 Multiple APIC Description Table (MADT) */ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) @@ -704,7 +705,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) int i; VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); const MemMapEntry *memmap = vms->memmap; - AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = vms->oem_id, + AcpiTable table = { .sig = "APIC", .rev = 4, .oem_id = vms->oem_id, .oem_table_id = vms->oem_table_id }; acpi_table_begin(&table, table_data); @@ -739,7 +740,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) /* 5.2.12.14 GIC Structure */ build_append_int_noprefix(table_data, 0xB, 1); /* Type */ - build_append_int_noprefix(table_data, 76, 1); /* Length */ + build_append_int_noprefix(table_data, 80, 1); /* Length */ build_append_int_noprefix(table_data, 0, 2); /* Reserved */ build_append_int_noprefix(table_data, i, 4); /* GIC ID */ build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ @@ -759,6 +760,10 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ /* MPIDR */ build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); + /* Processor Power Efficiency Class */ + build_append_int_noprefix(table_data, 0, 1); + /* Reserved */ + build_append_int_noprefix(table_data, 0, 3); } if (vms->gic_version != VIRT_GIC_VERSION_2) { @@ -771,12 +776,6 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) if (its_class_name() && !vmc->no_its) { /* - * FIXME: Structure is from Revision 6.0 where 'GIC Structure' - * has additional fields on top of implemented 5.1 Errata A, - * to make it consistent with v6.0 we need to bump everything - * to v6.0 - */ 
- /* * ACPI spec, Revision 6.0 Errata A * (original 6.0 definition has invalid Length) * 5.2.12.18 GIC ITS Structure @@ -808,13 +807,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) } /* FADT */ -static void build_fadt_rev5(GArray *table_data, BIOSLinker *linker, +static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms, unsigned dsdt_tbl_offset) { - /* ACPI v5.1 */ + /* ACPI v6.0 */ AcpiFadtData fadt = { - .rev = 5, - .minor_ver = 1, + .rev = 6, + .minor_ver = 0, .flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI, .xdsdt_tbl_offset = &dsdt_tbl_offset, }; @@ -944,7 +943,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) /* FADT MADT PPTT GTDT MCFG SPCR DBG2 pointed to by RSDT */ acpi_add_table(table_offsets, tables_blob); - build_fadt_rev5(tables_blob, tables->linker, vms, dsdt); + build_fadt_rev6(tables_blob, tables->linker, vms, dsdt); acpi_add_table(table_offsets, tables_blob); build_madt(tables_blob, tables->linker, vms); @@ -989,6 +988,12 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_slit(tables_blob, tables->linker, ms, vms->oem_id, vms->oem_table_id); } + + if (ms->numa_state->hmat_enabled) { + acpi_add_table(table_offsets, tables_blob); + build_hmat(tables_blob, tables->linker, ms->numa_state, + vms->oem_id, vms->oem_table_id); + } } if (ms->nvdimms_state->is_enabled) { diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 13bf5cc..16ad400 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -168,13 +168,6 @@ static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp) goto err_guest_notifiers; } - ret = vhost_dev_start(&s->dev, vdev); - if (ret < 0) { - error_setg_errno(errp, -ret, "Error starting vhost"); - goto err_guest_notifiers; - } - s->started_vu = true; - /* guest_notifier_mask/pending not used yet, so just unmask * everything here. virtio-pci will do the right thing by * enabling/disabling irqfd. 
@@ -183,9 +176,20 @@ static int vhost_user_blk_start(VirtIODevice *vdev, Error **errp) vhost_virtqueue_mask(&s->dev, vdev, i, false); } + s->dev.vq_index_end = s->dev.nvqs; + ret = vhost_dev_start(&s->dev, vdev); + if (ret < 0) { + error_setg_errno(errp, -ret, "Error starting vhost"); + goto err_guest_notifiers; + } + s->started_vu = true; + return ret; err_guest_notifiers: + for (i = 0; i < s->dev.nvqs; i++) { + vhost_virtqueue_mask(&s->dev, vdev, i, true); + } k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); err_host_notifiers: vhost_dev_disable_notifiers(&s->dev, vdev); @@ -222,14 +226,10 @@ static void vhost_user_blk_stop(VirtIODevice *vdev) static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBlk *s = VHOST_USER_BLK(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); Error *local_err = NULL; int ret; - if (!vdev->vm_running) { - should_start = false; - } - if (!s->connected) { return; } diff --git a/hw/core/machine.c b/hw/core/machine.c index aa520e7..8d34caa 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -40,7 +40,9 @@ #include "hw/virtio/virtio-pci.h" #include "qom/object_interfaces.h" -GlobalProperty hw_compat_7_1[] = {}; +GlobalProperty hw_compat_7_1[] = { + { "virtio-device", "queue_reset", "false" }, +}; const size_t hw_compat_7_1_len = G_N_ELEMENTS(hw_compat_7_1); GlobalProperty hw_compat_7_0[] = { @@ -1176,9 +1178,7 @@ static void numa_validate_initiator(NumaState *numa_state) for (i = 0; i < numa_state->num_nodes; i++) { if (numa_info[i].initiator == MAX_NODES) { - error_report("The initiator of NUMA node %d is missing, use " - "'-numa node,initiator' option to declare it", i); - exit(1); + continue; } if (!numa_info[numa_info[i].initiator].present) { diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c new file mode 100644 index 0000000..3653aa5 --- /dev/null +++ b/hw/cxl/cxl-cdat.c @@ -0,0 +1,224 @@ +/* + * CXL CDAT Structure + * + * Copyright (C) 2021 Avery Design Systems, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/pci/pci.h" +#include "hw/cxl/cxl.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +static void cdat_len_check(CDATSubHeader *hdr, Error **errp) +{ + assert(hdr->length); + assert(hdr->reserved == 0); + + switch (hdr->type) { + case CDAT_TYPE_DSMAS: + assert(hdr->length == sizeof(CDATDsmas)); + break; + case CDAT_TYPE_DSLBIS: + assert(hdr->length == sizeof(CDATDslbis)); + break; + case CDAT_TYPE_DSMSCIS: + assert(hdr->length == sizeof(CDATDsmscis)); + break; + case CDAT_TYPE_DSIS: + assert(hdr->length == sizeof(CDATDsis)); + break; + case CDAT_TYPE_DSEMTS: + assert(hdr->length == sizeof(CDATDsemts)); + break; + case CDAT_TYPE_SSLBIS: + assert(hdr->length >= sizeof(CDATSslbisHeader)); + assert((hdr->length - sizeof(CDATSslbisHeader)) % + sizeof(CDATSslbe) == 0); + break; + default: + error_setg(errp, "Type %d is reserved", hdr->type); + } +} + +static void ct3_build_cdat(CDATObject *cdat, Error **errp) +{ + g_autofree CDATTableHeader *cdat_header = NULL; + g_autofree CDATEntry *cdat_st = NULL; + uint8_t sum = 0; + int ent, i; + + /* Use default table if fopen == NULL */ + assert(cdat->build_cdat_table); + + cdat_header = g_malloc0(sizeof(*cdat_header)); + if (!cdat_header) { + error_setg(errp, "Failed to allocate CDAT header"); + return; + } + + cdat->built_buf_len = cdat->build_cdat_table(&cdat->built_buf, cdat->private); + + if (!cdat->built_buf_len) { + /* Build later as not all data available yet */ + cdat->to_update = true; + return; + } + cdat->to_update = false; + + cdat_st = g_malloc0(sizeof(*cdat_st) * (cdat->built_buf_len + 1)); + if (!cdat_st) { + error_setg(errp, "Failed to allocate CDAT entry array"); + return; + } + + /* Entry 0 for CDAT header, starts with Entry 1 */ + for (ent = 1; ent < cdat->built_buf_len + 1; ent++) { + CDATSubHeader *hdr = cdat->built_buf[ent - 1]; + uint8_t *buf = (uint8_t *)cdat->built_buf[ent - 1]; + + cdat_st[ent].base = hdr; + cdat_st[ent].length = hdr->length; + + cdat_header->length += hdr->length; + for (i = 0; i < hdr->length; i++) { + sum += buf[i]; + } + } + + /* CDAT header */ + cdat_header->revision = CXL_CDAT_REV; + /* For now, no runtime updates */ + cdat_header->sequence = 0; + cdat_header->length += sizeof(CDATTableHeader); + sum += cdat_header->revision + cdat_header->sequence + + cdat_header->length; + /* Sum of all bytes including checksum must be 0 */ + cdat_header->checksum = ~sum + 1; + + cdat_st[0].base = g_steal_pointer(&cdat_header); + cdat_st[0].length = sizeof(*cdat_header); + cdat->entry_len = 1 + cdat->built_buf_len; + cdat->entry = g_steal_pointer(&cdat_st); +} + +static void ct3_load_cdat(CDATObject *cdat, Error **errp) +{ + g_autofree CDATEntry *cdat_st = NULL; + uint8_t sum = 0; + int num_ent; + int i = 0, ent = 1, file_size = 0; + CDATSubHeader *hdr; + FILE *fp = NULL; + + /* Read CDAT file and create its cache */ + fp = fopen(cdat->filename, "r"); + if (!fp) { + error_setg(errp, "CDAT: Unable to open file"); + return; + } + + fseek(fp, 0, SEEK_END); + file_size = ftell(fp); + fseek(fp, 0, SEEK_SET); + cdat->buf = g_malloc0(file_size); + + if (fread(cdat->buf, file_size, 1, fp) == 0) { + error_setg(errp, "CDAT: File read failed"); + return; + } + + fclose(fp); + + if (file_size < sizeof(CDATTableHeader)) { + error_setg(errp, "CDAT: File too short"); + return; + } + i = sizeof(CDATTableHeader); + num_ent = 1; + while (i < file_size) { + hdr = (CDATSubHeader *)(cdat->buf + i); + cdat_len_check(hdr, errp); + i += hdr->length; + num_ent++; + } + if (i != file_size) { 
+ error_setg(errp, "CDAT: File length missmatch"); + return; + } + + cdat_st = g_malloc0(sizeof(*cdat_st) * num_ent); + if (!cdat_st) { + error_setg(errp, "CDAT: Failed to allocate entry array"); + return; + } + + /* Set CDAT header, Entry = 0 */ + cdat_st[0].base = cdat->buf; + cdat_st[0].length = sizeof(CDATTableHeader); + i = 0; + + while (i < cdat_st[0].length) { + sum += cdat->buf[i++]; + } + + /* Read CDAT structures */ + while (i < file_size) { + hdr = (CDATSubHeader *)(cdat->buf + i); + cdat_len_check(hdr, errp); + + cdat_st[ent].base = hdr; + cdat_st[ent].length = hdr->length; + + while (cdat->buf + i < + (uint8_t *)cdat_st[ent].base + cdat_st[ent].length) { + assert(i < file_size); + sum += cdat->buf[i++]; + } + + ent++; + } + + if (sum != 0) { + warn_report("CDAT: Found checksum mismatch in %s", cdat->filename); + } + cdat->entry_len = num_ent; + cdat->entry = g_steal_pointer(&cdat_st); +} + +void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp) +{ + CDATObject *cdat = &cxl_cstate->cdat; + + if (cdat->filename) { + ct3_load_cdat(cdat, errp); + } else { + ct3_build_cdat(cdat, errp); + } +} + +void cxl_doe_cdat_update(CXLComponentState *cxl_cstate, Error **errp) +{ + CDATObject *cdat = &cxl_cstate->cdat; + + if (cdat->to_update) { + ct3_build_cdat(cdat, errp); + } +} + +void cxl_doe_cdat_release(CXLComponentState *cxl_cstate) +{ + CDATObject *cdat = &cxl_cstate->cdat; + + free(cdat->entry); + if (cdat->built_buf) { + cdat->free_cdat_table(cdat->built_buf, cdat->built_buf_len, + cdat->private); + } + if (cdat->buf) { + free(cdat->buf); + } +} diff --git a/hw/cxl/meson.build b/hw/cxl/meson.build index f117b99..cfa95ff 100644 --- a/hw/cxl/meson.build +++ b/hw/cxl/meson.build @@ -4,6 +4,7 @@ softmmu_ss.add(when: 'CONFIG_CXL', 'cxl-device-utils.c', 'cxl-mailbox-utils.c', 'cxl-host.c', + 'cxl-cdat.c', ), if_false: files( 'cxl-host-stubs.c', diff --git a/hw/display/acpi-vga-stub.c b/hw/display/acpi-vga-stub.c new file mode 100644 index 0000000..a9b0ecf --- /dev/null +++ b/hw/display/acpi-vga-stub.c @@ -0,0 +1,7 @@ +#include "qemu/osdep.h" +#include "hw/acpi/acpi_aml_interface.h" +#include "vga_int.h" + +void build_vga_aml(AcpiDevAmlIf *adev, Aml *scope) +{ +} diff --git a/hw/display/acpi-vga.c b/hw/display/acpi-vga.c new file mode 100644 index 0000000..f0e9ef1 --- /dev/null +++ b/hw/display/acpi-vga.c @@ -0,0 +1,26 @@ +#include "qemu/osdep.h" +#include "hw/acpi/acpi_aml_interface.h" +#include "hw/pci/pci.h" +#include "vga_int.h" + +void build_vga_aml(AcpiDevAmlIf *adev, Aml *scope) +{ + int s3d = 0; + Aml *method; + + if (object_dynamic_cast(OBJECT(adev), "qxl-vga")) { + s3d = 3; + } + + method = aml_method("_S1D", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0))); + aml_append(scope, method); + + method = aml_method("_S2D", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(0))); + aml_append(scope, method); + + method = aml_method("_S3D", 0, AML_NOTSERIALIZED); + aml_append(method, aml_return(aml_int(s3d))); + aml_append(scope, method); +} diff --git a/hw/display/meson.build b/hw/display/meson.build index adc53dd..7a725ed 100644 --- a/hw/display/meson.build +++ b/hw/display/meson.build @@ -38,10 +38,21 @@ softmmu_ss.add(when: 'CONFIG_NEXTCUBE', if_true: files('next-fb.c')) specific_ss.add(when: 'CONFIG_VGA', if_true: files('vga.c')) +if (config_all_devices.has_key('CONFIG_VGA_CIRRUS') or + config_all_devices.has_key('CONFIG_VGA_PCI') or + config_all_devices.has_key('CONFIG_VMWARE_VGA') or + config_all_devices.has_key('CONFIG_ATI_VGA') + ) + 
softmmu_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'), + if_false: files('acpi-vga-stub.c')) +endif + if config_all_devices.has_key('CONFIG_QXL') qxl_ss = ss.source_set() qxl_ss.add(when: 'CONFIG_QXL', if_true: [files('qxl.c', 'qxl-logger.c', 'qxl-render.c'), pixman, spice]) + qxl_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'), + if_false: files('acpi-vga-stub.c')) hw_display_modules += {'qxl': qxl_ss} endif @@ -52,6 +63,7 @@ softmmu_ss.add(when: 'CONFIG_ARTIST', if_true: files('artist.c')) softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 'ati_2d.c', 'ati_dbg.c')) + if config_all_devices.has_key('CONFIG_VIRTIO_GPU') virtio_gpu_ss = ss.source_set() virtio_gpu_ss.add(when: 'CONFIG_VIRTIO_GPU', @@ -87,14 +99,19 @@ if config_all_devices.has_key('CONFIG_VIRTIO_VGA') if_true: [files('virtio-vga.c'), pixman]) virtio_vga_ss.add(when: 'CONFIG_VHOST_USER_VGA', if_true: files('vhost-user-vga.c')) + virtio_vga_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'), + if_false: files('acpi-vga-stub.c')) hw_display_modules += {'virtio-vga': virtio_vga_ss} virtio_vga_gl_ss = ss.source_set() virtio_vga_gl_ss.add(when: ['CONFIG_VIRTIO_VGA', virgl, opengl], if_true: [files('virtio-vga-gl.c'), pixman]) + virtio_vga_gl_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'), + if_false: files('acpi-vga-stub.c')) hw_display_modules += {'virtio-vga-gl': virtio_vga_gl_ss} endif specific_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_lcdc.c')) +softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('acpi-vga-stub.c')) modules += { 'hw-display': hw_display_modules } diff --git a/hw/display/vga-pci.c b/hw/display/vga-pci.c index 3e5bc25..9a91de7 100644 --- a/hw/display/vga-pci.c +++ b/hw/display/vga-pci.c @@ -35,6 +35,7 @@ #include "hw/loader.h" #include "hw/display/edid.h" #include "qom/object.h" +#include "hw/acpi/acpi_aml_interface.h" enum vga_pci_flags { PCI_VGA_FLAG_ENABLE_MMIO = 1, @@ -354,11 +355,13 @@ static void vga_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + AcpiDevAmlIfClass *adevc = ACPI_DEV_AML_IF_CLASS(klass); k->vendor_id = PCI_VENDOR_ID_QEMU; k->device_id = PCI_DEVICE_ID_QEMU_VGA; dc->vmsd = &vmstate_vga_pci; set_bit(DEVICE_CATEGORY_DISPLAY, dc->categories); + adevc->build_dev_aml = build_vga_aml; } static const TypeInfo vga_pci_type_info = { @@ -369,6 +372,7 @@ static const TypeInfo vga_pci_type_info = { .class_init = vga_pci_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { TYPE_ACPI_DEV_AML_IF }, { }, }, }; diff --git a/hw/display/vga_int.h b/hw/display/vga_int.h index 305e700..330406a 100644 --- a/hw/display/vga_int.h +++ b/hw/display/vga_int.h @@ -30,6 +30,7 @@ #include "ui/console.h" #include "hw/display/bochs-vbe.h" +#include "hw/acpi/acpi_aml_interface.h" #define ST01_V_RETRACE 0x08 #define ST01_DISP_ENABLE 0x01 @@ -195,4 +196,5 @@ void pci_std_vga_mmio_region_init(VGACommonState *s, MemoryRegion *subs, bool qext, bool edid); +void build_vga_aml(AcpiDevAmlIf *adev, Aml *scope); #endif diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 4f54b61..d9eaa5f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -60,6 +60,7 @@ #include "hw/i386/fw_cfg.h" #include "hw/i386/ich9.h" #include "hw/pci/pci_bus.h" +#include "hw/pci-host/i440fx.h" #include "hw/pci-host/q35.h" #include "hw/i386/x86-iommu.h" @@ -112,7 +113,6 @@ typedef struct AcpiPmInfo { } AcpiPmInfo; typedef struct AcpiMiscInfo { - bool is_piix4; 
bool has_hpet; #ifdef CONFIG_TPM TPMVersion tpm_version; @@ -121,13 +121,6 @@ typedef struct AcpiMiscInfo { unsigned dsdt_size; } AcpiMiscInfo; -typedef struct AcpiBuildPciBusHotplugState { - GArray *device_table; - GArray *notify_table; - struct AcpiBuildPciBusHotplugState *parent; - bool pcihp_bridge_en; -} AcpiBuildPciBusHotplugState; - typedef struct FwCfgTPMConfig { uint32_t tpmppi_address; uint8_t tpm_version; @@ -288,17 +281,6 @@ static void acpi_get_pm_info(MachineState *machine, AcpiPmInfo *pm) static void acpi_get_misc_info(AcpiMiscInfo *info) { - Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM); - Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE); - assert(!!piix != !!lpc); - - if (piix) { - info->is_piix4 = true; - } - if (lpc) { - info->is_piix4 = false; - } - info->has_hpet = hpet_find(); #ifdef CONFIG_TPM info->tpm_version = tpm_get_version(tpm_find()); @@ -430,18 +412,11 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, bool hotpluggbale_slot = false; bool bridge_in_acpi = false; bool cold_plugged_bridge = false; - bool is_vga = false; if (pdev) { pc = PCI_DEVICE_GET_CLASS(pdev); dc = DEVICE_GET_CLASS(pdev); - if (pc->class_id == PCI_CLASS_BRIDGE_ISA) { - continue; - } - - is_vga = pc->class_id == PCI_CLASS_DISPLAY_VGA; - /* * Cold plugged bridges aren't themselves hot-pluggable. * Hotplugged bridges *are* hot-pluggable. @@ -455,9 +430,10 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, /* * allow describing coldplugged bridges in ACPI even if they are not * on function 0, as they are not unpluggable, for all other devices - * generate description only for function 0 per slot + * generate description only for function 0 per slot, and for other + * functions if device on function provides its own AML */ - if (func && !bridge_in_acpi) { + if (func && !bridge_in_acpi && !get_dev_aml_func(DEVICE(pdev))) { continue; } } else { @@ -489,28 +465,7 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(dev, aml_pci_device_dsm()); } - if (is_vga) { - /* add VGA specific AML methods */ - int s3d; - - if (object_dynamic_cast(OBJECT(pdev), "qxl-vga")) { - s3d = 3; - } else { - s3d = 0; - } - - method = aml_method("_S1D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S2D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(0))); - aml_append(dev, method); - - method = aml_method("_S3D", 0, AML_NOTSERIALIZED); - aml_append(method, aml_return(aml_int(s3d))); - aml_append(dev, method); - } + call_dev_aml_func(DEVICE(pdev), dev); bridge_in_acpi = cold_plugged_bridge && pcihp_bridge_en; if (bridge_in_acpi) { @@ -1030,7 +985,6 @@ static void build_piix4_pci0_int(Aml *table) { Aml *dev; Aml *crs; - Aml *field; Aml *method; uint32_t irqs; Aml *sb_scope = aml_scope("_SB"); @@ -1039,13 +993,6 @@ static void build_piix4_pci0_int(Aml *table) aml_append(pci0_scope, build_prt(true)); aml_append(sb_scope, pci0_scope); - field = aml_field("PCI0.ISA.P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQ0", 8)); - aml_append(field, aml_named_field("PRQ1", 8)); - aml_append(field, aml_named_field("PRQ2", 8)); - aml_append(field, aml_named_field("PRQ3", 8)); - aml_append(sb_scope, field); - aml_append(sb_scope, build_irq_status_method()); aml_append(sb_scope, build_iqcr_method(true)); @@ -1149,7 +1096,6 @@ static Aml *build_q35_routing_table(const char *str) static void 
build_q35_pci0_int(Aml *table) { - Aml *field; Aml *method; Aml *sb_scope = aml_scope("_SB"); Aml *pci0_scope = aml_scope("PCI0"); @@ -1186,18 +1132,6 @@ static void build_q35_pci0_int(Aml *table) aml_append(pci0_scope, method); aml_append(sb_scope, pci0_scope); - field = aml_field("PCI0.ISA.PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); - aml_append(field, aml_named_field("PRQA", 8)); - aml_append(field, aml_named_field("PRQB", 8)); - aml_append(field, aml_named_field("PRQC", 8)); - aml_append(field, aml_named_field("PRQD", 8)); - aml_append(field, aml_reserved_field(0x20)); - aml_append(field, aml_named_field("PRQE", 8)); - aml_append(field, aml_named_field("PRQF", 8)); - aml_append(field, aml_named_field("PRQG", 8)); - aml_append(field, aml_named_field("PRQH", 8)); - aml_append(sb_scope, field); - aml_append(sb_scope, build_irq_status_method()); aml_append(sb_scope, build_iqcr_method(false)); @@ -1262,54 +1196,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg) return dev; } -static void build_q35_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Object *obj; - bool ambiguous; - - /* - * temporarily fish out isa bridge, build_q35_isa_bridge() will be dropped - * once PCI is converted to AcpiDevAmlIf and would be ble to generate - * AML for bridge itself - */ - obj = object_resolve_path_type("", TYPE_ICH9_LPC_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x001F0000))); - - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - -static void build_piix4_isa_bridge(Aml *table) -{ - Aml *dev; - Aml *scope; - Object *obj; - bool ambiguous; - - /* - * temporarily fish out isa bridge, build_piix4_isa_bridge() will be dropped - * once PCI is converted to AcpiDevAmlIf and would be ble to generate - * AML for bridge itself - */ - obj = object_resolve_path_type("", TYPE_PIIX3_PCI_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - scope = aml_scope("_SB.PCI0"); - dev = aml_device("ISA"); - aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010000))); - - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr) { Aml *scope; @@ -1416,25 +1302,6 @@ static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug) return method; } -static void build_smb0(Aml *table, int devnr, int func) -{ - Aml *scope = aml_scope("_SB.PCI0"); - Aml *dev = aml_device("SMB0"); - bool ambiguous; - Object *obj; - /* - * temporarily fish out device hosting SMBUS, build_smb0 will be gone once - * PCI enumeration will be switched to call_dev_aml_func() - */ - obj = object_resolve_path_type("", TYPE_ICH9_SMB_DEVICE, &ambiguous); - assert(obj && !ambiguous); - - aml_append(dev, aml_name_decl("_ADR", aml_int(devnr << 16 | func))); - call_dev_aml_func(DEVICE(obj), dev); - aml_append(scope, dev); - aml_append(table, scope); -} - static void build_acpi0017(Aml *table) { Aml *dev, *scope, *method; @@ -1456,6 +1323,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiPmInfo *pm, AcpiMiscInfo *misc, Range *pci_hole, Range *pci_hole64, MachineState *machine) { + Object *i440fx = object_resolve_type_unambiguous(TYPE_I440FX_PCI_HOST_BRIDGE); + Object *q35 = object_resolve_type_unambiguous(TYPE_Q35_HOST_DEVICE); CrsRangeEntry *entry; Aml *dsdt, *sb_scope, *scope, *dev, *method, *field, *pkg, *crs; CrsRangeSet crs_range_set; @@ -1476,11 +1345,13 
@@ build_dsdt(GArray *table_data, BIOSLinker *linker, AcpiTable table = { .sig = "DSDT", .rev = 1, .oem_id = x86ms->oem_id, .oem_table_id = x86ms->oem_table_id }; + assert(!!i440fx != !!q35); + acpi_table_begin(&table, table_data); dsdt = init_aml_allocator(); build_dbg_aml(dsdt); - if (misc->is_piix4) { + if (i440fx) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); @@ -1489,12 +1360,11 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(sb_scope, dev); aml_append(dsdt, sb_scope); - build_piix4_isa_bridge(dsdt); if (pm->pcihp_bridge_en || pm->pcihp_root_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_piix4_pci0_int(dsdt); - } else { + } else if (q35) { sb_scope = aml_scope("_SB"); dev = aml_device("PCI0"); aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); @@ -1534,14 +1404,10 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); - build_q35_isa_bridge(dsdt); if (pm->pcihp_bridge_en) { build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base); } build_q35_pci0_int(dsdt); - if (pcms->smbus) { - build_smb0(dsdt, ICH9_SMB_DEV, ICH9_SMB_FUNC); - } } if (misc->has_hpet) { @@ -1554,6 +1420,18 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, aml_append(dsdt, sb_scope); } + scope = aml_scope("_GPE"); + { + aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); + if (machine->nvdimms_state->is_enabled) { + method = aml_method("_E04", 0, AML_NOTSERIALIZED); + aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), + aml_int(0x80))); + aml_append(scope, method); + } + } + aml_append(dsdt, scope); + if (pcmc->legacy_cpu_hotplug) { build_legacy_cpu_hotplug_aml(dsdt, machine, pm->cpu_hp_io_base); } else { @@ -1572,28 +1450,6 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, pcms->memhp_io_base); } - scope = aml_scope("_GPE"); - { - aml_append(scope, aml_name_decl("_HID", aml_string("ACPI0006"))); - - if (pm->pcihp_bridge_en || pm->pcihp_root_en) { - method = aml_method("_E01", 0, AML_NOTSERIALIZED); - aml_append(method, - aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); - aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); - aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); - aml_append(scope, method); - } - - if (machine->nvdimms_state->is_enabled) { - method = aml_method("_E04", 0, AML_NOTSERIALIZED); - aml_append(method, aml_notify(aml_name("\\_SB.NVDR"), - aml_int(0x80))); - aml_append(scope, method); - } - } - aml_append(dsdt, scope); - crs_range_set_init(&crs_range_set); bus = PC_MACHINE(machine)->bus; if (bus) { @@ -1872,6 +1728,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, } aml_append(dsdt, sb_scope); + if (pm->pcihp_bridge_en || pm->pcihp_root_en) { + scope = aml_scope("_GPE"); + { + method = aml_method("_E01", 0, AML_NOTSERIALIZED); + aml_append(method, + aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF)); + aml_append(method, aml_call0("\\_SB.PCI0.PCNT")); + aml_append(method, aml_release(aml_name("\\_SB.PCI0.BLCK"))); + aml_append(scope, method); + } + aml_append(dsdt, scope); + } + /* copy AML table into ACPI tables blob and patch header there */ g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len); acpi_table_end(linker, &table); diff --git a/hw/i386/e820_memory_layout.c b/hw/i386/e820_memory_layout.c index bcf9eaf..06970ac 100644 --- a/hw/i386/e820_memory_layout.c +++ b/hw/i386/e820_memory_layout.c @@ -11,29 +11,11 @@ #include "e820_memory_layout.h" static size_t e820_entries; -struct e820_table 
e820_reserve; struct e820_entry *e820_table; int e820_add_entry(uint64_t address, uint64_t length, uint32_t type) { - int index = le32_to_cpu(e820_reserve.count); - struct e820_entry *entry; - - if (type != E820_RAM) { - /* old FW_CFG_E820_TABLE entry -- reservations only */ - if (index >= E820_NR_ENTRIES) { - return -EBUSY; - } - entry = &e820_reserve.entry[index++]; - - entry->address = cpu_to_le64(address); - entry->length = cpu_to_le64(length); - entry->type = cpu_to_le32(type); - - e820_reserve.count = cpu_to_le32(index); - } - - /* new "etc/e820" file -- include ram too */ + /* new "etc/e820" file -- include ram and reserved entries */ e820_table = g_renew(struct e820_entry, e820_table, e820_entries + 1); e820_table[e820_entries].address = cpu_to_le64(address); e820_table[e820_entries].length = cpu_to_le64(length); diff --git a/hw/i386/e820_memory_layout.h b/hw/i386/e820_memory_layout.h index 04f9378..7c239aa 100644 --- a/hw/i386/e820_memory_layout.h +++ b/hw/i386/e820_memory_layout.h @@ -16,20 +16,12 @@ #define E820_NVS 4 #define E820_UNUSABLE 5 -#define E820_NR_ENTRIES 16 - struct e820_entry { uint64_t address; uint64_t length; uint32_t type; } QEMU_PACKED __attribute((__aligned__(4))); -struct e820_table { - uint32_t count; - struct e820_entry entry[E820_NR_ENTRIES]; -} QEMU_PACKED __attribute((__aligned__(4))); - -extern struct e820_table e820_reserve; extern struct e820_entry *e820_table; int e820_add_entry(uint64_t address, uint64_t length, uint32_t type); diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index a283785..72a42f3 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -36,7 +36,6 @@ const char *fw_cfg_arch_key_name(uint16_t key) {FW_CFG_ACPI_TABLES, "acpi_tables"}, {FW_CFG_SMBIOS_ENTRIES, "smbios_entries"}, {FW_CFG_IRQ0_OVERRIDE, "irq0_override"}, - {FW_CFG_E820_TABLE, "e820_table"}, {FW_CFG_HPET, "hpet"}, }; @@ -127,8 +126,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, #endif fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, sizeof(struct e820_entry) * e820_get_num_entries()); diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h index 275f15c..86ca7c1 100644 --- a/hw/i386/fw_cfg.h +++ b/hw/i386/fw_cfg.h @@ -17,7 +17,6 @@ #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0) #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1) #define FW_CFG_IRQ0_OVERRIDE (FW_CFG_ARCH_LOCAL + 2) -#define FW_CFG_E820_TABLE (FW_CFG_ARCH_LOCAL + 3) #define FW_CFG_HPET (FW_CFG_ARCH_LOCAL + 4) FWCfgState *fw_cfg_arch_create(MachineState *ms, diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 6524c2e..a08ee85 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -49,17 +49,24 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) -#define VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write) {\ - if (ret_fr) { \ - ret_fr = -ret_fr; \ - if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { \ - trace_vtd_fault_disabled(); \ - } else { \ - vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write); \ - } \ - goto error; \ - } \ -} + +/* + * PCI bus number (or SID) is not reliable since the device is usaully + * initalized before guest can configure the PCI bridge + * (SECONDARY_BUS_NUMBER). 
+ */ +struct vtd_as_key { + PCIBus *bus; + uint8_t devfn; + uint32_t pasid; +}; + +struct vtd_iotlb_key { + uint64_t gfn; + uint32_t pasid; + uint32_t level; + uint16_t sid; +}; static void vtd_address_space_refresh_all(IntelIOMMUState *s); static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n); @@ -200,14 +207,46 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as) } /* GHashTable functions */ -static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2) +static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2) { - return *((const uint64_t *)v1) == *((const uint64_t *)v2); + const struct vtd_iotlb_key *key1 = v1; + const struct vtd_iotlb_key *key2 = v2; + + return key1->sid == key2->sid && + key1->pasid == key2->pasid && + key1->level == key2->level && + key1->gfn == key2->gfn; +} + +static guint vtd_iotlb_hash(gconstpointer v) +{ + const struct vtd_iotlb_key *key = v; + + return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) | + (key->level) << VTD_IOTLB_LVL_SHIFT | + (key->pasid) << VTD_IOTLB_PASID_SHIFT; } -static guint vtd_uint64_hash(gconstpointer v) +static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_as_key *key1 = v1; + const struct vtd_as_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn) && + (key1->pasid == key2->pasid); +} + +/* + * Note that we use pointer to PCIBus as the key, so hashing/shifting + * based on the pointer value is intended. Note that we deal with + * collisions through vtd_as_equal(). + */ +static guint vtd_as_hash(gconstpointer v) { - return (guint)*(const uint64_t *)v; + const struct vtd_as_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); } static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, @@ -248,22 +287,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, static void vtd_reset_context_cache_locked(IntelIOMMUState *s) { VTDAddressSpace *vtd_as; - VTDBus *vtd_bus; - GHashTableIter bus_it; - uint32_t devfn_it; + GHashTableIter as_it; trace_vtd_context_cache_reset(); - g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr); + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); - while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) { - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (!vtd_as) { - continue; - } - vtd_as->context_cache_entry.context_cache_gen = 0; - } + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + vtd_as->context_cache_entry.context_cache_gen = 0; } s->context_cache_gen = 1; } @@ -290,13 +321,6 @@ static void vtd_reset_caches(IntelIOMMUState *s) vtd_iommu_unlock(s); } -static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id, - uint32_t level) -{ - return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) | - ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT); -} - static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) { return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; @@ -304,15 +328,17 @@ static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) /* Must be called with IOMMU lock held */ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, - hwaddr addr) + uint32_t pasid, hwaddr addr) { + struct vtd_iotlb_key key; VTDIOTLBEntry *entry; - uint64_t key; int level; for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { - key = 
vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level), - source_id, level); + key.gfn = vtd_get_iotlb_gfn(addr, level); + key.level = level; + key.sid = source_id; + key.pasid = pasid; entry = g_hash_table_lookup(s->iotlb, &key); if (entry) { goto out; @@ -326,10 +352,11 @@ out: /* Must be with IOMMU lock held */ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, uint16_t domain_id, hwaddr addr, uint64_t slpte, - uint8_t access_flags, uint32_t level) + uint8_t access_flags, uint32_t level, + uint32_t pasid) { VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); - uint64_t *key = g_malloc(sizeof(*key)); + struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); uint64_t gfn = vtd_get_iotlb_gfn(addr, level); trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); @@ -343,7 +370,13 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->slpte = slpte; entry->access_flags = access_flags; entry->mask = vtd_slpt_level_page_mask(level); - *key = vtd_get_iotlb_key(gfn, source_id, level); + entry->pasid = pasid; + + key->gfn = gfn; + key->sid = source_id; + key->level = level; + key->pasid = pasid; + g_hash_table_replace(s->iotlb, key, entry); } @@ -436,7 +469,8 @@ static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index) /* Must not update F field now, should be done later */ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, uint16_t source_id, hwaddr addr, - VTDFaultReason fault, bool is_write) + VTDFaultReason fault, bool is_write, + bool is_pasid, uint32_t pasid) { uint64_t hi = 0, lo; hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4); @@ -444,7 +478,8 @@ static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index, assert(index < DMAR_FRCD_REG_NR); lo = VTD_FRCD_FI(addr); - hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault); + hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault) | + VTD_FRCD_PV(pasid) | VTD_FRCD_PP(is_pasid); if (!is_write) { hi |= VTD_FRCD_T; } @@ -475,7 +510,8 @@ static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id) /* Log and report an DMAR (address translation) fault to software */ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, hwaddr addr, VTDFaultReason fault, - bool is_write) + bool is_write, bool is_pasid, + uint32_t pasid) { uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG); @@ -502,7 +538,8 @@ static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id, return; } - vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write); + vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, + is_write, is_pasid, pasid); if (fsts_reg & VTD_FSTS_PPF) { error_report_once("There are pending faults already, " @@ -807,13 +844,15 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, VTDContextEntry *ce, - VTDPASIDEntry *pe) + VTDPASIDEntry *pe, + uint32_t pasid) { - uint32_t pasid; dma_addr_t pasid_dir_base; int ret = 0; - pasid = VTD_CE_GET_RID2PASID(ce); + if (pasid == PCI_NO_PASID) { + pasid = VTD_CE_GET_RID2PASID(ce); + } pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); ret = vtd_get_pe_from_pasid_table(s, pasid_dir_base, pasid, pe); @@ -822,15 +861,17 @@ static int vtd_ce_get_rid2pasid_entry(IntelIOMMUState *s, static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, VTDContextEntry *ce, - bool *pe_fpd_set) + bool *pe_fpd_set, + uint32_t pasid) { int ret; - uint32_t pasid; dma_addr_t pasid_dir_base; VTDPASIDDirEntry pdire; VTDPASIDEntry pe; - pasid = 
VTD_CE_GET_RID2PASID(ce); + if (pasid == PCI_NO_PASID) { + pasid = VTD_CE_GET_RID2PASID(ce); + } pasid_dir_base = VTD_CE_GET_PASID_DIR_TABLE(ce); /* @@ -876,12 +917,13 @@ static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce) } static uint32_t vtd_get_iova_level(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return VTD_PE_GET_LEVEL(&pe); } @@ -894,12 +936,13 @@ static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce) } static uint32_t vtd_get_iova_agaw(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return 30 + ((pe.val[0] >> 2) & VTD_SM_PASID_ENTRY_AW) * 9; } @@ -941,31 +984,33 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, } static inline uint64_t vtd_iova_limit(IntelIOMMUState *s, - VTDContextEntry *ce, uint8_t aw) + VTDContextEntry *ce, uint8_t aw, + uint32_t pasid) { - uint32_t ce_agaw = vtd_get_iova_agaw(s, ce); + uint32_t ce_agaw = vtd_get_iova_agaw(s, ce, pasid); return 1ULL << MIN(ce_agaw, aw); } /* Return true if IOVA passes range check, otherwise false. */ static inline bool vtd_iova_range_check(IntelIOMMUState *s, uint64_t iova, VTDContextEntry *ce, - uint8_t aw) + uint8_t aw, uint32_t pasid) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW * in CAP_REG and AW in context-entry. */ - return !(iova & ~(vtd_iova_limit(s, ce, aw) - 1)); + return !(iova & ~(vtd_iova_limit(s, ce, aw, pasid) - 1)); } static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; } @@ -993,50 +1038,25 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) return slpte & rsvd_mask; } -/* Find the VTD address space associated with a given bus number */ -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) -{ - VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num]; - GHashTableIter iter; - - if (vtd_bus) { - return vtd_bus; - } - - /* - * Iterate over the registered buses to find the one which - * currently holds this bus number and update the bus_num - * lookup table. - */ - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - if (pci_bus_num(vtd_bus->bus) == bus_num) { - s->vtd_as_by_bus_num[bus_num] = vtd_bus; - return vtd_bus; - } - } - - return NULL; -} - /* Given the @iova, get relevant @slptep. @slpte_level will be the last level * of the translation, can be used for deciding the size of large page. 
*/ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes, uint8_t aw_bits) + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) { - dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce); - uint32_t level = vtd_get_iova_level(s, ce); + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); uint32_t offset; uint64_t slpte; uint64_t access_right_check; uint64_t xlat, size; - if (!vtd_iova_range_check(s, iova, ce, aw_bits)) { - error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 ")", - __func__, iova); + if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) { + error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," + "pasid=0x%" PRIx32 ")", __func__, iova, pasid); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -1049,8 +1069,9 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (slpte == (uint64_t)-1) { error_report_once("%s: detected read error on DMAR slpte " - "(iova=0x%" PRIx64 ")", __func__, iova); - if (level == vtd_get_iova_level(s, ce)) { + "(iova=0x%" PRIx64 ", pasid=0x%" PRIx32 ")", + __func__, iova, pasid); + if (level == vtd_get_iova_level(s, ce, pasid)) { /* Invalid programming of context-entry */ return -VTD_FR_CONTEXT_ENTRY_INV; } else { @@ -1062,15 +1083,16 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, if (!(slpte & access_right_check)) { error_report_once("%s: detected slpte permission error " "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " - "slpte=0x%" PRIx64 ", write=%d)", __func__, - iova, level, slpte, is_write); + "slpte=0x%" PRIx64 ", write=%d, pasid=0x%" + PRIx32 ")", __func__, iova, level, + slpte, is_write, pasid); return is_write ? -VTD_FR_WRITE : -VTD_FR_READ; } if (vtd_slpte_nonzero_rsvd(slpte, level)) { error_report_once("%s: detected splte reserve non-zero " "iova=0x%" PRIx64 ", level=0x%" PRIx32 - "slpte=0x%" PRIx64 ")", __func__, iova, - level, slpte); + "slpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", + __func__, iova, level, slpte, pasid); return -VTD_FR_PAGING_ENTRY_RSVD; } @@ -1098,9 +1120,10 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, error_report_once("%s: xlat address is in interrupt range " "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " "slpte=0x%" PRIx64 ", write=%d, " - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ")", + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " + "pasid=0x%" PRIx32 ")", __func__, iova, level, slpte, is_write, - xlat, size); + xlat, size, pasid); return s->scalable_mode ? 
-VTD_FR_SM_INTERRUPT_ADDR : -VTD_FR_INTERRUPT_ADDR; } @@ -1314,18 +1337,19 @@ next: */ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t start, uint64_t end, - vtd_page_walk_info *info) + vtd_page_walk_info *info, + uint32_t pasid) { - dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce); - uint32_t level = vtd_get_iova_level(s, ce); + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); - if (!vtd_iova_range_check(s, start, ce, info->aw)) { + if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(s, end, ce, info->aw)) { + if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) { /* Fix end so that it reaches the maximum */ - end = vtd_iova_limit(s, ce, info->aw); + end = vtd_iova_limit(s, ce, info->aw, pasid); } return vtd_page_walk_level(addr, start, end, level, true, true, info); @@ -1393,7 +1417,7 @@ static int vtd_ce_rid2pasid_check(IntelIOMMUState *s, * has valid rid2pasid setting, which includes valid * rid2pasid field and corresponding pasid entry setting */ - return vtd_ce_get_rid2pasid_entry(s, ce, &pe); + return vtd_ce_get_rid2pasid_entry(s, ce, &pe, PCI_NO_PASID); } /* Map a device to its corresponding domain (context-entry) */ @@ -1476,12 +1500,13 @@ static int vtd_sync_shadow_page_hook(IOMMUTLBEvent *event, } static uint16_t vtd_get_domain_id(IntelIOMMUState *s, - VTDContextEntry *ce) + VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; if (s->root_scalable) { - vtd_ce_get_rid2pasid_entry(s, ce, &pe); + vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); return VTD_SM_PASID_ENTRY_DID(pe.val[1]); } @@ -1499,10 +1524,10 @@ static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as, .notify_unmap = true, .aw = s->aw_bits, .as = vtd_as, - .domain_id = vtd_get_domain_id(s, ce), + .domain_id = vtd_get_domain_id(s, ce, vtd_as->pasid), }; - return vtd_page_walk(s, ce, addr, addr + size, &info); + return vtd_page_walk(s, ce, addr, addr + size, &info, vtd_as->pasid); } static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) @@ -1546,16 +1571,19 @@ static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as) * 1st-level translation or 2nd-level translation, it depends * on PGTT setting. */ -static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce) +static bool vtd_dev_pt_enabled(IntelIOMMUState *s, VTDContextEntry *ce, + uint32_t pasid) { VTDPASIDEntry pe; int ret; if (s->root_scalable) { - ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe); + ret = vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); if (ret) { - error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32, - __func__, ret); + /* + * This error is guest triggerable. We should assumt PT + * not enabled for safety. 
+ */ return false; } return (VTD_PE_GET_TYPE(&pe) == VTD_SM_PASID_ENTRY_PT); @@ -1569,14 +1597,12 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) { IntelIOMMUState *s; VTDContextEntry ce; - int ret; assert(as); s = as->iommu_state; - ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus), - as->devfn, &ce); - if (ret) { + if (vtd_dev_to_context_entry(s, pci_bus_num(as->bus), as->devfn, + &ce)) { /* * Possibly failed to parse the context entry for some reason * (e.g., during init, or any guest configuration errors on @@ -1586,19 +1612,20 @@ static bool vtd_as_pt_enabled(VTDAddressSpace *as) return false; } - return vtd_dev_pt_enabled(s, &ce); + return vtd_dev_pt_enabled(s, &ce, as->pasid); } /* Return whether the device is using IOMMU translation. */ static bool vtd_switch_address_space(VTDAddressSpace *as) { - bool use_iommu; + bool use_iommu, pt; /* Whether we need to take the BQL on our own */ bool take_bql = !qemu_mutex_iothread_locked(); assert(as); use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as); + pt = as->iommu_state->dmar_enabled && vtd_as_pt_enabled(as); trace_vtd_switch_address_space(pci_bus_num(as->bus), VTD_PCI_SLOT(as->devfn), @@ -1618,11 +1645,53 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) if (use_iommu) { memory_region_set_enabled(&as->nodmar, false); memory_region_set_enabled(MEMORY_REGION(&as->iommu), true); + /* + * vt-d spec v3.4 3.14: + * + * """ + * Requests-with-PASID with input address in range 0xFEEx_xxxx + * are translated normally like any other request-with-PASID + * through DMA-remapping hardware. + * """ + * + * Need to disable ir for as with PASID. + */ + if (as->pasid != PCI_NO_PASID) { + memory_region_set_enabled(&as->iommu_ir, false); + } else { + memory_region_set_enabled(&as->iommu_ir, true); + } } else { memory_region_set_enabled(MEMORY_REGION(&as->iommu), false); memory_region_set_enabled(&as->nodmar, true); } + /* + * vtd-spec v3.4 3.14: + * + * """ + * Requests-with-PASID with input address in range 0xFEEx_xxxx are + * translated normally like any other request-with-PASID through + * DMA-remapping hardware. However, if such a request is processed + * using pass-through translation, it will be blocked as described + * in the paragraph below. + * + * Software must not program paging-structure entries to remap any + * address to the interrupt address range. Untranslated requests + * and translation requests that result in an address in the + * interrupt range will be blocked with condition code LGN.4 or + * SGN.8. + * """ + * + * We enable per as memory region (iommu_ir_fault) for catching + * the tranlsation for interrupt range through PASID + PT. 
+ */ + if (pt && as->pasid != PCI_NO_PASID) { + memory_region_set_enabled(&as->iommu_ir_fault, true); + } else { + memory_region_set_enabled(&as->iommu_ir_fault, false); + } + if (take_bql) { qemu_mutex_unlock_iothread(); } @@ -1632,24 +1701,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) static void vtd_switch_address_space_all(IntelIOMMUState *s) { + VTDAddressSpace *vtd_as; GHashTableIter iter; - VTDBus *vtd_bus; - int i; - - g_hash_table_iter_init(&iter, s->vtd_as_by_busptr); - while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) { - for (i = 0; i < PCI_DEVFN_MAX; i++) { - if (!vtd_bus->dev_as[i]) { - continue; - } - vtd_switch_address_space(vtd_bus->dev_as[i]); - } - } -} -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn) -{ - return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL); + g_hash_table_iter_init(&iter, s->vtd_address_spaces); + while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) { + vtd_switch_address_space(vtd_as); + } } static const bool vtd_qualified_faults[] = { @@ -1686,18 +1744,37 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } +static gboolean vtd_find_as_by_sid(gpointer key, gpointer value, + gpointer user_data) +{ + struct vtd_as_key *as_key = (struct vtd_as_key *)key; + uint16_t target_sid = *(uint16_t *)user_data; + uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn); + return sid == target_sid; +} + +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) +{ + uint8_t bus_num = PCI_BUS_NUM(sid); + VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num]; + + if (vtd_as && + (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) { + return vtd_as; + } + + vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid); + s->vtd_as_cache[bus_num] = vtd_as; + + return vtd_as; +} + static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) { - VTDBus *vtd_bus; VTDAddressSpace *vtd_as; bool success = false; - vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id)); - if (!vtd_bus) { - goto out; - } - - vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)]; + vtd_as = vtd_get_as_by_sid(s, source_id); if (!vtd_as) { goto out; } @@ -1711,6 +1788,22 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +static void vtd_report_fault(IntelIOMMUState *s, + int err, bool is_fpd_set, + uint16_t source_id, + hwaddr addr, + bool is_write, + bool is_pasid, + uint32_t pasid) +{ + if (is_fpd_set && vtd_is_qualified_fault(err)) { + trace_vtd_fault_disabled(); + } else { + vtd_report_dmar_fault(s, source_id, addr, err, is_write, + is_pasid, pasid); + } +} + /* Map dev to context-entry then do a paging-structures walk to do a iommu * translation. 
* @@ -1732,13 +1825,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; uint64_t slpte, page_mask; - uint32_t level; - uint16_t source_id = vtd_make_source_id(bus_num, devfn); + uint32_t level, pasid = vtd_as->pasid; + uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; bool is_fpd_set = false; bool reads = true; bool writes = true; uint8_t access_flags; + bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; /* @@ -1751,15 +1845,17 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry = &vtd_as->context_cache_entry; - /* Try to fetch slpte form IOTLB */ - iotlb_entry = vtd_lookup_iotlb(s, source_id, addr); - if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, - iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; - access_flags = iotlb_entry->access_flags; - page_mask = iotlb_entry->mask; - goto out; + /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */ + if (!rid2pasid) { + iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); + if (iotlb_entry) { + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + access_flags = iotlb_entry->access_flags; + page_mask = iotlb_entry->mask; + goto out; + } } /* Try to fetch context-entry from cache first */ @@ -1770,16 +1866,26 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, ce = cc_entry->context_entry; is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!is_fpd_set && s->root_scalable) { - ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, + source_id, addr, is_write, + false, 0); + goto error; + } } } else { ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce); is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; if (!ret_fr && !is_fpd_set && s->root_scalable) { - ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set); + ret_fr = vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, pasid); + } + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, + source_id, addr, is_write, + false, 0); + goto error; } - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); /* Update context-cache */ trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo, cc_entry->context_cache_gen, @@ -1788,11 +1894,15 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry->context_cache_gen = s->context_cache_gen; } + if (rid2pasid) { + pasid = VTD_CE_GET_RID2PASID(&ce); + } + /* * We don't need to translate for pass-through context entries. * Also, let's ignore IOTLB caching as well for PT devices. 
*/ - if (vtd_dev_pt_enabled(s, &ce)) { + if (vtd_dev_pt_enabled(s, &ce, pasid)) { entry->iova = addr & VTD_PAGE_MASK_4K; entry->translated_addr = entry->iova; entry->addr_mask = ~VTD_PAGE_MASK_4K; @@ -1813,14 +1923,31 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, return true; } + /* Try to fetch slpte form IOTLB for RID2PASID slow path */ + if (rid2pasid) { + iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); + if (iotlb_entry) { + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + iotlb_entry->domain_id); + slpte = iotlb_entry->slpte; + access_flags = iotlb_entry->access_flags; + page_mask = iotlb_entry->mask; + goto out; + } + } + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, - &reads, &writes, s->aw_bits); - VTD_PE_GET_FPD_ERR(ret_fr, is_fpd_set, s, source_id, addr, is_write); + &reads, &writes, s->aw_bits, pasid); + if (ret_fr) { + vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, + addr, is_write, pasid != PCI_NO_PASID, pasid); + goto error; + } page_mask = vtd_slpt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); - vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce), addr, slpte, - access_flags, level); + vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), + addr, slpte, access_flags, level, pasid); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; @@ -1905,11 +2032,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, uint16_t source_id, uint16_t func_mask) { + GHashTableIter as_it; uint16_t mask; - VTDBus *vtd_bus; VTDAddressSpace *vtd_as; uint8_t bus_n, devfn; - uint16_t devfn_it; trace_vtd_inv_desc_cc_devices(source_id, func_mask); @@ -1932,32 +2058,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s, mask = ~mask; bus_n = VTD_SID_TO_BUS(source_id); - vtd_bus = vtd_find_as_from_bus_num(s, bus_n); - if (vtd_bus) { - devfn = VTD_SID_TO_DEVFN(source_id); - for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) { - vtd_as = vtd_bus->dev_as[devfn_it]; - if (vtd_as && ((devfn_it & mask) == (devfn & mask))) { - trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it), - VTD_PCI_FUNC(devfn_it)); - vtd_iommu_lock(s); - vtd_as->context_cache_entry.context_cache_gen = 0; - vtd_iommu_unlock(s); - /* - * Do switch address space when needed, in case if the - * device passthrough bit is switched. - */ - vtd_switch_address_space(vtd_as); - /* - * So a device is moving out of (or moving into) a - * domain, resync the shadow page table. - * This won't bring bad even if we have no such - * notifier registered - the IOMMU notification - * framework will skip MAP notifications if that - * happened. - */ - vtd_sync_shadow_page_table(vtd_as); - } + devfn = VTD_SID_TO_DEVFN(source_id); + + g_hash_table_iter_init(&as_it, s->vtd_address_spaces); + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) { + if ((pci_bus_num(vtd_as->bus) == bus_n) && + (vtd_as->devfn & mask) == (devfn & mask)) { + trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn), + VTD_PCI_FUNC(vtd_as->devfn)); + vtd_iommu_lock(s); + vtd_as->context_cache_entry.context_cache_gen = 0; + vtd_iommu_unlock(s); + /* + * Do switch address space when needed, in case if the + * device passthrough bit is switched. + */ + vtd_switch_address_space(vtd_as); + /* + * So a device is moving out of (or moving into) a + * domain, resync the shadow page table. 
+ * This won't bring bad even if we have no such + * notifier registered - the IOMMU notification + * framework will skip MAP notifications if that + * happened. + */ + vtd_sync_shadow_page_table(vtd_as); } } } @@ -2014,7 +2139,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce) && - domain_id == vtd_get_domain_id(s, &ce)) { + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { vtd_sync_shadow_page_table(vtd_as); } } @@ -2022,7 +2147,7 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, uint16_t domain_id, hwaddr addr, - uint8_t am) + uint8_t am, uint32_t pasid) { VTDAddressSpace *vtd_as; VTDContextEntry ce; @@ -2030,9 +2155,12 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, hwaddr size = (1 << am) * VTD_PAGE_SIZE; QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) { + if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) { + continue; + } ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce); - if (!ret && domain_id == vtd_get_domain_id(s, &ce)) { + if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { if (vtd_as_has_map_notifier(vtd_as)) { /* * As long as we have MAP notifications registered in @@ -2076,7 +2204,7 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, vtd_iommu_lock(s); g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info); vtd_iommu_unlock(s); - vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am); + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID); } /* Flush IOTLB @@ -2473,18 +2601,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, { VTDAddressSpace *vtd_dev_as; IOMMUTLBEvent event; - struct VTDBus *vtd_bus; hwaddr addr; uint64_t sz; uint16_t sid; - uint8_t devfn; bool size; - uint8_t bus_num; addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi); sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo); - devfn = sid & 0xff; - bus_num = sid >> 8; size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi); if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) || @@ -2495,12 +2618,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, return false; } - vtd_bus = vtd_find_as_from_bus_num(s, bus_num); - if (!vtd_bus) { - goto done; - } - - vtd_dev_as = vtd_bus->dev_as[devfn]; + /* + * Using sid is OK since the guest should have finished the + * initialization of both the bus and device. 
+ */ + vtd_dev_as = vtd_get_as_by_sid(s, sid); if (!vtd_dev_as) { goto done; } @@ -3151,6 +3273,7 @@ static Property vtd_properties[] = { DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), + DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_END_OF_LIST(), @@ -3425,32 +3548,98 @@ static const MemoryRegionOps vtd_mem_ir_ops = { }, }; -VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) +static void vtd_report_ir_illegal_access(VTDAddressSpace *vtd_as, + hwaddr addr, bool is_write) { - uintptr_t key = (uintptr_t)bus; - VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key); - VTDAddressSpace *vtd_dev_as; - char name[128]; + IntelIOMMUState *s = vtd_as->iommu_state; + uint8_t bus_n = pci_bus_num(vtd_as->bus); + uint16_t sid = PCI_BUILD_BDF(bus_n, vtd_as->devfn); + bool is_fpd_set = false; + VTDContextEntry ce; - if (!vtd_bus) { - uintptr_t *new_key = g_malloc(sizeof(*new_key)); - *new_key = (uintptr_t)bus; - /* No corresponding free() */ - vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \ - PCI_DEVFN_MAX); - vtd_bus->bus = bus; - g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus); + assert(vtd_as->pasid != PCI_NO_PASID); + + /* Try out best to fetch FPD, we can't do anything more */ + if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) { + is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD; + if (!is_fpd_set && s->root_scalable) { + vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid); + } } - vtd_dev_as = vtd_bus->dev_as[devfn]; + vtd_report_fault(s, VTD_FR_SM_INTERRUPT_ADDR, + is_fpd_set, sid, addr, is_write, + true, vtd_as->pasid); +} + +static MemTxResult vtd_mem_ir_fault_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, + MemTxAttrs attrs) +{ + vtd_report_ir_illegal_access(opaque, addr, false); + return MEMTX_ERROR; +} + +static MemTxResult vtd_mem_ir_fault_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size, + MemTxAttrs attrs) +{ + vtd_report_ir_illegal_access(opaque, addr, true); + + return MEMTX_ERROR; +} + +static const MemoryRegionOps vtd_mem_ir_fault_ops = { + .read_with_attrs = vtd_mem_ir_fault_read, + .write_with_attrs = vtd_mem_ir_fault_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 1, + .max_access_size = 8, + }, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + }, +}; + +VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, + int devfn, unsigned int pasid) +{ + /* + * We can't simply use sid here since the bus number might not be + * initialized by the guest. 
+ */ + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + .pasid = pasid, + }; + VTDAddressSpace *vtd_dev_as; + char name[128]; + + vtd_dev_as = g_hash_table_lookup(s->vtd_address_spaces, &key); if (!vtd_dev_as) { - snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), - PCI_FUNC(devfn)); - vtd_bus->dev_as[devfn] = vtd_dev_as = g_new0(VTDAddressSpace, 1); + struct vtd_as_key *new_key = g_malloc(sizeof(*new_key)); + + new_key->bus = bus; + new_key->devfn = devfn; + new_key->pasid = pasid; + + if (pasid == PCI_NO_PASID) { + snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn), + PCI_FUNC(devfn)); + } else { + snprintf(name, sizeof(name), "vtd-%02x.%x-pasid-%x", PCI_SLOT(devfn), + PCI_FUNC(devfn), pasid); + } + + vtd_dev_as = g_new0(VTDAddressSpace, 1); vtd_dev_as->bus = bus; vtd_dev_as->devfn = (uint8_t)devfn; + vtd_dev_as->pasid = pasid; vtd_dev_as->iommu_state = s; vtd_dev_as->context_cache_entry.context_cache_gen = 0; vtd_dev_as->iova_tree = iova_tree_new(); @@ -3492,6 +3681,24 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) &vtd_dev_as->iommu_ir, 1); /* + * This region is used for catching fault to access interrupt + * range via passthrough + PASID. See also + * vtd_switch_address_space(). We can't use alias since we + * need to know the sid which is valid for MSI who uses + * bus_master_as (see msi_send_message()). + */ + memory_region_init_io(&vtd_dev_as->iommu_ir_fault, OBJECT(s), + &vtd_mem_ir_fault_ops, vtd_dev_as, "vtd-no-ir", + VTD_INTERRUPT_ADDR_SIZE); + /* + * Hook to root since when PT is enabled vtd_dev_as->iommu + * will be disabled. + */ + memory_region_add_subregion_overlap(MEMORY_REGION(&vtd_dev_as->root), + VTD_INTERRUPT_ADDR_FIRST, + &vtd_dev_as->iommu_ir_fault, 2); + + /* * Hook both the containers under the root container, we * switch between DMAR & noDMAR by enable/disable * corresponding sub-containers @@ -3503,6 +3710,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn) &vtd_dev_as->nodmar, 0); vtd_switch_address_space(vtd_dev_as); + + g_hash_table_insert(s->vtd_address_spaces, new_key, vtd_dev_as); } return vtd_dev_as; } @@ -3609,7 +3818,7 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) "legacy mode", bus_n, PCI_SLOT(vtd_as->devfn), PCI_FUNC(vtd_as->devfn), - vtd_get_domain_id(s, &ce), + vtd_get_domain_id(s, &ce, vtd_as->pasid), ce.hi, ce.lo); if (vtd_as_has_map_notifier(vtd_as)) { /* This is required only for MAP typed notifiers */ @@ -3619,10 +3828,10 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) .notify_unmap = false, .aw = s->aw_bits, .as = vtd_as, - .domain_id = vtd_get_domain_id(s, &ce), + .domain_id = vtd_get_domain_id(s, &ce, vtd_as->pasid), }; - vtd_page_walk(s, &ce, 0, ~0ULL, &info); + vtd_page_walk(s, &ce, 0, ~0ULL, &info, vtd_as->pasid); } } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), @@ -3722,6 +3931,10 @@ static void vtd_init(IntelIOMMUState *s) s->ecap |= VTD_ECAP_SC; } + if (s->pasid) { + s->ecap |= VTD_ECAP_PASID; + } + vtd_reset_caches(s); /* Define registers with default values and bit semantics */ @@ -3795,7 +4008,7 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) assert(0 <= devfn && devfn < PCI_DEVFN_MAX); - vtd_as = vtd_find_add_as(s, bus, devfn); + vtd_as = vtd_find_add_as(s, bus, devfn, PCI_NO_PASID); return &vtd_as->as; } @@ -3838,6 +4051,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) return false; } + if (s->pasid && 
!s->scalable_mode) { + error_setg(errp, "Need to set scalable mode for PASID"); + return false; + } + return true; } @@ -3874,6 +4092,17 @@ static void vtd_realize(DeviceState *dev, Error **errp) X86MachineState *x86ms = X86_MACHINE(ms); PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); + X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); + + if (s->pasid && x86_iommu->dt_supported) { + /* + * PASID-based-Device-TLB Invalidate Descriptor is not + * implemented and it requires support from vhost layer which + * needs to be implemented in the future. + */ + error_setg(errp, "PASID based device IOTLB is not supported"); + return; + } if (!vtd_decide_config(s, errp)) { return; @@ -3881,7 +4110,6 @@ static void vtd_realize(DeviceState *dev, Error **errp) QLIST_INIT(&s->vtd_as_with_notifiers); qemu_mutex_init(&s->iommu_lock); - memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num)); memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); @@ -3901,10 +4129,10 @@ static void vtd_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem); /* No corresponding destroy */ - s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, + s->iotlb = g_hash_table_new_full(vtd_iotlb_hash, vtd_iotlb_equal, g_free, g_free); - s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal, - g_free, g_free); + s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, + g_free, g_free); vtd_init(s); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR); pci_setup_iommu(bus, vtd_host_dma_iommu, dev); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 930ce61..f090e61 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -114,8 +114,9 @@ VTD_INTERRUPT_ADDR_FIRST + 1) /* The shift of source_id in the key of IOTLB hash table */ -#define VTD_IOTLB_SID_SHIFT 36 -#define VTD_IOTLB_LVL_SHIFT 52 +#define VTD_IOTLB_SID_SHIFT 20 +#define VTD_IOTLB_LVL_SHIFT 28 +#define VTD_IOTLB_PASID_SHIFT 30 #define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ /* IOTLB_REG */ @@ -191,6 +192,7 @@ #define VTD_ECAP_SC (1ULL << 7) #define VTD_ECAP_MHMV (15ULL << 20) #define VTD_ECAP_SRS (1ULL << 31) +#define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) @@ -211,6 +213,8 @@ #define VTD_CAP_DRAIN_READ (1ULL << 55) #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) +#define VTD_PASID_ID_SHIFT 20 +#define VTD_PASID_ID_MASK ((1ULL << VTD_PASID_ID_SHIFT) - 1) /* Supported Adjusted Guest Address Widths */ #define VTD_CAP_SAGAW_SHIFT 8 @@ -262,6 +266,8 @@ #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) /* For the low 64-bit of 128-bit */ #define VTD_FRCD_FI(val) ((val) & ~0xfffULL) +#define VTD_FRCD_PV(val) (((val) & 0xffffULL) << 40) +#define VTD_FRCD_PP(val) (((val) & 0x1) << 31) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason { @@ -379,6 +385,11 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL +#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4) +#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4) +#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK) +#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL +#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 
0xf80ULL /* Mask for Device IOTLB Invalidate Descriptor */ #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL) @@ -413,6 +424,7 @@ typedef union VTDInvDesc VTDInvDesc; /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; + uint32_t pasid; uint64_t addr; uint8_t mask; }; diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index ffd1884..170a331 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -324,8 +324,6 @@ static void microvm_memory_init(MicrovmMachineState *mms) fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, machine->smp.max_cpus); fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); fw_cfg_add_i32(fw_cfg, FW_CFG_IRQ0_OVERRIDE, 1); - fw_cfg_add_bytes(fw_cfg, FW_CFG_E820_TABLE, - &e820_reserve, sizeof(e820_reserve)); fw_cfg_add_file(fw_cfg, "etc/e820", e820_table, sizeof(struct e820_entry) * e820_get_num_entries()); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index ef14da5..546b703 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1061,7 +1061,6 @@ void pc_memory_init(PCMachineState *pcms, hwaddr cxl_size = MiB; cxl_base = pc_get_cxl_range_start(pcms); - e820_add_entry(cxl_base, cxl_size, E820_RESERVED); memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size); memory_region_add_subregion(system_memory, cxl_base, mr); cxl_resv_end = cxl_base + cxl_size; @@ -1077,7 +1076,6 @@ void pc_memory_init(PCMachineState *pcms, memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw, "cxl-fixed-memory-region", fw->size); memory_region_add_subregion(system_memory, fw->base, &fw->mr); - e820_add_entry(fw->base, fw->size, E820_RESERVED); cxl_fmw_base += fw->size; cxl_resv_end = cxl_fmw_base; } diff --git a/hw/i386/trace-events b/hw/i386/trace-events index e49814d..04fd71b 100644 --- a/hw/i386/trace-events +++ b/hw/i386/trace-events @@ -12,6 +12,8 @@ vtd_inv_desc_cc_devices(uint16_t sid, uint16_t fmask) "context invalidate device vtd_inv_desc_iotlb_global(void) "iotlb invalidate global" vtd_inv_desc_iotlb_domain(uint16_t domain) "iotlb invalidate whole domain 0x%"PRIx16 vtd_inv_desc_iotlb_pages(uint16_t domain, uint64_t addr, uint8_t mask) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8 +vtd_inv_desc_iotlb_pasid_pages(uint16_t domain, uint64_t addr, uint8_t mask, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" addr 0x%"PRIx64" mask 0x%"PRIx8" pasid 0x%"PRIx32 +vtd_inv_desc_iotlb_pasid(uint16_t domain, uint32_t pasid) "iotlb invalidate domain 0x%"PRIx16" pasid 0x%"PRIx32 vtd_inv_desc_wait_sw(uint64_t addr, uint32_t data) "wait invalidate status write addr 0x%"PRIx64" data 0x%"PRIx32 vtd_inv_desc_wait_irq(const char *msg) "%s" vtd_inv_desc_wait_write_fail(uint64_t hi, uint64_t lo) "write fail for wait desc hi 0x%"PRIx64" lo 0x%"PRIx64 diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 8694e58..0b0a83e 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -809,6 +809,7 @@ static void ich9_send_gpe(AcpiDeviceIf *adev, AcpiEventStatusBits ev) static void build_ich9_isa_aml(AcpiDevAmlIf *adev, Aml *scope) { + Aml *field; BusChild *kid; ICH9LPCState *s = ICH9_LPC_DEVICE(adev); BusState *bus = BUS(s->isa_bus); @@ -816,6 +817,28 @@ static void build_ich9_isa_aml(AcpiDevAmlIf *adev, Aml *scope) /* ICH9 PCI to ISA irq remapping */ aml_append(scope, aml_operation_region("PIRQ", AML_PCI_CONFIG, aml_int(0x60), 0x0C)); + /* Fields declarion has to happen *after* operation region */ + field = aml_field("PIRQ", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); + aml_append(field, 
aml_named_field("PRQA", 8)); + aml_append(field, aml_named_field("PRQB", 8)); + aml_append(field, aml_named_field("PRQC", 8)); + aml_append(field, aml_named_field("PRQD", 8)); + aml_append(field, aml_reserved_field(0x20)); + aml_append(field, aml_named_field("PRQE", 8)); + aml_append(field, aml_named_field("PRQF", 8)); + aml_append(field, aml_named_field("PRQG", 8)); + aml_append(field, aml_named_field("PRQH", 8)); + aml_append(scope, field); + + /* hack: put fields into _SB scope for LNKx to find them */ + aml_append(scope, aml_alias("PRQA", "\\_SB.PRQA")); + aml_append(scope, aml_alias("PRQB", "\\_SB.PRQB")); + aml_append(scope, aml_alias("PRQC", "\\_SB.PRQC")); + aml_append(scope, aml_alias("PRQD", "\\_SB.PRQD")); + aml_append(scope, aml_alias("PRQE", "\\_SB.PRQE")); + aml_append(scope, aml_alias("PRQF", "\\_SB.PRQF")); + aml_append(scope, aml_alias("PRQG", "\\_SB.PRQG")); + aml_append(scope, aml_alias("PRQH", "\\_SB.PRQH")); QTAILQ_FOREACH(kid, &bus->children, sibling) { call_dev_aml_func(DEVICE(kid->child), scope); diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index 808fd4e..f9b4af5 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -316,12 +316,27 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) { + Aml *field; BusChild *kid; BusState *bus = qdev_get_child_bus(DEVICE(adev), "isa.0"); /* PIIX PCI to ISA irq remapping */ aml_append(scope, aml_operation_region("P40C", AML_PCI_CONFIG, - aml_int(0x60), 0x04)); + aml_int(0x60), 0x04)); + /* Fields declarion has to happen *after* operation region */ + field = aml_field("P40C", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE); + aml_append(field, aml_named_field("PRQ0", 8)); + aml_append(field, aml_named_field("PRQ1", 8)); + aml_append(field, aml_named_field("PRQ2", 8)); + aml_append(field, aml_named_field("PRQ3", 8)); + aml_append(scope, field); + + /* hack: put fields into _SB scope for LNKx to find them */ + aml_append(scope, aml_alias("PRQ0", "\\_SB.PRQ0")); + aml_append(scope, aml_alias("PRQ1", "\\_SB.PRQ1")); + aml_append(scope, aml_alias("PRQ2", "\\_SB.PRQ2")); + aml_append(scope, aml_alias("PRQ3", "\\_SB.PRQ3")); + QTAILQ_FOREACH(kid, &bus->children, sibling) { call_dev_aml_func(DEVICE(kid->child), scope); } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index a71bf1a..2555902 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -12,7 +12,245 @@ #include "qemu/range.h" #include "qemu/rcu.h" #include "sysemu/hostmem.h" +#include "sysemu/numa.h" #include "hw/cxl/cxl.h" +#include "hw/pci/msix.h" + +#define DWORD_BYTE 4 + +/* Default CDAT entries for a memory region */ +enum { + CT3_CDAT_DSMAS, + CT3_CDAT_DSLBIS0, + CT3_CDAT_DSLBIS1, + CT3_CDAT_DSLBIS2, + CT3_CDAT_DSLBIS3, + CT3_CDAT_DSEMTS, + CT3_CDAT_NUM_ENTRIES +}; + +static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, + int dsmad_handle, MemoryRegion *mr) +{ + g_autofree CDATDsmas *dsmas = NULL; + g_autofree CDATDslbis *dslbis0 = NULL; + g_autofree CDATDslbis *dslbis1 = NULL; + g_autofree CDATDslbis *dslbis2 = NULL; + g_autofree CDATDslbis *dslbis3 = NULL; + g_autofree CDATDsemts *dsemts = NULL; + + dsmas = g_malloc(sizeof(*dsmas)); + if (!dsmas) { + return -ENOMEM; + } + *dsmas = (CDATDsmas) { + .header = { + .type = CDAT_TYPE_DSMAS, + .length = sizeof(*dsmas), + }, + .DSMADhandle = dsmad_handle, + .flags = CDAT_DSMAS_FLAG_NV, + .DPA_base = 0, + .DPA_length = int128_get64(mr->size), + }; + + /* For now, no memory side cache, plausiblish numbers */ + dslbis0 = g_malloc(sizeof(*dslbis0)); 
+ if (!dslbis0) { + return -ENOMEM; + } + *dslbis0 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis0), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_READ_LATENCY, + .entry_base_unit = 10000, /* 10ns base */ + .entry[0] = 15, /* 150ns */ + }; + + dslbis1 = g_malloc(sizeof(*dslbis1)); + if (!dslbis1) { + return -ENOMEM; + } + *dslbis1 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis1), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_WRITE_LATENCY, + .entry_base_unit = 10000, + .entry[0] = 25, /* 250ns */ + }; + + dslbis2 = g_malloc(sizeof(*dslbis2)); + if (!dslbis2) { + return -ENOMEM; + } + *dslbis2 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis2), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_READ_BANDWIDTH, + .entry_base_unit = 1000, /* GB/s */ + .entry[0] = 16, + }; + + dslbis3 = g_malloc(sizeof(*dslbis3)); + if (!dslbis3) { + return -ENOMEM; + } + *dslbis3 = (CDATDslbis) { + .header = { + .type = CDAT_TYPE_DSLBIS, + .length = sizeof(*dslbis3), + }, + .handle = dsmad_handle, + .flags = HMAT_LB_MEM_MEMORY, + .data_type = HMAT_LB_DATA_WRITE_BANDWIDTH, + .entry_base_unit = 1000, /* GB/s */ + .entry[0] = 16, + }; + + dsemts = g_malloc(sizeof(*dsemts)); + if (!dsemts) { + return -ENOMEM; + } + *dsemts = (CDATDsemts) { + .header = { + .type = CDAT_TYPE_DSEMTS, + .length = sizeof(*dsemts), + }, + .DSMAS_handle = dsmad_handle, + /* Reserved - the non volatile from DSMAS matters */ + .EFI_memory_type_attr = 2, + .DPA_offset = 0, + .DPA_length = int128_get64(mr->size), + }; + + /* Header always at start of structure */ + cdat_table[CT3_CDAT_DSMAS] = g_steal_pointer(&dsmas); + cdat_table[CT3_CDAT_DSLBIS0] = g_steal_pointer(&dslbis0); + cdat_table[CT3_CDAT_DSLBIS1] = g_steal_pointer(&dslbis1); + cdat_table[CT3_CDAT_DSLBIS2] = g_steal_pointer(&dslbis2); + cdat_table[CT3_CDAT_DSLBIS3] = g_steal_pointer(&dslbis3); + cdat_table[CT3_CDAT_DSEMTS] = g_steal_pointer(&dsemts); + + return 0; +} + +static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) +{ + g_autofree CDATSubHeader **table = NULL; + MemoryRegion *nonvolatile_mr; + CXLType3Dev *ct3d = priv; + int dsmad_handle = 0; + int rc; + + if (!ct3d->hostmem) { + return 0; + } + + nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem); + if (!nonvolatile_mr) { + return -EINVAL; + } + + table = g_malloc0(CT3_CDAT_NUM_ENTRIES * sizeof(*table)); + if (!table) { + return -ENOMEM; + } + + rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr); + if (rc < 0) { + return rc; + } + + *cdat_table = g_steal_pointer(&table); + + return CT3_CDAT_NUM_ENTRIES; +} + +static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv) +{ + int i; + + for (i = 0; i < num; i++) { + g_free(cdat_table[i]); + } + g_free(cdat_table); +} + +static bool cxl_doe_cdat_rsp(DOECap *doe_cap) +{ + CDATObject *cdat = &CXL_TYPE3(doe_cap->pdev)->cxl_cstate.cdat; + uint16_t ent; + void *base; + uint32_t len; + CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap); + CDATRsp rsp; + + assert(cdat->entry_len); + + /* Discard if request length mismatched */ + if (pcie_doe_get_obj_len(req) < + DIV_ROUND_UP(sizeof(CDATReq), DWORD_BYTE)) { + return false; + } + + ent = req->entry_handle; + base = cdat->entry[ent].base; + len = cdat->entry[ent].length; + + rsp = (CDATRsp) { + .header = { + .vendor_id = 
CXL_VENDOR_ID, + .data_obj_type = CXL_DOE_TABLE_ACCESS, + .reserved = 0x0, + .length = DIV_ROUND_UP((sizeof(rsp) + len), DWORD_BYTE), + }, + .rsp_code = CXL_DOE_TAB_RSP, + .table_type = CXL_DOE_TAB_TYPE_CDAT, + .entry_handle = (ent < cdat->entry_len - 1) ? + ent + 1 : CXL_DOE_TAB_ENT_MAX, + }; + + memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp)); + memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), DWORD_BYTE), + base, len); + + doe_cap->read_mbox_len += rsp.header.length; + + return true; +} + +static uint32_t ct3d_config_read(PCIDevice *pci_dev, uint32_t addr, int size) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + uint32_t val; + + if (pcie_doe_read_config(&ct3d->doe_cdat, addr, size, &val)) { + return val; + } + + return pci_default_read_config(pci_dev, addr, size); +} + +static void ct3d_config_write(PCIDevice *pci_dev, uint32_t addr, uint32_t val, + int size) +{ + CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); + + pcie_doe_write_config(&ct3d->doe_cdat, addr, val, size); + pci_default_write_config(pci_dev, addr, val, size); +} /* * Null value of all Fs suggested by IEEE RA guidelines for use of @@ -139,6 +377,11 @@ static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) return true; } +static DOEProtocol doe_cdat_prot[] = { + { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp }, + { } +}; + static void ct3_realize(PCIDevice *pci_dev, Error **errp) { CXLType3Dev *ct3d = CXL_TYPE3(pci_dev); @@ -146,6 +389,8 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ComponentRegisters *regs = &cxl_cstate->crb; MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; + unsigned short msix_num = 1; + int i; if (!cxl_setup_memory(ct3d, errp)) { return; @@ -180,6 +425,20 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &ct3d->cxl_dstate.device_registers); + + /* MSI(-X) Initailization */ + msix_init_exclusive_bar(pci_dev, msix_num, 4, NULL); + for (i = 0; i < msix_num; i++) { + msix_vector_use(pci_dev, i); + } + + /* DOE Initailization */ + pcie_doe_init(pci_dev, &ct3d->doe_cdat, 0x190, doe_cdat_prot, true, 0); + + cxl_cstate->cdat.build_cdat_table = ct3_build_cdat_table; + cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table; + cxl_cstate->cdat.private = ct3d; + cxl_doe_cdat_init(cxl_cstate, errp); } static void ct3_exit(PCIDevice *pci_dev) @@ -188,6 +447,7 @@ static void ct3_exit(PCIDevice *pci_dev) CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; ComponentRegisters *regs = &cxl_cstate->crb; + cxl_doe_cdat_release(cxl_cstate); g_free(regs->special_ops); address_space_destroy(&ct3d->hostmem_as); } @@ -287,6 +547,7 @@ static Property ct3_props[] = { DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND, HostMemoryBackend *), DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL), + DEFINE_PROP_STRING("cdat", CXLType3Dev, cxl_cstate.cdat.filename), DEFINE_PROP_END_OF_LIST(), }; @@ -352,6 +613,9 @@ static void ct3_class_init(ObjectClass *oc, void *data) pc->device_id = 0xd93; /* LVF for now */ pc->revision = 1; + pc->config_write = ct3d_config_write; + pc->config_read = ct3d_config_read; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->desc = "CXL PMEM Device (Type 3)"; dc->reset = ct3d_reset; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index ac96f76..7523e9f 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -276,25 +276,18 @@ e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors) } } -static bool +static void e1000e_use_msix_vectors(E1000EState *s, int 
num_vectors) { int i; for (i = 0; i < num_vectors; i++) { - int res = msix_vector_use(PCI_DEVICE(s), i); - if (res < 0) { - trace_e1000e_msix_use_vector_fail(i, res); - e1000e_unuse_msix_vectors(s, i); - return false; - } + msix_vector_use(PCI_DEVICE(s), i); } - return true; } static void e1000e_init_msix(E1000EState *s) { - PCIDevice *d = PCI_DEVICE(s); int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM, &s->msix, E1000E_MSIX_IDX, E1000E_MSIX_TABLE, @@ -305,9 +298,7 @@ e1000e_init_msix(E1000EState *s) if (res < 0) { trace_e1000e_msix_init_fail(res); } else { - if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { - msix_uninit(d, &s->msix, &s->msix); - } + e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM); } } diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c index d8f3f16..281d43e 100644 --- a/hw/net/rocker/rocker.c +++ b/hw/net/rocker/rocker.c @@ -1212,24 +1212,14 @@ static void rocker_msix_vectors_unuse(Rocker *r, } } -static int rocker_msix_vectors_use(Rocker *r, - unsigned int num_vectors) +static void rocker_msix_vectors_use(Rocker *r, unsigned int num_vectors) { PCIDevice *dev = PCI_DEVICE(r); - int err; int i; for (i = 0; i < num_vectors; i++) { - err = msix_vector_use(dev, i); - if (err) { - goto rollback; - } + msix_vector_use(dev, i); } - return 0; - -rollback: - rocker_msix_vectors_unuse(r, i); - return err; } static int rocker_msix_init(Rocker *r, Error **errp) @@ -1247,16 +1237,9 @@ static int rocker_msix_init(Rocker *r, Error **errp) return err; } - err = rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); - if (err) { - goto err_msix_vectors_use; - } + rocker_msix_vectors_use(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports)); return 0; - -err_msix_vectors_use: - msix_uninit(dev, &r->msix_bar, &r->msix_bar); - return err; } static void rocker_msix_uninit(Rocker *r) diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c index 89d71cf..9f7daae 100644 --- a/hw/net/vhost_net-stub.c +++ b/hw/net/vhost_net-stub.c @@ -101,3 +101,15 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) { return 0; } + +void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, + int vq_index) +{ + +} + +int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, + int vq_index) +{ + return 0; +} diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index d28f8b9..feda448 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -34,6 +34,7 @@ #include "standard-headers/linux/virtio_ring.h" #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-bus.h" +#include "linux-headers/linux/vhost.h" /* Features supported by host kernel. 
*/ @@ -46,6 +47,7 @@ static const int kernel_feature_bits[] = { VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, VIRTIO_NET_F_HASH_REPORT, VHOST_INVALID_FEATURE_BIT }; @@ -387,21 +389,20 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } else { peer = qemu_get_peer(ncs, n->max_queue_pairs); } - r = vhost_net_start_one(get_vhost_net(peer), dev); - - if (r < 0) { - goto err_start; - } if (peer->vring_enable) { /* restore vring enable state */ r = vhost_set_vring_enable(peer, peer->vring_enable); if (r < 0) { - vhost_net_stop_one(get_vhost_net(peer), dev); goto err_start; } } + + r = vhost_net_start_one(get_vhost_net(peer), dev); + if (r < 0) { + goto err_start; + } } return 0; @@ -531,3 +532,80 @@ int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu) return vhost_ops->vhost_net_set_mtu(&net->dev, mtu); } + +void vhost_net_virtqueue_reset(VirtIODevice *vdev, NetClientState *nc, + int vq_index) +{ + VHostNetState *net = get_vhost_net(nc->peer); + const VhostOps *vhost_ops = net->dev.vhost_ops; + struct vhost_vring_file file = { .fd = -1 }; + int idx; + + /* should only be called after backend is connected */ + assert(vhost_ops); + + idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); + + if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { + file.index = idx; + int r = vhost_net_set_backend(&net->dev, &file); + assert(r >= 0); + } + + vhost_virtqueue_stop(&net->dev, + vdev, + net->dev.vqs + idx, + net->dev.vq_index + idx); +} + +int vhost_net_virtqueue_restart(VirtIODevice *vdev, NetClientState *nc, + int vq_index) +{ + VHostNetState *net = get_vhost_net(nc->peer); + const VhostOps *vhost_ops = net->dev.vhost_ops; + struct vhost_vring_file file = { }; + int idx, r; + + if (!net->dev.started) { + return -EBUSY; + } + + /* should only be called after backend is connected */ + assert(vhost_ops); + + idx = vhost_ops->vhost_get_vq_index(&net->dev, vq_index); + + r = vhost_virtqueue_start(&net->dev, + vdev, + net->dev.vqs + idx, + net->dev.vq_index + idx); + if (r < 0) { + goto err_start; + } + + if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { + file.index = idx; + file.fd = net->backend; + r = vhost_net_set_backend(&net->dev, &file); + if (r < 0) { + r = -errno; + goto err_start; + } + } + + return 0; + +err_start: + error_report("Error when restarting the queue."); + + if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) { + file.fd = VHOST_FILE_UNBIND; + file.index = idx; + int r = vhost_net_set_backend(&net->dev, &file); + assert(r >= 0); + } + + vhost_dev_stop(&net->dev, vdev); + + return r; +} diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index b6903ae..8b32339 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -124,6 +124,16 @@ static int vq2q(int queue_index) return queue_index / 2; } +static void flush_or_purge_queued_packets(NetClientState *nc) +{ + if (!nc->peer) { + return; + } + + qemu_flush_or_purge_queued_packets(nc->peer, true); + assert(!virtio_net_get_subqueue(nc)->async_tx.elem); +} + /* TODO * - we could suppress RX interrupt if we were so inclined. 
*/ @@ -536,6 +546,43 @@ static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc) return info; } +static void virtio_net_queue_reset(VirtIODevice *vdev, uint32_t queue_index) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); + + if (!nc->peer) { + return; + } + + if (get_vhost_net(nc->peer) && + nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { + vhost_net_virtqueue_reset(vdev, nc, queue_index); + } + + flush_or_purge_queued_packets(nc); +} + +static void virtio_net_queue_enable(VirtIODevice *vdev, uint32_t queue_index) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(queue_index)); + int r; + + if (!nc->peer || !vdev->vhost_started) { + return; + } + + if (get_vhost_net(nc->peer) && + nc->peer->info->type == NET_CLIENT_DRIVER_TAP) { + r = vhost_net_virtqueue_restart(vdev, nc, queue_index); + if (r < 0) { + error_report("unable to restart vhost net virtqueue: %d, " + "when resetting the queue", queue_index); + } + } +} + static void virtio_net_reset(VirtIODevice *vdev) { VirtIONet *n = VIRTIO_NET(vdev); @@ -566,12 +613,7 @@ static void virtio_net_reset(VirtIODevice *vdev) /* Flush any async TX */ for (i = 0; i < n->max_queue_pairs; i++) { - NetClientState *nc = qemu_get_subqueue(n->nic, i); - - if (nc->peer) { - qemu_flush_or_purge_queued_packets(nc->peer, true); - assert(!virtio_net_get_subqueue(nc)->async_tx.elem); - } + flush_or_purge_queued_packets(qemu_get_subqueue(n->nic, i)); } } @@ -746,6 +788,7 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, } if (!get_vhost_net(nc->peer)) { + virtio_add_feature(&features, VIRTIO_F_RING_RESET); return features; } @@ -3822,6 +3865,8 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->set_features = virtio_net_set_features; vdc->bad_features = virtio_net_bad_features; vdc->reset = virtio_net_reset; + vdc->queue_reset = virtio_net_queue_reset; + vdc->queue_enable = virtio_net_queue_enable; vdc->set_status = virtio_net_set_status; vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 0b7acf7..d2ab527 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -2110,20 +2110,14 @@ vmxnet3_unuse_msix_vectors(VMXNET3State *s, int num_vectors) } } -static bool +static void vmxnet3_use_msix_vectors(VMXNET3State *s, int num_vectors) { PCIDevice *d = PCI_DEVICE(s); int i; for (i = 0; i < num_vectors; i++) { - int res = msix_vector_use(d, i); - if (0 > res) { - VMW_WRPRN("Failed to use MSI-X vector %d, error %d", i, res); - vmxnet3_unuse_msix_vectors(s, i); - return false; - } + msix_vector_use(d, i); } - return true; } static bool @@ -2141,13 +2135,8 @@ vmxnet3_init_msix(VMXNET3State *s) VMW_WRPRN("Failed to initialize MSI-X, error %d", res); s->msix_used = false; } else { - if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { - VMW_WRPRN("Failed to use MSI-X vectors, error %d", res); - msix_uninit(d, &s->msix_bar, &s->msix_bar); - s->msix_used = false; - } else { - s->msix_used = true; - } + vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS); + s->msix_used = true; } return s->msix_used; } @@ -2412,19 +2401,13 @@ static const VMStateDescription vmstate_vmxnet3_rxq_descr = { static int vmxnet3_post_load(void *opaque, int version_id) { VMXNET3State *s = opaque; - PCIDevice *d = PCI_DEVICE(s); net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags, s->peer_has_vhdr); 
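(Editorial aside.) With msix_vector_use() reduced to a void function that asserts on out-of-range vectors, the e1000e, vmxnet3, rocker and nvme callers in the hunks above all collapse to the same shape: initialize MSI-X, then mark the vectors used, with no error propagation and no partial-use rollback. A hedged sketch of that caller pattern — the function name, vector count and BAR number are illustrative, not taken from the patch:

```c
#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "hw/pci/msix.h"

/*
 * Illustrative caller: msix_vector_use() can no longer fail, so there is
 * nothing to undo if a later vector "fails" to be marked used.
 */
static void example_init_msix(PCIDevice *dev, unsigned nvec)
{
    unsigned i;

    /* Assumption for the example: BAR 0 is free for an exclusive MSI-X BAR. */
    if (msix_init_exclusive_bar(dev, nvec, 0, NULL) < 0) {
        return;                    /* MSI-X unavailable; device keeps INTx/MSI */
    }

    for (i = 0; i < nvec; i++) {
        msix_vector_use(dev, i);   /* asserts on a bad vector, cannot fail */
    }
}
```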
net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); if (s->msix_used) { - if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { - VMW_WRPRN("Failed to re-use MSI-X vectors"); - msix_uninit(d, &s->msix_bar, &s->msix_bar); - s->msix_used = false; - return -1; - } + vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS); } if (!vmxnet3_validate_queues(s)) { diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 9a9857c..ac3885c 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -4740,11 +4740,8 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) { - int ret; - if (msix_enabled(&n->parent_obj)) { - ret = msix_vector_use(&n->parent_obj, vector); - assert(ret == 0); + msix_vector_use(&n->parent_obj, vector); } cq->ctrl = n; cq->cqid = cqid; diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index a83a3e8..9b8b57d 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -10,11 +10,12 @@ #include "qemu/osdep.h" #include "qemu/log.h" +#include "hw/qdev-properties.h" #include "hw/pci/msi.h" #include "hw/pci/pcie.h" #include "hw/pci/pcie_port.h" -#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 1 +#define CXL_UPSTREAM_PORT_MSI_NR_VECTOR 2 #define CXL_UPSTREAM_PORT_MSI_OFFSET 0x70 #define CXL_UPSTREAM_PORT_PCIE_CAP_OFFSET 0x90 @@ -28,6 +29,7 @@ typedef struct CXLUpstreamPort { /*< public >*/ CXLComponentState cxl_cstate; + DOECap doe_cdat; } CXLUpstreamPort; CXLComponentState *cxl_usp_to_cstate(CXLUpstreamPort *usp) @@ -60,6 +62,9 @@ static void cxl_usp_dvsec_write_config(PCIDevice *dev, uint32_t addr, static void cxl_usp_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { + CXLUpstreamPort *usp = CXL_USP(d); + + pcie_doe_write_config(&usp->doe_cdat, address, val, len); pci_bridge_write_config(d, address, val, len); pcie_cap_flr_write_config(d, address, val, len); pcie_aer_write_config(d, address, val, len); @@ -67,6 +72,18 @@ static void cxl_usp_write_config(PCIDevice *d, uint32_t address, cxl_usp_dvsec_write_config(d, address, val, len); } +static uint32_t cxl_usp_read_config(PCIDevice *d, uint32_t address, int len) +{ + CXLUpstreamPort *usp = CXL_USP(d); + uint32_t val; + + if (pcie_doe_read_config(&usp->doe_cdat, address, len, &val)) { + return val; + } + + return pci_default_read_config(d, address, len); +} + static void latch_registers(CXLUpstreamPort *usp) { uint32_t *reg_state = usp->cxl_cstate.crb.cache_mem_registers; @@ -119,6 +136,167 @@ static void build_dvsecs(CXLComponentState *cxl) REG_LOC_DVSEC_REVID, dvsec); } +static bool cxl_doe_cdat_rsp(DOECap *doe_cap) +{ + CDATObject *cdat = &CXL_USP(doe_cap->pdev)->cxl_cstate.cdat; + uint16_t ent; + void *base; + uint32_t len; + CDATReq *req = pcie_doe_get_write_mbox_ptr(doe_cap); + CDATRsp rsp; + + cxl_doe_cdat_update(&CXL_USP(doe_cap->pdev)->cxl_cstate, &error_fatal); + assert(cdat->entry_len); + + /* Discard if request length mismatched */ + if (pcie_doe_get_obj_len(req) < + DIV_ROUND_UP(sizeof(CDATReq), sizeof(uint32_t))) { + return false; + } + + ent = req->entry_handle; + base = cdat->entry[ent].base; + len = cdat->entry[ent].length; + + rsp = (CDATRsp) { + .header = { + .vendor_id = CXL_VENDOR_ID, + .data_obj_type = CXL_DOE_TABLE_ACCESS, + .reserved = 0x0, + .length = DIV_ROUND_UP((sizeof(rsp) + len), sizeof(uint32_t)), + }, + .rsp_code = CXL_DOE_TAB_RSP, + .table_type = CXL_DOE_TAB_TYPE_CDAT, + .entry_handle = (ent < cdat->entry_len - 1) ? 
+ ent + 1 : CXL_DOE_TAB_ENT_MAX, + }; + + memcpy(doe_cap->read_mbox, &rsp, sizeof(rsp)); + memcpy(doe_cap->read_mbox + DIV_ROUND_UP(sizeof(rsp), sizeof(uint32_t)), + base, len); + + doe_cap->read_mbox_len += rsp.header.length; + + return true; +} + +static DOEProtocol doe_cdat_prot[] = { + { CXL_VENDOR_ID, CXL_DOE_TABLE_ACCESS, cxl_doe_cdat_rsp }, + { } +}; + +enum { + CXL_USP_CDAT_SSLBIS_LAT, + CXL_USP_CDAT_SSLBIS_BW, + CXL_USP_CDAT_NUM_ENTRIES +}; + +static int build_cdat_table(CDATSubHeader ***cdat_table, void *priv) +{ + g_autofree CDATSslbis *sslbis_latency = NULL; + g_autofree CDATSslbis *sslbis_bandwidth = NULL; + CXLUpstreamPort *us = CXL_USP(priv); + PCIBus *bus = &PCI_BRIDGE(us)->sec_bus; + int devfn, sslbis_size, i; + int count = 0; + uint16_t port_ids[256]; + + for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) { + PCIDevice *d = bus->devices[devfn]; + PCIEPort *port; + + if (!d || !pci_is_express(d) || !d->exp.exp_cap) { + continue; + } + + /* + * Whilst the PCI express spec doesn't allow anything other than + * downstream ports on this bus, let us be a little paranoid + */ + if (!object_dynamic_cast(OBJECT(d), TYPE_PCIE_PORT)) { + continue; + } + + port = PCIE_PORT(d); + port_ids[count] = port->port; + count++; + } + + /* May not yet have any ports - try again later */ + if (count == 0) { + return 0; + } + + sslbis_size = sizeof(CDATSslbis) + sizeof(*sslbis_latency->sslbe) * count; + sslbis_latency = g_malloc(sslbis_size); + if (!sslbis_latency) { + return -ENOMEM; + } + *sslbis_latency = (CDATSslbis) { + .sslbis_header = { + .header = { + .type = CDAT_TYPE_SSLBIS, + .length = sslbis_size, + }, + .data_type = HMATLB_DATA_TYPE_ACCESS_LATENCY, + .entry_base_unit = 10000, + }, + }; + + for (i = 0; i < count; i++) { + sslbis_latency->sslbe[i] = (CDATSslbe) { + .port_x_id = CDAT_PORT_ID_USP, + .port_y_id = port_ids[i], + .latency_bandwidth = 15, /* 150ns */ + }; + } + + sslbis_bandwidth = g_malloc(sslbis_size); + if (!sslbis_bandwidth) { + return 0; + } + *sslbis_bandwidth = (CDATSslbis) { + .sslbis_header = { + .header = { + .type = CDAT_TYPE_SSLBIS, + .length = sslbis_size, + }, + .data_type = HMATLB_DATA_TYPE_ACCESS_BANDWIDTH, + .entry_base_unit = 1000, + }, + }; + + for (i = 0; i < count; i++) { + sslbis_bandwidth->sslbe[i] = (CDATSslbe) { + .port_x_id = CDAT_PORT_ID_USP, + .port_y_id = port_ids[i], + .latency_bandwidth = 16, /* 16 GB/s */ + }; + } + + *cdat_table = g_malloc0(sizeof(*cdat_table) * CXL_USP_CDAT_NUM_ENTRIES); + if (!*cdat_table) { + return -ENOMEM; + } + + /* Header always at start of structure */ + (*cdat_table)[CXL_USP_CDAT_SSLBIS_LAT] = g_steal_pointer(&sslbis_latency); + (*cdat_table)[CXL_USP_CDAT_SSLBIS_BW] = g_steal_pointer(&sslbis_bandwidth); + + return CXL_USP_CDAT_NUM_ENTRIES; +} + +static void free_default_cdat_table(CDATSubHeader **cdat_table, int num, + void *priv) +{ + int i; + + for (i = 0; i < num; i++) { + g_free(cdat_table[i]); + } + g_free(cdat_table); +} + static void cxl_usp_realize(PCIDevice *d, Error **errp) { PCIEPort *p = PCIE_PORT(d); @@ -161,6 +339,14 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp) PCI_BASE_ADDRESS_MEM_TYPE_64, component_bar); + pcie_doe_init(d, &usp->doe_cdat, cxl_cstate->dvsec_offset, doe_cdat_prot, + true, 1); + + cxl_cstate->cdat.build_cdat_table = build_cdat_table; + cxl_cstate->cdat.free_cdat_table = free_default_cdat_table; + cxl_cstate->cdat.private = d; + cxl_doe_cdat_init(cxl_cstate, errp); + return; err_cap: @@ -179,6 +365,11 @@ static void cxl_usp_exitfn(PCIDevice *d) pci_bridge_exitfn(d); 
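(Editorial aside.) Both the CXL type 3 device and the upstream port in the hunks above wire up DOE the same way: a NULL-terminated DOEProtocol table, a pcie_doe_init() call in realize(), and config-space accesses forwarded to pcie_doe_read_config()/pcie_doe_write_config() before falling back to the PCI defaults. A sketch of that pattern is below; ExampleDev, example_handle_req(), the vendor/object IDs and the 0x190 capability offset are illustrative assumptions, not part of the patch.

```c
#include "qemu/osdep.h"
#include "hw/pci/pci.h"
#include "hw/pci/pcie_doe.h"

typedef struct ExampleDev {
    PCIDevice parent_obj;
    DOECap doe;
} ExampleDev;

static bool example_handle_req(DOECap *doe_cap)
{
    return false;   /* a real handler would build a reply via pcie_doe_set_rsp() */
}

static DOEProtocol example_prots[] = {
    { 0x1234 /* vendor */, 0x02 /* object type */, example_handle_req },
    { }
};

static void example_realize(PCIDevice *pdev, Error **errp)
{
    ExampleDev *d = (ExampleDev *)pdev;

    /* interrupt-capable DOE instance using MSI/MSI-X vector 0 */
    pcie_doe_init(pdev, &d->doe, 0x190, example_prots, true, 0);
}

static uint32_t example_config_read(PCIDevice *pdev, uint32_t addr, int size)
{
    ExampleDev *d = (ExampleDev *)pdev;
    uint32_t val;

    if (pcie_doe_read_config(&d->doe, addr, size, &val)) {
        return val;                          /* access hit the DOE capability */
    }
    return pci_default_read_config(pdev, addr, size);
}

static void example_config_write(PCIDevice *pdev, uint32_t addr,
                                 uint32_t val, int size)
{
    ExampleDev *d = (ExampleDev *)pdev;

    pcie_doe_write_config(&d->doe, addr, val, size);
    pci_default_write_config(pdev, addr, val, size);
}
```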
} +static Property cxl_upstream_props[] = { + DEFINE_PROP_STRING("cdat", CXLUpstreamPort, cxl_cstate.cdat.filename), + DEFINE_PROP_END_OF_LIST() +}; + static void cxl_upstream_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -186,6 +377,7 @@ static void cxl_upstream_class_init(ObjectClass *oc, void *data) k->is_bridge = true; k->config_write = cxl_usp_write_config; + k->config_read = cxl_usp_read_config; k->realize = cxl_usp_realize; k->exit = cxl_usp_exitfn; k->vendor_id = 0x19e5; /* Huawei */ @@ -194,6 +386,7 @@ static void cxl_upstream_class_init(ObjectClass *oc, void *data) set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->desc = "CXL Switch Upstream Port"; dc->reset = cxl_usp_reset; + device_class_set_props(dc, cxl_upstream_props); } static const TypeInfo cxl_usp_info = { diff --git a/hw/pci/meson.build b/hw/pci/meson.build index bcc9c75..5aff7ed 100644 --- a/hw/pci/meson.build +++ b/hw/pci/meson.build @@ -13,6 +13,7 @@ pci_ss.add(files( # allow plugging PCIe devices into PCI buses, include them even if # CONFIG_PCI_EXPRESS=n. pci_ss.add(files('pcie.c', 'pcie_aer.c')) +pci_ss.add(files('pcie_doe.c')) softmmu_ss.add(when: 'CONFIG_PCI_EXPRESS', if_true: files('pcie_port.c', 'pcie_host.c')) softmmu_ss.add_all(when: 'CONFIG_PCI', if_true: pci_ss) diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 1e381a9..9e70fcd 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -136,17 +136,12 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) } } -void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) +void msix_set_mask(PCIDevice *dev, int vector, bool mask) { - ERRP_GUARD(); unsigned offset; bool was_masked; - if (vector > dev->msix_entries_nr) { - error_setg(errp, "msix: vector %d not allocated. max vector is %d", - vector, dev->msix_entries_nr); - return; - } + assert(vector < dev->msix_entries_nr); offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; @@ -522,7 +517,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) { MSIMessage msg; - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { + assert(vector < dev->msix_entries_nr); + + if (!dev->msix_entry_used[vector]) { return; } @@ -558,20 +555,17 @@ void msix_reset(PCIDevice *dev) * don't want to follow the spec suggestion can declare all vectors as used. */ /* Mark vector as used. */ -int msix_vector_use(PCIDevice *dev, unsigned vector) +void msix_vector_use(PCIDevice *dev, unsigned vector) { - if (vector >= dev->msix_entries_nr) { - return -EINVAL; - } - + assert(vector < dev->msix_entries_nr); dev->msix_entry_used[vector]++; - return 0; } /* Mark vector as unused. */ void msix_vector_unuse(PCIDevice *dev, unsigned vector) { - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { + assert(vector < dev->msix_entries_nr); + if (!dev->msix_entry_used[vector]) { return; } if (--dev->msix_entry_used[vector]) { diff --git a/hw/pci/pcie_doe.c b/hw/pci/pcie_doe.c new file mode 100644 index 0000000..2210f86 --- /dev/null +++ b/hw/pci/pcie_doe.c @@ -0,0 +1,367 @@ +/* + * PCIe Data Object Exchange + * + * Copyright (C) 2021 Avery Design Systems, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu/range.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie.h" +#include "hw/pci/pcie_doe.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#define DWORD_BYTE 4 + +typedef struct DoeDiscoveryReq { + DOEHeader header; + uint8_t index; + uint8_t reserved[3]; +} QEMU_PACKED DoeDiscoveryReq; + +typedef struct DoeDiscoveryRsp { + DOEHeader header; + uint16_t vendor_id; + uint8_t data_obj_type; + uint8_t next_index; +} QEMU_PACKED DoeDiscoveryRsp; + +static bool pcie_doe_discovery(DOECap *doe_cap) +{ + DoeDiscoveryReq *req = pcie_doe_get_write_mbox_ptr(doe_cap); + DoeDiscoveryRsp rsp; + uint8_t index = req->index; + DOEProtocol *prot; + + /* Discard request if length does not match DoeDiscoveryReq */ + if (pcie_doe_get_obj_len(req) < + DIV_ROUND_UP(sizeof(DoeDiscoveryReq), DWORD_BYTE)) { + return false; + } + + rsp.header = (DOEHeader) { + .vendor_id = PCI_VENDOR_ID_PCI_SIG, + .data_obj_type = PCI_SIG_DOE_DISCOVERY, + .length = DIV_ROUND_UP(sizeof(DoeDiscoveryRsp), DWORD_BYTE), + }; + + /* Point to the requested protocol, index 0 must be Discovery */ + if (index == 0) { + rsp.vendor_id = PCI_VENDOR_ID_PCI_SIG; + rsp.data_obj_type = PCI_SIG_DOE_DISCOVERY; + } else { + if (index < doe_cap->protocol_num) { + prot = &doe_cap->protocols[index - 1]; + rsp.vendor_id = prot->vendor_id; + rsp.data_obj_type = prot->data_obj_type; + } else { + rsp.vendor_id = 0xFFFF; + rsp.data_obj_type = 0xFF; + } + } + + if (index + 1 == doe_cap->protocol_num) { + rsp.next_index = 0; + } else { + rsp.next_index = index + 1; + } + + pcie_doe_set_rsp(doe_cap, &rsp); + + return true; +} + +static void pcie_doe_reset_mbox(DOECap *st) +{ + st->read_mbox_idx = 0; + st->read_mbox_len = 0; + st->write_mbox_len = 0; + + memset(st->read_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); + memset(st->write_mbox, 0, PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); +} + +void pcie_doe_init(PCIDevice *dev, DOECap *doe_cap, uint16_t offset, + DOEProtocol *protocols, bool intr, uint16_t vec) +{ + pcie_add_capability(dev, PCI_EXT_CAP_ID_DOE, 0x1, offset, + PCI_DOE_SIZEOF); + + doe_cap->pdev = dev; + doe_cap->offset = offset; + + if (intr && (msi_present(dev) || msix_present(dev))) { + doe_cap->cap.intr = intr; + doe_cap->cap.vec = vec; + } + + doe_cap->write_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); + doe_cap->read_mbox = g_malloc0(PCI_DOE_DW_SIZE_MAX * DWORD_BYTE); + + pcie_doe_reset_mbox(doe_cap); + + doe_cap->protocols = protocols; + for (; protocols->vendor_id; protocols++) { + doe_cap->protocol_num++; + } + assert(doe_cap->protocol_num < PCI_DOE_PROTOCOL_NUM_MAX); + + /* Increment to allow for the discovery protocol */ + doe_cap->protocol_num++; +} + +void pcie_doe_fini(DOECap *doe_cap) +{ + g_free(doe_cap->read_mbox); + g_free(doe_cap->write_mbox); + g_free(doe_cap); +} + +uint32_t pcie_doe_build_protocol(DOEProtocol *p) +{ + return DATA_OBJ_BUILD_HEADER1(p->vendor_id, p->data_obj_type); +} + +void *pcie_doe_get_write_mbox_ptr(DOECap *doe_cap) +{ + return doe_cap->write_mbox; +} + +/* + * Copy the response to read mailbox buffer + * This might be called in self-defined handle_request() if a DOE response is + * required in the corresponding protocol + */ +void pcie_doe_set_rsp(DOECap *doe_cap, void *rsp) +{ + uint32_t len = pcie_doe_get_obj_len(rsp); + + memcpy(doe_cap->read_mbox + doe_cap->read_mbox_len, rsp, len * DWORD_BYTE); + doe_cap->read_mbox_len += len; +} + +uint32_t pcie_doe_get_obj_len(void *obj) +{ + 
uint32_t len; + + if (!obj) { + return 0; + } + + /* Only lower 18 bits are valid */ + len = DATA_OBJ_LEN_MASK(((DOEHeader *)obj)->length); + + /* PCIe r6.0 Table 6.29: a value of 00000h indicates 2^18 DW */ + return (len) ? len : PCI_DOE_DW_SIZE_MAX; +} + +static void pcie_doe_irq_assert(DOECap *doe_cap) +{ + PCIDevice *dev = doe_cap->pdev; + + if (doe_cap->cap.intr && doe_cap->ctrl.intr) { + if (doe_cap->status.intr) { + return; + } + doe_cap->status.intr = 1; + + if (msix_enabled(dev)) { + msix_notify(dev, doe_cap->cap.vec); + } else if (msi_enabled(dev)) { + msi_notify(dev, doe_cap->cap.vec); + } + } +} + +static void pcie_doe_set_ready(DOECap *doe_cap, bool rdy) +{ + doe_cap->status.ready = rdy; + + if (rdy) { + pcie_doe_irq_assert(doe_cap); + } +} + +static void pcie_doe_set_error(DOECap *doe_cap, bool err) +{ + doe_cap->status.error = err; + + if (err) { + pcie_doe_irq_assert(doe_cap); + } +} + +/* + * Check incoming request in write_mbox for protocol format + */ +static void pcie_doe_prepare_rsp(DOECap *doe_cap) +{ + bool success = false; + int p; + bool (*handle_request)(DOECap *) = NULL; + + if (doe_cap->status.error) { + return; + } + + if (doe_cap->write_mbox[0] == + DATA_OBJ_BUILD_HEADER1(PCI_VENDOR_ID_PCI_SIG, PCI_SIG_DOE_DISCOVERY)) { + handle_request = pcie_doe_discovery; + } else { + for (p = 0; p < doe_cap->protocol_num - 1; p++) { + if (doe_cap->write_mbox[0] == + pcie_doe_build_protocol(&doe_cap->protocols[p])) { + handle_request = doe_cap->protocols[p].handle_request; + break; + } + } + } + + /* + * PCIe r6 DOE 6.30.1: + * If the number of DW transferred does not match the + * indicated Length for a data object, then the + * data object must be silently discarded. + */ + if (handle_request && (doe_cap->write_mbox_len == + pcie_doe_get_obj_len(pcie_doe_get_write_mbox_ptr(doe_cap)))) { + success = handle_request(doe_cap); + } + + if (success) { + pcie_doe_set_ready(doe_cap, 1); + } else { + pcie_doe_reset_mbox(doe_cap); + } +} + +/* + * Read from DOE config space. + * Return false if the address is not within the DOE_CAP range.
+ */ +bool pcie_doe_read_config(DOECap *doe_cap, uint32_t addr, int size, + uint32_t *buf) +{ + uint32_t shift; + uint16_t doe_offset = doe_cap->offset; + + if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP, + PCI_DOE_SIZEOF - 4, addr)) { + return false; + } + + addr -= doe_offset; + *buf = 0; + + if (range_covers_byte(PCI_EXP_DOE_CAP, DWORD_BYTE, addr)) { + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, INTR_SUPP, + doe_cap->cap.intr); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_REG, DOE_INTR_MSG_NUM, + doe_cap->cap.vec); + } else if (range_covers_byte(PCI_EXP_DOE_CTRL, DWORD_BYTE, addr)) { + /* Must return ABORT=0 and GO=0 */ + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_CONTROL, DOE_INTR_EN, + doe_cap->ctrl.intr); + } else if (range_covers_byte(PCI_EXP_DOE_STATUS, DWORD_BYTE, addr)) { + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_BUSY, + doe_cap->status.busy); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS, + doe_cap->status.intr); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DOE_ERROR, + doe_cap->status.error); + *buf = FIELD_DP32(*buf, PCI_DOE_CAP_STATUS, DATA_OBJ_RDY, + doe_cap->status.ready); + /* Mailbox should be DW accessed */ + } else if (addr == PCI_EXP_DOE_RD_DATA_MBOX && size == DWORD_BYTE) { + if (doe_cap->status.ready && !doe_cap->status.error) { + *buf = doe_cap->read_mbox[doe_cap->read_mbox_idx]; + } + } + + /* Process Alignment */ + shift = addr % DWORD_BYTE; + *buf = extract32(*buf, shift * 8, size * 8); + + return true; +} + +/* + * Write to DOE config space. + * Return if the address is not within the DOE_CAP range or an abort is received. + */ +void pcie_doe_write_config(DOECap *doe_cap, + uint32_t addr, uint32_t val, int size) +{ + uint16_t doe_offset = doe_cap->offset; + uint32_t shift; + + if (!range_covers_byte(doe_offset + PCI_EXP_DOE_CAP, + PCI_DOE_SIZEOF - 4, addr)) { + return; + } + + /* Process Alignment */ + shift = addr % DWORD_BYTE; + addr -= (doe_offset + shift); + val = deposit32(val, shift * 8, size * 8, val); + + switch (addr) { + case PCI_EXP_DOE_CTRL: + if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_ABORT)) { + pcie_doe_set_ready(doe_cap, 0); + pcie_doe_set_error(doe_cap, 0); + pcie_doe_reset_mbox(doe_cap); + return; + } + + if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_GO)) { + pcie_doe_prepare_rsp(doe_cap); + } + + if (FIELD_EX32(val, PCI_DOE_CAP_CONTROL, DOE_INTR_EN)) { + doe_cap->ctrl.intr = 1; + /* Clear interrupt bit located within the first byte */ + } else if (shift == 0) { + doe_cap->ctrl.intr = 0; + } + break; + case PCI_EXP_DOE_STATUS: + if (FIELD_EX32(val, PCI_DOE_CAP_STATUS, DOE_INTR_STATUS)) { + doe_cap->status.intr = 0; + } + break; + case PCI_EXP_DOE_RD_DATA_MBOX: + /* Mailbox should be DW accessed */ + if (size != DWORD_BYTE) { + return; + } + doe_cap->read_mbox_idx++; + if (doe_cap->read_mbox_idx == doe_cap->read_mbox_len) { + pcie_doe_reset_mbox(doe_cap); + pcie_doe_set_ready(doe_cap, 0); + } else if (doe_cap->read_mbox_idx > doe_cap->read_mbox_len) { + /* Underflow */ + pcie_doe_set_error(doe_cap, 1); + } + break; + case PCI_EXP_DOE_WR_DATA_MBOX: + /* Mailbox should be DW accessed */ + if (size != DWORD_BYTE) { + return; + } + doe_cap->write_mbox[doe_cap->write_mbox_len] = val; + doe_cap->write_mbox_len++; + break; + case PCI_EXP_DOE_CAP: + /* fallthrough */ + default: + break; + } +} diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c index 58db0b8..4fc6712 100644 --- a/hw/rdma/vmw/pvrdma_main.c +++ b/hw/rdma/vmw/pvrdma_main.c @@ -307,12 +307,7 @@ static int init_msix(PCIDevice *pdev) } for (i = 0; i < RDMA_MAX_INTRS; i++) {
- rc = msix_vector_use(PCI_DEVICE(dev), i); - if (rc < 0) { - rdma_error_report("Fail mark MSI-X vector %d", i); - uninit_msix(pdev, i); - return rc; - } + msix_vector_use(PCI_DEVICE(dev), i); } return 0; diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index c6cc53a..4e36bb8 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -602,17 +602,10 @@ static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, uint32_t count, bool mask) { VfuObject *o = vfu_get_private(vfu_ctx); - Error *err = NULL; uint32_t vector; for (vector = start; vector < count; vector++) { - msix_set_mask(o->pci_dev, vector, mask, &err); - if (err) { - VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, - error_get_pretty(err)); - error_free(err); - err = NULL; - } + msix_set_mask(o->pci_dev, vector, mask); } } diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 51437ca..b4243de 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -711,8 +711,14 @@ static void smbios_build_type_3_table(void) static void smbios_build_type_4_table(MachineState *ms, unsigned instance) { char sock_str[128]; + size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; - SMBIOS_BUILD_TABLE_PRE(4, T4_BASE + instance, true); /* required */ + if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { + tbl_len = SMBIOS_TYPE_4_LEN_V30; + } + + SMBIOS_BUILD_TABLE_PRE_SIZE(4, T4_BASE + instance, + true, tbl_len); /* required */ snprintf(sock_str, sizeof(sock_str), "%s%2x", type4.sock_pfx, instance); SMBIOS_TABLE_SET_STR(4, socket_designation_str, sock_str); @@ -739,8 +745,15 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) SMBIOS_TABLE_SET_STR(4, serial_number_str, type4.serial); SMBIOS_TABLE_SET_STR(4, asset_tag_number_str, type4.asset); SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part); - t->core_count = t->core_enabled = ms->smp.cores; - t->thread_count = ms->smp.threads; + + t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; + t->core_enabled = t->core_count; + + t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); + + t->thread_count = (ms->smp.threads > 255) ? 
0xFF : ms->smp.threads; + t->thread_count2 = cpu_to_le16(ms->smp.threads); + t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ t->processor_family2 = cpu_to_le16(0x01); /* Other */ diff --git a/hw/smbios/smbios_build.h b/hw/smbios/smbios_build.h index 56b5a1e..3516600 100644 --- a/hw/smbios/smbios_build.h +++ b/hw/smbios/smbios_build.h @@ -27,6 +27,11 @@ extern unsigned smbios_table_max; extern unsigned smbios_table_cnt; #define SMBIOS_BUILD_TABLE_PRE(tbl_type, tbl_handle, tbl_required) \ + SMBIOS_BUILD_TABLE_PRE_SIZE(tbl_type, tbl_handle, tbl_required, \ + sizeof(struct smbios_type_##tbl_type))\ + +#define SMBIOS_BUILD_TABLE_PRE_SIZE(tbl_type, tbl_handle, \ + tbl_required, tbl_len) \ struct smbios_type_##tbl_type *t; \ size_t t_off; /* table offset into smbios_tables */ \ int str_index = 0; \ @@ -39,12 +44,12 @@ extern unsigned smbios_table_cnt; /* use offset of table t within smbios_tables */ \ /* (pointer must be updated after each realloc) */ \ t_off = smbios_tables_len; \ - smbios_tables_len += sizeof(*t); \ + smbios_tables_len += tbl_len; \ smbios_tables = g_realloc(smbios_tables, smbios_tables_len); \ t = (struct smbios_type_##tbl_type *)(smbios_tables + t_off); \ \ t->header.type = tbl_type; \ - t->header.length = sizeof(*t); \ + t->header.length = tbl_len; \ t->header.handle = cpu_to_le16(tbl_handle); \ } while (0) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 6b5d8c0..130e5d1 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -578,45 +578,11 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, ram_addr_t *ram_addr, bool *read_only) { - MemoryRegion *mr; - hwaddr xlat; - hwaddr len = iotlb->addr_mask + 1; - bool writable = iotlb->perm & IOMMU_WO; - - /* - * The IOMMU TLB entry we have just covers translation through - * this IOMMU to its immediate target. We need to translate - * it the rest of the way through to memory. - */ - mr = address_space_translate(&address_space_memory, - iotlb->translated_addr, - &xlat, &len, writable, - MEMTXATTRS_UNSPECIFIED); - if (!memory_region_is_ram(mr)) { - error_report("iommu map to non memory area %"HWADDR_PRIx"", - xlat); - return false; - } else if (memory_region_has_ram_discard_manager(mr)) { - RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); - MemoryRegionSection tmp = { - .mr = mr, - .offset_within_region = xlat, - .size = int128_make64(len), - }; - - /* - * Malicious VMs can map memory into the IOMMU, which is expected - * to remain discarded. vfio will pin all pages, populating memory. - * Disallow that. vmstate priorities make sure any RamDiscardManager - * were already restored before IOMMUs are restored. - */ - if (!ram_discard_manager_is_populated(rdm, &tmp)) { - error_report("iommu map to discarded memory (e.g., unplugged via" - " virtio-mem): %"HWADDR_PRIx"", - iotlb->translated_addr); - return false; - } + bool ret, mr_has_discard_manager; + ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only, + &mr_has_discard_manager); + if (ret && mr_has_discard_manager) { /* * Malicious VMs might trigger discarding of IOMMU-mapped memory. The * pages will remain pinned inside vfio until unmapped, resulting in a @@ -635,29 +601,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, " intended via an IOMMU. It's possible to mitigate " " by setting/adjusting RLIMIT_MEMLOCK."); } - - /* - * Translation truncates length to the IOMMU page size, - * check that it did not truncate too much. 
- */ - if (len & iotlb->addr_mask) { - error_report("iommu has granularity incompatible with target AS"); - return false; - } - - if (vaddr) { - *vaddr = memory_region_get_ram_ptr(mr) + xlat; - } - - if (ram_addr) { - *ram_addr = memory_region_get_ram_addr(mr) + xlat; - } - - if (read_only) { - *read_only = !writable || mr->readonly; - } - - return true; + return ret; } static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index ad0f91c..1c40f42 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -123,7 +123,7 @@ static void vuf_stop(VirtIODevice *vdev) static void vuf_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserFS *fs = VHOST_USER_FS(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&fs->vhost_dev) == should_start) { return; diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index 8b40fe4..677d1c7 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -152,7 +152,7 @@ static void vu_gpio_stop(VirtIODevice *vdev) static void vu_gpio_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); trace_virtio_gpio_set_status(status); diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index bc58b6c..864eba6 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -93,7 +93,7 @@ static void vu_i2c_stop(VirtIODevice *vdev) static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserI2C *i2c = VHOST_USER_I2C(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&i2c->vhost_dev) == should_start) { return; diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index bc1f36c..8b47287 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -90,7 +90,7 @@ static void vu_rng_stop(VirtIODevice *vdev) static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserRNG *rng = VHOST_USER_RNG(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&rng->vhost_dev) == should_start) { return; diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 7b67e29..9431b97 100644 --- a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -55,7 +55,7 @@ const VhostDevConfigOps vsock_ops = { static void vuv_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { return; diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 03415b6..abe23d4 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -81,6 +81,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13, /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. 
*/ VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_STATUS = 16, VHOST_USER_PROTOCOL_F_MAX }; @@ -126,6 +127,8 @@ typedef enum VhostUserRequest { VHOST_USER_GET_MAX_MEM_SLOTS = 36, VHOST_USER_ADD_MEM_REG = 37, VHOST_USER_REM_MEM_REG = 38, + VHOST_USER_SET_STATUS = 39, + VHOST_USER_GET_STATUS = 40, VHOST_USER_MAX } VhostUserRequest; @@ -1452,6 +1455,43 @@ static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64, return 0; } +static int vhost_user_set_status(struct vhost_dev *dev, uint8_t status) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_STATUS, status, false); +} + +static int vhost_user_get_status(struct vhost_dev *dev, uint8_t *status) +{ + uint64_t value; + int ret; + + ret = vhost_user_get_u64(dev, VHOST_USER_GET_STATUS, &value); + if (ret < 0) { + return ret; + } + *status = value; + + return 0; +} + +static int vhost_user_add_status(struct vhost_dev *dev, uint8_t status) +{ + uint8_t s; + int ret; + + ret = vhost_user_get_status(dev, &s); + if (ret < 0) { + return ret; + } + + if ((s & status) == status) { + return 0; + } + s |= status; + + return vhost_user_set_status(dev, s); +} + static int vhost_user_set_features(struct vhost_dev *dev, uint64_t features) { @@ -1460,6 +1500,7 @@ static int vhost_user_set_features(struct vhost_dev *dev, * backend is actually logging changes */ bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); + int ret; /* * We need to include any extra backend only feature bits that @@ -1467,9 +1508,18 @@ static int vhost_user_set_features(struct vhost_dev *dev, * VHOST_USER_F_PROTOCOL_FEATURES bit for enabling protocol * features. */ - return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, + ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features | dev->backend_features, log_enabled); + + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_STATUS)) { + if (!ret) { + return vhost_user_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); + } + } + + return ret; } static int vhost_user_set_protocol_features(struct vhost_dev *dev, @@ -1543,6 +1593,11 @@ static VhostUserHostNotifier *fetch_or_create_notifier(VhostUserState *u, n = g_ptr_array_index(u->notifiers, idx); if (!n) { + /* + * In case notifications arrive out-of-order, + * make room for current index.
+ */ + g_ptr_array_remove_index(u->notifiers, idx); n = g_new0(VhostUserHostNotifier, 1); n->idx = idx; g_ptr_array_insert(u->notifiers, idx, n); @@ -2615,6 +2670,27 @@ void vhost_user_cleanup(VhostUserState *user) user->chr = NULL; } +static int vhost_user_dev_start(struct vhost_dev *dev, bool started) +{ + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_STATUS)) { + return 0; + } + + /* Set device status only for last queue pair */ + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return 0; + } + + if (started) { + return vhost_user_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | + VIRTIO_CONFIG_S_DRIVER | + VIRTIO_CONFIG_S_DRIVER_OK); + } else { + return vhost_user_set_status(dev, 0); + } +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_backend_init, @@ -2649,4 +2725,5 @@ const VhostOps user_ops = { .vhost_backend_mem_section_filter = vhost_user_mem_section_filter, .vhost_get_inflight_fd = vhost_user_get_inflight_fd, .vhost_set_inflight_fd = vhost_user_set_inflight_fd, + .vhost_dev_start = vhost_user_dev_start, }; diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 7dc3c73..aa16d58 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -70,7 +70,7 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start) static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); - bool should_start = virtio_device_started(vdev, status); + bool should_start = virtio_device_should_start(vdev, status); int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 5185c15..d1c4c20 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1081,10 +1081,10 @@ out: return ret; } -static int vhost_virtqueue_start(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +int vhost_virtqueue_start(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); VirtioBusState *vbus = VIRTIO_BUS(qbus); @@ -1201,10 +1201,10 @@ fail_alloc_desc: return r; } -static void vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +void vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index df4bde2..97da74e 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -27,6 +27,39 @@ #define VIRTIO_CRYPTO_VM_VERSION 1 +typedef struct VirtIOCryptoSessionReq { + VirtIODevice *vdev; + VirtQueue *vq; + VirtQueueElement *elem; + CryptoDevBackendSessionInfo info; + CryptoDevCompletionFunc cb; +} VirtIOCryptoSessionReq; + +static void virtio_crypto_free_create_session_req(VirtIOCryptoSessionReq *sreq) +{ + switch (sreq->info.op_code) { + case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: + g_free(sreq->info.u.sym_sess_info.cipher_key); + g_free(sreq->info.u.sym_sess_info.auth_key); + break; + + case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION: + g_free(sreq->info.u.asym_sess_info.key); + break; + + case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION: + case VIRTIO_CRYPTO_HASH_DESTROY_SESSION: + case VIRTIO_CRYPTO_MAC_DESTROY_SESSION: + case 
VIRTIO_CRYPTO_AEAD_DESTROY_SESSION: + case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION: + break; + + default: + error_report("Unknown opcode: %u", sreq->info.op_code); + } + g_free(sreq); +} + /* * Transfer virtqueue index to crypto queue index. * The control virtqueue is after the data virtqueues @@ -75,27 +108,24 @@ virtio_crypto_cipher_session_helper(VirtIODevice *vdev, return 0; } -static int64_t +static int virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, struct virtio_crypto_sym_create_session_req *sess_req, uint32_t queue_id, uint32_t opcode, - struct iovec *iov, unsigned int out_num) + struct iovec *iov, unsigned int out_num, + VirtIOCryptoSessionReq *sreq) { VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); - CryptoDevBackendSessionInfo info; - CryptoDevBackendSymSessionInfo *sym_info; - int64_t session_id; + CryptoDevBackendSymSessionInfo *sym_info = &sreq->info.u.sym_sess_info; int queue_index; uint32_t op_type; - Error *local_err = NULL; int ret; - memset(&info, 0, sizeof(info)); op_type = ldl_le_p(&sess_req->op_type); - info.op_code = opcode; + sreq->info.op_code = opcode; - sym_info = &info.u.sym_sess_info; + sym_info = &sreq->info.u.sym_sess_info; sym_info->op_type = op_type; if (op_type == VIRTIO_CRYPTO_SYM_OP_CIPHER) { @@ -103,7 +133,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, &sess_req->u.cipher.para, &iov, &out_num); if (ret < 0) { - goto err; + return ret; } } else if (op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { size_t s; @@ -112,7 +142,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, &sess_req->u.chain.para.cipher_param, &iov, &out_num); if (ret < 0) { - goto err; + return ret; } /* hash part */ sym_info->alg_chain_order = ldl_le_p( @@ -129,8 +159,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, if (sym_info->auth_key_len > vcrypto->conf.max_auth_key_len) { error_report("virtio-crypto length of auth key is too big: %u", sym_info->auth_key_len); - ret = -VIRTIO_CRYPTO_ERR; - goto err; + return -VIRTIO_CRYPTO_ERR; } /* get auth key */ if (sym_info->auth_key_len > 0) { @@ -140,8 +169,7 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, if (unlikely(s != sym_info->auth_key_len)) { virtio_error(vdev, "virtio-crypto authenticated key incorrect"); - ret = -EFAULT; - goto err; + return -EFAULT; } iov_discard_front(&iov, &out_num, sym_info->auth_key_len); } @@ -153,49 +181,30 @@ virtio_crypto_create_sym_session(VirtIOCrypto *vcrypto, } else { /* VIRTIO_CRYPTO_SYM_HASH_MODE_NESTED */ error_report("unsupported hash mode"); - ret = -VIRTIO_CRYPTO_NOTSUPP; - goto err; + return -VIRTIO_CRYPTO_NOTSUPP; } } else { /* VIRTIO_CRYPTO_SYM_OP_NONE */ error_report("unsupported cipher op_type: VIRTIO_CRYPTO_SYM_OP_NONE"); - ret = -VIRTIO_CRYPTO_NOTSUPP; - goto err; + return -VIRTIO_CRYPTO_NOTSUPP; } queue_index = virtio_crypto_vq2q(queue_id); - session_id = cryptodev_backend_create_session( - vcrypto->cryptodev, - &info, queue_index, &local_err); - if (session_id >= 0) { - ret = session_id; - } else { - if (local_err) { - error_report_err(local_err); - } - ret = -VIRTIO_CRYPTO_ERR; - } - -err: - g_free(sym_info->cipher_key); - g_free(sym_info->auth_key); - return ret; + return cryptodev_backend_create_session(vcrypto->cryptodev, &sreq->info, + queue_index, sreq->cb, sreq); } -static int64_t +static int virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, struct virtio_crypto_akcipher_create_session_req *sess_req, uint32_t queue_id, uint32_t opcode, - struct iovec *iov, unsigned int out_num) + struct iovec *iov, unsigned int out_num, 
+ VirtIOCryptoSessionReq *sreq) { VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); - CryptoDevBackendSessionInfo info = {0}; - CryptoDevBackendAsymSessionInfo *asym_info; - int64_t session_id; + CryptoDevBackendAsymSessionInfo *asym_info = &sreq->info.u.asym_sess_info; int queue_index; uint32_t algo, keytype, keylen; - g_autofree uint8_t *key = NULL; - Error *local_err = NULL; algo = ldl_le_p(&sess_req->para.algo); keytype = ldl_le_p(&sess_req->para.keytype); @@ -208,20 +217,19 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, } if (keylen) { - key = g_malloc(keylen); - if (iov_to_buf(iov, out_num, 0, key, keylen) != keylen) { + asym_info->key = g_malloc(keylen); + if (iov_to_buf(iov, out_num, 0, asym_info->key, keylen) != keylen) { virtio_error(vdev, "virtio-crypto asym key incorrect"); return -EFAULT; } iov_discard_front(&iov, &out_num, keylen); } - info.op_code = opcode; - asym_info = &info.u.asym_sess_info; + sreq->info.op_code = opcode; + asym_info = &sreq->info.u.asym_sess_info; asym_info->algo = algo; asym_info->keytype = keytype; asym_info->keylen = keylen; - asym_info->key = key; switch (asym_info->algo) { case VIRTIO_CRYPTO_AKCIPHER_RSA: asym_info->u.rsa.padding_algo = @@ -237,45 +245,95 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, } queue_index = virtio_crypto_vq2q(queue_id); - session_id = cryptodev_backend_create_session(vcrypto->cryptodev, &info, - queue_index, &local_err); - if (session_id < 0) { - if (local_err) { - error_report_err(local_err); - } - return -VIRTIO_CRYPTO_ERR; - } - - return session_id; + return cryptodev_backend_create_session(vcrypto->cryptodev, &sreq->info, + queue_index, sreq->cb, sreq); } -static uint8_t +static int virtio_crypto_handle_close_session(VirtIOCrypto *vcrypto, struct virtio_crypto_destroy_session_req *close_sess_req, - uint32_t queue_id) + uint32_t queue_id, + VirtIOCryptoSessionReq *sreq) { - int ret; uint64_t session_id; - uint32_t status; - Error *local_err = NULL; session_id = ldq_le_p(&close_sess_req->session_id); DPRINTF("close session, id=%" PRIu64 "\n", session_id); - ret = cryptodev_backend_close_session( - vcrypto->cryptodev, session_id, queue_id, &local_err); - if (ret == 0) { - status = VIRTIO_CRYPTO_OK; + return cryptodev_backend_close_session( + vcrypto->cryptodev, session_id, queue_id, sreq->cb, sreq); +} + +static void virtio_crypto_create_session_completion(void *opaque, int ret) +{ + VirtIOCryptoSessionReq *sreq = (VirtIOCryptoSessionReq *)opaque; + VirtQueue *vq = sreq->vq; + VirtQueueElement *elem = sreq->elem; + VirtIODevice *vdev = sreq->vdev; + struct virtio_crypto_session_input input; + struct iovec *in_iov = elem->in_sg; + unsigned in_num = elem->in_num; + size_t s; + + memset(&input, 0, sizeof(input)); + /* Serious errors, need to reset virtio crypto device */ + if (ret == -EFAULT) { + virtqueue_detach_element(vq, elem, 0); + goto out; + } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) { + stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); + } else if (ret == -VIRTIO_CRYPTO_KEY_REJECTED) { + stl_le_p(&input.status, VIRTIO_CRYPTO_KEY_REJECTED); + } else if (ret != VIRTIO_CRYPTO_OK) { + stl_le_p(&input.status, VIRTIO_CRYPTO_ERR); } else { - if (local_err) { - error_report_err(local_err); - } else { - error_report("destroy session failed"); - } + /* Set the session id */ + stq_le_p(&input.session_id, sreq->info.session_id); + stl_le_p(&input.status, VIRTIO_CRYPTO_OK); + } + + s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); + if (unlikely(s != sizeof(input))) { + virtio_error(vdev, 
"virtio-crypto input incorrect"); + virtqueue_detach_element(vq, elem, 0); + goto out; + } + virtqueue_push(vq, elem, sizeof(input)); + virtio_notify(vdev, vq); + +out: + g_free(elem); + virtio_crypto_free_create_session_req(sreq); +} + +static void virtio_crypto_destroy_session_completion(void *opaque, int ret) +{ + VirtIOCryptoSessionReq *sreq = (VirtIOCryptoSessionReq *)opaque; + VirtQueue *vq = sreq->vq; + VirtQueueElement *elem = sreq->elem; + VirtIODevice *vdev = sreq->vdev; + struct iovec *in_iov = elem->in_sg; + unsigned in_num = elem->in_num; + uint8_t status; + size_t s; + + if (ret < 0) { status = VIRTIO_CRYPTO_ERR; + } else { + status = VIRTIO_CRYPTO_OK; + } + s = iov_from_buf(in_iov, in_num, 0, &status, sizeof(status)); + if (unlikely(s != sizeof(status))) { + virtio_error(vdev, "virtio-crypto status incorrect"); + virtqueue_detach_element(vq, elem, 0); + goto out; } + virtqueue_push(vq, elem, sizeof(status)); + virtio_notify(vdev, vq); - return status; +out: + g_free(elem); + g_free(sreq); } static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) @@ -283,16 +341,16 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); struct virtio_crypto_op_ctrl_req ctrl; VirtQueueElement *elem; - struct iovec *in_iov; - struct iovec *out_iov; - unsigned in_num; + VirtIOCryptoSessionReq *sreq; unsigned out_num; + unsigned in_num; uint32_t queue_id; uint32_t opcode; struct virtio_crypto_session_input input; - int64_t session_id; - uint8_t status; size_t s; + int ret; + struct iovec *out_iov; + struct iovec *in_iov; for (;;) { g_autofree struct iovec *out_iov_copy = NULL; @@ -327,44 +385,34 @@ static void virtio_crypto_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) opcode = ldl_le_p(&ctrl.header.opcode); queue_id = ldl_le_p(&ctrl.header.queue_id); - memset(&input, 0, sizeof(input)); + sreq = g_new0(VirtIOCryptoSessionReq, 1); + sreq->vdev = vdev; + sreq->vq = vq; + sreq->elem = elem; + switch (opcode) { case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION: - session_id = virtio_crypto_create_sym_session(vcrypto, - &ctrl.u.sym_create_session, - queue_id, opcode, - out_iov, out_num); - goto check_session; + sreq->cb = virtio_crypto_create_session_completion; + ret = virtio_crypto_create_sym_session(vcrypto, + &ctrl.u.sym_create_session, + queue_id, opcode, + out_iov, out_num, + sreq); + if (ret < 0) { + virtio_crypto_create_session_completion(sreq, ret); + } + break; case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION: - session_id = virtio_crypto_create_asym_session(vcrypto, + sreq->cb = virtio_crypto_create_session_completion; + ret = virtio_crypto_create_asym_session(vcrypto, &ctrl.u.akcipher_create_session, queue_id, opcode, - out_iov, out_num); - -check_session: - /* Serious errors, need to reset virtio crypto device */ - if (session_id == -EFAULT) { - virtqueue_detach_element(vq, elem, 0); - break; - } else if (session_id == -VIRTIO_CRYPTO_NOTSUPP) { - stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); - } else if (session_id == -VIRTIO_CRYPTO_ERR) { - stl_le_p(&input.status, VIRTIO_CRYPTO_ERR); - } else { - /* Set the session id */ - stq_le_p(&input.session_id, session_id); - stl_le_p(&input.status, VIRTIO_CRYPTO_OK); - } - - s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); - if (unlikely(s != sizeof(input))) { - virtio_error(vdev, "virtio-crypto input incorrect"); - virtqueue_detach_element(vq, elem, 0); - break; + out_iov, out_num, + sreq); + if (ret < 0) { + virtio_crypto_create_session_completion(sreq, ret); } - 
virtqueue_push(vq, elem, sizeof(input)); - virtio_notify(vdev, vq); break; case VIRTIO_CRYPTO_CIPHER_DESTROY_SESSION: @@ -372,37 +420,36 @@ check_session: case VIRTIO_CRYPTO_MAC_DESTROY_SESSION: case VIRTIO_CRYPTO_AEAD_DESTROY_SESSION: case VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION: - status = virtio_crypto_handle_close_session(vcrypto, - &ctrl.u.destroy_session, queue_id); - /* The status only occupy one byte, we can directly use it */ - s = iov_from_buf(in_iov, in_num, 0, &status, sizeof(status)); - if (unlikely(s != sizeof(status))) { - virtio_error(vdev, "virtio-crypto status incorrect"); - virtqueue_detach_element(vq, elem, 0); - break; + sreq->cb = virtio_crypto_destroy_session_completion; + ret = virtio_crypto_handle_close_session(vcrypto, + &ctrl.u.destroy_session, queue_id, + sreq); + if (ret < 0) { + virtio_crypto_destroy_session_completion(sreq, ret); } - virtqueue_push(vq, elem, sizeof(status)); - virtio_notify(vdev, vq); break; + case VIRTIO_CRYPTO_HASH_CREATE_SESSION: case VIRTIO_CRYPTO_MAC_CREATE_SESSION: case VIRTIO_CRYPTO_AEAD_CREATE_SESSION: default: + memset(&input, 0, sizeof(input)); error_report("virtio-crypto unsupported ctrl opcode: %d", opcode); stl_le_p(&input.status, VIRTIO_CRYPTO_NOTSUPP); s = iov_from_buf(in_iov, in_num, 0, &input, sizeof(input)); if (unlikely(s != sizeof(input))) { virtio_error(vdev, "virtio-crypto input incorrect"); virtqueue_detach_element(vq, elem, 0); - break; + } else { + virtqueue_push(vq, elem, sizeof(input)); + virtio_notify(vdev, vq); } - virtqueue_push(vq, elem, sizeof(input)); - virtio_notify(vdev, vq); + g_free(sreq); + g_free(elem); break; } /* end switch case */ - g_free(elem); } /* end for loop */ } @@ -448,6 +495,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req) } } + g_free(req->in_iov); g_free(req); } @@ -458,6 +506,7 @@ virtio_crypto_sym_input_data_helper(VirtIODevice *vdev, CryptoDevBackendSymOpInfo *sym_op_info) { size_t s, len; + struct iovec *in_iov = req->in_iov; if (status != VIRTIO_CRYPTO_OK) { return; @@ -465,18 +514,18 @@ virtio_crypto_sym_input_data_helper(VirtIODevice *vdev, len = sym_op_info->src_len; /* Save the cipher result */ - s = iov_from_buf(req->in_iov, req->in_num, 0, sym_op_info->dst, len); + s = iov_from_buf(in_iov, req->in_num, 0, sym_op_info->dst, len); if (s != len) { virtio_error(vdev, "virtio-crypto dest data incorrect"); return; } - iov_discard_front(&req->in_iov, &req->in_num, len); + iov_discard_front(&in_iov, &req->in_num, len); if (sym_op_info->op_type == VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING) { /* Save the digest result */ - s = iov_from_buf(req->in_iov, req->in_num, 0, + s = iov_from_buf(in_iov, req->in_num, 0, sym_op_info->digest_result, sym_op_info->digest_result_len); if (s != sym_op_info->digest_result_len) { @@ -491,6 +540,7 @@ virtio_crypto_akcipher_input_data_helper(VirtIODevice *vdev, CryptoDevBackendAsymOpInfo *asym_op_info) { size_t s, len; + struct iovec *in_iov = req->in_iov; if (status != VIRTIO_CRYPTO_OK) { return; @@ -501,23 +551,24 @@ virtio_crypto_akcipher_input_data_helper(VirtIODevice *vdev, return; } - s = iov_from_buf(req->in_iov, req->in_num, 0, asym_op_info->dst, len); + s = iov_from_buf(in_iov, req->in_num, 0, asym_op_info->dst, len); if (s != len) { virtio_error(vdev, "virtio-crypto asym dest data incorrect"); return; } - iov_discard_front(&req->in_iov, &req->in_num, len); + iov_discard_front(&in_iov, &req->in_num, len); /* For akcipher, dst_len may be changed after operation */ req->in_len = sizeof(struct virtio_crypto_inhdr) + asym_op_info->dst_len; 
} - -static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status) +static void virtio_crypto_req_complete(void *opaque, int ret) { + VirtIOCryptoReq *req = (VirtIOCryptoReq *)opaque; VirtIOCrypto *vcrypto = req->vcrypto; VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); + uint8_t status = -ret; if (req->flags == CRYPTODEV_BACKEND_ALG_SYM) { virtio_crypto_sym_input_data_helper(vdev, req, status, @@ -529,6 +580,7 @@ static void virtio_crypto_req_complete(VirtIOCryptoReq *req, uint8_t status) stb_p(&req->in->status, status); virtqueue_push(req->vq, &req->elem, req->in_len); virtio_notify(vdev, req->vq); + virtio_crypto_free_request(req); } static VirtIOCryptoReq * @@ -773,9 +825,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) unsigned in_num; unsigned out_num; uint32_t opcode; - uint8_t status = VIRTIO_CRYPTO_ERR; CryptoDevBackendOpInfo *op_info = &request->op_info; - Error *local_err = NULL; if (elem->out_num < 1 || elem->in_num < 1) { virtio_error(vdev, "virtio-crypto dataq missing headers"); @@ -815,6 +865,8 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) */ request->in_num = in_num; request->in_iov = in_iov; + /* now, we free the in_iov_copy inside virtio_crypto_free_request */ + in_iov_copy = NULL; opcode = ldl_le_p(&req.header.opcode); op_info->session_id = ldq_le_p(&req.header.session_id); @@ -843,23 +895,15 @@ check_result: if (ret == -EFAULT) { return -1; } else if (ret == -VIRTIO_CRYPTO_NOTSUPP) { - virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); - virtio_crypto_free_request(request); + virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP); } else { - - /* Set request's parameter */ ret = cryptodev_backend_crypto_operation(vcrypto->cryptodev, - request, queue_index, &local_err); + request, queue_index, + virtio_crypto_req_complete, + request); if (ret < 0) { - status = -ret; - if (local_err) { - error_report_err(local_err); - } - } else { /* ret == VIRTIO_CRYPTO_OK */ - status = ret; + virtio_crypto_req_complete(request, ret); } - virtio_crypto_req_complete(request, status); - virtio_crypto_free_request(request); } break; @@ -870,8 +914,7 @@ check_result: default: error_report("virtio-crypto unsupported dataq opcode: %u", opcode); - virtio_crypto_req_complete(request, VIRTIO_CRYPTO_NOTSUPP); - virtio_crypto_free_request(request); + virtio_crypto_req_complete(request, -VIRTIO_CRYPTO_NOTSUPP); } return 0; @@ -1011,7 +1054,7 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) vcrypto->vqs[i].vcrypto = vcrypto; } - vcrypto->ctrl_vq = virtio_add_queue(vdev, 64, virtio_crypto_handle_ctrl); + vcrypto->ctrl_vq = virtio_add_queue(vdev, 1024, virtio_crypto_handle_ctrl); if (!cryptodev_backend_is_ready(vcrypto->cryptodev)) { vcrypto->status &= ~VIRTIO_CRYPTO_S_HW_READY; } else { diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index 79ea833..7ef2f9d 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -17,6 +17,7 @@ #include "hw/qdev-properties-system.h" #include "qapi/error.h" #include "hw/boards.h" +#include "hw/pci/pci_bus.h" #include "qom/object.h" typedef struct VirtIOIOMMUPCI VirtIOIOMMUPCI; @@ -44,6 +45,7 @@ static Property virtio_iommu_pci_properties[] = { static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOIOMMUPCI *dev = VIRTIO_IOMMU_PCI(vpci_dev); + PCIBus *pbus = pci_get_bus(&vpci_dev->pci_dev); DeviceState *vdev = DEVICE(&dev->vdev); VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); @@ -57,11 +59,17 @@ static void 
virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) s->reserved_regions[i].type != VIRTIO_IOMMU_RESV_MEM_T_MSI) { error_setg(errp, "reserved region %d has an invalid type", i); error_append_hint(errp, "Valid values are 0 and 1\n"); + return; } } + if (!pci_bus_is_root(pbus)) { + error_setg(errp, "virtio-iommu-pci must be plugged on the root bus"); + return; + } + object_property_set_link(OBJECT(dev), "primary-bus", - OBJECT(pci_get_bus(&vpci_dev->pci_dev)), - &error_abort); + OBJECT(pbus), &error_abort); + virtio_pci_force_virtio_1(vpci_dev); qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 34db51e..a1c9dfa 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -71,9 +71,11 @@ static void virtio_pci_notify(DeviceState *d, uint16_t vector) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy_fast(d); - if (msix_enabled(&proxy->pci_dev)) - msix_notify(&proxy->pci_dev, vector); - else { + if (msix_enabled(&proxy->pci_dev)) { + if (vector != VIRTIO_NO_VECTOR) { + msix_notify(&proxy->pci_dev, vector); + } + } else { VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); pci_set_irq(&proxy->pci_dev, qatomic_read(&vdev->isr) & 1); } @@ -175,6 +177,7 @@ static int virtio_pci_load_config(DeviceState *d, QEMUFile *f) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + uint16_t vector; int ret; ret = pci_device_load(&proxy->pci_dev, f); @@ -184,12 +187,17 @@ static int virtio_pci_load_config(DeviceState *d, QEMUFile *f) msix_unuse_all_vectors(&proxy->pci_dev); msix_load(&proxy->pci_dev, f); if (msix_present(&proxy->pci_dev)) { - qemu_get_be16s(f, &vdev->config_vector); + qemu_get_be16s(f, &vector); + + if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) { + return -EINVAL; + } } else { - vdev->config_vector = VIRTIO_NO_VECTOR; + vector = VIRTIO_NO_VECTOR; } - if (vdev->config_vector != VIRTIO_NO_VECTOR) { - return msix_vector_use(&proxy->pci_dev, vdev->config_vector); + vdev->config_vector = vector; + if (vector != VIRTIO_NO_VECTOR) { + msix_vector_use(&proxy->pci_dev, vector); } return 0; } @@ -202,12 +210,15 @@ static int virtio_pci_load_queue(DeviceState *d, int n, QEMUFile *f) uint16_t vector; if (msix_present(&proxy->pci_dev)) { qemu_get_be16s(f, &vector); + if (vector != VIRTIO_NO_VECTOR && vector >= proxy->nvectors) { + return -EINVAL; + } } else { vector = VIRTIO_NO_VECTOR; } virtio_queue_set_vector(vdev, n, vector); if (vector != VIRTIO_NO_VECTOR) { - return msix_vector_use(&proxy->pci_dev, vector); + msix_vector_use(&proxy->pci_dev, vector); } return 0; @@ -299,6 +310,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + uint16_t vector; hwaddr pa; switch (addr) { @@ -352,18 +364,28 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) } break; case VIRTIO_MSI_CONFIG_VECTOR: - msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); + if (vdev->config_vector != VIRTIO_NO_VECTOR) { + msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); + } /* Make it possible for guest to discover an error took place. 
*/ - if (msix_vector_use(&proxy->pci_dev, val) < 0) + if (val < proxy->nvectors) { + msix_vector_use(&proxy->pci_dev, val); + } else { val = VIRTIO_NO_VECTOR; + } vdev->config_vector = val; break; case VIRTIO_MSI_QUEUE_VECTOR: - msix_vector_unuse(&proxy->pci_dev, - virtio_queue_vector(vdev, vdev->queue_sel)); + vector = virtio_queue_vector(vdev, vdev->queue_sel); + if (vector != VIRTIO_NO_VECTOR) { + msix_vector_unuse(&proxy->pci_dev, vector); + } /* Make it possible for guest to discover an error took place. */ - if (msix_vector_use(&proxy->pci_dev, val) < 0) + if (val < proxy->nvectors) { + msix_vector_use(&proxy->pci_dev, val); + } else { val = VIRTIO_NO_VECTOR; + } virtio_queue_set_vector(vdev, vdev->queue_sel, val); break; default: @@ -1251,6 +1273,9 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, case VIRTIO_PCI_COMMON_Q_USEDHI: val = proxy->vqs[vdev->queue_sel].used[1]; break; + case VIRTIO_PCI_COMMON_Q_RESET: + val = proxy->vqs[vdev->queue_sel].reset; + break; default: val = 0; } @@ -1263,6 +1288,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + uint16_t vector; if (vdev == NULL) { return; @@ -1284,9 +1310,13 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, } break; case VIRTIO_PCI_COMMON_MSIX: - msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); + if (vdev->config_vector != VIRTIO_NO_VECTOR) { + msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); + } /* Make it possible for guest to discover an error took place. */ - if (msix_vector_use(&proxy->pci_dev, val) < 0) { + if (val < proxy->nvectors) { + msix_vector_use(&proxy->pci_dev, val); + } else { val = VIRTIO_NO_VECTOR; } vdev->config_vector = val; @@ -1318,10 +1348,14 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, proxy->vqs[vdev->queue_sel].num); break; case VIRTIO_PCI_COMMON_Q_MSIX: - msix_vector_unuse(&proxy->pci_dev, - virtio_queue_vector(vdev, vdev->queue_sel)); + vector = virtio_queue_vector(vdev, vdev->queue_sel); + if (vector != VIRTIO_NO_VECTOR) { + msix_vector_unuse(&proxy->pci_dev, vector); + } /* Make it possible for guest to discover an error took place. 
*/ - if (msix_vector_use(&proxy->pci_dev, val) < 0) { + if (val < proxy->nvectors) { + msix_vector_use(&proxy->pci_dev, val); + } else { val = VIRTIO_NO_VECTOR; } virtio_queue_set_vector(vdev, vdev->queue_sel, val); @@ -1338,6 +1372,8 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, ((uint64_t)proxy->vqs[vdev->queue_sel].used[1]) << 32 | proxy->vqs[vdev->queue_sel].used[0]); proxy->vqs[vdev->queue_sel].enabled = 1; + proxy->vqs[vdev->queue_sel].reset = 0; + virtio_queue_enable(vdev, vdev->queue_sel); } else { virtio_error(vdev, "wrong value for queue_enable %"PRIx64, val); } @@ -1360,6 +1396,16 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, case VIRTIO_PCI_COMMON_Q_USEDHI: proxy->vqs[vdev->queue_sel].used[1] = val; break; + case VIRTIO_PCI_COMMON_Q_RESET: + if (val == 1) { + proxy->vqs[vdev->queue_sel].reset = 1; + + virtio_queue_reset(vdev, vdev->queue_sel); + + proxy->vqs[vdev->queue_sel].reset = 0; + proxy->vqs[vdev->queue_sel].enabled = 0; + } + break; default: break; } @@ -1954,6 +2000,7 @@ static void virtio_pci_reset(DeviceState *qdev) for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { proxy->vqs[i].enabled = 0; + proxy->vqs[i].reset = 0; proxy->vqs[i].num = 0; proxy->vqs[i].desc[0] = proxy->vqs[i].desc[1] = 0; proxy->vqs[i].avail[0] = proxy->vqs[i].avail[1] = 0; diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c index 151ece6..6e76f8b 100644 --- a/hw/virtio/virtio-rng-pci.c +++ b/hw/virtio/virtio-rng-pci.c @@ -13,6 +13,7 @@ #include "hw/virtio/virtio-pci.h" #include "hw/virtio/virtio-rng.h" +#include "hw/qdev-properties.h" #include "qapi/error.h" #include "qemu/module.h" #include "qom/object.h" @@ -31,11 +32,23 @@ struct VirtIORngPCI { VirtIORNG vdev; }; +static Property virtio_rng_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), + DEFINE_PROP_END_OF_LIST(), +}; + static void virtio_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIORngPCI *vrng = VIRTIO_RNG_PCI(vpci_dev); DeviceState *vdev = DEVICE(&vrng->vdev); + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = 2; + } + if (!qdev_realize(vdev, BUS(&vpci_dev->bus), errp)) { return; } @@ -54,6 +67,7 @@ static void virtio_rng_pci_class_init(ObjectClass *klass, void *data) pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_RNG; pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; pcidev_k->class_id = PCI_CLASS_OTHERS; + device_class_set_props(dc, virtio_rng_properties); } static void virtio_rng_initfn(Object *obj) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 808446b..9683b2e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2464,6 +2464,51 @@ static enum virtio_device_endian virtio_current_cpu_endian(void) } } +static void __virtio_queue_reset(VirtIODevice *vdev, uint32_t i) +{ + vdev->vq[i].vring.desc = 0; + vdev->vq[i].vring.avail = 0; + vdev->vq[i].vring.used = 0; + vdev->vq[i].last_avail_idx = 0; + vdev->vq[i].shadow_avail_idx = 0; + vdev->vq[i].used_idx = 0; + vdev->vq[i].last_avail_wrap_counter = true; + vdev->vq[i].shadow_avail_wrap_counter = true; + vdev->vq[i].used_wrap_counter = true; + virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); + vdev->vq[i].signalled_used = 0; + vdev->vq[i].signalled_used_valid = false; + vdev->vq[i].notification = true; + vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; + vdev->vq[i].inuse = 0; + virtio_virtqueue_reset_region_cache(&vdev->vq[i]); 
+} + +void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index) +{ + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + + if (k->queue_reset) { + k->queue_reset(vdev, queue_index); + } + + __virtio_queue_reset(vdev, queue_index); +} + +void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index) +{ + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + + if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + error_report("queue_enable is only supported in devices of virtio " + "1.0 or later."); + } + + if (k->queue_enable) { + k->queue_enable(vdev, queue_index); + } +} + void virtio_reset(void *opaque) { VirtIODevice *vdev = opaque; @@ -2495,22 +2540,7 @@ void virtio_reset(void *opaque) virtio_notify_vector(vdev, vdev->config_vector); for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { - vdev->vq[i].vring.desc = 0; - vdev->vq[i].vring.avail = 0; - vdev->vq[i].vring.used = 0; - vdev->vq[i].last_avail_idx = 0; - vdev->vq[i].shadow_avail_idx = 0; - vdev->vq[i].used_idx = 0; - vdev->vq[i].last_avail_wrap_counter = true; - vdev->vq[i].shadow_avail_wrap_counter = true; - vdev->vq[i].used_wrap_counter = true; - virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); - vdev->vq[i].signalled_used = 0; - vdev->vq[i].signalled_used_valid = false; - vdev->vq[i].notification = true; - vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; - vdev->vq[i].inuse = 0; - virtio_virtqueue_reset_region_cache(&vdev->vq[i]); + __virtio_queue_reset(vdev, i); } }
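For orientation, here is a minimal sketch of how a device model might hook up the new DOE mailbox helpers from hw/pci/pcie_doe.c in this series: a protocol table terminated by a zero vendor_id, pcie_doe_init() called at realize time, and pcie_doe_read_config()/pcie_doe_write_config() forwarded from the device's config-space accessors (the same pattern the CXL upstream port uses via cxl_usp_read_config/cxl_usp_write_config above). The MyDOEDev type, the vendor/object-type values and the capability offset are illustrative assumptions, not values taken from the patch.

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/pci/pci.h"
#include "hw/pci/pcie_doe.h"

/* Hypothetical device state embedding one DOE capability */
typedef struct MyDOEDev {
    PCIDevice parent_obj;
    DOECap doe;
} MyDOEDev;

#define MY_VENDOR_ID       0x1234  /* assumed vendor ID */
#define MY_DATA_OBJ_TYPE   0x01    /* assumed data object type */
#define MY_DOE_CAP_OFFSET  0x190   /* assumed free extended config space offset */

static bool my_doe_handle_request(DOECap *doe_cap)
{
    /* The request starts at the write mailbox... */
    void *req = pcie_doe_get_write_mbox_ptr(doe_cap);

    /* ...and a response, if any, is queued with pcie_doe_set_rsp(). */
    (void)req;
    return true;
}

/* pcie_doe_init() scans this table until it sees a zero vendor_id */
static DOEProtocol my_doe_protocols[] = {
    { .vendor_id = MY_VENDOR_ID, .data_obj_type = MY_DATA_OBJ_TYPE,
      .handle_request = my_doe_handle_request },
    { }
};

static void my_dev_realize(PCIDevice *pdev, Error **errp)
{
    MyDOEDev *d = (MyDOEDev *)pdev;  /* real code would use a QOM cast */

    /* Interrupts disabled here; pass true and an MSI/MSI-X vector to enable */
    pcie_doe_init(pdev, &d->doe, MY_DOE_CAP_OFFSET, my_doe_protocols, false, 0);
}

static uint32_t my_dev_read_config(PCIDevice *pdev, uint32_t addr, int size)
{
    MyDOEDev *d = (MyDOEDev *)pdev;
    uint32_t val;

    /* Let the helper satisfy reads that land inside the DOE capability */
    if (pcie_doe_read_config(&d->doe, addr, size, &val)) {
        return val;
    }
    return pci_default_read_config(pdev, addr, size);
}

static void my_dev_write_config(PCIDevice *pdev, uint32_t addr,
                                uint32_t val, int size)
{
    MyDOEDev *d = (MyDOEDev *)pdev;

    /* Writes outside the DOE capability are ignored by the helper */
    pcie_doe_write_config(&d->doe, addr, val, size);
    pci_default_write_config(pdev, addr, val, size);
}

With this wiring, a guest that writes a well-formed data object into the Write Data Mailbox and sets the GO bit gets my_doe_handle_request() invoked; responses queued via pcie_doe_set_rsp() are then read back dword by dword through the Read Data Mailbox register, as implemented in pcie_doe_write_config()/pcie_doe_read_config() in the diff above.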