diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-11-08 10:01:51 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-11-08 10:01:51 +0000 |
commit | fa27257432689e8927cb993b251d380d654dcc86 (patch) | |
tree | 4f84ce4569599910e6d9b68e4c4ebb517fe20892 | |
parent | 4de6bb0c02ad3f0ec48f0f84ba1a65ab06e81b86 (diff) | |
parent | a458774ad711bceabefbf01e8f0b91d86ec72e0c (diff) | |
download | qemu-fa27257432689e8927cb993b251d380d654dcc86.zip qemu-fa27257432689e8927cb993b251d380d654dcc86.tar.gz qemu-fa27257432689e8927cb993b251d380d654dcc86.tar.bz2 |
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* icount fix (Clement)
* dumping fixes for non-volatile memory (Marc-André, myself)
* x86 emulation fix (Rudolf)
* recent Hyper-V CPUID flag (Vitaly)
* Q35 doc fix (Daniel)
* lsi fix (Prasad)
* SCSI block limits emulation fixes (myself)
* qemu_thread_atexit rework (Peter)
* ivshmem memory leak fix (Igor)
# gpg: Signature made Tue 06 Nov 2018 21:34:30 GMT
# gpg: using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83
* remotes/bonzini/tags/for-upstream:
util/qemu-thread-posix: Fix qemu_thread_atexit* for OSX
include/qemu/thread.h: Document qemu_thread_atexit* API
scsi-generic: do not do VPD emulation for sense other than ILLEGAL_REQUEST
scsi-generic: avoid invalid access to struct when emulating block limits
scsi-generic: avoid out-of-bounds access to VPD page list
scsi-generic: keep VPD page list sorted
lsi53c895a: check message length value is valid
scripts/dump-guest-memory: Synchronize with guest_phys_blocks_region_add
memory-mapping: skip non-volatile memory regions in GuestPhysBlockList
nvdimm: set non-volatile on the memory region
memory: learn about non-volatile memory region
target/i386: Clear RF on SYSCALL instruction
MAINTAINERS: remove or downgrade myself to reviewer from some subsystems
ivshmem: fix memory backend leak
i386: clarify that the Q35 machine type implements a P35 chipset
x86: hv_evmcs CPU flag support
icount: fix deadlock when all cpus are sleeping
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | MAINTAINERS | 13 | ||||
-rw-r--r-- | cpus.c | 8 | ||||
-rw-r--r-- | docs/devel/migration.rst | 1 | ||||
-rw-r--r-- | hw/mem/nvdimm.c | 1 | ||||
-rw-r--r-- | hw/misc/ivshmem.c | 1 | ||||
-rw-r--r-- | hw/pci-host/q35.c | 10 | ||||
-rw-r--r-- | hw/scsi/Makefile.objs | 2 | ||||
-rw-r--r-- | hw/scsi/emulation.c | 42 | ||||
-rw-r--r-- | hw/scsi/lsi53c895a.c | 19 | ||||
-rw-r--r-- | hw/scsi/scsi-disk.c | 92 | ||||
-rw-r--r-- | hw/scsi/scsi-generic.c | 60 | ||||
-rw-r--r-- | include/exec/memory.h | 25 | ||||
-rw-r--r-- | include/hw/pci/pci_ids.h | 2 | ||||
-rw-r--r-- | include/hw/scsi/emulation.h | 16 | ||||
-rw-r--r-- | include/hw/scsi/scsi.h | 1 | ||||
-rw-r--r-- | include/qemu/thread.h | 22 | ||||
-rw-r--r-- | memory.c | 45 | ||||
-rw-r--r-- | memory_mapping.c | 3 | ||||
-rw-r--r-- | scripts/dump-guest-memory.py | 4 | ||||
-rw-r--r-- | target/i386/cpu.c | 1 | ||||
-rw-r--r-- | target/i386/cpu.h | 1 | ||||
-rw-r--r-- | target/i386/hyperv-proto.h | 2 | ||||
-rw-r--r-- | target/i386/kvm.c | 30 | ||||
-rw-r--r-- | target/i386/seg_helper.c | 4 | ||||
-rw-r--r-- | util/qemu-thread-posix.c | 44 |
25 files changed, 308 insertions, 141 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 0499e11..0d68e4b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -105,9 +105,9 @@ Guest CPU cores (TCG): ---------------------- Overall L: qemu-devel@nongnu.org -M: Paolo Bonzini <pbonzini@redhat.com> M: Peter Crosthwaite <crosthwaite.peter@gmail.com> M: Richard Henderson <rth@twiddle.net> +R: Paolo Bonzini <pbonzini@redhat.com> S: Maintained F: cpus.c F: exec.c @@ -1141,7 +1141,8 @@ F: hw/pci-host/ppce500.c F: hw/net/fsl_etsec/ Character devices -M: Paolo Bonzini <pbonzini@redhat.com> +M: Marc-André Lureau <marcandre.lureau@redhat.com> +R: Paolo Bonzini <pbonzini@redhat.com> S: Odd Fixes F: hw/char/ @@ -1528,8 +1529,8 @@ T: git git://github.com/famz/qemu.git bitmaps T: git git://github.com/jnsnow/qemu.git bitmaps Character device backends -M: Paolo Bonzini <pbonzini@redhat.com> M: Marc-André Lureau <marcandre.lureau@redhat.com> +R: Paolo Bonzini <pbonzini@redhat.com> S: Maintained F: chardev/ F: include/chardev/ @@ -1762,9 +1763,9 @@ F: tests/qmp-cmd-test.c T: git git://repo.or.cz/qemu/armbru.git qapi-next qtest -M: Paolo Bonzini <pbonzini@redhat.com> M: Thomas Huth <thuth@redhat.com> M: Laurent Vivier <lvivier@redhat.com> +R: Paolo Bonzini <pbonzini@redhat.com> S: Maintained F: qtest.c F: tests/libqtest.* @@ -1871,7 +1872,6 @@ F: tests/test-io-* Sockets M: Daniel P. Berrange <berrange@redhat.com> M: Gerd Hoffmann <kraxel@redhat.com> -M: Paolo Bonzini <pbonzini@redhat.com> S: Maintained F: include/qemu/sockets.h F: util/qemu-sockets.c @@ -2058,13 +2058,12 @@ M: Ronnie Sahlberg <ronniesahlberg@gmail.com> M: Paolo Bonzini <pbonzini@redhat.com> M: Peter Lieven <pl@kamp.de> L: qemu-block@nongnu.org -S: Supported +S: Odd Fixes F: block/iscsi.c F: block/iscsi-opts.c Network Block Device (NBD) M: Eric Blake <eblake@redhat.com> -M: Paolo Bonzini <pbonzini@redhat.com> L: qemu-block@nongnu.org S: Maintained F: block/nbd* @@ -1554,6 +1554,14 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) atomic_mb_set(&cpu->exit_request, 0); } + if (use_icount && all_cpu_threads_idle()) { + /* + * When all cpus are sleeping (e.g in WFI), to avoid a deadlock + * in the main_loop, wake it up in order to start the warp timer. + */ + qemu_notify_event(); + } + qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu); deal_with_unplugged_cpus(); } diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 6875707..e7658ab 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -435,6 +435,7 @@ Examples of such memory API functions are: - memory_region_add_subregion() - memory_region_del_subregion() - memory_region_set_readonly() + - memory_region_set_nonvolatile() - memory_region_set_enabled() - memory_region_set_address() - memory_region_set_alias_offset() diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 49324f3..bf2adf5 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -116,6 +116,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp) nvdimm->nvdimm_mr = g_new(MemoryRegion, 1); memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm), "nvdimm-memory", mr, 0, pmem_size); + memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true); nvdimm->nvdimm_mr->align = align; } diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index f88910e..ecfd10a 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -1279,6 +1279,7 @@ static void desugar_shm(IVShmemState *s) object_property_set_bool(obj, true, "share", &error_abort); object_property_add_child(OBJECT(s), "internal-shm-backend", obj, &error_abort); + object_unref(obj); user_creatable_complete(obj, &error_abort); s->hostmem = MEMORY_BACKEND(obj); } diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 0c38a8d..7b871b5 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -629,7 +629,15 @@ static void mch_class_init(ObjectClass *klass, void *data) dc->desc = "Host bridge"; dc->vmsd = &vmstate_mch; k->vendor_id = PCI_VENDOR_ID_INTEL; - k->device_id = PCI_DEVICE_ID_INTEL_Q35_MCH; + /* + * The 'q35' machine type implements an Intel Series 3 chipset, + * of which there are several variants. The key difference between + * the 82P35 MCH ('p35') and 82Q35 GMCH ('q35') variants is that + * the latter has an integrated graphics adapter. QEMU does not + * implement integrated graphics, so uses the PCI ID for the 82P35 + * chipset. + */ + k->device_id = PCI_DEVICE_ID_INTEL_P35_MCH; k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT; k->class_id = PCI_CLASS_BRIDGE_HOST; /* diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs index 718b4c2..45167ba 100644 --- a/hw/scsi/Makefile.objs +++ b/hw/scsi/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y += scsi-disk.o +common-obj-y += scsi-disk.o emulation.o common-obj-y += scsi-generic.o scsi-bus.o common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o common-obj-$(CONFIG_MPTSAS_SCSI_PCI) += mptsas.o mptconfig.o mptendian.o diff --git a/hw/scsi/emulation.c b/hw/scsi/emulation.c new file mode 100644 index 0000000..06d62f3 --- /dev/null +++ b/hw/scsi/emulation.c @@ -0,0 +1,42 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/bswap.h" +#include "hw/scsi/emulation.h" + +int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl) +{ + /* required VPD size with unmap support */ + memset(outbuf, 0, 0x3c); + + outbuf[0] = bl->wsnz; /* wsnz */ + + if (bl->max_io_sectors) { + /* optimal transfer length granularity. This field and the optimal + * transfer length can't be greater than maximum transfer length. + */ + stw_be_p(outbuf + 2, MIN(bl->min_io_size, bl->max_io_sectors)); + + /* maximum transfer length */ + stl_be_p(outbuf + 4, bl->max_io_sectors); + + /* optimal transfer length */ + stl_be_p(outbuf + 8, MIN(bl->opt_io_size, bl->max_io_sectors)); + } else { + stw_be_p(outbuf + 2, bl->min_io_size); + stl_be_p(outbuf + 8, bl->opt_io_size); + } + + /* max unmap LBA count */ + stl_be_p(outbuf + 16, bl->max_unmap_sectors); + + /* max unmap descriptors */ + stl_be_p(outbuf + 20, bl->max_unmap_descr); + + /* optimal unmap granularity; alignment is zero */ + stl_be_p(outbuf + 24, bl->unmap_sectors); + + /* max write same size, make it the same as maximum transfer length */ + stl_be_p(outbuf + 36, bl->max_io_sectors); + + return 0x3c; +} diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index d1e6534..3f207f6 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -861,10 +861,11 @@ static void lsi_do_status(LSIState *s) static void lsi_do_msgin(LSIState *s) { - int len; + uint8_t len; trace_lsi_do_msgin(s->dbc, s->msg_len); s->sfbr = s->msg[0]; len = s->msg_len; + assert(len > 0 && len <= LSI_MAX_MSGIN_LEN); if (len > s->dbc) len = s->dbc; pci_dma_write(PCI_DEVICE(s), s->dnad, s->msg, len); @@ -1705,8 +1706,10 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset) break; case 0x58: /* SBDL */ /* Some drivers peek at the data bus during the MSG IN phase. */ - if ((s->sstat1 & PHASE_MASK) == PHASE_MI) + if ((s->sstat1 & PHASE_MASK) == PHASE_MI) { + assert(s->msg_len > 0); return s->msg[0]; + } ret = 0; break; case 0x59: /* SBDL high */ @@ -2103,11 +2106,23 @@ static int lsi_pre_save(void *opaque) return 0; } +static int lsi_post_load(void *opaque, int version_id) +{ + LSIState *s = opaque; + + if (s->msg_len < 0 || s->msg_len > LSI_MAX_MSGIN_LEN) { + return -EINVAL; + } + + return 0; +} + static const VMStateDescription vmstate_lsi_scsi = { .name = "lsiscsi", .version_id = 0, .minimum_version_id = 0, .pre_save = lsi_pre_save, + .post_load = lsi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, LSIState), diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index e2c5408..6eb258d 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -33,6 +33,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) #include "qapi/error.h" #include "qemu/error-report.h" #include "hw/scsi/scsi.h" +#include "hw/scsi/emulation.h" #include "scsi/constants.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -589,7 +590,7 @@ static uint8_t *scsi_get_buf(SCSIRequest *req) return (uint8_t *)r->iov.iov_base; } -int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) +static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); uint8_t page_code = req->cmd.buf[2]; @@ -691,89 +692,36 @@ int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) } case 0xb0: /* block limits */ { - unsigned int unmap_sectors = - s->qdev.conf.discard_granularity / s->qdev.blocksize; - unsigned int min_io_size = - s->qdev.conf.min_io_size / s->qdev.blocksize; - unsigned int opt_io_size = - s->qdev.conf.opt_io_size / s->qdev.blocksize; - unsigned int max_unmap_sectors = - s->max_unmap_size / s->qdev.blocksize; - unsigned int max_io_sectors = - s->max_io_size / s->qdev.blocksize; + SCSIBlockLimits bl = {}; if (s->qdev.type == TYPE_ROM) { DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n", page_code); return -1; } + bl.wsnz = 1; + bl.unmap_sectors = + s->qdev.conf.discard_granularity / s->qdev.blocksize; + bl.min_io_size = + s->qdev.conf.min_io_size / s->qdev.blocksize; + bl.opt_io_size = + s->qdev.conf.opt_io_size / s->qdev.blocksize; + bl.max_unmap_sectors = + s->max_unmap_size / s->qdev.blocksize; + bl.max_io_sectors = + s->max_io_size / s->qdev.blocksize; + /* 255 descriptors fit in 4 KiB with an 8-byte header */ + bl.max_unmap_descr = 255; + if (s->qdev.type == TYPE_DISK) { int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk); int max_io_sectors_blk = max_transfer_blk / s->qdev.blocksize; - max_io_sectors = - MIN_NON_ZERO(max_io_sectors_blk, max_io_sectors); - - /* min_io_size and opt_io_size can't be greater than - * max_io_sectors */ - if (min_io_size) { - min_io_size = MIN(min_io_size, max_io_sectors); - } - if (opt_io_size) { - opt_io_size = MIN(opt_io_size, max_io_sectors); - } + bl.max_io_sectors = + MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors); } - /* required VPD size with unmap support */ - buflen = 0x40; - memset(outbuf + 4, 0, buflen - 4); - - outbuf[4] = 0x1; /* wsnz */ - - /* optimal transfer length granularity */ - outbuf[6] = (min_io_size >> 8) & 0xff; - outbuf[7] = min_io_size & 0xff; - - /* maximum transfer length */ - outbuf[8] = (max_io_sectors >> 24) & 0xff; - outbuf[9] = (max_io_sectors >> 16) & 0xff; - outbuf[10] = (max_io_sectors >> 8) & 0xff; - outbuf[11] = max_io_sectors & 0xff; - - /* optimal transfer length */ - outbuf[12] = (opt_io_size >> 24) & 0xff; - outbuf[13] = (opt_io_size >> 16) & 0xff; - outbuf[14] = (opt_io_size >> 8) & 0xff; - outbuf[15] = opt_io_size & 0xff; - - /* max unmap LBA count, default is 1GB */ - outbuf[20] = (max_unmap_sectors >> 24) & 0xff; - outbuf[21] = (max_unmap_sectors >> 16) & 0xff; - outbuf[22] = (max_unmap_sectors >> 8) & 0xff; - outbuf[23] = max_unmap_sectors & 0xff; - - /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header */ - outbuf[24] = 0; - outbuf[25] = 0; - outbuf[26] = 0; - outbuf[27] = 255; - - /* optimal unmap granularity */ - outbuf[28] = (unmap_sectors >> 24) & 0xff; - outbuf[29] = (unmap_sectors >> 16) & 0xff; - outbuf[30] = (unmap_sectors >> 8) & 0xff; - outbuf[31] = unmap_sectors & 0xff; - - /* max write same size */ - outbuf[36] = 0; - outbuf[37] = 0; - outbuf[38] = 0; - outbuf[39] = 0; - - outbuf[40] = (max_io_sectors >> 24) & 0xff; - outbuf[41] = (max_io_sectors >> 16) & 0xff; - outbuf[42] = (max_io_sectors >> 8) & 0xff; - outbuf[43] = max_io_sectors & 0xff; + buflen += scsi_emulate_block_limits(outbuf + buflen, &bl); break; } case 0xb1: /* block device characteristics */ diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index d60c4d0..7237b41 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -16,6 +16,7 @@ #include "qemu-common.h" #include "qemu/error-report.h" #include "hw/scsi/scsi.h" +#include "hw/scsi/emulation.h" #include "sysemu/block-backend.h" #ifdef __linux__ @@ -144,7 +145,7 @@ static int execute_command(BlockBackend *blk, static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) { - uint8_t page, page_len; + uint8_t page, page_idx; /* * EVPD set to zero returns the standard INQUIRY data. @@ -181,7 +182,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) /* Also take care of the opt xfer len. */ stl_be_p(&r->buf[12], MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12]))); - } else if (page == 0x00 && s->needs_vpd_bl_emulation) { + } else if (s->needs_vpd_bl_emulation && page == 0x00) { /* * Now we're capable of supplying the VPD Block Limits * response if the hardware can't. Add it in the INQUIRY @@ -190,17 +191,43 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) * * This way, the guest kernel will be aware of the support * and will use it to proper setup the SCSI device. + * + * VPD page numbers must be sorted, so insert 0xb0 at the + * right place with an in-place insert. After the initialization + * part of the for loop is executed, the device response is + * at r[0] to r[page_idx - 1]. */ - page_len = r->buf[3]; - r->buf[page_len + 4] = 0xb0; - r->buf[3] = ++page_len; + for (page_idx = lduw_be_p(r->buf + 2) + 4; + page_idx > 4 && r->buf[page_idx - 1] >= 0xb0; + page_idx--) { + if (page_idx < r->buflen) { + r->buf[page_idx] = r->buf[page_idx - 1]; + } + } + r->buf[page_idx] = 0xb0; + stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1); } } } -static int scsi_emulate_block_limits(SCSIGenericReq *r) +static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) { - r->buflen = scsi_disk_emulate_vpd_page(&r->req, r->buf); + int len; + uint8_t buf[64]; + + SCSIBlockLimits bl = { + .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize + }; + + memset(r->buf, 0, r->buflen); + stb_p(buf, s->type); + stb_p(buf + 1, 0xb0); + len = scsi_emulate_block_limits(buf + 4, &bl); + assert(len <= sizeof(buf) - 4); + stw_be_p(buf + 2, len); + + memcpy(r->buf, buf, MIN(r->buflen, len + 4)); + r->io_header.sb_len_wr = 0; /* @@ -219,7 +246,6 @@ static void scsi_read_complete(void * opaque, int ret) { SCSIGenericReq *r = (SCSIGenericReq *)opaque; SCSIDevice *s = r->req.dev; - SCSISense sense; int len; assert(r->req.aiocb != NULL); @@ -242,13 +268,15 @@ static void scsi_read_complete(void * opaque, int ret) * resulted in sense error but would need emulation. * In this case, emulate a valid VPD response. */ - if (s->needs_vpd_bl_emulation) { - int is_vpd_bl = r->req.cmd.buf[0] == INQUIRY && - r->req.cmd.buf[1] & 0x01 && - r->req.cmd.buf[2] == 0xb0; - - if (is_vpd_bl && sg_io_sense_from_errno(-ret, &r->io_header, &sense)) { - len = scsi_emulate_block_limits(r); + if (s->needs_vpd_bl_emulation && ret == 0 && + (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) && + r->req.cmd.buf[0] == INQUIRY && + (r->req.cmd.buf[1] & 0x01) && + r->req.cmd.buf[2] == 0xb0) { + SCSISense sense = + scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr); + if (sense.key == ILLEGAL_REQUEST) { + len = scsi_generic_emulate_block_limits(r, s); /* * No need to let scsi_read_complete go on and handle an * INQUIRY VPD BL request we created manually. @@ -527,7 +555,7 @@ static void scsi_generic_set_vpd_bl_emulation(SCSIDevice *s) } page_len = buf[3]; - for (i = 4; i < page_len + 4; i++) { + for (i = 4; i < MIN(sizeof(buf), page_len + 4); i++) { if (buf[i] == 0xb0) { s->needs_vpd_bl_emulation = false; return; diff --git a/include/exec/memory.h b/include/exec/memory.h index d0c7f0d..8e61450 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -355,6 +355,7 @@ struct MemoryRegion { bool ram; bool subpage; bool readonly; /* For RAM regions */ + bool nonvolatile; bool rom_device; bool flush_coalesced_mmio; bool global_locking; @@ -480,6 +481,7 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as) * @offset_within_address_space: the address of the first byte of the section * relative to the region's address space * @readonly: writes to this section are ignored + * @nonvolatile: this section is non-volatile */ struct MemoryRegionSection { MemoryRegion *mr; @@ -488,6 +490,7 @@ struct MemoryRegionSection { Int128 size; hwaddr offset_within_address_space; bool readonly; + bool nonvolatile; }; /** @@ -1170,6 +1173,17 @@ static inline bool memory_region_is_rom(MemoryRegion *mr) return mr->ram && mr->readonly; } +/** + * memory_region_is_nonvolatile: check whether a memory region is non-volatile + * + * Returns %true is a memory region is non-volatile memory. + * + * @mr: the memory region being queried + */ +static inline bool memory_region_is_nonvolatile(MemoryRegion *mr) +{ + return mr->nonvolatile; +} /** * memory_region_get_fd: Get a file descriptor backing a RAM memory region. @@ -1342,6 +1356,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, void memory_region_set_readonly(MemoryRegion *mr, bool readonly); /** + * memory_region_set_nonvolatile: Turn a memory region non-volatile + * + * Allows a memory region to be marked as non-volatile. + * only useful on RAM regions. + * + * @mr: the region being updated. + * @nonvolatile: whether rhe region is to be non-volatile. + */ +void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile); + +/** * memory_region_rom_device_set_romd: enable/disable ROMD mode * * Allows a ROM device (initialized with memory_region_init_rom_device() to diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 63acc72..eeb3301 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -255,7 +255,7 @@ #define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c #define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed -#define PCI_DEVICE_ID_INTEL_Q35_MCH 0x29c0 +#define PCI_DEVICE_ID_INTEL_P35_MCH 0x29c0 #define PCI_VENDOR_ID_XEN 0x5853 #define PCI_DEVICE_ID_XEN_PLATFORM 0x0001 diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h new file mode 100644 index 0000000..09fba1f --- /dev/null +++ b/include/hw/scsi/emulation.h @@ -0,0 +1,16 @@ +#ifndef HW_SCSI_EMULATION_H +#define HW_SCSI_EMULATION_H 1 + +typedef struct SCSIBlockLimits { + bool wsnz; + uint16_t min_io_size; + uint32_t max_unmap_descr; + uint32_t opt_io_size; + uint32_t max_unmap_sectors; + uint32_t unmap_sectors; + uint32_t max_io_sectors; +} SCSIBlockLimits; + +int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl); + +#endif diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index ee3a411..acef25f 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -189,7 +189,6 @@ void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); void scsi_device_unit_attention_reported(SCSIDevice *dev); void scsi_generic_read_device_inquiry(SCSIDevice *dev); int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed); -int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf); int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size, uint8_t *buf, uint8_t buf_size); SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun); diff --git a/include/qemu/thread.h b/include/qemu/thread.h index b2661b6..55d83a9 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -162,7 +162,29 @@ void qemu_thread_exit(void *retval); void qemu_thread_naming(bool enable); struct Notifier; +/** + * qemu_thread_atexit_add: + * @notifier: Notifier to add + * + * Add the specified notifier to a list which will be run via + * notifier_list_notify() when this thread exits (either by calling + * qemu_thread_exit() or by returning from its start_routine). + * The usual usage is that the caller passes a Notifier which is + * a per-thread variable; it can then use the callback to free + * other per-thread data. + * + * If the thread exits as part of the entire process exiting, + * it is unspecified whether notifiers are called or not. + */ void qemu_thread_atexit_add(struct Notifier *notifier); +/** + * qemu_thread_atexit_remove: + * @notifier: Notifier to remove + * + * Remove the specified notifier from the thread-exit notification + * list. It is not valid to try to remove a notifier which is not + * on the list. + */ void qemu_thread_atexit_remove(struct Notifier *notifier); struct QemuSpin { @@ -216,6 +216,7 @@ struct FlatRange { uint8_t dirty_log_mask; bool romd_mode; bool readonly; + bool nonvolatile; }; #define FOR_EACH_FLAT_RANGE(var, view) \ @@ -231,6 +232,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv) .size = fr->addr.size, .offset_within_address_space = int128_get64(fr->addr.start), .readonly = fr->readonly, + .nonvolatile = fr->nonvolatile, }; } @@ -240,7 +242,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b) && addrrange_equal(a->addr, b->addr) && a->offset_in_region == b->offset_in_region && a->romd_mode == b->romd_mode - && a->readonly == b->readonly; + && a->readonly == b->readonly + && a->nonvolatile == b->nonvolatile; } static FlatView *flatview_new(MemoryRegion *mr_root) @@ -312,7 +315,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2) int128_make64(r2->offset_in_region)) && r1->dirty_log_mask == r2->dirty_log_mask && r1->romd_mode == r2->romd_mode - && r1->readonly == r2->readonly; + && r1->readonly == r2->readonly + && r1->nonvolatile == r2->nonvolatile; } /* Attempt to simplify a view by merging adjacent ranges */ @@ -592,7 +596,8 @@ static void render_memory_region(FlatView *view, MemoryRegion *mr, Int128 base, AddrRange clip, - bool readonly) + bool readonly, + bool nonvolatile) { MemoryRegion *subregion; unsigned i; @@ -608,6 +613,7 @@ static void render_memory_region(FlatView *view, int128_addto(&base, int128_make64(mr->addr)); readonly |= mr->readonly; + nonvolatile |= mr->nonvolatile; tmp = addrrange_make(base, mr->size); @@ -620,13 +626,15 @@ static void render_memory_region(FlatView *view, if (mr->alias) { int128_subfrom(&base, int128_make64(mr->alias->addr)); int128_subfrom(&base, int128_make64(mr->alias_offset)); - render_memory_region(view, mr->alias, base, clip, readonly); + render_memory_region(view, mr->alias, base, clip, + readonly, nonvolatile); return; } /* Render subregions in priority order. */ QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) { - render_memory_region(view, subregion, base, clip, readonly); + render_memory_region(view, subregion, base, clip, + readonly, nonvolatile); } if (!mr->terminates) { @@ -641,6 +649,7 @@ static void render_memory_region(FlatView *view, fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr); fr.romd_mode = mr->romd_mode; fr.readonly = readonly; + fr.nonvolatile = nonvolatile; /* Render the region itself into any gaps left by the current view. */ for (i = 0; i < view->nr && int128_nz(remain); ++i) { @@ -726,7 +735,8 @@ static FlatView *generate_memory_topology(MemoryRegion *mr) if (mr) { render_memory_region(view, mr, int128_zero(), - addrrange_make(int128_zero(), int128_2_64()), false); + addrrange_make(int128_zero(), int128_2_64()), + false, false); } flatview_simplify(view); @@ -2039,6 +2049,16 @@ void memory_region_set_readonly(MemoryRegion *mr, bool readonly) } } +void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile) +{ + if (mr->nonvolatile != nonvolatile) { + memory_region_transaction_begin(); + mr->nonvolatile = nonvolatile; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); + } +} + void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode) { if (mr->romd_mode != romd_mode) { @@ -2489,6 +2509,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr, ret.size = range.size; ret.offset_within_address_space = int128_get64(range.start); ret.readonly = fr->readonly; + ret.nonvolatile = fr->nonvolatile; return ret; } @@ -2839,10 +2860,11 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue); } mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx - " (prio %d, %s): alias %s @%s " TARGET_FMT_plx + " (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx "-" TARGET_FMT_plx "%s", cur_start, cur_end, mr->priority, + mr->nonvolatile ? "nv-" : "", memory_region_type((MemoryRegion *)mr), memory_region_name(mr), memory_region_name(mr->alias), @@ -2854,9 +2876,10 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, } } else { mon_printf(f, - TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s", + TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s%s): %s%s", cur_start, cur_end, mr->priority, + mr->nonvolatile ? "nv-" : "", memory_region_type((MemoryRegion *)mr), memory_region_name(mr), mr->enabled ? "" : " [disabled]"); @@ -2941,19 +2964,21 @@ static void mtree_print_flatview(gpointer key, gpointer value, mr = range->mr; if (range->offset_in_region) { p(f, MTREE_INDENT TARGET_FMT_plx "-" - TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx, + TARGET_FMT_plx " (prio %d, %s%s): %s @" TARGET_FMT_plx, int128_get64(range->addr.start), int128_get64(range->addr.start) + MR_SIZE(range->addr.size), mr->priority, + range->nonvolatile ? "nv-" : "", range->readonly ? "rom" : memory_region_type(mr), memory_region_name(mr), range->offset_in_region); } else { p(f, MTREE_INDENT TARGET_FMT_plx "-" - TARGET_FMT_plx " (prio %d, %s): %s", + TARGET_FMT_plx " (prio %d, %s%s): %s", int128_get64(range->addr.start), int128_get64(range->addr.start) + MR_SIZE(range->addr.size), mr->priority, + range->nonvolatile ? "nv-" : "", range->readonly ? "rom" : memory_region_type(mr), memory_region_name(mr)); } diff --git a/memory_mapping.c b/memory_mapping.c index 775466f..724dd0b 100644 --- a/memory_mapping.c +++ b/memory_mapping.c @@ -206,7 +206,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, /* we only care about RAM */ if (!memory_region_is_ram(section->mr) || - memory_region_is_ram_device(section->mr)) { + memory_region_is_ram_device(section->mr) || + memory_region_is_nonvolatile(section->mr)) { return; } diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 5a857ce..198cd0f 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -417,7 +417,9 @@ def get_guest_phys_blocks(): memory_region = flat_range["mr"].dereference() # we only care about RAM - if not memory_region["ram"]: + if (not memory_region["ram"] or + memory_region["ram_device"] or + memory_region["nonvolatile"]): continue section_size = int128_get64(flat_range["addr"]["size"]) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index af7e9f0..f81d35e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5732,6 +5732,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ad0e0b4..9c52d0c 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1391,6 +1391,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; + bool hyperv_evmcs; bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index 8c572cd..c0272b3 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -18,6 +18,7 @@ #define HV_CPUID_FEATURES 0x40000003 #define HV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HV_CPUID_NESTED_FEATURES 0x4000000A #define HV_CPUID_MIN 0x40000005 #define HV_CPUID_MAX 0x4000ffff #define HV_HYPERVISOR_PRESENT_BIT 0x80000000 @@ -60,6 +61,7 @@ #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) #define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) +#define HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14) /* * Basic virtualized MSRs diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 796a049..f524e7d 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -869,6 +869,7 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; + uint16_t evmcs_version; int kvm_base = KVM_CPUID_SIGNATURE; int r; Error *local_err = NULL; @@ -912,7 +913,8 @@ int kvm_arch_init_vcpu(CPUState *cs) memset(signature, 0, 12); memcpy(signature, cpu->hyperv_vendor_id, len); } - c->eax = HV_CPUID_MIN; + c->eax = cpu->hyperv_evmcs ? + HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; @@ -970,7 +972,16 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_CLUSTER_IPI_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } - + if (cpu->hyperv_evmcs) { + if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, + (uintptr_t)&evmcs_version)) { + fprintf(stderr, "Hyper-V Enlightened VMCS " + "(requested by 'hv-evmcs' cpu flag) " + "is not supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; + } c->ebx = cpu->hyperv_spinlock_attempts; c = &cpuid_data.entries[cpuid_i++]; @@ -981,6 +992,21 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_base = KVM_CPUID_SIGNATURE_NEXT; has_msr_hv_hypercall = true; + + if (cpu->hyperv_evmcs) { + __u32 function; + + /* Create zeroed 0x40000006..0x40000009 leaves */ + for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { + c = &cpuid_data.entries[cpuid_i++]; + c->function = function; + } + + c = &cpuid_data.entries[cpuid_i++]; + c->function = HV_CPUID_NESTED_FEATURES; + c->eax = evmcs_version; + } } if (cpu->expose_kvm) { diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c index 33714bc..63e265c 100644 --- a/target/i386/seg_helper.c +++ b/target/i386/seg_helper.c @@ -991,11 +991,11 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) int code64; env->regs[R_ECX] = env->eip + next_eip_addend; - env->regs[11] = cpu_compute_eflags(env); + env->regs[11] = cpu_compute_eflags(env) & ~RF_MASK; code64 = env->hflags & HF_CS64_MASK; - env->eflags &= ~env->fmask; + env->eflags &= ~(env->fmask | RF_MASK); cpu_load_eflags(env, env->eflags, 0); cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, 0, 0xffffffff, diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index dfa66ff..865e476 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -443,42 +443,34 @@ void qemu_event_wait(QemuEvent *ev) } } -static pthread_key_t exit_key; - -union NotifierThreadData { - void *ptr; - NotifierList list; -}; -QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *)); +static __thread NotifierList thread_exit; +/* + * Note that in this implementation you can register a thread-exit + * notifier for the main thread, but it will never be called. + * This is OK because main thread exit can only happen when the + * entire process is exiting, and the API allows notifiers to not + * be called on process exit. + */ void qemu_thread_atexit_add(Notifier *notifier) { - union NotifierThreadData ntd; - ntd.ptr = pthread_getspecific(exit_key); - notifier_list_add(&ntd.list, notifier); - pthread_setspecific(exit_key, ntd.ptr); + notifier_list_add(&thread_exit, notifier); } void qemu_thread_atexit_remove(Notifier *notifier) { - union NotifierThreadData ntd; - ntd.ptr = pthread_getspecific(exit_key); notifier_remove(notifier); - pthread_setspecific(exit_key, ntd.ptr); -} - -static void qemu_thread_atexit_run(void *arg) -{ - union NotifierThreadData ntd = { .ptr = arg }; - notifier_list_notify(&ntd.list, NULL); } -static void __attribute__((constructor)) qemu_thread_atexit_init(void) +static void qemu_thread_atexit_notify(void *arg) { - pthread_key_create(&exit_key, qemu_thread_atexit_run); + /* + * Called when non-main thread exits (via qemu_thread_exit() + * or by returning from its start routine.) + */ + notifier_list_notify(&thread_exit, NULL); } - typedef struct { void *(*start_routine)(void *); void *arg; @@ -490,6 +482,7 @@ static void *qemu_thread_start(void *args) QemuThreadArgs *qemu_thread_args = args; void *(*start_routine)(void *) = qemu_thread_args->start_routine; void *arg = qemu_thread_args->arg; + void *r; #ifdef CONFIG_PTHREAD_SETNAME_NP /* Attempt to set the threads name; note that this is for debug, so @@ -501,7 +494,10 @@ static void *qemu_thread_start(void *args) #endif g_free(qemu_thread_args->name); g_free(qemu_thread_args); - return start_routine(arg); + pthread_cleanup_push(qemu_thread_atexit_notify, NULL); + r = start_routine(arg); + pthread_cleanup_pop(1); + return r; } void qemu_thread_create(QemuThread *thread, const char *name, |