aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2018-11-08 10:01:51 +0000
committer Peter Maydell <peter.maydell@linaro.org> 2018-11-08 10:01:51 +0000
commit fa27257432689e8927cb993b251d380d654dcc86 (patch)
tree 4f84ce4569599910e6d9b68e4c4ebb517fe20892
parent 4de6bb0c02ad3f0ec48f0f84ba1a65ab06e81b86 (diff)
parent a458774ad711bceabefbf01e8f0b91d86ec72e0c (diff)
download qemu-fa27257432689e8927cb993b251d380d654dcc86.zip
qemu-fa27257432689e8927cb993b251d380d654dcc86.tar.gz
qemu-fa27257432689e8927cb993b251d380d654dcc86.tar.bz2
Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* icount fix (Clement)
* dumping fixes for non-volatile memory (Marc-André, myself)
* x86 emulation fix (Rudolf)
* recent Hyper-V CPUID flag (Vitaly)
* Q35 doc fix (Daniel)
* lsi fix (Prasad)
* SCSI block limits emulation fixes (myself)
* qemu_thread_atexit rework (Peter)
* ivshmem memory leak fix (Igor)

# gpg: Signature made Tue 06 Nov 2018 21:34:30 GMT
# gpg: using RSA key BFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg: aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1
# Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream:
  util/qemu-thread-posix: Fix qemu_thread_atexit* for OSX
  include/qemu/thread.h: Document qemu_thread_atexit* API
  scsi-generic: do not do VPD emulation for sense other than ILLEGAL_REQUEST
  scsi-generic: avoid invalid access to struct when emulating block limits
  scsi-generic: avoid out-of-bounds access to VPD page list
  scsi-generic: keep VPD page list sorted
  lsi53c895a: check message length value is valid
  scripts/dump-guest-memory: Synchronize with guest_phys_blocks_region_add
  memory-mapping: skip non-volatile memory regions in GuestPhysBlockList
  nvdimm: set non-volatile on the memory region
  memory: learn about non-volatile memory region
  target/i386: Clear RF on SYSCALL instruction
  MAINTAINERS: remove or downgrade myself to reviewer from some subsystems
  ivshmem: fix memory backend leak
  i386: clarify that the Q35 machine type implements a P35 chipset
  x86: hv_evmcs CPU flag support
  icount: fix deadlock when all cpus are sleeping

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- MAINTAINERS | 13
-rw-r--r-- cpus.c | 8
-rw-r--r-- docs/devel/migration.rst | 1
-rw-r--r-- hw/mem/nvdimm.c | 1
-rw-r--r-- hw/misc/ivshmem.c | 1
-rw-r--r-- hw/pci-host/q35.c | 10
-rw-r--r-- hw/scsi/Makefile.objs | 2
-rw-r--r-- hw/scsi/emulation.c | 42
-rw-r--r-- hw/scsi/lsi53c895a.c | 19
-rw-r--r-- hw/scsi/scsi-disk.c | 92
-rw-r--r-- hw/scsi/scsi-generic.c | 60
-rw-r--r-- include/exec/memory.h | 25
-rw-r--r-- include/hw/pci/pci_ids.h | 2
-rw-r--r-- include/hw/scsi/emulation.h | 16
-rw-r--r-- include/hw/scsi/scsi.h | 1
-rw-r--r-- include/qemu/thread.h | 22
-rw-r--r-- memory.c | 45
-rw-r--r-- memory_mapping.c | 3
-rw-r--r-- scripts/dump-guest-memory.py | 4
-rw-r--r-- target/i386/cpu.c | 1
-rw-r--r-- target/i386/cpu.h | 1
-rw-r--r-- target/i386/hyperv-proto.h | 2
-rw-r--r-- target/i386/kvm.c | 30
-rw-r--r-- target/i386/seg_helper.c | 4
-rw-r--r-- util/qemu-thread-posix.c | 44
25 files changed, 308 insertions, 141 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 0499e11..0d68e4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -105,9 +105,9 @@ Guest CPU cores (TCG):
----------------------
Overall
L: qemu-devel@nongnu.org
-M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Richard Henderson <rth@twiddle.net>
+R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: cpus.c
F: exec.c
@@ -1141,7 +1141,8 @@ F: hw/pci-host/ppce500.c
F: hw/net/fsl_etsec/
Character devices
-M: Paolo Bonzini <pbonzini@redhat.com>
+M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
S: Odd Fixes
F: hw/char/
@@ -1528,8 +1529,8 @@ T: git git://github.com/famz/qemu.git bitmaps
T: git git://github.com/jnsnow/qemu.git bitmaps
Character device backends
-M: Paolo Bonzini <pbonzini@redhat.com>
M: Marc-André Lureau <marcandre.lureau@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: chardev/
F: include/chardev/
@@ -1762,9 +1763,9 @@ F: tests/qmp-cmd-test.c
T: git git://repo.or.cz/qemu/armbru.git qapi-next
qtest
-M: Paolo Bonzini <pbonzini@redhat.com>
M: Thomas Huth <thuth@redhat.com>
M: Laurent Vivier <lvivier@redhat.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: qtest.c
F: tests/libqtest.*
@@ -1871,7 +1872,6 @@ F: tests/test-io-*
Sockets
M: Daniel P. Berrange <berrange@redhat.com>
M: Gerd Hoffmann <kraxel@redhat.com>
-M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: include/qemu/sockets.h
F: util/qemu-sockets.c
@@ -2058,13 +2058,12 @@ M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Lieven <pl@kamp.de>
L: qemu-block@nongnu.org
-S: Supported
+S: Odd Fixes
F: block/iscsi.c
F: block/iscsi-opts.c
Network Block Device (NBD)
M: Eric Blake <eblake@redhat.com>
-M: Paolo Bonzini <pbonzini@redhat.com>
L: qemu-block@nongnu.org
S: Maintained
F: block/nbd*
diff --git a/cpus.c b/cpus.c
index 3978f63..a2b33cc 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1554,6 +1554,14 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
atomic_mb_set(&cpu->exit_request, 0);
}
+ if (use_icount && all_cpu_threads_idle()) {
+ /*
+ * When all cpus are sleeping (e.g. in WFI), to avoid a deadlock
+ * in the main_loop, wake it up in order to start the warp timer.
+ */
+ qemu_notify_event();
+ }
+
qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu);
deal_with_unplugged_cpus();
}
diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst
index 6875707..e7658ab 100644
--- a/docs/devel/migration.rst
+++ b/docs/devel/migration.rst
@@ -435,6 +435,7 @@ Examples of such memory API functions are:
- memory_region_add_subregion()
- memory_region_del_subregion()
- memory_region_set_readonly()
+ - memory_region_set_nonvolatile()
- memory_region_set_enabled()
- memory_region_set_address()
- memory_region_set_alias_offset()
diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 49324f3..bf2adf5 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -116,6 +116,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp)
nvdimm->nvdimm_mr = g_new(MemoryRegion, 1);
memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm),
"nvdimm-memory", mr, 0, pmem_size);
+ memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true);
nvdimm->nvdimm_mr->align = align;
}
diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index f88910e..ecfd10a 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -1279,6 +1279,7 @@ static void desugar_shm(IVShmemState *s)
object_property_set_bool(obj, true, "share", &error_abort);
object_property_add_child(OBJECT(s), "internal-shm-backend", obj,
&error_abort);
+ object_unref(obj);
user_creatable_complete(obj, &error_abort);
s->hostmem = MEMORY_BACKEND(obj);
}
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 0c38a8d..7b871b5 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -629,7 +629,15 @@ static void mch_class_init(ObjectClass *klass, void *data)
dc->desc = "Host bridge";
dc->vmsd = &vmstate_mch;
k->vendor_id = PCI_VENDOR_ID_INTEL;
- k->device_id = PCI_DEVICE_ID_INTEL_Q35_MCH;
+ /*
+ * The 'q35' machine type implements an Intel Series 3 chipset,
+ * of which there are several variants. The key difference between
+ * the 82P35 MCH ('p35') and 82Q35 GMCH ('q35') variants is that
+ * the latter has an integrated graphics adapter. QEMU does not
+ * implement integrated graphics, so uses the PCI ID for the 82P35
+ * chipset.
+ */
+ k->device_id = PCI_DEVICE_ID_INTEL_P35_MCH;
k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT;
k->class_id = PCI_CLASS_BRIDGE_HOST;
/*
diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs
index 718b4c2..45167ba 100644
--- a/hw/scsi/Makefile.objs
+++ b/hw/scsi/Makefile.objs
@@ -1,4 +1,4 @@
-common-obj-y += scsi-disk.o
+common-obj-y += scsi-disk.o emulation.o
common-obj-y += scsi-generic.o scsi-bus.o
common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
common-obj-$(CONFIG_MPTSAS_SCSI_PCI) += mptsas.o mptconfig.o mptendian.o
diff --git a/hw/scsi/emulation.c b/hw/scsi/emulation.c
new file mode 100644
index 0000000..06d62f3
--- /dev/null
+++ b/hw/scsi/emulation.c
@@ -0,0 +1,42 @@
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/bswap.h"
+#include "hw/scsi/emulation.h"
+
+int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl)
+{
+ /* required VPD size with unmap support */
+ memset(outbuf, 0, 0x3c);
+
+ outbuf[0] = bl->wsnz; /* wsnz */
+
+ if (bl->max_io_sectors) {
+ /* optimal transfer length granularity. This field and the optimal
+ * transfer length can't be greater than maximum transfer length.
+ */
+ stw_be_p(outbuf + 2, MIN(bl->min_io_size, bl->max_io_sectors));
+
+ /* maximum transfer length */
+ stl_be_p(outbuf + 4, bl->max_io_sectors);
+
+ /* optimal transfer length */
+ stl_be_p(outbuf + 8, MIN(bl->opt_io_size, bl->max_io_sectors));
+ } else {
+ stw_be_p(outbuf + 2, bl->min_io_size);
+ stl_be_p(outbuf + 8, bl->opt_io_size);
+ }
+
+ /* max unmap LBA count */
+ stl_be_p(outbuf + 16, bl->max_unmap_sectors);
+
+ /* max unmap descriptors */
+ stl_be_p(outbuf + 20, bl->max_unmap_descr);
+
+ /* optimal unmap granularity; alignment is zero */
+ stl_be_p(outbuf + 24, bl->unmap_sectors);
+
+ /* max write same size, make it the same as maximum transfer length */
+ stl_be_p(outbuf + 36, bl->max_io_sectors);
+
+ return 0x3c;
+}
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index d1e6534..3f207f6 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -861,10 +861,11 @@ static void lsi_do_status(LSIState *s)
static void lsi_do_msgin(LSIState *s)
{
- int len;
+ uint8_t len;
trace_lsi_do_msgin(s->dbc, s->msg_len);
s->sfbr = s->msg[0];
len = s->msg_len;
+ assert(len > 0 && len <= LSI_MAX_MSGIN_LEN);
if (len > s->dbc)
len = s->dbc;
pci_dma_write(PCI_DEVICE(s), s->dnad, s->msg, len);
@@ -1705,8 +1706,10 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset)
break;
case 0x58: /* SBDL */
/* Some drivers peek at the data bus during the MSG IN phase. */
- if ((s->sstat1 & PHASE_MASK) == PHASE_MI)
+ if ((s->sstat1 & PHASE_MASK) == PHASE_MI) {
+ assert(s->msg_len > 0);
return s->msg[0];
+ }
ret = 0;
break;
case 0x59: /* SBDL high */
@@ -2103,11 +2106,23 @@ static int lsi_pre_save(void *opaque)
return 0;
}
+static int lsi_post_load(void *opaque, int version_id)
+{
+ LSIState *s = opaque;
+
+ if (s->msg_len < 0 || s->msg_len > LSI_MAX_MSGIN_LEN) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const VMStateDescription vmstate_lsi_scsi = {
.name = "lsiscsi",
.version_id = 0,
.minimum_version_id = 0,
.pre_save = lsi_pre_save,
+ .post_load = lsi_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, LSIState),
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index e2c5408..6eb258d 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -33,6 +33,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0)
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/scsi/scsi.h"
+#include "hw/scsi/emulation.h"
#include "scsi/constants.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
@@ -589,7 +590,7 @@ static uint8_t *scsi_get_buf(SCSIRequest *req)
return (uint8_t *)r->iov.iov_base;
}
-int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
+static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
{
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
uint8_t page_code = req->cmd.buf[2];
@@ -691,89 +692,36 @@ int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf)
}
case 0xb0: /* block limits */
{
- unsigned int unmap_sectors =
- s->qdev.conf.discard_granularity / s->qdev.blocksize;
- unsigned int min_io_size =
- s->qdev.conf.min_io_size / s->qdev.blocksize;
- unsigned int opt_io_size =
- s->qdev.conf.opt_io_size / s->qdev.blocksize;
- unsigned int max_unmap_sectors =
- s->max_unmap_size / s->qdev.blocksize;
- unsigned int max_io_sectors =
- s->max_io_size / s->qdev.blocksize;
+ SCSIBlockLimits bl = {};
if (s->qdev.type == TYPE_ROM) {
DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n",
page_code);
return -1;
}
+ bl.wsnz = 1;
+ bl.unmap_sectors =
+ s->qdev.conf.discard_granularity / s->qdev.blocksize;
+ bl.min_io_size =
+ s->qdev.conf.min_io_size / s->qdev.blocksize;
+ bl.opt_io_size =
+ s->qdev.conf.opt_io_size / s->qdev.blocksize;
+ bl.max_unmap_sectors =
+ s->max_unmap_size / s->qdev.blocksize;
+ bl.max_io_sectors =
+ s->max_io_size / s->qdev.blocksize;
+ /* 255 descriptors fit in 4 KiB with an 8-byte header */
+ bl.max_unmap_descr = 255;
+
if (s->qdev.type == TYPE_DISK) {
int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk);
int max_io_sectors_blk =
max_transfer_blk / s->qdev.blocksize;
- max_io_sectors =
- MIN_NON_ZERO(max_io_sectors_blk, max_io_sectors);
-
- /* min_io_size and opt_io_size can't be greater than
- * max_io_sectors */
- if (min_io_size) {
- min_io_size = MIN(min_io_size, max_io_sectors);
- }
- if (opt_io_size) {
- opt_io_size = MIN(opt_io_size, max_io_sectors);
- }
+ bl.max_io_sectors =
+ MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors);
}
- /* required VPD size with unmap support */
- buflen = 0x40;
- memset(outbuf + 4, 0, buflen - 4);
-
- outbuf[4] = 0x1; /* wsnz */
-
- /* optimal transfer length granularity */
- outbuf[6] = (min_io_size >> 8) & 0xff;
- outbuf[7] = min_io_size & 0xff;
-
- /* maximum transfer length */
- outbuf[8] = (max_io_sectors >> 24) & 0xff;
- outbuf[9] = (max_io_sectors >> 16) & 0xff;
- outbuf[10] = (max_io_sectors >> 8) & 0xff;
- outbuf[11] = max_io_sectors & 0xff;
-
- /* optimal transfer length */
- outbuf[12] = (opt_io_size >> 24) & 0xff;
- outbuf[13] = (opt_io_size >> 16) & 0xff;
- outbuf[14] = (opt_io_size >> 8) & 0xff;
- outbuf[15] = opt_io_size & 0xff;
-
- /* max unmap LBA count, default is 1GB */
- outbuf[20] = (max_unmap_sectors >> 24) & 0xff;
- outbuf[21] = (max_unmap_sectors >> 16) & 0xff;
- outbuf[22] = (max_unmap_sectors >> 8) & 0xff;
- outbuf[23] = max_unmap_sectors & 0xff;
-
- /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header */
- outbuf[24] = 0;
- outbuf[25] = 0;
- outbuf[26] = 0;
- outbuf[27] = 255;
-
- /* optimal unmap granularity */
- outbuf[28] = (unmap_sectors >> 24) & 0xff;
- outbuf[29] = (unmap_sectors >> 16) & 0xff;
- outbuf[30] = (unmap_sectors >> 8) & 0xff;
- outbuf[31] = unmap_sectors & 0xff;
-
- /* max write same size */
- outbuf[36] = 0;
- outbuf[37] = 0;
- outbuf[38] = 0;
- outbuf[39] = 0;
-
- outbuf[40] = (max_io_sectors >> 24) & 0xff;
- outbuf[41] = (max_io_sectors >> 16) & 0xff;
- outbuf[42] = (max_io_sectors >> 8) & 0xff;
- outbuf[43] = max_io_sectors & 0xff;
+ buflen += scsi_emulate_block_limits(outbuf + buflen, &bl);
break;
}
case 0xb1: /* block device characteristics */
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index d60c4d0..7237b41 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -16,6 +16,7 @@
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "hw/scsi/scsi.h"
+#include "hw/scsi/emulation.h"
#include "sysemu/block-backend.h"
#ifdef __linux__
@@ -144,7 +145,7 @@ static int execute_command(BlockBackend *blk,
static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
{
- uint8_t page, page_len;
+ uint8_t page, page_idx;
/*
* EVPD set to zero returns the standard INQUIRY data.
@@ -181,7 +182,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
/* Also take care of the opt xfer len. */
stl_be_p(&r->buf[12],
MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12])));
- } else if (page == 0x00 && s->needs_vpd_bl_emulation) {
+ } else if (s->needs_vpd_bl_emulation && page == 0x00) {
/*
* Now we're capable of supplying the VPD Block Limits
* response if the hardware can't. Add it in the INQUIRY
@@ -190,17 +191,43 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s)
*
* This way, the guest kernel will be aware of the support
* and will use it to proper setup the SCSI device.
+ *
+ * VPD page numbers must be sorted, so insert 0xb0 at the
+ * right place with an in-place insert. After the initialization
+ * part of the for loop is executed, the device response is
+ * at r[0] to r[page_idx - 1].
*/
- page_len = r->buf[3];
- r->buf[page_len + 4] = 0xb0;
- r->buf[3] = ++page_len;
+ for (page_idx = lduw_be_p(r->buf + 2) + 4;
+ page_idx > 4 && r->buf[page_idx - 1] >= 0xb0;
+ page_idx--) {
+ if (page_idx < r->buflen) {
+ r->buf[page_idx] = r->buf[page_idx - 1];
+ }
+ }
+ r->buf[page_idx] = 0xb0;
+ stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1);
}
}
}
-static int scsi_emulate_block_limits(SCSIGenericReq *r)
+static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s)
{
- r->buflen = scsi_disk_emulate_vpd_page(&r->req, r->buf);
+ int len;
+ uint8_t buf[64];
+
+ SCSIBlockLimits bl = {
+ .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize
+ };
+
+ memset(r->buf, 0, r->buflen);
+ stb_p(buf, s->type);
+ stb_p(buf + 1, 0xb0);
+ len = scsi_emulate_block_limits(buf + 4, &bl);
+ assert(len <= sizeof(buf) - 4);
+ stw_be_p(buf + 2, len);
+
+ memcpy(r->buf, buf, MIN(r->buflen, len + 4));
+
r->io_header.sb_len_wr = 0;
/*
@@ -219,7 +246,6 @@ static void scsi_read_complete(void * opaque, int ret)
{
SCSIGenericReq *r = (SCSIGenericReq *)opaque;
SCSIDevice *s = r->req.dev;
- SCSISense sense;
int len;
assert(r->req.aiocb != NULL);
@@ -242,13 +268,15 @@ static void scsi_read_complete(void * opaque, int ret)
* resulted in sense error but would need emulation.
* In this case, emulate a valid VPD response.
*/
- if (s->needs_vpd_bl_emulation) {
- int is_vpd_bl = r->req.cmd.buf[0] == INQUIRY &&
- r->req.cmd.buf[1] & 0x01 &&
- r->req.cmd.buf[2] == 0xb0;
-
- if (is_vpd_bl && sg_io_sense_from_errno(-ret, &r->io_header, &sense)) {
- len = scsi_emulate_block_limits(r);
+ if (s->needs_vpd_bl_emulation && ret == 0 &&
+ (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) &&
+ r->req.cmd.buf[0] == INQUIRY &&
+ (r->req.cmd.buf[1] & 0x01) &&
+ r->req.cmd.buf[2] == 0xb0) {
+ SCSISense sense =
+ scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr);
+ if (sense.key == ILLEGAL_REQUEST) {
+ len = scsi_generic_emulate_block_limits(r, s);
/*
* No need to let scsi_read_complete go on and handle an
* INQUIRY VPD BL request we created manually.
@@ -527,7 +555,7 @@ static void scsi_generic_set_vpd_bl_emulation(SCSIDevice *s)
}
page_len = buf[3];
- for (i = 4; i < page_len + 4; i++) {
+ for (i = 4; i < MIN(sizeof(buf), page_len + 4); i++) {
if (buf[i] == 0xb0) {
s->needs_vpd_bl_emulation = false;
return;
diff --git a/include/exec/memory.h b/include/exec/memory.h
index d0c7f0d..8e61450 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -355,6 +355,7 @@ struct MemoryRegion {
bool ram;
bool subpage;
bool readonly; /* For RAM regions */
+ bool nonvolatile;
bool rom_device;
bool flush_coalesced_mmio;
bool global_locking;
@@ -480,6 +481,7 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as)
* @offset_within_address_space: the address of the first byte of the section
* relative to the region's address space
* @readonly: writes to this section are ignored
+ * @nonvolatile: this section is non-volatile
*/
struct MemoryRegionSection {
MemoryRegion *mr;
@@ -488,6 +490,7 @@ struct MemoryRegionSection {
Int128 size;
hwaddr offset_within_address_space;
bool readonly;
+ bool nonvolatile;
};
/**
@@ -1170,6 +1173,17 @@ static inline bool memory_region_is_rom(MemoryRegion *mr)
return mr->ram && mr->readonly;
}
+/**
+ * memory_region_is_nonvolatile: check whether a memory region is non-volatile
+ *
+ * Returns %true if a memory region is non-volatile memory.
+ *
+ * @mr: the memory region being queried
+ */
+static inline bool memory_region_is_nonvolatile(MemoryRegion *mr)
+{
+ return mr->nonvolatile;
+}
/**
* memory_region_get_fd: Get a file descriptor backing a RAM memory region.
@@ -1342,6 +1356,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr,
void memory_region_set_readonly(MemoryRegion *mr, bool readonly);
/**
+ * memory_region_set_nonvolatile: Turn a memory region non-volatile
+ *
+ * Allows a memory region to be marked as non-volatile.
+ * Only useful on RAM regions.
+ *
+ * @mr: the region being updated.
+ * @nonvolatile: whether the region is to be non-volatile.
+ */
+void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile);
+
+/**
* memory_region_rom_device_set_romd: enable/disable ROMD mode
*
* Allows a ROM device (initialized with memory_region_init_rom_device() to
diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h
index 63acc72..eeb3301 100644
--- a/include/hw/pci/pci_ids.h
+++ b/include/hw/pci/pci_ids.h
@@ -255,7 +255,7 @@
#define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c
#define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed
-#define PCI_DEVICE_ID_INTEL_Q35_MCH 0x29c0
+#define PCI_DEVICE_ID_INTEL_P35_MCH 0x29c0
#define PCI_VENDOR_ID_XEN 0x5853
#define PCI_DEVICE_ID_XEN_PLATFORM 0x0001
diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h
new file mode 100644
index 0000000..09fba1f
--- /dev/null
+++ b/include/hw/scsi/emulation.h
@@ -0,0 +1,16 @@
+#ifndef HW_SCSI_EMULATION_H
+#define HW_SCSI_EMULATION_H 1
+
+typedef struct SCSIBlockLimits {
+ bool wsnz;
+ uint16_t min_io_size;
+ uint32_t max_unmap_descr;
+ uint32_t opt_io_size;
+ uint32_t max_unmap_sectors;
+ uint32_t unmap_sectors;
+ uint32_t max_io_sectors;
+} SCSIBlockLimits;
+
+int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl);
+
+#endif
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index ee3a411..acef25f 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -189,7 +189,6 @@ void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
void scsi_device_unit_attention_reported(SCSIDevice *dev);
void scsi_generic_read_device_inquiry(SCSIDevice *dev);
int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed);
-int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf);
int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size,
uint8_t *buf, uint8_t buf_size);
SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun);
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index b2661b6..55d83a9 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -162,7 +162,29 @@ void qemu_thread_exit(void *retval);
void qemu_thread_naming(bool enable);
struct Notifier;
+/**
+ * qemu_thread_atexit_add:
+ * @notifier: Notifier to add
+ *
+ * Add the specified notifier to a list which will be run via
+ * notifier_list_notify() when this thread exits (either by calling
+ * qemu_thread_exit() or by returning from its start_routine).
+ * The usual usage is that the caller passes a Notifier which is
+ * a per-thread variable; it can then use the callback to free
+ * other per-thread data.
+ *
+ * If the thread exits as part of the entire process exiting,
+ * it is unspecified whether notifiers are called or not.
+ */
void qemu_thread_atexit_add(struct Notifier *notifier);
+/**
+ * qemu_thread_atexit_remove:
+ * @notifier: Notifier to remove
+ *
+ * Remove the specified notifier from the thread-exit notification
+ * list. It is not valid to try to remove a notifier which is not
+ * on the list.
+ */
void qemu_thread_atexit_remove(struct Notifier *notifier);
struct QemuSpin {
diff --git a/memory.c b/memory.c
index 51204aa..d14c6de 100644
--- a/memory.c
+++ b/memory.c
@@ -216,6 +216,7 @@ struct FlatRange {
uint8_t dirty_log_mask;
bool romd_mode;
bool readonly;
+ bool nonvolatile;
};
#define FOR_EACH_FLAT_RANGE(var, view) \
@@ -231,6 +232,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
.size = fr->addr.size,
.offset_within_address_space = int128_get64(fr->addr.start),
.readonly = fr->readonly,
+ .nonvolatile = fr->nonvolatile,
};
}
@@ -240,7 +242,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
&& addrrange_equal(a->addr, b->addr)
&& a->offset_in_region == b->offset_in_region
&& a->romd_mode == b->romd_mode
- && a->readonly == b->readonly;
+ && a->readonly == b->readonly
+ && a->nonvolatile == b->nonvolatile;
}
static FlatView *flatview_new(MemoryRegion *mr_root)
@@ -312,7 +315,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
int128_make64(r2->offset_in_region))
&& r1->dirty_log_mask == r2->dirty_log_mask
&& r1->romd_mode == r2->romd_mode
- && r1->readonly == r2->readonly;
+ && r1->readonly == r2->readonly
+ && r1->nonvolatile == r2->nonvolatile;
}
/* Attempt to simplify a view by merging adjacent ranges */
@@ -592,7 +596,8 @@ static void render_memory_region(FlatView *view,
MemoryRegion *mr,
Int128 base,
AddrRange clip,
- bool readonly)
+ bool readonly,
+ bool nonvolatile)
{
MemoryRegion *subregion;
unsigned i;
@@ -608,6 +613,7 @@ static void render_memory_region(FlatView *view,
int128_addto(&base, int128_make64(mr->addr));
readonly |= mr->readonly;
+ nonvolatile |= mr->nonvolatile;
tmp = addrrange_make(base, mr->size);
@@ -620,13 +626,15 @@ static void render_memory_region(FlatView *view,
if (mr->alias) {
int128_subfrom(&base, int128_make64(mr->alias->addr));
int128_subfrom(&base, int128_make64(mr->alias_offset));
- render_memory_region(view, mr->alias, base, clip, readonly);
+ render_memory_region(view, mr->alias, base, clip,
+ readonly, nonvolatile);
return;
}
/* Render subregions in priority order. */
QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) {
- render_memory_region(view, subregion, base, clip, readonly);
+ render_memory_region(view, subregion, base, clip,
+ readonly, nonvolatile);
}
if (!mr->terminates) {
@@ -641,6 +649,7 @@ static void render_memory_region(FlatView *view,
fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
fr.romd_mode = mr->romd_mode;
fr.readonly = readonly;
+ fr.nonvolatile = nonvolatile;
/* Render the region itself into any gaps left by the current view. */
for (i = 0; i < view->nr && int128_nz(remain); ++i) {
@@ -726,7 +735,8 @@ static FlatView *generate_memory_topology(MemoryRegion *mr)
if (mr) {
render_memory_region(view, mr, int128_zero(),
- addrrange_make(int128_zero(), int128_2_64()), false);
+ addrrange_make(int128_zero(), int128_2_64()),
+ false, false);
}
flatview_simplify(view);
@@ -2039,6 +2049,16 @@ void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
}
}
+void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile)
+{
+ if (mr->nonvolatile != nonvolatile) {
+ memory_region_transaction_begin();
+ mr->nonvolatile = nonvolatile;
+ memory_region_update_pending |= mr->enabled;
+ memory_region_transaction_commit();
+ }
+}
+
void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode)
{
if (mr->romd_mode != romd_mode) {
@@ -2489,6 +2509,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr,
ret.size = range.size;
ret.offset_within_address_space = int128_get64(range.start);
ret.readonly = fr->readonly;
+ ret.nonvolatile = fr->nonvolatile;
return ret;
}
@@ -2839,10 +2860,11 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue);
}
mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx
- " (prio %d, %s): alias %s @%s " TARGET_FMT_plx
+ " (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx
"-" TARGET_FMT_plx "%s",
cur_start, cur_end,
mr->priority,
+ mr->nonvolatile ? "nv-" : "",
memory_region_type((MemoryRegion *)mr),
memory_region_name(mr),
memory_region_name(mr->alias),
@@ -2854,9 +2876,10 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
}
} else {
mon_printf(f,
- TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s",
+ TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s%s): %s%s",
cur_start, cur_end,
mr->priority,
+ mr->nonvolatile ? "nv-" : "",
memory_region_type((MemoryRegion *)mr),
memory_region_name(mr),
mr->enabled ? "" : " [disabled]");
@@ -2941,19 +2964,21 @@ static void mtree_print_flatview(gpointer key, gpointer value,
mr = range->mr;
if (range->offset_in_region) {
p(f, MTREE_INDENT TARGET_FMT_plx "-"
- TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx,
+ TARGET_FMT_plx " (prio %d, %s%s): %s @" TARGET_FMT_plx,
int128_get64(range->addr.start),
int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
mr->priority,
+ range->nonvolatile ? "nv-" : "",
range->readonly ? "rom" : memory_region_type(mr),
memory_region_name(mr),
range->offset_in_region);
} else {
p(f, MTREE_INDENT TARGET_FMT_plx "-"
- TARGET_FMT_plx " (prio %d, %s): %s",
+ TARGET_FMT_plx " (prio %d, %s%s): %s",
int128_get64(range->addr.start),
int128_get64(range->addr.start) + MR_SIZE(range->addr.size),
mr->priority,
+ range->nonvolatile ? "nv-" : "",
range->readonly ? "rom" : memory_region_type(mr),
memory_region_name(mr));
}
diff --git a/memory_mapping.c b/memory_mapping.c
index 775466f..724dd0b 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -206,7 +206,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener,
/* we only care about RAM */
if (!memory_region_is_ram(section->mr) ||
- memory_region_is_ram_device(section->mr)) {
+ memory_region_is_ram_device(section->mr) ||
+ memory_region_is_nonvolatile(section->mr)) {
return;
}
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index 5a857ce..198cd0f 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -417,7 +417,9 @@ def get_guest_phys_blocks():
memory_region = flat_range["mr"].dereference()
# we only care about RAM
- if not memory_region["ram"]:
+ if (not memory_region["ram"] or
+ memory_region["ram_device"] or
+ memory_region["nonvolatile"]):
continue
section_size = int128_get64(flat_range["addr"]["size"])
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index af7e9f0..f81d35e 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5732,6 +5732,7 @@ static Property x86_cpu_properties[] = {
DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false),
DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false),
DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false),
+ DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false),
DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false),
DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true),
DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false),
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ad0e0b4..9c52d0c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1391,6 +1391,7 @@ struct X86CPU {
bool hyperv_frequencies;
bool hyperv_reenlightenment;
bool hyperv_tlbflush;
+ bool hyperv_evmcs;
bool hyperv_ipi;
bool check_cpuid;
bool enforce_cpuid;
diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h
index 8c572cd..c0272b3 100644
--- a/target/i386/hyperv-proto.h
+++ b/target/i386/hyperv-proto.h
@@ -18,6 +18,7 @@
#define HV_CPUID_FEATURES 0x40000003
#define HV_CPUID_ENLIGHTMENT_INFO 0x40000004
#define HV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HV_CPUID_NESTED_FEATURES 0x4000000A
#define HV_CPUID_MIN 0x40000005
#define HV_CPUID_MAX 0x4000ffff
#define HV_HYPERVISOR_PRESENT_BIT 0x80000000
@@ -60,6 +61,7 @@
#define HV_RELAXED_TIMING_RECOMMENDED (1u << 5)
#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10)
#define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11)
+#define HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14)
/*
* Basic virtualized MSRs
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 796a049..f524e7d 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -869,6 +869,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
uint32_t unused;
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
+ uint16_t evmcs_version;
int kvm_base = KVM_CPUID_SIGNATURE;
int r;
Error *local_err = NULL;
@@ -912,7 +913,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
memset(signature, 0, 12);
memcpy(signature, cpu->hyperv_vendor_id, len);
}
- c->eax = HV_CPUID_MIN;
+ c->eax = cpu->hyperv_evmcs ?
+ HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS;
c->ebx = signature[0];
c->ecx = signature[1];
c->edx = signature[2];
@@ -970,7 +972,16 @@ int kvm_arch_init_vcpu(CPUState *cs)
c->eax |= HV_CLUSTER_IPI_RECOMMENDED;
c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED;
}
-
+ if (cpu->hyperv_evmcs) {
+ if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0,
+ (uintptr_t)&evmcs_version)) {
+ fprintf(stderr, "Hyper-V Enlightened VMCS "
+ "(requested by 'hv-evmcs' cpu flag) "
+ "is not supported by kernel\n");
+ return -ENOSYS;
+ }
+ c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED;
+ }
c->ebx = cpu->hyperv_spinlock_attempts;
c = &cpuid_data.entries[cpuid_i++];
@@ -981,6 +992,21 @@ int kvm_arch_init_vcpu(CPUState *cs)
kvm_base = KVM_CPUID_SIGNATURE_NEXT;
has_msr_hv_hypercall = true;
+
+ if (cpu->hyperv_evmcs) {
+ __u32 function;
+
+ /* Create zeroed 0x40000006..0x40000009 leaves */
+ for (function = HV_CPUID_IMPLEMENT_LIMITS + 1;
+ function < HV_CPUID_NESTED_FEATURES; function++) {
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = function;
+ }
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = HV_CPUID_NESTED_FEATURES;
+ c->eax = evmcs_version;
+ }
}
if (cpu->expose_kvm) {
diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c
index 33714bc..63e265c 100644
--- a/target/i386/seg_helper.c
+++ b/target/i386/seg_helper.c
@@ -991,11 +991,11 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
int code64;
env->regs[R_ECX] = env->eip + next_eip_addend;
- env->regs[11] = cpu_compute_eflags(env);
+ env->regs[11] = cpu_compute_eflags(env) & ~RF_MASK;
code64 = env->hflags & HF_CS64_MASK;
- env->eflags &= ~env->fmask;
+ env->eflags &= ~(env->fmask | RF_MASK);
cpu_load_eflags(env, env->eflags, 0);
cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
0, 0xffffffff,
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index dfa66ff..865e476 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -443,42 +443,34 @@ void qemu_event_wait(QemuEvent *ev)
}
}
-static pthread_key_t exit_key;
-
-union NotifierThreadData {
- void *ptr;
- NotifierList list;
-};
-QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+static __thread NotifierList thread_exit;
+/*
+ * Note that in this implementation you can register a thread-exit
+ * notifier for the main thread, but it will never be called.
+ * This is OK because main thread exit can only happen when the
+ * entire process is exiting, and the API allows notifiers to not
+ * be called on process exit.
+ */
void qemu_thread_atexit_add(Notifier *notifier)
{
- union NotifierThreadData ntd;
- ntd.ptr = pthread_getspecific(exit_key);
- notifier_list_add(&ntd.list, notifier);
- pthread_setspecific(exit_key, ntd.ptr);
+ notifier_list_add(&thread_exit, notifier);
}
void qemu_thread_atexit_remove(Notifier *notifier)
{
- union NotifierThreadData ntd;
- ntd.ptr = pthread_getspecific(exit_key);
notifier_remove(notifier);
- pthread_setspecific(exit_key, ntd.ptr);
-}
-
-static void qemu_thread_atexit_run(void *arg)
-{
- union NotifierThreadData ntd = { .ptr = arg };
- notifier_list_notify(&ntd.list, NULL);
}
-static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+static void qemu_thread_atexit_notify(void *arg)
{
- pthread_key_create(&exit_key, qemu_thread_atexit_run);
+ /*
+ * Called when a non-main thread exits (via qemu_thread_exit()
+ * or by returning from its start routine).
+ */
+ notifier_list_notify(&thread_exit, NULL);
}
-
typedef struct {
void *(*start_routine)(void *);
void *arg;
@@ -490,6 +482,7 @@ static void *qemu_thread_start(void *args)
QemuThreadArgs *qemu_thread_args = args;
void *(*start_routine)(void *) = qemu_thread_args->start_routine;
void *arg = qemu_thread_args->arg;
+ void *r;
#ifdef CONFIG_PTHREAD_SETNAME_NP
/* Attempt to set the threads name; note that this is for debug, so
@@ -501,7 +494,10 @@ static void *qemu_thread_start(void *args)
#endif
g_free(qemu_thread_args->name);
g_free(qemu_thread_args);
- return start_routine(arg);
+ pthread_cleanup_push(qemu_thread_atexit_notify, NULL);
+ r = start_routine(arg);
+ pthread_cleanup_pop(1);
+ return r;
}
void qemu_thread_create(QemuThread *thread, const char *name,