From 013aabdc665e4256b38d8875a1a7b5e030ba98f1 Mon Sep 17 00:00:00 2001 From: Clement Deschamps Date: Sun, 21 Oct 2018 16:21:03 +0200 Subject: icount: fix deadlock when all cpus are sleeping When all cpus are sleeping (e.g in WFI), to avoid a deadlock in the main_loop, wake it up in order to start the warp timer. Signed-off-by: Clement Deschamps Message-Id: <20181021142103.19014-1-clement.deschamps@greensocs.com> Signed-off-by: Paolo Bonzini --- cpus.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpus.c b/cpus.c index 3978f63..a2b33cc 100644 --- a/cpus.c +++ b/cpus.c @@ -1554,6 +1554,14 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) atomic_mb_set(&cpu->exit_request, 0); } + if (use_icount && all_cpu_threads_idle()) { + /* + * When all cpus are sleeping (e.g in WFI), to avoid a deadlock + * in the main_loop, wake it up in order to start the warp timer. + */ + qemu_notify_event(); + } + qemu_tcg_rr_wait_io_event(cpu ? cpu : first_cpu); deal_with_unplugged_cpus(); } -- cgit v1.1 From e204ac612cb2cc1a33f4205976386d237d676319 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 22 Oct 2018 18:55:06 +0200 Subject: x86: hv_evmcs CPU flag support Adds a new CPU flag to enable the Enlightened VMCS KVM feature. QEMU enables KVM_CAP_HYPERV_ENLIGHTENED_VMCS and gets back the version to be advertised in lower 16 bits of CPUID.0x4000000A:EAX. Suggested-by: Ladi Prosek Signed-off-by: Vitaly Kuznetsov Message-Id: <20181022165506.30332-3-vkuznets@redhat.com> Reviewed-by: Roman Kagan Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + target/i386/hyperv-proto.h | 2 ++ target/i386/kvm.c | 30 ++++++++++++++++++++++++++++-- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index af7e9f0..f81d35e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -5732,6 +5732,7 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_BOOL("hv-evmcs", X86CPU, hyperv_evmcs, false), DEFINE_PROP_BOOL("hv-ipi", X86CPU, hyperv_ipi, false), DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ad0e0b4..9c52d0c 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1391,6 +1391,7 @@ struct X86CPU { bool hyperv_frequencies; bool hyperv_reenlightenment; bool hyperv_tlbflush; + bool hyperv_evmcs; bool hyperv_ipi; bool check_cpuid; bool enforce_cpuid; diff --git a/target/i386/hyperv-proto.h b/target/i386/hyperv-proto.h index 8c572cd..c0272b3 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/hyperv-proto.h @@ -18,6 +18,7 @@ #define HV_CPUID_FEATURES 0x40000003 #define HV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HV_CPUID_NESTED_FEATURES 0x4000000A #define HV_CPUID_MIN 0x40000005 #define HV_CPUID_MAX 0x4000ffff #define HV_HYPERVISOR_PRESENT_BIT 0x80000000 @@ -60,6 +61,7 @@ #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) #define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) +#define HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14) /* * Basic virtualized MSRs diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 796a049..f524e7d 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -869,6 +869,7 @@ int kvm_arch_init_vcpu(CPUState *cs) uint32_t unused; struct kvm_cpuid_entry2 *c; uint32_t signature[3]; + uint16_t evmcs_version; int kvm_base = KVM_CPUID_SIGNATURE; int r; Error *local_err = NULL; @@ -912,7 +913,8 @@ int kvm_arch_init_vcpu(CPUState *cs) memset(signature, 0, 12); memcpy(signature, cpu->hyperv_vendor_id, len); } - c->eax = HV_CPUID_MIN; + c->eax = cpu->hyperv_evmcs ? + HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; c->ebx = signature[0]; c->ecx = signature[1]; c->edx = signature[2]; @@ -970,7 +972,16 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax |= HV_CLUSTER_IPI_RECOMMENDED; c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; } - + if (cpu->hyperv_evmcs) { + if (kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, + (uintptr_t)&evmcs_version)) { + fprintf(stderr, "Hyper-V Enlightened VMCS " + "(requested by 'hv-evmcs' cpu flag) " + "is not supported by kernel\n"); + return -ENOSYS; + } + c->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; + } c->ebx = cpu->hyperv_spinlock_attempts; c = &cpuid_data.entries[cpuid_i++]; @@ -981,6 +992,21 @@ int kvm_arch_init_vcpu(CPUState *cs) kvm_base = KVM_CPUID_SIGNATURE_NEXT; has_msr_hv_hypercall = true; + + if (cpu->hyperv_evmcs) { + __u32 function; + + /* Create zeroed 0x40000006..0x40000009 leaves */ + for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { + c = &cpuid_data.entries[cpuid_i++]; + c->function = function; + } + + c = &cpuid_data.entries[cpuid_i++]; + c->function = HV_CPUID_NESTED_FEATURES; + c->eax = evmcs_version; + } } if (cpu->expose_kvm) { -- cgit v1.1 From d4715481ded13231d9ff8ae17da648de78b925d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= Date: Thu, 30 Aug 2018 11:57:57 +0100 Subject: i386: clarify that the Q35 machine type implements a P35 chipset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'q35' machine type implements an Intel Series 3 chipset, of which there are several variants: https://www.intel.com/Assets/PDF/datasheet/316966.pdf The key difference between the 82P35 MCH ('p35', PCI device ID 0x29c0) and 82Q35 GMCH ('q35', PCI device ID 0x29b0) variants is that the latter has an integrated graphics adapter. QEMU does not implement integrated graphics, so uses the PCI ID for the 82P35 chipset, despite calling the machine type 'q35'. Thus we rename the PCI device ID constant to reflect reality, to avoid confusing future developers. The new name more closely matches what pci.ids reports it to be: $ grep P35 /usr/share/hwdata/pci.ids | grep 29 29c0 82G33/G31/P35/P31 Express DRAM Controller 29c1 82G33/G31/P35/P31 Express PCI Express Root Port 29c4 82G33/G31/P35/P31 Express MEI Controller 29c5 82G33/G31/P35/P31 Express MEI Controller 29c6 82G33/G31/P35/P31 Express PT IDER Controller 29c7 82G33/G31/P35/P31 Express Serial KT Controller $ grep Q35 /usr/share/hwdata/pci.ids | grep 29 29b0 82Q35 Express DRAM Controller 29b1 82Q35 Express PCI Express Root Port 29b2 82Q35 Express Integrated Graphics Controller 29b3 82Q35 Express Integrated Graphics Controller 29b4 82Q35 Express MEI Controller 29b5 82Q35 Express MEI Controller 29b6 82Q35 Express PT IDER Controller 29b7 82Q35 Express Serial KT Controller Arguably the QEMU machine type should be named 'p35'. At this point in time, however, it is not worth the churn for management applications & documentation to worry about renaming it. Signed-off-by: Daniel P. Berrangé Message-Id: <20180830105757.10577-1-berrange@redhat.com> Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 10 +++++++++- include/hw/pci/pci_ids.h | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 966a7cf..71e4ca5 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -622,7 +622,15 @@ static void mch_class_init(ObjectClass *klass, void *data) dc->desc = "Host bridge"; dc->vmsd = &vmstate_mch; k->vendor_id = PCI_VENDOR_ID_INTEL; - k->device_id = PCI_DEVICE_ID_INTEL_Q35_MCH; + /* + * The 'q35' machine type implements an Intel Series 3 chipset, + * of which there are several variants. The key difference between + * the 82P35 MCH ('p35') and 82Q35 GMCH ('q35') variants is that + * the latter has an integrated graphics adapter. QEMU does not + * implement integrated graphics, so uses the PCI ID for the 82P35 + * chipset. + */ + k->device_id = PCI_DEVICE_ID_INTEL_P35_MCH; k->revision = MCH_HOST_BRIDGE_REVISION_DEFAULT; k->class_id = PCI_CLASS_BRIDGE_HOST; /* diff --git a/include/hw/pci/pci_ids.h b/include/hw/pci/pci_ids.h index 63acc72..eeb3301 100644 --- a/include/hw/pci/pci_ids.h +++ b/include/hw/pci/pci_ids.h @@ -255,7 +255,7 @@ #define PCI_DEVICE_ID_INTEL_82801I_EHCI2 0x293c #define PCI_DEVICE_ID_INTEL_82599_SFP_VF 0x10ed -#define PCI_DEVICE_ID_INTEL_Q35_MCH 0x29c0 +#define PCI_DEVICE_ID_INTEL_P35_MCH 0x29c0 #define PCI_VENDOR_ID_XEN 0x5853 #define PCI_DEVICE_ID_XEN_PLATFORM 0x0001 -- cgit v1.1 From bce410a33b9ed51051eb6a1fb31f8d0c13a51d48 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Thu, 1 Nov 2018 11:44:46 +0100 Subject: ivshmem: fix memory backend leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit object_new() returns a new backend with refcount == 1 and then later object_property_add_child() increases refcount to 2 So when ivshmem is destroyed, the backend it has created isn't destroyed along with it as children cleanup will bring backend's refcount only to 1, which leaks backend including resources it is using. Drop the original reference from object_new() once backend is attached to its parent. Signed-off-by: Igor Mammedov Message-Id: <1541069086-167036-1-git-send-email-imammedo@redhat.com> Reviewed-by: Marc-André Lureau Fixes: 5503e285041979dd29698ecb41729b3b22622e8d Signed-off-by: Paolo Bonzini --- hw/misc/ivshmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index f88910e..ecfd10a 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -1279,6 +1279,7 @@ static void desugar_shm(IVShmemState *s) object_property_set_bool(obj, true, "share", &error_abort); object_property_add_child(OBJECT(s), "internal-shm-backend", obj, &error_abort); + object_unref(obj); user_creatable_complete(obj, &error_abort); s->hostmem = MEMORY_BACKEND(obj); } -- cgit v1.1 From 2185fd67d2f277ebb1d2946cf5f7cdc773e04198 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 19 Oct 2018 14:25:42 +0200 Subject: MAINTAINERS: remove or downgrade myself to reviewer from some subsystems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other people are doing a much better work than myself at handling some subsystems. For those files it is better if I downgrade myself to reviewer or recognize that I am not actually doing any work there. Cc: Daniel P. Berrange Cc: Gerd Hoffmann Cc: Eric Blake Cc: Thomas Huth Cc: Laurent Vivier Cc: Marc-André Lureau Signed-off-by: Paolo Bonzini --- MAINTAINERS | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8ec2281..d411521 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -105,9 +105,9 @@ Guest CPU cores (TCG): ---------------------- Overall L: qemu-devel@nongnu.org -M: Paolo Bonzini M: Peter Crosthwaite M: Richard Henderson +R: Paolo Bonzini S: Maintained F: cpus.c F: exec.c @@ -1139,7 +1139,8 @@ F: hw/pci-host/ppce500.c F: hw/net/fsl_etsec/ Character devices -M: Paolo Bonzini +M: Marc-André Lureau +R: Paolo Bonzini S: Odd Fixes F: hw/char/ @@ -1526,8 +1527,8 @@ T: git git://github.com/famz/qemu.git bitmaps T: git git://github.com/jnsnow/qemu.git bitmaps Character device backends -M: Paolo Bonzini M: Marc-André Lureau +R: Paolo Bonzini S: Maintained F: chardev/ F: include/chardev/ @@ -1760,9 +1761,9 @@ F: tests/qmp-cmd-test.c T: git git://repo.or.cz/qemu/armbru.git qapi-next qtest -M: Paolo Bonzini M: Thomas Huth M: Laurent Vivier +R: Paolo Bonzini S: Maintained F: qtest.c F: tests/libqtest.* @@ -1869,7 +1870,6 @@ F: tests/test-io-* Sockets M: Daniel P. Berrange M: Gerd Hoffmann -M: Paolo Bonzini S: Maintained F: include/qemu/sockets.h F: util/qemu-sockets.c @@ -2056,13 +2056,12 @@ M: Ronnie Sahlberg M: Paolo Bonzini M: Peter Lieven L: qemu-block@nongnu.org -S: Supported +S: Odd Fixes F: block/iscsi.c F: block/iscsi-opts.c Network Block Device (NBD) M: Eric Blake -M: Paolo Bonzini L: qemu-block@nongnu.org S: Maintained F: block/nbd* -- cgit v1.1 From 1a1435dd61e28c1e3b70971107d72a7d05b28d03 Mon Sep 17 00:00:00 2001 From: Rudolf Marek Date: Fri, 19 Oct 2018 14:24:49 +0200 Subject: target/i386: Clear RF on SYSCALL instruction Fix the SYSCALL instruction in 64-bit (long mode). The RF flag should be cleared in R11 as well as in the RFLAGS. Intel and AMD CPUs behave same. AMD has this documented in the APM vol 3. Signed-off-by: Roman Kapl Signed-off-by: Rudolf Marek Message-Id: <20181019122449.26387-1-rka@sysgo.com> Signed-off-by: Paolo Bonzini --- target/i386/seg_helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/seg_helper.c b/target/i386/seg_helper.c index 33714bc..63e265c 100644 --- a/target/i386/seg_helper.c +++ b/target/i386/seg_helper.c @@ -991,11 +991,11 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) int code64; env->regs[R_ECX] = env->eip + next_eip_addend; - env->regs[11] = cpu_compute_eflags(env); + env->regs[11] = cpu_compute_eflags(env) & ~RF_MASK; code64 = env->hflags & HF_CS64_MASK; - env->eflags &= ~env->fmask; + env->eflags &= ~(env->fmask | RF_MASK); cpu_load_eflags(env, env->eflags, 0); cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, 0, 0xffffffff, -- cgit v1.1 From c26763f8ec70b1011098cab0da9178666d8256a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 3 Oct 2018 15:44:52 +0400 Subject: memory: learn about non-volatile memory region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new flag to mark memory region that are used as non-volatile, by NVDIMM for example. That bit is propagated down to the flat view, and reflected in HMP info mtree with a "nv-" prefix on the memory type. This way, guest_phys_blocks_region_add() can skip the NV memory regions for dumps and TCG memory clear in a following patch. Cc: dgilbert@redhat.com Cc: imammedo@redhat.com Cc: pbonzini@redhat.com Cc: guangrong.xiao@linux.intel.com Cc: mst@redhat.com Cc: xiaoguangrong.eric@gmail.com Signed-off-by: Marc-André Lureau Message-Id: <20181003114454.5662-2-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- docs/devel/migration.rst | 1 + include/exec/memory.h | 25 +++++++++++++++++++++++++ memory.c | 45 +++++++++++++++++++++++++++++++++++---------- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst index 6875707..e7658ab 100644 --- a/docs/devel/migration.rst +++ b/docs/devel/migration.rst @@ -435,6 +435,7 @@ Examples of such memory API functions are: - memory_region_add_subregion() - memory_region_del_subregion() - memory_region_set_readonly() + - memory_region_set_nonvolatile() - memory_region_set_enabled() - memory_region_set_address() - memory_region_set_alias_offset() diff --git a/include/exec/memory.h b/include/exec/memory.h index d0c7f0d..8e61450 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -355,6 +355,7 @@ struct MemoryRegion { bool ram; bool subpage; bool readonly; /* For RAM regions */ + bool nonvolatile; bool rom_device; bool flush_coalesced_mmio; bool global_locking; @@ -480,6 +481,7 @@ static inline FlatView *address_space_to_flatview(AddressSpace *as) * @offset_within_address_space: the address of the first byte of the section * relative to the region's address space * @readonly: writes to this section are ignored + * @nonvolatile: this section is non-volatile */ struct MemoryRegionSection { MemoryRegion *mr; @@ -488,6 +490,7 @@ struct MemoryRegionSection { Int128 size; hwaddr offset_within_address_space; bool readonly; + bool nonvolatile; }; /** @@ -1170,6 +1173,17 @@ static inline bool memory_region_is_rom(MemoryRegion *mr) return mr->ram && mr->readonly; } +/** + * memory_region_is_nonvolatile: check whether a memory region is non-volatile + * + * Returns %true is a memory region is non-volatile memory. + * + * @mr: the memory region being queried + */ +static inline bool memory_region_is_nonvolatile(MemoryRegion *mr) +{ + return mr->nonvolatile; +} /** * memory_region_get_fd: Get a file descriptor backing a RAM memory region. @@ -1342,6 +1356,17 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, void memory_region_set_readonly(MemoryRegion *mr, bool readonly); /** + * memory_region_set_nonvolatile: Turn a memory region non-volatile + * + * Allows a memory region to be marked as non-volatile. + * only useful on RAM regions. + * + * @mr: the region being updated. + * @nonvolatile: whether rhe region is to be non-volatile. + */ +void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile); + +/** * memory_region_rom_device_set_romd: enable/disable ROMD mode * * Allows a ROM device (initialized with memory_region_init_rom_device() to diff --git a/memory.c b/memory.c index 51204aa..d14c6de 100644 --- a/memory.c +++ b/memory.c @@ -216,6 +216,7 @@ struct FlatRange { uint8_t dirty_log_mask; bool romd_mode; bool readonly; + bool nonvolatile; }; #define FOR_EACH_FLAT_RANGE(var, view) \ @@ -231,6 +232,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv) .size = fr->addr.size, .offset_within_address_space = int128_get64(fr->addr.start), .readonly = fr->readonly, + .nonvolatile = fr->nonvolatile, }; } @@ -240,7 +242,8 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b) && addrrange_equal(a->addr, b->addr) && a->offset_in_region == b->offset_in_region && a->romd_mode == b->romd_mode - && a->readonly == b->readonly; + && a->readonly == b->readonly + && a->nonvolatile == b->nonvolatile; } static FlatView *flatview_new(MemoryRegion *mr_root) @@ -312,7 +315,8 @@ static bool can_merge(FlatRange *r1, FlatRange *r2) int128_make64(r2->offset_in_region)) && r1->dirty_log_mask == r2->dirty_log_mask && r1->romd_mode == r2->romd_mode - && r1->readonly == r2->readonly; + && r1->readonly == r2->readonly + && r1->nonvolatile == r2->nonvolatile; } /* Attempt to simplify a view by merging adjacent ranges */ @@ -592,7 +596,8 @@ static void render_memory_region(FlatView *view, MemoryRegion *mr, Int128 base, AddrRange clip, - bool readonly) + bool readonly, + bool nonvolatile) { MemoryRegion *subregion; unsigned i; @@ -608,6 +613,7 @@ static void render_memory_region(FlatView *view, int128_addto(&base, int128_make64(mr->addr)); readonly |= mr->readonly; + nonvolatile |= mr->nonvolatile; tmp = addrrange_make(base, mr->size); @@ -620,13 +626,15 @@ static void render_memory_region(FlatView *view, if (mr->alias) { int128_subfrom(&base, int128_make64(mr->alias->addr)); int128_subfrom(&base, int128_make64(mr->alias_offset)); - render_memory_region(view, mr->alias, base, clip, readonly); + render_memory_region(view, mr->alias, base, clip, + readonly, nonvolatile); return; } /* Render subregions in priority order. */ QTAILQ_FOREACH(subregion, &mr->subregions, subregions_link) { - render_memory_region(view, subregion, base, clip, readonly); + render_memory_region(view, subregion, base, clip, + readonly, nonvolatile); } if (!mr->terminates) { @@ -641,6 +649,7 @@ static void render_memory_region(FlatView *view, fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr); fr.romd_mode = mr->romd_mode; fr.readonly = readonly; + fr.nonvolatile = nonvolatile; /* Render the region itself into any gaps left by the current view. */ for (i = 0; i < view->nr && int128_nz(remain); ++i) { @@ -726,7 +735,8 @@ static FlatView *generate_memory_topology(MemoryRegion *mr) if (mr) { render_memory_region(view, mr, int128_zero(), - addrrange_make(int128_zero(), int128_2_64()), false); + addrrange_make(int128_zero(), int128_2_64()), + false, false); } flatview_simplify(view); @@ -2039,6 +2049,16 @@ void memory_region_set_readonly(MemoryRegion *mr, bool readonly) } } +void memory_region_set_nonvolatile(MemoryRegion *mr, bool nonvolatile) +{ + if (mr->nonvolatile != nonvolatile) { + memory_region_transaction_begin(); + mr->nonvolatile = nonvolatile; + memory_region_update_pending |= mr->enabled; + memory_region_transaction_commit(); + } +} + void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode) { if (mr->romd_mode != romd_mode) { @@ -2489,6 +2509,7 @@ static MemoryRegionSection memory_region_find_rcu(MemoryRegion *mr, ret.size = range.size; ret.offset_within_address_space = int128_get64(range.start); ret.readonly = fr->readonly; + ret.nonvolatile = fr->nonvolatile; return ret; } @@ -2839,10 +2860,11 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, QTAILQ_INSERT_TAIL(alias_print_queue, ml, mrqueue); } mon_printf(f, TARGET_FMT_plx "-" TARGET_FMT_plx - " (prio %d, %s): alias %s @%s " TARGET_FMT_plx + " (prio %d, %s%s): alias %s @%s " TARGET_FMT_plx "-" TARGET_FMT_plx "%s", cur_start, cur_end, mr->priority, + mr->nonvolatile ? "nv-" : "", memory_region_type((MemoryRegion *)mr), memory_region_name(mr), memory_region_name(mr->alias), @@ -2854,9 +2876,10 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f, } } else { mon_printf(f, - TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s): %s%s", + TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %s%s): %s%s", cur_start, cur_end, mr->priority, + mr->nonvolatile ? "nv-" : "", memory_region_type((MemoryRegion *)mr), memory_region_name(mr), mr->enabled ? "" : " [disabled]"); @@ -2941,19 +2964,21 @@ static void mtree_print_flatview(gpointer key, gpointer value, mr = range->mr; if (range->offset_in_region) { p(f, MTREE_INDENT TARGET_FMT_plx "-" - TARGET_FMT_plx " (prio %d, %s): %s @" TARGET_FMT_plx, + TARGET_FMT_plx " (prio %d, %s%s): %s @" TARGET_FMT_plx, int128_get64(range->addr.start), int128_get64(range->addr.start) + MR_SIZE(range->addr.size), mr->priority, + range->nonvolatile ? "nv-" : "", range->readonly ? "rom" : memory_region_type(mr), memory_region_name(mr), range->offset_in_region); } else { p(f, MTREE_INDENT TARGET_FMT_plx "-" - TARGET_FMT_plx " (prio %d, %s): %s", + TARGET_FMT_plx " (prio %d, %s%s): %s", int128_get64(range->addr.start), int128_get64(range->addr.start) + MR_SIZE(range->addr.size), mr->priority, + range->nonvolatile ? "nv-" : "", range->readonly ? "rom" : memory_region_type(mr), memory_region_name(mr)); } -- cgit v1.1 From 640713d8a17107120ba29c4b2527b0b06951e33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 3 Oct 2018 15:44:53 +0400 Subject: nvdimm: set non-volatile on the memory region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qemu-system-x86_64 -machine pc,nvdimm -m 2G,slots=4,maxmem=16G -enable-kvm -monitor stdio -object memory-backend-file,id=mem1,share=on,mem-path=/tmp/foo,size=1G -device nvdimm,id=nvdimm1,memdev=mem1 HMP info mtree command reflects the flag with "nv-" prefix on memory type: (qemu) info mtree 0000000100000000-000000013fffffff (prio 0, nv-i/o): alias nvdimm-memory @/objects/mem1 0000000000000000-000000003fffffff (qemu) info mtree -f 0000000100000000-000000013fffffff (prio 0, nv-ram): /objects/mem1 Signed-off-by: Marc-André Lureau Message-Id: <20181003114454.5662-3-marcandre.lureau@redhat.com> Signed-off-by: Paolo Bonzini --- hw/mem/nvdimm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c index 49324f3..bf2adf5 100644 --- a/hw/mem/nvdimm.c +++ b/hw/mem/nvdimm.c @@ -116,6 +116,7 @@ static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp) nvdimm->nvdimm_mr = g_new(MemoryRegion, 1); memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm), "nvdimm-memory", mr, 0, pmem_size); + memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true); nvdimm->nvdimm_mr->align = align; } -- cgit v1.1 From 17a6ddb6fac51c1979dd5e35588cc82c19e8e75c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Wed, 3 Oct 2018 15:44:54 +0400 Subject: memory-mapping: skip non-volatile memory regions in GuestPhysBlockList MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GuestPhysBlockList is currently used to produce dumps. Given the size and the typical usage of NVDIMM for storage, they are not a good idea to have in the dumps. We may want to have an extra dump option to include them. For now, skip non-volatile regions. The TCG memory clear function is going to use the GuestPhysBlockList as well, and will thus skip NVDIMM for similar reasons. Signed-off-by: Marc-André Lureau Message-Id: <20181003114454.5662-4-marcandre.lureau@redhat.com> Reviewed-by: Laszlo Ersek Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- memory_mapping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/memory_mapping.c b/memory_mapping.c index 775466f..724dd0b 100644 --- a/memory_mapping.c +++ b/memory_mapping.c @@ -206,7 +206,8 @@ static void guest_phys_blocks_region_add(MemoryListener *listener, /* we only care about RAM */ if (!memory_region_is_ram(section->mr) || - memory_region_is_ram_device(section->mr)) { + memory_region_is_ram_device(section->mr) || + memory_region_is_nonvolatile(section->mr)) { return; } -- cgit v1.1 From 7f135356564bc776083b7ecee81096ab49e670e4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 29 Oct 2018 14:49:36 +0100 Subject: scripts/dump-guest-memory: Synchronize with guest_phys_blocks_region_add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent patches have removed ram_device and nonvolatile RAM from dump-guest-memory's output. Do the same for dumps that are extracted from a QEMU core file. Reviewed-by: Marc-André Lureau Signed-off-by: Paolo Bonzini --- scripts/dump-guest-memory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 5a857ce..198cd0f 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -417,7 +417,9 @@ def get_guest_phys_blocks(): memory_region = flat_range["mr"].dereference() # we only care about RAM - if not memory_region["ram"]: + if (not memory_region["ram"] or + memory_region["ram_device"] or + memory_region["nonvolatile"]): continue section_size = int128_get64(flat_range["addr"]["size"]) -- cgit v1.1 From e58ccf039650065a9442de43c9816f81e88f27f6 Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Sat, 27 Oct 2018 01:13:14 +0530 Subject: lsi53c895a: check message length value is valid While writing a message in 'lsi_do_msgin', message length value in 'msg_len' could be invalid due to an invalid migration stream. Add an assertion to avoid an out of bounds access, and reject the incoming migration data if it contains an invalid message length. Discovered by Deja vu Security. Reported by Oracle. Signed-off-by: Prasad J Pandit Message-Id: <20181026194314.18663-1-ppandit@redhat.com> Signed-off-by: Paolo Bonzini --- hw/scsi/lsi53c895a.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index d1e6534..3f207f6 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -861,10 +861,11 @@ static void lsi_do_status(LSIState *s) static void lsi_do_msgin(LSIState *s) { - int len; + uint8_t len; trace_lsi_do_msgin(s->dbc, s->msg_len); s->sfbr = s->msg[0]; len = s->msg_len; + assert(len > 0 && len <= LSI_MAX_MSGIN_LEN); if (len > s->dbc) len = s->dbc; pci_dma_write(PCI_DEVICE(s), s->dnad, s->msg, len); @@ -1705,8 +1706,10 @@ static uint8_t lsi_reg_readb(LSIState *s, int offset) break; case 0x58: /* SBDL */ /* Some drivers peek at the data bus during the MSG IN phase. */ - if ((s->sstat1 & PHASE_MASK) == PHASE_MI) + if ((s->sstat1 & PHASE_MASK) == PHASE_MI) { + assert(s->msg_len > 0); return s->msg[0]; + } ret = 0; break; case 0x59: /* SBDL high */ @@ -2103,11 +2106,23 @@ static int lsi_pre_save(void *opaque) return 0; } +static int lsi_post_load(void *opaque, int version_id) +{ + LSIState *s = opaque; + + if (s->msg_len < 0 || s->msg_len > LSI_MAX_MSGIN_LEN) { + return -EINVAL; + } + + return 0; +} + static const VMStateDescription vmstate_lsi_scsi = { .name = "lsiscsi", .version_id = 0, .minimum_version_id = 0, .pre_save = lsi_pre_save, + .post_load = lsi_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, LSIState), -- cgit v1.1 From 6c219fc8a112fc69b29f59ea2c7865717ff6e3e0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2018 00:46:21 +0200 Subject: scsi-generic: keep VPD page list sorted Block limits emulation is just placing 0xb0 as the final byte of the VPD pages list. However, VPD page numbers must be sorted, so change that to an in-place insert. Since I couldn't find any disk that triggered the loop more than once, this was tested by adding manually 0xb1 at the end of the list and checking that 0xb0 was added before. Reported-by: Max Reitz Reviewed-by: Max Reitz Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-generic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index d60c4d0..aebb7cd 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -144,7 +144,7 @@ static int execute_command(BlockBackend *blk, static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) { - uint8_t page, page_len; + uint8_t page, page_idx; /* * EVPD set to zero returns the standard INQUIRY data. @@ -190,10 +190,21 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) * * This way, the guest kernel will be aware of the support * and will use it to proper setup the SCSI device. + * + * VPD page numbers must be sorted, so insert 0xb0 at the + * right place with an in-place insert. After the initialization + * part of the for loop is executed, the device response is + * at r[0] to r[page_idx - 1]. */ - page_len = r->buf[3]; - r->buf[page_len + 4] = 0xb0; - r->buf[3] = ++page_len; + for (page_idx = lduw_be_p(r->buf + 2) + 4; + page_idx > 4 && r->buf[page_idx - 1] >= 0xb0; + page_idx--) { + if (page_idx < r->buflen) { + r->buf[page_idx] = r->buf[page_idx - 1]; + } + } + r->buf[page_idx] = 0xb0; + stw_be_p(r->buf + 2, lduw_be_p(r->buf + 2) + 1); } } } -- cgit v1.1 From 57dbb58d800f62b9e56d946660dba4e8dbd20204 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2018 00:58:43 +0200 Subject: scsi-generic: avoid out-of-bounds access to VPD page list A device can report an excessive number of VPD pages when asked for a list; this can cause an out-of-bounds access to buf in scsi_generic_set_vpd_bl_emulation. It should not happen, but it is technically not incorrect so handle it: do not check any byte past the allocation length that was sent to the INQUIRY command. Reported-by: Max Reitz Reviewed-by: Max Reitz Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index aebb7cd..c5497bb 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -538,7 +538,7 @@ static void scsi_generic_set_vpd_bl_emulation(SCSIDevice *s) } page_len = buf[3]; - for (i = 4; i < page_len + 4; i++) { + for (i = 4; i < MIN(sizeof(buf), page_len + 4); i++) { if (buf[i] == 0xb0) { s->needs_vpd_bl_emulation = false; return; -- cgit v1.1 From 3d4a8bf0eed68a781e06118e4d1df6e2f106a1f2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2018 00:43:51 +0200 Subject: scsi-generic: avoid invalid access to struct when emulating block limits Emulation of the block limits VPD page called back into scsi-disk.c, which however expected the request to be for a SCSIDiskState and accessed a scsi-generic device outside the bounds of its struct (namely to retrieve s->max_unmap_size and s->max_io_size). To avoid this, move the emulation code to a separate function that takes a new SCSIBlockLimits struct and marshals it into the VPD response format. Reported-by: Max Reitz Reviewed-by: Max Reitz Signed-off-by: Paolo Bonzini --- hw/scsi/Makefile.objs | 2 +- hw/scsi/emulation.c | 42 +++++++++++++++++++++ hw/scsi/scsi-disk.c | 92 ++++++++++----------------------------------- hw/scsi/scsi-generic.c | 35 ++++++++++++----- include/hw/scsi/emulation.h | 16 ++++++++ include/hw/scsi/scsi.h | 1 - 6 files changed, 104 insertions(+), 84 deletions(-) create mode 100644 hw/scsi/emulation.c create mode 100644 include/hw/scsi/emulation.h diff --git a/hw/scsi/Makefile.objs b/hw/scsi/Makefile.objs index 718b4c2..45167ba 100644 --- a/hw/scsi/Makefile.objs +++ b/hw/scsi/Makefile.objs @@ -1,4 +1,4 @@ -common-obj-y += scsi-disk.o +common-obj-y += scsi-disk.o emulation.o common-obj-y += scsi-generic.o scsi-bus.o common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o common-obj-$(CONFIG_MPTSAS_SCSI_PCI) += mptsas.o mptconfig.o mptendian.o diff --git a/hw/scsi/emulation.c b/hw/scsi/emulation.c new file mode 100644 index 0000000..06d62f3 --- /dev/null +++ b/hw/scsi/emulation.c @@ -0,0 +1,42 @@ +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/bswap.h" +#include "hw/scsi/emulation.h" + +int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl) +{ + /* required VPD size with unmap support */ + memset(outbuf, 0, 0x3c); + + outbuf[0] = bl->wsnz; /* wsnz */ + + if (bl->max_io_sectors) { + /* optimal transfer length granularity. This field and the optimal + * transfer length can't be greater than maximum transfer length. + */ + stw_be_p(outbuf + 2, MIN(bl->min_io_size, bl->max_io_sectors)); + + /* maximum transfer length */ + stl_be_p(outbuf + 4, bl->max_io_sectors); + + /* optimal transfer length */ + stl_be_p(outbuf + 8, MIN(bl->opt_io_size, bl->max_io_sectors)); + } else { + stw_be_p(outbuf + 2, bl->min_io_size); + stl_be_p(outbuf + 8, bl->opt_io_size); + } + + /* max unmap LBA count */ + stl_be_p(outbuf + 16, bl->max_unmap_sectors); + + /* max unmap descriptors */ + stl_be_p(outbuf + 20, bl->max_unmap_descr); + + /* optimal unmap granularity; alignment is zero */ + stl_be_p(outbuf + 24, bl->unmap_sectors); + + /* max write same size, make it the same as maximum transfer length */ + stl_be_p(outbuf + 36, bl->max_io_sectors); + + return 0x3c; +} diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index e2c5408..6eb258d 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -33,6 +33,7 @@ do { printf("scsi-disk: " fmt , ## __VA_ARGS__); } while (0) #include "qapi/error.h" #include "qemu/error-report.h" #include "hw/scsi/scsi.h" +#include "hw/scsi/emulation.h" #include "scsi/constants.h" #include "sysemu/sysemu.h" #include "sysemu/block-backend.h" @@ -589,7 +590,7 @@ static uint8_t *scsi_get_buf(SCSIRequest *req) return (uint8_t *)r->iov.iov_base; } -int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) +static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) { SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); uint8_t page_code = req->cmd.buf[2]; @@ -691,89 +692,36 @@ int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) } case 0xb0: /* block limits */ { - unsigned int unmap_sectors = - s->qdev.conf.discard_granularity / s->qdev.blocksize; - unsigned int min_io_size = - s->qdev.conf.min_io_size / s->qdev.blocksize; - unsigned int opt_io_size = - s->qdev.conf.opt_io_size / s->qdev.blocksize; - unsigned int max_unmap_sectors = - s->max_unmap_size / s->qdev.blocksize; - unsigned int max_io_sectors = - s->max_io_size / s->qdev.blocksize; + SCSIBlockLimits bl = {}; if (s->qdev.type == TYPE_ROM) { DPRINTF("Inquiry (EVPD[%02X] not supported for CDROM\n", page_code); return -1; } + bl.wsnz = 1; + bl.unmap_sectors = + s->qdev.conf.discard_granularity / s->qdev.blocksize; + bl.min_io_size = + s->qdev.conf.min_io_size / s->qdev.blocksize; + bl.opt_io_size = + s->qdev.conf.opt_io_size / s->qdev.blocksize; + bl.max_unmap_sectors = + s->max_unmap_size / s->qdev.blocksize; + bl.max_io_sectors = + s->max_io_size / s->qdev.blocksize; + /* 255 descriptors fit in 4 KiB with an 8-byte header */ + bl.max_unmap_descr = 255; + if (s->qdev.type == TYPE_DISK) { int max_transfer_blk = blk_get_max_transfer(s->qdev.conf.blk); int max_io_sectors_blk = max_transfer_blk / s->qdev.blocksize; - max_io_sectors = - MIN_NON_ZERO(max_io_sectors_blk, max_io_sectors); - - /* min_io_size and opt_io_size can't be greater than - * max_io_sectors */ - if (min_io_size) { - min_io_size = MIN(min_io_size, max_io_sectors); - } - if (opt_io_size) { - opt_io_size = MIN(opt_io_size, max_io_sectors); - } + bl.max_io_sectors = + MIN_NON_ZERO(max_io_sectors_blk, bl.max_io_sectors); } - /* required VPD size with unmap support */ - buflen = 0x40; - memset(outbuf + 4, 0, buflen - 4); - - outbuf[4] = 0x1; /* wsnz */ - - /* optimal transfer length granularity */ - outbuf[6] = (min_io_size >> 8) & 0xff; - outbuf[7] = min_io_size & 0xff; - - /* maximum transfer length */ - outbuf[8] = (max_io_sectors >> 24) & 0xff; - outbuf[9] = (max_io_sectors >> 16) & 0xff; - outbuf[10] = (max_io_sectors >> 8) & 0xff; - outbuf[11] = max_io_sectors & 0xff; - - /* optimal transfer length */ - outbuf[12] = (opt_io_size >> 24) & 0xff; - outbuf[13] = (opt_io_size >> 16) & 0xff; - outbuf[14] = (opt_io_size >> 8) & 0xff; - outbuf[15] = opt_io_size & 0xff; - - /* max unmap LBA count, default is 1GB */ - outbuf[20] = (max_unmap_sectors >> 24) & 0xff; - outbuf[21] = (max_unmap_sectors >> 16) & 0xff; - outbuf[22] = (max_unmap_sectors >> 8) & 0xff; - outbuf[23] = max_unmap_sectors & 0xff; - - /* max unmap descriptors, 255 fit in 4 kb with an 8-byte header */ - outbuf[24] = 0; - outbuf[25] = 0; - outbuf[26] = 0; - outbuf[27] = 255; - - /* optimal unmap granularity */ - outbuf[28] = (unmap_sectors >> 24) & 0xff; - outbuf[29] = (unmap_sectors >> 16) & 0xff; - outbuf[30] = (unmap_sectors >> 8) & 0xff; - outbuf[31] = unmap_sectors & 0xff; - - /* max write same size */ - outbuf[36] = 0; - outbuf[37] = 0; - outbuf[38] = 0; - outbuf[39] = 0; - - outbuf[40] = (max_io_sectors >> 24) & 0xff; - outbuf[41] = (max_io_sectors >> 16) & 0xff; - outbuf[42] = (max_io_sectors >> 8) & 0xff; - outbuf[43] = max_io_sectors & 0xff; + buflen += scsi_emulate_block_limits(outbuf + buflen, &bl); break; } case 0xb1: /* block device characteristics */ diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index c5497bb..b50ce64 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -16,6 +16,7 @@ #include "qemu-common.h" #include "qemu/error-report.h" #include "hw/scsi/scsi.h" +#include "hw/scsi/emulation.h" #include "sysemu/block-backend.h" #ifdef __linux__ @@ -181,7 +182,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) /* Also take care of the opt xfer len. */ stl_be_p(&r->buf[12], MIN_NON_ZERO(max_transfer, ldl_be_p(&r->buf[12]))); - } else if (page == 0x00 && s->needs_vpd_bl_emulation) { + } else if (s->needs_vpd_bl_emulation && page == 0x00) { /* * Now we're capable of supplying the VPD Block Limits * response if the hardware can't. Add it in the INQUIRY @@ -209,9 +210,24 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s) } } -static int scsi_emulate_block_limits(SCSIGenericReq *r) +static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) { - r->buflen = scsi_disk_emulate_vpd_page(&r->req, r->buf); + int len; + uint8_t buf[64]; + + SCSIBlockLimits bl = { + .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize + }; + + memset(r->buf, 0, r->buflen); + stb_p(buf, s->type); + stb_p(buf + 1, 0xb0); + len = scsi_emulate_block_limits(buf + 4, &bl); + assert(len <= sizeof(buf) - 4); + stw_be_p(buf + 2, len); + + memcpy(r->buf, buf, MIN(r->buflen, len + 4)); + r->io_header.sb_len_wr = 0; /* @@ -253,13 +269,12 @@ static void scsi_read_complete(void * opaque, int ret) * resulted in sense error but would need emulation. * In this case, emulate a valid VPD response. */ - if (s->needs_vpd_bl_emulation) { - int is_vpd_bl = r->req.cmd.buf[0] == INQUIRY && - r->req.cmd.buf[1] & 0x01 && - r->req.cmd.buf[2] == 0xb0; - - if (is_vpd_bl && sg_io_sense_from_errno(-ret, &r->io_header, &sense)) { - len = scsi_emulate_block_limits(r); + if (s->needs_vpd_bl_emulation && + r->req.cmd.buf[0] == INQUIRY && + (r->req.cmd.buf[1] & 0x01) && + r->req.cmd.buf[2] == 0xb0) { + if (sg_io_sense_from_errno(-ret, &r->io_header, &sense)) { + len = scsi_generic_emulate_block_limits(r, s); /* * No need to let scsi_read_complete go on and handle an * INQUIRY VPD BL request we created manually. diff --git a/include/hw/scsi/emulation.h b/include/hw/scsi/emulation.h new file mode 100644 index 0000000..09fba1f --- /dev/null +++ b/include/hw/scsi/emulation.h @@ -0,0 +1,16 @@ +#ifndef HW_SCSI_EMULATION_H +#define HW_SCSI_EMULATION_H 1 + +typedef struct SCSIBlockLimits { + bool wsnz; + uint16_t min_io_size; + uint32_t max_unmap_descr; + uint32_t opt_io_size; + uint32_t max_unmap_sectors; + uint32_t unmap_sectors; + uint32_t max_io_sectors; +} SCSIBlockLimits; + +int scsi_emulate_block_limits(uint8_t *outbuf, const SCSIBlockLimits *bl); + +#endif diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index ee3a411..acef25f 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -189,7 +189,6 @@ void scsi_device_report_change(SCSIDevice *dev, SCSISense sense); void scsi_device_unit_attention_reported(SCSIDevice *dev); void scsi_generic_read_device_inquiry(SCSIDevice *dev); int scsi_device_get_sense(SCSIDevice *dev, uint8_t *buf, int len, bool fixed); -int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf); int scsi_SG_IO_FROM_DEV(BlockBackend *blk, uint8_t *cmd, uint8_t cmd_size, uint8_t *buf, uint8_t buf_size); SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int target, int lun); -- cgit v1.1 From 763c56872b08b98fde062a1feca003f200e7bd5c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Oct 2018 00:58:21 +0200 Subject: scsi-generic: do not do VPD emulation for sense other than ILLEGAL_REQUEST Pass other sense, such as UNIT_ATTENTION or BUSY, directly to the guest. Reported-by: Max Reitz Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-generic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index b50ce64..7237b41 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -246,7 +246,6 @@ static void scsi_read_complete(void * opaque, int ret) { SCSIGenericReq *r = (SCSIGenericReq *)opaque; SCSIDevice *s = r->req.dev; - SCSISense sense; int len; assert(r->req.aiocb != NULL); @@ -269,11 +268,14 @@ static void scsi_read_complete(void * opaque, int ret) * resulted in sense error but would need emulation. * In this case, emulate a valid VPD response. */ - if (s->needs_vpd_bl_emulation && + if (s->needs_vpd_bl_emulation && ret == 0 && + (r->io_header.driver_status & SG_ERR_DRIVER_SENSE) && r->req.cmd.buf[0] == INQUIRY && (r->req.cmd.buf[1] & 0x01) && r->req.cmd.buf[2] == 0xb0) { - if (sg_io_sense_from_errno(-ret, &r->io_header, &sense)) { + SCSISense sense = + scsi_parse_sense_buf(r->req.sense, r->io_header.sb_len_wr); + if (sense.key == ILLEGAL_REQUEST) { len = scsi_generic_emulate_block_limits(r, s); /* * No need to let scsi_read_complete go on and handle an -- cgit v1.1 From ca95173c7fb64a1544b1f560766976425659e5e4 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 5 Nov 2018 13:55:37 +0000 Subject: include/qemu/thread.h: Document qemu_thread_atexit* API Add documentation for the qemu_thread_atexit_add() and qemu_thread_atexit_remove() functions. We include a (previously undocumented) constraint that notifiers may not be called if a thread is exiting because the entire process is exiting. This is fine for our current use because the callers use it only for cleaning up resources which go away on process exit (memory, Win32 fibers), and we will need the flexibility for the new posix implementation. Signed-off-by: Peter Maydell Reviewed-by: Eric Blake Message-Id: <20181105135538.28025-2-peter.maydell@linaro.org> Signed-off-by: Paolo Bonzini --- include/qemu/thread.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/include/qemu/thread.h b/include/qemu/thread.h index b2661b6..55d83a9 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -162,7 +162,29 @@ void qemu_thread_exit(void *retval); void qemu_thread_naming(bool enable); struct Notifier; +/** + * qemu_thread_atexit_add: + * @notifier: Notifier to add + * + * Add the specified notifier to a list which will be run via + * notifier_list_notify() when this thread exits (either by calling + * qemu_thread_exit() or by returning from its start_routine). + * The usual usage is that the caller passes a Notifier which is + * a per-thread variable; it can then use the callback to free + * other per-thread data. + * + * If the thread exits as part of the entire process exiting, + * it is unspecified whether notifiers are called or not. + */ void qemu_thread_atexit_add(struct Notifier *notifier); +/** + * qemu_thread_atexit_remove: + * @notifier: Notifier to remove + * + * Remove the specified notifier from the thread-exit notification + * list. It is not valid to try to remove a notifier which is not + * on the list. + */ void qemu_thread_atexit_remove(struct Notifier *notifier); struct QemuSpin { -- cgit v1.1 From a458774ad711bceabefbf01e8f0b91d86ec72e0c Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Mon, 5 Nov 2018 13:55:38 +0000 Subject: util/qemu-thread-posix: Fix qemu_thread_atexit* for OSX Our current implementation of qemu_thread_atexit* is broken on OSX. This is because it works by cerating a piece of thread-specific data with pthread_key_create() and using the destructor function for that data to run the notifier function passed to it by the caller of qemu_thread_atexit_add(). The expected use case is that the caller uses a __thread variable as the notifier, and uses the callback to clean up information that it is keeping per-thread in __thread variables. Unfortunately, on OSX this does not work, because on OSX a __thread variable may be destroyed (freed) before the pthread_key_create() destructor runs. (POSIX imposes no ordering constraint here; the OSX implementation happens to implement __thread variables in terms of pthread_key_create((), whereas Linux uses different mechanisms that mean the __thread variables will still be present when the pthread_key_create() destructor is run.) Fix this by switching to a scheme similar to the one qemu-thread-win32 uses for qemu_thread_atexit: keep the thread's notifiers on a __thread variable, and run the notifiers on calls to qemu_thread_exit() and on return from the start routine passed to qemu_thread_start(). We do this with the pthread_cleanup_push() API. We take advantage of the qemu_thread_atexit_add() API permission not to run thread notifiers on process exit to avoid having to special case the main thread. Suggested-by: Paolo Bonzini Signed-off-by: Peter Maydell Reviewed-by: Eric Blake Message-Id: <20181105135538.28025-3-peter.maydell@linaro.org> Signed-off-by: Paolo Bonzini --- util/qemu-thread-posix.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index dfa66ff..865e476 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -443,42 +443,34 @@ void qemu_event_wait(QemuEvent *ev) } } -static pthread_key_t exit_key; - -union NotifierThreadData { - void *ptr; - NotifierList list; -}; -QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *)); +static __thread NotifierList thread_exit; +/* + * Note that in this implementation you can register a thread-exit + * notifier for the main thread, but it will never be called. + * This is OK because main thread exit can only happen when the + * entire process is exiting, and the API allows notifiers to not + * be called on process exit. + */ void qemu_thread_atexit_add(Notifier *notifier) { - union NotifierThreadData ntd; - ntd.ptr = pthread_getspecific(exit_key); - notifier_list_add(&ntd.list, notifier); - pthread_setspecific(exit_key, ntd.ptr); + notifier_list_add(&thread_exit, notifier); } void qemu_thread_atexit_remove(Notifier *notifier) { - union NotifierThreadData ntd; - ntd.ptr = pthread_getspecific(exit_key); notifier_remove(notifier); - pthread_setspecific(exit_key, ntd.ptr); -} - -static void qemu_thread_atexit_run(void *arg) -{ - union NotifierThreadData ntd = { .ptr = arg }; - notifier_list_notify(&ntd.list, NULL); } -static void __attribute__((constructor)) qemu_thread_atexit_init(void) +static void qemu_thread_atexit_notify(void *arg) { - pthread_key_create(&exit_key, qemu_thread_atexit_run); + /* + * Called when non-main thread exits (via qemu_thread_exit() + * or by returning from its start routine.) + */ + notifier_list_notify(&thread_exit, NULL); } - typedef struct { void *(*start_routine)(void *); void *arg; @@ -490,6 +482,7 @@ static void *qemu_thread_start(void *args) QemuThreadArgs *qemu_thread_args = args; void *(*start_routine)(void *) = qemu_thread_args->start_routine; void *arg = qemu_thread_args->arg; + void *r; #ifdef CONFIG_PTHREAD_SETNAME_NP /* Attempt to set the threads name; note that this is for debug, so @@ -501,7 +494,10 @@ static void *qemu_thread_start(void *args) #endif g_free(qemu_thread_args->name); g_free(qemu_thread_args); - return start_routine(arg); + pthread_cleanup_push(qemu_thread_atexit_notify, NULL); + r = start_routine(arg); + pthread_cleanup_pop(1); + return r; } void qemu_thread_create(QemuThread *thread, const char *name, -- cgit v1.1