Diffstat (limited to 'system')

-rw-r--r--  system/cpu-timers.c           |   6
-rw-r--r--  system/cpus.c                 |  38
-rw-r--r--  system/globals.c              |   1
-rw-r--r--  system/main.c                 |  13
-rw-r--r--  system/memory.c               | 158
-rw-r--r--  system/memory_ldst.c.inc      |   2
-rw-r--r--  system/meson.build            |   3
-rw-r--r--  system/physmem.c              | 547
-rw-r--r--  system/qdev-monitor.c         |   9
-rw-r--r--  system/qtest.c                |   1
-rw-r--r--  system/ram-block-attributes.c | 444
-rw-r--r--  system/runstate.c             | 109
-rw-r--r--  system/tpm.c                  |   5
-rw-r--r--  system/trace-events           |   3
-rw-r--r--  system/vl.c                   |  13

15 files changed, 1099 insertions, 253 deletions
diff --git a/system/cpu-timers.c b/system/cpu-timers.c
index cb35fa6..9919b46 100644
--- a/system/cpu-timers.c
+++ b/system/cpu-timers.c
@@ -246,14 +246,14 @@ void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
     if (qemu_in_vcpu_thread()) {
         /*
-         * A CPU is currently running; kick it back out to the
+         * A CPU is currently running; send it out of the
          * tcg_cpu_exec() loop so it will recalculate its
          * icount deadline immediately.
          */
-        qemu_cpu_kick(current_cpu);
+        cpu_exit(current_cpu);
     } else if (first_cpu) {
         /*
-         * qemu_cpu_kick is not enough to kick a halted CPU out of
+         * cpu_exit() is not enough to kick a halted CPU out of
          * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
          * causes cpu_thread_is_idle to return false.  This way,
          * handle_icount_deadline can run.
diff --git a/system/cpus.c b/system/cpus.c
index d16b0df..aa7bfcf 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -31,7 +31,7 @@
 #include "qapi/qapi-events-run-state.h"
 #include "qapi/qmp/qerror.h"
 #include "exec/gdbstub.h"
-#include "system/accel-ops.h"
+#include "accel/accel-cpu-ops.h"
 #include "system/hw_accel.h"
 #include "exec/cpu-common.h"
 #include "qemu/thread.h"
@@ -254,9 +254,15 @@ int64_t cpus_get_elapsed_ticks(void)
     return cpu_get_ticks();
 }
 
-static void generic_handle_interrupt(CPUState *cpu, int mask)
+void cpu_set_interrupt(CPUState *cpu, int mask)
 {
-    cpu->interrupt_request |= mask;
+    /* Pairs with cpu_test_interrupt(). */
+    qatomic_or(&cpu->interrupt_request, mask);
+}
+
+void generic_handle_interrupt(CPUState *cpu, int mask)
+{
+    cpu_set_interrupt(cpu, mask);
 
     if (!qemu_cpu_is_self(cpu)) {
         qemu_cpu_kick(cpu);
@@ -265,11 +271,9 @@ static void generic_handle_interrupt(CPUState *cpu, int mask)
 
 void cpu_interrupt(CPUState *cpu, int mask)
 {
-    if (cpus_accel->handle_interrupt) {
-        cpus_accel->handle_interrupt(cpu, mask);
-    } else {
-        generic_handle_interrupt(cpu, mask);
-    }
+    g_assert(bql_locked());
+
+    cpus_accel->handle_interrupt(cpu, mask);
 }
 
 /*
@@ -446,7 +450,7 @@ static void qemu_cpu_stop(CPUState *cpu, bool exit)
     qemu_cond_broadcast(&qemu_pause_cond);
 }
 
-void qemu_wait_io_event_common(CPUState *cpu)
+void qemu_process_cpu_events_common(CPUState *cpu)
 {
     qatomic_set_mb(&cpu->thread_kicked, false);
     if (cpu->stop) {
@@ -455,10 +459,11 @@ void qemu_wait_io_event_common(CPUState *cpu)
     process_queued_cpu_work(cpu);
 }
 
-void qemu_wait_io_event(CPUState *cpu)
+void qemu_process_cpu_events(CPUState *cpu)
 {
     bool slept = false;
 
+    qatomic_set(&cpu->exit_request, false);
     while (cpu_thread_is_idle(cpu)) {
         if (!slept) {
             slept = true;
@@ -470,7 +475,7 @@ void qemu_wait_io_event(CPUState *cpu)
         qemu_plugin_vcpu_resume_cb(cpu);
     }
 
-    qemu_wait_io_event_common(cpu);
+    qemu_process_cpu_events_common(cpu);
 }
 
 void cpus_kick_thread(CPUState *cpu)
@@ -600,7 +605,7 @@ void cpu_pause(CPUState *cpu)
         qemu_cpu_stop(cpu, true);
     } else {
         cpu->stop = true;
-        qemu_cpu_kick(cpu);
+        cpu_exit(cpu);
     }
 }
 
@@ -640,6 +645,7 @@ void pause_all_vcpus(void)
 
     while (!all_vcpus_paused()) {
         qemu_cond_wait(&qemu_pause_cond, &bql);
+        /* FIXME: is this needed? */
        CPU_FOREACH(cpu) {
             qemu_cpu_kick(cpu);
         }
@@ -668,7 +674,7 @@ void cpu_remove_sync(CPUState *cpu)
 {
     cpu->stop = true;
     cpu->unplug = true;
-    qemu_cpu_kick(cpu);
+    cpu_exit(cpu);
     bql_unlock();
     qemu_thread_join(cpu->thread);
     bql_lock();
@@ -678,6 +684,8 @@ void cpus_register_accel(const AccelOpsClass *ops)
 {
     assert(ops != NULL);
     assert(ops->create_vcpu_thread != NULL); /* mandatory */
+    assert(ops->handle_interrupt);
+
     cpus_accel = ops;
 }
 
@@ -768,9 +776,7 @@ int vm_prepare_start(bool step_pending)
      * WHPX accelerator needs to know whether we are going to step
      * any CPUs, before starting the first one.
      */
-    if (cpus_accel->synchronize_pre_resume) {
-        cpus_accel->synchronize_pre_resume(step_pending);
-    }
+    accel_pre_resume(MACHINE(qdev_get_machine()), step_pending);
 
     /* We are sending this now, but the CPUs will be resumed shortly later */
     qapi_event_send_resume();
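With cpu_set_interrupt() switching from a plain `|=` to qatomic_or(), interrupt bits can be raised from any thread while the vCPU thread polls them without a lock. The standalone sketch below shows the same set/test pairing using C11 atomics; the names are illustrative, not QEMU's qatomic API:

```c
/* Illustration only: mirrors the cpu_set_interrupt()/cpu_test_interrupt()
 * pairing with C11 atomics instead of QEMU's qatomic wrappers. */
#include <stdatomic.h>
#include <stdbool.h>

struct toy_cpu {
    atomic_int interrupt_request;
};

/* Producer side: any thread may OR in a request bit. */
static void toy_set_interrupt(struct toy_cpu *cpu, int mask)
{
    atomic_fetch_or_explicit(&cpu->interrupt_request, mask,
                             memory_order_seq_cst);
}

/* Consumer side: the vCPU thread polls without taking a lock. */
static bool toy_test_interrupt(struct toy_cpu *cpu, int mask)
{
    return atomic_load_explicit(&cpu->interrupt_request,
                                memory_order_acquire) & mask;
}
```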
diff --git a/system/globals.c b/system/globals.c
index 9640c95..98f9876 100644
--- a/system/globals.c
+++ b/system/globals.c
@@ -52,7 +52,6 @@ bool vga_interface_created;
 Chardev *parallel_hds[MAX_PARALLEL_PORTS];
 QEMUOptionRom option_rom[MAX_OPTION_ROMS];
 int nb_option_roms;
-int old_param;
 const char *qemu_name;
 unsigned int nb_prom_envs;
 const char *prom_envs[MAX_PROM_ENVS];
diff --git a/system/main.c b/system/main.c
index 1c02206..b8f7157 100644
--- a/system/main.c
+++ b/system/main.c
@@ -69,8 +69,21 @@ int (*qemu_main)(void) = os_darwin_cfrunloop_main;
 int main(int argc, char **argv)
 {
     qemu_init(argc, argv);
+
+    /*
+     * qemu_init acquires the BQL and replay mutex lock. BQL is acquired when
+     * initializing cpus, to block associated threads until initialization is
+     * complete. Replay_mutex lock is acquired on initialization, because it
+     * must be held when configuring icount_mode.
+     *
+     * On MacOS, qemu main event loop runs in a background thread, as main
+     * thread must be reserved for UI. Thus, we need to transfer lock ownership,
+     * and the simplest way to do that is to release them, and reacquire them
+     * from qemu_default_main.
+     */
     bql_unlock();
     replay_mutex_unlock();
+
     if (qemu_main) {
         QemuThread main_loop_thread;
         qemu_thread_create(&main_loop_thread, "qemu_main",
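The comment above describes a lock-ownership handoff: the locks taken during init are released by the macOS main thread and reacquired by whichever thread actually runs the main loop. A minimal standalone pthread sketch of that handoff (the names are hypothetical):

```c
#include <pthread.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static void *loop_thread(void *arg)
{
    pthread_mutex_lock(&big_lock);   /* reacquire in the new owner */
    /* ... run the event loop under the lock ... */
    pthread_mutex_unlock(&big_lock);
    return NULL;
}

int main(void)
{
    pthread_t tid;

    pthread_mutex_lock(&big_lock);   /* initialization runs under the lock */
    /* ... init ... */
    pthread_mutex_unlock(&big_lock); /* release so another thread can own it */

    pthread_create(&tid, NULL, loop_thread, NULL);
    pthread_join(tid, NULL);
    return 0;
}
```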
diff --git a/system/memory.c b/system/memory.c
index 63b983e..8b84661 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -22,13 +22,16 @@
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/qemu-print.h"
+#include "qemu/target-info.h"
 #include "qom/object.h"
 #include "trace.h"
+#include "system/physmem.h"
 #include "system/ram_addr.h"
 #include "system/kvm.h"
 #include "system/runstate.h"
 #include "system/tcg.h"
 #include "qemu/accel.h"
+#include "accel/accel-ops.h"
 #include "hw/boards.h"
 #include "migration/vmstate.h"
 #include "system/address-spaces.h"
@@ -1794,16 +1797,37 @@ static void memory_region_finalize(Object *obj)
 {
     MemoryRegion *mr = MEMORY_REGION(obj);
 
-    assert(!mr->container);
-
-    /* We know the region is not visible in any address space (it
-     * does not have a container and cannot be a root either because
-     * it has no references, so we can blindly clear mr->enabled.
-     * memory_region_set_enabled instead could trigger a transaction
-     * and cause an infinite loop.
+    /*
+     * Each memory region (that can be freed) must have an owner, and it
+     * always has the same lifecycle of its owner.  It means when reaching
+     * here, the memory region's owner's refcount is zero.
+     *
+     * Here it is possible that the MR has:
+     *
+     * (1) mr->container set, which means this MR is a subregion of a
+     *     container MR.  In this case they must share the same owner as the
+     *     container (otherwise the container should have kept a refcount
+     *     of this MR's owner).
+     *
+     * (2) mr->subregions non-empty, which means this MR is a container of
+     *     one or more other MRs (which might have the same owner as this
+     *     MR, or a different owner).
+     *
+     * We know the MR, or any MR that is attached to this one as either
+     * container or children, is not visible in any address space, because
+     * otherwise the address space should have taken at least one refcount
+     * of this MR's owner.  So we can blindly clear mr->enabled.
+     *
+     * memory_region_set_enabled instead could trigger a transaction and
+     * cause an infinite loop.
      */
     mr->enabled = false;
     memory_region_transaction_begin();
+    if (mr->container) {
+        /* Must share the owner; see above comments */
+        assert(mr->container->owner == mr->owner);
+        memory_region_del_subregion(mr->container, mr);
+    }
     while (!QTAILQ_EMPTY(&mr->subregions)) {
         MemoryRegion *subregion = QTAILQ_FIRST(&mr->subregions);
         memory_region_del_subregion(mr, subregion);
@@ -2021,13 +2045,9 @@ void memory_region_notify_iommu_one(IOMMUNotifier *notifier,
         return;
     }
 
-    if (notifier->notifier_flags & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
-        /* Crop (iova, addr_mask) to range */
-        tmp.iova = MAX(tmp.iova, notifier->start);
-        tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova;
-    } else {
-        assert(entry->iova >= notifier->start && entry_end <= notifier->end);
-    }
+    /* Crop (iova, addr_mask) to range */
+    tmp.iova = MAX(tmp.iova, notifier->start);
+    tmp.addr_mask = MIN(entry_end, notifier->end) - tmp.iova;
 
     if (event->type & notifier->notifier_flags) {
         notifier->notify(notifier, &tmp);
@@ -2106,12 +2126,16 @@ RamDiscardManager *memory_region_get_ram_discard_manager(MemoryRegion *mr)
     return mr->rdm;
 }
 
-void memory_region_set_ram_discard_manager(MemoryRegion *mr,
-                                           RamDiscardManager *rdm)
+int memory_region_set_ram_discard_manager(MemoryRegion *mr,
+                                          RamDiscardManager *rdm)
 {
     g_assert(memory_region_is_ram(mr));
-    g_assert(!rdm || !mr->rdm);
+    if (mr->rdm && rdm) {
+        return -EBUSY;
+    }
+
     mr->rdm = rdm;
+    return 0;
 }
 
 uint64_t ram_discard_manager_get_min_granularity(const RamDiscardManager *rdm,
@@ -2134,7 +2158,7 @@ bool ram_discard_manager_is_populated(const RamDiscardManager *rdm,
 
 int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
                                          MemoryRegionSection *section,
-                                         ReplayRamPopulate replay_fn,
+                                         ReplayRamDiscardState replay_fn,
                                          void *opaque)
 {
     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
@@ -2143,15 +2167,15 @@ int ram_discard_manager_replay_populated(const RamDiscardManager *rdm,
     return rdmc->replay_populated(rdm, section, replay_fn, opaque);
 }
 
-void ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
-                                          MemoryRegionSection *section,
-                                          ReplayRamDiscard replay_fn,
-                                          void *opaque)
+int ram_discard_manager_replay_discarded(const RamDiscardManager *rdm,
+                                         MemoryRegionSection *section,
+                                         ReplayRamDiscardState replay_fn,
+                                         void *opaque)
 {
     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_GET_CLASS(rdm);
 
     g_assert(rdmc->replay_discarded);
-    rdmc->replay_discarded(rdm, section, replay_fn, opaque);
+    return rdmc->replay_discarded(rdm, section, replay_fn, opaque);
 }
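Since both replay paths now take the same int-returning ReplayRamDiscardState callback, a walk can be aborted early by returning non-zero. A hedged sketch of what such a callback could look like (the callback body is hypothetical; the signature follows the hunks above):

```c
/* Hypothetical replay callback: returning non-zero stops the walk and is
 * propagated back to the caller of the replay function. */
static int accumulate_size_cb(MemoryRegionSection *section, void *opaque)
{
    uint64_t *total = opaque;

    *total += int128_get64(section->size);
    return 0;    /* keep going; non-zero would abort the replay */
}

/* e.g.:  ram_discard_manager_replay_populated(rdm, &section,
 *                                             accumulate_size_cb, &total); */
```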
 
 void ram_discard_manager_register_listener(RamDiscardManager *rdm,
@@ -2174,18 +2198,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
 }
 
 /* Called with rcu_read_lock held.  */
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-                          ram_addr_t *ram_addr, bool *read_only,
-                          bool *mr_has_discard_manager, Error **errp)
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+                                     Error **errp)
 {
     MemoryRegion *mr;
     hwaddr xlat;
     hwaddr len = iotlb->addr_mask + 1;
     bool writable = iotlb->perm & IOMMU_WO;
 
-    if (mr_has_discard_manager) {
-        *mr_has_discard_manager = false;
-    }
     /*
      * The IOMMU TLB entry we have just covers translation through
      * this IOMMU to its immediate target.  We need to translate
@@ -2195,7 +2215,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
                                  &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
     if (!memory_region_is_ram(mr)) {
         error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat);
-        return false;
+        return NULL;
     } else if (memory_region_has_ram_discard_manager(mr)) {
         RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
         MemoryRegionSection tmp = {
@@ -2203,9 +2223,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
             .offset_within_region = xlat,
             .size = int128_make64(len),
         };
-        if (mr_has_discard_manager) {
-            *mr_has_discard_manager = true;
-        }
         /*
          * Malicious VMs can map memory into the IOMMU, which is expected
          * to remain discarded.  vfio will pin all pages, populating memory.
@@ -2216,7 +2233,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
             error_setg(errp, "iommu map to discarded memory (e.g., unplugged"
                        " via virtio-mem): %" HWADDR_PRIx "",
                        iotlb->translated_addr);
-            return false;
+            return NULL;
         }
     }
 
@@ -2226,22 +2243,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
      */
     if (len & iotlb->addr_mask) {
         error_setg(errp, "iommu has granularity incompatible with target AS");
-        return false;
-    }
-
-    if (vaddr) {
-        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
-    }
-
-    if (ram_addr) {
-        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
-    }
-
-    if (read_only) {
-        *read_only = !writable || mr->readonly;
+        return NULL;
     }
 
-    return true;
+    *xlat_p = xlat;
+    return mr;
 }
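Callers that previously received vaddr, ram_addr and read_only through out-parameters now derive them from the returned MemoryRegion. A sketch of a converted call site; the derivations below are the same ones the removed out-parameter code performed internally (iotlb is assumed to be in scope):

```c
hwaddr xlat;
Error *local_err = NULL;
MemoryRegion *mr = memory_translate_iotlb(iotlb, &xlat, &local_err);

if (!mr) {
    error_report_err(local_err);
    return;
}
/* Same computations the old out-parameters used to provide: */
void *vaddr = memory_region_get_ram_ptr(mr) + xlat;
ram_addr_t ram_addr = memory_region_get_ram_addr(mr) + xlat;
bool read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
```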
 
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
@@ -2266,7 +2272,7 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr,
                              hwaddr size)
 {
     assert(mr->ram_block);
-    cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
+    physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
                                         size,
                                         memory_region_get_dirty_log_mask(mr));
 }
@@ -2370,7 +2376,7 @@ DirtyBitmapSnapshot *memory_region_snapshot_and_clear_dirty(MemoryRegion *mr,
     DirtyBitmapSnapshot *snapshot;
     assert(mr->ram_block);
     memory_region_sync_dirty_bitmap(mr, false);
-    snapshot = cpu_physical_memory_snapshot_and_clear_dirty(mr, addr, size, client);
+    snapshot = physical_memory_snapshot_and_clear_dirty(mr, addr, size, client);
     memory_global_after_dirty_log_sync();
     return snapshot;
 }
@@ -2379,7 +2385,7 @@ bool memory_region_snapshot_get_dirty(MemoryRegion *mr, DirtyBitmapSnapshot *snap,
                                       hwaddr addr, hwaddr size)
 {
     assert(mr->ram_block);
-    return cpu_physical_memory_snapshot_get_dirty(snap,
+    return physical_memory_snapshot_get_dirty(snap,
                 memory_region_get_ram_addr(mr) + addr, size);
 }
 
@@ -2417,7 +2423,7 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size,
                                unsigned client)
 {
     assert(mr->ram_block);
-    cpu_physical_memory_test_and_clear_dirty(
+    physical_memory_test_and_clear_dirty(
         memory_region_get_ram_addr(mr) + addr, size, client);
 }
 
@@ -2558,6 +2564,21 @@ void memory_region_clear_flush_coalesced(MemoryRegion *mr)
     }
 }
 
+void memory_region_enable_lockless_io(MemoryRegion *mr)
+{
+    mr->lockless_io = true;
+    /*
+     * reentrancy_guard has per device scope, that when enabled
+     * will effectively prevent concurrent access to device's IO
+     * MemoryRegion(s) by not calling accessor callback.
+     *
+     * Turn it off for lock-less IO enabled devices, to allow
+     * concurrent IO.
+     * TODO: remove this when reentrancy_guard becomes per transaction.
+     */
+    mr->disable_reentrancy_guard = true;
+}
+
 void memory_region_add_eventfd(MemoryRegion *mr,
                                hwaddr addr,
                                unsigned size,
@@ -2637,7 +2658,10 @@ static void memory_region_update_container_subregions(MemoryRegion *subregion)
 
     memory_region_transaction_begin();
 
-    memory_region_ref(subregion);
+    if (mr->owner != subregion->owner) {
+        memory_region_ref(subregion);
+    }
+
     QTAILQ_FOREACH(other, &mr->subregions, subregions_link) {
         if (subregion->priority >= other->priority) {
             QTAILQ_INSERT_BEFORE(other, subregion, subregions_link);
@@ -2695,7 +2719,11 @@ void memory_region_del_subregion(MemoryRegion *mr,
         assert(alias->mapped_via_alias >= 0);
     }
     QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
-    memory_region_unref(subregion);
+
+    if (mr->owner != subregion->owner) {
+        memory_region_unref(subregion);
+    }
+
     memory_region_update_pending |= mr->enabled && subregion->enabled;
     memory_region_transaction_commit();
 }
@@ -3247,7 +3275,14 @@ static void do_address_space_destroy(AddressSpace *as)
     memory_region_unref(as->root);
 }
 
-void address_space_destroy(AddressSpace *as)
+static void do_address_space_destroy_free(AddressSpace *as)
+{
+    do_address_space_destroy(as);
+    g_free(as);
+}
+
+/* Detach address space from global view, notify all listeners */
+static void address_space_detach(AddressSpace *as)
 {
     MemoryRegion *root = as->root;
 
@@ -3262,9 +3297,20 @@ void address_space_destroy(AddressSpace *as)
      * values to expire before freeing the data.
      */
     as->root = root;
+}
+
+void address_space_destroy(AddressSpace *as)
+{
+    address_space_detach(as);
     call_rcu(as, do_address_space_destroy, rcu);
 }
 
+void address_space_destroy_free(AddressSpace *as)
+{
+    address_space_detach(as);
+    call_rcu(as, do_address_space_destroy_free, rcu);
+}
+
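The destroy/free split above exists because RCU readers may still hold the AddressSpace pointer; call_rcu() defers reclamation until every such reader is gone. The general shape of the idiom, as a hedged sketch (MyObject and its helpers are hypothetical; the call_rcu() usage matches the hunk above):

```c
/* The object embeds a 'struct rcu_head rcu' member; the reclaim function
 * only runs once no RCU reader can still see the old pointer. */
typedef struct MyObject {
    struct rcu_head rcu;     /* required by call_rcu() */
    int payload;
} MyObject;

static void my_object_reclaim(MyObject *obj)
{
    g_free(obj);             /* safe: all concurrent readers have exited */
}

static void my_object_unpublish(MyObject *obj)
{
    /* first remove obj from whatever RCU-visible structure published it */
    call_rcu(obj, my_object_reclaim, rcu);
}
```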
 static const char *memory_region_type(MemoryRegion *mr)
 {
     if (mr->alias) {
@@ -3515,7 +3561,7 @@ static void mtree_print_flatview(gpointer key, gpointer value,
     if (fvi->ac) {
         for (i = 0; i < fv_address_spaces->len; ++i) {
             as = g_array_index(fv_address_spaces, AddressSpace*, i);
-            if (fvi->ac->has_memory(current_machine, as,
+            if (fvi->ac->has_memory(current_machine->accelerator, as,
                                     int128_get64(range->addr.start),
                                     MR_SIZE(range->addr.size) + 1)) {
                 qemu_printf(" %s", fvi->ac->name);
diff --git a/system/memory_ldst.c.inc b/system/memory_ldst.c.inc
index 7f32d3d..333da20 100644
--- a/system/memory_ldst.c.inc
+++ b/system/memory_ldst.c.inc
@@ -287,7 +287,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL,
 
         dirty_log_mask = memory_region_get_dirty_log_mask(mr);
         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
-        cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
+        physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
                                             4, dirty_log_mask);
         r = MEMTX_OK;
     }
diff --git a/system/meson.build b/system/meson.build
index c2f0082..6d21ff9 100644
--- a/system/meson.build
+++ b/system/meson.build
@@ -7,7 +7,7 @@ system_ss.add(files(
   'vl.c',
 ), sdl, libpmem, libdaxctl)
 
-libsystem_ss.add(files(
+system_ss.add(files(
   'balloon.c',
   'bootdevice.c',
   'cpus.c',
@@ -17,6 +17,7 @@ libsystem_ss.add(files(
   'dma-helpers.c',
   'globals.c',
   'ioport.c',
+  'ram-block-attributes.c',
   'memory_mapping.c',
   'memory.c',
   'physmem.c',
diff --git a/system/physmem.c b/system/physmem.c
index a8a9ca3..a340ca3 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -43,6 +43,8 @@
 #include "system/kvm.h"
 #include "system/tcg.h"
 #include "system/qtest.h"
+#include "system/physmem.h"
+#include "system/ramblock.h"
 #include "qemu/timer.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
@@ -165,13 +167,11 @@ static bool ram_is_cpr_compatible(RAMBlock *rb);
  * CPUAddressSpace: all the information a CPU needs about an AddressSpace
  * @cpu: the CPU whose AddressSpace this is
  * @as: the AddressSpace itself
- * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
  */
 typedef struct CPUAddressSpace {
     CPUState *cpu;
     AddressSpace *as;
-    struct AddressSpaceDispatch *memory_dispatch;
     MemoryListener tcg_as_listener;
 } CPUAddressSpace;
 
@@ -692,7 +692,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
     IOMMUTLBEntry iotlb;
     int iommu_idx;
     hwaddr addr = orig_addr;
-    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
+    AddressSpaceDispatch *d = address_space_to_dispatch(cpu->cpu_ases[asidx].as);
 
     for (;;) {
         section = address_space_translate_internal(d, addr, &addr, plen, false);
@@ -753,7 +753,7 @@ MemoryRegionSection *iotlb_to_section(CPUState *cpu,
 {
     int asidx = cpu_asidx_from_attrs(cpu, attrs);
     CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
-    AddressSpaceDispatch *d = cpuas->memory_dispatch;
+    AddressSpaceDispatch *d = address_space_to_dispatch(cpuas->as);
     int section_index = index & ~TARGET_PAGE_MASK;
     MemoryRegionSection *ret;
 
@@ -795,12 +795,8 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
         cpu->as = as;
     }
 
-    /* KVM cannot currently support multiple address spaces. */
-    assert(asidx == 0 || !kvm_enabled());
-
     if (!cpu->cpu_ases) {
         cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
-        cpu->cpu_ases_count = cpu->num_ases;
     }
 
     newas = &cpu->cpu_ases[asidx];
@@ -814,32 +810,29 @@ void cpu_address_space_init(CPUState *cpu, int asidx,
     }
 }
 
-void cpu_address_space_destroy(CPUState *cpu, int asidx)
+void cpu_destroy_address_spaces(CPUState *cpu)
 {
     CPUAddressSpace *cpuas;
+    int asidx;
 
     assert(cpu->cpu_ases);
-    assert(asidx >= 0 && asidx < cpu->num_ases);
-    /* KVM cannot currently support multiple address spaces. */
-    assert(asidx == 0 || !kvm_enabled());
-
-    cpuas = &cpu->cpu_ases[asidx];
-    if (tcg_enabled()) {
-        memory_listener_unregister(&cpuas->tcg_as_listener);
-    }
-    address_space_destroy(cpuas->as);
-    g_free_rcu(cpuas->as, rcu);
+    /* convenience alias just points to some cpu_ases[n] */
+    cpu->as = NULL;
 
-    if (asidx == 0) {
-        /* reset the convenience alias for address space 0 */
-        cpu->as = NULL;
+    for (asidx = 0; asidx < cpu->num_ases; asidx++) {
+        cpuas = &cpu->cpu_ases[asidx];
+        if (!cpuas->as) {
+            /* This index was never initialized; no deinit needed */
+            continue;
+        }
+        if (tcg_enabled()) {
+            memory_listener_unregister(&cpuas->tcg_as_listener);
+        }
+        g_clear_pointer(&cpuas->as, address_space_destroy_free);
     }
 
-    if (--cpu->cpu_ases_count == 0) {
-        g_free(cpu->cpu_ases);
-        cpu->cpu_ases = NULL;
-    }
+    g_clear_pointer(&cpu->cpu_ases, g_free);
 }
 
 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
@@ -907,8 +900,197 @@ void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
     }
 }
 
+void physical_memory_dirty_bits_cleared(ram_addr_t start, ram_addr_t length)
+{
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
+    }
+}
+
+static bool physical_memory_get_dirty(ram_addr_t start, ram_addr_t length,
+                                      unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long end, page;
+    unsigned long idx, offset, base;
+    bool dirty = false;
+
+    assert(client < DIRTY_MEMORY_NUM);
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+
+    WITH_RCU_READ_LOCK_GUARD() {
+        blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
+
+        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        base = page - offset;
+        while (page < end) {
+            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+            unsigned long num = next - base;
+            unsigned long found = find_next_bit(blocks->blocks[idx],
+                                                num, offset);
+            if (found < num) {
+                dirty = true;
+                break;
+            }
+
+            page = next;
+            idx++;
+            offset = 0;
+            base += DIRTY_MEMORY_BLOCK_SIZE;
+        }
+    }
+
+    return dirty;
+}
+
+bool physical_memory_get_dirty_flag(ram_addr_t addr, unsigned client)
+{
+    return physical_memory_get_dirty(addr, 1, client);
+}
+
+bool physical_memory_is_clean(ram_addr_t addr)
+{
+    bool vga = physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
+    bool code = physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
+    bool migration =
+        physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
+    return !(vga && code && migration);
+}
+
+static bool physical_memory_all_dirty(ram_addr_t start, ram_addr_t length,
+                                      unsigned client)
+{
+    DirtyMemoryBlocks *blocks;
+    unsigned long end, page;
+    unsigned long idx, offset, base;
+    bool dirty = true;
+
+    assert(client < DIRTY_MEMORY_NUM);
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+
+    RCU_READ_LOCK_GUARD();
+
+    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+    base = page - offset;
+    while (page < end) {
+        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+        unsigned long num = next - base;
+        unsigned long found = find_next_zero_bit(blocks->blocks[idx],
+                                                 num, offset);
+        if (found < num) {
+            dirty = false;
+            break;
+        }
+
+        page = next;
+        idx++;
+        offset = 0;
+        base += DIRTY_MEMORY_BLOCK_SIZE;
+    }
+
+    return dirty;
+}
+
+uint8_t physical_memory_range_includes_clean(ram_addr_t start,
+                                             ram_addr_t length,
+                                             uint8_t mask)
+{
+    uint8_t ret = 0;
+
+    if (mask & (1 << DIRTY_MEMORY_VGA) &&
+        !physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
+        ret |= (1 << DIRTY_MEMORY_VGA);
+    }
+    if (mask & (1 << DIRTY_MEMORY_CODE) &&
+        !physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
+        ret |= (1 << DIRTY_MEMORY_CODE);
+    }
+    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
+        !physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
+        ret |= (1 << DIRTY_MEMORY_MIGRATION);
+    }
+    return ret;
+}
+
+void physical_memory_set_dirty_flag(ram_addr_t addr, unsigned client)
+{
+    unsigned long page, idx, offset;
+    DirtyMemoryBlocks *blocks;
+
+    assert(client < DIRTY_MEMORY_NUM);
+
+    page = addr >> TARGET_PAGE_BITS;
+    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+
+    RCU_READ_LOCK_GUARD();
+
+    blocks = qatomic_rcu_read(&ram_list.dirty_memory[client]);
+
+    set_bit_atomic(offset, blocks->blocks[idx]);
+}
+
+void physical_memory_set_dirty_range(ram_addr_t start, ram_addr_t length,
+                                     uint8_t mask)
+{
+    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
+    unsigned long end, page;
+    unsigned long idx, offset, base;
+    int i;
+
+    if (!mask && !xen_enabled()) {
+        return;
+    }
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+
+    WITH_RCU_READ_LOCK_GUARD() {
+        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+            blocks[i] = qatomic_rcu_read(&ram_list.dirty_memory[i]);
+        }
+
+        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+        base = page - offset;
+        while (page < end) {
+            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
+
+            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
+                                  offset, next - page);
+            }
+            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
+                                  offset, next - page);
+            }
+            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
+                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
+                                  offset, next - page);
+            }
+
+            page = next;
+            idx++;
+            offset = 0;
+            base += DIRTY_MEMORY_BLOCK_SIZE;
+        }
+    }
+
+    if (xen_enabled()) {
+        xen_hvm_modified_memory(start, length);
+    }
+}
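All of the walkers above share one indexing scheme: pages are grouped into DIRTY_MEMORY_BLOCK_SIZE chunks, and a [page, end) range is visited one chunk at a time. A standalone toy version of the loop (BLOCK_SIZE stands in for DIRTY_MEMORY_BLOCK_SIZE):

```c
#include <stdio.h>

#define BLOCK_SIZE 8   /* stand-in for DIRTY_MEMORY_BLOCK_SIZE */

static void walk(unsigned long page, unsigned long end)
{
    unsigned long idx = page / BLOCK_SIZE;     /* which block */
    unsigned long offset = page % BLOCK_SIZE;  /* starting bit in the block */
    unsigned long base = page - offset;        /* first page of the block */

    while (page < end) {
        unsigned long next = end < base + BLOCK_SIZE ? end : base + BLOCK_SIZE;

        /* visit bits [offset, next - base) of block 'idx' */
        printf("block %lu: bits [%lu, %lu)\n", idx, offset, next - base);

        page = next;
        idx++;
        offset = 0;          /* later blocks start at bit 0 */
        base += BLOCK_SIZE;
    }
}

int main(void)
{
    walk(5, 21);   /* spans three blocks: bits [5,8), [0,8), [0,5) */
    return 0;
}
```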
+
 /* Note: start and end must be within the same ram block.  */
-bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
+bool physical_memory_test_and_clear_dirty(ram_addr_t start,
                                               ram_addr_t length,
                                               unsigned client)
 {
@@ -950,13 +1132,20 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
     }
 
     if (dirty) {
-        cpu_physical_memory_dirty_bits_cleared(start, length);
+        physical_memory_dirty_bits_cleared(start, length);
     }
 
     return dirty;
 }
 
-DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+static void physical_memory_clear_dirty_range(ram_addr_t addr, ram_addr_t length)
+{
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_MIGRATION);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_VGA);
+    physical_memory_test_and_clear_dirty(addr, length, DIRTY_MEMORY_CODE);
+}
+
+DirtyBitmapSnapshot *physical_memory_snapshot_and_clear_dirty
     (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client)
 {
     DirtyMemoryBlocks *blocks;
@@ -1003,14 +1192,14 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
         }
     }
 
-    cpu_physical_memory_dirty_bits_cleared(start, length);
+    physical_memory_dirty_bits_cleared(start, length);
 
     memory_region_clear_dirty_bitmap(mr, offset, length);
 
     return snap;
 }
 
-bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+bool physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                             ram_addr_t start,
                                             ram_addr_t length)
 {
@@ -1031,6 +1220,109 @@ bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
     return false;
 }
 
+uint64_t physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
+                                            ram_addr_t start,
+                                            ram_addr_t pages)
+{
+    unsigned long i, j;
+    unsigned long page_number, c, nbits;
+    hwaddr addr;
+    ram_addr_t ram_addr;
+    uint64_t num_dirty = 0;
+    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
+    unsigned long hpratio = qemu_real_host_page_size() / TARGET_PAGE_SIZE;
+    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
+
+    /* start address is aligned at the start of a word? */
+    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
+        (hpratio == 1)) {
+        unsigned long **blocks[DIRTY_MEMORY_NUM];
+        unsigned long idx;
+        unsigned long offset;
+        long k;
+        long nr = BITS_TO_LONGS(pages);
+
+        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
+        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
+                          DIRTY_MEMORY_BLOCK_SIZE);
+
+        WITH_RCU_READ_LOCK_GUARD() {
+            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+                blocks[i] =
+                    qatomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
+            }
+
+            for (k = 0; k < nr; k++) {
+                if (bitmap[k]) {
+                    unsigned long temp = leul_to_cpu(bitmap[k]);
+
+                    nbits = ctpopl(temp);
+                    qatomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);
+
+                    if (global_dirty_tracking) {
+                        qatomic_or(
+                                &blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
+                                temp);
+                        if (unlikely(
+                            global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
+                            total_dirty_pages += nbits;
+                        }
+                    }
+
+                    num_dirty += nbits;
+
+                    if (tcg_enabled()) {
+                        qatomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
+                                   temp);
+                    }
+                }
+
+                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
+                    offset = 0;
+                    idx++;
+                }
+            }
+        }
+
+        if (xen_enabled()) {
+            xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
+        }
+    } else {
+        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL
+                                        : DIRTY_CLIENTS_NOCODE;
+
+        if (!global_dirty_tracking) {
+            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
+        }
+
+        /*
+         * bitmap-traveling is faster than memory-traveling (for addr...)
+         * especially when most of the memory is not dirty.
+         */
+        for (i = 0; i < len; i++) {
+            if (bitmap[i] != 0) {
+                c = leul_to_cpu(bitmap[i]);
+                nbits = ctpopl(c);
+                if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) {
+                    total_dirty_pages += nbits;
+                }
+                num_dirty += nbits;
+                do {
+                    j = ctzl(c);
+                    c &= ~(1ul << j);
+                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
+                    addr = page_number * TARGET_PAGE_SIZE;
+                    ram_addr = start + addr;
+                    physical_memory_set_dirty_range(ram_addr,
+                                       TARGET_PAGE_SIZE * hpratio, clients);
+                } while (c != 0);
+            }
+        }
+    }
+
+    return num_dirty;
+}
+
 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
 static subpage_t *subpage_init(FlatView *fv, hwaddr base);
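The fallback path above walks each little-endian bitmap word with ctzl() so that only set bits are visited, rather than testing every bit position. A standalone sketch of that traversal:

```c
#include <stdio.h>

/* Visit each set bit of a word via count-trailing-zeros. */
static void visit_set_bits(unsigned long word)
{
    while (word != 0) {
        int j = __builtin_ctzl(word);   /* index of lowest set bit */
        word &= ~(1UL << j);            /* clear it */
        printf("bit %d is set\n", j);
    }
}

int main(void)
{
    visit_set_bits(0x91);   /* prints bits 0, 4, 7 */
    return 0;
}
```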
+ */ + for (i = 0; i < len; i++) { + if (bitmap[i] != 0) { + c = leul_to_cpu(bitmap[i]); + nbits = ctpopl(c); + if (unlikely(global_dirty_tracking & GLOBAL_DIRTY_DIRTY_RATE)) { + total_dirty_pages += nbits; + } + num_dirty += nbits; + do { + j = ctzl(c); + c &= ~(1ul << j); + page_number = (i * HOST_LONG_BITS + j) * hpratio; + addr = page_number * TARGET_PAGE_SIZE; + ram_addr = start + addr; + physical_memory_set_dirty_range(ram_addr, + TARGET_PAGE_SIZE * hpratio, clients); + } while (c != 0); + } + } + } + + return num_dirty; +} + static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end, uint16_t section); static subpage_t *subpage_init(FlatView *fv, hwaddr base); @@ -1593,6 +1885,11 @@ ram_addr_t qemu_ram_get_offset(RAMBlock *rb) return rb->offset; } +ram_addr_t qemu_ram_get_fd_offset(RAMBlock *rb) +{ + return rb->fd_offset; +} + ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) { return rb->used_length; @@ -1782,9 +2079,9 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp) ram_block_notify_resize(block->host, oldsize, newsize); } - cpu_physical_memory_clear_dirty_range(block->offset, block->used_length); + physical_memory_clear_dirty_range(block->offset, block->used_length); block->used_length = newsize; - cpu_physical_memory_set_dirty_range(block->offset, block->used_length, + physical_memory_set_dirty_range(block->offset, block->used_length, DIRTY_CLIENTS_ALL); memory_region_set_size(block->mr, unaligned_size); if (block->resized) { @@ -1806,7 +2103,7 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) #ifdef CONFIG_LIBPMEM /* The lack of support for pmem should not block the sync */ - if (ramblock_is_pmem(block)) { + if (ram_block_is_pmem(block)) { void *addr = ramblock_ptr(block, start); pmem_persist(addr, length); return; @@ -1916,7 +2213,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } assert(new_block->guest_memfd < 0); - ret = ram_block_discard_require(true); + ret = ram_block_coordinated_discard_require(true); if (ret < 0) { error_setg_errno(errp, -ret, "cannot set up private guest memory: discard currently blocked"); @@ -1932,6 +2229,24 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) } /* + * The attribute bitmap of the RamBlockAttributes is default to + * discarded, which mimics the behavior of kvm_set_phys_mem() when it + * calls kvm_set_memory_attributes_private(). This leads to a brief + * period of inconsistency between the creation of the RAMBlock and its + * mapping into the physical address space. However, this is not + * problematic, as no users rely on the attribute status to perform + * any actions during this interval. + */ + new_block->attributes = ram_block_attributes_create(new_block); + if (!new_block->attributes) { + error_setg(errp, "Failed to create ram block attribute"); + close(new_block->guest_memfd); + ram_block_coordinated_discard_require(false); + qemu_mutex_unlock_ramlist(); + goto out_free; + } + + /* * Add a specific guest_memfd blocker if a generic one would not be * added by ram_block_add_cpr_blocker. 
@@ -1971,7 +2286,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     ram_list.version++;
     qemu_mutex_unlock_ramlist();
 
-    cpu_physical_memory_set_dirty_range(new_block->offset,
+    physical_memory_set_dirty_range(new_block->offset,
                                         new_block->used_length,
                                         DIRTY_CLIENTS_ALL);
@@ -2287,8 +2602,9 @@ static void reclaim_ramblock(RAMBlock *block)
     }
 
     if (block->guest_memfd >= 0) {
+        ram_block_attributes_destroy(block->attributes);
         close(block->guest_memfd);
-        ram_block_discard_require(false);
+        ram_block_coordinated_discard_require(false);
     }
 
     g_free(block);
@@ -2756,9 +3072,6 @@ static void tcg_log_global_after_sync(MemoryListener *listener)
 
 static void tcg_commit_cpu(CPUState *cpu, run_on_cpu_data data)
 {
-    CPUAddressSpace *cpuas = data.host_ptr;
-
-    cpuas->memory_dispatch = address_space_to_dispatch(cpuas->as);
     tlb_flush(cpu);
 }
 
@@ -2774,11 +3087,7 @@ static void tcg_commit(MemoryListener *listener)
     cpu = cpuas->cpu;
 
     /*
-     * Defer changes to as->memory_dispatch until the cpu is quiescent.
-     * Otherwise we race between (1) other cpu threads and (2) ongoing
-     * i/o for the current cpu thread, with data cached by mmu_lookup().
-     *
-     * In addition, queueing the work function will kick the cpu back to
+     * Queueing the work function will kick the cpu back to
      * the main loop, which will end the RCU critical section and reclaim
      * the memory data structures.
      *
@@ -2826,19 +3135,19 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
     addr += ramaddr;
 
     /* No early return if dirty_log_mask is or becomes 0, because
-     * cpu_physical_memory_set_dirty_range will still call
+     * physical_memory_set_dirty_range will still call
      * xen_modified_memory.
      */
     if (dirty_log_mask) {
         dirty_log_mask =
-            cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
+            physical_memory_range_includes_clean(addr, length, dirty_log_mask);
     }
     if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
         assert(tcg_enabled());
         tb_invalidate_phys_range(NULL, addr, addr + length - 1);
         dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
     }
-    cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
+    physical_memory_set_dirty_range(addr, length, dirty_log_mask);
 }
 
 void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
@@ -2885,7 +3194,7 @@ bool prepare_mmio_access(MemoryRegion *mr)
 {
     bool release_lock = false;
 
-    if (!bql_locked()) {
+    if (!bql_locked() && !mr->lockless_io) {
         bql_lock();
         release_lock = true;
     }
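With prepare_mmio_access() now skipping the BQL for regions flagged lockless_io, a device whose accessors are already thread-safe can opt in via memory_region_enable_lockless_io() from the memory.c hunk earlier in this diff. A hedged sketch; the device name and callbacks here are hypothetical:

```c
/* Hypothetical device opting in to lockless MMIO, assuming mydev_ops'
 * read/write callbacks are safe to run concurrently. */
static void mydev_realize(DeviceState *dev, Error **errp)
{
    MyDevState *s = MYDEV(dev);

    memory_region_init_io(&s->iomem, OBJECT(s), &mydev_ops, s,
                          "mydev-mmio", 0x1000);
    /* From the hunk above: sets mr->lockless_io and also disables the
     * per-device reentrancy guard so concurrent accesses are allowed. */
    memory_region_enable_lockless_io(&s->iomem);
}
```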
@@ -3012,7 +3321,7 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
 
     l = len;
     mr = flatview_translate(fv, addr, &mr_addr, &l, true, attrs);
-    if (!flatview_access_allowed(mr, attrs, addr, len)) {
+    if (!flatview_access_allowed(mr, attrs, mr_addr, l)) {
         return MEMTX_ACCESS_ERROR;
     }
     return flatview_write_continue(fv, addr, attrs, buf, len,
@@ -3103,7 +3412,7 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
 
     l = len;
     mr = flatview_translate(fv, addr, &mr_addr, &l, false, attrs);
-    if (!flatview_access_allowed(mr, attrs, addr, len)) {
+    if (!flatview_access_allowed(mr, attrs, mr_addr, l)) {
         return MEMTX_ACCESS_ERROR;
     }
     return flatview_read_continue(fv, addr, attrs, buf, len,
@@ -3170,68 +3479,45 @@ MemTxResult address_space_set(AddressSpace *as, hwaddr addr,
     return error;
 }
 
-void cpu_physical_memory_rw(hwaddr addr, void *buf,
-                            hwaddr len, bool is_write)
+void cpu_physical_memory_read(hwaddr addr, void *buf, hwaddr len)
 {
-    address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
-                     buf, len, is_write);
+    address_space_read(&address_space_memory, addr,
+                       MEMTXATTRS_UNSPECIFIED, buf, len);
 }
 
-enum write_rom_type {
-    WRITE_DATA,
-    FLUSH_CACHE,
-};
-
-static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
-                                                           hwaddr addr,
-                                                           MemTxAttrs attrs,
-                                                           const void *ptr,
-                                                           hwaddr len,
-                                                           enum write_rom_type type)
+void cpu_physical_memory_write(hwaddr addr, const void *buf, hwaddr len)
 {
-    hwaddr l;
-    uint8_t *ram_ptr;
-    hwaddr addr1;
-    MemoryRegion *mr;
-    const uint8_t *buf = ptr;
+    address_space_write(&address_space_memory, addr,
+                        MEMTXATTRS_UNSPECIFIED, buf, len);
+}
 
+/* used for ROM loading : can write in RAM and ROM */
+MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
+                                    MemTxAttrs attrs,
+                                    const void *buf, hwaddr len)
+{
     RCU_READ_LOCK_GUARD();
     while (len > 0) {
-        l = len;
-        mr = address_space_translate(as, addr, &addr1, &l, true, attrs);
+        hwaddr addr1, l = len;
+        MemoryRegion *mr = address_space_translate(as, addr, &addr1, &l,
+                                                   true, attrs);
 
         if (!memory_region_supports_direct_access(mr)) {
             l = memory_access_size(mr, l, addr1);
         } else {
             /* ROM/RAM case */
-            ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
-            switch (type) {
-            case WRITE_DATA:
-                memcpy(ram_ptr, buf, l);
-                invalidate_and_set_dirty(mr, addr1, l);
-                break;
-            case FLUSH_CACHE:
-                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
-                break;
-            }
+            void *ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            memcpy(ram_ptr, buf, l);
+            invalidate_and_set_dirty(mr, addr1, l);
         }
         len -= l;
-        buf += l;
         addr += l;
+        buf += l;
     }
     return MEMTX_OK;
 }
 
-/* used for ROM loading : can write in RAM and ROM */
-MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr,
-                                    MemTxAttrs attrs,
-                                    const void *buf, hwaddr len)
-{
-    return address_space_write_rom_internal(as, addr, attrs,
-                                            buf, len, WRITE_DATA);
-}
-
-void cpu_flush_icache_range(hwaddr start, hwaddr len)
+void address_space_flush_icache_range(AddressSpace *as, hwaddr addr, hwaddr len)
 {
     /*
      * This function should do the same thing as an icache flush that was
@@ -3243,9 +3529,22 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
         return;
     }
 
-    address_space_write_rom_internal(&address_space_memory,
-                                     start, MEMTXATTRS_UNSPECIFIED,
-                                     NULL, len, FLUSH_CACHE);
+    RCU_READ_LOCK_GUARD();
+    while (len > 0) {
+        hwaddr addr1, l = len;
+        MemoryRegion *mr = address_space_translate(as, addr, &addr1, &l, true,
+                                                   MEMTXATTRS_UNSPECIFIED);
+
+        if (!memory_region_supports_direct_access(mr)) {
+            l = memory_access_size(mr, l, addr1);
+        } else {
+            /* ROM/RAM case */
+            void *ram_ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
+            flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
+        }
+        len -= l;
+        addr += l;
+    }
 }
 
 /*
@@ -3361,6 +3660,17 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr,
     return flatview_access_valid(fv, addr, len, is_write, attrs);
 }
 
+bool address_space_is_io(AddressSpace *as, hwaddr addr)
+{
+    MemoryRegion *mr;
+
+    RCU_READ_LOCK_GUARD();
+    mr = address_space_translate(as, addr, &addr, NULL, false,
+                                 MEMTXATTRS_UNSPECIFIED);
+
+    return !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
+}
+
 static hwaddr
 flatview_extend_translation(FlatView *fv, hwaddr addr,
                             hwaddr target_len,
@@ -3755,19 +4065,6 @@ int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
     return 0;
 }
 
-bool cpu_physical_memory_is_io(hwaddr phys_addr)
-{
-    MemoryRegion *mr;
-    hwaddr l = 1;
-
-    RCU_READ_LOCK_GUARD();
-    mr = address_space_translate(&address_space_memory,
-                                 phys_addr, &phys_addr, &l, false,
-                                 MEMTXATTRS_UNSPECIFIED);
-
-    return !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
-}
-
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
 {
     RAMBlock *block;
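The removed global helper has a direct equivalent in the new AddressSpace-scoped API added above; passing address_space_memory preserves the old behavior:

```c
/* Equivalent of the removed cpu_physical_memory_is_io(phys_addr): */
bool is_io = address_space_is_io(&address_space_memory, phys_addr);
```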
@@ -3784,18 +4081,18 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
 }
 
 /*
- * Unmap pages of memory from start to start+length such that
+ * Unmap pages of memory from offset to offset+length such that
  * they a) read as 0, b) Trigger whatever fault mechanism
  * the OS provides for postcopy.
  * The pages must be unmapped by the end of the function.
 * Returns: 0 on success, non-0 on failure
 *
 */
-int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+int ram_block_discard_range(RAMBlock *rb, uint64_t offset, size_t length)
 {
     int ret = -1;
 
-    uint8_t *host_startaddr = rb->host + start;
+    uint8_t *host_startaddr = rb->host + offset;
 
     if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
         error_report("%s: Unaligned start address: %p",
@@ -3803,7 +4100,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t offset, size_t length)
         goto err;
     }
 
-    if ((start + length) <= rb->max_length) {
+    if ((offset + length) <= rb->max_length) {
         bool need_madvise, need_fallocate;
         if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
             error_report("%s: Unaligned length: %zx", __func__, length);
@@ -3854,11 +4151,11 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t offset, size_t length)
             }
 
             ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                            start + rb->fd_offset, length);
+                            offset + rb->fd_offset, length);
             if (ret) {
                 ret = -errno;
                 error_report("%s: Failed to fallocate %s:%" PRIx64 "+%" PRIx64
-                             " +%zx (%d)", __func__, rb->idstr, start,
+                             " +%zx (%d)", __func__, rb->idstr, offset,
                              rb->fd_offset, length, ret);
                 goto err;
             }
@@ -3866,7 +4163,7 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t offset, size_t length)
             ret = -ENOSYS;
             error_report("%s: fallocate not available/file"
                          "%s:%" PRIx64 "+%" PRIx64 " +%zx (%d)", __func__,
-                         rb->idstr, start, rb->fd_offset, length, ret);
+                         rb->idstr, offset, rb->fd_offset, length, ret);
             goto err;
 #endif
         }
@@ -3886,13 +4183,13 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t offset, size_t length)
                 ret = -errno;
                 error_report("%s: Failed to discard range "
                              "%s:%" PRIx64 " +%zx (%d)",
-                             __func__, rb->idstr, start, length, ret);
+                             __func__, rb->idstr, offset, length, ret);
                 goto err;
             }
 #else
             ret = -ENOSYS;
             error_report("%s: MADVISE not available %s:%" PRIx64 " +%zx (%d)",
-                         __func__, rb->idstr, start, length, ret);
+                         __func__, rb->idstr, offset, length, ret);
             goto err;
 #endif
         }
@@ -3900,14 +4197,14 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t offset, size_t length)
                                       need_madvise, need_fallocate, ret);
     } else {
         error_report("%s: Overrun block '%s' (%" PRIu64 "/%zx/" RAM_ADDR_FMT")",
-                     __func__, rb->idstr, start, length, rb->max_length);
+                     __func__, rb->idstr, offset, length, rb->max_length);
     }
 
 err:
     return ret;
 }
 
-int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
+int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t offset,
                                         size_t length)
 {
     int ret = -1;
 
@@ -3915,23 +4212,23 @@ int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t offset,
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
     /* ignore fd_offset with guest_memfd */
     ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                    start, length);
+                    offset, length);
     if (ret) {
         ret = -errno;
         error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
-                     __func__, rb->idstr, start, length, ret);
+                     __func__, rb->idstr, offset, length, ret);
     }
 #else
     ret = -ENOSYS;
     error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
-                 __func__, rb->idstr, start, length, ret);
+                 __func__, rb->idstr, offset, length, ret);
 #endif
 
     return ret;
 }
 
-bool ramblock_is_pmem(RAMBlock *rb)
+bool ram_block_is_pmem(RAMBlock *rb)
 {
     return rb->flags & RAM_PMEM;
 }
diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 5588ed2..2ac92d0 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -628,7 +628,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
     DeviceClass *dc;
     const char *driver, *path;
     char *id;
-    DeviceState *dev = NULL;
+    DeviceState *dev;
     BusState *bus = NULL;
     QDict *properties;
 
@@ -717,10 +717,9 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts,
     return dev;
 
 err_del_dev:
-    if (dev) {
-        object_unparent(OBJECT(dev));
-        object_unref(OBJECT(dev));
-    }
+    object_unparent(OBJECT(dev));
+    object_unref(OBJECT(dev));
+
     return NULL;
 }
diff --git a/system/qtest.c b/system/qtest.c
index 301b03b..fa42c9f 100644
--- a/system/qtest.c
+++ b/system/qtest.c
@@ -29,6 +29,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qemu/cutils.h"
+#include "qemu/target-info.h"
 #include "qom/object_interfaces.h"
 
 #define MAX_IRQ 256
diff --git a/system/ram-block-attributes.c b/system/ram-block-attributes.c
new file mode 100644
index 0000000..68e8a02
--- /dev/null
+++ b/system/ram-block-attributes.c
@@ -0,0 +1,444 @@
+/*
+ * QEMU ram block attributes
+ *
+ * Copyright Intel
+ *
+ * Author:
+ *      Chenyi Qiang <chenyi.qiang@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "system/ramblock.h"
+#include "trace.h"
+
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(RamBlockAttributes,
+                                          ram_block_attributes,
+                                          RAM_BLOCK_ATTRIBUTES,
+                                          OBJECT,
+                                          { TYPE_RAM_DISCARD_MANAGER },
+                                          { })
+
+static size_t
+ram_block_attributes_get_block_size(const RamBlockAttributes *attr)
+{
+    /*
+     * Because page conversion could be manipulated in the size of at least
+     * 4K or 4K aligned, use the host page size as the granularity to track
+     * the memory attribute.
+     */
+    g_assert(attr && attr->ram_block);
+    g_assert(attr->ram_block->page_size == qemu_real_host_page_size());
+    return attr->ram_block->page_size;
+}
+
+static bool
+ram_block_attributes_rdm_is_populated(const RamDiscardManager *rdm,
+                                      const MemoryRegionSection *section)
+{
+    const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    const uint64_t first_bit = section->offset_within_region / block_size;
+    const uint64_t last_bit =
+        first_bit + int128_get64(section->size) / block_size - 1;
+    unsigned long first_discarded_bit;
+
+    first_discarded_bit = find_next_zero_bit(attr->bitmap, last_bit + 1,
+                                             first_bit);
+    return first_discarded_bit > last_bit;
+}
+
+typedef int (*ram_block_attributes_section_cb)(MemoryRegionSection *s,
+                                               void *arg);
+
+static int
+ram_block_attributes_notify_populate_cb(MemoryRegionSection *section,
+                                        void *arg)
+{
+    RamDiscardListener *rdl = arg;
+
+    return rdl->notify_populate(rdl, section);
+}
+
+static int
+ram_block_attributes_notify_discard_cb(MemoryRegionSection *section,
+                                       void *arg)
+{
+    RamDiscardListener *rdl = arg;
+
+    rdl->notify_discard(rdl, section);
+    return 0;
+}
+
+static int
+ram_block_attributes_for_each_populated_section(const RamBlockAttributes *attr,
+                                                MemoryRegionSection *section,
+                                                void *arg,
+                                                ram_block_attributes_section_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    int ret = 0;
+
+    first_bit = section->offset_within_region / block_size;
+    first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+                              first_bit);
+
+    while (first_bit < attr->bitmap_size) {
+        MemoryRegionSection tmp = *section;
+
+        offset = first_bit * block_size;
+        last_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+                                      first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * block_size;
+
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+            break;
+        }
+
+        ret = cb(&tmp, arg);
+        if (ret) {
+            error_report("%s: Failed to notify RAM discard listener: %s",
+                         __func__, strerror(-ret));
+            break;
+        }
+
+        first_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                  last_bit + 2);
+    }
+
+    return ret;
+}
+
+static int
+ram_block_attributes_for_each_discarded_section(const RamBlockAttributes *attr,
+                                                MemoryRegionSection *section,
+                                                void *arg,
+                                                ram_block_attributes_section_cb cb)
+{
+    unsigned long first_bit, last_bit;
+    uint64_t offset, size;
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    int ret = 0;
+
+    first_bit = section->offset_within_region / block_size;
+    first_bit = find_next_zero_bit(attr->bitmap, attr->bitmap_size,
+                                   first_bit);
+
+    while (first_bit < attr->bitmap_size) {
+        MemoryRegionSection tmp = *section;
+
+        offset = first_bit * block_size;
+        last_bit = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                 first_bit + 1) - 1;
+        size = (last_bit - first_bit + 1) * block_size;
+
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+            break;
+        }
+
+        ret = cb(&tmp, arg);
+        if (ret) {
+            error_report("%s: Failed to notify RAM discard listener: %s",
+                         __func__, strerror(-ret));
+            break;
+        }
+
+        first_bit = find_next_zero_bit(attr->bitmap,
+                                       attr->bitmap_size,
+                                       last_bit + 2);
+    }
+
+    return ret;
+}
+
+static uint64_t
+ram_block_attributes_rdm_get_min_granularity(const RamDiscardManager *rdm,
+                                             const MemoryRegion *mr)
+{
+    const RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+
+    g_assert(mr == attr->ram_block->mr);
+    return ram_block_attributes_get_block_size(attr);
+}
+
+static void
+ram_block_attributes_rdm_register_listener(RamDiscardManager *rdm,
+                                           RamDiscardListener *rdl,
+                                           MemoryRegionSection *section)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    int ret;
+
+    g_assert(section->mr == attr->ram_block->mr);
+    rdl->section = memory_region_section_new_copy(section);
+
+    QLIST_INSERT_HEAD(&attr->rdl_list, rdl, next);
+
+    ret = ram_block_attributes_for_each_populated_section(attr, section, rdl,
+                                    ram_block_attributes_notify_populate_cb);
+    if (ret) {
+        error_report("%s: Failed to register RAM discard listener: %s",
+                     __func__, strerror(-ret));
+        exit(1);
+    }
+}
+
+static void
+ram_block_attributes_rdm_unregister_listener(RamDiscardManager *rdm,
+                                             RamDiscardListener *rdl)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    int ret;
+
+    g_assert(rdl->section);
+    g_assert(rdl->section->mr == attr->ram_block->mr);
+
+    if (rdl->double_discard_supported) {
+        rdl->notify_discard(rdl, rdl->section);
+    } else {
+        ret = ram_block_attributes_for_each_populated_section(attr,
+                rdl->section, rdl, ram_block_attributes_notify_discard_cb);
+        if (ret) {
+            error_report("%s: Failed to unregister RAM discard listener: %s",
+                         __func__, strerror(-ret));
+            exit(1);
+        }
+    }
+
+    memory_region_section_free_copy(rdl->section);
+    rdl->section = NULL;
+    QLIST_REMOVE(rdl, next);
+}
+
+typedef struct RamBlockAttributesReplayData {
+    ReplayRamDiscardState fn;
+    void *opaque;
+} RamBlockAttributesReplayData;
+
+static int ram_block_attributes_rdm_replay_cb(MemoryRegionSection *section,
+                                              void *arg)
+{
+    RamBlockAttributesReplayData *data = arg;
+
+    return data->fn(section, data->opaque);
+}
+
+static int
+ram_block_attributes_rdm_replay_populated(const RamDiscardManager *rdm,
+                                          MemoryRegionSection *section,
+                                          ReplayRamDiscardState replay_fn,
+                                          void *opaque)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
+
+    g_assert(section->mr == attr->ram_block->mr);
+    return ram_block_attributes_for_each_populated_section(attr, section, &data,
+                                        ram_block_attributes_rdm_replay_cb);
+}
+
+static int
+ram_block_attributes_rdm_replay_discarded(const RamDiscardManager *rdm,
+                                          MemoryRegionSection *section,
+                                          ReplayRamDiscardState replay_fn,
+                                          void *opaque)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(rdm);
+    RamBlockAttributesReplayData data = { .fn = replay_fn, .opaque = opaque };
+
+    g_assert(section->mr == attr->ram_block->mr);
+    return ram_block_attributes_for_each_discarded_section(attr, section, &data,
+                                        ram_block_attributes_rdm_replay_cb);
+}
+
+static bool
+ram_block_attributes_is_valid_range(RamBlockAttributes *attr, uint64_t offset,
+                                    uint64_t size)
+{
+    MemoryRegion *mr = attr->ram_block->mr;
+
+    g_assert(mr);
+
+    uint64_t region_size = memory_region_size(mr);
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+
+    if (!QEMU_IS_ALIGNED(offset, block_size) ||
+        !QEMU_IS_ALIGNED(size, block_size)) {
+        return false;
+    }
+    if (offset + size <= offset) {
+        return false;
+    }
+    if (offset + size > region_size) {
+        return false;
+    }
+    return true;
+}
+
+static void ram_block_attributes_notify_discard(RamBlockAttributes *attr,
+                                                uint64_t offset,
+                                                uint64_t size)
+{
+    RamDiscardListener *rdl;
+
+    QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+        MemoryRegionSection tmp = *rdl->section;
+
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+            continue;
+        }
+        rdl->notify_discard(rdl, &tmp);
+    }
+}
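The consumer side of this machinery is a RamDiscardListener registered through the register_listener hook above. A hedged sketch of wiring one up; the callback bodies and names are hypothetical, while the struct fields and registration call match the code in this file:

```c
/* Hypothetical listener callbacks. */
static int my_notify_populate(RamDiscardListener *rdl,
                              MemoryRegionSection *section)
{
    /* e.g. pin the now-populated pages; non-zero aborts the operation */
    return 0;
}

static void my_notify_discard(RamDiscardListener *rdl,
                              MemoryRegionSection *section)
{
    /* e.g. unpin the discarded pages */
}

/* Registration against a manager 'rdm' for a given 'section': */
RamDiscardListener rdl = {
    .notify_populate = my_notify_populate,
    .notify_discard = my_notify_discard,
};
ram_discard_manager_register_listener(rdm, &rdl, &section);
```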
+
+static int
+ram_block_attributes_notify_populate(RamBlockAttributes *attr,
+                                     uint64_t offset, uint64_t size)
+{
+    RamDiscardListener *rdl;
+    int ret = 0;
+
+    QLIST_FOREACH(rdl, &attr->rdl_list, next) {
+        MemoryRegionSection tmp = *rdl->section;
+
+        if (!memory_region_section_intersect_range(&tmp, offset, size)) {
+            continue;
+        }
+        ret = rdl->notify_populate(rdl, &tmp);
+        if (ret) {
+            break;
+        }
+    }
+
+    return ret;
+}
+
+int ram_block_attributes_state_change(RamBlockAttributes *attr,
+                                      uint64_t offset, uint64_t size,
+                                      bool to_discard)
+{
+    const size_t block_size = ram_block_attributes_get_block_size(attr);
+    const unsigned long first_bit = offset / block_size;
+    const unsigned long nbits = size / block_size;
+    const unsigned long last_bit = first_bit + nbits - 1;
+    const bool is_discarded = find_next_bit(attr->bitmap, attr->bitmap_size,
+                                            first_bit) > last_bit;
+    const bool is_populated = find_next_zero_bit(attr->bitmap,
+                                  attr->bitmap_size, first_bit) > last_bit;
+    unsigned long bit;
+    int ret = 0;
+
+    if (!ram_block_attributes_is_valid_range(attr, offset, size)) {
+        error_report("%s, invalid range: offset 0x%" PRIx64 ", size "
+                     "0x%" PRIx64, __func__, offset, size);
+        return -EINVAL;
+    }
+
+    trace_ram_block_attributes_state_change(offset, size,
+                                            is_discarded ? "discarded" :
+                                            is_populated ? "populated" :
+                                            "mixture",
+                                            to_discard ? "discarded" :
+                                            "populated");
+    if (to_discard) {
+        if (is_discarded) {
+            /* Already private */
+        } else if (is_populated) {
+            /* Completely shared */
+            bitmap_clear(attr->bitmap, first_bit, nbits);
+            ram_block_attributes_notify_discard(attr, offset, size);
+        } else {
+            /* Unexpected mixture: process individual blocks */
+            for (bit = first_bit; bit < first_bit + nbits; bit++) {
+                if (!test_bit(bit, attr->bitmap)) {
+                    continue;
+                }
+                clear_bit(bit, attr->bitmap);
+                ram_block_attributes_notify_discard(attr, bit * block_size,
+                                                    block_size);
+            }
+        }
+    } else {
+        if (is_populated) {
+            /* Already shared */
+        } else if (is_discarded) {
+            /* Completely private */
+            bitmap_set(attr->bitmap, first_bit, nbits);
+            ret = ram_block_attributes_notify_populate(attr, offset, size);
+        } else {
+            /* Unexpected mixture: process individual blocks */
+            for (bit = first_bit; bit < first_bit + nbits; bit++) {
+                if (test_bit(bit, attr->bitmap)) {
+                    continue;
+                }
+                set_bit(bit, attr->bitmap);
+                ret = ram_block_attributes_notify_populate(attr,
+                                                           bit * block_size,
+                                                           block_size);
+                if (ret) {
+                    break;
+                }
+            }
+        }
+    }
+
+    return ret;
+}
+
+RamBlockAttributes *ram_block_attributes_create(RAMBlock *ram_block)
+{
+    const int block_size = qemu_real_host_page_size();
+    RamBlockAttributes *attr;
+    MemoryRegion *mr = ram_block->mr;
+
+    attr = RAM_BLOCK_ATTRIBUTES(object_new(TYPE_RAM_BLOCK_ATTRIBUTES));
+
+    attr->ram_block = ram_block;
+    if (memory_region_set_ram_discard_manager(mr, RAM_DISCARD_MANAGER(attr))) {
+        object_unref(OBJECT(attr));
+        return NULL;
+    }
+    attr->bitmap_size =
+        ROUND_UP(int128_get64(mr->size), block_size) / block_size;
+    attr->bitmap = bitmap_new(attr->bitmap_size);
+
+    return attr;
+}
+
+void ram_block_attributes_destroy(RamBlockAttributes *attr)
+{
+    g_assert(attr);
+
+    g_free(attr->bitmap);
+    memory_region_set_ram_discard_manager(attr->ram_block->mr, NULL);
+    object_unref(OBJECT(attr));
+}
+
+static void ram_block_attributes_init(Object *obj)
+{
+    RamBlockAttributes *attr = RAM_BLOCK_ATTRIBUTES(obj);
+
+    QLIST_INIT(&attr->rdl_list);
+}
+
+static void ram_block_attributes_finalize(Object *obj)
+{
+}
+
+static void ram_block_attributes_class_init(ObjectClass *klass,
+                                            const void *data)
+{
+    RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(klass);
+
+    rdmc->get_min_granularity = ram_block_attributes_rdm_get_min_granularity;
+    rdmc->register_listener = ram_block_attributes_rdm_register_listener;
+    rdmc->unregister_listener = ram_block_attributes_rdm_unregister_listener;
+    rdmc->is_populated = ram_block_attributes_rdm_is_populated;
+    rdmc->replay_populated = ram_block_attributes_rdm_replay_populated;
+    rdmc->replay_discarded = ram_block_attributes_rdm_replay_discarded;
+}
diff --git a/system/runstate.c b/system/runstate.c
index de74d96..32467aa 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -76,9 +76,6 @@ typedef struct {
 } RunStateTransition;
 
 static const RunStateTransition runstate_transitions_def[] = {
-    { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
-    { RUN_STATE_PRELAUNCH, RUN_STATE_SUSPENDED },
-
     { RUN_STATE_DEBUG, RUN_STATE_RUNNING },
     { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
     { RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH },
@@ -118,6 +115,7 @@ static const RunStateTransition runstate_transitions_def[] = {
     { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
     { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
     { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
+    { RUN_STATE_PRELAUNCH, RUN_STATE_SUSPENDED },
 
     { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
     { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
@@ -306,18 +304,6 @@ struct VMChangeStateEntry {
 static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
     QTAILQ_HEAD_INITIALIZER(vm_change_state_head);
 
-/**
- * qemu_add_vm_change_state_handler_prio:
- * @cb: the callback to invoke
- * @opaque: user data passed to the callback
- * @priority: low priorities execute first when the vm runs and the reverse is
- *            true when the vm stops
- *
- * Register a callback function that is invoked when the vm starts or stops
- * running.
- *
- * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
- */
 VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
         VMChangeStateHandler *cb, void *opaque, int priority)
 {
@@ -325,24 +311,6 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
             opaque, priority);
 }
 
-/**
- * qemu_add_vm_change_state_handler_prio_full:
- * @cb: the main callback to invoke
- * @prepare_cb: a callback to invoke before the main callback
- * @cb_ret: the main callback to invoke with return value
- * @opaque: user data passed to the callbacks
- * @priority: low priorities execute first when the vm runs and the reverse is
- *            true when the vm stops
- *
- * Register a main callback function and an optional prepare callback function
- * that are invoked when the vm starts or stops running. The main callback and
- * the prepare callback are called in two separate phases: First all prepare
- * callbacks are called and only then all main callbacks are called. As its
- * name suggests, the prepare callback can be used to do some preparatory work
- * before invoking the main callback.
- *
- * Returns: an entry to be freed using qemu_del_vm_change_state_handler()
- */
 VMChangeStateEntry *
 qemu_add_vm_change_state_handler_prio_full(VMChangeStateHandler *cb,
                                            VMChangeStateHandler *prepare_cb,
@@ -437,6 +405,7 @@ static ShutdownCause reset_requested;
 static ShutdownCause shutdown_requested;
 static int shutdown_exit_code = EXIT_SUCCESS;
 static int shutdown_signal;
+static bool force_shutdown;
 static pid_t shutdown_pid;
 static int powerdown_requested;
 static int debug_requested;
@@ -457,6 +426,11 @@ ShutdownCause qemu_shutdown_requested_get(void)
     return shutdown_requested;
 }
 
+bool qemu_force_shutdown_requested(void)
+{
+    return force_shutdown;
+}
+
 ShutdownCause qemu_reset_requested_get(void)
 {
     return reset_requested;
@@ -590,6 +564,58 @@ static void qemu_system_wakeup(void)
     }
 }
 
+static char *tdx_parse_panic_message(char *message)
+{
+    bool printable = false;
+    char *buf = NULL;
+    int len = 0, i;
+
+    /*
+     * Although message is defined as a JSON string, we shouldn't
+     * unconditionally treat it as is because the guest generated it and
+     * it's not necessarily trustworthy.
+     */
+    if (message) {
+        /* The caller guarantees a NUL-terminated string. */
+        len = strlen(message);
+
+        printable = len > 0;
+        for (i = 0; i < len; i++) {
+            if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
+                printable = false;
+                break;
+            }
+        }
+    }
+
+    if (len == 0) {
+        buf = g_malloc(1);
+        buf[0] = '\0';
+    } else {
+        if (!printable) {
+            /* 3 = length of "%02x " */
+            buf = g_malloc(len * 3);
+            for (i = 0; i < len; i++) {
+                if (message[i] == '\0') {
+                    break;
+                } else {
+                    sprintf(buf + 3 * i, "%02x ", message[i]);
+                }
+            }
+            if (i > 0) {
+                /* replace the trailing space with a NUL terminator */
+                buf[i * 3 - 1] = '\0';
+            } else {
+                buf[0] = '\0';
+            }
+        } else {
+            buf = g_strdup(message);
+        }
+    }
+
+    return buf;
+}
+
 void qemu_system_guest_panicked(GuestPanicInformation *info)
 {
     qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -631,7 +657,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
                       S390CrashReason_str(info->u.s390.reason),
                       info->u.s390.psw_mask,
                       info->u.s390.psw_addr);
+    } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
+        char *message = tdx_parse_panic_message(info->u.tdx.message);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "\nTDX guest reports fatal error."
+                      " error code: 0x%" PRIx32 " error message:\"%s\"\n",
+                      info->u.tdx.error_code, message);
+        g_free(message);
+        if (info->u.tdx.gpa != -1ull) {
+            qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
+                          "can be found at gpa page: 0x%" PRIx64 "\n",
+                          info->u.tdx.gpa);
+        }
     }
+
     qapi_free_GuestPanicInformation(info);
 }
 
@@ -740,6 +779,7 @@ void qemu_system_killed(int signal, pid_t pid)
      * we are in a signal handler.
      */
     shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL;
+    force_shutdown = true;
     qemu_notify_event();
 }
 
@@ -755,6 +795,9 @@ void qemu_system_shutdown_request(ShutdownCause reason)
     trace_qemu_system_shutdown_request(reason);
     replay_shutdown_request(reason);
     shutdown_requested = reason;
+    if (reason == SHUTDOWN_CAUSE_HOST_QMP_QUIT) {
+        force_shutdown = true;
+    }
     qemu_notify_event();
 }
 
diff --git a/system/tpm.c b/system/tpm.c
index 8df0f6e..903b29c 100644
--- a/system/tpm.c
+++ b/system/tpm.c
@@ -21,6 +21,7 @@
 #include "system/tpm.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/help_option.h"
 
 static QLIST_HEAD(, TPMBackend) tpm_backends =
     QLIST_HEAD_INITIALIZER(tpm_backends);
@@ -179,9 +180,9 @@ int tpm_config_parse(QemuOptsList *opts_list, const char *optstr)
 {
     QemuOpts *opts;
 
-    if (!strcmp(optstr, "help")) {
+    if (is_help_option(optstr)) {
         tpm_display_backend_drivers();
-        return -1;
+        exit(EXIT_SUCCESS);
     }
     opts = qemu_opts_parse_noisily(opts_list, optstr, true);
     if (!opts) {
diff --git a/system/trace-events b/system/trace-events
index be12ebf..82856e4 100644
--- a/system/trace-events
+++ b/system/trace-events
@@ -52,3 +52,6 @@ dirtylimit_state_finalize(void)
 dirtylimit_throttle_pct(int cpu_index, uint64_t pct, int64_t time_us) "CPU[%d] throttle percent: %" PRIu64 ", throttle adjust time %"PRIi64 " us"
 dirtylimit_set_vcpu(int cpu_index, uint64_t quota) "CPU[%d] set dirty page rate limit %"PRIu64
 dirtylimit_vcpu_execute(int cpu_index, int64_t sleep_time_us) "CPU[%d] sleep %"PRIi64 " us"
+
+# ram-block-attributes.c
+ram_block_attributes_state_change(uint64_t offset, uint64_t size, const char *from, const char *to) "offset 0x%"PRIx64" size 0x%"PRIx64" from '%s' to '%s'"
diff --git a/system/vl.c b/system/vl.c
index fd402b8..646239e 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -1192,10 +1192,7 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp)
             return -1;
         }
     }
-    /* For legacy, keep user files in a specific global order. */
-    fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER);
     fw_cfg_add_file(fw_cfg, name, buf, size);
-    fw_cfg_reset_order_override(fw_cfg);
     return 0;
 }
 
@@ -2745,7 +2742,6 @@ static void qemu_create_cli_devices(void)
     }
 
     /* init generic devices */
-    rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE);
     qemu_opts_foreach(qemu_find_opts("device"),
                       device_init_func, NULL, &error_fatal);
     QTAILQ_FOREACH(opt, &device_opts, next) {
@@ -2756,7 +2752,6 @@ static void qemu_create_cli_devices(void)
         assert(ret_data == NULL); /* error_fatal aborts */
         loc_pop(&opt->loc);
     }
-    rom_reset_order_override();
 }
 
 static bool qemu_machine_creation_done(Error **errp)
@@ -3529,10 +3524,6 @@ void qemu_init(int argc, char **argv)
             prom_envs[nb_prom_envs] = optarg;
             nb_prom_envs++;
             break;
-        case QEMU_OPTION_old_param:
-            warn_report("-old-param is deprecated");
-            old_param = 1;
-            break;
         case QEMU_OPTION_rtc:
             opts = qemu_opts_parse_noisily(qemu_find_opts("rtc"), optarg,
                                            false);
@@ -3846,6 +3837,8 @@ void qemu_init(int argc, char **argv)
     }
     qemu_init_displays();
     accel_setup_post(current_machine);
-    os_setup_post();
+    if (migrate_mode() != MIG_MODE_CPR_EXEC) {
+        os_setup_post();
+    }
     resume_mux_open();
 }
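
Editor's note: the state-change path added in system/ram-block-attributes.c above hinges on classifying a block range as fully discarded, fully populated, or a mixture before deciding whether to flip the whole bitmap range at once or walk it block by block. Below is a minimal standalone sketch of that classification, not QEMU code: every demo_-prefixed name and the fixed 64-block bitmap are invented for illustration, whereas the real implementation uses QEMU's find_next_bit()/find_next_zero_bit() helpers over a bitmap sized from the MemoryRegion.

/*
 * Standalone sketch (hypothetical names) of the discarded/populated/
 * mixture classification performed by ram_block_attributes_state_change().
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_BLOCKS 64
static unsigned long long demo_bitmap; /* bit set = block populated */

/* Index of the first bit >= from with the given value, or DEMO_BLOCKS. */
static unsigned demo_find_next(bool value, unsigned from)
{
    for (unsigned i = from; i < DEMO_BLOCKS; i++) {
        if (((demo_bitmap >> i) & 1ULL) == (unsigned long long)value) {
            return i;
        }
    }
    return DEMO_BLOCKS; /* like find_next_bit(): size when nothing found */
}

static void demo_state_change(unsigned first, unsigned nbits, bool to_discard)
{
    unsigned last = first + nbits - 1;
    /* No set bit in the range: everything is already discarded (private). */
    bool is_discarded = demo_find_next(true, first) > last;
    /* No clear bit in the range: everything is already populated (shared). */
    bool is_populated = demo_find_next(false, first) > last;

    printf("[%u..%u] %s -> %s\n", first, last,
           is_discarded ? "discarded" : is_populated ? "populated" : "mixture",
           to_discard ? "discarded" : "populated");

    /*
     * A uniform range can be flipped with one bitmap_set()/bitmap_clear()
     * and a single listener notification; a mixture must be walked block
     * by block so listeners only hear about blocks that actually change.
     */
    for (unsigned b = first; b <= last; b++) {
        if (to_discard) {
            demo_bitmap &= ~(1ULL << b);
        } else {
            demo_bitmap |= 1ULL << b;
        }
    }
}

int main(void)
{
    demo_state_change(0, 4, false); /* discarded -> populated */
    demo_state_change(2, 4, false); /* mixture: blocks 4-5 newly populated */
    demo_state_change(0, 6, true);  /* populated -> discarded */
    return 0;
}

The three calls in main() print discarded -> populated, mixture -> populated, and populated -> discarded, mirroring the 'from'/'to' strings of the ram_block_attributes_state_change trace event added in system/trace-events.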