/* * QEMU System Emulator * * Copyright (c) 2003-2008 Fabrice Bellard * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ #include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/cutils.h" #include "migration/vmstate.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "exec/exec-all.h" #include "sysemu/cpus.h" #include "sysemu/qtest.h" #include "qemu/main-loop.h" #include "qemu/option.h" #include "qemu/seqlock.h" #include "sysemu/replay.h" #include "sysemu/runstate.h" #include "hw/core/cpu.h" #include "sysemu/cpu-timers.h" #include "sysemu/cpu-throttle.h" #include "timers-state.h" /* * ICOUNT: Instruction Counter * * this module is split off from cpu-timers because the icount part * is TCG-specific, and does not need to be built for other accels. */ static bool icount_sleep = true; /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ #define MAX_ICOUNT_SHIFT 10 /* * 0 = Do not count executed instructions. * 1 = Fixed conversion of insn to ns via "shift" option * 2 = Runtime adaptive algorithm to compute shift */ int use_icount; static void icount_enable_precise(void) { use_icount = 1; } static void icount_enable_adaptive(void) { use_icount = 2; } /* * The current number of executed instructions is based on what we * originally budgeted minus the current state of the decrementing * icount counters in extra/u16.low. */ static int64_t cpu_get_icount_executed(CPUState *cpu) { return (cpu->icount_budget - (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); } /* * Update the global shared timer_state.qemu_icount to take into * account executed instructions. This is done by the TCG vCPU * thread so the main-loop can see time has moved forward. */ static void cpu_update_icount_locked(CPUState *cpu) { int64_t executed = cpu_get_icount_executed(cpu); cpu->icount_budget -= executed; qatomic_set_i64(&timers_state.qemu_icount, timers_state.qemu_icount + executed); } /* * Update the global shared timer_state.qemu_icount to take into * account executed instructions. This is done by the TCG vCPU * thread so the main-loop can see time has moved forward. */ void cpu_update_icount(CPUState *cpu) { seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); cpu_update_icount_locked(cpu); seqlock_write_unlock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); } static int64_t cpu_get_icount_raw_locked(void) { CPUState *cpu = current_cpu; if (cpu && cpu->running) { if (!cpu->can_do_io) { error_report("Bad icount read"); exit(1); } /* Take into account what has run */ cpu_update_icount_locked(cpu); } /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ return qatomic_read_i64(&timers_state.qemu_icount); } static int64_t cpu_get_icount_locked(void) { int64_t icount = cpu_get_icount_raw_locked(); return qatomic_read_i64(&timers_state.qemu_icount_bias) + cpu_icount_to_ns(icount); } int64_t cpu_get_icount_raw(void) { int64_t icount; unsigned start; do { start = seqlock_read_begin(&timers_state.vm_clock_seqlock); icount = cpu_get_icount_raw_locked(); } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); return icount; } /* Return the virtual CPU time, based on the instruction counter. */ int64_t cpu_get_icount(void) { int64_t icount; unsigned start; do { start = seqlock_read_begin(&timers_state.vm_clock_seqlock); icount = cpu_get_icount_locked(); } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); return icount; } int64_t cpu_icount_to_ns(int64_t icount) { return icount << qatomic_read(&timers_state.icount_time_shift); } /* * Correlation between real and virtual time is always going to be * fairly approximate, so ignore small variation. * When the guest is idle real and virtual time will be aligned in * the IO wait loop. */ #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) static void icount_adjust(void) { int64_t cur_time; int64_t cur_icount; int64_t delta; /* Protected by TimersState mutex. */ static int64_t last_delta; /* If the VM is not running, then do nothing. */ if (!runstate_is_running()) { return; } seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock_locked()); cur_icount = cpu_get_icount_locked(); delta = cur_icount - cur_time; /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ if (delta > 0 && last_delta + ICOUNT_WOBBLE < delta * 2 && timers_state.icount_time_shift > 0) { /* The guest is getting too far ahead. Slow time down. */ qatomic_set(&timers_state.icount_time_shift, timers_state.icount_time_shift - 1); } if (delta < 0 && last_delta - ICOUNT_WOBBLE > delta * 2 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { /* The guest is getting too far behind. Speed time up. */ qatomic_set(&timers_state.icount_time_shift, timers_state.icount_time_shift + 1); } last_delta = delta; qatomic_set_i64(&timers_state.qemu_icount_bias, cur_icount - (timers_state.qemu_icount << timers_state.icount_time_shift)); seqlock_write_unlock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); } static void icount_adjust_rt(void *opaque) { timer_mod(timers_state.icount_rt_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); icount_adjust(); } static void icount_adjust_vm(void *opaque) { timer_mod(timers_state.icount_vm_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + NANOSECONDS_PER_SECOND / 10); icount_adjust(); } int64_t qemu_icount_round(int64_t count) { int shift = qatomic_read(&timers_state.icount_time_shift); return (count + (1 << shift) - 1) >> shift; } static void icount_warp_rt(void) { unsigned seq; int64_t warp_start; /* * The icount_warp_timer is rescheduled soon after vm_clock_warp_start * changes from -1 to another value, so the race here is okay. */ do { seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); warp_start = timers_state.vm_clock_warp_start; } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); if (warp_start == -1) { return; } seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); if (runstate_is_running()) { int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, cpu_get_clock_locked()); int64_t warp_delta; warp_delta = clock - timers_state.vm_clock_warp_start; if (icount_enabled() == 2) { /* * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too * far ahead of real time. */ int64_t cur_icount = cpu_get_icount_locked(); int64_t delta = clock - cur_icount; warp_delta = MIN(warp_delta, delta); } qatomic_set_i64(&timers_state.qemu_icount_bias, timers_state.qemu_icount_bias + warp_delta); } timers_state.vm_clock_warp_start = -1; seqlock_write_unlock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } } static void icount_timer_cb(void *opaque) { /* * No need for a checkpoint because the timer already synchronizes * with CHECKPOINT_CLOCK_VIRTUAL_RT. */ icount_warp_rt(); } void qemu_start_warp_timer(void) { int64_t clock; int64_t deadline; assert(icount_enabled()); /* * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers * do not fire, so computing the deadline does not make sense. */ if (!runstate_is_running()) { return; } if (replay_mode != REPLAY_MODE_PLAY) { if (!all_cpu_threads_idle()) { return; } if (qtest_enabled()) { /* When testing, qtest commands advance icount. */ return; } replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); } else { /* warp clock deterministically in record/replay mode */ if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { /* * vCPU is sleeping and warp can't be started. * It is probably a race condition: notification sent * to vCPU was processed in advance and vCPU went to sleep. * Therefore we have to wake it up for doing someting. */ if (replay_has_checkpoint()) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } return; } } /* We want to use the earliest deadline from ALL vm_clocks */ clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, ~QEMU_TIMER_ATTR_EXTERNAL); if (deadline < 0) { static bool notified; if (!icount_sleep && !notified) { warn_report("icount sleep disabled and no active timers"); notified = true; } return; } if (deadline > 0) { /* * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to * sleep. Otherwise, the CPU might be waiting for a future timer * interrupt to wake it up, but the interrupt never comes because * the vCPU isn't running any insns and thus doesn't advance the * QEMU_CLOCK_VIRTUAL. */ if (!icount_sleep) { /* * We never let VCPUs sleep in no sleep icount mode. * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance * to the next QEMU_CLOCK_VIRTUAL event and notify it. * It is useful when we want a deterministic execution time, * isolated from host latencies. */ seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); qatomic_set_i64(&timers_state.qemu_icount_bias, timers_state.qemu_icount_bias + deadline); seqlock_write_unlock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } else { /* * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some * "real" time, (related to the time left until the next event) has * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. * This avoids that the warps are visible externally; for example, * you will not be sending network packets continuously instead of * every 100ms. */ seqlock_write_lock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); if (timers_state.vm_clock_warp_start == -1 || timers_state.vm_clock_warp_start > clock) { timers_state.vm_clock_warp_start = clock; } seqlock_write_unlock(&timers_state.vm_clock_seqlock, &timers_state.vm_clock_lock); timer_mod_anticipate(timers_state.icount_warp_timer, clock + deadline); } } else if (deadline == 0) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } } void qemu_account_warp_timer(void) { if (!icount_enabled() || !icount_sleep) { return; } /* * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers * do not fire, so computing the deadline does not make sense. */ if (!runstate_is_running()) { return; } /* warp clock deterministically in record/replay mode */ if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { return; } timer_del(timers_state.icount_warp_timer); icount_warp_rt(); } void configure_icount(QemuOpts *opts, Error **errp) { const char *option = qemu_opt_get(opts, "shift"); bool sleep = qemu_opt_get_bool(opts, "sleep", true); bool align = qemu_opt_get_bool(opts, "align", false); long time_shift = -1; if (!option) { if (qemu_opt_get(opts, "align") != NULL) { error_setg(errp, "Please specify shift option when using align"); } return; } if (align && !sleep) { error_setg(errp, "align=on and sleep=off are incompatible"); return; } if (strcmp(option, "auto") != 0) { if (qemu_strtol(option, NULL, 0, &time_shift) < 0 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { error_setg(errp, "icount: Invalid shift value"); return; } } else if (icount_align_option) { error_setg(errp, "shift=auto and align=on are incompatible"); return; } else if (!icount_sleep) { error_setg(errp, "shift=auto and sleep=off are incompatible"); return; } icount_sleep = sleep; if (icount_sleep) { timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, icount_timer_cb, NULL); } icount_align_option = align; if (time_shift >= 0) { timers_state.icount_time_shift = time_shift; icount_enable_precise(); return; } icount_enable_adaptive(); /* * 125MIPS seems a reasonable initial guess at the guest speed. * It will be corrected fairly quickly anyway. */ timers_state.icount_time_shift = 3; /* * Have both realtime and virtual time triggers for speed adjustment. * The realtime trigger catches emulated time passing too slowly, * the virtual time trigger catches emulated time passing too fast. * Realtime triggers occur even when idle, so use them less frequently * than VM triggers. */ timers_state.vm_clock_warp_start = -1; timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, icount_adjust_rt, NULL); timer_mod(timers_state.icount_rt_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, icount_adjust_vm, NULL); timer_mod(timers_state.icount_vm_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + NANOSECONDS_PER_SECOND / 10); }