diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-06-21 13:47:20 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-06-21 13:47:20 -0700 |
commit | f200ff158d5abcb974a6b597a962b6b2fbea2b06 (patch) | |
tree | 0bad1924d9b2c6ccb7c816a530fe7d5ee76c97b0 | |
parent | 5cdcfd861e3cdb98d3239ba78c97a1a2b13d2a70 (diff) | |
parent | c79a8e840c435bc26a251e34b043318e8b2081db (diff) | |
download | qemu-f200ff158d5abcb974a6b597a962b6b2fbea2b06.zip qemu-f200ff158d5abcb974a6b597a962b6b2fbea2b06.tar.gz qemu-f200ff158d5abcb974a6b597a962b6b2fbea2b06.tar.bz2 |
Merge tag 'pull-tcg-20220621' of https://gitlab.com/rth7680/qemu into staging
Speed empty timer list in qemu_clock_deadline_ns_all.
Implement remainder for Power3.1 hosts.
Optimize ppc host icache flushing.
Cleanups to tcg_accel_ops_init.
Fix mmio crash accessing unmapped physical memory.
# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmKyLesdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV8O1wf5AW6JeeUTs2r3owsK
# UpVaRqjlLpNeuktoOQoG8lbVzm1ulEv7zgXYJTZg4cc/83WQZ2G8WzTj3W+Qr/S9
# ECRd73Kou+fK3jTo8I+wPLQjLjkIV4xSABMGz/onxhoAeyS+xcAI4qGuSGrtIg2r
# sQ61V4fWCwvQJdHMyG756Xsh8Xjf18mrNQZ5PLGkyn/e9UIAc4KH6FsgWJdinGEs
# V/oibY20kCXpLxN0ajNmx3x4/NFs/ymMtn1z9fdhVGjAVPY0N6YsxjsGqd/WP/5U
# ui/x0wAhl/VNK2M2+z3hVGfNlMpkzTVG2A3ndD+tYI3nofwTYb/UiakhID7ZX1cQ
# yKDyAw==
# =3Rhw
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 21 Jun 2022 01:45:31 PM PDT
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]
* tag 'pull-tcg-20220621' of https://gitlab.com/rth7680/qemu:
util/cacheflush: Optimize flushing when ppc host has coherent icache
util/cacheflush: Merge aarch64 ctr_el0 usage
util: Merge cacheflush.c and cacheinfo.c
softmmu: Always initialize xlat in address_space_translate_for_iotlb
qemu-timer: Skip empty timer lists before locking in qemu_clock_deadline_ns_all
accel/tcg: Reorganize tcg_accel_ops_init()
accel/tcg: Init TCG cflags in vCPU thread handler
target/avr: Drop avr_cpu_memory_rw_debug()
tcg/ppc: implement rem[u]_i{32,64} with mod[su][wd]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | accel/tcg/tcg-accel-ops-mttcg.c | 5 | ||||
-rw-r--r-- | accel/tcg/tcg-accel-ops-rr.c | 7 | ||||
-rw-r--r-- | accel/tcg/tcg-accel-ops.c | 15 | ||||
-rw-r--r-- | softmmu/physmem.c | 13 | ||||
-rw-r--r-- | target/avr/cpu.c | 1 | ||||
-rw-r--r-- | target/avr/cpu.h | 2 | ||||
-rw-r--r-- | target/avr/helper.c | 6 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.c.inc | 22 | ||||
-rw-r--r-- | tcg/ppc/tcg-target.h | 4 | ||||
-rw-r--r-- | util/cacheflush.c | 247 | ||||
-rw-r--r-- | util/cacheinfo.c | 200 | ||||
-rw-r--r-- | util/meson.build | 2 | ||||
-rw-r--r-- | util/qemu-timer.c | 3 |
13 files changed, 284 insertions, 243 deletions
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index d50239e..ba997f6 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -70,6 +70,8 @@ static void *mttcg_cpu_thread_fn(void *arg) assert(tcg_enabled()); g_assert(!icount_enabled()); + tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1); + rcu_register_thread(); force_rcu.notifier.notify = mttcg_force_rcu; force_rcu.cpu = cpu; @@ -139,9 +141,6 @@ void mttcg_start_vcpu_thread(CPUState *cpu) { char thread_name[VCPU_THREAD_NAME_SIZE]; - g_assert(tcg_enabled()); - tcg_cpu_init_cflags(cpu, current_machine->smp.max_cpus > 1); - cpu->thread = g_new0(QemuThread, 1); cpu->halt_cond = g_malloc0(sizeof(QemuCond)); qemu_cond_init(cpu->halt_cond); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 1a72149..cc8adc2 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -152,7 +152,9 @@ static void *rr_cpu_thread_fn(void *arg) Notifier force_rcu; CPUState *cpu = arg; - assert(tcg_enabled()); + g_assert(tcg_enabled()); + tcg_cpu_init_cflags(cpu, false); + rcu_register_thread(); force_rcu.notify = rr_force_rcu; rcu_add_force_rcu_notifier(&force_rcu); @@ -275,9 +277,6 @@ void rr_start_vcpu_thread(CPUState *cpu) static QemuCond *single_tcg_halt_cond; static QemuThread *single_tcg_cpu_thread; - g_assert(tcg_enabled()); - tcg_cpu_init_cflags(cpu, false); - if (!single_tcg_cpu_thread) { cpu->thread = g_new0(QemuThread, 1); cpu->halt_cond = g_new0(QemuCond, 1); diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 684dc5a..786d90c 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -97,16 +97,17 @@ static void tcg_accel_ops_init(AccelOpsClass *ops) ops->create_vcpu_thread = mttcg_start_vcpu_thread; ops->kick_vcpu_thread = mttcg_kick_vcpu_thread; ops->handle_interrupt = tcg_handle_interrupt; - } else if (icount_enabled()) { - ops->create_vcpu_thread = rr_start_vcpu_thread; - ops->kick_vcpu_thread = rr_kick_vcpu_thread; - ops->handle_interrupt = icount_handle_interrupt; - ops->get_virtual_clock = icount_get; - ops->get_elapsed_ticks = icount_get; } else { ops->create_vcpu_thread = rr_start_vcpu_thread; ops->kick_vcpu_thread = rr_kick_vcpu_thread; - ops->handle_interrupt = tcg_handle_interrupt; + + if (icount_enabled()) { + ops->handle_interrupt = icount_handle_interrupt; + ops->get_virtual_clock = icount_get; + ops->get_elapsed_ticks = icount_get; + } else { + ops->handle_interrupt = tcg_handle_interrupt; + } } } diff --git a/softmmu/physmem.c b/softmmu/physmem.c index fb16be5..dc3c3e5 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -669,7 +669,7 @@ void tcg_iommu_init_notifier_list(CPUState *cpu) /* Called from RCU critical section */ MemoryRegionSection * -address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, +address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr, hwaddr *xlat, hwaddr *plen, MemTxAttrs attrs, int *prot) { @@ -678,6 +678,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, IOMMUMemoryRegionClass *imrc; IOMMUTLBEntry iotlb; int iommu_idx; + hwaddr addr = orig_addr; AddressSpaceDispatch *d = qatomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); @@ -722,6 +723,16 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, return section; translate_fail: + /* + * We should be given a page-aligned address -- certainly + * tlb_set_page_with_attrs() does so. The page offset of xlat + * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0. + * The page portion of xlat will be logged by memory_region_access_valid() + * when this memory access is rejected, so use the original untranslated + * physical address. + */ + assert((orig_addr & ~TARGET_PAGE_MASK) == 0); + *xlat = orig_addr; return &d->map.sections[PHYS_SECTION_UNASSIGNED]; } diff --git a/target/avr/cpu.c b/target/avr/cpu.c index 5d70e34..05b992f 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -214,7 +214,6 @@ static void avr_cpu_class_init(ObjectClass *oc, void *data) cc->has_work = avr_cpu_has_work; cc->dump_state = avr_cpu_dump_state; cc->set_pc = avr_cpu_set_pc; - cc->memory_rw_debug = avr_cpu_memory_rw_debug; dc->vmsd = &vms_avr_cpu; cc->sysemu_ops = &avr_sysemu_ops; cc->disas_set_info = avr_cpu_disas_set_info; diff --git a/target/avr/cpu.h b/target/avr/cpu.h index d304f33..96419c0 100644 --- a/target/avr/cpu.h +++ b/target/avr/cpu.h @@ -184,8 +184,6 @@ void avr_cpu_tcg_init(void); void avr_cpu_list(void); int cpu_avr_exec(CPUState *cpu); -int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf, - int len, bool is_write); enum { TB_FLAGS_FULL_ACCESS = 1, diff --git a/target/avr/helper.c b/target/avr/helper.c index c27f702..db76452 100644 --- a/target/avr/helper.c +++ b/target/avr/helper.c @@ -93,12 +93,6 @@ void avr_cpu_do_interrupt(CPUState *cs) cs->exception_index = -1; } -int avr_cpu_memory_rw_debug(CPUState *cs, vaddr addr, uint8_t *buf, - int len, bool is_write) -{ - return cpu_memory_rw_debug(cs, addr, buf, len, is_write); -} - hwaddr avr_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { return addr; /* I assume 1:1 address correspondence */ diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index de4483e..1cbd047 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -371,6 +371,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define MULHWU XO31( 11) #define DIVW XO31(491) #define DIVWU XO31(459) +#define MODSW XO31(779) +#define MODUW XO31(267) #define CMP XO31( 0) #define CMPL XO31( 32) #define LHBRX XO31(790) @@ -403,6 +405,8 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define MULHDU XO31( 9) #define DIVD XO31(489) #define DIVDU XO31(457) +#define MODSD XO31(777) +#define MODUD XO31(265) #define LBZX XO31( 87) #define LHZX XO31(279) @@ -2806,6 +2810,14 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2])); break; + case INDEX_op_rem_i32: + tcg_out32(s, MODSW | TAB(args[0], args[1], args[2])); + break; + + case INDEX_op_remu_i32: + tcg_out32(s, MODUW | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_shl_i32: if (const_args[2]) { /* Limit immediate shift count lest we create an illegal insn. */ @@ -2947,6 +2959,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_divu_i64: tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2])); break; + case INDEX_op_rem_i64: + tcg_out32(s, MODSD | TAB(args[0], args[1], args[2])); + break; + case INDEX_op_remu_i64: + tcg_out32(s, MODUD | TAB(args[0], args[1], args[2])); + break; case INDEX_op_qemu_ld_i32: tcg_out_qemu_ld(s, args, false); @@ -3722,6 +3740,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_div_i32: case INDEX_op_divu_i32: + case INDEX_op_rem_i32: + case INDEX_op_remu_i32: case INDEX_op_nand_i32: case INDEX_op_nor_i32: case INDEX_op_muluh_i32: @@ -3732,6 +3752,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_nor_i64: case INDEX_op_div_i64: case INDEX_op_divu_i64: + case INDEX_op_rem_i64: + case INDEX_op_remu_i64: case INDEX_op_mulsh_i64: case INDEX_op_muluh_i64: return C_O1_I2(r, r, r); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index e6cf725..b5cd225 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -83,7 +83,7 @@ extern bool have_vsx; /* optional instructions */ #define TCG_TARGET_HAS_div_i32 1 -#define TCG_TARGET_HAS_rem_i32 0 +#define TCG_TARGET_HAS_rem_i32 have_isa_3_00 #define TCG_TARGET_HAS_rot_i32 1 #define TCG_TARGET_HAS_ext8s_i32 1 #define TCG_TARGET_HAS_ext16s_i32 1 @@ -117,7 +117,7 @@ extern bool have_vsx; #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 -#define TCG_TARGET_HAS_rem_i64 0 +#define TCG_TARGET_HAS_rem_i64 have_isa_3_00 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 #define TCG_TARGET_HAS_ext16s_i64 1 diff --git a/util/cacheflush.c b/util/cacheflush.c index 4b57186..2c2c73e 100644 --- a/util/cacheflush.c +++ b/util/cacheflush.c @@ -1,5 +1,5 @@ /* - * Flush the host cpu caches. + * Info about, and flushing the host cpu caches. * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. @@ -9,8 +9,218 @@ #include "qemu/cacheflush.h" #include "qemu/cacheinfo.h" #include "qemu/bitops.h" +#include "qemu/host-utils.h" +#include "qemu/atomic.h" +int qemu_icache_linesize = 0; +int qemu_icache_linesize_log; +int qemu_dcache_linesize = 0; +int qemu_dcache_linesize_log; + +/* + * Operating system specific cache detection mechanisms. + */ + +#if defined(_WIN32) + +static void sys_cache_info(int *isize, int *dsize) +{ + SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf; + DWORD size = 0; + BOOL success; + size_t i, n; + + /* + * Check for the required buffer size first. Note that if the zero + * size we use for the probe results in success, then there is no + * data available; fail in that case. + */ + success = GetLogicalProcessorInformation(0, &size); + if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { + return; + } + + n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n); + if (!GetLogicalProcessorInformation(buf, &size)) { + goto fail; + } + + for (i = 0; i < n; i++) { + if (buf[i].Relationship == RelationCache + && buf[i].Cache.Level == 1) { + switch (buf[i].Cache.Type) { + case CacheUnified: + *isize = *dsize = buf[i].Cache.LineSize; + break; + case CacheInstruction: + *isize = buf[i].Cache.LineSize; + break; + case CacheData: + *dsize = buf[i].Cache.LineSize; + break; + default: + break; + } + } + } + fail: + g_free(buf); +} + +#elif defined(CONFIG_DARWIN) +# include <sys/sysctl.h> +static void sys_cache_info(int *isize, int *dsize) +{ + /* There's only a single sysctl for both I/D cache line sizes. */ + long size; + size_t len = sizeof(size); + if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) { + *isize = *dsize = size; + } +} +#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +# include <sys/sysctl.h> +static void sys_cache_info(int *isize, int *dsize) +{ + /* There's only a single sysctl for both I/D cache line sizes. */ + int size; + size_t len = sizeof(size); + if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) { + *isize = *dsize = size; + } +} +#else +/* POSIX */ + +static void sys_cache_info(int *isize, int *dsize) +{ +# ifdef _SC_LEVEL1_ICACHE_LINESIZE + int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE); + if (tmp_isize > 0) { + *isize = tmp_isize; + } +# endif +# ifdef _SC_LEVEL1_DCACHE_LINESIZE + int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + if (tmp_dsize > 0) { + *dsize = tmp_dsize; + } +# endif +} +#endif /* sys_cache_info */ + + +/* + * Architecture (+ OS) specific cache detection mechanisms. + */ + +#if defined(__powerpc__) +static bool have_coherent_icache; +#endif + +#if defined(__aarch64__) && !defined(CONFIG_DARWIN) +/* Apple does not expose CTR_EL0, so we must use system interfaces. */ +static uint64_t save_ctr_el0; +static void arch_cache_info(int *isize, int *dsize) +{ + uint64_t ctr; + + /* + * The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1, + * but (at least under Linux) these are marked protected by the + * kernel. However, CTR_EL0 contains the minimum linesize in the + * entire hierarchy, and is used by userspace cache flushing. + * + * We will also use this value in flush_idcache_range. + */ + asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr)); + save_ctr_el0 = ctr; + + if (*isize == 0 || *dsize == 0) { + if (*isize == 0) { + *isize = 4 << (ctr & 0xf); + } + if (*dsize == 0) { + *dsize = 4 << ((ctr >> 16) & 0xf); + } + } +} + +#elif defined(_ARCH_PPC) && defined(__linux__) +# include "elf.h" + +static void arch_cache_info(int *isize, int *dsize) +{ + if (*isize == 0) { + *isize = qemu_getauxval(AT_ICACHEBSIZE); + } + if (*dsize == 0) { + *dsize = qemu_getauxval(AT_DCACHEBSIZE); + } + have_coherent_icache = qemu_getauxval(AT_HWCAP) & PPC_FEATURE_ICACHE_SNOOP; +} + +#else +static void arch_cache_info(int *isize, int *dsize) { } +#endif /* arch_cache_info */ + +/* + * ... and if all else fails ... + */ + +static void fallback_cache_info(int *isize, int *dsize) +{ + /* If we can only find one of the two, assume they're the same. */ + if (*isize) { + if (*dsize) { + /* Success! */ + } else { + *dsize = *isize; + } + } else if (*dsize) { + *isize = *dsize; + } else { +#if defined(_ARCH_PPC) + /* + * For PPC, we're going to use the cache sizes computed for + * flush_idcache_range. Which means that we must use the + * architecture minimum. + */ + *isize = *dsize = 16; +#else + /* Otherwise, 64 bytes is not uncommon. */ + *isize = *dsize = 64; +#endif + } +} + +static void __attribute__((constructor)) init_cache_info(void) +{ + int isize = 0, dsize = 0; + + sys_cache_info(&isize, &dsize); + arch_cache_info(&isize, &dsize); + fallback_cache_info(&isize, &dsize); + + assert((isize & (isize - 1)) == 0); + assert((dsize & (dsize - 1)) == 0); + + qemu_icache_linesize = isize; + qemu_icache_linesize_log = ctz32(isize); + qemu_dcache_linesize = dsize; + qemu_dcache_linesize_log = ctz32(dsize); + + qatomic64_init(); +} + + +/* + * Architecture (+ OS) specific cache flushing mechanisms. + */ + #if defined(__i386__) || defined(__x86_64__) || defined(__s390__) /* Caches are coherent and do not require flushing; symbol inline. */ @@ -29,17 +239,6 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) #else /* - * TODO: unify this with cacheinfo.c. - * We want to save the whole contents of CTR_EL0, so that we - * have more than the linesize, but also IDC and DIC. - */ -static uint64_t save_ctr_el0; -static void __attribute__((constructor)) init_ctr_el0(void) -{ - asm volatile("mrs\t%0, ctr_el0" : "=r"(save_ctr_el0)); -} - -/* * This is a copy of gcc's __aarch64_sync_cache_range, modified * to fit this three-operand interface. */ @@ -48,8 +247,8 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) const unsigned CTR_IDC = 1u << 28; const unsigned CTR_DIC = 1u << 29; const uint64_t ctr_el0 = save_ctr_el0; - const uintptr_t icache_lsize = 4 << extract64(ctr_el0, 0, 4); - const uintptr_t dcache_lsize = 4 << extract64(ctr_el0, 16, 4); + const uintptr_t icache_lsize = qemu_icache_linesize; + const uintptr_t dcache_lsize = qemu_dcache_linesize; uintptr_t p; /* @@ -104,8 +303,24 @@ void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len) { uintptr_t p, b, e; - size_t dsize = qemu_dcache_linesize; - size_t isize = qemu_icache_linesize; + size_t dsize, isize; + + /* + * Some processors have coherent caches and support a simplified + * flushing procedure. See + * POWER9 UM, 4.6.2.2 Instruction Cache Block Invalidate (icbi) + * https://ibm.ent.box.com/s/tmklq90ze7aj8f4n32er1mu3sy9u8k3k + */ + if (have_coherent_icache) { + asm volatile ("sync\n\t" + "icbi 0,%0\n\t" + "isync" + : : "r"(rx) : "memory"); + return; + } + + dsize = qemu_dcache_linesize; + isize = qemu_icache_linesize; b = rw & ~(dsize - 1); e = (rw + len + dsize - 1) & ~(dsize - 1); diff --git a/util/cacheinfo.c b/util/cacheinfo.c deleted file mode 100644 index ab1644d..0000000 --- a/util/cacheinfo.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * cacheinfo.c - helpers to query the host about its caches - * - * Copyright (C) 2017, Emilio G. Cota <cota@braap.org> - * License: GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "qemu/host-utils.h" -#include "qemu/atomic.h" -#include "qemu/cacheinfo.h" - -int qemu_icache_linesize = 0; -int qemu_icache_linesize_log; -int qemu_dcache_linesize = 0; -int qemu_dcache_linesize_log; - -/* - * Operating system specific detection mechanisms. - */ - -#if defined(_WIN32) - -static void sys_cache_info(int *isize, int *dsize) -{ - SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf; - DWORD size = 0; - BOOL success; - size_t i, n; - - /* Check for the required buffer size first. Note that if the zero - size we use for the probe results in success, then there is no - data available; fail in that case. */ - success = GetLogicalProcessorInformation(0, &size); - if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { - return; - } - - n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); - size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); - buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n); - if (!GetLogicalProcessorInformation(buf, &size)) { - goto fail; - } - - for (i = 0; i < n; i++) { - if (buf[i].Relationship == RelationCache - && buf[i].Cache.Level == 1) { - switch (buf[i].Cache.Type) { - case CacheUnified: - *isize = *dsize = buf[i].Cache.LineSize; - break; - case CacheInstruction: - *isize = buf[i].Cache.LineSize; - break; - case CacheData: - *dsize = buf[i].Cache.LineSize; - break; - default: - break; - } - } - } - fail: - g_free(buf); -} - -#elif defined(__APPLE__) -# include <sys/sysctl.h> -static void sys_cache_info(int *isize, int *dsize) -{ - /* There's only a single sysctl for both I/D cache line sizes. */ - long size; - size_t len = sizeof(size); - if (!sysctlbyname("hw.cachelinesize", &size, &len, NULL, 0)) { - *isize = *dsize = size; - } -} -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -# include <sys/sysctl.h> -static void sys_cache_info(int *isize, int *dsize) -{ - /* There's only a single sysctl for both I/D cache line sizes. */ - int size; - size_t len = sizeof(size); - if (!sysctlbyname("machdep.cacheline_size", &size, &len, NULL, 0)) { - *isize = *dsize = size; - } -} -#else -/* POSIX */ - -static void sys_cache_info(int *isize, int *dsize) -{ -# ifdef _SC_LEVEL1_ICACHE_LINESIZE - int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE); - if (tmp_isize > 0) { - *isize = tmp_isize; - } -# endif -# ifdef _SC_LEVEL1_DCACHE_LINESIZE - int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE); - if (tmp_dsize > 0) { - *dsize = tmp_dsize; - } -# endif -} -#endif /* sys_cache_info */ - -/* - * Architecture (+ OS) specific detection mechanisms. - */ - -#if defined(__aarch64__) - -static void arch_cache_info(int *isize, int *dsize) -{ - if (*isize == 0 || *dsize == 0) { - uint64_t ctr; - - /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1, - but (at least under Linux) these are marked protected by the - kernel. However, CTR_EL0 contains the minimum linesize in the - entire hierarchy, and is used by userspace cache flushing. */ - asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr)); - if (*isize == 0) { - *isize = 4 << (ctr & 0xf); - } - if (*dsize == 0) { - *dsize = 4 << ((ctr >> 16) & 0xf); - } - } -} - -#elif defined(_ARCH_PPC) && defined(__linux__) -# include "elf.h" - -static void arch_cache_info(int *isize, int *dsize) -{ - if (*isize == 0) { - *isize = qemu_getauxval(AT_ICACHEBSIZE); - } - if (*dsize == 0) { - *dsize = qemu_getauxval(AT_DCACHEBSIZE); - } -} - -#else -static void arch_cache_info(int *isize, int *dsize) { } -#endif /* arch_cache_info */ - -/* - * ... and if all else fails ... - */ - -static void fallback_cache_info(int *isize, int *dsize) -{ - /* If we can only find one of the two, assume they're the same. */ - if (*isize) { - if (*dsize) { - /* Success! */ - } else { - *dsize = *isize; - } - } else if (*dsize) { - *isize = *dsize; - } else { -#if defined(_ARCH_PPC) - /* - * For PPC, we're going to use the cache sizes computed for - * flush_idcache_range. Which means that we must use the - * architecture minimum. - */ - *isize = *dsize = 16; -#else - /* Otherwise, 64 bytes is not uncommon. */ - *isize = *dsize = 64; -#endif - } -} - -static void __attribute__((constructor)) init_cache_info(void) -{ - int isize = 0, dsize = 0; - - sys_cache_info(&isize, &dsize); - arch_cache_info(&isize, &dsize); - fallback_cache_info(&isize, &dsize); - - assert((isize & (isize - 1)) == 0); - assert((dsize & (dsize - 1)) == 0); - - qemu_icache_linesize = isize; - qemu_icache_linesize_log = ctz32(isize); - qemu_dcache_linesize = dsize; - qemu_dcache_linesize_log = ctz32(dsize); - - qatomic64_init(); -} diff --git a/util/meson.build b/util/meson.build index 8f16018..4939b0b 100644 --- a/util/meson.build +++ b/util/meson.build @@ -27,7 +27,7 @@ util_ss.add(files('envlist.c', 'path.c', 'module.c')) util_ss.add(files('host-utils.c')) util_ss.add(files('bitmap.c', 'bitops.c')) util_ss.add(files('fifo8.c')) -util_ss.add(files('cacheinfo.c', 'cacheflush.c')) +util_ss.add(files('cacheflush.c')) util_ss.add(files('error.c', 'error-report.c')) util_ss.add(files('qemu-print.c')) util_ss.add(files('id.c')) diff --git a/util/qemu-timer.c b/util/qemu-timer.c index a670a57..6a0de33 100644 --- a/util/qemu-timer.c +++ b/util/qemu-timer.c @@ -261,6 +261,9 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) } QLIST_FOREACH(timer_list, &clock->timerlists, list) { + if (!qatomic_read(&timer_list->active_timers)) { + continue; + } qemu_mutex_lock(&timer_list->active_timers_lock); ts = timer_list->active_timers; /* Skip all external timers */ |