path: root/accel
author     Richard Henderson <richard.henderson@linaro.org>   2023-05-11 11:44:23 +0100
committer  Richard Henderson <richard.henderson@linaro.org>   2023-05-11 11:44:23 +0100
commit     278238505d28d292927bff7683f39fb4fbca7fd1 (patch)
tree       65ecb25e2af9e5be78657452772f8592e7a8c4c8 /accel
parent     fff86d48a2cdcdfa75f845cac3e0d3cdd848d9e4 (diff)
parent     335dfd253fc242b009a1b9b5d4fffbf4ea52928d (diff)
Merge tag 'pull-tcg-20230511-2' of https://gitlab.com/rth7680/qemu into staging
target/m68k: Fix gen_load_fp regression
accel/tcg: Ensure fairness with icount
disas: Move disas.c into the target-independent source sets
tcg: Use common routines for calling slow path helpers
tcg/*: Cleanups to qemu_ld/st constraints
tcg: Remove TARGET_ALIGNED_ONLY
accel/tcg: Reorg system mode load/store helpers

# -----BEGIN PGP SIGNATURE-----
#
# iQFRBAABCgA7FiEEekgeeIaLTbaoWgXAZN846K9+IV8FAmRcxtYdHHJpY2hhcmQu
# aGVuZGVyc29uQGxpbmFyby5vcmcACgkQZN846K9+IV9arQf8Di7CnMQE/jW+8w6v
# 5af0dX8/St2JnCXzG+qiW6mJm50Cy4GunCN66JcCAswpENvQLLsJP13c+4KTeB1T
# rGBbedFXTw1LsaoOcBvwhq7RTIROz4GESTS4EZoJMlMhMv0VotekUPPz4NFMZRKX
# LMvShM2C+f2p4HmDnnbki7M3+tMqpgoGCeBFX8Jy7/5sbpS/7ceXRio3ZRAhasPu
# vjA0zqUtoTs7ijKpXf3uRl/c7xql+f0d7SDdCRt4OKasfLCCDwkjtMf6plZ2jzuS
# OgwKc5N1jaMF6erHYZJIbfLLdUl20/JJEcbpU3Eh1XuHnzn1msS9JDOm2tvzwsto
# OpOKUg==
# =Lhy3
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 11 May 2023 11:43:34 AM BST
# gpg: using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg: issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [ultimate]

* tag 'pull-tcg-20230511-2' of https://gitlab.com/rth7680/qemu: (53 commits)
  target/loongarch: Do not include tcg-ldst.h
  accel/tcg: Reorg system mode store helpers
  accel/tcg: Reorg system mode load helpers
  accel/tcg: Introduce tlb_read_idx
  accel/tcg: Add cpu_in_serial_context
  tcg: Remove TARGET_ALIGNED_ONLY
  target/sh4: Remove TARGET_ALIGNED_ONLY
  target/sh4: Use MO_ALIGN where required
  target/nios2: Remove TARGET_ALIGNED_ONLY
  target/mips: Remove TARGET_ALIGNED_ONLY
  target/mips: Use MO_ALIGN instead of 0
  target/mips: Add missing default_tcg_memop_mask
  target/mips: Add MO_ALIGN to gen_llwp, gen_scwp
  tcg/s390x: Simplify constraints on qemu_ld/st
  tcg/s390x: Use ALGFR in constructing softmmu host address
  tcg/riscv: Simplify constraints on qemu_ld/st
  tcg/ppc: Remove unused constraint J
  tcg/ppc: Remove unused constraints A, B, C, D
  tcg/ppc: Adjust constraints on qemu_ld/st
  tcg/ppc: Reorg tcg_out_tlb_read
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'accel')
-rw-r--r--  accel/tcg/cpu-exec-common.c          3
-rw-r--r--  accel/tcg/cputlb.c                1077
-rw-r--r--  accel/tcg/internal.h                 9
-rw-r--r--  accel/tcg/tb-maint.c                 2
-rw-r--r--  accel/tcg/tcg-accel-ops-icount.c    21
-rw-r--r--  accel/tcg/tcg-accel-ops-icount.h     3
-rw-r--r--  accel/tcg/tcg-accel-ops-rr.c        37
7 files changed, 678 insertions, 474 deletions
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index e7962c9..9a5fabf 100644
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -22,6 +22,7 @@
#include "sysemu/tcg.h"
#include "exec/exec-all.h"
#include "qemu/plugin.h"
+#include "internal.h"
bool tcg_allowed;
@@ -81,6 +82,8 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
{
+ /* Prevent looping if already executing in a serial context. */
+ g_assert(!cpu_in_serial_context(cpu));
cpu->exception_index = EXCP_ATOMIC;
cpu_loop_exit_restore(cpu, pc);
}
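The new assertion relies on cpu_in_serial_context(), which this series adds to accel/tcg/internal.h (see the internal.h hunk further down). A minimal caller-side sketch of the pattern it guards; the function name below is hypothetical and not part of the patch:

    /* Hypothetical slow-path sketch: code may only fall back to an atomic
     * retry when it is NOT already running serially, otherwise raising
     * EXCP_ATOMIC again could never make progress. */
    static void emulate_atomic_slowpath(CPUState *cpu, uintptr_t ra)
    {
        if (cpu_in_serial_context(cpu)) {
            /* No other vCPU can race us: emulate the operation directly. */
            /* ... non-atomic emulation would go here ... */
            return;
        }
        /* Otherwise re-execute this instruction under exclusive execution. */
        cpu_loop_exit_atomic(cpu, ra);   /* raises EXCP_ATOMIC, does not return */
    }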
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 3117886..6177770 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1441,34 +1441,17 @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
}
}
-static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
-{
-#if TCG_OVERSIZED_GUEST
- return *(target_ulong *)((uintptr_t)entry + ofs);
-#else
- /* ofs might correspond to .addr_write, so use qatomic_read */
- return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
-#endif
-}
-
/* Return true if ADDR is present in the victim tlb, and has been copied
back to the main tlb. */
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
- size_t elt_ofs, target_ulong page)
+ MMUAccessType access_type, target_ulong page)
{
size_t vidx;
assert_cpu_is_self(env_cpu(env));
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
- target_ulong cmp;
-
- /* elt_ofs might correspond to .addr_write, so use qatomic_read */
-#if TCG_OVERSIZED_GUEST
- cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
-#else
- cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
-#endif
+ target_ulong cmp = tlb_read_idx(vtlb, access_type);
if (cmp == page) {
/* Found entry in victim tlb, swap tlb and iotlb. */
@@ -1490,11 +1473,6 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
return false;
}
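victim_tlb_hit() now keys the comparator off the MMUAccessType via tlb_read_idx(), which is introduced elsewhere in this pull ("accel/tcg: Introduce tlb_read_idx") rather than in this file. A sketch of the idea it replaces tlb_read_ofs() with, assuming the entry still exposes addr_read/addr_write/addr_code as in the code above:

    /* Sketch only: select the TLB comparator by access type rather than
     * by a byte offset into CPUTLBEntry. */
    static inline target_ulong tlb_read_idx_sketch(CPUTLBEntry *entry,
                                                   MMUAccessType access_type)
    {
        switch (access_type) {
        case MMU_DATA_LOAD:
            return entry->addr_read;
        case MMU_DATA_STORE:
            /* addr_write may be updated concurrently; read it atomically. */
            return tlb_addr_write(entry);
        case MMU_INST_FETCH:
            return entry->addr_code;
        default:
            g_assert_not_reached();
        }
    }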
-/* Macro to call the above, with local variables from the use context. */
-#define VICTIM_TLB_HIT(TY, ADDR) \
- victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
- (ADDR) & TARGET_PAGE_MASK)
-
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUTLBEntryFull *full, uintptr_t retaddr)
{
@@ -1527,29 +1505,12 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
{
uintptr_t index = tlb_index(env, mmu_idx, addr);
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
- target_ulong tlb_addr, page_addr;
- size_t elt_ofs;
- int flags;
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
+ target_ulong page_addr = addr & TARGET_PAGE_MASK;
+ int flags = TLB_FLAGS_MASK;
- switch (access_type) {
- case MMU_DATA_LOAD:
- elt_ofs = offsetof(CPUTLBEntry, addr_read);
- break;
- case MMU_DATA_STORE:
- elt_ofs = offsetof(CPUTLBEntry, addr_write);
- break;
- case MMU_INST_FETCH:
- elt_ofs = offsetof(CPUTLBEntry, addr_code);
- break;
- default:
- g_assert_not_reached();
- }
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
-
- flags = TLB_FLAGS_MASK;
- page_addr = addr & TARGET_PAGE_MASK;
if (!tlb_hit_page(tlb_addr, page_addr)) {
- if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) {
CPUState *cs = env_cpu(env);
if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
@@ -1571,7 +1532,7 @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
*/
flags &= ~TLB_INVALID_MASK;
}
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
+ tlb_addr = tlb_read_idx(entry, access_type);
}
flags &= tlb_addr;
@@ -1756,6 +1717,179 @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
#endif
/*
+ * Probe for a load/store operation.
+ * Return the host address and TLB flags via MMULookupPageData.
+ */
+
+typedef struct MMULookupPageData {
+ CPUTLBEntryFull *full;
+ void *haddr;
+ target_ulong addr;
+ int flags;
+ int size;
+} MMULookupPageData;
+
+typedef struct MMULookupLocals {
+ MMULookupPageData page[2];
+ MemOp memop;
+ int mmu_idx;
+} MMULookupLocals;
+
+/**
+ * mmu_lookup1: translate one page
+ * @env: cpu context
+ * @data: lookup parameters
+ * @mmu_idx: virtual address context
+ * @access_type: load/store/code
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Resolve the translation for the one page at @data.addr, filling in
+ * the rest of @data with the results. If the translation fails,
+ * tlb_fill will longjmp out. Return true if the softmmu tlb for
+ * @mmu_idx may have resized.
+ */
+static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
+ int mmu_idx, MMUAccessType access_type, uintptr_t ra)
+{
+ target_ulong addr = data->addr;
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
+ bool maybe_resized = false;
+
+ /* If the TLB entry is for a different page, reload and try again. */
+ if (!tlb_hit(tlb_addr, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
+ addr & TARGET_PAGE_MASK)) {
+ tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra);
+ maybe_resized = true;
+ index = tlb_index(env, mmu_idx, addr);
+ entry = tlb_entry(env, mmu_idx, addr);
+ }
+ tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
+ }
+
+ data->flags = tlb_addr & TLB_FLAGS_MASK;
+ data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+ /* Compute haddr speculatively; depending on flags it might be invalid. */
+ data->haddr = (void *)((uintptr_t)addr + entry->addend);
+
+ return maybe_resized;
+}
+
+/**
+ * mmu_watch_or_dirty
+ * @env: cpu context
+ * @data: lookup parameters
+ * @access_type: load/store/code
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Trigger watchpoints for @data.addr:@data.size;
+ * record writes to protected clean pages.
+ */
+static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
+ MMUAccessType access_type, uintptr_t ra)
+{
+ CPUTLBEntryFull *full = data->full;
+ target_ulong addr = data->addr;
+ int flags = data->flags;
+ int size = data->size;
+
+ /* On watchpoint hit, this will longjmp out. */
+ if (flags & TLB_WATCHPOINT) {
+ int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra);
+ flags &= ~TLB_WATCHPOINT;
+ }
+
+ /* Note that notdirty is only set for writes. */
+ if (flags & TLB_NOTDIRTY) {
+ notdirty_write(env_cpu(env), addr, size, full, ra);
+ flags &= ~TLB_NOTDIRTY;
+ }
+ data->flags = flags;
+}
+
+/**
+ * mmu_lookup: translate page(s)
+ * @env: cpu context
+ * @addr: virtual address
+ * @oi: combined mmu_idx and MemOp
+ * @ra: return address into tcg generated code, or 0
+ * @access_type: load/store/code
+ * @l: output result
+ *
+ * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
+ * bytes. Return true if the lookup crosses a page boundary.
+ */
+static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
+{
+ unsigned a_bits;
+ bool crosspage;
+ int flags;
+
+ l->memop = get_memop(oi);
+ l->mmu_idx = get_mmuidx(oi);
+
+ tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
+
+ /* Handle CPU specific unaligned behaviour */
+ a_bits = get_alignment_bits(l->memop);
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra);
+ }
+
+ l->page[0].addr = addr;
+ l->page[0].size = memop_size(l->memop);
+ l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
+ l->page[1].size = 0;
+ crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
+
+ if (likely(!crosspage)) {
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
+
+ flags = l->page[0].flags;
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
+ }
+ if (unlikely(flags & TLB_BSWAP)) {
+ l->memop ^= MO_BSWAP;
+ }
+ } else {
+ /* Finish computing the page crossing. */
+ int size0 = l->page[1].addr - addr;
+ l->page[1].size = l->page[0].size - size0;
+ l->page[0].size = size0;
+
+ /*
+ * Lookup both pages, recognizing exceptions from either. If the
+ * second lookup potentially resized, refresh first CPUTLBEntryFull.
+ */
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
+ if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) {
+ uintptr_t index = tlb_index(env, l->mmu_idx, addr);
+ l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index];
+ }
+
+ flags = l->page[0].flags | l->page[1].flags;
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
+ mmu_watch_or_dirty(env, &l->page[1], type, ra);
+ }
+
+ /*
+ * Since target/sparc is the only user of TLB_BSWAP, and all
+ * Sparc accesses are aligned, any treatment across two pages
+ * would be arbitrary. Refuse it until there's a use.
+ */
+ tcg_debug_assert((flags & TLB_BSWAP) == 0);
+ }
+
+ return crosspage;
+}
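The crosspage decision above is pure address arithmetic. A small stand-alone illustration of the split, assuming only TARGET_PAGE_MASK; the helper name is hypothetical:

    /* Hypothetical illustration of the split computed by mmu_lookup(). */
    static void split_pages(target_ulong addr, int size,
                            target_ulong *page2, int *size0, int *size1)
    {
        *page2 = (addr + size - 1) & TARGET_PAGE_MASK;  /* page of the last byte */
        if (((addr ^ *page2) & TARGET_PAGE_MASK) == 0) {
            *size0 = size;                  /* access fits on a single page */
            *size1 = 0;
        } else {
            *size0 = *page2 - addr;         /* bytes left on the first page */
            *size1 = size - *size0;         /* bytes spilling onto the second */
        }
        /* e.g. 4 KiB pages, addr = 0x1ffe, size = 4: size0 = 2, size1 = 2. */
    }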
+
+/*
* Probe for an atomic operation. Do not allow unaligned operations,
* or io operations to proceed. Return the host address.
*
@@ -1802,7 +1936,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
if (prot & PAGE_WRITE) {
tlb_addr = tlb_addr_write(tlbe);
if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
+ addr & TARGET_PAGE_MASK)) {
tlb_fill(env_cpu(env), addr, size,
MMU_DATA_STORE, mmu_idx, retaddr);
index = tlb_index(env, mmu_idx, addr);
@@ -1835,7 +1970,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
} else /* if (prot & PAGE_READ) */ {
tlb_addr = tlbe->addr_read;
if (!tlb_hit(tlb_addr, addr)) {
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
+ addr & TARGET_PAGE_MASK)) {
tlb_fill(env_cpu(env), addr, size,
MMU_DATA_LOAD, mmu_idx, retaddr);
index = tlb_index(env, mmu_idx, addr);
@@ -1927,210 +2063,260 @@ load_memop(const void *haddr, MemOp op)
}
}
-static inline uint64_t QEMU_ALWAYS_INLINE
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
- uintptr_t retaddr, MemOp op, bool code_read,
- FullLoadHelper *full_load)
-{
- const size_t tlb_off = code_read ?
- offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
- const MMUAccessType access_type =
- code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
- const size_t size = memop_size(op);
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index;
- CPUTLBEntry *entry;
- target_ulong tlb_addr;
- void *haddr;
- uint64_t res;
+/*
+ * For the benefit of TCG generated code, we want to avoid the
+ * complication of ABI-specific return type promotion and always
+ * return a value extended to the register size of the host. This is
+ * tcg_target_long, except in the case of a 32-bit host and 64-bit
+ * data, and for that we always have uint64_t.
+ *
+ * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
+ */
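A concrete instance of the widening described above, mirroring the helper definitions further down in this hunk: even the 1-byte load is declared with a full-register return type, so generated code receives the value zero-extended to host register width.

    tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
                                         MemOpIdx oi, uintptr_t retaddr);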
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
+/**
+ * do_ld_mmio_beN:
+ * @env: cpu context
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ * @mmu_idx: virtual address context
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
+ * The bytes are concatenated in big-endian order with @ret_be.
+ */
+static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t ret_be, int mmu_idx,
+ MMUAccessType type, uintptr_t ra)
+{
+ CPUTLBEntryFull *full = p->full;
+ target_ulong addr = p->addr;
+ int i, size = p->size;
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, access_type,
- mmu_idx, retaddr);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ for (i = 0; i < size; i++) {
+ uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
+ ret_be = (ret_be << 8) | x;
}
+ return ret_be;
+}
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
+/**
+ * do_ld_bytes_beN
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ *
+ * Load @p->size bytes from @p->haddr, which is RAM.
+ * The bytes are concatenated in big-endian order with @ret_be.
+ */
+static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
+{
+ uint8_t *haddr = p->haddr;
+ int i, size = p->size;
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
- addr & TARGET_PAGE_MASK)) {
- tlb_fill(env_cpu(env), addr, size,
- access_type, mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- }
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
- tlb_addr &= ~TLB_INVALID_MASK;
+ for (i = 0; i < size; i++) {
+ ret_be = (ret_be << 8) | haddr[i];
}
+ return ret_be;
+}
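do_ld_bytes_beN() is the RAM-side counterpart of do_ld_mmio_beN() above; both build the result by shifting earlier bytes toward the most-significant end. A stand-alone restatement of that loop, for illustration only:

    /* Illustrative copy of the accumulation used by both beN loaders. */
    static uint64_t accumulate_beN(const uint8_t *bytes, int size, uint64_t ret_be)
    {
        for (int i = 0; i < size; i++) {
            ret_be = (ret_be << 8) | bytes[i];  /* earlier bytes become more significant */
        }
        return ret_be;
        /* {0x11, 0x22} followed by {0x33, 0x44} accumulates to 0x11223344. */
    }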
- /* Handle anything that isn't just a straight memory access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUTLBEntryFull *full;
- bool need_swap;
-
- /* For anything that is unaligned, recurse through full_load. */
- if ((addr & (size - 1)) != 0) {
- goto do_unaligned_access;
- }
+/*
+ * Wrapper for the above.
+ */
+static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t ret_be, int mmu_idx,
+ MMUAccessType type, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
+ } else {
+ return do_ld_bytes_beN(p, ret_be);
+ }
+}
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
+ } else {
+ return *(uint8_t *)p->haddr;
+ }
+}
- /* Handle watchpoints. */
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
- /* On watchpoint hit, this will longjmp out. */
- cpu_check_watchpoint(env_cpu(env), addr, size,
- full->attrs, BP_MEM_READ, retaddr);
- }
+static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+ uint64_t ret;
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+ }
- /* Handle I/O access. */
- if (likely(tlb_addr & TLB_MMIO)) {
- return io_readx(env, full, mmu_idx, addr, retaddr,
- access_type, op ^ (need_swap * MO_BSWAP));
- }
+ /* Perform the load host endian, then swap if necessary. */
+ ret = load_memop(p->haddr, MO_UW);
+ if (memop & MO_BSWAP) {
+ ret = bswap16(ret);
+ }
+ return ret;
+}
- haddr = (void *)((uintptr_t)addr + entry->addend);
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+ uint32_t ret;
- /*
- * Keep these two load_memop separate to ensure that the compiler
- * is able to fold the entire function to a single instruction.
- * There is a build-time assert inside to remind you of this. ;-)
- */
- if (unlikely(need_swap)) {
- return load_memop(haddr, op ^ MO_BSWAP);
- }
- return load_memop(haddr, op);
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (size > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
- >= TARGET_PAGE_SIZE)) {
- target_ulong addr1, addr2;
- uint64_t r1, r2;
- unsigned shift;
- do_unaligned_access:
- addr1 = addr & ~((target_ulong)size - 1);
- addr2 = addr1 + size;
- r1 = full_load(env, addr1, oi, retaddr);
- r2 = full_load(env, addr2, oi, retaddr);
- shift = (addr & (size - 1)) * 8;
-
- if (memop_big_endian(op)) {
- /* Big-endian combine. */
- res = (r1 << shift) | (r2 >> ((size * 8) - shift));
- } else {
- /* Little-endian combine. */
- res = (r1 >> shift) | (r2 << ((size * 8) - shift));
- }
- return res & MAKE_64BIT_MASK(0, size * 8);
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
}
- haddr = (void *)((uintptr_t)addr + entry->addend);
- return load_memop(haddr, op);
+ /* Perform the load host endian. */
+ ret = load_memop(p->haddr, MO_UL);
+ if (memop & MO_BSWAP) {
+ ret = bswap32(ret);
+ }
+ return ret;
}
-/*
- * For the benefit of TCG generated code, we want to avoid the
- * complication of ABI-specific return type promotion and always
- * return a value extended to the register size of the host. This is
- * tcg_target_long, except in the case of a 32-bit host and 64-bit
- * data, and for that we always have uint64_t.
- *
- * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
- */
+static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
+ MMUAccessType type, MemOp memop, uintptr_t ra)
+{
+ uint64_t ret;
+
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
+ }
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+ /* Perform the load host endian. */
+ ret = load_memop(p->haddr, MO_UQ);
+ if (memop & MO_BSWAP) {
+ ret = bswap64(ret);
+ }
+ return ret;
+}
+
+static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
{
- validate_memop(oi, MO_UB);
- return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
+ MMULookupLocals l;
+ bool crosspage;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ tcg_debug_assert(!crosspage);
+
+ return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
}
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_ldub_mmu(env, addr, oi, retaddr);
+ validate_memop(oi, MO_UB);
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
-static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
{
- validate_memop(oi, MO_LEUW);
- return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
- full_le_lduw_mmu);
+ MMULookupLocals l;
+ bool crosspage;
+ uint16_t ret;
+ uint8_t a, b;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ if (likely(!crosspage)) {
+ return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+ }
+
+ a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
+ b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra);
+
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = a | (b << 8);
+ } else {
+ ret = b | (a << 8);
+ }
+ return ret;
}
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_le_lduw_mmu(env, addr, oi, retaddr);
-}
-
-static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- validate_memop(oi, MO_BEUW);
- return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
- full_be_lduw_mmu);
+ validate_memop(oi, MO_LEUW);
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_be_lduw_mmu(env, addr, oi, retaddr);
+ validate_memop(oi, MO_BEUW);
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
-static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
{
- validate_memop(oi, MO_LEUL);
- return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
- full_le_ldul_mmu);
+ MMULookupLocals l;
+ bool crosspage;
+ uint32_t ret;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ if (likely(!crosspage)) {
+ return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+ }
+
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap32(ret);
+ }
+ return ret;
}
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_le_ldul_mmu(env, addr, oi, retaddr);
+ validate_memop(oi, MO_LEUL);
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
-static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
+ MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUL);
- return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
- full_be_ldul_mmu);
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
+static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
+ uintptr_t ra, MMUAccessType access_type)
{
- return full_be_ldul_mmu(env, addr, oi, retaddr);
+ MMULookupLocals l;
+ bool crosspage;
+ uint64_t ret;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
+ if (likely(!crosspage)) {
+ return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
+ }
+
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap64(ret);
+ }
+ return ret;
}
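For a cross-page load, both halves are accumulated big-endian and a single swap at the end produces the little-endian result. A worked check of that combine for an 8-byte LE load split 3/5 across the boundary; the values and function name are illustrative, with bswap64() and g_assert() assumed available as elsewhere in cputlb.c:

    static void check_ld8_crosspage(void)
    {
        const uint8_t page0[3] = { 0xb0, 0xb1, 0xb2 };
        const uint8_t page1[5] = { 0xb3, 0xb4, 0xb5, 0xb6, 0xb7 };
        uint64_t ret = 0;
        int i;

        for (i = 0; i < 3; i++) {        /* do_ld_beN on the first page */
            ret = (ret << 8) | page0[i];
        }
        for (i = 0; i < 5; i++) {        /* do_ld_beN on the second page */
            ret = (ret << 8) | page1[i];
        }
        g_assert(ret == 0xb0b1b2b3b4b5b6b7ull);  /* big-endian view */
        ret = bswap64(ret);                      /* memop was MO_LE */
        g_assert(ret == 0xb7b6b5b4b3b2b1b0ull);  /* little-endian result */
    }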
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_LEUQ);
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
- helper_le_ldq_mmu);
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUQ);
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
- helper_be_ldq_mmu);
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
}
/*
@@ -2173,56 +2359,85 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
* Load helpers for cpu_ldst.h.
*/
-static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
- MemOpIdx oi, uintptr_t retaddr,
- FullLoadHelper *full_load)
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
- uint64_t ret;
-
- ret = full_load(env, addr, oi, retaddr);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
- return ret;
}
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
+ uint8_t ret;
+
+ validate_memop(oi, MO_UB);
+ ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
+ uint16_t ret;
+
+ validate_memop(oi, MO_BEUW);
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
+ uint32_t ret;
+
+ validate_memop(oi, MO_BEUL);
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
+ uint64_t ret;
+
+ validate_memop(oi, MO_BEUQ);
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
+ uint16_t ret;
+
+ validate_memop(oi, MO_LEUW);
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
+ uint32_t ret;
+
+ validate_memop(oi, MO_LEUL);
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
+ uint64_t ret;
+
+ validate_memop(oi, MO_LEUQ);
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
@@ -2317,324 +2532,300 @@ store_memop(void *haddr, uint64_t val, MemOp op)
}
}
-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr);
-
-static void __attribute__((noinline))
-store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
- uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
- bool big_endian)
+/**
+ * do_st_mmio_leN:
+ * @env: cpu context
+ * @p: translation parameters
+ * @val_le: data to store
+ * @mmu_idx: virtual address context
+ * @ra: return address into tcg generated code, or 0
+ *
+ * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
+ * The bytes to store are extracted in little-endian order from @val_le;
+ * return the bytes of @val_le beyond @p->size that have not been stored.
+ */
+static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
{
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
- uintptr_t index, index2;
- CPUTLBEntry *entry, *entry2;
- target_ulong page1, page2, tlb_addr, tlb_addr2;
- MemOpIdx oi;
- size_t size2;
- int i;
+ CPUTLBEntryFull *full = p->full;
+ target_ulong addr = p->addr;
+ int i, size = p->size;
- /*
- * Ensure the second page is in the TLB. Note that the first page
- * is already guaranteed to be filled, and that the second page
- * cannot evict the first. An exception to this rule is PAGE_WRITE_INV
- * handling: the first page could have evicted itself.
- */
- page1 = addr & TARGET_PAGE_MASK;
- page2 = (addr + size) & TARGET_PAGE_MASK;
- size2 = (addr + size) & ~TARGET_PAGE_MASK;
- index2 = tlb_index(env, mmu_idx, page2);
- entry2 = tlb_entry(env, mmu_idx, page2);
-
- tlb_addr2 = tlb_addr_write(entry2);
- if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
- if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
- tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index2 = tlb_index(env, mmu_idx, page2);
- entry2 = tlb_entry(env, mmu_idx, page2);
- }
- tlb_addr2 = tlb_addr_write(entry2);
+ QEMU_IOTHREAD_LOCK_GUARD();
+ for (i = 0; i < size; i++, val_le >>= 8) {
+ io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
}
+ return val_le;
+}
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = tlb_addr_write(entry);
+/**
+ * do_st_bytes_leN:
+ * @p: translation parameters
+ * @val_le: data to store
+ *
+ * Store @p->size bytes at @p->haddr, which is RAM.
+ * The bytes to store are extracted in little-endian order from @val_le;
+ * return the bytes of @val_le beyond @p->size that have not been stored.
+ */
+static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
+{
+ uint8_t *haddr = p->haddr;
+ int i, size = p->size;
- /*
- * Handle watchpoints. Since this may trap, all checks
- * must happen before any store.
- */
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
- cpu_check_watchpoint(env_cpu(env), addr, size - size2,
- env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
- BP_MEM_WRITE, retaddr);
- }
- if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
- cpu_check_watchpoint(env_cpu(env), page2, size2,
- env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
- BP_MEM_WRITE, retaddr);
+ for (i = 0; i < size; i++, val_le >>= 8) {
+ haddr[i] = val_le;
}
+ return val_le;
+}
- /*
- * XXX: not efficient, but simple.
- * This loop must go in the forward direction to avoid issues
- * with self-modifying code in Windows 64-bit.
- */
- oi = make_memop_idx(MO_UB, mmu_idx);
- if (big_endian) {
- for (i = 0; i < size; ++i) {
- /* Big-endian extract. */
- uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
- }
+/*
+ * Wrapper for the above.
+ */
+static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ return val_le >> (p->size * 8);
} else {
- for (i = 0; i < size; ++i) {
- /* Little-endian extract. */
- uint8_t val8 = val >> (i * 8);
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
- }
+ return do_st_bytes_leN(p, val_le);
}
}
-static inline void QEMU_ALWAYS_INLINE
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr, MemOp op)
+static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
+ int mmu_idx, uintptr_t ra)
{
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
- const size_t size = memop_size(op);
- uintptr_t mmu_idx = get_mmuidx(oi);
- uintptr_t index;
- CPUTLBEntry *entry;
- target_ulong tlb_addr;
- void *haddr;
-
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
- mmu_idx, retaddr);
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ *(uint8_t *)p->haddr = val;
}
+}
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
- tlb_addr = tlb_addr_write(entry);
-
- /* If the TLB entry is for a different page, reload and try again. */
- if (!tlb_hit(tlb_addr, addr)) {
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
- addr & TARGET_PAGE_MASK)) {
- tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
- mmu_idx, retaddr);
- index = tlb_index(env, mmu_idx, addr);
- entry = tlb_entry(env, mmu_idx, addr);
+static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
+ int mmu_idx, MemOp memop, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ /* Swap to host endian if necessary, then store. */
+ if (memop & MO_BSWAP) {
+ val = bswap16(val);
}
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
+ store_memop(p->haddr, val, MO_UW);
}
+}
- /* Handle anything that isn't just a straight memory access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUTLBEntryFull *full;
- bool need_swap;
-
- /* For anything that is unaligned, recurse through byte stores. */
- if ((addr & (size - 1)) != 0) {
- goto do_unaligned_access;
- }
-
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
- /* Handle watchpoints. */
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
- /* On watchpoint hit, this will longjmp out. */
- cpu_check_watchpoint(env_cpu(env), addr, size,
- full->attrs, BP_MEM_WRITE, retaddr);
- }
-
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
-
- /* Handle I/O access. */
- if (tlb_addr & TLB_MMIO) {
- io_writex(env, full, mmu_idx, val, addr, retaddr,
- op ^ (need_swap * MO_BSWAP));
- return;
- }
-
- /* Ignore writes to ROM. */
- if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
- return;
- }
-
- /* Handle clean RAM pages. */
- if (tlb_addr & TLB_NOTDIRTY) {
- notdirty_write(env_cpu(env), addr, size, full, retaddr);
- }
-
- haddr = (void *)((uintptr_t)addr + entry->addend);
-
- /*
- * Keep these two store_memop separate to ensure that the compiler
- * is able to fold the entire function to a single instruction.
- * There is a build-time assert inside to remind you of this. ;-)
- */
- if (unlikely(need_swap)) {
- store_memop(haddr, val, op ^ MO_BSWAP);
- } else {
- store_memop(haddr, val, op);
+static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
+ int mmu_idx, MemOp memop, uintptr_t ra)
+{
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ /* Swap to host endian if necessary, then store. */
+ if (memop & MO_BSWAP) {
+ val = bswap32(val);
}
- return;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (size > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
- >= TARGET_PAGE_SIZE)) {
- do_unaligned_access:
- store_helper_unaligned(env, addr, val, retaddr, size,
- mmu_idx, memop_big_endian(op));
- return;
+ store_memop(p->haddr, val, MO_UL);
}
-
- haddr = (void *)((uintptr_t)addr + entry->addend);
- store_memop(haddr, val, op);
}
-static void __attribute__((noinline))
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
+ int mmu_idx, MemOp memop, uintptr_t ra)
{
- validate_memop(oi, MO_UB);
- store_helper(env, addr, val, oi, retaddr, MO_UB);
+ if (unlikely(p->flags & TLB_MMIO)) {
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ /* Swap to host endian if necessary, then store. */
+ if (memop & MO_BSWAP) {
+ val = bswap64(val);
+ }
+ store_memop(p->haddr, val, MO_UQ);
+ }
}
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
- MemOpIdx oi, uintptr_t retaddr)
+ MemOpIdx oi, uintptr_t ra)
{
- full_stb_mmu(env, addr, val, oi, retaddr);
+ MMULookupLocals l;
+ bool crosspage;
+
+ validate_memop(oi, MO_UB);
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ tcg_debug_assert(!crosspage);
+
+ do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
}
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
+ MemOpIdx oi, uintptr_t ra)
{
- validate_memop(oi, MO_LEUW);
- store_helper(env, addr, val, oi, retaddr, MO_LEUW);
+ MMULookupLocals l;
+ bool crosspage;
+ uint8_t a, b;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ return;
+ }
+
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ a = val, b = val >> 8;
+ } else {
+ b = val, a = val >> 8;
+ }
+ do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
+ do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
}
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_le_stw_mmu(env, addr, val, oi, retaddr);
-}
-
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
-{
- validate_memop(oi, MO_BEUW);
- store_helper(env, addr, val, oi, retaddr, MO_BEUW);
+ validate_memop(oi, MO_LEUW);
+ do_st2_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_be_stw_mmu(env, addr, val, oi, retaddr);
+ validate_memop(oi, MO_BEUW);
+ do_st2_mmu(env, addr, val, oi, retaddr);
}
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ MemOpIdx oi, uintptr_t ra)
{
- validate_memop(oi, MO_LEUL);
- store_helper(env, addr, val, oi, retaddr, MO_LEUL);
+ MMULookupLocals l;
+ bool crosspage;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ return;
+ }
+
+ /* Swap to little endian for simplicity, then store by bytes. */
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap32(val);
+ }
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
}
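Stores mirror the loads: the value is normalised to little-endian once, the first page consumes the low-order bytes, and do_st_leN() hands the unstored remainder on to the second page. A stand-alone sketch of that chaining; the function name is hypothetical:

    /* Hypothetical sketch of the remainder chaining used by do_st4_mmu(). */
    static void store_le_crosspage(uint8_t *page0, int size0,
                                   uint8_t *page1, int size1, uint64_t val_le)
    {
        int i;

        for (i = 0; i < size0; i++, val_le >>= 8) {
            page0[i] = val_le;           /* low-order bytes land on the first page */
        }
        /* val_le now holds exactly the bytes do_st_leN() would return ... */
        for (i = 0; i < size1; i++, val_le >>= 8) {
            page1[i] = val_le;           /* ... and they complete the second page */
        }
        /* e.g. val_le = 0x44332211, size0 = 2: page0 gets 11 22, page1 gets 33 44. */
    }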
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- full_le_stl_mmu(env, addr, val, oi, retaddr);
+ validate_memop(oi, MO_LEUL);
+ do_st4_mmu(env, addr, val, oi, retaddr);
}
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
- MemOpIdx oi, uintptr_t retaddr)
+void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
+ MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUL);
- store_helper(env, addr, val, oi, retaddr, MO_BEUL);
+ do_st4_mmu(env, addr, val, oi, retaddr);
}
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
- MemOpIdx oi, uintptr_t retaddr)
+static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
+ MemOpIdx oi, uintptr_t ra)
{
- full_be_stl_mmu(env, addr, val, oi, retaddr);
+ MMULookupLocals l;
+ bool crosspage;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ return;
+ }
+
+ /* Swap to little endian for simplicity, then store by bytes. */
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap64(val);
+ }
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
}
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_LEUQ);
- store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
+ do_st8_mmu(env, addr, val, oi, retaddr);
}
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
validate_memop(oi, MO_BEUQ);
- store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
+ do_st8_mmu(env, addr, val, oi, retaddr);
}
/*
* Store Helpers for cpu_ldst.h
*/
-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
- uint64_t val, MemOpIdx oi, uintptr_t retaddr);
-
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
- uint64_t val, MemOpIdx oi, uintptr_t ra,
- FullStoreHelper *full_store)
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
{
- full_store(env, addr, val, oi, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
+ helper_ret_stb_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
+ helper_be_stw_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
+ helper_be_stl_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
+ helper_be_stq_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
+ helper_le_stw_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
+ helper_le_stl_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
MemOpIdx oi, uintptr_t retaddr)
{
- cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
+ helper_le_stq_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
@@ -2726,98 +2917,50 @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
/* Code access functions. */
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
-}
-
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
- return full_ldub_code(env, addr, oi, 0);
-}
-
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
+ return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
- return full_lduw_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
+ return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
- return full_ldl_code(env, addr, oi, 0);
-}
-
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
- MemOpIdx oi, uintptr_t retaddr)
-{
- return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
+ return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
- return full_ldq_code(env, addr, oi, 0);
+ return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH);
}
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- return full_ldub_code(env, addr, oi, retaddr);
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int idx = get_mmuidx(oi);
- uint16_t ret;
-
- ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
- if ((mop & MO_BSWAP) != MO_TE) {
- ret = bswap16(ret);
- }
- return ret;
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int idx = get_mmuidx(oi);
- uint32_t ret;
-
- ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
- if ((mop & MO_BSWAP) != MO_TE) {
- ret = bswap32(ret);
- }
- return ret;
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int idx = get_mmuidx(oi);
- uint64_t ret;
-
- ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
- if ((mop & MO_BSWAP) != MO_TE) {
- ret = bswap64(ret);
- }
- return ret;
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
}
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 7bb0fdb..24f225c 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -64,6 +64,15 @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
}
}
+/*
+ * Return true if CS is not running in parallel with other cpus, either
+ * because there are no other cpus or we are within an exclusive context.
+ */
+static inline bool cpu_in_serial_context(CPUState *cs)
+{
+ return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
+}
+
extern int64_t max_delay;
extern int64_t max_advance;
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 0dd173f..991746f 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -760,7 +760,7 @@ void tb_flush(CPUState *cpu)
if (tcg_enabled()) {
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
- if (cpu_in_exclusive_context(cpu)) {
+ if (cpu_in_serial_context(cpu)) {
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
} else {
async_safe_run_on_cpu(cpu, do_tb_flush,
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
index 84cc742..3d2cfbb 100644
--- a/accel/tcg/tcg-accel-ops-icount.c
+++ b/accel/tcg/tcg-accel-ops-icount.c
@@ -89,7 +89,20 @@ void icount_handle_deadline(void)
}
}
-void icount_prepare_for_run(CPUState *cpu)
+/* Distribute the budget evenly across all CPUs */
+int64_t icount_percpu_budget(int cpu_count)
+{
+ int64_t limit = icount_get_limit();
+ int64_t timeslice = limit / cpu_count;
+
+ if (timeslice == 0) {
+ timeslice = limit;
+ }
+
+ return timeslice;
+}
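The division above simply slices the global icount deadline between the runnable vCPUs, falling back to the whole limit when the slice would round down to zero. Illustrative numbers only:

    /* limit = 10000 insns, 4 vCPUs  ->  timeslice = 2500 insns per vCPU
     * limit = 3 insns,     4 vCPUs  ->  3 / 4 == 0, so each vCPU gets the
     *                                   full limit of 3 rather than a zero budget */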
+
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
{
int insns_left;
@@ -101,13 +114,13 @@ void icount_prepare_for_run(CPUState *cpu)
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
g_assert(cpu->icount_extra == 0);
- cpu->icount_budget = icount_get_limit();
+ replay_mutex_lock();
+
+ cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
insns_left = MIN(0xffff, cpu->icount_budget);
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
- replay_mutex_lock();
-
if (cpu->icount_budget == 0) {
/*
* We're called without the iothread lock, so must take it while
diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h
index 1b6fd9c..16a301b 100644
--- a/accel/tcg/tcg-accel-ops-icount.h
+++ b/accel/tcg/tcg-accel-ops-icount.h
@@ -11,7 +11,8 @@
#define TCG_ACCEL_OPS_ICOUNT_H
void icount_handle_deadline(void);
-void icount_prepare_for_run(CPUState *cpu);
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
+int64_t icount_percpu_budget(int cpu_count);
void icount_process_data(CPUState *cpu);
void icount_handle_interrupt(CPUState *cpu, int mask);
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 290833a..5788efa 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -24,6 +24,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/lockable.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "sysemu/cpu-timers.h"
@@ -140,6 +141,33 @@ static void rr_force_rcu(Notifier *notify, void *data)
}
/*
+ * Calculate the number of CPUs that we will process in a single iteration of
+ * the main CPU thread loop so that we can fairly distribute the instruction
+ * count across CPUs.
+ *
+ * The CPU count is cached based on the CPU list generation ID to avoid
+ * iterating the list every time.
+ */
+static int rr_cpu_count(void)
+{
+ static unsigned int last_gen_id = ~0;
+ static int cpu_count;
+ CPUState *cpu;
+
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
+
+ if (cpu_list_generation_id_get() != last_gen_id) {
+ cpu_count = 0;
+ CPU_FOREACH(cpu) {
+ ++cpu_count;
+ }
+ last_gen_id = cpu_list_generation_id_get();
+ }
+
+ return cpu_count;
+}
+
+/*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
@@ -185,11 +213,16 @@ static void *rr_cpu_thread_fn(void *arg)
cpu->exit_request = 1;
while (1) {
+ /* Only used for icount_enabled() */
+ int64_t cpu_budget = 0;
+
qemu_mutex_unlock_iothread();
replay_mutex_lock();
qemu_mutex_lock_iothread();
if (icount_enabled()) {
+ int cpu_count = rr_cpu_count();
+
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
icount_account_warp_timer();
/*
@@ -197,6 +230,8 @@ static void *rr_cpu_thread_fn(void *arg)
* waking up the I/O thread and waiting for completion.
*/
icount_handle_deadline();
+
+ cpu_budget = icount_percpu_budget(cpu_count);
}
replay_mutex_unlock();
@@ -218,7 +253,7 @@ static void *rr_cpu_thread_fn(void *arg)
qemu_mutex_unlock_iothread();
if (icount_enabled()) {
- icount_prepare_for_run(cpu);
+ icount_prepare_for_run(cpu, cpu_budget);
}
r = tcg_cpus_exec(cpu);
if (icount_enabled()) {