aboutsummaryrefslogtreecommitdiff
path: root/accel/tcg
diff options
context:
space:
mode:
author: Richard Henderson <richard.henderson@linaro.org> 2023-02-14 22:16:17 -1000
committer: Richard Henderson <richard.henderson@linaro.org> 2023-05-16 15:21:39 -0700
commit: 35c653c4029794f67a523191941104fe12f2b22d (patch)
tree: 96c9d11792e9314dced10a33fc5b283f06c2298b /accel/tcg
parent: 0bbf501570801a101a741a7f79e1865c4ec411e2 (diff)
download: qemu-35c653c4029794f67a523191941104fe12f2b22d.zip
qemu-35c653c4029794f67a523191941104fe12f2b22d.tar.gz
qemu-35c653c4029794f67a523191941104fe12f2b22d.tar.bz2
tcg: Add 128-bit guest memory primitives

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'accel/tcg')
-rw-r--r-- accel/tcg/cputlb.c 399
-rw-r--r-- accel/tcg/ldst_atomicity.c.inc 184
-rw-r--r-- accel/tcg/tcg-runtime.h 3
-rw-r--r-- accel/tcg/user-exec.c 92
4 files changed, 562 insertions, 116 deletions
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 34796ef..49e49f7 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -40,6 +40,7 @@
#include "qemu/plugin-memory.h"
#endif
#include "tcg/tcg-ldst.h"
+#include "exec/helper-proto.h"
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
@@ -2162,6 +2163,31 @@ static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
return (ret_be << (p->size * 8)) | x;
}
+/**
+ * do_ld_whole_be16
+ * @p: translation parameters
+ * @ret_be: accumulated data
+ *
+ * As do_ld_bytes_beN, but with one atomic load.
+ * 16 aligned bytes are guaranteed to cover the load.
+ */
+static Int128 do_ld_whole_be16(CPUArchState *env, uintptr_t ra,
+ MMULookupPageData *p, uint64_t ret_be)
+{
+ int o = p->addr & 15;
+ Int128 x, y = load_atomic16_or_exit(env, ra, p->haddr - o);
+ int size = p->size;
+
+ if (!HOST_BIG_ENDIAN) {
+ y = bswap128(y);
+ }
+ y = int128_lshift(y, o * 8);
+ y = int128_urshift(y, (16 - size) * 8);
+ x = int128_make64(ret_be);
+ x = int128_lshift(x, size * 8);
+ return int128_or(x, y);
+}
+
/*
* Wrapper for the above.
*/
@@ -2211,6 +2237,63 @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
}
}
+/*
+ * Wrapper for the above, for 8 < size < 16.
+ */
+static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
+ uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
+{
+ int size = p->size;
+ uint64_t b;
+ MemOp atom;
+
+ if (unlikely(p->flags & TLB_MMIO)) {
+ p->size = size - 8;
+ a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
+ p->addr += p->size;
+ p->size = 8;
+ b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
+ return int128_make128(b, a);
+ }
+
+ /*
+ * It is a given that we cross a page and therefore there is no
+ * atomicity for the load as a whole, but subobjects may need attention.
+ */
+ atom = mop & MO_ATOM_MASK;
+ switch (atom) {
+ case MO_ATOM_SUBALIGN:
+ p->size = size - 8;
+ a = do_ld_parts_beN(p, a);
+ p->haddr += size - 8;
+ p->size = 8;
+ b = do_ld_parts_beN(p, 0);
+ break;
+
+ case MO_ATOM_WITHIN16_PAIR:
+ /* Since size > 8, this is the half that must be atomic. */
+ return do_ld_whole_be16(env, ra, p, a);
+
+ case MO_ATOM_IFALIGN_PAIR:
+ /*
+ * Since size > 8, both halves are misaligned,
+ * and so neither is atomic.
+ */
+ case MO_ATOM_IFALIGN:
+ case MO_ATOM_WITHIN16:
+ case MO_ATOM_NONE:
+ p->size = size - 8;
+ a = do_ld_bytes_beN(p, a);
+ b = ldq_be_p(p->haddr + size - 8);
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ return int128_make128(b, a);
+}
+
static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
MMUAccessType type, uintptr_t ra)
{
@@ -2399,6 +2482,80 @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
}
+static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MMULookupLocals l;
+ bool crosspage;
+ uint64_t a, b;
+ Int128 ret;
+ int first;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
+ if (likely(!crosspage)) {
+ /* Perform the load in host endianness. */
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
+ QEMU_IOTHREAD_LOCK_GUARD();
+ a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
+ ra, MMU_DATA_LOAD, MO_64);
+ b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
+ ra, MMU_DATA_LOAD, MO_64);
+ ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
+ HOST_BIG_ENDIAN ? a : b);
+ } else {
+ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
+ }
+ if (l.memop & MO_BSWAP) {
+ ret = bswap128(ret);
+ }
+ return ret;
+ }
+
+ first = l.page[0].size;
+ if (first == 8) {
+ MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
+
+ a = do_ld_8(env, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
+ b = do_ld_8(env, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
+ if ((mop8 & MO_BSWAP) == MO_LE) {
+ ret = int128_make128(a, b);
+ } else {
+ ret = int128_make128(b, a);
+ }
+ return ret;
+ }
+
+ if (first < 8) {
+ a = do_ld_beN(env, &l.page[0], 0, l.mmu_idx,
+ MMU_DATA_LOAD, l.memop, ra);
+ ret = do_ld16_beN(env, &l.page[1], a, l.mmu_idx, l.memop, ra);
+ } else {
+ ret = do_ld16_beN(env, &l.page[0], 0, l.mmu_idx, l.memop, ra);
+ b = int128_getlo(ret);
+ ret = int128_lshift(ret, l.page[1].size * 8);
+ a = int128_gethi(ret);
+ b = do_ld_beN(env, &l.page[1], b, l.mmu_idx,
+ MMU_DATA_LOAD, l.memop, ra);
+ ret = int128_make128(b, a);
+ }
+ if ((l.memop & MO_BSWAP) == MO_LE) {
+ ret = bswap128(ret);
+ }
+ return ret;
+}
+
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
+ uint32_t oi, uintptr_t retaddr)
+{
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
+ return do_ld16_mmu(env, addr, oi, retaddr);
+}
+
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
+{
+ return helper_ld16_mmu(env, addr, oi, GETPC());
+}
+
/*
* Load helpers for cpu_ldst.h.
*/
@@ -2487,59 +2644,23 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- MemOp mop = get_memop(oi);
- int mmu_idx = get_mmuidx(oi);
- MemOpIdx new_oi;
- unsigned a_bits;
- uint64_t h, l;
-
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
- a_bits = get_alignment_bits(mop);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
- mmu_idx, ra);
- }
-
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
- new_oi = make_memop_idx(mop, mmu_idx);
+ Int128 ret;
- h = helper_ldq_mmu(env, addr, new_oi, ra);
- l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
-
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
- return int128_make128(l, h);
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
+ ret = do_ld16_mmu(env, addr, oi, ra);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- MemOp mop = get_memop(oi);
- int mmu_idx = get_mmuidx(oi);
- MemOpIdx new_oi;
- unsigned a_bits;
- uint64_t h, l;
+ Int128 ret;
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
- a_bits = get_alignment_bits(mop);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
- mmu_idx, ra);
- }
-
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
- new_oi = make_memop_idx(mop, mmu_idx);
-
- l = helper_ldq_mmu(env, addr, new_oi, ra);
- h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
-
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
- return int128_make128(l, h);
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
+ ret = do_ld16_mmu(env, addr, oi, ra);
+ plugin_load_cb(env, addr, oi);
+ return ret;
}
/*
@@ -2625,6 +2746,60 @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
}
}
+/*
+ * Wrapper for the above, for 8 < size < 16.
+ */
+static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
+ Int128 val_le, int mmu_idx,
+ MemOp mop, uintptr_t ra)
+{
+ int size = p->size;
+ MemOp atom;
+
+ if (unlikely(p->flags & TLB_MMIO)) {
+ p->size = 8;
+ do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
+ p->size = size - 8;
+ p->addr += 8;
+ return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
+ return int128_gethi(val_le) >> ((size - 8) * 8);
+ }
+
+ /*
+ * It is a given that we cross a page and therefore there is no atomicity
+ * for the store as a whole, but subobjects may need attention.
+ */
+ atom = mop & MO_ATOM_MASK;
+ switch (atom) {
+ case MO_ATOM_SUBALIGN:
+ store_parts_leN(p->haddr, 8, int128_getlo(val_le));
+ return store_parts_leN(p->haddr + 8, p->size - 8,
+ int128_gethi(val_le));
+
+ case MO_ATOM_WITHIN16_PAIR:
+ /* Since size > 8, this is the half that must be atomic. */
+ if (!HAVE_al16) {
+ cpu_loop_exit_atomic(env_cpu(env), ra);
+ }
+ return store_whole_le16(p->haddr, p->size, val_le);
+
+ case MO_ATOM_IFALIGN_PAIR:
+ /*
+ * Since size > 8, both halves are misaligned,
+ * and so neither is atomic.
+ */
+ case MO_ATOM_IFALIGN:
+ case MO_ATOM_NONE:
+ stq_le_p(p->haddr, int128_getlo(val_le));
+ return store_bytes_leN(p->haddr + 8, p->size - 8,
+ int128_gethi(val_le));
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
int mmu_idx, uintptr_t ra)
{
@@ -2781,6 +2956,80 @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
do_st8_mmu(env, addr, val, oi, retaddr);
}
+static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MMULookupLocals l;
+ bool crosspage;
+ uint64_t a, b;
+ int first;
+
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
+ if (likely(!crosspage)) {
+ /* Swap to host endian if necessary, then store. */
+ if (l.memop & MO_BSWAP) {
+ val = bswap128(val);
+ }
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
+ QEMU_IOTHREAD_LOCK_GUARD();
+ if (HOST_BIG_ENDIAN) {
+ b = int128_getlo(val), a = int128_gethi(val);
+ } else {
+ a = int128_getlo(val), b = int128_gethi(val);
+ }
+ io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
+ io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
+ } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
+ /* nothing */
+ } else {
+ store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
+ }
+ return;
+ }
+
+ first = l.page[0].size;
+ if (first == 8) {
+ MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
+
+ if (l.memop & MO_BSWAP) {
+ val = bswap128(val);
+ }
+ if (HOST_BIG_ENDIAN) {
+ b = int128_getlo(val), a = int128_gethi(val);
+ } else {
+ a = int128_getlo(val), b = int128_gethi(val);
+ }
+ do_st_8(env, &l.page[0], a, l.mmu_idx, mop8, ra);
+ do_st_8(env, &l.page[1], b, l.mmu_idx, mop8, ra);
+ return;
+ }
+
+ if ((l.memop & MO_BSWAP) != MO_LE) {
+ val = bswap128(val);
+ }
+ if (first < 8) {
+ do_st_leN(env, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
+ val = int128_urshift(val, first * 8);
+ do_st16_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
+ } else {
+ b = do_st16_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
+ do_st_leN(env, &l.page[1], b, l.mmu_idx, l.memop, ra);
+ }
+}
+
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ MemOpIdx oi, uintptr_t retaddr)
+{
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
+ do_st16_mmu(env, addr, val, oi, retaddr);
+}
+
+void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
+ MemOpIdx oi)
+{
+ helper_st16_mmu(env, addr, val, oi, GETPC());
+}
+
/*
* Store Helpers for cpu_ldst.h
*/
@@ -2845,58 +3094,20 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
plugin_store_cb(env, addr, oi);
}
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
- MemOpIdx oi, uintptr_t ra)
+void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int mmu_idx = get_mmuidx(oi);
- MemOpIdx new_oi;
- unsigned a_bits;
-
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
- a_bits = get_alignment_bits(mop);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
- mmu_idx, ra);
- }
-
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
- new_oi = make_memop_idx(mop, mmu_idx);
-
- helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
- helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
-
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
+ do_st16_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
- MemOpIdx oi, uintptr_t ra)
+void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ MemOpIdx oi, uintptr_t retaddr)
{
- MemOp mop = get_memop(oi);
- int mmu_idx = get_mmuidx(oi);
- MemOpIdx new_oi;
- unsigned a_bits;
-
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
- a_bits = get_alignment_bits(mop);
-
- /* Handle CPU specific unaligned behaviour */
- if (addr & ((1 << a_bits) - 1)) {
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
- mmu_idx, ra);
- }
-
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
- new_oi = make_memop_idx(mop, mmu_idx);
-
- helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
- helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
-
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
+ do_st16_mmu(env, addr, val, oi, retaddr);
+ plugin_store_cb(env, addr, oi);
}
#include "ldst_common.c.inc"
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index 1f39e43..ce73b32 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -440,6 +440,21 @@ static inline uint64_t load_atom_8_by_4(void *pv)
}
/**
+ * load_atom_8_by_8_or_4:
+ * @pv: host address
+ *
+ * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
+ */
+static inline uint64_t load_atom_8_by_8_or_4(void *pv)
+{
+ if (HAVE_al8_fast) {
+ return load_atomic8(pv);
+ } else {
+ return load_atom_8_by_4(pv);
+ }
+}
+
+/**
* load_atom_2:
* @p: host address
* @memop: the full memory op
@@ -572,6 +587,64 @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
}
/**
+ * load_atom_16:
+ * @pv: host address
+ * @memop: the full memory op
+ *
+ * Load 16 bytes from @pv, honoring the atomicity of @memop.
+ */
+static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
+ void *pv, MemOp memop)
+{
+ uintptr_t pi = (uintptr_t)pv;
+ int atmax;
+ Int128 r;
+ uint64_t a, b;
+
+ /*
+ * If the host does not support 16-byte atomics, wait until we have
+ * examined the atomicity parameters below.
+ */
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
+ return load_atomic16(pv);
+ }
+
+ atmax = required_atomicity(env, pi, memop);
+ switch (atmax) {
+ case MO_8:
+ memcpy(&r, pv, 16);
+ return r;
+ case MO_16:
+ a = load_atom_8_by_2(pv);
+ b = load_atom_8_by_2(pv + 8);
+ break;
+ case MO_32:
+ a = load_atom_8_by_4(pv);
+ b = load_atom_8_by_4(pv + 8);
+ break;
+ case MO_64:
+ if (!HAVE_al8) {
+ cpu_loop_exit_atomic(env_cpu(env), ra);
+ }
+ a = load_atomic8(pv);
+ b = load_atomic8(pv + 8);
+ break;
+ case -MO_64:
+ if (!HAVE_al8) {
+ cpu_loop_exit_atomic(env_cpu(env), ra);
+ }
+ a = load_atom_extract_al8x2(pv);
+ b = load_atom_extract_al8x2(pv + 8);
+ break;
+ case MO_128:
+ return load_atomic16_or_exit(env, ra, pv);
+ default:
+ g_assert_not_reached();
+ }
+ return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
+}
+
+/**
* store_atomic2:
* @pv: host address
* @val: value to store
@@ -613,6 +686,35 @@ static inline void store_atomic8(void *pv, uint64_t val)
}
/**
+ * store_atomic16:
+ * @pv: host address
+ * @val: value to store
+ *
+ * Atomically store 16 aligned bytes to @pv.
+ */
+static inline void store_atomic16(void *pv, Int128Alias val)
+{
+#if defined(CONFIG_ATOMIC128)
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
+ qatomic_set__nocheck(pu, val.u);
+#elif defined(CONFIG_CMPXCHG128)
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
+ __uint128_t o;
+
+ /*
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
+ * and accept the sequential consistency that comes with it.
+ */
+ do {
+ o = *pu;
+ } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
+#else
+ qemu_build_not_reached();
+#endif
+}
+
+/**
* store_atom_4x2
*/
static inline void store_atom_4_by_2(void *pv, uint32_t val)
@@ -1055,3 +1157,85 @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
}
cpu_loop_exit_atomic(env_cpu(env), ra);
}
+
+/**
+ * store_atom_16:
+ * @pv: host address
+ * @val: the value to store
+ * @memop: the full memory op
+ *
+ * Store 16 bytes to @pv, honoring the atomicity of @memop.
+ */
+static void store_atom_16(CPUArchState *env, uintptr_t ra,
+ void *pv, MemOp memop, Int128 val)
+{
+ uintptr_t pi = (uintptr_t)pv;
+ uint64_t a, b;
+ int atmax;
+
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
+ store_atomic16(pv, val);
+ return;
+ }
+
+ atmax = required_atomicity(env, pi, memop);
+
+ a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
+ b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
+ switch (atmax) {
+ case MO_8:
+ memcpy(pv, &val, 16);
+ return;
+ case MO_16:
+ store_atom_8_by_2(pv, a);
+ store_atom_8_by_2(pv + 8, b);
+ return;
+ case MO_32:
+ store_atom_8_by_4(pv, a);
+ store_atom_8_by_4(pv + 8, b);
+ return;
+ case MO_64:
+ if (HAVE_al8) {
+ store_atomic8(pv, a);
+ store_atomic8(pv + 8, b);
+ return;
+ }
+ break;
+ case -MO_64:
+ if (HAVE_al16) {
+ uint64_t val_le;
+ int s2 = pi & 15;
+ int s1 = 16 - s2;
+
+ if (HOST_BIG_ENDIAN) {
+ val = bswap128(val);
+ }
+ switch (s2) {
+ case 1 ... 7:
+ val_le = store_whole_le16(pv, s1, val);
+ store_bytes_leN(pv + s1, s2, val_le);
+ break;
+ case 9 ... 15:
+ store_bytes_leN(pv, s1, int128_getlo(val));
+ val = int128_urshift(val, s1 * 8);
+ store_whole_le16(pv + s1, s2, val);
+ break;
+ case 0: /* aligned */
+ case 8: /* atmax MO_64 */
+ default:
+ g_assert_not_reached();
+ }
+ return;
+ }
+ break;
+ case MO_128:
+ if (HAVE_al16) {
+ store_atomic16(pv, val);
+ return;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ cpu_loop_exit_atomic(env_cpu(env), ra);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index b8e6421..d9adc64 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -39,6 +39,9 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
#endif /* IN_HELPER_PROTO */
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
+
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index d9f9766..8f86254 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -1121,18 +1121,45 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return cpu_to_le64(ret);
}
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
- MemOpIdx oi, uintptr_t ra)
+static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
+ MemOp mop, uintptr_t ra)
{
void *haddr;
Int128 ret;
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
- memcpy(&ret, haddr, 16);
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
+ ret = load_atom_16(env, ra, haddr, mop);
clear_helper_retaddr();
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ return ret;
+}
+
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
+
+ if (mop & MO_BSWAP) {
+ ret = bswap128(ret);
+ }
+ return ret;
+}
+
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
+{
+ return helper_ld16_mmu(env, addr, oi, GETPC());
+}
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ Int128 ret;
+
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (!HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
@@ -1142,15 +1169,12 @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
+ MemOp mop = get_memop(oi);
Int128 ret;
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
- memcpy(&ret, haddr, 16);
- clear_helper_retaddr();
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
-
if (HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
@@ -1307,33 +1331,57 @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
+static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOp mop, uintptr_t ra)
+{
+ void *haddr;
+
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
+ store_atom_16(env, ra, haddr, mop, val);
+ clear_helper_retaddr();
+}
+
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+
+ if (mop & MO_BSWAP) {
+ val = bswap128(val);
+ }
+ do_st16_he_mmu(env, addr, val, mop, ra);
+}
+
+void helper_st_i128(CPUArchState *env, target_ulong addr,
+ Int128 val, MemOpIdx oi)
+{
+ helper_st16_mmu(env, addr, val, oi, GETPC());
+}
+
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
+ MemOp mop = get_memop(oi);
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
if (!HOST_BIG_ENDIAN) {
val = bswap128(val);
}
- memcpy(haddr, &val, 16);
- clear_helper_retaddr();
+ do_st16_he_mmu(env, addr, val, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
- void *haddr;
+ MemOp mop = get_memop(oi);
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
if (HOST_BIG_ENDIAN) {
val = bswap128(val);
}
- memcpy(haddr, &val, 16);
- clear_helper_retaddr();
+ do_st16_he_mmu(env, addr, val, mop, ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}