Diffstat (limited to 'target/arm/tcg/mve_helper.c')
-rw-r--r--  target/arm/tcg/mve_helper.c  186
1 file changed, 102 insertions(+), 84 deletions(-)
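This patch converts the MVE load/store helpers from the cpu_*_data_ra() accessors to the cpu_*_mmu() family: each helper now builds a MemOpIdx up front, bundling the memory operation (size, sign, endianness) with the core mmu index, and ORs in MO_ALIGN so that unaligned accesses take an alignment fault rather than succeeding silently. A minimal sketch of the new pattern, using names exactly as they appear in the hunks below:

    int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env));
    MemOpIdx oi = make_memop_idx(MO_TEUW | MO_ALIGN, mmu_idx);
    /* the _mmu accessors take the MemOpIdx plus the retaddr for unwinding */
    uint16_t val = cpu_ldw_mmu(env, addr, oi, GETPC());
    cpu_stw_mmu(env, addr, val, oi, GETPC());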
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 274003e..63ddcf3 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -22,8 +22,7 @@
#include "internals.h"
#include "vec_internal.h"
#include "exec/helper-proto.h"
-#include "exec/cpu_ldst.h"
-#include "exec/exec-all.h"
+#include "accel/tcg/cpu-ldst.h"
#include "tcg/tcg.h"
#include "fpu/softfloat.h"
#include "crypto/clmul.h"
@@ -149,13 +148,15 @@ static void mve_advance_vpt(CPUARMState *env)
}
/* For loads, predicated lanes are zeroed instead of keeping their old values */
-#define DO_VLDR(OP, MSIZE, LDTYPE, ESIZE, TYPE) \
+#define DO_VLDR(OP, MFLAG, MSIZE, MTYPE, LDTYPE, ESIZE, TYPE) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
{ \
TYPE *d = vd; \
uint16_t mask = mve_element_mask(env); \
uint16_t eci_mask = mve_eci_mask(env); \
unsigned b, e; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
/* \
* R_SXTM allows the dest reg to become UNKNOWN for abandoned \
* beats so we don't care if we update part of the dest and \
@@ -164,46 +165,48 @@ static void mve_advance_vpt(CPUARMState *env)
for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
if (eci_mask & (1 << b)) { \
d[H##ESIZE(e)] = (mask & (1 << b)) ? \
- cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
+ (MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0;\
} \
addr += MSIZE; \
} \
mve_advance_vpt(env); \
}
-#define DO_VSTR(OP, MSIZE, STTYPE, ESIZE, TYPE) \
+#define DO_VSTR(OP, MFLAG, MSIZE, STTYPE, ESIZE, TYPE) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, uint32_t addr) \
{ \
TYPE *d = vd; \
uint16_t mask = mve_element_mask(env); \
unsigned b, e; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
for (b = 0, e = 0; b < 16; b += ESIZE, e++) { \
if (mask & (1 << b)) { \
- cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
+ cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \
} \
addr += MSIZE; \
} \
mve_advance_vpt(env); \
}
-DO_VLDR(vldrb, 1, ldub, 1, uint8_t)
-DO_VLDR(vldrh, 2, lduw, 2, uint16_t)
-DO_VLDR(vldrw, 4, ldl, 4, uint32_t)
+DO_VLDR(vldrb, MO_UB, 1, uint8_t, ldb, 1, uint8_t)
+DO_VLDR(vldrh, MO_TEUW, 2, uint16_t, ldw, 2, uint16_t)
+DO_VLDR(vldrw, MO_TEUL, 4, uint32_t, ldl, 4, uint32_t)
-DO_VSTR(vstrb, 1, stb, 1, uint8_t)
-DO_VSTR(vstrh, 2, stw, 2, uint16_t)
-DO_VSTR(vstrw, 4, stl, 4, uint32_t)
+DO_VSTR(vstrb, MO_UB, 1, stb, 1, uint8_t)
+DO_VSTR(vstrh, MO_TEUW, 2, stw, 2, uint16_t)
+DO_VSTR(vstrw, MO_TEUL, 4, stl, 4, uint32_t)
-DO_VLDR(vldrb_sh, 1, ldsb, 2, int16_t)
-DO_VLDR(vldrb_sw, 1, ldsb, 4, int32_t)
-DO_VLDR(vldrb_uh, 1, ldub, 2, uint16_t)
-DO_VLDR(vldrb_uw, 1, ldub, 4, uint32_t)
-DO_VLDR(vldrh_sw, 2, ldsw, 4, int32_t)
-DO_VLDR(vldrh_uw, 2, lduw, 4, uint32_t)
+DO_VLDR(vldrb_sh, MO_SB, 1, int8_t, ldb, 2, int16_t)
+DO_VLDR(vldrb_sw, MO_SB, 1, int8_t, ldb, 4, int32_t)
+DO_VLDR(vldrb_uh, MO_UB, 1, uint8_t, ldb, 2, uint16_t)
+DO_VLDR(vldrb_uw, MO_UB, 1, uint8_t, ldb, 4, uint32_t)
+DO_VLDR(vldrh_sw, MO_TESW, 2, int16_t, ldw, 4, int32_t)
+DO_VLDR(vldrh_uw, MO_TEUW, 2, uint16_t, ldw, 4, uint32_t)
-DO_VSTR(vstrb_h, 1, stb, 2, int16_t)
-DO_VSTR(vstrb_w, 1, stb, 4, int32_t)
-DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
+DO_VSTR(vstrb_h, MO_UB, 1, stb, 2, int16_t)
+DO_VSTR(vstrb_w, MO_UB, 1, stb, 4, int32_t)
+DO_VSTR(vstrh_w, MO_TEUW, 2, stw, 4, int32_t)
#undef DO_VLDR
#undef DO_VSTR
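For reference, a hand expansion of the new DO_VLDR(vldrb, MO_UB, 1, uint8_t, ldb, 1, uint8_t) instantiation comes out roughly as follows (a sketch, not compiler output):

    void HELPER(mve_vldrb)(CPUARMState *env, void *vd, uint32_t addr)
    {
        uint8_t *d = vd;
        uint16_t mask = mve_element_mask(env);
        uint16_t eci_mask = mve_eci_mask(env);
        unsigned b, e;
        int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env));
        MemOpIdx oi = make_memop_idx(MO_UB | MO_ALIGN, mmu_idx);

        for (b = 0, e = 0; b < 16; b += 1, e++) {
            if (eci_mask & (1 << b)) {
                /* predicated-off lanes are zeroed, per the comment above */
                d[H1(e)] = (mask & (1 << b)) ?
                    (uint8_t)cpu_ldb_mmu(env, addr, oi, GETPC()) : 0;
            }
            addr += 1;
        }
        mve_advance_vpt(env);
    }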
@@ -215,7 +218,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
* For loads, predicated lanes are zeroed instead of retaining
* their previous values.
*/
-#define DO_VLDR_SG(OP, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB) \
+#define DO_VLDR_SG(OP, MFLAG, MTYPE, LDTYPE, ESIZE, TYPE, OFFTYPE, ADDRFN, WB)\
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
uint32_t base) \
{ \
@@ -225,13 +228,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
uint16_t eci_mask = mve_eci_mask(env); \
unsigned e; \
uint32_t addr; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
if (!(eci_mask & 1)) { \
continue; \
} \
addr = ADDRFN(base, m[H##ESIZE(e)]); \
d[H##ESIZE(e)] = (mask & 1) ? \
- cpu_##LDTYPE##_data_ra(env, addr, GETPC()) : 0; \
+ (MTYPE)cpu_##LDTYPE##_mmu(env, addr, oi, GETPC()) : 0; \
if (WB) { \
m[H##ESIZE(e)] = addr; \
} \
@@ -240,7 +245,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
}
/* We know here TYPE is unsigned so always the same as the offset type */
-#define DO_VSTR_SG(OP, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
+#define DO_VSTR_SG(OP, MFLAG, STTYPE, ESIZE, TYPE, ADDRFN, WB) \
void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm, \
uint32_t base) \
{ \
@@ -250,13 +255,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
uint16_t eci_mask = mve_eci_mask(env); \
unsigned e; \
uint32_t addr; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MFLAG | MO_ALIGN, mmu_idx); \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE, eci_mask >>= ESIZE) { \
if (!(eci_mask & 1)) { \
continue; \
} \
addr = ADDRFN(base, m[H##ESIZE(e)]); \
if (mask & 1) { \
- cpu_##STTYPE##_data_ra(env, addr, d[H##ESIZE(e)], GETPC()); \
+ cpu_##STTYPE##_mmu(env, addr, d[H##ESIZE(e)], oi, GETPC()); \
} \
if (WB) { \
m[H##ESIZE(e)] = addr; \
@@ -283,13 +290,15 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
uint16_t eci_mask = mve_eci_mask(env); \
unsigned e; \
uint32_t addr; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
if (!(eci_mask & 1)) { \
continue; \
} \
addr = ADDRFN(base, m[H4(e & ~1)]); \
addr += 4 * (e & 1); \
- d[H4(e)] = (mask & 1) ? cpu_ldl_data_ra(env, addr, GETPC()) : 0; \
+ d[H4(e)] = (mask & 1) ? cpu_ldl_mmu(env, addr, oi, GETPC()) : 0; \
if (WB && (e & 1)) { \
m[H4(e & ~1)] = addr - 4; \
} \
@@ -307,6 +316,8 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
uint16_t eci_mask = mve_eci_mask(env); \
unsigned e; \
uint32_t addr; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (e = 0; e < 16 / 4; e++, mask >>= 4, eci_mask >>= 4) { \
if (!(eci_mask & 1)) { \
continue; \
@@ -314,7 +325,7 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
addr = ADDRFN(base, m[H4(e & ~1)]); \
addr += 4 * (e & 1); \
if (mask & 1) { \
- cpu_stl_data_ra(env, addr, d[H4(e)], GETPC()); \
+ cpu_stl_mmu(env, addr, d[H4(e)], oi, GETPC()); \
} \
if (WB && (e & 1)) { \
m[H4(e & ~1)] = addr - 4; \
@@ -328,40 +339,44 @@ DO_VSTR(vstrh_w, 2, stw, 4, int32_t)
#define ADDR_ADD_OSW(BASE, OFFSET) ((BASE) + ((OFFSET) << 2))
#define ADDR_ADD_OSD(BASE, OFFSET) ((BASE) + ((OFFSET) << 3))
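The _OS* ("offset-scaled") forms shift each element offset left by log2 of the access size before adding it to the base, e.g. (values purely illustrative):

    ADDR_ADD(0x1000, 3)      /* == 0x1003: unscaled byte offset */
    ADDR_ADD_OSH(0x1000, 3)  /* == 0x1006: halfword-scaled      */
    ADDR_ADD_OSW(0x1000, 3)  /* == 0x100c: word-scaled          */
    ADDR_ADD_OSD(0x1000, 3)  /* == 0x1018: doubleword-scaled    */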
-DO_VLDR_SG(vldrb_sg_sh, ldsb, 2, int16_t, uint16_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrb_sg_sw, ldsb, 4, int32_t, uint32_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrh_sg_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_sh, MO_SB, int8_t, ldb, 2, int16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_sw, MO_SB, int8_t, ldb, 4, int32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_sw, MO_TESW, int16_t, ldw, 4, int32_t, uint32_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrb_sg_ub, ldub, 1, uint8_t, uint8_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrb_sg_uh, ldub, 2, uint16_t, uint16_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrb_sg_uw, ldub, 4, uint32_t, uint32_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrh_sg_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrh_sg_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD, false)
-DO_VLDR_SG(vldrw_sg_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_ub, MO_UB, uint8_t, ldb, 1, uint8_t, uint8_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_uh, MO_UB, uint8_t, ldb, 2, uint16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrb_sg_uw, MO_UB, uint8_t, ldb, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_uh, MO_TEUW, uint16_t, ldw, 2, uint16_t, uint16_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrh_sg_uw, MO_TEUW, uint16_t, ldw, 4, uint32_t, uint32_t, ADDR_ADD, false)
+DO_VLDR_SG(vldrw_sg_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, false)
DO_VLDR64_SG(vldrd_sg_ud, ADDR_ADD, false)
-DO_VLDR_SG(vldrh_sg_os_sw, ldsw, 4, int32_t, uint32_t, ADDR_ADD_OSH, false)
-DO_VLDR_SG(vldrh_sg_os_uh, lduw, 2, uint16_t, uint16_t, ADDR_ADD_OSH, false)
-DO_VLDR_SG(vldrh_sg_os_uw, lduw, 4, uint32_t, uint32_t, ADDR_ADD_OSH, false)
-DO_VLDR_SG(vldrw_sg_os_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD_OSW, false)
+DO_VLDR_SG(vldrh_sg_os_sw, MO_TESW, int16_t, ldw, 4,
+ int32_t, uint32_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrh_sg_os_uh, MO_TEUW, uint16_t, ldw, 2,
+ uint16_t, uint16_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrh_sg_os_uw, MO_TEUW, uint16_t, ldw, 4,
+ uint32_t, uint32_t, ADDR_ADD_OSH, false)
+DO_VLDR_SG(vldrw_sg_os_uw, MO_TEUL, uint32_t, ldl, 4,
+ uint32_t, uint32_t, ADDR_ADD_OSW, false)
DO_VLDR64_SG(vldrd_sg_os_ud, ADDR_ADD_OSD, false)
-DO_VSTR_SG(vstrb_sg_ub, stb, 1, uint8_t, ADDR_ADD, false)
-DO_VSTR_SG(vstrb_sg_uh, stb, 2, uint16_t, ADDR_ADD, false)
-DO_VSTR_SG(vstrb_sg_uw, stb, 4, uint32_t, ADDR_ADD, false)
-DO_VSTR_SG(vstrh_sg_uh, stw, 2, uint16_t, ADDR_ADD, false)
-DO_VSTR_SG(vstrh_sg_uw, stw, 4, uint32_t, ADDR_ADD, false)
-DO_VSTR_SG(vstrw_sg_uw, stl, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_ub, MO_UB, stb, 1, uint8_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_uh, MO_UB, stb, 2, uint16_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrb_sg_uw, MO_UB, stb, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrh_sg_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrh_sg_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD, false)
+DO_VSTR_SG(vstrw_sg_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, false)
DO_VSTR64_SG(vstrd_sg_ud, ADDR_ADD, false)
-DO_VSTR_SG(vstrh_sg_os_uh, stw, 2, uint16_t, ADDR_ADD_OSH, false)
-DO_VSTR_SG(vstrh_sg_os_uw, stw, 4, uint32_t, ADDR_ADD_OSH, false)
-DO_VSTR_SG(vstrw_sg_os_uw, stl, 4, uint32_t, ADDR_ADD_OSW, false)
+DO_VSTR_SG(vstrh_sg_os_uh, MO_TEUW, stw, 2, uint16_t, ADDR_ADD_OSH, false)
+DO_VSTR_SG(vstrh_sg_os_uw, MO_TEUW, stw, 4, uint32_t, ADDR_ADD_OSH, false)
+DO_VSTR_SG(vstrw_sg_os_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD_OSW, false)
DO_VSTR64_SG(vstrd_sg_os_ud, ADDR_ADD_OSD, false)
-DO_VLDR_SG(vldrw_sg_wb_uw, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true)
+DO_VLDR_SG(vldrw_sg_wb_uw, MO_TEUL, uint32_t, ldl, 4, uint32_t, uint32_t, ADDR_ADD, true)
DO_VLDR64_SG(vldrd_sg_wb_ud, ADDR_ADD, true)
-DO_VSTR_SG(vstrw_sg_wb_uw, stl, 4, uint32_t, ADDR_ADD, true)
+DO_VSTR_SG(vstrw_sg_wb_uw, MO_TEUL, stl, 4, uint32_t, ADDR_ADD, true)
DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
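Note how the 64-bit scatter/gather forms (DO_VLDR64_SG / DO_VSTR64_SG, bodies in the hunks above) split each doubleword into two 32-bit beats: the even beat uses the address gathered from the even offset slot, the odd beat adds 4 for the high word, and the writeback variants store addr - 4 on the odd beat so the offset vector ends up holding the element's base address. The per-element address sequence, as a sketch:

    addr = ADDR_ADD(base, m[H4(e & ~1)]);  /* offset comes from the even slot */
    addr += 4 * (e & 1);                   /* odd beat covers the high word   */
    /* WB: m[H4(e & ~1)] = addr - 4 on the odd beat, i.e. base + offset */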
/*
@@ -388,13 +403,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
uint16_t mask = mve_eci_mask(env); \
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
uint32_t addr, data; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
continue; \
} \
addr = base + off[beat] * 4; \
- data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
for (e = 0; e < 4; e++, data >>= 8) { \
uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
qd[H1(off[beat])] = data; \
@@ -412,13 +429,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
uint32_t addr, data; \
int y; /* y counts 0 2 0 2 */ \
uint16_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
continue; \
} \
addr = base + off[beat] * 8 + (beat & 1) * 4; \
- data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y); \
qd[H2(off[beat])] = data; \
data >>= 16; \
@@ -437,13 +456,15 @@ DO_VSTR64_SG(vstrd_sg_wb_ud, ADDR_ADD, true)
uint32_t addr, data; \
uint32_t *qd; \
int y; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
continue; \
} \
addr = base + off[beat] * 4; \
- data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
y = (beat + (O1 & 2)) & 3; \
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \
qd[H4(off[beat] >> 2)] = data; \
@@ -474,13 +495,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9)
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
uint32_t addr, data; \
uint8_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
continue; \
} \
addr = base + off[beat] * 2; \
- data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
for (e = 0; e < 4; e++, data >>= 8) { \
qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
qd[H1(off[beat] + (e >> 1))] = data; \
@@ -498,13 +521,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9)
uint32_t addr, data; \
int e; \
uint16_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
continue; \
} \
addr = base + off[beat] * 4; \
- data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
for (e = 0; e < 2; e++, data >>= 16) { \
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
qd[H2(off[beat])] = data; \
@@ -521,13 +546,15 @@ DO_VLD4W(vld43w, 6, 7, 8, 9)
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
uint32_t addr, data; \
uint32_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
continue; \
} \
addr = base + off[beat]; \
- data = cpu_ldl_le_data_ra(env, addr, GETPC()); \
+ data = cpu_ldl_mmu(env, addr, oi, GETPC()); \
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \
qd[H4(off[beat] >> 3)] = data; \
} \
@@ -550,6 +577,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
uint16_t mask = mve_eci_mask(env); \
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
uint32_t addr, data; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
@@ -561,7 +590,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
uint8_t *qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + e); \
data = (data << 8) | qd[H1(off[beat])]; \
} \
- cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ cpu_stl_mmu(env, addr, data, oi, GETPC()); \
} \
}
@@ -575,6 +604,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
uint32_t addr, data; \
int y; /* y counts 0 2 0 2 */ \
uint16_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0, y = 0; beat < 4; beat++, mask >>= 4, y ^= 2) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
@@ -585,7 +616,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
data = qd[H2(off[beat])]; \
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + y + 1); \
data |= qd[H2(off[beat])] << 16; \
- cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ cpu_stl_mmu(env, addr, data, oi, GETPC()); \
} \
}
@@ -599,6 +630,8 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
uint32_t addr, data; \
uint32_t *qd; \
int y; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
@@ -608,7 +641,7 @@ DO_VLD2W(vld21w, 8, 12, 16, 20)
y = (beat + (O1 & 2)) & 3; \
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + y); \
data = qd[H4(off[beat] >> 2)]; \
- cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ cpu_stl_mmu(env, addr, data, oi, GETPC()); \
} \
}
@@ -636,6 +669,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
uint32_t addr, data; \
uint8_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
@@ -647,7 +682,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
qd = (uint8_t *)aa32_vfp_qreg(env, qnidx + (e & 1)); \
data = (data << 8) | qd[H1(off[beat] + (e >> 1))]; \
} \
- cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ cpu_stl_mmu(env, addr, data, oi, GETPC()); \
} \
}
@@ -661,6 +696,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
uint32_t addr, data; \
int e; \
uint16_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
@@ -672,7 +709,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
qd = (uint16_t *)aa32_vfp_qreg(env, qnidx + e); \
data = (data << 16) | qd[H2(off[beat])]; \
} \
- cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ cpu_stl_mmu(env, addr, data, oi, GETPC()); \
} \
}
@@ -685,6 +722,8 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
static const uint8_t off[4] = { O1, O2, O3, O4 }; \
uint32_t addr, data; \
uint32_t *qd; \
+ int mmu_idx = arm_to_core_mmu_idx(arm_mmu_idx(env)); \
+ MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx); \
for (beat = 0; beat < 4; beat++, mask >>= 4) { \
if ((mask & 1) == 0) { \
/* ECI says skip this beat */ \
@@ -693,7 +732,7 @@ DO_VST4W(vst43w, 6, 7, 8, 9)
addr = base + off[beat]; \
qd = (uint32_t *)aa32_vfp_qreg(env, qnidx + (beat & 1)); \
data = qd[H4(off[beat] >> 3)]; \
- cpu_stl_le_data_ra(env, addr, data, GETPC()); \
+ cpu_stl_mmu(env, addr, data, oi, GETPC()); \
} \
}
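With this conversion every beat of the interleaving loads/stores (VLD2/VLD4/VST2/VST4) goes through cpu_ldl_mmu()/cpu_stl_mmu() with MO_TEUL | MO_ALIGN, so a beat address that is not 4-aligned now raises an alignment fault instead of being accepted by the old cpu_ldl_le_data_ra()/cpu_stl_le_data_ra() path; MO_TEUL also switches these beats from explicitly little-endian to target-endian accesses. The per-beat access, schematically:

    MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mmu_idx);
    data = cpu_ldl_mmu(env, addr, oi, GETPC());   /* faults if addr & 3 */
    cpu_stl_mmu(env, addr, data, oi, GETPC());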
@@ -2165,27 +2204,6 @@ DO_VSHLL_ALL(vshllt, true)
DO_VSHRN(OP##tb, true, 1, uint8_t, 2, uint16_t, FN) \
DO_VSHRN(OP##th, true, 2, uint16_t, 4, uint32_t, FN)
-static inline uint64_t do_urshr(uint64_t x, unsigned sh)
-{
- if (likely(sh < 64)) {
- return (x >> sh) + ((x >> (sh - 1)) & 1);
- } else if (sh == 64) {
- return x >> 63;
- } else {
- return 0;
- }
-}
-
-static inline int64_t do_srshr(int64_t x, unsigned sh)
-{
- if (likely(sh < 64)) {
- return (x >> sh) + ((x >> (sh - 1)) & 1);
- } else {
- /* Rounding the sign bit always produces 0. */
- return 0;
- }
-}
-
DO_VSHRN_ALL(vshrn, DO_SHR)
DO_VSHRN_ALL(vrshrn, do_urshr)
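Finally, the do_urshr()/do_srshr() rounding right-shift helpers are deleted from this file even though DO_VSHRN_ALL(vrshrn, do_urshr) still references do_urshr, so they have presumably moved to a shared header (vec_internal.h is already included at the top of the file; this is an assumption, the patch context does not show the new home). Their semantics, for reference, round half upward by adding back bit sh-1 of the shifted-out part:

    do_urshr(5, 1);  /* (5 >> 1) + ((5 >> 0) & 1) == 2 + 1 == 3, i.e. round(2.5)  */
    do_urshr(4, 1);  /* (4 >> 1) + ((4 >> 0) & 1) == 2 + 0 == 2                   */
    do_urshr(7, 2);  /* (7 >> 2) + ((7 >> 1) & 1) == 1 + 1 == 2, i.e. round(1.75) */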