aboutsummaryrefslogtreecommitdiff
path: root/target/arm/translate-mve.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-08-13 17:11:57 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-08-25 10:48:50 +0100
commit075e7e97e3a042854b8ea2827559891a577b4a6b (patch)
treebaf862c09e846f5aa3c054bbc00992a6c0e79ea9 /target/arm/translate-mve.c
parentfac80f0856cc465b21e2e59a64146b3540e055db (diff)
downloadqemu-075e7e97e3a042854b8ea2827559891a577b4a6b.zip
qemu-075e7e97e3a042854b8ea2827559891a577b4a6b.tar.gz
qemu-075e7e97e3a042854b8ea2827559891a577b4a6b.tar.bz2
target/arm: Implement MVE interleaving loads/stores
Implement the MVE interleaving load/store functions VLD2, VLD4, VST2 and VST4. VLD2 loads 16 bytes of data from memory and writes to 2 consecutive Qregs; VLD4 loads 16 bytes of data from memory and writes to 4 consecutive Qregs. The 'pattern' field in the encoding determines the offset into memory which is accessed and also which elements in the Qregs are written to. (The intention is that a sequence of four consecutive VLD4 with different pattern values performs a complete de-interleaving load of 64 bytes into all elements of the 4 Qregs.) VST2 and VST4 do the same, but for stores. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/translate-mve.c')
-rw-r--r--target/arm/translate-mve.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index d3cb339..78229c4 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -35,6 +35,7 @@ static inline int vidup_imm(DisasContext *s, int x)
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
@@ -378,6 +379,99 @@ static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
}
+static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
+ int addrinc)
+{
+ TCGv_i32 rn;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd) ||
+ !fn || (a->rn == 13 && a->w) || a->rn == 15) {
+ /* Variously UNPREDICTABLE or UNDEF or related-encoding */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ rn = load_reg(s, a->rn);
+ /*
+ * We pass the index of Qd, not a pointer, because the helper must
+ * access multiple Q registers starting at Qd and working up.
+ */
+ fn(cpu_env, tcg_constant_i32(a->qd), rn);
+
+ if (a->w) {
+ tcg_gen_addi_i32(rn, rn, addrinc);
+ store_reg(s, a->rn, rn);
+ } else {
+ tcg_temp_free_i32(rn);
+ }
+ mve_update_and_store_eci(s);
+ return true;
+}
+
+/* This macro is just to make the arrays more compact in these functions */
+#define F(N) gen_helper_mve_##N
+
+static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vld20b), F(vld20h), F(vld20w), NULL, },
+ { F(vld21b), F(vld21h), F(vld21w), NULL, },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ };
+ if (a->qd > 6) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 32);
+}
+
+static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vld40b), F(vld40h), F(vld40w), NULL, },
+ { F(vld41b), F(vld41h), F(vld41w), NULL, },
+ { F(vld42b), F(vld42h), F(vld42w), NULL, },
+ { F(vld43b), F(vld43h), F(vld43w), NULL, },
+ };
+ if (a->qd > 4) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 64);
+}
+
+static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vst20b), F(vst20h), F(vst20w), NULL, },
+ { F(vst21b), F(vst21h), F(vst21w), NULL, },
+ { NULL, NULL, NULL, NULL },
+ { NULL, NULL, NULL, NULL },
+ };
+ if (a->qd > 6) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 32);
+}
+
+static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
+{
+ static MVEGenLdStIlFn * const fns[4][4] = {
+ { F(vst40b), F(vst40h), F(vst40w), NULL, },
+ { F(vst41b), F(vst41h), F(vst41w), NULL, },
+ { F(vst42b), F(vst42h), F(vst42w), NULL, },
+ { F(vst43b), F(vst43h), F(vst43w), NULL, },
+ };
+ if (a->qd > 4) {
+ return false;
+ }
+ return do_vldst_il(s, a, fns[a->pat][a->size], 64);
+}
+
+#undef F
+
static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
{
TCGv_ptr qd;