diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-08-13 17:11:57 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-08-25 10:48:50 +0100 |
commit | 075e7e97e3a042854b8ea2827559891a577b4a6b (patch) | |
tree | baf862c09e846f5aa3c054bbc00992a6c0e79ea9 /target/arm/translate-mve.c | |
parent | fac80f0856cc465b21e2e59a64146b3540e055db (diff) | |
download | qemu-075e7e97e3a042854b8ea2827559891a577b4a6b.zip qemu-075e7e97e3a042854b8ea2827559891a577b4a6b.tar.gz qemu-075e7e97e3a042854b8ea2827559891a577b4a6b.tar.bz2 |
target/arm: Implement MVE interleaving loads/stores
Implement the MVE interleaving load/store functions VLD2, VLD4, VST2
and VST4. VLD2 loads 16 bytes of data from memory and writes to 2
consecutive Qregs; VLD4 loads 16 bytes of data from memory and writes
to 4 consecutive Qregs. The 'pattern' field in the encoding
determines the offset into memory which is accessed and also which
elements in the Qregs are written to. (The intention is that a
sequence of four consecutive VLD4 with different pattern values
performs a complete de-interleaving load of 64 bytes into all
elements of the 4 Qregs.) VST2 and VST4 do the same, but for stores.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/translate-mve.c')
-rw-r--r-- | target/arm/translate-mve.c | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index d3cb339..78229c4 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -35,6 +35,7 @@ static inline int vidup_imm(DisasContext *s, int x)

 typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void MVEGenLdStSGFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
+typedef void MVEGenLdStIlFn(TCGv_ptr, TCGv_i32, TCGv_i32);
 typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
 typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
@@ -378,6 +379,99 @@ static bool trans_VSTRD_sg_imm(DisasContext *s, arg_vldst_sg_imm *a)
     return do_ldst_sg_imm(s, a, fns[a->w], MO_64);
 }

+static bool do_vldst_il(DisasContext *s, arg_vldst_il *a, MVEGenLdStIlFn *fn,
+                        int addrinc)
+{
+    TCGv_i32 rn;
+
+    if (!dc_isar_feature(aa32_mve, s) ||
+        !mve_check_qreg_bank(s, a->qd) ||
+        !fn || (a->rn == 13 && a->w) || a->rn == 15) {
+        /* Variously UNPREDICTABLE or UNDEF or related-encoding */
+        return false;
+    }
+    if (!mve_eci_check(s) || !vfp_access_check(s)) {
+        return true;
+    }
+
+    rn = load_reg(s, a->rn);
+    /*
+     * We pass the index of Qd, not a pointer, because the helper must
+     * access multiple Q registers starting at Qd and working up.
+     */
+    fn(cpu_env, tcg_constant_i32(a->qd), rn);
+
+    if (a->w) {
+        tcg_gen_addi_i32(rn, rn, addrinc);
+        store_reg(s, a->rn, rn);
+    } else {
+        tcg_temp_free_i32(rn);
+    }
+    mve_update_and_store_eci(s);
+    return true;
+}
+
+/* This macro is just to make the arrays more compact in these functions */
+#define F(N) gen_helper_mve_##N
+
+static bool trans_VLD2(DisasContext *s, arg_vldst_il *a)
+{
+    static MVEGenLdStIlFn * const fns[4][4] = {
+        { F(vld20b), F(vld20h), F(vld20w), NULL, },
+        { F(vld21b), F(vld21h), F(vld21w), NULL, },
+        { NULL, NULL, NULL, NULL },
+        { NULL, NULL, NULL, NULL },
+    };
+    if (a->qd > 6) {
+        return false;
+    }
+    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
+}
+
+static bool trans_VLD4(DisasContext *s, arg_vldst_il *a)
+{
+    static MVEGenLdStIlFn * const fns[4][4] = {
+        { F(vld40b), F(vld40h), F(vld40w), NULL, },
+        { F(vld41b), F(vld41h), F(vld41w), NULL, },
+        { F(vld42b), F(vld42h), F(vld42w), NULL, },
+        { F(vld43b), F(vld43h), F(vld43w), NULL, },
+    };
+    if (a->qd > 4) {
+        return false;
+    }
+    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
+}
+
+static bool trans_VST2(DisasContext *s, arg_vldst_il *a)
+{
+    static MVEGenLdStIlFn * const fns[4][4] = {
+        { F(vst20b), F(vst20h), F(vst20w), NULL, },
+        { F(vst21b), F(vst21h), F(vst21w), NULL, },
+        { NULL, NULL, NULL, NULL },
+        { NULL, NULL, NULL, NULL },
+    };
+    if (a->qd > 6) {
+        return false;
+    }
+    return do_vldst_il(s, a, fns[a->pat][a->size], 32);
+}
+
+static bool trans_VST4(DisasContext *s, arg_vldst_il *a)
+{
+    static MVEGenLdStIlFn * const fns[4][4] = {
+        { F(vst40b), F(vst40h), F(vst40w), NULL, },
+        { F(vst41b), F(vst41h), F(vst41w), NULL, },
+        { F(vst42b), F(vst42h), F(vst42w), NULL, },
+        { F(vst43b), F(vst43h), F(vst43w), NULL, },
+    };
+    if (a->qd > 4) {
+        return false;
+    }
+    return do_vldst_il(s, a, fns[a->pat][a->size], 64);
+}
+
+#undef F
+
 static bool trans_VDUP(DisasContext *s, arg_VDUP *a)
 {
     TCGv_ptr qd;