diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-07-08 20:45:14 +0530 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2022-07-11 13:19:35 +0100 |
commit | e9ad3ef19ee4af62152fdc7f1150bf59a7f997d0 (patch) | |
tree | cc331437b8d8ed6085e86986a647c0bf3e46ec49 /target/arm/translate-sme.c | |
parent | ad939afbfa4a0709d44d5806576607154a7489a8 (diff) | |
download | qemu-e9ad3ef19ee4af62152fdc7f1150bf59a7f997d0.zip qemu-e9ad3ef19ee4af62152fdc7f1150bf59a7f997d0.tar.gz qemu-e9ad3ef19ee4af62152fdc7f1150bf59a7f997d0.tar.bz2 |
target/arm: Implement SME MOVA
We can reuse the SVE functions for implementing moves to/from
horizontal tile slices, but we need new ones for moves to/from
vertical tile slices.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20220708151540.18136-20-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/translate-sme.c')
-rw-r--r-- | target/arm/translate-sme.c | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c index 9715045..79c33d3 100644 --- a/target/arm/translate-sme.c +++ b/target/arm/translate-sme.c @@ -35,6 +35,74 @@ #include "decode-sme.c.inc" +/* + * Resolve tile.size[index] to a host pointer, where tile and index + * are always decoded together, dependent on the element size. + */ +static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs, + int tile_index, bool vertical) +{ + int tile = tile_index >> (4 - esz); + int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz); + int pos, len, offset; + TCGv_i32 tmp; + TCGv_ptr addr; + + /* Compute the final index, which is Rs+imm. */ + tmp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs)); + tcg_gen_addi_i32(tmp, tmp, index); + + /* Prepare a power-of-two modulo via extraction of @len bits. */ + len = ctz32(streaming_vec_reg_size(s)) - esz; + + if (vertical) { + /* + * Compute the byte offset of the index within the tile: + * (index % (svl / size)) * size + * = (index % (svl >> esz)) << esz + * Perform the power-of-two modulo via extraction of the low @len bits. + * Perform the multiply by shifting left by @pos bits. + * Perform these operations simultaneously via deposit into zero. + */ + pos = esz; + tcg_gen_deposit_z_i32(tmp, tmp, pos, len); + + /* + * For big-endian, adjust the indexed column byte offset within + * the uint64_t host words that make up env->zarray[]. + */ + if (HOST_BIG_ENDIAN && esz < MO_64) { + tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz)); + } + } else { + /* + * Compute the byte offset of the index within the tile: + * (index % (svl / size)) * (size * sizeof(row)) + * = (index % (svl >> esz)) << (esz + log2(sizeof(row))) + */ + pos = esz + ctz32(sizeof(ARMVectorReg)); + tcg_gen_deposit_z_i32(tmp, tmp, pos, len); + + /* Row slices are always aligned and need no endian adjustment. */ + } + + /* The tile byte offset within env->zarray is the row. */ + offset = tile * sizeof(ARMVectorReg); + + /* Include the byte offset of zarray to make this relative to env. */ + offset += offsetof(CPUARMState, zarray); + tcg_gen_addi_i32(tmp, tmp, offset); + + /* Add the byte offset to env to produce the final pointer. */ + addr = tcg_temp_new_ptr(); + tcg_gen_ext_i32_ptr(addr, tmp); + tcg_temp_free_i32(tmp); + tcg_gen_add_ptr(addr, addr, cpu_env); + + return addr; +} + static bool trans_ZERO(DisasContext *s, arg_ZERO *a) { if (!dc_isar_feature(aa64_sme, s)) { @@ -46,3 +114,62 @@ static bool trans_ZERO(DisasContext *s, arg_ZERO *a) } return true; } + +static bool trans_MOVA(DisasContext *s, arg_MOVA *a) +{ + static gen_helper_gvec_4 * const h_fns[5] = { + gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h, + gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d, + gen_helper_sve_sel_zpzz_q + }; + static gen_helper_gvec_3 * const cz_fns[5] = { + gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h, + gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d, + gen_helper_sme_mova_cz_q, + }; + static gen_helper_gvec_3 * const zc_fns[5] = { + gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h, + gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d, + gen_helper_sme_mova_zc_q, + }; + + TCGv_ptr t_za, t_zr, t_pg; + TCGv_i32 t_desc; + int svl; + + if (!dc_isar_feature(aa64_sme, s)) { + return false; + } + if (!sme_smza_enabled_check(s)) { + return true; + } + + t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v); + t_zr = vec_full_reg_ptr(s, a->zr); + t_pg = pred_full_reg_ptr(s, a->pg); + + svl = streaming_vec_reg_size(s); + t_desc = tcg_constant_i32(simd_desc(svl, svl, 0)); + + if (a->v) { + /* Vertical slice -- use sme mova helpers. */ + if (a->to_vec) { + zc_fns[a->esz](t_zr, t_za, t_pg, t_desc); + } else { + cz_fns[a->esz](t_za, t_zr, t_pg, t_desc); + } + } else { + /* Horizontal slice -- reuse sve sel helpers. */ + if (a->to_vec) { + h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc); + } else { + h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc); + } + } + + tcg_temp_free_ptr(t_za); + tcg_temp_free_ptr(t_zr); + tcg_temp_free_ptr(t_pg); + + return true; +} |