diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-09-13 10:54:38 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-09-21 16:28:27 +0100 |
commit | a7789fabe15ee0e0d0f227c287a5c0d01ebcc4b4 (patch) | |
tree | a2a5b547237d7f43186ae8429f607784f79ce706 | |
parent | 752970ef7c82e14a65ed979ee19a8cfcd84871e4 (diff) | |
download | qemu-a7789fabe15ee0e0d0f227c287a5c0d01ebcc4b4.zip qemu-a7789fabe15ee0e0d0f227c287a5c0d01ebcc4b4.tar.gz qemu-a7789fabe15ee0e0d0f227c287a5c0d01ebcc4b4.tar.bz2 |
target/arm: Optimize MVE VSHLL and VMOVL
Optimize the MVE VSHLL insns by using TCG vector ops when possible.
This includes the VMOVL insn, which we handle in mve.decode as "VSHLL
with zero shift count".
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210913095440.13462-11-peter.maydell@linaro.org
-rw-r--r-- | target/arm/translate-mve.c | 67 |
1 files changed, 59 insertions, 8 deletions
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index 00fa437..5d66f70 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -1735,16 +1735,67 @@ DO_2SHIFT_SCALAR(VQSHL_U_scalar, vqshli_u) DO_2SHIFT_SCALAR(VQRSHL_S_scalar, vqrshli_s) DO_2SHIFT_SCALAR(VQRSHL_U_scalar, vqrshli_u) -#define DO_VSHLL(INSN, FN) \ - static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ - { \ - static MVEGenTwoOpShiftFn * const fns[] = { \ - gen_helper_mve_##FN##b, \ - gen_helper_mve_##FN##h, \ - }; \ - return do_2shift(s, a, fns[a->size], false); \ +#define DO_VSHLL(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_2shift *a) \ + { \ + static MVEGenTwoOpShiftFn * const fns[] = { \ + gen_helper_mve_##FN##b, \ + gen_helper_mve_##FN##h, \ + }; \ + return do_2shift_vec(s, a, fns[a->size], false, do_gvec_##FN); \ } +/* + * For the VSHLL vector helpers, the vece is the size of the input + * (ie MO_8 or MO_16); the helpers want to work in the output size. + * The shift count can be 0..<input size>, inclusive. (0 is VMOVL.) + */ +static void do_gvec_vshllbs(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + unsigned ovece = vece + 1; + unsigned ibits = vece == MO_8 ? 8 : 16; + tcg_gen_gvec_shli(ovece, dofs, aofs, ibits, oprsz, maxsz); + tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz); +} + +static void do_gvec_vshllbu(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + unsigned ovece = vece + 1; + tcg_gen_gvec_andi(ovece, dofs, aofs, + ovece == MO_16 ? 0xff : 0xffff, oprsz, maxsz); + tcg_gen_gvec_shli(ovece, dofs, dofs, shift, oprsz, maxsz); +} + +static void do_gvec_vshllts(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + unsigned ovece = vece + 1; + unsigned ibits = vece == MO_8 ? 8 : 16; + if (shift == 0) { + tcg_gen_gvec_sari(ovece, dofs, aofs, ibits, oprsz, maxsz); + } else { + tcg_gen_gvec_andi(ovece, dofs, aofs, + ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz); + tcg_gen_gvec_sari(ovece, dofs, dofs, ibits - shift, oprsz, maxsz); + } +} + +static void do_gvec_vshlltu(unsigned vece, uint32_t dofs, uint32_t aofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + unsigned ovece = vece + 1; + unsigned ibits = vece == MO_8 ? 8 : 16; + if (shift == 0) { + tcg_gen_gvec_shri(ovece, dofs, aofs, ibits, oprsz, maxsz); + } else { + tcg_gen_gvec_andi(ovece, dofs, aofs, + ovece == MO_16 ? 0xff00 : 0xffff0000, oprsz, maxsz); + tcg_gen_gvec_shri(ovece, dofs, dofs, ibits - shift, oprsz, maxsz); + } +} + DO_VSHLL(VSHLL_BS, vshllbs) DO_VSHLL(VSHLL_BU, vshllbu) DO_VSHLL(VSHLL_TS, vshllts) |