diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2021-08-13 17:11:52 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-08-25 10:48:49 +0100 |
commit | 688ba4cf33f4976e26124c4c24e9eb738615b0bf (patch) | |
tree | 5a13ebbca9856a90caa11b5adf88596a5104cc7f | |
parent | 345910f8c1d687404b62194d929ca32f2ab54e80 (diff) | |
download | qemu-688ba4cf33f4976e26124c4c24e9eb738615b0bf.zip qemu-688ba4cf33f4976e26124c4c24e9eb738615b0bf.tar.gz qemu-688ba4cf33f4976e26124c4c24e9eb738615b0bf.tar.bz2 |
target/arm: Implement MVE integer min/max across vector
Implement the MVE integer min/max across vector insns
VMAXV, VMINV, VMAXAV and VMINAV, which find the maximum
from the vector elements and a general purpose register,
and store the maximum back into the general purpose
register.
These insns overlap with VRMLALDAVH (they use what would
be RdaHi=0b110).
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | target/arm/helper-mve.h | 20 | ||||
-rw-r--r-- | target/arm/mve.decode | 18 | ||||
-rw-r--r-- | target/arm/mve_helper.c | 66 | ||||
-rw-r--r-- | target/arm/translate-mve.c | 48 |
4 files changed, 150 insertions, 2 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h index 0ee5ea3..2c66fcb 100644 --- a/target/arm/helper-mve.h +++ b/target/arm/helper-mve.h @@ -379,6 +379,26 @@ DEF_HELPER_FLAGS_3(mve_vaddvuh, TCG_CALL_NO_WG, i32, env, ptr, i32) DEF_HELPER_FLAGS_3(mve_vaddvsw, TCG_CALL_NO_WG, i32, env, ptr, i32) DEF_HELPER_FLAGS_3(mve_vaddvuw, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxvsb, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxvsh, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxvsw, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxvub, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxvuh, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxvuw, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxavb, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxavh, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vmaxavw, TCG_CALL_NO_WG, i32, env, ptr, i32) + +DEF_HELPER_FLAGS_3(mve_vminvsb, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminvsh, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminvsw, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminvub, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminvuh, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminvuw, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminavb, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminavh, TCG_CALL_NO_WG, i32, env, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vminavw, TCG_CALL_NO_WG, i32, env, ptr, i32) + DEF_HELPER_FLAGS_3(mve_vaddlv_s, TCG_CALL_NO_WG, i64, env, ptr, i64) DEF_HELPER_FLAGS_3(mve_vaddlv_u, TCG_CALL_NO_WG, i64, env, ptr, i64) diff --git a/target/arm/mve.decode b/target/arm/mve.decode index bdcd660..83dc030 100644 --- a/target/arm/mve.decode +++ b/target/arm/mve.decode @@ -40,6 +40,7 @@ &vcmp qm qn size mask &vcmp_scalar qn rm size mask &shl_scalar qda rm size +&vmaxv qm rda size @vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0 # Note that both Rn and Qd are 3 bits only (no D bit) @@ -97,6 +98,8 @@ @vcmp_scalar .... .... .. size:2 qn:3 . .... .... .... rm:4 &vcmp_scalar \ mask=%mask_22_13 +@vmaxv .... .... .... size:2 .. rda:4 .... .... .... &vmaxv qm=%qm + # Vector loads and stores # Widening loads and narrowing stores: @@ -314,8 +317,19 @@ VMLALDAV_U 1111 1110 1 ... ... . ... . 1110 . 0 . 0 ... 0 @vmlaldav VMLSLDAV 1110 1110 1 ... ... . ... . 1110 . 0 . 0 ... 1 @vmlaldav -VRMLALDAVH_S 1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz -VRMLALDAVH_U 1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz +{ + VMAXV_S 1110 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv + VMINV_S 1110 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv + VMAXAV 1110 1110 1110 .. 00 .... 1111 0 0 . 0 ... 0 @vmaxv + VMINAV 1110 1110 1110 .. 00 .... 1111 1 0 . 0 ... 0 @vmaxv + VRMLALDAVH_S 1110 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz +} + +{ + VMAXV_U 1111 1110 1110 .. 10 .... 1111 0 0 . 0 ... 0 @vmaxv + VMINV_U 1111 1110 1110 .. 10 .... 1111 1 0 . 0 ... 0 @vmaxv + VRMLALDAVH_U 1111 1110 1 ... ... 0 ... . 1111 . 0 . 0 ... 0 @vmlaldav_nosz +} VRMLSLDAVH 1111 1110 1 ... ... 0 ... . 1110 . 0 . 0 ... 1 @vmlaldav_nosz diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index ac608fc..924ad7f 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -1254,6 +1254,72 @@ DO_VADDV(vaddvub, 1, uint8_t) DO_VADDV(vaddvuh, 2, uint16_t) DO_VADDV(vaddvuw, 4, uint32_t) +/* + * Vector max/min across vector. Unlike VADDV, we must + * read ra as the element size, not its full width. + * We work with int64_t internally for simplicity. + */ +#define DO_VMAXMINV(OP, ESIZE, TYPE, RATYPE, FN) \ + uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \ + uint32_t ra_in) \ + { \ + uint16_t mask = mve_element_mask(env); \ + unsigned e; \ + TYPE *m = vm; \ + int64_t ra = (RATYPE)ra_in; \ + for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \ + if (mask & 1) { \ + ra = FN(ra, m[H##ESIZE(e)]); \ + } \ + } \ + mve_advance_vpt(env); \ + return ra; \ + } \ + +#define DO_VMAXMINV_U(INSN, FN) \ + DO_VMAXMINV(INSN##b, 1, uint8_t, uint8_t, FN) \ + DO_VMAXMINV(INSN##h, 2, uint16_t, uint16_t, FN) \ + DO_VMAXMINV(INSN##w, 4, uint32_t, uint32_t, FN) +#define DO_VMAXMINV_S(INSN, FN) \ + DO_VMAXMINV(INSN##b, 1, int8_t, int8_t, FN) \ + DO_VMAXMINV(INSN##h, 2, int16_t, int16_t, FN) \ + DO_VMAXMINV(INSN##w, 4, int32_t, int32_t, FN) + +/* + * Helpers for max and min of absolute values across vector: + * note that we only take the absolute value of 'm', not 'n' + */ +static int64_t do_maxa(int64_t n, int64_t m) +{ + if (m < 0) { + m = -m; + } + return MAX(n, m); +} + +static int64_t do_mina(int64_t n, int64_t m) +{ + if (m < 0) { + m = -m; + } + return MIN(n, m); +} + +DO_VMAXMINV_S(vmaxvs, DO_MAX) +DO_VMAXMINV_U(vmaxvu, DO_MAX) +DO_VMAXMINV_S(vminvs, DO_MIN) +DO_VMAXMINV_U(vminvu, DO_MIN) +/* + * VMAXAV, VMINAV treat the general purpose input as unsigned + * and the vector elements as signed. + */ +DO_VMAXMINV(vmaxavb, 1, int8_t, uint8_t, do_maxa) +DO_VMAXMINV(vmaxavh, 2, int16_t, uint16_t, do_maxa) +DO_VMAXMINV(vmaxavw, 4, int32_t, uint32_t, do_maxa) +DO_VMAXMINV(vminavb, 1, int8_t, uint8_t, do_mina) +DO_VMAXMINV(vminavh, 2, int16_t, uint16_t, do_mina) +DO_VMAXMINV(vminavw, 4, int32_t, uint32_t, do_mina) + #define DO_VADDLV(OP, TYPE, LTYPE) \ uint64_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vm, \ uint64_t ra) \ diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index 44731fc..2fce74f 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -1321,3 +1321,51 @@ DO_VCMP(VCMPGE, vcmpge) DO_VCMP(VCMPLT, vcmplt) DO_VCMP(VCMPGT, vcmpgt) DO_VCMP(VCMPLE, vcmple) + +static bool do_vmaxv(DisasContext *s, arg_vmaxv *a, MVEGenVADDVFn fn) +{ + /* + * MIN/MAX operations across a vector: compute the min or + * max of the initial value in a general purpose register + * and all the elements in the vector, and store it back + * into the general purpose register. + */ + TCGv_ptr qm; + TCGv_i32 rda; + + if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qm) || + !fn || a->rda == 13 || a->rda == 15) { + /* Rda cases are UNPREDICTABLE */ + return false; + } + if (!mve_eci_check(s) || !vfp_access_check(s)) { + return true; + } + + qm = mve_qreg_ptr(a->qm); + rda = load_reg(s, a->rda); + fn(rda, cpu_env, qm, rda); + store_reg(s, a->rda, rda); + tcg_temp_free_ptr(qm); + mve_update_eci(s); + return true; +} + +#define DO_VMAXV(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_vmaxv *a) \ + { \ + static MVEGenVADDVFn * const fns[] = { \ + gen_helper_mve_##FN##b, \ + gen_helper_mve_##FN##h, \ + gen_helper_mve_##FN##w, \ + NULL, \ + }; \ + return do_vmaxv(s, a, fns[a->size]); \ + } + +DO_VMAXV(VMAXV_S, vmaxvs) +DO_VMAXV(VMAXV_U, vmaxvu) +DO_VMAXV(VMAXAV, vmaxav) +DO_VMAXV(VMINV_S, vminvs) +DO_VMAXV(VMINV_U, vminvu) +DO_VMAXV(VMINAV, vminav) |