diff options
-rw-r--r-- | target/arm/helper-mve.h | 5 | ||||
-rw-r--r-- | target/arm/mve.decode | 8 | ||||
-rw-r--r-- | target/arm/mve_helper.c | 81 | ||||
-rw-r--r-- | target/arm/translate-mve.c | 14 |
4 files changed, 108 insertions, 0 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h index 6d4052a..f6345c7 100644 --- a/target/arm/helper-mve.h +++ b/target/arm/helper-mve.h @@ -182,6 +182,11 @@ DEF_HELPER_FLAGS_4(mve_vcvt_rm_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vcvt_rm_ss, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) DEF_HELPER_FLAGS_4(mve_vcvt_rm_us, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) +DEF_HELPER_FLAGS_3(mve_vcvtb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr) +DEF_HELPER_FLAGS_3(mve_vcvtt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr) +DEF_HELPER_FLAGS_3(mve_vcvtb_hs, TCG_CALL_NO_WG, void, env, ptr, ptr) +DEF_HELPER_FLAGS_3(mve_vcvtt_hs, TCG_CALL_NO_WG, void, env, ptr, ptr) + DEF_HELPER_FLAGS_3(mve_vmovnbb, TCG_CALL_NO_WG, void, env, ptr, ptr) DEF_HELPER_FLAGS_3(mve_vmovnbh, TCG_CALL_NO_WG, void, env, ptr, ptr) DEF_HELPER_FLAGS_3(mve_vmovntb, TCG_CALL_NO_WG, void, env, ptr, ptr) diff --git a/target/arm/mve.decode b/target/arm/mve.decode index 55e8ce3..10f0f1d 100644 --- a/target/arm/mve.decode +++ b/target/arm/mve.decode @@ -221,6 +221,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op # The VSHLL T2 encoding is not a @2op pattern, but is here because it # overlaps what would be size=0b11 VMULH/VRMULH { + VCVTB_SH 111 0 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz + VMAXNMA 111 0 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=2 VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b @@ -235,6 +237,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op } { + VCVTB_HS 111 1 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz + VMAXNMA 111 1 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=1 VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b @@ -247,6 +251,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op } { + VCVTT_SH 111 0 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz + VMINNMA 111 0 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=2 VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h @@ -260,6 +266,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op } { + VCVTT_HS 111 1 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz + VMINNMA 111 1 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=1 VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c index a793199..1ed76ac 100644 --- a/target/arm/mve_helper.c +++ b/target/arm/mve_helper.c @@ -3332,3 +3332,84 @@ DO_VCVT_RMODE(vcvt_rm_sh, 2, uint16_t, helper_vfp_toshh) DO_VCVT_RMODE(vcvt_rm_uh, 2, uint16_t, helper_vfp_touhh) DO_VCVT_RMODE(vcvt_rm_ss, 4, uint32_t, helper_vfp_tosls) DO_VCVT_RMODE(vcvt_rm_us, 4, uint32_t, helper_vfp_touls) + +/* + * VCVT between halfprec and singleprec. As usual for halfprec + * conversions, FZ16 is ignored and AHP is observed. + */ +static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top) +{ + uint16_t *d = vd; + uint32_t *m = vm; + uint16_t r; + uint16_t mask = mve_element_mask(env); + bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP); + unsigned e; + float_status *fpst; + float_status scratch_fpst; + float_status *base_fpst = &env->vfp.standard_fp_status; + bool old_fz = get_flush_to_zero(base_fpst); + set_flush_to_zero(false, base_fpst); + for (e = 0; e < 16 / 4; e++, mask >>= 4) { + if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) { + continue; + } + fpst = base_fpst; + if (!(mask & 1)) { + /* We need the result but without updating flags */ + scratch_fpst = *fpst; + fpst = &scratch_fpst; + } + r = float32_to_float16(m[H4(e)], ieee, fpst); + mergemask(&d[H2(e * 2 + top)], r, mask >> (top * 2)); + } + set_flush_to_zero(old_fz, base_fpst); + mve_advance_vpt(env); +} + +static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top) +{ + uint32_t *d = vd; + uint16_t *m = vm; + uint32_t r; + uint16_t mask = mve_element_mask(env); + bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP); + unsigned e; + float_status *fpst; + float_status scratch_fpst; + float_status *base_fpst = &env->vfp.standard_fp_status; + bool old_fiz = get_flush_inputs_to_zero(base_fpst); + set_flush_inputs_to_zero(false, base_fpst); + for (e = 0; e < 16 / 4; e++, mask >>= 4) { + if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) { + continue; + } + fpst = base_fpst; + if (!(mask & (1 << (top * 2)))) { + /* We need the result but without updating flags */ + scratch_fpst = *fpst; + fpst = &scratch_fpst; + } + r = float16_to_float32(m[H2(e * 2 + top)], ieee, fpst); + mergemask(&d[H4(e)], r, mask); + } + set_flush_inputs_to_zero(old_fiz, base_fpst); + mve_advance_vpt(env); +} + +void HELPER(mve_vcvtb_sh)(CPUARMState *env, void *vd, void *vm) +{ + do_vcvt_sh(env, vd, vm, 0); +} +void HELPER(mve_vcvtt_sh)(CPUARMState *env, void *vd, void *vm) +{ + do_vcvt_sh(env, vd, vm, 1); +} +void HELPER(mve_vcvtb_hs)(CPUARMState *env, void *vd, void *vm) +{ + do_vcvt_hs(env, vd, vm, 0); +} +void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm) +{ + do_vcvt_hs(env, vd, vm, 1); +} diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c index e80a55e..194ef99 100644 --- a/target/arm/translate-mve.c +++ b/target/arm/translate-mve.c @@ -627,6 +627,20 @@ DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true) DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false) DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true) +#define DO_VCVT_SH(INSN, FN) \ + static bool trans_##INSN(DisasContext *s, arg_1op *a) \ + { \ + if (!dc_isar_feature(aa32_mve_fp, s)) { \ + return false; \ + } \ + return do_1op(s, a, gen_helper_mve_##FN); \ + } \ + +DO_VCVT_SH(VCVTB_SH, vcvtb_sh) +DO_VCVT_SH(VCVTT_SH, vcvtt_sh) +DO_VCVT_SH(VCVTB_HS, vcvtb_hs) +DO_VCVT_SH(VCVTT_HS, vcvtt_hs) + /* Narrowing moves: only size 0 and 1 are valid */ #define DO_VMOVN(INSN, FN) \ static bool trans_##INSN(DisasContext *s, arg_1op *a) \ |