author | Peter Maydell <peter.maydell@linaro.org> | 2020-05-22 15:55:18 +0100
---|---|---
committer | Peter Maydell <peter.maydell@linaro.org> | 2020-06-05 17:23:10 +0100
commit | 968bf842742a5ffbb0041cb31089e61a9f7a833d (patch) |
tree | c5d2a10ce6a59e7c5d562108a719a04667517ccb | /target/arm/translate-neon.inc.c
parent | b4a3a77bb7a0dff1cc5673fe3be467d9e3635d44 (diff) |
target/arm: Convert Neon VSHLL, VMOVL to decodetree
Convert the VSHLL and VMOVL insns from the 2-reg-shift group
to decodetree. Since the loop always has two passes, we unroll
it to avoid the awkward reassignment of one TCGv to another.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200522145520.6778-8-peter.maydell@linaro.org
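
For context on the instructions being converted: VSHLL widens each element of a 64-bit Neon source vector to twice its width and shifts it left by an immediate, and VMOVL is the shift-of-zero special case. The standalone C sketch below is not QEMU code; the function name vshll_u8_model is invented for illustration, and it models only the unsigned 8-bit case with a shift smaller than the element width.

```c
#include <stdint.h>
#include <stdio.h>

/*
 * Scalar model of VSHLL.U8 Qd, Dm, #shift: widen each byte of the 64-bit
 * source to 16 bits, then shift left.  VMOVL.U8 is the shift == 0 case.
 */
static void vshll_u8_model(uint64_t dm, unsigned shift, uint16_t qd[8])
{
    for (int i = 0; i < 8; i++) {
        uint8_t elem = dm >> (8 * i);
        qd[i] = (uint16_t)(elem << shift);  /* result elements are 2x wide */
    }
}

int main(void)
{
    uint16_t qd[8];

    vshll_u8_model(0x00ff00ff00ff00ffull, 4, qd);
    for (int i = 0; i < 8; i++) {
        printf("qd[%d] = 0x%04x\n", i, qd[i]);
    }
    return 0;
}
```

The translation code in the diff below performs the same widen-then-shift, but on a whole 64-bit register at a time using TCG ops.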
Diffstat (limited to 'target/arm/translate-neon.inc.c')
-rw-r--r-- | target/arm/translate-neon.inc.c | 81
1 file changed, 81 insertions(+), 0 deletions(-)
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 562470c..3d56604 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -1578,3 +1578,84 @@ DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
 DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
 DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
 DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)
+
+static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
+                         NeonGenWidenFn *widenfn, bool u)
+{
+    TCGv_i64 tmp;
+    TCGv_i32 rm0, rm1;
+    uint64_t widen_mask = 0;
+
+    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+        return false;
+    }
+
+    /* UNDEF accesses to D16-D31 if they don't exist. */
+    if (!dc_isar_feature(aa32_simd_r32, s) &&
+        ((a->vd | a->vm) & 0x10)) {
+        return false;
+    }
+
+    if (a->vd & 1) {
+        return false;
+    }
+
+    if (!vfp_access_check(s)) {
+        return true;
+    }
+
+    /*
+     * This is a widen-and-shift operation. The shift is always less
+     * than the width of the source type, so after widening the input
+     * vector we can simply shift the whole 64-bit widened register,
+     * and then clear the potential overflow bits resulting from left
+     * bits of the narrow input appearing as right bits of the left
+     * neighbour narrow input. Calculate a mask of bits to clear.
+     */
+    if ((a->shift != 0) && (a->size < 2 || u)) {
+        int esize = 8 << a->size;
+        widen_mask = MAKE_64BIT_MASK(0, esize);
+        widen_mask >>= esize - a->shift;
+        widen_mask = dup_const(a->size + 1, widen_mask);
+    }
+
+    rm0 = neon_load_reg(a->vm, 0);
+    rm1 = neon_load_reg(a->vm, 1);
+    tmp = tcg_temp_new_i64();
+
+    widenfn(tmp, rm0);
+    if (a->shift != 0) {
+        tcg_gen_shli_i64(tmp, tmp, a->shift);
+        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+    }
+    neon_store_reg64(tmp, a->vd);
+
+    widenfn(tmp, rm1);
+    if (a->shift != 0) {
+        tcg_gen_shli_i64(tmp, tmp, a->shift);
+        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
+    }
+    neon_store_reg64(tmp, a->vd + 1);
+    tcg_temp_free_i64(tmp);
+    return true;
+}
+
+static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+    NeonGenWidenFn *widenfn[] = {
+        gen_helper_neon_widen_s8,
+        gen_helper_neon_widen_s16,
+        tcg_gen_ext_i32_i64,
+    };
+    return do_vshll_2sh(s, a, widenfn[a->size], false);
+}
+
+static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
+{
+    NeonGenWidenFn *widenfn[] = {
+        gen_helper_neon_widen_u8,
+        gen_helper_neon_widen_u16,
+        tcg_gen_extu_i32_i64,
+    };
+    return do_vshll_2sh(s, a, widenfn[a->size], true);
+}
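
The block comment in do_vshll_2sh() explains why a single 64-bit shift plus a mask is enough. As a rough standalone illustration (not QEMU code: widen_s8_to_s16 and the constants are invented here, and it assumes the signed 8-bit case, i.e. a->size == 0, with a non-zero shift), the sketch below checks that shifting the whole widened register and then clearing the low 'shift' bits of every 16-bit lane matches widening and shifting each lane independently.

```c
#include <assert.h>
#include <stdint.h>

/* Widen four signed bytes into four sign-extended 16-bit lanes. */
static uint64_t widen_s8_to_s16(uint32_t src)
{
    uint64_t out = 0;
    for (int i = 0; i < 4; i++) {
        int8_t elem = (int8_t)(src >> (8 * i));
        out |= (uint64_t)(uint16_t)(int16_t)elem << (16 * i);
    }
    return out;
}

int main(void)
{
    uint32_t src = 0x7f80ff01;   /* lanes: 0x01, 0xff, 0x80, 0x7f */
    unsigned shift = 3;          /* must be non-zero and < 8 here */

    /* Reference result: widen and shift each lane independently. */
    uint64_t expected = 0;
    for (int i = 0; i < 4; i++) {
        uint16_t widened = (uint16_t)(int16_t)(int8_t)(src >> (8 * i));
        expected |= (uint64_t)(uint16_t)(widened << shift) << (16 * i);
    }

    /*
     * The trick: one 64-bit shift, then clear the low 'shift' bits of
     * each 16-bit lane, where bits of the lane below have leaked in.
     */
    uint64_t widen_mask = 0xffffull >> (16 - shift);  /* low 'shift' bits */
    widen_mask *= 0x0001000100010001ull;              /* replicate per lane */
    uint64_t got = (widen_s8_to_s16(src) << shift) & ~widen_mask;

    assert(got == expected);
    return 0;
}
```

The multiply by 0x0001000100010001 plays the role that dup_const(a->size + 1, ...) plays in the real code: replicating the per-lane mask pattern across the 64-bit register.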