aboutsummaryrefslogtreecommitdiff
path: root/target/arm/tcg/gengvec.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/tcg/gengvec.c')
-rw-r--r--target/arm/tcg/gengvec.c490
1 files changed, 452 insertions, 38 deletions
diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c
index 56a1dc1..01867f8 100644
--- a/target/arm/tcg/gengvec.c
+++ b/target/arm/tcg/gengvec.c
@@ -88,6 +88,25 @@ GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
#undef GEN_CMP0
+/*
+ * Expand a signed (arithmetic) right shift by immediate over a vector of
+ * (8 << vece)-bit elements: source at rm_ofs, destination at rd_ofs.
+ * The count is clamped to one less than the element width before it is
+ * passed to tcg_gen_gvec_sari().
+ */
+void gen_gvec_sshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ /* Signed shift out of range results in all-sign-bits */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_sari(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
+}
+
+/*
+ * Expand an unsigned (logical) right shift by immediate over a vector of
+ * (8 << vece)-bit elements: source at rm_ofs, destination at rd_ofs.
+ * A count equal to or larger than the element width stores zero to the
+ * destination instead of emitting a shift.
+ */
+void gen_gvec_ushr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ /* Unsigned shift out of range results in all-zero-bits */
+ if (shift >= (8 << vece)) {
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift, opr_sz, max_sz);
+ }
+}
+
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_vec_sar8i_i64(a, a, shift);
@@ -285,7 +304,7 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
tcg_gen_add_i32(d, d, t);
}
- void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
TCGv_i64 t = tcg_temp_new_i64();
@@ -297,10 +316,9 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_sari_vec(vece, d, a, sh);
tcg_gen_add_vec(vece, d, d, t);
@@ -492,10 +510,9 @@ void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_shri_vec(vece, d, a, shift);
tcg_gen_add_vec(vece, d, d, t);
@@ -685,9 +702,9 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
tcg_gen_shri_vec(vece, t, a, sh);
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
@@ -773,10 +790,9 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
}
@@ -1044,14 +1060,13 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
+ TCGv_vec max, zero;
tcg_gen_neg_vec(vece, rsh, shift);
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
tcg_gen_and_vec(vece, lsh, shift, msk);
tcg_gen_and_vec(vece, rsh, rsh, msk);
}
@@ -1064,26 +1079,21 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_shrv_vec(vece, rval, src, rsh);
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
/*
- * The choice of LT (signed) and GEU (unsigned) are biased toward
+ * The choice of GE (signed) and GEU (unsigned) are biased toward
* the instructions of the x86_64 host. For MO_8, the whole byte
* is significant so we must use an unsigned compare; otherwise we
* have already masked to a byte and so a signed compare works.
* Other tcg hosts have a full set of comparisons and do not care.
*/
+ zero = tcg_constant_vec_matching(dst, vece, 0);
+ max = tcg_constant_vec_matching(dst, vece, 8 << vece);
if (vece == MO_8) {
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
- tcg_gen_andc_vec(vece, lval, lval, lsh);
- tcg_gen_andc_vec(vece, rval, rval, rsh);
+ tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, lval, lsh, max, zero, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_GEU, vece, rval, rsh, max, zero, rval);
} else {
- tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
- tcg_gen_and_vec(vece, lval, lval, lsh);
- tcg_gen_and_vec(vece, rval, rval, rsh);
+ tcg_gen_cmpsel_vec(TCG_COND_GE, vece, lval, lsh, max, zero, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_GE, vece, rval, rsh, max, zero, rval);
}
tcg_gen_or_vec(vece, dst, lval, rval);
}
@@ -1093,7 +1103,7 @@ void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
{
static const TCGOpcode vecop_list[] = {
INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ INDEX_op_shrv_vec, INDEX_op_cmpsel_vec, 0
};
static const GVecGen3 ops[4] = {
{ .fniv = gen_ushl_vec,
@@ -1169,7 +1179,7 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+ TCGv_vec max, zero;
/*
* Rely on the TCG guarantee that out of range shifts produce
@@ -1180,29 +1190,28 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
}
/* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+ max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, max);
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_sarv_vec(vece, rval, src, rsh);
/* Select in-bound left shift. */
- tcg_gen_andc_vec(vece, lval, lval, tmp);
+ zero = tcg_constant_vec_matching(dst, vece, 0);
+ tcg_gen_cmpsel_vec(TCG_COND_GT, vece, lval, lsh, max, zero, lval);
/* Select between left and right shift. */
if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
}
}
@@ -1211,7 +1220,7 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
{
static const TCGOpcode vecop_list[] = {
INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ INDEX_op_sarv_vec, INDEX_op_cmpsel_vec, 0
};
static const GVecGen3 ops[4] = {
{ .fniv = gen_sshl_vec,
@@ -1304,6 +1313,42 @@ void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
opr_sz, max_sz, 0, fns[vece]);
}
+/*
+ * Expand a shift by the immediate c through the out-of-line
+ * gen_helper_neon_sqshli_{b,h,s,d} helper selected by vece.  The helper
+ * receives tcg_env as its pointer argument and c as its immediate data.
+ * Debug builds assert vece <= MO_64 and 0 <= c <= (8 << vece).
+ * NOTE(review): the name suggests signed saturating shift left; the
+ * saturation logic lives in the helper, which is not visible here.
+ */
+void gen_neon_sqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_sqshli_b, gen_helper_neon_sqshli_h,
+ gen_helper_neon_sqshli_s, gen_helper_neon_sqshli_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
+/*
+ * Expand a shift by the immediate c through the out-of-line
+ * gen_helper_neon_uqshli_{b,h,s,d} helper selected by vece.  The helper
+ * receives tcg_env as its pointer argument and c as its immediate data.
+ * Debug builds assert vece <= MO_64 and 0 <= c <= (8 << vece).
+ * NOTE(review): the name suggests unsigned saturating shift left; the
+ * saturation logic lives in the helper, which is not visible here.
+ */
+void gen_neon_uqshli(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_uqshli_b, gen_helper_neon_uqshli_h,
+ gen_helper_neon_uqshli_s, gen_helper_neon_uqshli_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
+/*
+ * Expand a shift by the immediate c through the out-of-line
+ * gen_helper_neon_sqshlui_{b,h,s,d} helper selected by vece.  The helper
+ * receives tcg_env as its pointer argument and c as its immediate data.
+ * Debug builds assert vece <= MO_64 and 0 <= c <= (8 << vece).
+ * NOTE(review): the name suggests signed input saturated to an unsigned
+ * result (SQSHLU); that logic lives in the helper, not visible here.
+ */
+void gen_neon_sqshlui(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ int64_t c, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_2_ptr * const fns[] = {
+ gen_helper_neon_sqshlui_b, gen_helper_neon_sqshlui_h,
+ gen_helper_neon_sqshlui_s, gen_helper_neon_sqshlui_d,
+ };
+ tcg_debug_assert(vece <= MO_64);
+ tcg_debug_assert(c >= 0 && c <= (8 << vece));
+ tcg_gen_gvec_2_ptr(rd_ofs, rn_ofs, tcg_env, opr_sz, max_sz, c, fns[vece]);
+}
+
void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz)
{
uint64_t max = MAKE_64BIT_MASK(0, 8 << esz);
@@ -2313,3 +2358,372 @@ void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
assert(vece <= MO_32);
tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]);
}
+
+/*
+ * Expand a two-operand vector operation chosen from a table indexed by
+ * vece.  MO_8 and MO_16 go through gen_helper_neon_cls_s8/_s16, MO_32
+ * uses tcg_gen_clrsb_i32; all three are installed as .fni4 expanders.
+ * Only vece <= MO_32 is accepted (asserted).
+ * NOTE(review): presumably the AdvSIMD CLS (count leading sign bits)
+ * operation -- confirm against the callers.
+ */
+void gen_gvec_cls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g[] = {
+ { .fni4 = gen_helper_neon_cls_s8,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_cls_s16,
+ .vece = MO_16 },
+ { .fni4 = tcg_gen_clrsb_i32,
+ .vece = MO_32 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+/*
+ * Count leading zeros of the 32-bit value n into d, passing 32 as the
+ * constant third operand of tcg_gen_clzi_i32() (the value produced for a
+ * zero input).  Wrapped so it matches the two-argument .fni4 signature.
+ */
+static void gen_clz32_i32(TCGv_i32 d, TCGv_i32 n)
+{
+ tcg_gen_clzi_i32(d, n, 32);
+}
+
+/*
+ * Expand a per-element count-leading-zeros chosen from a table indexed
+ * by vece.  MO_8 and MO_16 go through gen_helper_neon_clz_u8/_u16, MO_32
+ * uses gen_clz32_i32; all three are installed as .fni4 expanders.
+ * Only vece <= MO_32 is accepted (asserted).
+ */
+void gen_gvec_clz(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g[] = {
+ { .fni4 = gen_helper_neon_clz_u8,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_clz_u16,
+ .vece = MO_16 },
+ { .fni4 = gen_clz32_i32,
+ .vece = MO_32 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+/*
+ * Expand via the out-of-line helper gen_helper_gvec_cnt_b, with 0 as the
+ * helper's immediate data.  Only byte elements are supported: vece must
+ * be MO_8 (asserted).
+ * NOTE(review): presumably a per-byte population count -- the helper
+ * body is not visible here.
+ */
+void gen_gvec_cnt(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_8);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_cnt_b);
+}
+
+/*
+ * Expand via the out-of-line helper gen_helper_gvec_rbit_b, with 0 as
+ * the helper's immediate data.  Only byte elements are supported: vece
+ * must be MO_8 (asserted).
+ * NOTE(review): presumably a per-byte bit reversal -- the helper body is
+ * not visible here.
+ */
+void gen_gvec_rbit(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_8);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_rbit_b);
+}
+
+/*
+ * Swap the two bytes of every 16-bit element.  vece names the size of
+ * the units being reversed and must be MO_8 (asserted); the swap itself
+ * is emitted as a rotate-left by 8 on MO_16 elements.
+ */
+void gen_gvec_rev16(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_8);
+ tcg_gen_gvec_rotli(MO_16, rd_ofs, rn_ofs, 8, opr_sz, max_sz);
+}
+
+/*
+ * Byte-reverse each 32-bit half of the 64-bit value n into d.  The full
+ * 64-bit byte swap also exchanges the two halves, so the rotate by 32
+ * puts them back in their original positions.
+ */
+static void gen_bswap32_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ tcg_gen_bswap64_i64(d, n);
+ tcg_gen_rotli_i64(d, d, 32);
+}
+
+/*
+ * Reverse the order of vece-sized units inside every 32-bit element.
+ *   MO_16: rotate each 32-bit element left by 16 (swaps the halfwords).
+ *   MO_8:  byte-swap each 32-bit element, using either the 64-bit
+ *          expander gen_bswap32_i64 or tcg_gen_bswap32_i32; the 64-bit
+ *          form is preferred when TCG_TARGET_REG_BITS == 64.
+ * Any other vece is treated as unreachable.
+ */
+void gen_gvec_rev32(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g = {
+ .fni8 = gen_bswap32_i64,
+ .fni4 = tcg_gen_bswap32_i32,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_32
+ };
+
+ switch (vece) {
+ case MO_16:
+ tcg_gen_gvec_rotli(MO_32, rd_ofs, rn_ofs, 16, opr_sz, max_sz);
+ break;
+ case MO_8:
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * Reverse the order of vece-sized units inside every 64-bit element.
+ *   MO_32: rotate each 64-bit element left by 32 (swaps the words).
+ *   MO_8:  g[0], tcg_gen_bswap64_i64.
+ *   MO_16: g[1], tcg_gen_hswap_i64.
+ * The table is indexed directly by vece, so its entry order must follow
+ * MO_8, MO_16.  Any other vece is treated as unreachable.
+ */
+void gen_gvec_rev64(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const GVecGen2 g[] = {
+ { .fni8 = tcg_gen_bswap64_i64,
+ .vece = MO_64 },
+ { .fni8 = tcg_gen_hswap_i64,
+ .vece = MO_64 },
+ };
+
+ switch (vece) {
+ case MO_32:
+ tcg_gen_gvec_rotli(MO_64, rd_ofs, rn_ofs, 32, opr_sz, max_sz);
+ break;
+ case MO_8:
+ case MO_16:
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/*
+ * For every vece-sized element of n, add its two halves as signed
+ * values and store the sum in d.  half is half the element width in
+ * bits.  The low half is sign-extended by shifting it to the top and
+ * arithmetic-shifting it back; the high half by an arithmetic shift
+ * right.
+ */
+static void gen_saddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ int half = 4 << vece;
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shli_vec(vece, t, n, half);
+ tcg_gen_sari_vec(vece, d, n, half);
+ tcg_gen_sari_vec(vece, t, t, half);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+/*
+ * Add the two signed 32-bit halves of n as 64-bit values into d:
+ * sign-extended low word plus arithmetic-shifted high word.
+ */
+static void gen_saddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_ext32s_i64(t, n);
+ tcg_gen_sari_i64(d, n, 32);
+ tcg_gen_add_i64(d, d, t);
+}
+
+/*
+ * Expand a signed pairwise add of adjacent elements into elements of
+ * twice the width.  The table is indexed by the caller's vece, while
+ * each entry operates at the next size up (MO_8 -> MO_16, MO_16 ->
+ * MO_32, MO_32 -> MO_64).  Every entry offers the vector expander
+ * gen_saddlp_vec plus a 64-bit integer fallback.  Only vece <= MO_32 is
+ * accepted (asserted).
+ */
+void gen_gvec_saddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_saddlp_vec,
+ .fni8 = gen_helper_neon_addlp_s8,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_saddlp_vec,
+ .fni8 = gen_helper_neon_addlp_s16,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_saddlp_vec,
+ .fni8 = gen_saddlp_s_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+/*
+ * Accumulating form of gen_saddlp_vec: compute the signed pairwise sum
+ * of n into a temporary and add it to the existing contents of d.
+ */
+static void gen_sadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_saddlp_vec(vece, t, n);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+/*
+ * 64-bit integer accumulate step for byte sources: obtain the pairwise
+ * sums from gen_helper_neon_addlp_s8, then add them to d with
+ * tcg_gen_vec_add16_i64 (addition in independent 16-bit lanes).
+ */
+static void gen_sadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_helper_neon_addlp_s8(t, n);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+/*
+ * 64-bit integer accumulate step for halfword sources: obtain the
+ * pairwise sums from gen_helper_neon_addlp_s16, then add them to d with
+ * tcg_gen_vec_add32_i64 (addition in independent 32-bit lanes).
+ */
+static void gen_sadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_helper_neon_addlp_s16(t, n);
+ tcg_gen_vec_add32_i64(d, d, t);
+}
+
+/*
+ * 64-bit integer accumulate step for word sources: add the signed sum
+ * of the two 32-bit halves of n (gen_saddlp_s_i64) to d.
+ */
+static void gen_sadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_saddlp_s_i64(t, n);
+ tcg_gen_add_i64(d, d, t);
+}
+
+/*
+ * Expand a signed pairwise add-and-accumulate.  The table is indexed by
+ * the caller's vece, while each entry operates at the next size up
+ * (MO_16, MO_32, MO_64).  .load_dest is set on every entry because the
+ * expanders read the previous value of the destination before adding to
+ * it.  Only vece <= MO_32 is accepted (asserted).
+ */
+void gen_gvec_sadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_shli_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_sadalp_vec,
+ .fni8 = gen_sadalp_b_i64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fniv = gen_sadalp_vec,
+ .fni8 = gen_sadalp_h_i64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fniv = gen_sadalp_vec,
+ .fni8 = gen_sadalp_s_i64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+/*
+ * For every vece-sized element of n, add its two halves as unsigned
+ * values and store the sum in d.  half is half the element width in
+ * bits.  The high half comes from a logical shift right and the low
+ * half from masking with a constant that has only the low half set.
+ */
+static void gen_uaddlp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ int half = 4 << vece;
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, half));
+
+ tcg_gen_shri_vec(vece, t, n, half);
+ tcg_gen_and_vec(vece, d, n, m);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+/*
+ * Unsigned pairwise add of adjacent bytes held in a 64-bit value,
+ * producing four 16-bit sums in d.  The mask keeps the low byte of each
+ * 16-bit lane; it is applied to n for the even bytes and to n >> 8 for
+ * the odd bytes.
+ */
+static void gen_uaddlp_b_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_16, 0xff));
+
+ tcg_gen_shri_i64(t, n, 8);
+ tcg_gen_and_i64(d, n, m);
+ tcg_gen_and_i64(t, t, m);
+ /* No carry between widened unsigned elements. */
+ tcg_gen_add_i64(d, d, t);
+}
+
+/*
+ * Unsigned pairwise add of adjacent halfwords held in a 64-bit value,
+ * producing two 32-bit sums in d.  The mask keeps the low halfword of
+ * each 32-bit lane; it is applied to n for the even halfwords and to
+ * n >> 16 for the odd halfwords.
+ */
+static void gen_uaddlp_h_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ TCGv_i64 m = tcg_constant_i64(dup_const(MO_32, 0xffff));
+
+ tcg_gen_shri_i64(t, n, 16);
+ tcg_gen_and_i64(d, n, m);
+ tcg_gen_and_i64(t, t, m);
+ /* No carry between widened unsigned elements. */
+ tcg_gen_add_i64(d, d, t);
+}
+
+/*
+ * Add the two unsigned 32-bit halves of n as 64-bit values into d:
+ * zero-extended low word plus logically-shifted high word.
+ */
+static void gen_uaddlp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_ext32u_i64(t, n);
+ tcg_gen_shri_i64(d, n, 32);
+ tcg_gen_add_i64(d, d, t);
+}
+
+/*
+ * Expand an unsigned pairwise add of adjacent elements into elements of
+ * twice the width.  The table is indexed by the caller's vece, while
+ * each entry operates at the next size up (MO_8 -> MO_16, MO_16 ->
+ * MO_32, MO_32 -> MO_64).  Every entry offers the vector expander
+ * gen_uaddlp_vec plus a 64-bit integer fallback.  Only vece <= MO_32 is
+ * accepted (asserted).
+ */
+void gen_gvec_uaddlp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_uaddlp_vec,
+ .fni8 = gen_uaddlp_b_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uaddlp_vec,
+ .fni8 = gen_uaddlp_h_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uaddlp_vec,
+ .fni8 = gen_uaddlp_s_i64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+/*
+ * Accumulating form of gen_uaddlp_vec: compute the unsigned pairwise
+ * sum of n into a temporary and add it to the existing contents of d.
+ */
+static void gen_uadalp_vec(unsigned vece, TCGv_vec d, TCGv_vec n)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_uaddlp_vec(vece, t, n);
+ tcg_gen_add_vec(vece, d, d, t);
+}
+
+/*
+ * 64-bit integer accumulate step for byte sources: compute the unsigned
+ * pairwise sums with gen_uaddlp_b_i64, then add them to d with
+ * tcg_gen_vec_add16_i64 (addition in independent 16-bit lanes).
+ */
+static void gen_uadalp_b_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_uaddlp_b_i64(t, n);
+ tcg_gen_vec_add16_i64(d, d, t);
+}
+
+/*
+ * 64-bit integer accumulate step for halfword sources: compute the
+ * unsigned pairwise sums with gen_uaddlp_h_i64, then add them to d with
+ * tcg_gen_vec_add32_i64 (addition in independent 32-bit lanes).
+ */
+static void gen_uadalp_h_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_uaddlp_h_i64(t, n);
+ tcg_gen_vec_add32_i64(d, d, t);
+}
+
+/*
+ * 64-bit integer accumulate step for word sources: add the unsigned sum
+ * of the two 32-bit halves of n (gen_uaddlp_s_i64) to d.
+ */
+static void gen_uadalp_s_i64(TCGv_i64 d, TCGv_i64 n)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_uaddlp_s_i64(t, n);
+ tcg_gen_add_i64(d, d, t);
+}
+
+/*
+ * Expand an unsigned pairwise add-and-accumulate.  The table is indexed
+ * by the caller's vece, while each entry operates at the next size up
+ * (MO_16, MO_32, MO_64).  .load_dest is set on every entry because the
+ * expanders read the previous value of the destination before adding to
+ * it.  Only vece <= MO_32 is accepted (asserted).
+ */
+void gen_gvec_uadalp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2 g[] = {
+ { .fniv = gen_uadalp_vec,
+ .fni8 = gen_uadalp_b_i64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uadalp_vec,
+ .fni8 = gen_uadalp_h_i64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uadalp_vec,
+ .fni8 = gen_uadalp_s_i64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ assert(vece <= MO_32);
+ tcg_gen_gvec_2(rd_ofs, rn_ofs, opr_sz, max_sz, &g[vece]);
+}
+
+/*
+ * Clear the top bit of every (8 << vece)-bit element by ANDing with a
+ * mask that has every bit below the top one set.  For floating-point
+ * data that top bit is the sign, so this yields the absolute value.
+ */
+void gen_gvec_fabs(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ uint64_t s_bit = 1ull << ((8 << vece) - 1);
+ tcg_gen_gvec_andi(vece, dofs, aofs, s_bit - 1, oprsz, maxsz);
+}
+
+/*
+ * Flip the top bit of every (8 << vece)-bit element by XORing with a
+ * mask that has only that bit set.  For floating-point data that top
+ * bit is the sign, so this negates the value.
+ */
+void gen_gvec_fneg(unsigned vece, uint32_t dofs, uint32_t aofs,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ uint64_t s_bit = 1ull << ((8 << vece) - 1);
+ tcg_gen_gvec_xori(vece, dofs, aofs, s_bit, oprsz, maxsz);
+}
+
+/*
+ * Expand via the out-of-line helper gen_helper_gvec_urecpe_s, with 0 as
+ * the helper's immediate data.  Only 32-bit elements are supported:
+ * vece must be MO_32 (asserted).
+ * NOTE(review): presumably the unsigned reciprocal estimate (URECPE) --
+ * the helper body is not visible here.
+ */
+void gen_gvec_urecpe(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_32);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_urecpe_s);
+}
+
+/*
+ * Expand via the out-of-line helper gen_helper_gvec_ursqrte_s, with 0
+ * as the helper's immediate data.  Only 32-bit elements are supported:
+ * vece must be MO_32 (asserted).
+ * NOTE(review): presumably the unsigned reciprocal square-root estimate
+ * (URSQRTE) -- the helper body is not visible here.
+ */
+void gen_gvec_ursqrte(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t opr_sz, uint32_t max_sz)
+{
+ assert(vece == MO_32);
+ tcg_gen_gvec_2_ool(rd_ofs, rn_ofs, opr_sz, max_sz, 0,
+ gen_helper_gvec_ursqrte_s);
+}