aboutsummaryrefslogtreecommitdiff
path: root/target/arm/tcg/translate-sve.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/tcg/translate-sve.c')
-rw-r--r--target/arm/tcg/translate-sve.c1177
1 files changed, 1010 insertions, 167 deletions
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index f3cf028..07b827f 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -31,9 +31,9 @@ typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
TCGv_ptr, TCGv_ptr, TCGv_i32);
-typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
+typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
- TCGv_ptr, TCGv_i64, TCGv_i32);
+ TCGv_ptr, TCGv_i64, TCGv_i64);
/*
* Helpers for extracting complex instruction fields.
@@ -89,7 +89,7 @@ static inline int expand_imm_sh8u(DisasContext *s, int x)
*/
static inline int msz_dtype(DisasContext *s, int msz)
{
- static const uint8_t dtype[4] = { 0, 5, 10, 15 };
+ static const uint8_t dtype[5] = { 0, 5, 10, 15, 18 };
return dtype[msz];
}
@@ -190,6 +190,10 @@ static bool gen_gvec_fpst_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
arg_rrr_esz *a, int data)
{
+ /* These insns use MO_8 to encode BFloat16 */
+ if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
+ return false;
+ }
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}
@@ -403,6 +407,10 @@ static bool gen_gvec_fpst_zzzp(DisasContext *s, gen_helper_gvec_4_ptr *fn,
static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
arg_rprr_esz *a)
{
+ /* These insns use MO_8 to encode BFloat16. */
+ if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
+ return false;
+ }
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}
@@ -778,6 +786,9 @@ DO_ZPZ(NOT_zpz, aa64_sve, sve_not_zpz)
DO_ZPZ(ABS, aa64_sve, sve_abs)
DO_ZPZ(NEG, aa64_sve, sve_neg)
DO_ZPZ(RBIT, aa64_sve, sve_rbit)
+DO_ZPZ(ORQV, aa64_sme2p1_or_sve2p1, sve2p1_orqv)
+DO_ZPZ(EORQV, aa64_sme2p1_or_sve2p1, sve2p1_eorqv)
+DO_ZPZ(ANDQV, aa64_sme2p1_or_sve2p1, sve2p1_andqv)
static gen_helper_gvec_3 * const fabs_fns[4] = {
NULL, gen_helper_sve_fabs_h,
@@ -828,6 +839,41 @@ TRANS_FEAT(SXTW, aa64_sve, gen_gvec_ool_arg_zpz,
TRANS_FEAT(UXTW, aa64_sve, gen_gvec_ool_arg_zpz,
a->esz == 3 ? gen_helper_sve_uxtw_d : NULL, a, 0)
+static gen_helper_gvec_3 * const addqv_fns[4] = {
+ gen_helper_sve2p1_addqv_b, gen_helper_sve2p1_addqv_h,
+ gen_helper_sve2p1_addqv_s, gen_helper_sve2p1_addqv_d,
+};
+TRANS_FEAT(ADDQV, aa64_sme2p1_or_sve2p1,
+ gen_gvec_ool_arg_zpz, addqv_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const smaxqv_fns[4] = {
+ gen_helper_sve2p1_smaxqv_b, gen_helper_sve2p1_smaxqv_h,
+ gen_helper_sve2p1_smaxqv_s, gen_helper_sve2p1_smaxqv_d,
+};
+TRANS_FEAT(SMAXQV, aa64_sme2p1_or_sve2p1,
+ gen_gvec_ool_arg_zpz, smaxqv_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const sminqv_fns[4] = {
+ gen_helper_sve2p1_sminqv_b, gen_helper_sve2p1_sminqv_h,
+ gen_helper_sve2p1_sminqv_s, gen_helper_sve2p1_sminqv_d,
+};
+TRANS_FEAT(SMINQV, aa64_sme2p1_or_sve2p1,
+ gen_gvec_ool_arg_zpz, sminqv_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const umaxqv_fns[4] = {
+ gen_helper_sve2p1_umaxqv_b, gen_helper_sve2p1_umaxqv_h,
+ gen_helper_sve2p1_umaxqv_s, gen_helper_sve2p1_umaxqv_d,
+};
+TRANS_FEAT(UMAXQV, aa64_sme2p1_or_sve2p1,
+ gen_gvec_ool_arg_zpz, umaxqv_fns[a->esz], a, 0)
+
+static gen_helper_gvec_3 * const uminqv_fns[4] = {
+ gen_helper_sve2p1_uminqv_b, gen_helper_sve2p1_uminqv_h,
+ gen_helper_sve2p1_uminqv_s, gen_helper_sve2p1_uminqv_d,
+};
+TRANS_FEAT(UMINQV, aa64_sme2p1_or_sve2p1,
+ gen_gvec_ool_arg_zpz, uminqv_fns[a->esz], a, 0)
+
/*
*** SVE Integer Reduction Group
*/
@@ -1679,6 +1725,22 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
+static bool trans_PTRUE_cnt(DisasContext *s, arg_PTRUE_cnt *a)
+{
+ if (!dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ /* Canonical TRUE is 0 count, invert bit, plus element size. */
+ int val = (1 << 15) | (1 << a->esz);
+
+ /* Write val to the first uint64_t; clear all of the rest. */
+ tcg_gen_gvec_dup_imm(MO_64, pred_full_reg_offset(s, a->rd),
+ 8, size_for_gvec(pred_full_reg_size(s)), val);
+ }
+ return true;
+}
+
/* Note pat == 31 is #all, to set all elements. */
TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
do_predset, 0, FFR_PRED_NUM, 31, false)
@@ -2148,6 +2210,55 @@ static bool do_EXT(DisasContext *s, int rd, int rn, int rm, int imm)
TRANS_FEAT(EXT, aa64_sve, do_EXT, a->rd, a->rn, a->rm, a->imm)
TRANS_FEAT(EXT_sve2, aa64_sve2, do_EXT, a->rd, a->rn, (a->rn + 1) % 32, a->imm)
+static bool trans_EXTQ(DisasContext *s, arg_EXTQ *a)
+{
+ unsigned vl, dofs, sofs0, sofs1, sofs2, imm;
+
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ imm = a->imm;
+ if (imm == 0) {
+ /* So far we never optimize Zdn with MOVPRFX, so zd = zn is a nop. */
+ return true;
+ }
+
+ vl = vec_full_reg_size(s);
+ dofs = vec_full_reg_offset(s, a->rd);
+ sofs2 = vec_full_reg_offset(s, a->rn);
+
+ if (imm & 8) {
+ sofs0 = dofs + 8;
+ sofs1 = sofs2;
+ sofs2 += 8;
+ } else {
+ sofs0 = dofs;
+ sofs1 = dofs + 8;
+ }
+ imm = (imm & 7) << 3;
+
+ for (unsigned i = 0; i < vl; i += 16) {
+ TCGv_i64 s0 = tcg_temp_new_i64();
+ TCGv_i64 s1 = tcg_temp_new_i64();
+ TCGv_i64 s2 = tcg_temp_new_i64();
+
+ tcg_gen_ld_i64(s0, tcg_env, sofs0 + i);
+ tcg_gen_ld_i64(s1, tcg_env, sofs1 + i);
+ tcg_gen_ld_i64(s2, tcg_env, sofs2 + i);
+
+ tcg_gen_extract2_i64(s0, s0, s1, imm);
+ tcg_gen_extract2_i64(s1, s1, s2, imm);
+
+ tcg_gen_st_i64(s0, tcg_env, dofs + i);
+ tcg_gen_st_i64(s1, tcg_env, dofs + i + 8);
+ }
+ return true;
+}
+
/*
*** SVE Permute - Unpredicated Group
*/
@@ -2195,6 +2306,27 @@ static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a)
return true;
}
+static bool trans_DUPQ(DisasContext *s, arg_DUPQ *a)
+{
+ unsigned vl, dofs, nofs;
+
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vl = vec_full_reg_size(s);
+ dofs = vec_full_reg_offset(s, a->rd);
+ nofs = vec_reg_offset(s, a->rn, a->imm, a->esz);
+
+ for (unsigned i = 0; i < vl; i += 16) {
+ tcg_gen_gvec_dup_mem(a->esz, dofs + i, nofs + i, 16, 16);
+ }
+ return true;
+}
+
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
@@ -2256,12 +2388,124 @@ static gen_helper_gvec_4 * const sve2_tbl_fns[4] = {
TRANS_FEAT(TBL_sve2, aa64_sve2, gen_gvec_ool_zzzz, sve2_tbl_fns[a->esz],
a->rd, a->rn, (a->rn + 1) % 32, a->rm, 0)
+static gen_helper_gvec_3 * const tblq_fns[4] = {
+ gen_helper_sve2p1_tblq_b, gen_helper_sve2p1_tblq_h,
+ gen_helper_sve2p1_tblq_s, gen_helper_sve2p1_tblq_d
+};
+TRANS_FEAT(TBLQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ tblq_fns[a->esz], a, 0)
+
static gen_helper_gvec_3 * const tbx_fns[4] = {
gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d
};
TRANS_FEAT(TBX, aa64_sve2, gen_gvec_ool_arg_zzz, tbx_fns[a->esz], a, 0)
+static gen_helper_gvec_3 * const tbxq_fns[4] = {
+ gen_helper_sve2p1_tbxq_b, gen_helper_sve2p1_tbxq_h,
+ gen_helper_sve2p1_tbxq_s, gen_helper_sve2p1_tbxq_d
+};
+TRANS_FEAT(TBXQ, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ tbxq_fns[a->esz], a, 0)
+
+static bool trans_PMOV_pv(DisasContext *s, arg_PMOV_pv *a)
+{
+ static gen_helper_gvec_2 * const fns[4] = {
+ NULL, gen_helper_pmov_pv_h,
+ gen_helper_pmov_pv_s, gen_helper_pmov_pv_d
+ };
+ unsigned vl, pl, vofs, pofs;
+ TCGv_i64 tmp;
+
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vl = vec_full_reg_size(s);
+ if (a->esz != MO_8) {
+ tcg_gen_gvec_2_ool(pred_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vl, vl, a->imm, fns[a->esz]);
+ return true;
+ }
+
+ /*
+ * Copy the low PL bytes from vector Zn, zero-extending to a
+ * multiple of 8 bytes, so that Pd is properly cleared.
+ */
+
+ pl = vl / 8;
+ pofs = pred_full_reg_offset(s, a->rd);
+ vofs = vec_full_reg_offset(s, a->rn);
+
+ QEMU_BUILD_BUG_ON(sizeof(ARMPredicateReg) != 32);
+ for (unsigned i = 32; i >= 8; i >>= 1) {
+ if (pl & i) {
+ tcg_gen_gvec_mov(MO_64, pofs, vofs, i, i);
+ pofs += i;
+ vofs += i;
+ }
+ }
+ switch (pl & 7) {
+ case 0:
+ return true;
+ case 2:
+ tmp = tcg_temp_new_i64();
+ tcg_gen_ld16u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 6 : 0));
+ break;
+ case 4:
+ tmp = tcg_temp_new_i64();
+ tcg_gen_ld32u_i64(tmp, tcg_env, vofs + (HOST_BIG_ENDIAN ? 4 : 0));
+ break;
+ case 6:
+ tmp = tcg_temp_new_i64();
+ tcg_gen_ld_i64(tmp, tcg_env, vofs);
+ tcg_gen_extract_i64(tmp, tmp, 0, 48);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_gen_st_i64(tmp, tcg_env, pofs);
+ return true;
+}
+
+static bool trans_PMOV_vp(DisasContext *s, arg_PMOV_pv *a)
+{
+ static gen_helper_gvec_2 * const fns[4] = {
+ NULL, gen_helper_pmov_vp_h,
+ gen_helper_pmov_vp_s, gen_helper_pmov_vp_d
+ };
+ unsigned vl;
+
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ vl = vec_full_reg_size(s);
+
+ if (a->esz == MO_8) {
+ /*
+ * The low PL bytes are copied from Pn to Zd unchanged.
+ * We know that the unused portion of Pn is zero, and
+ * that imm == 0, so the balance of Zd must be zeroed.
+ */
+ tcg_gen_gvec_mov(MO_64, vec_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->rn),
+ size_for_gvec(vl / 8), vl);
+ } else {
+ tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
+ pred_full_reg_offset(s, a->rn),
+ vl, vl, a->imm, fns[a->esz]);
+ }
+ return true;
+}
+
static bool trans_UNPK(DisasContext *s, arg_UNPK *a)
{
static gen_helper_gvec_2 * const fns[4][2] = {
@@ -2352,6 +2596,23 @@ TRANS_FEAT(PUNPKHI, aa64_sve, do_perm_pred2, a, 1, gen_helper_sve_punpk_p)
*** SVE Permute - Interleaving Group
*/
+static bool do_interleave_q(DisasContext *s, gen_helper_gvec_3 *fn,
+ arg_rrr_esz *a, int data)
+{
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ if (vsz < 32) {
+ unallocated_encoding(s);
+ } else {
+ tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vsz, vsz, data, fn);
+ }
+ }
+ return true;
+}
+
static gen_helper_gvec_3 * const zip_fns[4] = {
gen_helper_sve_zip_b, gen_helper_sve_zip_h,
gen_helper_sve_zip_s, gen_helper_sve_zip_d,
@@ -2361,26 +2622,43 @@ TRANS_FEAT(ZIP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
TRANS_FEAT(ZIP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
zip_fns[a->esz], a, vec_full_reg_size(s) / 2)
-TRANS_FEAT(ZIP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
- gen_helper_sve2_zip_q, a, 0)
-TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
- gen_helper_sve2_zip_q, a,
- QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
+TRANS_FEAT_NONSTREAMING(ZIP1_q, aa64_sve_f64mm, do_interleave_q,
+ gen_helper_sve2_zip_q, a, 0)
+TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q,
+ gen_helper_sve2_zip_q, a,
+ QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
+
+static gen_helper_gvec_3 * const zipq_fns[4] = {
+ gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h,
+ gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d,
+};
+TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ zipq_fns[a->esz], a, 0)
+TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ zipq_fns[a->esz], a, 16 / 2)
static gen_helper_gvec_3 * const uzp_fns[4] = {
gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};
-
TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
uzp_fns[a->esz], a, 1 << a->esz)
-TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
- gen_helper_sve2_uzp_q, a, 0)
-TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
- gen_helper_sve2_uzp_q, a, 16)
+TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q,
+ gen_helper_sve2_uzp_q, a, 0)
+TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q,
+ gen_helper_sve2_uzp_q, a, 16)
+
+static gen_helper_gvec_3 * const uzpq_fns[4] = {
+ gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h,
+ gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d,
+};
+TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ uzpq_fns[a->esz], a, 0)
+TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ uzpq_fns[a->esz], a, 1 << a->esz)
static gen_helper_gvec_3 * const trn_fns[4] = {
gen_helper_sve_trn_b, gen_helper_sve_trn_h,
@@ -2392,10 +2670,10 @@ TRANS_FEAT(TRN1_z, aa64_sve, gen_gvec_ool_arg_zzz,
TRANS_FEAT(TRN2_z, aa64_sve, gen_gvec_ool_arg_zzz,
trn_fns[a->esz], a, 1 << a->esz)
-TRANS_FEAT(TRN1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
- gen_helper_sve2_trn_q, a, 0)
-TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
- gen_helper_sve2_trn_q, a, 16)
+TRANS_FEAT_NONSTREAMING(TRN1_q, aa64_sve_f64mm, do_interleave_q,
+ gen_helper_sve2_trn_q, a, 0)
+TRANS_FEAT_NONSTREAMING(TRN2_q, aa64_sve_f64mm, do_interleave_q,
+ gen_helper_sve2_trn_q, a, 16)
/*
*** SVE Permute Vector - Predicated Group
@@ -2981,6 +3259,36 @@ static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
return true;
}
+static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a)
+{
+ TCGv_i32 t_png;
+ uint32_t desc = 0;
+
+ if (dc_isar_feature(aa64_sve2p1, s)) {
+ if (!sve_access_check(s)) {
+ return true;
+ }
+ } else if (dc_isar_feature(aa64_sme2, s)) {
+ if (!sme_sm_enabled_check(s)) {
+ return true;
+ }
+ } else {
+ return false;
+ }
+
+ t_png = tcg_temp_new_i32();
+ tcg_gen_ld16u_i32(t_png, tcg_env,
+ pred_full_reg_offset(s, a->rn) ^
+ (HOST_BIG_ENDIAN ? 6 : 0));
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl);
+
+ gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc));
+ return true;
+}
+
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
if (!dc_isar_feature(aa64_sve, s)) {
@@ -3091,7 +3399,9 @@ static bool trans_CTERM(DisasContext *s, arg_CTERM *a)
return true;
}
-static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
+typedef void gen_while_fn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+static bool do_WHILE(DisasContext *s, arg_while *a,
+ bool lt, int scale, int data, gen_while_fn *fn)
{
TCGv_i64 op0, op1, t0, t1, tmax;
TCGv_i32 t2;
@@ -3101,14 +3411,8 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
TCGCond cond;
uint64_t maxval;
/* Note that GE/HS has a->eq == 0 and GT/HI has a->eq == 1. */
- bool eq = a->eq == a->lt;
+ bool eq = a->eq == lt;
- /* The greater-than conditions are all SVE2. */
- if (a->lt
- ? !dc_isar_feature(aa64_sve, s)
- : !dc_isar_feature(aa64_sve2, s)) {
- return false;
- }
if (!sve_access_check(s)) {
return true;
}
@@ -3132,7 +3436,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
t0 = tcg_temp_new_i64();
t1 = tcg_temp_new_i64();
- if (a->lt) {
+ if (lt) {
tcg_gen_sub_i64(t0, op1, op0);
if (a->u) {
maxval = a->sf ? UINT64_MAX : UINT32_MAX;
@@ -3152,7 +3456,7 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
}
}
- tmax = tcg_constant_i64(vsz >> a->esz);
+ tmax = tcg_constant_i64((vsz << scale) >> a->esz);
if (eq) {
/* Equality means one more iteration. */
tcg_gen_addi_i64(t0, t0, 1);
@@ -3181,24 +3485,38 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
t2 = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(t2, t0);
- /* Scale elements to bits. */
- tcg_gen_shli_i32(t2, t2, a->esz);
-
desc = FIELD_DP32(desc, PREDDESC, OPRSZ, vsz / 8);
desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, data);
ptr = tcg_temp_new_ptr();
tcg_gen_addi_ptr(ptr, tcg_env, pred_full_reg_offset(s, a->rd));
- if (a->lt) {
- gen_helper_sve_whilel(t2, ptr, t2, tcg_constant_i32(desc));
- } else {
- gen_helper_sve_whileg(t2, ptr, t2, tcg_constant_i32(desc));
- }
+ fn(t2, ptr, t2, tcg_constant_i32(desc));
+
do_pred_flags(t2);
return true;
}
+TRANS_FEAT(WHILE_lt, aa64_sve, do_WHILE,
+ a, true, 0, 0, gen_helper_sve_whilel)
+TRANS_FEAT(WHILE_gt, aa64_sve2, do_WHILE,
+ a, false, 0, 0, gen_helper_sve_whileg)
+
+TRANS_FEAT(WHILE_lt_pair, aa64_sme2_or_sve2p1, do_WHILE,
+ a, true, 1, 0, gen_helper_sve_while2l)
+TRANS_FEAT(WHILE_gt_pair, aa64_sme2_or_sve2p1, do_WHILE,
+ a, false, 1, 0, gen_helper_sve_while2g)
+
+TRANS_FEAT(WHILE_lt_cnt2, aa64_sme2_or_sve2p1, do_WHILE,
+ a, true, 1, 1, gen_helper_sve_whilecl)
+TRANS_FEAT(WHILE_lt_cnt4, aa64_sme2_or_sve2p1, do_WHILE,
+ a, true, 2, 2, gen_helper_sve_whilecl)
+TRANS_FEAT(WHILE_gt_cnt2, aa64_sme2_or_sve2p1, do_WHILE,
+ a, false, 1, 1, gen_helper_sve_whilecg)
+TRANS_FEAT(WHILE_gt_cnt4, aa64_sme2_or_sve2p1, do_WHILE,
+ a, false, 2, 2, gen_helper_sve_whilecg)
+
static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
{
TCGv_i64 op0, op1, diff, t1, tmax;
@@ -3217,7 +3535,7 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
op0 = read_cpu_reg(s, a->rn, 1);
op1 = read_cpu_reg(s, a->rm, 1);
- tmax = tcg_constant_i64(vsz);
+ tmax = tcg_constant_i64(vsz >> a->esz);
diff = tcg_temp_new_i64();
if (a->rw) {
@@ -3227,15 +3545,15 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
tcg_gen_sub_i64(diff, op0, op1);
tcg_gen_sub_i64(t1, op1, op0);
tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, diff, t1);
- /* Round down to a multiple of ESIZE. */
- tcg_gen_andi_i64(diff, diff, -1 << a->esz);
+ /* Divide, rounding down, by ESIZE. */
+ tcg_gen_shri_i64(diff, diff, a->esz);
/* If op1 == op0, diff == 0, and the condition is always true. */
tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
} else {
/* WHILEWR */
tcg_gen_sub_i64(diff, op1, op0);
- /* Round down to a multiple of ESIZE. */
- tcg_gen_andi_i64(diff, diff, -1 << a->esz);
+ /* Divide, rounding down, by ESIZE. */
+ tcg_gen_shri_i64(diff, diff, a->esz);
/* If op0 >= op1, diff <= 0, the condition is always true. */
tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
}
@@ -3258,6 +3576,42 @@ static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
return true;
}
+static bool do_pext(DisasContext *s, arg_pext *a, int n)
+{
+ TCGv_i32 t_png;
+ TCGv_ptr t_pd;
+ int pl;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ t_png = tcg_temp_new_i32();
+ tcg_gen_ld16u_i32(t_png, tcg_env,
+ pred_full_reg_offset(s, a->rn) ^
+ (HOST_BIG_ENDIAN ? 6 : 0));
+
+ t_pd = tcg_temp_new_ptr();
+ pl = pred_full_reg_size(s);
+
+ for (int i = 0; i < n; ++i) {
+ int rd = (a->rd + i) % 16;
+ int part = a->imm * n + i;
+ unsigned desc = 0;
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pl);
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, part);
+
+ tcg_gen_addi_ptr(t_pd, tcg_env, pred_full_reg_offset(s, rd));
+ gen_helper_pext(t_pd, t_png, tcg_constant_i32(desc));
+ }
+ return true;
+}
+
+TRANS_FEAT(PEXT_1, aa64_sme2_or_sve2p1, do_pext, a, 1)
+TRANS_FEAT(PEXT_2, aa64_sme2_or_sve2p1, do_pext, a, 2)
+
/*
*** SVE Integer Wide Immediate - Unpredicated Group
*/
@@ -3385,8 +3739,8 @@ DO_ZZI(UMIN, umin)
#undef DO_ZZI
static gen_helper_gvec_4 * const dot_fns[2][2] = {
- { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
- { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
+ { gen_helper_gvec_sdot_4b, gen_helper_gvec_sdot_4h },
+ { gen_helper_gvec_udot_4b, gen_helper_gvec_udot_4h }
};
TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
dot_fns[a->u][a->sz], a->rd, a->rn, a->rm, a->ra, 0)
@@ -3395,19 +3749,24 @@ TRANS_FEAT(DOT_zzzz, aa64_sve, gen_gvec_ool_zzzz,
* SVE Multiply - Indexed
*/
-TRANS_FEAT(SDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_sdot_idx_b, a)
-TRANS_FEAT(SDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_sdot_idx_h, a)
-TRANS_FEAT(UDOT_zzxw_s, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_udot_idx_b, a)
-TRANS_FEAT(UDOT_zzxw_d, aa64_sve, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_udot_idx_h, a)
-
-TRANS_FEAT(SUDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_sudot_idx_b, a)
-TRANS_FEAT(USDOT_zzxw_s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
- gen_helper_gvec_usdot_idx_b, a)
+TRANS_FEAT(SDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sdot_idx_4b, a)
+TRANS_FEAT(SDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sdot_idx_4h, a)
+TRANS_FEAT(UDOT_zzxw_4s, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_udot_idx_4b, a)
+TRANS_FEAT(UDOT_zzxw_4d, aa64_sve, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_udot_idx_4h, a)
+
+TRANS_FEAT(SUDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sudot_idx_4b, a)
+TRANS_FEAT(USDOT_zzxw_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_usdot_idx_4b, a)
+
+TRANS_FEAT(SDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_sdot_idx_2h, a)
+TRANS_FEAT(UDOT_zzxw_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzxz,
+ gen_helper_gvec_udot_idx_2h, a)
#define DO_SVE2_RRX(NAME, FUNC) \
TRANS_FEAT(NAME, aa64_sve, gen_gvec_ool_zzz, FUNC, \
@@ -3524,31 +3883,38 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
*** SVE Floating Point Multiply-Add Indexed Group
*/
+static bool do_fmla_zzxz(DisasContext *s, arg_rrxr_esz *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ /* These insns use MO_8 to encode BFloat16 */
+ if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
+ return false;
+ }
+ return gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
- NULL, gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_bfmla_idx, gen_helper_gvec_fmla_idx_h,
gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
};
-TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
- fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+TRANS_FEAT(FMLA_zzxz, aa64_sve, do_fmla_zzxz, a, fmla_idx_fns[a->esz])
static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
- { NULL, NULL },
+ { gen_helper_gvec_bfmls_idx, gen_helper_gvec_ah_bfmls_idx },
{ gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
{ gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
{ gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
};
-TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
- fmls_idx_fns[a->esz][s->fpcr_ah],
- a->rd, a->rn, a->rm, a->ra, a->index,
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+TRANS_FEAT(FMLS_zzxz, aa64_sve, do_fmla_zzxz, a,
+ fmls_idx_fns[a->esz][s->fpcr_ah])
/*
*** SVE Floating Point Multiply Indexed Group
*/
static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
- NULL, gen_helper_gvec_fmul_idx_h,
+ gen_helper_gvec_fmul_idx_b16, gen_helper_gvec_fmul_idx_h,
gen_helper_gvec_fmul_idx_s, gen_helper_gvec_fmul_idx_d,
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
@@ -3621,6 +3987,54 @@ DO_VPZ_AH(FMAXV, fmaxv)
#undef DO_VPZ
+static gen_helper_gvec_3_ptr * const faddqv_fns[4] = {
+ NULL, gen_helper_sve2p1_faddqv_h,
+ gen_helper_sve2p1_faddqv_s, gen_helper_sve2p1_faddqv_d,
+};
+TRANS_FEAT(FADDQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
+ faddqv_fns[a->esz], a, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+
+static gen_helper_gvec_3_ptr * const fmaxnmqv_fns[4] = {
+ NULL, gen_helper_sve2p1_fmaxnmqv_h,
+ gen_helper_sve2p1_fmaxnmqv_s, gen_helper_sve2p1_fmaxnmqv_d,
+};
+TRANS_FEAT(FMAXNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
+ fmaxnmqv_fns[a->esz], a, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+
+static gen_helper_gvec_3_ptr * const fminnmqv_fns[4] = {
+ NULL, gen_helper_sve2p1_fminnmqv_h,
+ gen_helper_sve2p1_fminnmqv_s, gen_helper_sve2p1_fminnmqv_d,
+};
+TRANS_FEAT(FMINNMQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
+ fminnmqv_fns[a->esz], a, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+
+static gen_helper_gvec_3_ptr * const fmaxqv_fns[4] = {
+ NULL, gen_helper_sve2p1_fmaxqv_h,
+ gen_helper_sve2p1_fmaxqv_s, gen_helper_sve2p1_fmaxqv_d,
+};
+static gen_helper_gvec_3_ptr * const fmaxqv_ah_fns[4] = {
+ NULL, gen_helper_sve2p1_ah_fmaxqv_h,
+ gen_helper_sve2p1_ah_fmaxqv_s, gen_helper_sve2p1_ah_fmaxqv_d,
+};
+TRANS_FEAT(FMAXQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
+ (s->fpcr_ah ? fmaxqv_ah_fns : fmaxqv_fns)[a->esz], a, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+
+static gen_helper_gvec_3_ptr * const fminqv_fns[4] = {
+ NULL, gen_helper_sve2p1_fminqv_h,
+ gen_helper_sve2p1_fminqv_s, gen_helper_sve2p1_fminqv_d,
+};
+static gen_helper_gvec_3_ptr * const fminqv_ah_fns[4] = {
+ NULL, gen_helper_sve2p1_ah_fminqv_h,
+ gen_helper_sve2p1_ah_fminqv_s, gen_helper_sve2p1_ah_fminqv_d,
+};
+TRANS_FEAT(FMINQV, aa64_sme2p1_or_sve2p1, gen_gvec_fpst_arg_zpz,
+ (s->fpcr_ah ? fminqv_ah_fns : fminqv_fns)[a->esz], a, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+
/*
*** SVE Floating Point Unary Operations - Unpredicated Group
*/
@@ -3747,7 +4161,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
#define DO_FP3(NAME, name) \
static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
- NULL, gen_helper_gvec_##name##_h, \
+ gen_helper_gvec_##name##_b16, gen_helper_gvec_##name##_h, \
gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
}; \
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
@@ -3803,13 +4217,34 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
name##_zpzz_fns[a->esz], a)
-DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
-DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
-DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
-DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
-DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
-DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
-DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
+/* Similar, but for insns where sz == 0 encodes bfloat16 */
+#define DO_ZPZZ_FP_B16(NAME, FEAT, name) \
+ static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
+ gen_helper_##name##_b16, gen_helper_##name##_h, \
+ gen_helper_##name##_s, gen_helper_##name##_d \
+ }; \
+ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
+
+#define DO_ZPZZ_AH_FP_B16(NAME, FEAT, name, ah_name) \
+ static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
+ gen_helper_##name##_b16, gen_helper_##name##_h, \
+ gen_helper_##name##_s, gen_helper_##name##_d \
+ }; \
+ static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \
+ gen_helper_##ah_name##_b16, gen_helper_##ah_name##_h, \
+ gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \
+ s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
+ name##_zpzz_fns[a->esz], a)
+
+DO_ZPZZ_FP_B16(FADD_zpzz, aa64_sve, sve_fadd)
+DO_ZPZZ_FP_B16(FSUB_zpzz, aa64_sve, sve_fsub)
+DO_ZPZZ_FP_B16(FMUL_zpzz, aa64_sve, sve_fmul)
+DO_ZPZZ_AH_FP_B16(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
+DO_ZPZZ_AH_FP_B16(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
+DO_ZPZZ_FP_B16(FMINNM_zpzz, aa64_sve, sve_fminnum)
+DO_ZPZZ_FP_B16(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
@@ -3940,19 +4375,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a,
+ gen_helper_gvec_5_ptr *fn)
+{
+ /* These insns use MO_8 to encode BFloat16 */
+ if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
+ return false;
+ }
+ return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
- NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
- NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \
gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
- s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
- a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+ TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
@@ -4143,7 +4587,7 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
*/
void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
- int len, int rn, int imm)
+ int len, int rn, int imm, MemOp align)
{
int len_align = QEMU_ALIGN_DOWN(len, 16);
int len_remain = len % 16;
@@ -4172,12 +4616,15 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
for (i = 0; i < len_align; i += 16) {
tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
- MO_LE | MO_128 | MO_ATOM_NONE);
+ MO_LE | MO_128 | MO_ATOM_NONE | align);
tcg_gen_extr_i128_i64(t0, t1, t16);
tcg_gen_st_i64(t0, base, vofs + i);
tcg_gen_st_i64(t1, base, vofs + i + 8);
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
+ if (len_align) {
+ align = MO_UNALN;
+ }
} else {
TCGLabel *loop = gen_new_label();
TCGv_ptr tp, i = tcg_temp_new_ptr();
@@ -4187,7 +4634,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
t16 = tcg_temp_new_i128();
tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
- MO_LE | MO_128 | MO_ATOM_NONE);
+ MO_LE | MO_128 | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tp = tcg_temp_new_ptr();
@@ -4202,6 +4649,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_st_i64(t1, tp, vofs + 8);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ align = MO_UNALN;
}
/*
@@ -4210,7 +4658,9 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
*/
if (len_remain >= 8) {
t0 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LEUQ | MO_ATOM_NONE | align);
+ align = MO_UNALN;
tcg_gen_st_i64(t0, base, vofs + len_align);
len_remain -= 8;
len_align += 8;
@@ -4225,12 +4675,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
case 4:
case 8:
tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
+ MO_LE | ctz32(len_remain)
+ | MO_ATOM_NONE | align);
break;
case 6:
t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LEUL | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
@@ -4245,7 +4697,7 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
/* Similarly for stores. */
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
- int len, int rn, int imm)
+ int len, int rn, int imm, MemOp align)
{
int len_align = QEMU_ALIGN_DOWN(len, 16);
int len_remain = len % 16;
@@ -4277,9 +4729,12 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_ld_i64(t1, base, vofs + i + 8);
tcg_gen_concat_i64_i128(t16, t0, t1);
tcg_gen_qemu_st_i128(t16, clean_addr, midx,
- MO_LE | MO_128 | MO_ATOM_NONE);
+ MO_LE | MO_128 | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
+ if (len_align) {
+ align = MO_UNALN;
+ }
} else {
TCGLabel *loop = gen_new_label();
TCGv_ptr tp, i = tcg_temp_new_ptr();
@@ -4303,13 +4758,16 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
+ align = MO_UNALN;
}
/* Predicate register stores can be any multiple of 2. */
if (len_remain >= 8) {
t0 = tcg_temp_new_i64();
tcg_gen_ld_i64(t0, base, vofs + len_align);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx,
+ MO_LEUQ | MO_ATOM_NONE | align);
+ align = MO_UNALN;
len_remain -= 8;
len_align += 8;
if (len_remain) {
@@ -4325,11 +4783,13 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
case 4:
case 8:
tcg_gen_qemu_st_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
+ MO_LE | ctz32(len_remain)
+ | MO_ATOM_NONE | align);
break;
case 6:
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx,
+ MO_LEUL | MO_ATOM_NONE | align);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_shri_i64(t0, t0, 32);
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
@@ -4349,7 +4809,8 @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = vec_full_reg_size(s);
int off = vec_full_reg_offset(s, a->rd);
- gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_16 : MO_UNALN);
}
return true;
}
@@ -4362,7 +4823,8 @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = pred_full_reg_size(s);
int off = pred_full_reg_offset(s, a->rd);
- gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_ldr(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_2 : MO_UNALN);
}
return true;
}
@@ -4375,7 +4837,8 @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = vec_full_reg_size(s);
int off = vec_full_reg_offset(s, a->rd);
- gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_16 : MO_UNALN);
}
return true;
}
@@ -4388,7 +4851,8 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = pred_full_reg_size(s);
int off = pred_full_reg_offset(s, a->rd);
- gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size);
+ gen_sve_str(s, tcg_env, off, size, a->rn, a->imm * size,
+ s->align_mem ? MO_ALIGN_2 : MO_UNALN);
}
return true;
}
@@ -4398,34 +4862,37 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
*/
/* The memory mode of the dtype. */
-static const MemOp dtype_mop[16] = {
+static const MemOp dtype_mop[19] = {
MO_UB, MO_UB, MO_UB, MO_UB,
MO_SL, MO_UW, MO_UW, MO_UW,
MO_SW, MO_SW, MO_UL, MO_UL,
- MO_SB, MO_SB, MO_SB, MO_UQ
+ MO_SB, MO_SB, MO_SB, MO_UQ,
+ /* Artificial values used by decode */
+ MO_UL, MO_UQ, MO_128,
};
#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
/* The vector element size of dtype. */
-static const uint8_t dtype_esz[16] = {
+static const uint8_t dtype_esz[19] = {
0, 1, 2, 3,
3, 1, 2, 3,
3, 2, 2, 3,
- 3, 2, 1, 3
+ 3, 2, 1, 3,
+ /* Artificial values used by decode */
+ 4, 4, 4,
};
-uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
+uint64_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
uint32_t msz, bool is_write, uint32_t data)
{
uint32_t sizem1;
- uint32_t desc = 0;
+ uint64_t desc = 0;
/* Assert all of the data fits, with or without MTE enabled. */
assert(nregs >= 1 && nregs <= 4);
sizem1 = (nregs << msz) - 1;
assert(sizem1 <= R_MTEDESC_SIZEM1_MASK >> R_MTEDESC_SIZEM1_SHIFT);
- assert(data < 1u << SVE_MTEDESC_SHIFT);
if (s->mte_active[0]) {
desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
@@ -4433,9 +4900,9 @@ uint32_t make_svemte_desc(DisasContext *s, unsigned vsz, uint32_t nregs,
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
desc = FIELD_DP32(desc, MTEDESC, SIZEM1, sizem1);
- desc <<= SVE_MTEDESC_SHIFT;
+ desc <<= 32;
}
- return simd_desc(vsz, vsz, desc | data);
+ return simd_desc(vsz, vsz, data) | desc;
}
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -4443,7 +4910,7 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_gvec_mem *fn)
{
TCGv_ptr t_pg;
- uint32_t desc;
+ uint64_t desc;
if (!s->mte_active[0]) {
addr = clean_data_tbi(s, addr);
@@ -4459,11 +4926,11 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
t_pg = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
- fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
+ fn(tcg_env, t_pg, addr, tcg_constant_i64(desc));
}
/* Indexed by [mte][be][dtype][nreg] */
-static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
+static gen_helper_gvec_mem * const ldr_fns[2][2][19][4] = {
{ /* mte inactive, little-endian */
{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
@@ -4487,7 +4954,13 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
- gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
+ gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r },
+
+ { gen_helper_sve_ld1squ_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dqu_le_r, NULL, NULL, NULL },
+ { NULL, gen_helper_sve_ld2qq_le_r,
+ gen_helper_sve_ld3qq_le_r, gen_helper_sve_ld4qq_le_r },
+ },
/* mte inactive, big-endian */
{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
@@ -4512,7 +4985,14 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
{ gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
- gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
+ gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r },
+
+ { gen_helper_sve_ld1squ_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dqu_be_r, NULL, NULL, NULL },
+ { NULL, gen_helper_sve_ld2qq_be_r,
+ gen_helper_sve_ld3qq_be_r, gen_helper_sve_ld4qq_be_r },
+ },
+ },
{ /* mte active, little-endian */
{ { gen_helper_sve_ld1bb_r_mte,
@@ -4545,7 +5025,15 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1dd_le_r_mte,
gen_helper_sve_ld2dd_le_r_mte,
gen_helper_sve_ld3dd_le_r_mte,
- gen_helper_sve_ld4dd_le_r_mte } },
+ gen_helper_sve_ld4dd_le_r_mte },
+
+ { gen_helper_sve_ld1squ_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dqu_le_r_mte, NULL, NULL, NULL },
+ { NULL,
+ gen_helper_sve_ld2qq_le_r_mte,
+ gen_helper_sve_ld3qq_le_r_mte,
+ gen_helper_sve_ld4qq_le_r_mte },
+ },
/* mte active, big-endian */
{ { gen_helper_sve_ld1bb_r_mte,
@@ -4578,7 +5066,16 @@ static gen_helper_gvec_mem * const ldr_fns[2][2][16][4] = {
{ gen_helper_sve_ld1dd_be_r_mte,
gen_helper_sve_ld2dd_be_r_mte,
gen_helper_sve_ld3dd_be_r_mte,
- gen_helper_sve_ld4dd_be_r_mte } } },
+ gen_helper_sve_ld4dd_be_r_mte },
+
+ { gen_helper_sve_ld1squ_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dqu_be_r_mte, NULL, NULL, NULL },
+ { NULL,
+ gen_helper_sve_ld2qq_be_r_mte,
+ gen_helper_sve_ld3qq_be_r_mte,
+ gen_helper_sve_ld4qq_be_r_mte },
+ },
+ },
};
static void do_ld_zpa(DisasContext *s, int zt, int pg,
@@ -4597,9 +5094,32 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg,
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
{
- if (a->rm == 31 || !dc_isar_feature(aa64_sve, s)) {
+ if (a->rm == 31) {
return false;
}
+
+ /* dtypes 16-18 are artificial, representing 128-bit element */
+ switch (a->dtype) {
+ case 0 ... 15:
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ break;
+ case 16: case 17:
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ break;
+ case 18:
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
if (sve_access_check(s)) {
TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
@@ -4611,9 +5131,28 @@ static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
{
- if (!dc_isar_feature(aa64_sve, s)) {
- return false;
+ /* dtypes 16-18 are artificial, representing 128-bit element */
+ switch (a->dtype) {
+ case 0 ... 15:
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ break;
+ case 16: case 17:
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ break;
+ case 18:
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
+
if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s);
int elements = vsz >> dtype_esz[a->dtype];
@@ -4839,7 +5378,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr t_pg;
int poff;
- uint32_t desc;
+ uint64_t desc;
/* Load the first quadword using the normal predicated load helpers. */
if (!s->mte_active[0]) {
@@ -4870,7 +5409,7 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
gen_helper_gvec_mem *fn
= ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
desc = make_svemte_desc(s, 16, 1, dtype_msz(dtype), false, zt);
- fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
+ fn(tcg_env, t_pg, addr, tcg_constant_i64(desc));
/* Replicate that first quadword. */
if (vsz > 16) {
@@ -4913,7 +5452,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
unsigned vsz_r32;
TCGv_ptr t_pg;
int poff, doff;
- uint32_t desc;
+ uint64_t desc;
if (vsz < 32) {
/*
@@ -4954,7 +5493,7 @@ static void do_ldro(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype)
gen_helper_gvec_mem *fn
= ldr_fns[s->mte_active[0]][s->be_data == MO_BE][dtype][0];
desc = make_svemte_desc(s, 32, 1, dtype_msz(dtype), false, zt);
- fn(tcg_env, t_pg, addr, tcg_constant_i32(desc));
+ fn(tcg_env, t_pg, addr, tcg_constant_i64(desc));
/*
* Replicate that first octaword.
@@ -5060,7 +5599,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
int msz, int esz, int nreg)
{
- static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
+ static gen_helper_gvec_mem * const fn_single[2][2][4][5] = {
{ { { gen_helper_sve_st1bb_r,
gen_helper_sve_st1bh_r,
gen_helper_sve_st1bs_r,
@@ -5071,9 +5610,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_le_r },
{ NULL, NULL,
gen_helper_sve_st1ss_le_r,
- gen_helper_sve_st1sd_le_r },
+ gen_helper_sve_st1sd_le_r,
+ gen_helper_sve_st1sq_le_r, },
{ NULL, NULL, NULL,
- gen_helper_sve_st1dd_le_r } },
+ gen_helper_sve_st1dd_le_r,
+ gen_helper_sve_st1dq_le_r, } },
{ { gen_helper_sve_st1bb_r,
gen_helper_sve_st1bh_r,
gen_helper_sve_st1bs_r,
@@ -5084,9 +5625,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_be_r },
{ NULL, NULL,
gen_helper_sve_st1ss_be_r,
- gen_helper_sve_st1sd_be_r },
+ gen_helper_sve_st1sd_be_r,
+ gen_helper_sve_st1sq_be_r },
{ NULL, NULL, NULL,
- gen_helper_sve_st1dd_be_r } } },
+ gen_helper_sve_st1dd_be_r,
+ gen_helper_sve_st1dq_be_r } } },
{ { { gen_helper_sve_st1bb_r_mte,
gen_helper_sve_st1bh_r_mte,
@@ -5098,9 +5641,11 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_le_r_mte },
{ NULL, NULL,
gen_helper_sve_st1ss_le_r_mte,
- gen_helper_sve_st1sd_le_r_mte },
+ gen_helper_sve_st1sd_le_r_mte,
+ gen_helper_sve_st1sq_le_r_mte },
{ NULL, NULL, NULL,
- gen_helper_sve_st1dd_le_r_mte } },
+ gen_helper_sve_st1dd_le_r_mte,
+ gen_helper_sve_st1dq_le_r_mte } },
{ { gen_helper_sve_st1bb_r_mte,
gen_helper_sve_st1bh_r_mte,
gen_helper_sve_st1bs_r_mte,
@@ -5111,59 +5656,73 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
gen_helper_sve_st1hd_be_r_mte },
{ NULL, NULL,
gen_helper_sve_st1ss_be_r_mte,
- gen_helper_sve_st1sd_be_r_mte },
+ gen_helper_sve_st1sd_be_r_mte,
+ gen_helper_sve_st1sq_be_r_mte },
{ NULL, NULL, NULL,
- gen_helper_sve_st1dd_be_r_mte } } },
+ gen_helper_sve_st1dd_be_r_mte,
+ gen_helper_sve_st1dq_be_r_mte } } },
};
- static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
+ static gen_helper_gvec_mem * const fn_multiple[2][2][3][5] = {
{ { { gen_helper_sve_st2bb_r,
gen_helper_sve_st2hh_le_r,
gen_helper_sve_st2ss_le_r,
- gen_helper_sve_st2dd_le_r },
+ gen_helper_sve_st2dd_le_r,
+ gen_helper_sve_st2qq_le_r },
{ gen_helper_sve_st3bb_r,
gen_helper_sve_st3hh_le_r,
gen_helper_sve_st3ss_le_r,
- gen_helper_sve_st3dd_le_r },
+ gen_helper_sve_st3dd_le_r,
+ gen_helper_sve_st3qq_le_r },
{ gen_helper_sve_st4bb_r,
gen_helper_sve_st4hh_le_r,
gen_helper_sve_st4ss_le_r,
- gen_helper_sve_st4dd_le_r } },
+ gen_helper_sve_st4dd_le_r,
+ gen_helper_sve_st4qq_le_r } },
{ { gen_helper_sve_st2bb_r,
gen_helper_sve_st2hh_be_r,
gen_helper_sve_st2ss_be_r,
- gen_helper_sve_st2dd_be_r },
+ gen_helper_sve_st2dd_be_r,
+ gen_helper_sve_st2qq_be_r },
{ gen_helper_sve_st3bb_r,
gen_helper_sve_st3hh_be_r,
gen_helper_sve_st3ss_be_r,
- gen_helper_sve_st3dd_be_r },
+ gen_helper_sve_st3dd_be_r,
+ gen_helper_sve_st3qq_be_r },
{ gen_helper_sve_st4bb_r,
gen_helper_sve_st4hh_be_r,
gen_helper_sve_st4ss_be_r,
- gen_helper_sve_st4dd_be_r } } },
+ gen_helper_sve_st4dd_be_r,
+ gen_helper_sve_st4qq_be_r } } },
{ { { gen_helper_sve_st2bb_r_mte,
gen_helper_sve_st2hh_le_r_mte,
gen_helper_sve_st2ss_le_r_mte,
- gen_helper_sve_st2dd_le_r_mte },
+ gen_helper_sve_st2dd_le_r_mte,
+ gen_helper_sve_st2qq_le_r_mte },
{ gen_helper_sve_st3bb_r_mte,
gen_helper_sve_st3hh_le_r_mte,
gen_helper_sve_st3ss_le_r_mte,
- gen_helper_sve_st3dd_le_r_mte },
+ gen_helper_sve_st3dd_le_r_mte,
+ gen_helper_sve_st3qq_le_r_mte },
{ gen_helper_sve_st4bb_r_mte,
gen_helper_sve_st4hh_le_r_mte,
gen_helper_sve_st4ss_le_r_mte,
- gen_helper_sve_st4dd_le_r_mte } },
+ gen_helper_sve_st4dd_le_r_mte,
+ gen_helper_sve_st4qq_le_r_mte } },
{ { gen_helper_sve_st2bb_r_mte,
gen_helper_sve_st2hh_be_r_mte,
gen_helper_sve_st2ss_be_r_mte,
- gen_helper_sve_st2dd_be_r_mte },
+ gen_helper_sve_st2dd_be_r_mte,
+ gen_helper_sve_st2qq_be_r_mte },
{ gen_helper_sve_st3bb_r_mte,
gen_helper_sve_st3hh_be_r_mte,
gen_helper_sve_st3ss_be_r_mte,
- gen_helper_sve_st3dd_be_r_mte },
+ gen_helper_sve_st3dd_be_r_mte,
+ gen_helper_sve_st3qq_be_r_mte },
{ gen_helper_sve_st4bb_r_mte,
gen_helper_sve_st4hh_be_r_mte,
gen_helper_sve_st4ss_be_r_mte,
- gen_helper_sve_st4dd_be_r_mte } } },
+ gen_helper_sve_st4dd_be_r_mte,
+ gen_helper_sve_st4qq_be_r_mte } } },
};
gen_helper_gvec_mem *fn;
int be = s->be_data == MO_BE;
@@ -5182,12 +5741,32 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
{
- if (!dc_isar_feature(aa64_sve, s)) {
- return false;
- }
if (a->rm == 31 || a->msz > a->esz) {
return false;
}
+ switch (a->esz) {
+ case MO_8 ... MO_64:
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ break;
+ case MO_128:
+ if (a->nreg == 0) {
+ assert(a->msz < a->esz);
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ } else {
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
if (sve_access_check(s)) {
TCGv_i64 addr = tcg_temp_new_i64();
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
@@ -5199,12 +5778,32 @@ static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
{
- if (!dc_isar_feature(aa64_sve, s)) {
- return false;
- }
if (a->msz > a->esz) {
return false;
}
+ switch (a->esz) {
+ case MO_8 ... MO_64:
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ break;
+ case MO_128:
+ if (a->nreg == 0) {
+ assert(a->msz < a->esz);
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ } else {
+ if (!dc_isar_feature(aa64_sme2p1_or_sve2p1, s)) {
+ return false;
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s);
int elements = vsz >> a->esz;
@@ -5228,14 +5827,14 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
TCGv_ptr t_zm = tcg_temp_new_ptr();
TCGv_ptr t_pg = tcg_temp_new_ptr();
TCGv_ptr t_zt = tcg_temp_new_ptr();
- uint32_t desc;
+ uint64_t desc;
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
tcg_gen_addi_ptr(t_zm, tcg_env, vec_full_reg_offset(s, zm));
tcg_gen_addi_ptr(t_zt, tcg_env, vec_full_reg_offset(s, zt));
desc = make_svemte_desc(s, vec_full_reg_size(s), 1, msz, is_write, scale);
- fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i32(desc));
+ fn(tcg_env, t_zt, t_pg, t_zm, scalar, tcg_constant_i64(desc));
}
/* Indexed by [mte][be][ff][xs][u][msz]. */
@@ -5566,6 +6165,14 @@ gather_load_fn64[2][2][2][3][2][4] = {
gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
+static gen_helper_gvec_mem_scatter * const
+gather_load_fn128[2][2] = {
+ { gen_helper_sve_ldqq_le_zd,
+ gen_helper_sve_ldqq_be_zd },
+ { gen_helper_sve_ldqq_le_zd_mte,
+ gen_helper_sve_ldqq_be_zd_mte }
+};
+
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
gen_helper_gvec_mem_scatter *fn = NULL;
@@ -5587,6 +6194,8 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
case MO_64:
fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
break;
+ default:
+ g_assert_not_reached();
}
assert(fn != NULL);
@@ -5595,6 +6204,32 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
return true;
}
+static bool trans_LD1Q(DisasContext *s, arg_LD1Q *a)
+{
+ gen_helper_gvec_mem_scatter *fn = NULL;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ fn = gather_load_fn128[mte][be];
+ assert(fn != NULL);
+
+ /*
+ * Unlike LD1_zprz, a->rm is the scalar register and it can be XZR, not XSP.
+ * a->rn is the vector register.
+ */
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+ cpu_reg(s, a->rm), MO_128, false, fn);
+ return true;
+}
+
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
gen_helper_gvec_mem_scatter *fn = NULL;
@@ -5754,6 +6389,14 @@ static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
gen_helper_sve_stdd_be_zd_mte, } } },
};
+static gen_helper_gvec_mem_scatter * const
+scatter_store_fn128[2][2] = {
+ { gen_helper_sve_stqq_le_zd,
+ gen_helper_sve_stqq_be_zd },
+ { gen_helper_sve_stqq_le_zd_mte,
+ gen_helper_sve_stqq_be_zd_mte }
+};
+
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
gen_helper_gvec_mem_scatter *fn;
@@ -5785,6 +6428,29 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
return true;
}
+static bool trans_ST1Q(DisasContext *s, arg_ST1Q *a)
+{
+ gen_helper_gvec_mem_scatter *fn;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
+
+ if (!dc_isar_feature(aa64_sve2p1, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ if (!sve_access_check(s)) {
+ return true;
+ }
+ fn = scatter_store_fn128[mte][be];
+ /*
+ * Unlike ST1_zprz, a->rm is the scalar register, and it
+ * can be XZR, not XSP. a->rn is the vector register.
+ */
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0,
+ cpu_reg(s, a->rm), MO_128, true, fn);
+ return true;
+}
+
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
gen_helper_gvec_mem_scatter *fn = NULL;
@@ -5911,6 +6577,7 @@ TRANS_FEAT(MOVPRFX_z, aa64_sve, do_movz_zpz, a->rd, a->rn, a->pg, a->esz, false)
*/
TRANS_FEAT(MUL_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, tcg_gen_gvec_mul, a)
+TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_fn_arg_zzz, gen_gvec_sve2_sqdmulh, a)
static gen_helper_gvec_3 * const smulh_zzz_fns[4] = {
gen_helper_gvec_smulh_b, gen_helper_gvec_smulh_h,
@@ -5929,13 +6596,6 @@ TRANS_FEAT(UMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
TRANS_FEAT(PMUL_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
gen_helper_gvec_pmul_b, a, 0)
-static gen_helper_gvec_3 * const sqdmulh_zzz_fns[4] = {
- gen_helper_sve2_sqdmulh_b, gen_helper_sve2_sqdmulh_h,
- gen_helper_sve2_sqdmulh_s, gen_helper_sve2_sqdmulh_d,
-};
-TRANS_FEAT(SQDMULH_zzz, aa64_sve2, gen_gvec_ool_arg_zzz,
- sqdmulh_zzz_fns[a->esz], a, 0)
-
static gen_helper_gvec_3 * const sqrdmulh_zzz_fns[4] = {
gen_helper_sve2_sqrdmulh_b, gen_helper_sve2_sqrdmulh_h,
gen_helper_sve2_sqrdmulh_s, gen_helper_sve2_sqrdmulh_d,
@@ -7008,17 +7668,26 @@ DO_ZPZZ_FP(FMINNMP, aa64_sve2, sve2_fminnmp_zpzz)
DO_ZPZZ_FP(FMAXP, aa64_sve2, sve2_fmaxp_zpzz)
DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
+static bool do_fmmla(DisasContext *s, arg_rrrr_esz *a,
+ gen_helper_gvec_4_ptr *fn)
+{
+ if (sve_access_check(s)) {
+ if (vec_full_reg_size(s) < 4 * memop_size(a->esz)) {
+ unallocated_encoding(s);
+ } else {
+ gen_gvec_fpst_zzzz(s, fn, a->rd, a->rn, a->rm, a->ra, 0, FPST_A64);
+ }
+ }
+ return true;
+}
+
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, do_fmmla, a, gen_helper_fmmla_s)
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, do_fmmla, a, gen_helper_fmmla_d)
+
/*
* SVE Integer Multiply-Add (unpredicated)
*/
-TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
- gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
- 0, FPST_A64)
-TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
- gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
- 0, FPST_A64)
-
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
gen_helper_sve2_sqdmlal_zzzw_s, gen_helper_sve2_sqdmlal_zzzw_d,
@@ -7111,8 +7780,13 @@ static gen_helper_gvec_4 * const sqrdcmlah_fns[] = {
TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
sqrdcmlah_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->rot)
-TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
- a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
+TRANS_FEAT(USDOT_zzzz_4s, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_usdot_4b, a, 0)
+
+TRANS_FEAT(SDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_sdot_2h, a, 0)
+TRANS_FEAT(UDOT_zzzz_2s, aa64_sme2_or_sve2p1, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_udot_2h, a, 0)
TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
gen_helper_crypto_aesmc, a->rd, a->rd, 0)
@@ -7174,7 +7848,7 @@ static bool do_FMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sub, bool sel)
{
return gen_gvec_ptr_zzzz(s, gen_helper_sve2_fmlal_zzxw_s,
a->rd, a->rn, a->rm, a->ra,
- (a->index << 2) | (sel << 1) | sub, tcg_env);
+ (a->index << 3) | (sel << 1) | sub, tcg_env);
}
TRANS_FEAT(FMLALB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, false)
@@ -7189,6 +7863,11 @@ TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
gen_helper_gvec_ummla_b, a, 0)
+TRANS_FEAT(FDOT_zzzz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzzz,
+ gen_helper_sme2_fdot_h, a, 0)
+TRANS_FEAT(FDOT_zzxz, aa64_sme2_or_sve2p1, gen_gvec_env_arg_zzxz,
+ gen_helper_sme2_fdot_idx_h, a)
+
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_env_arg_zzxz,
@@ -7218,6 +7897,36 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
+static bool do_BFMLSL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
+{
+ if (s->fpcr_ah) {
+ return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl,
+ a->rd, a->rn, a->rm, a->ra, sel, FPST_AH);
+ } else {
+ return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl,
+ a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
+ }
+}
+
+TRANS_FEAT(BFMLSLB_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, false)
+TRANS_FEAT(BFMLSLT_zzzw, aa64_sme2_or_sve2p1, do_BFMLSL_zzzw, a, true)
+
+static bool do_BFMLSL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
+{
+ if (s->fpcr_ah) {
+ return gen_gvec_fpst_zzzz(s, gen_helper_gvec_ah_bfmlsl_idx,
+ a->rd, a->rn, a->rm, a->ra,
+ (a->index << 1) | sel, FPST_AH);
+ } else {
+ return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlsl_idx,
+ a->rd, a->rn, a->rm, a->ra,
+ (a->index << 1) | sel, FPST_A64);
+ }
+}
+
+TRANS_FEAT(BFMLSLB_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, false)
+TRANS_FEAT(BFMLSLT_zzxw, aa64_sme2_or_sve2p1, do_BFMLSL_zzxw, a, true)
+
static bool trans_PSEL(DisasContext *s, arg_psel *a)
{
int vl = vec_full_reg_size(s);
@@ -7226,7 +7935,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
TCGv_i64 tmp, didx, dbit;
TCGv_ptr ptr;
- if (!dc_isar_feature(aa64_sme, s)) {
+ if (!dc_isar_feature(aa64_sme_or_sve2p1, s)) {
return false;
}
if (!sve_access_check(s)) {
@@ -7265,6 +7974,7 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
tcg_gen_neg_i64(tmp, tmp);
/* Apply to either copy the source, or write zeros. */
+ pl = size_for_gvec(pl);
tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
pred_full_reg_offset(s, a->pn), tmp, pl, pl);
return true;
@@ -7319,7 +8029,7 @@ static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}
-TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
+TRANS_FEAT(SCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
{
@@ -7370,4 +8080,137 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
}
-TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
+TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
+
+static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a)
+{
+ static gen_helper_gvec_3_ptr * const fn[] = {
+ gen_helper_sme2_bfclamp,
+ gen_helper_sme2_fclamp_h,
+ gen_helper_sme2_fclamp_s,
+ gen_helper_sme2_fclamp_d,
+ };
+
+ /* This insn uses MO_8 to encode BFloat16. */
+ if (a->esz == MO_8
+ ? !dc_isar_feature(aa64_sve_b16b16, s)
+ : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
+ return false;
+ }
+
+ /* So far we never optimize rda with MOVPRFX */
+ assert(a->rd == a->ra);
+ return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
+TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
+ gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0)
+TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
+ gen_helper_sme2_uqcvtn_sh, a->rd, a->rn, 0)
+TRANS_FEAT(SQCVTUN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
+ gen_helper_sme2_sqcvtun_sh, a->rd, a->rn, 0)
+
+static bool gen_ldst_c(DisasContext *s, TCGv_i64 addr, int zd, int png,
+ MemOp esz, bool is_write, int n, bool strided)
+{
+ typedef void ldst_c_fn(TCGv_env, TCGv_ptr, TCGv_i64,
+ TCGv_i32, TCGv_i64);
+ static ldst_c_fn * const f_ldst[2][2][4] = {
+ { { gen_helper_sve2p1_ld1bb_c,
+ gen_helper_sve2p1_ld1hh_le_c,
+ gen_helper_sve2p1_ld1ss_le_c,
+ gen_helper_sve2p1_ld1dd_le_c, },
+ { gen_helper_sve2p1_ld1bb_c,
+ gen_helper_sve2p1_ld1hh_be_c,
+ gen_helper_sve2p1_ld1ss_be_c,
+ gen_helper_sve2p1_ld1dd_be_c, } },
+
+ { { gen_helper_sve2p1_st1bb_c,
+ gen_helper_sve2p1_st1hh_le_c,
+ gen_helper_sve2p1_st1ss_le_c,
+ gen_helper_sve2p1_st1dd_le_c, },
+ { gen_helper_sve2p1_st1bb_c,
+ gen_helper_sve2p1_st1hh_be_c,
+ gen_helper_sve2p1_st1ss_be_c,
+ gen_helper_sve2p1_st1dd_be_c, } }
+ };
+
+ TCGv_i32 t_png;
+ TCGv_i64 t_desc;
+ TCGv_ptr t_zd;
+ uint64_t desc, lg2_rstride = 0;
+ bool be = s->be_data == MO_BE;
+
+ assert(n == 2 || n == 4);
+ if (strided) {
+ lg2_rstride = 3;
+ if (n == 4) {
+ /* Validate ZD alignment. */
+ if (zd & 4) {
+ return false;
+ }
+ lg2_rstride = 2;
+ }
+ /* Ignore non-temporal bit */
+ zd &= ~8;
+ }
+
+ if (strided || !dc_isar_feature(aa64_sve2p1, s)
+ ? !sme_sm_enabled_check(s)
+ : !sve_access_check(s)) {
+ return true;
+ }
+
+ if (!s->mte_active[0]) {
+ addr = clean_data_tbi(s, addr);
+ }
+
+ desc = n == 2 ? 0 : 1;
+ desc = desc | (lg2_rstride << 1);
+ desc = make_svemte_desc(s, vec_full_reg_size(s), 1, esz, is_write, desc);
+ t_desc = tcg_constant_i64(desc);
+
+ t_png = tcg_temp_new_i32();
+ tcg_gen_ld16u_i32(t_png, tcg_env,
+ pred_full_reg_offset(s, png) ^
+ (HOST_BIG_ENDIAN ? 6 : 0));
+
+ t_zd = tcg_temp_new_ptr();
+ tcg_gen_addi_ptr(t_zd, tcg_env, vec_full_reg_offset(s, zd));
+
+ f_ldst[is_write][be][esz](tcg_env, t_zd, addr, t_png, t_desc);
+ return true;
+}
+
+static bool gen_ldst_zcrr_c(DisasContext *s, arg_zcrr_ldst *a,
+ bool is_write, bool strided)
+{
+ TCGv_i64 addr = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
+ tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
+ return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write,
+ a->nreg, strided);
+}
+
+static bool gen_ldst_zcri_c(DisasContext *s, arg_zcri_ldst *a,
+ bool is_write, bool strided)
+{
+ TCGv_i64 addr = tcg_temp_new_i64();
+
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
+ a->imm * a->nreg * vec_full_reg_size(s));
+ return gen_ldst_c(s, addr, a->rd, a->png, a->esz, is_write,
+ a->nreg, strided);
+}
+
+TRANS_FEAT(LD1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, false, false)
+TRANS_FEAT(LD1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, false, false)
+TRANS_FEAT(ST1_zcrr, aa64_sme2_or_sve2p1, gen_ldst_zcrr_c, a, true, false)
+TRANS_FEAT(ST1_zcri, aa64_sme2_or_sve2p1, gen_ldst_zcri_c, a, true, false)
+
+TRANS_FEAT(LD1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, false, true)
+TRANS_FEAT(LD1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, false, true)
+TRANS_FEAT(ST1_zcrr_stride, aa64_sme2, gen_ldst_zcrr_c, a, true, true)
+TRANS_FEAT(ST1_zcri_stride, aa64_sme2, gen_ldst_zcri_c, a, true, true)