aboutsummaryrefslogtreecommitdiff
path: root/target/arm/translate-sve.c
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm/translate-sve.c')
-rw-r--r--target/arm/translate-sve.c393
1 files changed, 328 insertions, 65 deletions
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 62b5f30..41f8b12 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -1286,6 +1286,19 @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
return true;
}
+/*
+ * Translate ADDSVL: rd = rn + imm * (streaming vector register size).
+ *
+ * Returns false when the aa64_sme feature is absent.  Otherwise returns
+ * true, and emits the addition only if sme_enabled_check() succeeds
+ * (presumably that check raises the exception itself on failure -- confirm).
+ */
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_enabled_check(s)) {
+ /* Both operands go through cpu_reg_sp(), unlike RDSVL's cpu_reg(). */
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+ /* The scaled immediate is computed at translation time. */
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
+ }
+ return true;
+}
+
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
{
if (!dc_isar_feature(aa64_sve, s)) {
@@ -1299,6 +1312,19 @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
return true;
}
+/*
+ * Translate ADDSPL: rd = rn + imm * (streaming predicate register size).
+ *
+ * Same shape as trans_ADDSVL, but scaled by streaming_pred_reg_size().
+ * Returns false when the aa64_sme feature is absent; otherwise returns
+ * true, emitting code only if sme_enabled_check() succeeds.
+ */
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_enabled_check(s)) {
+ TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+ TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+ /* The scaled immediate is computed at translation time. */
+ tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
+ }
+ return true;
+}
+
static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
{
if (!dc_isar_feature(aa64_sve, s)) {
@@ -1311,6 +1337,18 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
return true;
}
+/*
+ * Translate RDSVL: rd = imm * (streaming vector register size).
+ *
+ * Returns false when the aa64_sme feature is absent; otherwise returns
+ * true, emitting the move only if sme_enabled_check() succeeds.
+ */
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
+{
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (sme_enabled_check(s)) {
+ /* Destination uses cpu_reg(), not cpu_reg_sp() as in ADDSVL/ADDSPL. */
+ TCGv_i64 reg = cpu_reg(s, a->rd);
+ /* The whole value is a translation-time constant. */
+ tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
+ }
+ return true;
+}
+
/*
*** SVE Compute Vector Address Group
*/
@@ -1320,10 +1358,10 @@ static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
}
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
/*
*** SVE Integer Misc - Unpredicated Group
@@ -1333,14 +1371,15 @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
NULL, gen_helper_sve_fexpa_h,
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
};
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
- fexpa_fns[a->esz], a->rd, a->rn, 0)
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
+ fexpa_fns[a->esz], a->rd, a->rn, 0)
static gen_helper_gvec_3 * const ftssel_fns[4] = {
NULL, gen_helper_sve_ftssel_h,
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
};
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
+ ftssel_fns[a->esz], a, 0)
/*
*** SVE Predicate Logical Operations Group
@@ -1785,7 +1824,8 @@ static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
/* Note pat == 31 is #all, to set all elements. */
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
+ do_predset, 0, FFR_PRED_NUM, 31, false)
/* Note pat == 32 is #unimp, to set no elements. */
TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
@@ -1799,11 +1839,13 @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a)
.rd = a->rd, .pg = a->pg, .s = a->s,
.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
};
+
+ s->is_nonstreaming = true;
return trans_AND_pppp(s, &alt_a);
}
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
void (*gen_fn)(TCGv_i32, TCGv_ptr,
@@ -2533,7 +2575,8 @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
static gen_helper_gvec_3 * const compact_fns[4] = {
NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
};
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
+ compact_fns[a->esz], a, 0)
/* Call the helper that computes the ARM LastActiveElement pseudocode
* function, scaled by the element size. This includes the not found
@@ -2858,6 +2901,8 @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, revh_fns[a->esz], a, 0)
TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
+/* REVD is gated on aa64_sme and expands to the out-of-line sme_revd_q helper. */
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
+
TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
gen_helper_sve_splice, a, a->esz)
@@ -3856,9 +3901,9 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
NULL, gen_helper_sve_ftmad_h,
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
};
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
- ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
- a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
+ ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
+ a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
/*
*** SVE Floating Point Accumulating Reduction Group
@@ -3881,6 +3926,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -3918,12 +3964,18 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
-DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)
#undef DO_FP3
+/*
+ * FTSMUL helpers, indexed by a->esz.  Entry 0 is NULL: there is no helper
+ * for the smallest element size (presumably the expander rejects a NULL
+ * function -- confirm against gen_gvec_fpst_arg_zzz).
+ */
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
+ NULL, gen_helper_gvec_ftsmul_h,
+ gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
+};
+/* FTSMUL is declared via the NONSTREAMING variant of TRANS_FEAT. */
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
+ ftsmul_fns[a->esz], a, 0)
+
/*
*** SVE Floating Point Arithmetic - Predicated Group
*/
@@ -4256,7 +4308,8 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
* The load should begin at the address Rn + IMM.
*/
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
+ int len, int rn, int imm)
{
int len_align = QEMU_ALIGN_DOWN(len, 8);
int len_remain = len % 8;
@@ -4282,7 +4335,7 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
t0 = tcg_temp_new_i64();
for (i = 0; i < len_align; i += 8) {
tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_st_i64(t0, cpu_env, vofs + i);
+ tcg_gen_st_i64(t0, base, vofs + i);
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
}
tcg_temp_free_i64(t0);
@@ -4295,6 +4348,12 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
clean_addr = new_tmp_a64_local(s);
tcg_gen_mov_i64(clean_addr, t0);
+ if (base != cpu_env) {
+ TCGv_ptr b = tcg_temp_local_new_ptr();
+ tcg_gen_mov_ptr(b, base);
+ base = b;
+ }
+
gen_set_label(loop);
t0 = tcg_temp_new_i64();
@@ -4302,7 +4361,7 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
tp = tcg_temp_new_ptr();
- tcg_gen_add_ptr(tp, cpu_env, i);
+ tcg_gen_add_ptr(tp, base, i);
tcg_gen_addi_ptr(i, i, 8);
tcg_gen_st_i64(t0, tp, vofs);
tcg_temp_free_ptr(tp);
@@ -4310,6 +4369,11 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(i);
+
+ if (base != cpu_env) {
+ tcg_temp_free_ptr(base);
+ assert(len_remain == 0);
+ }
}
/*
@@ -4338,13 +4402,14 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
default:
g_assert_not_reached();
}
- tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
+ tcg_gen_st_i64(t0, base, vofs + len_align);
tcg_temp_free_i64(t0);
}
}
/* Similarly for stores. */
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
+ int len, int rn, int imm)
{
int len_align = QEMU_ALIGN_DOWN(len, 8);
int len_remain = len % 8;
@@ -4370,7 +4435,7 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
t0 = tcg_temp_new_i64();
for (i = 0; i < len_align; i += 8) {
- tcg_gen_ld_i64(t0, cpu_env, vofs + i);
+ tcg_gen_ld_i64(t0, base, vofs + i);
tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
tcg_gen_addi_i64(clean_addr, clean_addr, 8);
}
@@ -4384,11 +4449,17 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
clean_addr = new_tmp_a64_local(s);
tcg_gen_mov_i64(clean_addr, t0);
+ if (base != cpu_env) {
+ TCGv_ptr b = tcg_temp_local_new_ptr();
+ tcg_gen_mov_ptr(b, base);
+ base = b;
+ }
+
gen_set_label(loop);
t0 = tcg_temp_new_i64();
tp = tcg_temp_new_ptr();
- tcg_gen_add_ptr(tp, cpu_env, i);
+ tcg_gen_add_ptr(tp, base, i);
tcg_gen_ld_i64(t0, tp, vofs);
tcg_gen_addi_ptr(i, i, 8);
tcg_temp_free_ptr(tp);
@@ -4399,12 +4470,17 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(i);
+
+ if (base != cpu_env) {
+ tcg_temp_free_ptr(base);
+ assert(len_remain == 0);
+ }
}
/* Predicate register stores can be any multiple of 2. */
if (len_remain) {
t0 = tcg_temp_new_i64();
- tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
switch (len_remain) {
case 2:
@@ -4436,7 +4512,7 @@ static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = vec_full_reg_size(s);
int off = vec_full_reg_offset(s, a->rd);
- do_ldr(s, off, size, a->rn, a->imm * size);
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
}
return true;
}
@@ -4449,7 +4525,7 @@ static bool trans_LDR_pri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = pred_full_reg_size(s);
int off = pred_full_reg_offset(s, a->rd);
- do_ldr(s, off, size, a->rn, a->imm * size);
+ gen_sve_ldr(s, cpu_env, off, size, a->rn, a->imm * size);
}
return true;
}
@@ -4462,7 +4538,7 @@ static bool trans_STR_zri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = vec_full_reg_size(s);
int off = vec_full_reg_offset(s, a->rd);
- do_str(s, off, size, a->rn, a->imm * size);
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
}
return true;
}
@@ -4475,7 +4551,7 @@ static bool trans_STR_pri(DisasContext *s, arg_rri *a)
if (sve_access_check(s)) {
int size = pred_full_reg_size(s);
int off = pred_full_reg_offset(s, a->rd);
- do_str(s, off, size, a->rn, a->imm * size);
+ gen_sve_str(s, cpu_env, off, size, a->rn, a->imm * size);
}
return true;
}
@@ -4793,6 +4869,7 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (sve_access_check(s)) {
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
@@ -4894,6 +4971,7 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (sve_access_check(s)) {
int vsz = vec_full_reg_size(s);
int elements = vsz >> dtype_esz[a->dtype];
@@ -5048,6 +5126,7 @@ static bool trans_LD1RO_zprr(DisasContext *s, arg_rprr_load *a)
if (a->rm == 31) {
return false;
}
+ s->is_nonstreaming = true;
if (sve_access_check(s)) {
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
@@ -5062,6 +5141,7 @@ static bool trans_LD1RO_zpri(DisasContext *s, arg_rpri_load *a)
if (!dc_isar_feature(aa64_sve_f64mm, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (sve_access_check(s)) {
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
@@ -5657,6 +5737,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -5688,6 +5769,7 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -5722,6 +5804,7 @@ static bool trans_LDNT1_zprz(DisasContext *s, arg_LD1_zprz *a)
if (!dc_isar_feature(aa64_sve2, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -5845,6 +5928,7 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -5875,6 +5959,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -5909,6 +5994,7 @@ static bool trans_STNT1_zprz(DisasContext *s, arg_ST1_zprz *a)
if (!dc_isar_feature(aa64_sve2, s)) {
return false;
}
+ s->is_nonstreaming = true;
if (!sve_access_check(s)) {
return true;
}
@@ -5953,6 +6039,17 @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
return true;
}
+/*
+ * Translate a prefetch form that is flagged as non-streaming.
+ *
+ * Returns false when the aa64_sve feature is absent, true otherwise.
+ * No data-movement code is emitted.
+ */
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
+{
+ if (!dc_isar_feature(aa64_sve, s)) {
+ return false;
+ }
+ /* Prefetch is a nop within QEMU. */
+ s->is_nonstreaming = true;
+ /*
+ * The access check is still performed (after marking the insn as
+ * non-streaming); its result is deliberately discarded because there
+ * is nothing further to generate either way.
+ */
+ (void)sve_access_check(s);
+ return true;
+}
+
/*
* Move Prefix
*
@@ -6181,9 +6278,13 @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz *a, bool sel)
gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
NULL, gen_helper_sve2_pmull_d,
};
- if (a->esz == 0
- ? !dc_isar_feature(aa64_sve2_pmull128, s)
- : !dc_isar_feature(aa64_sve, s)) {
+
+ if (a->esz == 0) {
+ if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
+ return false;
+ }
+ s->is_nonstreaming = true;
+ } else if (!dc_isar_feature(aa64_sve, s)) {
return false;
}
return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
@@ -6371,22 +6472,22 @@ static gen_helper_gvec_3 * const bext_fns[4] = {
gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
};
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
- bext_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+ bext_fns[a->esz], a, 0)
static gen_helper_gvec_3 * const bdep_fns[4] = {
gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
};
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
- bdep_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+ bdep_fns[a->esz], a, 0)
static gen_helper_gvec_3 * const bgrp_fns[4] = {
gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
};
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
- bgrp_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+ bgrp_fns[a->esz], a, 0)
static gen_helper_gvec_3 * const cadd_fns[4] = {
gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
@@ -7094,21 +7195,21 @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
static gen_helper_gvec_flags_4 * const match_fns[4] = {
gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
};
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
};
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
static gen_helper_gvec_4 * const histcnt_fns[4] = {
NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
};
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
- histcnt_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
+ histcnt_fns[a->esz], a, 0)
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
- a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
+ a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
@@ -7120,10 +7221,12 @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
* SVE Integer Multiply-Add (unpredicated)
*/
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_s,
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz, gen_helper_fmmla_d,
- a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
+ gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
+ 0, FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
+ gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
+ 0, FPST_FPCR)
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
@@ -7220,20 +7323,21 @@ TRANS_FEAT(SQRDCMLAH_zzzz, aa64_sve2, gen_gvec_ool_zzzz,
TRANS_FEAT(USDOT_zzzz, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
- gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
+ gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
- gen_helper_crypto_aese, a, false)
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
- gen_helper_crypto_aese, a, true)
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_aese, a, false)
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_aese, a, true)
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
- gen_helper_crypto_sm4e, a, 0)
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
- gen_helper_crypto_sm4ekey, a, 0)
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_sm4e, a, 0)
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+ gen_helper_crypto_sm4ekey, a, 0)
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
+ gen_gvec_rax1, a)
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
@@ -7284,20 +7388,20 @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, false, true)
TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
- gen_helper_gvec_smmla_b, a, 0)
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
- gen_helper_gvec_usmmla_b, a, 0)
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
- gen_helper_gvec_ummla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_smmla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_usmmla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_ummla_b, a, 0)
TRANS_FEAT(BFDOT_zzzz, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
gen_helper_gvec_bfdot, a, 0)
TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
gen_helper_gvec_bfdot_idx, a)
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
- gen_helper_gvec_bfmmla, a, 0)
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_zzzz,
+ gen_helper_gvec_bfmmla, a, 0)
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
@@ -7317,3 +7421,162 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
+
+/*
+ * Translate PSEL (gated on the aa64_sme feature).
+ *
+ * The generated code selects one predicate bit from register pm, at element
+ * index (reg[rv] + imm) modulo the number of elements, and then writes
+ * pd = pn when that bit is set, or pd = 0 when it is clear.
+ *
+ * Returns false when aa64_sme is absent; otherwise returns true, emitting
+ * nothing if sve_access_check() fails.
+ */
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
+{
+ int vl = vec_full_reg_size(s);
+ int pl = pred_gvec_reg_size(s);
+ /* Number of (1 << esz)-byte elements in a vector of vl bytes. */
+ int elements = vl >> a->esz;
+ TCGv_i64 tmp, didx, dbit;
+ TCGv_ptr ptr;
+
+ if (!dc_isar_feature(aa64_sme, s)) {
+ return false;
+ }
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ tmp = tcg_temp_new_i64();
+ dbit = tcg_temp_new_i64();
+ didx = tcg_temp_new_i64();
+ ptr = tcg_temp_new_ptr();
+
+ /* Compute the predicate element: (reg[rv] + imm) mod elements. */
+ tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
+ if (is_power_of_2(elements)) {
+ /* Power-of-2 element count: the modulo reduces to a mask. */
+ tcg_gen_andi_i64(tmp, tmp, elements - 1);
+ } else {
+ tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
+ }
+
+ /*
+ * Extract the predicate byte and bit indices: scale the element index
+ * by the element size to get a bit position, then split it into the
+ * bit within a byte (dbit) and the byte index (didx).
+ */
+ tcg_gen_shli_i64(tmp, tmp, a->esz);
+ tcg_gen_andi_i64(dbit, tmp, 7);
+ tcg_gen_shri_i64(didx, tmp, 3);
+ if (HOST_BIG_ENDIAN) {
+ /*
+ * Flip the low three bits of the byte index on big-endian hosts
+ * (presumably because predicates are stored in host-endian 64-bit
+ * units -- confirm).
+ */
+ tcg_gen_xori_i64(didx, didx, 7);
+ }
+
+ /* Load the predicate byte (8-bit unsigned load at pm's offset + didx). */
+ tcg_gen_trunc_i64_ptr(ptr, didx);
+ tcg_gen_add_ptr(ptr, ptr, cpu_env);
+ tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
+
+ /* Extract the predicate bit and replicate to MO_64 (0 or all-ones). */
+ tcg_gen_shr_i64(tmp, tmp, dbit);
+ tcg_gen_andi_i64(tmp, tmp, 1);
+ tcg_gen_neg_i64(tmp, tmp);
+
+ /* Apply to either copy the source, or write zeros: pd = pn & tmp. */
+ tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
+ pred_full_reg_offset(s, a->pn), tmp, pl, pl);
+
+ tcg_temp_free_i64(tmp);
+ tcg_temp_free_i64(dbit);
+ tcg_temp_free_i64(didx);
+ tcg_temp_free_ptr(ptr);
+ return true;
+}
+
+/* 32-bit signed clamp: d = smin(smax(a, n), m), i.e. a limited to [n, m]. */
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
+{
+ tcg_gen_smax_i32(d, a, n);
+ tcg_gen_smin_i32(d, d, m);
+}
+
+/* 64-bit signed clamp: d = smin(smax(a, n), m), i.e. a limited to [n, m]. */
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
+{
+ tcg_gen_smax_i64(d, a, n);
+ tcg_gen_smin_i64(d, d, m);
+}
+
+/*
+ * Host-vector signed clamp with elements of size vece:
+ * d = smin(smax(a, n), m), applied per element.
+ */
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec a)
+{
+ tcg_gen_smax_vec(vece, d, a, n);
+ tcg_gen_smin_vec(vece, d, d, m);
+}
+
+/*
+ * Expand a signed clamp over whole vector operands via tcg_gen_gvec_4().
+ *
+ * vece selects the element size and indexes the ops[] table below; d, n, m
+ * and a are operand offsets and oprsz/maxsz the sizes, all forwarded
+ * unchanged to tcg_gen_gvec_4().
+ */
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ /* Vector opcodes used by gen_sclamp_vec; the list is 0-terminated. */
+ static const TCGOpcode vecop[] = {
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ /*
+ * One entry per element size.  Every entry has a vector expander and an
+ * out-of-line helper; only MO_32 and MO_64 also provide an integer
+ * expander (fni4 / fni8).
+ */
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_b,
+ .opt_opc = vecop,
+ .vece = MO_8 },
+ { .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_h,
+ .opt_opc = vecop,
+ .vece = MO_16 },
+ { .fni4 = gen_sclamp_i32,
+ .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_s,
+ .opt_opc = vecop,
+ .vece = MO_32 },
+ { .fni8 = gen_sclamp_i64,
+ .fniv = gen_sclamp_vec,
+ .fno = gen_helper_gvec_sclamp_d,
+ .opt_opc = vecop,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
+}
+
+/* SCLAMP is gated on aa64_sme and expanded inline through gen_sclamp. */
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_sclamp, a)
+
+/* 32-bit unsigned clamp: d = umin(umax(a, n), m), i.e. a limited to [n, m]. */
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
+{
+ tcg_gen_umax_i32(d, a, n);
+ tcg_gen_umin_i32(d, d, m);
+}
+
+/* 64-bit unsigned clamp: d = umin(umax(a, n), m), i.e. a limited to [n, m]. */
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
+{
+ tcg_gen_umax_i64(d, a, n);
+ tcg_gen_umin_i64(d, d, m);
+}
+
+/*
+ * Host-vector unsigned clamp with elements of size vece:
+ * d = umin(umax(a, n), m), applied per element.
+ */
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec a)
+{
+ tcg_gen_umax_vec(vece, d, a, n);
+ tcg_gen_umin_vec(vece, d, d, m);
+}
+
+/*
+ * Expand an unsigned clamp over whole vector operands via tcg_gen_gvec_4().
+ *
+ * vece selects the element size and indexes the ops[] table below; d, n, m
+ * and a are operand offsets and oprsz/maxsz the sizes, all forwarded
+ * unchanged to tcg_gen_gvec_4().
+ */
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ /* Vector opcodes used by gen_uclamp_vec; the list is 0-terminated. */
+ static const TCGOpcode vecop[] = {
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ /*
+ * One entry per element size.  Every entry has a vector expander and an
+ * out-of-line helper; only MO_32 and MO_64 also provide an integer
+ * expander (fni4 / fni8).
+ */
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_b,
+ .opt_opc = vecop,
+ .vece = MO_8 },
+ { .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_h,
+ .opt_opc = vecop,
+ .vece = MO_16 },
+ { .fni4 = gen_uclamp_i32,
+ .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_s,
+ .opt_opc = vecop,
+ .vece = MO_32 },
+ { .fni8 = gen_uclamp_i64,
+ .fniv = gen_uclamp_vec,
+ .fno = gen_helper_gvec_uclamp_d,
+ .opt_opc = vecop,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
+}
+
+/* UCLAMP is gated on aa64_sme and expanded inline through gen_uclamp. */
+TRANS_FEAT(UCLAMP, aa64_sme, gen_gvec_fn_arg_zzzz, gen_uclamp, a)