aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2025-07-04 08:20:46 -0600
committer Peter Maydell <peter.maydell@linaro.org> 2025-07-04 15:52:22 +0100
commit ab6bf3d93d3296b46c106c5867db09e9d3bd8880 (patch)
tree f27550efa42dff809cbdf538078fe3f4ae4f1307
parent 16fe3bb942e80ae0a2cd0690629bb73cc131092b (diff)
download qemu-ab6bf3d93d3296b46c106c5867db09e9d3bd8880.zip
qemu-ab6bf3d93d3296b46c106c5867db09e9d3bd8880.tar.gz
qemu-ab6bf3d93d3296b46c106c5867db09e9d3bd8880.tar.bz2
target/arm: Implement SME2 SEL
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250704142112.1018902-84-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- target/arm/tcg/helper-sme.h 5
-rw-r--r-- target/arm/tcg/sme.decode 9
-rw-r--r-- target/arm/tcg/sme_helper.c 317
-rw-r--r-- target/arm/tcg/translate-sme.c 31
4 files changed, 362 insertions, 0 deletions
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
index 78ba3d1..467073e 100644
--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -313,3 +313,8 @@ DEF_HELPER_FLAGS_5(sme2_fclamp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i3
DEF_HELPER_FLAGS_5(sme2_fclamp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sme2_fclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(sme2_bfclamp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+/*
+ * SME2 SEL helpers, one per element size (b, h, s, d).  Each returns void
+ * and takes three pointers followed by two i32 values; the definitions in
+ * sme_helper.c name these vd, vn, vm, png and desc.
+ */
+DEF_HELPER_FLAGS_5(sme2_sel_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
+DEF_HELPER_FLAGS_5(sme2_sel_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
+DEF_HELPER_FLAGS_5(sme2_sel_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
+DEF_HELPER_FLAGS_5(sme2_sel_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32, i32)
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
index c4b85a3..0a2ceea 100644
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -878,6 +878,15 @@ UCLAMP 11000001 esz:2 1 zm:5 110001 zn:5 .... 1 \
UCLAMP 11000001 esz:2 1 zm:5 110011 zn:5 ...0 1 \
&zzz_en zd=%zd_ax4 n=4
+### SME2 Multi-vector SVE Select
+# The governing predicate field is passed through plus_8 (presumably mapping the 3-bit encoding onto predicate registers 8-15 -- confirm against plus_8).
+%sel_pg 10:3 !function=plus_8
+# Two forms follow: n=2 uses the *_ax2 register fields and n=4 the *_ax4 fields; trans_SEL forwards n to the helper as the register count.
+SEL 11000001 esz:2 1 ....0 100 ... ....0 ....0 \
+ n=2 zd=%zd_ax2 zn=%zn_ax2 zm=%zm_ax2 pg=%sel_pg
+SEL 11000001 esz:2 1 ...01 100 ... ...00 ...00 \
+ n=4 zd=%zd_ax4 zn=%zn_ax4 zm=%zm_ax4 pg=%sel_pg
+
### SME Multiple Zero
&zero_za rv off ngrp nvec
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 8a1f9fb..c1166e4 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -2118,3 +2118,320 @@ FCLAMP(sme2_fclamp_d, float64, H8)
FCLAMP(sme2_bfclamp, bfloat16, H2)
#undef FCLAMP
+/*
+ * SME2 SEL helper for 8-bit elements.
+ *
+ * vd, vn and vm each point at the first of 'nreg' registers, laid out
+ * sizeof(ARMVectorReg) bytes apart.  'desc' supplies the vector length in
+ * bytes (simd_oprsz) and the register count (simd_data).  'png' is decoded
+ * by decode_counter() into a count, an invert flag and a log2 stride.
+ *
+ * Every destination element is copied from the same-numbered element of
+ * either vn (predicate true) or vm (predicate false).  The count runs across
+ * the whole register group, so 'split' is what remains of it at the start of
+ * register r; it may be negative or exceed 'elements'.
+ */
+void HELPER(sme2_sel_b)(void *vd, void *vn, void *vm,
+ uint32_t png, uint32_t desc)
+{
+ int vl = simd_oprsz(desc); /* bytes per register; also the full-register memcpy size */
+ int nreg = simd_data(desc); /* registers per operand */
+ int elements = vl / sizeof(uint8_t); /* elements per register */
+ DecodeCounter p = decode_counter(png, vl, MO_8);
+
+ if (p.lg2_stride == 0) { /* unit stride: one boundary splits n-sourced from m-sourced elements */
+ if (p.invert) { /* inverted: elements below split come from m, the rest from n */
+ for (int r = 0; r < nreg; r++) {
+ uint8_t *d = vd + r * sizeof(ARMVectorReg);
+ uint8_t *n = vn + r * sizeof(ARMVectorReg);
+ uint8_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, n, vl); /* all true */
+ } else if (elements <= split) {
+ memcpy(d, m, vl); /* all false */
+ } else { /* boundary falls inside this register */
+ for (int e = 0; e < split; e++) {
+ d[H1(e)] = m[H1(e)];
+ }
+ for (int e = split; e < elements; e++) {
+ d[H1(e)] = n[H1(e)];
+ }
+ }
+ }
+ } else { /* not inverted: elements below split come from n, the rest from m */
+ for (int r = 0; r < nreg; r++) {
+ uint8_t *d = vd + r * sizeof(ARMVectorReg);
+ uint8_t *n = vn + r * sizeof(ARMVectorReg);
+ uint8_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, m, vl); /* all false */
+ } else if (elements <= split) {
+ memcpy(d, n, vl); /* all true */
+ } else { /* boundary falls inside this register */
+ for (int e = 0; e < split; e++) {
+ d[H1(e)] = n[H1(e)];
+ }
+ for (int e = split; e < elements; e++) {
+ d[H1(e)] = m[H1(e)];
+ }
+ }
+ }
+ }
+ } else { /* strided: in the n-sourced region only the first element of each group comes from n */
+ int estride = 1 << p.lg2_stride; /* elements per group */
+ if (p.invert) {
+ for (int r = 0; r < nreg; r++) {
+ uint8_t *d = vd + r * sizeof(ARMVectorReg);
+ uint8_t *n = vn + r * sizeof(ARMVectorReg);
+ uint8_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements;
+ int e = 0; /* shared by both loops so the second resumes where the first stopped */
+
+ for (; e < MIN(split, elements); e++) { /* leading region: everything from m */
+ d[H1(e)] = m[H1(e)];
+ }
+ for (; e < elements; e += estride) { /* remainder: one element from n, then estride - 1 from m */
+ d[H1(e)] = n[H1(e)];
+ for (int i = 1; i < estride; i++) {
+ d[H1(e + i)] = m[H1(e + i)];
+ }
+ }
+ }
+ } else {
+ for (int r = 0; r < nreg; r++) {
+ uint8_t *d = vd + r * sizeof(ARMVectorReg);
+ uint8_t *n = vn + r * sizeof(ARMVectorReg);
+ uint8_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements;
+ int e = 0; /* shared by both loops so the second resumes where the first stopped */
+
+ for (; e < MIN(split, elements); e += estride) { /* leading region: one element from n, then estride - 1 from m */
+ d[H1(e)] = n[H1(e)];
+ for (int i = 1; i < estride; i++) {
+ d[H1(e + i)] = m[H1(e + i)];
+ }
+ }
+ for (; e < elements; e++) { /* remainder: everything from m */
+ d[H1(e)] = m[H1(e)];
+ }
+ }
+ }
+ }
+}
+/*
+ * SME2 SEL helper for 16-bit elements.
+ *
+ * vd, vn and vm each point at the first of 'nreg' registers, laid out
+ * sizeof(ARMVectorReg) bytes apart.  'desc' supplies the vector length in
+ * bytes (simd_oprsz) and the register count (simd_data).  'png' is decoded
+ * by decode_counter() into a count, an invert flag and a log2 stride.
+ *
+ * Every destination element is copied from the same-numbered element of
+ * either vn (predicate true) or vm (predicate false).  The count runs across
+ * the whole register group, so 'split' is what remains of it at the start of
+ * register r; it may be negative or exceed 'elements'.
+ */
+void HELPER(sme2_sel_h)(void *vd, void *vn, void *vm,
+ uint32_t png, uint32_t desc)
+{
+ int vl = simd_oprsz(desc); /* bytes per register; also the full-register memcpy size */
+ int nreg = simd_data(desc); /* registers per operand */
+ int elements = vl / sizeof(uint16_t); /* elements per register */
+ DecodeCounter p = decode_counter(png, vl, MO_16);
+
+ if (p.lg2_stride == 0) { /* unit stride: one boundary splits n-sourced from m-sourced elements */
+ if (p.invert) { /* inverted: elements below split come from m, the rest from n */
+ for (int r = 0; r < nreg; r++) {
+ uint16_t *d = vd + r * sizeof(ARMVectorReg);
+ uint16_t *n = vn + r * sizeof(ARMVectorReg);
+ uint16_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, n, vl); /* all true */
+ } else if (elements <= split) {
+ memcpy(d, m, vl); /* all false */
+ } else { /* boundary falls inside this register */
+ for (int e = 0; e < split; e++) {
+ d[H2(e)] = m[H2(e)];
+ }
+ for (int e = split; e < elements; e++) {
+ d[H2(e)] = n[H2(e)];
+ }
+ }
+ }
+ } else { /* not inverted: elements below split come from n, the rest from m */
+ for (int r = 0; r < nreg; r++) {
+ uint16_t *d = vd + r * sizeof(ARMVectorReg);
+ uint16_t *n = vn + r * sizeof(ARMVectorReg);
+ uint16_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, m, vl); /* all false */
+ } else if (elements <= split) {
+ memcpy(d, n, vl); /* all true */
+ } else { /* boundary falls inside this register */
+ for (int e = 0; e < split; e++) {
+ d[H2(e)] = n[H2(e)];
+ }
+ for (int e = split; e < elements; e++) {
+ d[H2(e)] = m[H2(e)];
+ }
+ }
+ }
+ }
+ } else { /* strided: in the n-sourced region only the first element of each group comes from n */
+ int estride = 1 << p.lg2_stride; /* elements per group */
+ if (p.invert) {
+ for (int r = 0; r < nreg; r++) {
+ uint16_t *d = vd + r * sizeof(ARMVectorReg);
+ uint16_t *n = vn + r * sizeof(ARMVectorReg);
+ uint16_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements;
+ int e = 0; /* shared by both loops so the second resumes where the first stopped */
+
+ for (; e < MIN(split, elements); e++) { /* leading region: everything from m */
+ d[H2(e)] = m[H2(e)];
+ }
+ for (; e < elements; e += estride) { /* remainder: one element from n, then estride - 1 from m */
+ d[H2(e)] = n[H2(e)];
+ for (int i = 1; i < estride; i++) {
+ d[H2(e + i)] = m[H2(e + i)];
+ }
+ }
+ }
+ } else {
+ for (int r = 0; r < nreg; r++) {
+ uint16_t *d = vd + r * sizeof(ARMVectorReg);
+ uint16_t *n = vn + r * sizeof(ARMVectorReg);
+ uint16_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements;
+ int e = 0; /* shared by both loops so the second resumes where the first stopped */
+
+ for (; e < MIN(split, elements); e += estride) { /* leading region: one element from n, then estride - 1 from m */
+ d[H2(e)] = n[H2(e)];
+ for (int i = 1; i < estride; i++) {
+ d[H2(e + i)] = m[H2(e + i)];
+ }
+ }
+ for (; e < elements; e++) { /* remainder: everything from m */
+ d[H2(e)] = m[H2(e)];
+ }
+ }
+ }
+ }
+}
+/*
+ * SME2 SEL helper for 32-bit elements.
+ *
+ * vd, vn and vm each point at the first of 'nreg' registers, laid out
+ * sizeof(ARMVectorReg) bytes apart.  'desc' supplies the vector length in
+ * bytes (simd_oprsz) and the register count (simd_data).  'png' is decoded
+ * by decode_counter() into a count, an invert flag and a log2 stride.
+ *
+ * Every destination element is copied from the same-numbered element of
+ * either vn (predicate true) or vm (predicate false).  The count runs across
+ * the whole register group, so 'split' is what remains of it at the start of
+ * register r; it may be negative or exceed 'elements'.
+ *
+ * Unlike the byte and halfword helpers, the strided path hard-codes a
+ * stride of two elements instead of computing it from p.lg2_stride.
+ */
+void HELPER(sme2_sel_s)(void *vd, void *vn, void *vm,
+ uint32_t png, uint32_t desc)
+{
+ int vl = simd_oprsz(desc); /* bytes per register; also the full-register memcpy size */
+ int nreg = simd_data(desc); /* registers per operand */
+ int elements = vl / sizeof(uint32_t); /* elements per register */
+ DecodeCounter p = decode_counter(png, vl, MO_32);
+
+ if (p.lg2_stride == 0) { /* unit stride: one boundary splits n-sourced from m-sourced elements */
+ if (p.invert) { /* inverted: elements below split come from m, the rest from n */
+ for (int r = 0; r < nreg; r++) {
+ uint32_t *d = vd + r * sizeof(ARMVectorReg);
+ uint32_t *n = vn + r * sizeof(ARMVectorReg);
+ uint32_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, n, vl); /* all true */
+ } else if (elements <= split) {
+ memcpy(d, m, vl); /* all false */
+ } else { /* boundary falls inside this register */
+ for (int e = 0; e < split; e++) {
+ d[H4(e)] = m[H4(e)];
+ }
+ for (int e = split; e < elements; e++) {
+ d[H4(e)] = n[H4(e)];
+ }
+ }
+ }
+ } else { /* not inverted: elements below split come from n, the rest from m */
+ for (int r = 0; r < nreg; r++) {
+ uint32_t *d = vd + r * sizeof(ARMVectorReg);
+ uint32_t *n = vn + r * sizeof(ARMVectorReg);
+ uint32_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, m, vl); /* all false */
+ } else if (elements <= split) {
+ memcpy(d, n, vl); /* all true */
+ } else { /* boundary falls inside this register */
+ for (int e = 0; e < split; e++) {
+ d[H4(e)] = n[H4(e)];
+ }
+ for (int e = split; e < elements; e++) {
+ d[H4(e)] = m[H4(e)];
+ }
+ }
+ }
+ }
+ } else {
+ /* p.esz must be MO_64, so stride must be 2. */
+ if (p.invert) {
+ for (int r = 0; r < nreg; r++) {
+ uint32_t *d = vd + r * sizeof(ARMVectorReg);
+ uint32_t *n = vn + r * sizeof(ARMVectorReg);
+ uint32_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements;
+ int e = 0; /* shared by both loops so the second resumes where the first stopped */
+
+ for (; e < MIN(split, elements); e++) { /* leading region: everything from m */
+ d[H4(e)] = m[H4(e)];
+ }
+ for (; e < elements; e += 2) { /* remainder: even element of each pair from n, odd from m */
+ d[H4(e)] = n[H4(e)];
+ d[H4(e + 1)] = m[H4(e + 1)];
+ }
+ }
+ } else {
+ for (int r = 0; r < nreg; r++) {
+ uint32_t *d = vd + r * sizeof(ARMVectorReg);
+ uint32_t *n = vn + r * sizeof(ARMVectorReg);
+ uint32_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements;
+ int e = 0; /* shared by both loops so the second resumes where the first stopped */
+
+ for (; e < MIN(split, elements); e += 2) { /* leading region: even element of each pair from n, odd from m */
+ d[H4(e)] = n[H4(e)];
+ d[H4(e + 1)] = m[H4(e + 1)];
+ }
+ for (; e < elements; e++) { /* remainder: everything from m */
+ d[H4(e)] = m[H4(e)];
+ }
+ }
+ }
+ }
+}
+/*
+ * SME2 SEL helper for 64-bit elements.
+ *
+ * vd, vn and vm each point at the first of 'nreg' registers, laid out
+ * sizeof(ARMVectorReg) bytes apart.  'desc' supplies the vector length in
+ * bytes (simd_oprsz) and the register count (simd_data).  'png' is decoded
+ * by decode_counter() into a count and an invert flag.
+ *
+ * Every destination element is copied from the same-numbered element of
+ * either vn (predicate true) or vm (predicate false).  The count runs across
+ * the whole register group, so 'split' is what remains of it at the start of
+ * register r; it may be negative or exceed 'elements'.
+ *
+ * This helper never reads p.lg2_stride (presumably it is always zero for
+ * MO_64 -- confirm against decode_counter), and it copies partial registers
+ * with memcpy on plain element offsets rather than an H-macro indexed loop.
+ */
+void HELPER(sme2_sel_d)(void *vd, void *vn, void *vm,
+ uint32_t png, uint32_t desc)
+{
+ int vl = simd_oprsz(desc); /* bytes per register; also the full-register memcpy size */
+ int nreg = simd_data(desc); /* registers per operand */
+ int elements = vl / sizeof(uint64_t); /* elements per register */
+ DecodeCounter p = decode_counter(png, vl, MO_64);
+
+ if (p.invert) { /* inverted: elements below split come from m, the rest from n */
+ for (int r = 0; r < nreg; r++) {
+ uint64_t *d = vd + r * sizeof(ARMVectorReg);
+ uint64_t *n = vn + r * sizeof(ARMVectorReg);
+ uint64_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, n, vl); /* all true */
+ } else if (elements <= split) {
+ memcpy(d, m, vl); /* all false */
+ } else { /* boundary falls inside this register: low part from m, high part from n */
+ memcpy(d, m, split * sizeof(uint64_t));
+ memcpy(d + split, n + split,
+ (elements - split) * sizeof(uint64_t));
+ }
+ }
+ } else { /* not inverted: elements below split come from n, the rest from m */
+ for (int r = 0; r < nreg; r++) {
+ uint64_t *d = vd + r * sizeof(ARMVectorReg);
+ uint64_t *n = vn + r * sizeof(ARMVectorReg);
+ uint64_t *m = vm + r * sizeof(ARMVectorReg);
+ int split = p.count - r * elements; /* boundary position relative to register r */
+
+ if (split <= 0) {
+ memcpy(d, m, vl); /* all false */
+ } else if (elements <= split) {
+ memcpy(d, n, vl); /* all true */
+ } else { /* boundary falls inside this register: low part from n, high part from m */
+ memcpy(d, n, split * sizeof(uint64_t));
+ memcpy(d + split, m + split,
+ (elements - split) * sizeof(uint64_t));
+ }
+ }
+ }
+}
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index b6316ac..7407597 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1642,3 +1642,34 @@ static gen_helper_gvec_3 * const uclamp_fns[] = {
gen_helper_sme2_uclamp_d,
};
TRANS(UCLAMP, do_clamp, a, uclamp_fns)
+/*
+ * Translate the SME2 multi-vector SEL instruction.
+ *
+ * Returns false when the aa64_sme2 feature is absent, and true otherwise.
+ * The helper call is only emitted when sme_sm_enabled_check() succeeds;
+ * when it fails the function still returns true without emitting the call.
+ */
+static bool trans_SEL(DisasContext *s, arg_SEL *a)
+{
+ typedef void sme_sel_fn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32, TCGv_i32);
+ static sme_sel_fn * const fns[4] = { /* indexed by a->esz: b, h, s, d */
+ gen_helper_sme2_sel_b, gen_helper_sme2_sel_h,
+ gen_helper_sme2_sel_s, gen_helper_sme2_sel_d
+ };
+
+ if (!dc_isar_feature(aa64_sme2, s)) {
+ return false;
+ }
+ if (sme_sm_enabled_check(s)) {
+ int svl = streaming_vec_reg_size(s);
+ uint32_t desc = simd_desc(svl, svl, a->n); /* a->n rides in the data field; the helpers read it back via simd_data() */
+ TCGv_ptr t_d = tcg_temp_new_ptr();
+ TCGv_ptr t_n = tcg_temp_new_ptr();
+ TCGv_ptr t_m = tcg_temp_new_ptr();
+ TCGv_i32 png = tcg_temp_new_i32();
+
+ tcg_gen_addi_ptr(t_d, tcg_env, vec_full_reg_offset(s, a->zd)); /* pointers to the first register of each operand */
+ tcg_gen_addi_ptr(t_n, tcg_env, vec_full_reg_offset(s, a->zn));
+ tcg_gen_addi_ptr(t_m, tcg_env, vec_full_reg_offset(s, a->zm));
+
+ tcg_gen_ld16u_i32(png, tcg_env, pred_full_reg_offset(s, a->pg)
+ ^ (HOST_BIG_ENDIAN ? 6 : 0)); /* 16-bit load from the predicate register; the ^6 presumably locates the low halfword of a 64-bit word on big-endian hosts -- confirm */
+
+ fns[a->esz](t_d, t_n, t_m, png, tcg_constant_i32(desc));
+ }
+ return true;
+}