aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Henderson <richard.henderson@linaro.org> 2025-07-04 08:20:54 -0600
committer Peter Maydell <peter.maydell@linaro.org> 2025-07-04 15:53:23 +0100
commit 945a379438736571d9bb6086f1e264452c5427f1 (patch)
tree 14dff478d69a898a81162132e152b3243436a52a
parent e421e4e9727c2b835a09b9d43777f3a56b252899 (diff)
download qemu-945a379438736571d9bb6086f1e264452c5427f1.zip
qemu-945a379438736571d9bb6086f1e264452c5427f1.tar.gz
qemu-945a379438736571d9bb6086f1e264452c5427f1.tar.bz2
target/arm: Implement ZIPQ, UZPQ for SME2p1/SVE2p1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250704142112.1018902-92-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- target/arm/tcg/helper-sve.h 10
-rw-r--r-- target/arm/tcg/sve.decode 6
-rw-r--r-- target/arm/tcg/sve_helper.c 29
-rw-r--r-- target/arm/tcg/translate-sve.c 19
4 files changed, 63 insertions, 1 deletions
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
index 04b9545..0f510ea 100644
--- a/target/arm/tcg/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -701,12 +701,22 @@ DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_zip_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_uzp_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index 3271c9c..e98275e 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -657,6 +657,12 @@ UZP2_q 00000101 10 1 ..... 000 011 ..... ..... @rd_rn_rm_e0
TRN1_q 00000101 10 1 ..... 000 110 ..... ..... @rd_rn_rm_e0
TRN2_q 00000101 10 1 ..... 000 111 ..... ..... @rd_rn_rm_e0
+# SVE2.1 permute vector elements (quadwords)
+ZIPQ1 01000100 .. 0 ..... 111 000 ..... ..... @rd_rn_rm
+ZIPQ2 01000100 .. 0 ..... 111 001 ..... ..... @rd_rn_rm
+UZPQ1 01000100 .. 0 ..... 111 010 ..... ..... @rd_rn_rm
+UZPQ2 01000100 .. 0 ..... 111 011 ..... ..... @rd_rn_rm
+
### SVE Permute - Predicated Group
# SVE compress active elements
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index a9dc3c4..dbbfbc1 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -3550,6 +3550,35 @@ DO_UZP(sve_uzp_s, uint32_t, H1_4)
DO_UZP(sve_uzp_d, uint64_t, H1_8)
DO_UZP(sve2_uzp_q, Int128, )
+typedef void perseg_zzz_fn(void *vd, void *vn, void *vm, uint32_t desc);
+/* Call FN once per 16-byte segment of vd/vn/vm, covering simd_oprsz(desc). */
+static void do_perseg_zzz(void *vd, void *vn, void *vm,
+ uint32_t desc, perseg_zzz_fn *fn)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+
+ desc = simd_desc(16, 16, simd_data(desc)); /* 16-byte sizes, same data field */
+ for (intptr_t i = 0; i < oprsz; i += 16) {
+ fn(vd + i, vn + i, vm + i, desc); /* all three pointers advance together */
+ }
+}
+/* Define HELPER(NAME) as a thin wrapper that hands FUNC to do_perseg_zzz(). */
+#define DO_PERSEG_ZZZ(NAME, FUNC) \
+ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+ { do_perseg_zzz(vd, vn, vm, desc, FUNC); }
+/* UZPQ helpers: reuse the sve_uzp_* helpers, one 16-byte segment at a time. */
+DO_PERSEG_ZZZ(sve2p1_uzpq_b, helper_sve_uzp_b)
+DO_PERSEG_ZZZ(sve2p1_uzpq_h, helper_sve_uzp_h)
+DO_PERSEG_ZZZ(sve2p1_uzpq_s, helper_sve_uzp_s)
+DO_PERSEG_ZZZ(sve2p1_uzpq_d, helper_sve_uzp_d)
+/* ZIPQ helpers: reuse the sve_zip_* helpers, one 16-byte segment at a time. */
+DO_PERSEG_ZZZ(sve2p1_zipq_b, helper_sve_zip_b)
+DO_PERSEG_ZZZ(sve2p1_zipq_h, helper_sve_zip_h)
+DO_PERSEG_ZZZ(sve2p1_zipq_s, helper_sve_zip_s)
+DO_PERSEG_ZZZ(sve2p1_zipq_d, helper_sve_zip_d)
+
+#undef DO_PERSEG_ZZZ
+
#define DO_TRN(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
{ \
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 01c4eaa..28cfcb9 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -2606,11 +2606,19 @@ TRANS_FEAT_NONSTREAMING(ZIP2_q, aa64_sve_f64mm, do_interleave_q,
gen_helper_sve2_zip_q, a,
QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
+static gen_helper_gvec_3 * const zipq_fns[4] = {
+ gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h,
+ gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d,
+}; /* ZIPQ helpers in b, h, s, d order; indexed by a->esz below */
+TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ zipq_fns[a->esz], a, 0)
+TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ zipq_fns[a->esz], a, 16 / 2) /* NOTE(review): presumably the byte offset of a 16-byte segment's high half -- confirm */
+
static gen_helper_gvec_3 * const uzp_fns[4] = {
gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};
-
TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
@@ -2621,6 +2629,15 @@ TRANS_FEAT_NONSTREAMING(UZP1_q, aa64_sve_f64mm, do_interleave_q,
TRANS_FEAT_NONSTREAMING(UZP2_q, aa64_sve_f64mm, do_interleave_q,
gen_helper_sve2_uzp_q, a, 16)
+static gen_helper_gvec_3 * const uzpq_fns[4] = {
+ gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h,
+ gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d,
+}; /* UZPQ helpers in b, h, s, d order; indexed by a->esz below */
+TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ uzpq_fns[a->esz], a, 0)
+TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ uzpq_fns[a->esz], a, 1 << a->esz) /* NOTE(review): presumably one element's size in bytes, i.e. start from the odd elements -- confirm */
+
static gen_helper_gvec_3 * const trn_fns[4] = {
gen_helper_sve_trn_b, gen_helper_sve_trn_h,
gen_helper_sve_trn_s, gen_helper_sve_trn_d,