diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2022-05-04 08:07:02 -0700 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-05-04 08:07:02 -0700 |
commit | 1fba9dc71a170b3a05b9d3272dd8ecfe7f26e215 (patch) | |
tree | dff839976b32a0e492588934f2ce98597912a9b0 /target | |
parent | 9cf289af47bcfae5c75de37d8e5d6fd23705322c (diff) | |
parent | 0c5c4d5b3c1cb594e808dd4120cc56ee28fa31dd (diff) | |
download | qemu-1fba9dc71a170b3a05b9d3272dd8ecfe7f26e215.zip qemu-1fba9dc71a170b3a05b9d3272dd8ecfe7f26e215.tar.gz qemu-1fba9dc71a170b3a05b9d3272dd8ecfe7f26e215.tar.bz2 |
Merge tag 'pull-request-2022-05-04' of https://gitlab.com/thuth/qemu into staging
* Silence the warning about the msa5 feature when using the "max" CPU on s390x
* Implement the s390x Vector-Enhancements Facility 2
* Remove the old libopcode-based s390 disassembler
* Fix branch-relative-long test compilation with Clang
# -----BEGIN PGP SIGNATURE-----
#
# iQJFBAABCAAvFiEEJ7iIR+7gJQEY8+q5LtnXdP5wLbUFAmJyXKURHHRodXRoQHJl
# ZGhhdC5jb20ACgkQLtnXdP5wLbX1bg//bSZhEFekeak8nsM2piEwA/d3hEz5aTqN
# 9UW296E3MpE6cyfai+rQw1HzACA/sbOHLGBpOfo+dPkCq7JPhif62xOWd/6pfjvl
# d6+GRB7YusSnyePwQ7AJwWK7xOFi9LqYiqfM7wqUQf/TbetB4/ufssVc47LBsrqR
# 5OWJMRf0G/GItpCCy4IDp1oEJnKI9lGN+VG9hWJePeGYPLelmx0uHH02kgDCOb93
# atCOEeoDEsrVsbtwt9/NDw5H3DvgL2/bYGtVMkkXivysT3QhrxzoJMYRndK03CSx
# 2rWnmGGqorlzIJ8RdKvu27c9XfTtf8ssaidZMuCk4WD54H7Ln32L9EvRCpjtT8o2
# RHgxnkWSa2NWHhVrX9r0syRc7tFfFK3U7G5kYlZov+o1IyrgA7prwIjKzTk5ZIAl
# ZPmXWTUuewWSnGsJsRK9R8+UQ+nB6x8gxqK1s0dHf2rTgtIgWsx5s9WEdxGqeQ5h
# 5IvIBOML4aXnp2i0QGoGdq4zaDl1ac8AGpLd2jqc9svlHl44Q7NfY2MiWMVGCOP+
# O7DdO/tfmuJyPZS4QolGHghJFycC3Qr3Z42/dJrNK8bwaVGG/ysWkrutxcUzS3z9
# /xkkBWz8Vlktcy4Ft8lqkvofQGUYuJIfbU++EBu6yAp+mSzbO7elE8TZbgpGOVQv
# BFgwW3J4iqI=
# =7QOT
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 04 May 2022 03:59:49 AM PDT
# gpg: using RSA key 27B88847EEE0250118F3EAB92ED9D774FE702DB5
# gpg: issuer "thuth@redhat.com"
# gpg: Good signature from "Thomas Huth <th.huth@gmx.de>" [undefined]
# gpg: aka "Thomas Huth <thuth@redhat.com>" [undefined]
# gpg: aka "Thomas Huth <th.huth@posteo.de>" [unknown]
# gpg: aka "Thomas Huth <huth@tuxfamily.org>" [undefined]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 27B8 8847 EEE0 2501 18F3 EAB9 2ED9 D774 FE70 2DB5
* tag 'pull-request-2022-05-04' of https://gitlab.com/thuth/qemu:
tests/tcg/s390x: Use a different PCRel32 notation in branch-relative-long.c
disas: Remove old libopcode s390 disassembler
tests/tcg/s390x: Tests for Vector Enhancements Facility 2
target/s390x: add S390_FEAT_VECTOR_ENH2 to qemu CPU model
target/s390x: vxeh2: vector {load, store} byte reversed element
target/s390x: vxeh2: vector {load, store} byte reversed elements
target/s390x: vxeh2: vector {load, store} elements reversed
target/s390x: vxeh2: vector shift double by bit
target/s390x: vxeh2: Update for changes to vector shifts
target/s390x: vxeh2: vector string search
target/s390x: vxeh2: vector convert short/32b
tcg: Implement tcg_gen_{h,w}swap_{i32,i64}
s390x/cpu_models: make "max" match the unmodified "qemu" CPU model under TCG
s390x/cpu_models: drop "msa5" from the TCG "max" model
target/s390x: Fix writeback to v1 in helper_vstl
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/s390x/cpu.c | 1 | ||||
-rw-r--r-- | target/s390x/cpu_models.c | 26 | ||||
-rw-r--r-- | target/s390x/gen-features.c | 14 | ||||
-rw-r--r-- | target/s390x/helper.h | 13 | ||||
-rw-r--r-- | target/s390x/tcg/insn-data.def | 40 | ||||
-rw-r--r-- | target/s390x/tcg/translate.c | 3 | ||||
-rw-r--r-- | target/s390x/tcg/translate_vx.c.inc | 461 | ||||
-rw-r--r-- | target/s390x/tcg/vec_fpu_helper.c | 31 | ||||
-rw-r--r-- | target/s390x/tcg/vec_helper.c | 2 | ||||
-rw-r--r-- | target/s390x/tcg/vec_int_helper.c | 55 | ||||
-rw-r--r-- | target/s390x/tcg/vec_string_helper.c | 99 |
11 files changed, 665 insertions, 80 deletions
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index ccdbaf8..c31bb23 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -178,7 +178,6 @@ static void s390_cpu_reset(CPUState *s, cpu_reset_type type) static void s390_cpu_disas_set_info(CPUState *cpu, disassemble_info *info) { info->mach = bfd_mach_s390_64; - info->print_insn = print_insn_s390; info->cap_arch = CS_ARCH_SYSZ; info->cap_insn_unit = 2; info->cap_insn_split = 6; diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 6d71428..1a562d2 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -89,7 +89,6 @@ static S390CPUDef s390_cpu_defs[] = { #define QEMU_MAX_CPU_TYPE 0x8561 #define QEMU_MAX_CPU_GEN 15 #define QEMU_MAX_CPU_EC_GA 1 -static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX }; static S390FeatBitmap qemu_max_cpu_feat; /* features part of a base model but not relevant for finding a base model */ @@ -728,7 +727,6 @@ static void s390_cpu_model_initfn(Object *obj) } } -static S390CPUDef s390_qemu_cpu_def; static S390CPUModel s390_qemu_cpu_model; /* Set the qemu CPU model (on machine initialization). Must not be called @@ -742,17 +740,8 @@ void s390_set_qemu_cpu_model(uint16_t type, uint8_t gen, uint8_t ec_ga, g_assert(def); g_assert(QTAILQ_EMPTY_RCU(&cpus)); - /* TCG emulates some features that can usually not be enabled with - * the emulated machine generation. Make sure they can be enabled - * when using the QEMU model by adding them to full_feat. We have - * to copy the definition to do that. - */ - memcpy(&s390_qemu_cpu_def, def, sizeof(s390_qemu_cpu_def)); - bitmap_or(s390_qemu_cpu_def.full_feat, s390_qemu_cpu_def.full_feat, - qemu_max_cpu_feat, S390_FEAT_MAX); - /* build the CPU model */ - s390_qemu_cpu_model.def = &s390_qemu_cpu_def; + s390_qemu_cpu_model.def = def; bitmap_zero(s390_qemu_cpu_model.features, S390_FEAT_MAX); s390_init_feat_bitmap(feat_init, s390_qemu_cpu_model.features); } @@ -885,9 +874,8 @@ static void s390_max_cpu_model_class_init(ObjectClass *oc, void *data) /* * The "max" model is neither static nor migration safe. Under KVM - * it represents the "host" model. Under TCG it represents some kind of - * "qemu" CPU model without compat handling and maybe with some additional - * CPU features that are not yet unlocked in the "qemu" model. + * it represents the "host" model. Under TCG it represents the "qemu" CPU + * model of the latest QEMU machine. */ xcc->desc = "Enables all features supported by the accelerator in the current host"; @@ -966,13 +954,13 @@ static void init_ignored_base_feat(void) static void register_types(void) { - static const S390FeatInit qemu_latest_init = { S390_FEAT_LIST_QEMU_LATEST }; + static const S390FeatInit qemu_max_init = { S390_FEAT_LIST_QEMU_MAX }; int i; init_ignored_base_feat(); /* init all bitmaps from gnerated data initially */ - s390_init_feat_bitmap(qemu_max_cpu_feat_init, qemu_max_cpu_feat); + s390_init_feat_bitmap(qemu_max_init, qemu_max_cpu_feat); for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) { s390_init_feat_bitmap(s390_cpu_defs[i].base_init, s390_cpu_defs[i].base_feat); @@ -982,9 +970,9 @@ static void register_types(void) s390_cpu_defs[i].full_feat); } - /* initialize the qemu model with latest definition */ + /* initialize the qemu model with the maximum definition ("max" model) */ s390_set_qemu_cpu_model(QEMU_MAX_CPU_TYPE, QEMU_MAX_CPU_GEN, - QEMU_MAX_CPU_EC_GA, qemu_latest_init); + QEMU_MAX_CPU_EC_GA, qemu_max_init); for (i = 0; i < ARRAY_SIZE(s390_cpu_defs); i++) { char *base_name = s390_base_cpu_type_name(s390_cpu_defs[i].name); diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index 2284612..c03ec2c 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -738,13 +738,17 @@ static uint16_t qemu_V6_2[] = { S390_FEAT_VECTOR_ENH, }; -static uint16_t qemu_LATEST[] = { +static uint16_t qemu_V7_0[] = { S390_FEAT_MISC_INSTRUCTION_EXT3, }; -/* add all new definitions before this point */ + +/* + * Features for the "qemu" CPU model of the latest QEMU machine and the "max" + * CPU model under TCG. Don't include features that are not part of the full + * feature set of the current "max" CPU model generation. + */ static uint16_t qemu_MAX[] = { - /* generates a dependency warning, leave it out for now */ - S390_FEAT_MSA_EXT_5, + S390_FEAT_VECTOR_ENH2, }; /****** END FEATURE DEFS ******/ @@ -866,7 +870,7 @@ static FeatGroupDefSpec QemuFeatDef[] = { QEMU_FEAT_INITIALIZER(V4_1), QEMU_FEAT_INITIALIZER(V6_0), QEMU_FEAT_INITIALIZER(V6_2), - QEMU_FEAT_INITIALIZER(LATEST), + QEMU_FEAT_INITIALIZER(V7_0), QEMU_FEAT_INITIALIZER(MAX), }; diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 69f69cf..bf33d86 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -203,8 +203,11 @@ DEF_HELPER_FLAGS_3(gvec_vpopct16, TCG_CALL_NO_RWG, void, ptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_verim8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_verim16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vsl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) +DEF_HELPER_FLAGS_4(gvec_vsl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vsra, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) +DEF_HELPER_FLAGS_4(gvec_vsra_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vsrl, TCG_CALL_NO_RWG, void, ptr, cptr, i64, i32) +DEF_HELPER_FLAGS_4(gvec_vsrl_ve2, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32) @@ -246,6 +249,12 @@ DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32) DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32) DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32) DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_32, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_zs8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_zs16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrs_zs32, void, ptr, cptr, cptr, cptr, env, i32) /* === Vector Floating-Point Instructions */ DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) @@ -275,6 +284,10 @@ DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32 DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32) DEF_HELPER_FLAGS_5(gvec_vfche128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) DEF_HELPER_5(gvec_vfche128_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcdg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcdlg32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vclgd32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def index 6c8a8b2..5e448bb 100644 --- a/target/s390x/tcg/insn-data.def +++ b/target/s390x/tcg/insn-data.def @@ -1027,6 +1027,16 @@ F(0xe756, VLR, VRR_a, V, 0, 0, 0, 0, vlr, 0, IF_VEC) /* VECTOR LOAD AND REPLICATE */ F(0xe705, VLREP, VRX, V, la2, 0, 0, 0, vlrep, 0, IF_VEC) +/* VECTOR LOAD BYTE REVERSED ELEMENT */ + E(0xe601, VLEBRH, VRX, VE2, la2, 0, 0, 0, vlebr, 0, ES_16, IF_VEC) + E(0xe603, VLEBRF, VRX, VE2, la2, 0, 0, 0, vlebr, 0, ES_32, IF_VEC) + E(0xe602, VLEBRG, VRX, VE2, la2, 0, 0, 0, vlebr, 0, ES_64, IF_VEC) +/* VECTOR LOAD BYTE REVERSED ELEMENT AND REPLICATE */ + F(0xe605, VLBRREP, VRX, VE2, la2, 0, 0, 0, vlbrrep, 0, IF_VEC) +/* VECTOR LOAD BYTE REVERSED ELEMENT AND ZERO */ + F(0xe604, VLLEBRZ, VRX, VE2, la2, 0, 0, 0, vllebrz, 0, IF_VEC) +/* VECTOR LOAD BYTE REVERSED ELEMENTS */ + F(0xe606, VLBR, VRX, VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC) /* VECTOR LOAD ELEMENT */ E(0xe700, VLEB, VRX, V, la2, 0, 0, 0, vle, 0, ES_8, IF_VEC) E(0xe701, VLEH, VRX, V, la2, 0, 0, 0, vle, 0, ES_16, IF_VEC) @@ -1037,6 +1047,8 @@ E(0xe741, VLEIH, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_16, IF_VEC) E(0xe743, VLEIF, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_32, IF_VEC) E(0xe742, VLEIG, VRI_a, V, 0, 0, 0, 0, vlei, 0, ES_64, IF_VEC) +/* VECTOR LOAD ELEMENTS REVERSED */ + F(0xe607, VLER, VRX, VE2, la2, 0, 0, 0, vler, 0, IF_VEC) /* VECTOR LOAD GR FROM VR ELEMENT */ F(0xe721, VLGV, VRS_c, V, la2, 0, r1, 0, vlgv, 0, IF_VEC) /* VECTOR LOAD LOGICAL ELEMENT AND ZERO */ @@ -1077,11 +1089,19 @@ F(0xe75f, VSEG, VRR_a, V, 0, 0, 0, 0, vseg, 0, IF_VEC) /* VECTOR STORE */ F(0xe70e, VST, VRX, V, la2, 0, 0, 0, vst, 0, IF_VEC) +/* VECTOR STORE BYTE REVERSED ELEMENT */ + E(0xe609, VSTEBRH, VRX, VE2, la2, 0, 0, 0, vstebr, 0, ES_16, IF_VEC) + E(0xe60b, VSTEBRF, VRX, VE2, la2, 0, 0, 0, vstebr, 0, ES_32, IF_VEC) + E(0xe60a, VSTEBRG, VRX, VE2, la2, 0, 0, 0, vstebr, 0, ES_64, IF_VEC) +/* VECTOR STORE BYTE REVERSED ELEMENTS */ + F(0xe60e, VSTBR, VRX, VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC) /* VECTOR STORE ELEMENT */ E(0xe708, VSTEB, VRX, V, la2, 0, 0, 0, vste, 0, ES_8, IF_VEC) E(0xe709, VSTEH, VRX, V, la2, 0, 0, 0, vste, 0, ES_16, IF_VEC) E(0xe70b, VSTEF, VRX, V, la2, 0, 0, 0, vste, 0, ES_32, IF_VEC) E(0xe70a, VSTEG, VRX, V, la2, 0, 0, 0, vste, 0, ES_64, IF_VEC) +/* VECTOR STORE ELEMENTS REVERSED */ + F(0xe60f, VSTER, VRX, VE2, la2, 0, 0, 0, vster, 0, IF_VEC) /* VECTOR STORE MULTIPLE */ F(0xe73e, VSTM, VRS_a, V, la2, 0, 0, 0, vstm, 0, IF_VEC) /* VECTOR STORE WITH LENGTH */ @@ -1204,19 +1224,23 @@ F(0xe778, VESRLV, VRR_c, V, 0, 0, 0, 0, vesv, 0, IF_VEC) F(0xe738, VESRL, VRS_a, V, la2, 0, 0, 0, ves, 0, IF_VEC) /* VECTOR SHIFT LEFT */ - F(0xe774, VSL, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC) + E(0xe774, VSL, VRR_c, V, 0, 0, 0, 0, vsl, 0, 0, IF_VEC) /* VECTOR SHIFT LEFT BY BYTE */ - F(0xe775, VSLB, VRR_c, V, 0, 0, 0, 0, vsl, 0, IF_VEC) + E(0xe775, VSLB, VRR_c, V, 0, 0, 0, 0, vsl, 0, 1, IF_VEC) +/* VECTOR SHIFT LEFT DOUBLE BY BIT */ + E(0xe786, VSLD, VRI_d, VE2, 0, 0, 0, 0, vsld, 0, 0, IF_VEC) /* VECTOR SHIFT LEFT DOUBLE BY BYTE */ - F(0xe777, VSLDB, VRI_d, V, 0, 0, 0, 0, vsldb, 0, IF_VEC) + E(0xe777, VSLDB, VRI_d, V, 0, 0, 0, 0, vsld, 0, 1, IF_VEC) /* VECTOR SHIFT RIGHT ARITHMETIC */ - F(0xe77e, VSRA, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC) + E(0xe77e, VSRA, VRR_c, V, 0, 0, 0, 0, vsra, 0, 0, IF_VEC) /* VECTOR SHIFT RIGHT ARITHMETIC BY BYTE */ - F(0xe77f, VSRAB, VRR_c, V, 0, 0, 0, 0, vsra, 0, IF_VEC) + E(0xe77f, VSRAB, VRR_c, V, 0, 0, 0, 0, vsra, 0, 1, IF_VEC) +/* VECTOR SHIFT RIGHT DOUBLE BY BIT */ + F(0xe787, VSRD, VRI_d, VE2, 0, 0, 0, 0, vsrd, 0, IF_VEC) /* VECTOR SHIFT RIGHT LOGICAL */ - F(0xe77c, VSRL, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC) + E(0xe77c, VSRL, VRR_c, V, 0, 0, 0, 0, vsrl, 0, 0, IF_VEC) /* VECTOR SHIFT RIGHT LOGICAL BY BYTE */ - F(0xe77d, VSRLB, VRR_c, V, 0, 0, 0, 0, vsrl, 0, IF_VEC) + E(0xe77d, VSRLB, VRR_c, V, 0, 0, 0, 0, vsrl, 0, 1, IF_VEC) /* VECTOR SUBTRACT */ F(0xe7f7, VS, VRR_c, V, 0, 0, 0, 0, vs, 0, IF_VEC) /* VECTOR SUBTRACT COMPUTE BORROW INDICATION */ @@ -1246,6 +1270,8 @@ F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) /* VECTOR STRING RANGE COMPARE */ F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC) +/* VECTOR STRING SEARCH */ + F(0xe78b, VSTRS, VRR_d, VE2, 0, 0, 0, 0, vstrs, 0, IF_VEC) /* === Vector Floating-Point Instructions */ diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 8f092da..b40cb84 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -6222,7 +6222,8 @@ enum DisasInsnEnum { #define FAC_PCI S390_FEAT_ZPCI /* z/PCI facility */ #define FAC_AIS S390_FEAT_ADAPTER_INT_SUPPRESSION #define FAC_V S390_FEAT_VECTOR /* vector facility */ -#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */ +#define FAC_VE S390_FEAT_VECTOR_ENH /* vector enhancements facility 1 */ +#define FAC_VE2 S390_FEAT_VECTOR_ENH2 /* vector enhancements facility 2 */ #define FAC_MIE2 S390_FEAT_MISC_INSTRUCTION_EXT2 /* miscellaneous-instruction-extensions facility 2 */ #define FAC_MIE3 S390_FEAT_MISC_INSTRUCTION_EXT3 /* miscellaneous-instruction-extensions facility 3 */ diff --git a/target/s390x/tcg/translate_vx.c.inc b/target/s390x/tcg/translate_vx.c.inc index b829ce0..3526ba3 100644 --- a/target/s390x/tcg/translate_vx.c.inc +++ b/target/s390x/tcg/translate_vx.c.inc @@ -457,6 +457,129 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_vlebr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es); + write_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlbrrep(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + TCGv_i64 tmp; + + if (es < ES_16 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es); + gen_gvec_dup_i64(es, get_field(s, v1), tmp); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vllebrz(DisasContext *s, DisasOps *o) +{ + const uint8_t m3 = get_field(s, m3); + TCGv_i64 tmp; + int es, lshift; + + switch (m3) { + case ES_16: + case ES_32: + case ES_64: + es = m3; + lshift = 0; + break; + case 6: + es = ES_32; + lshift = 32; + break; + default: + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + tcg_gen_qemu_ld_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es); + tcg_gen_shli_i64(tmp, tmp, lshift); + + write_vec_element_i64(tmp, get_field(s, v1), 0, ES_64); + write_vec_element_i64(tcg_constant_i64(0), get_field(s, v1), 1, ES_64); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + TCGv_i64 t0, t1; + + if (es < ES_16 || es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + + if (es == ES_128) { + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ); + goto write; + } + + /* Begin with byte reversed doublewords... */ + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ); + + /* + * For 16 and 32-bit elements, the doubleword bswap also reversed + * the order of the elements. Perform a larger order swap to put + * them back into place. For the 128-bit "element", finish the + * bswap by swapping the doublewords. + */ + switch (es) { + case ES_16: + tcg_gen_hswap_i64(t0, t0); + tcg_gen_hswap_i64(t1, t1); + break; + case ES_32: + tcg_gen_wswap_i64(t0, t0); + tcg_gen_wswap_i64(t1, t1); + break; + case ES_64: + break; + default: + g_assert_not_reached(); + } + +write: + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + static DisasJumpType op_vle(DisasContext *s, DisasOps *o) { const uint8_t es = s->insn->data; @@ -492,6 +615,46 @@ static DisasJumpType op_vlei(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_vler(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + + if (es < ES_16 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + /* Begin with the two doublewords swapped... */ + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ); + + /* ... then swap smaller elements within the doublewords as required. */ + switch (es) { + case MO_16: + tcg_gen_hswap_i64(t1, t1); + tcg_gen_hswap_i64(t0, t0); + break; + case MO_32: + tcg_gen_wswap_i64(t1, t1); + tcg_gen_wswap_i64(t0, t0); + break; + case MO_64: + break; + default: + g_assert_not_reached(); + } + + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + static DisasJumpType op_vlgv(DisasContext *s, DisasOps *o) { const uint8_t es = get_field(s, m4); @@ -958,6 +1121,81 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_vstebr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = s->insn->data; + const uint8_t enr = get_field(s, m3); + TCGv_i64 tmp; + + if (!valid_vec_element(enr, es)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, get_field(s, v1), enr, es); + tcg_gen_qemu_st_i64(tmp, o->addr1, get_mem_index(s), MO_LE | es); + tcg_temp_free_i64(tmp); + return DISAS_NEXT; +} + +static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + TCGv_i64 t0, t1; + + if (es < ES_16 || es > ES_128) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* Probe write access before actually modifying memory */ + gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16)); + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + + + if (es == ES_128) { + read_vec_element_i64(t1, get_field(s, v1), 0, ES_64); + read_vec_element_i64(t0, get_field(s, v1), 1, ES_64); + goto write; + } + + read_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + read_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + + /* + * For 16 and 32-bit elements, the doubleword bswap below will + * reverse the order of the elements. Perform a larger order + * swap to put them back into place. For the 128-bit "element", + * finish the bswap by swapping the doublewords. + */ + switch (es) { + case MO_16: + tcg_gen_hswap_i64(t0, t0); + tcg_gen_hswap_i64(t1, t1); + break; + case MO_32: + tcg_gen_wswap_i64(t0, t0); + tcg_gen_wswap_i64(t1, t1); + break; + case MO_64: + break; + default: + g_assert_not_reached(); + } + +write: + tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ); + + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + static DisasJumpType op_vste(DisasContext *s, DisasOps *o) { const uint8_t es = s->insn->data; @@ -976,6 +1214,50 @@ static DisasJumpType op_vste(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_vster(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s, m3); + TCGv_i64 t0, t1; + + if (es < ES_16 || es > ES_64) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + /* Probe write access before actually modifying memory */ + gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16)); + + /* Begin with the two doublewords swapped... */ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + read_vec_element_i64(t1, get_field(s, v1), 0, ES_64); + read_vec_element_i64(t0, get_field(s, v1), 1, ES_64); + + /* ... then swap smaller elements within the doublewords as required. */ + switch (es) { + case MO_16: + tcg_gen_hswap_i64(t1, t1); + tcg_gen_hswap_i64(t0, t0); + break; + case MO_32: + tcg_gen_wswap_i64(t1, t1); + tcg_gen_wswap_i64(t0, t0); + break; + case MO_64: + break; + default: + g_assert_not_reached(); + } + + tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_TEUQ); + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8); + tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_TEUQ); + + tcg_temp_free(t0); + tcg_temp_free(t1); + return DISAS_NEXT; +} + static DisasJumpType op_vstm(DisasContext *s, DisasOps *o) { const uint8_t v3 = get_field(s, v3); @@ -2018,31 +2300,61 @@ static DisasJumpType op_ves(DisasContext *s, DisasOps *o) return DISAS_NEXT; } -static DisasJumpType op_vsl(DisasContext *s, DisasOps *o) +static DisasJumpType gen_vsh_by_byte(DisasContext *s, DisasOps *o, + gen_helper_gvec_2i *gen, + gen_helper_gvec_3 *gen_ve2) { - TCGv_i64 shift = tcg_temp_new_i64(); + bool byte = s->insn->data; - read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); - if (s->fields.op2 == 0x74) { - tcg_gen_andi_i64(shift, shift, 0x7); + if (!byte && s390_has_feat(S390_FEAT_VECTOR_ENH2)) { + gen_gvec_3_ool(get_field(s, v1), get_field(s, v2), + get_field(s, v3), 0, gen_ve2); } else { - tcg_gen_andi_i64(shift, shift, 0x78); - } + TCGv_i64 shift = tcg_temp_new_i64(); - gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), - shift, 0, gen_helper_gvec_vsl); - tcg_temp_free_i64(shift); + read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); + tcg_gen_andi_i64(shift, shift, byte ? 0x78 : 7); + gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), shift, 0, gen); + tcg_temp_free_i64(shift); + } return DISAS_NEXT; } -static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o) +static DisasJumpType op_vsl(DisasContext *s, DisasOps *o) { - const uint8_t i4 = get_field(s, i4) & 0xf; - const int left_shift = (i4 & 7) * 8; - const int right_shift = 64 - left_shift; - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); - TCGv_i64 t2 = tcg_temp_new_i64(); + return gen_vsh_by_byte(s, o, gen_helper_gvec_vsl, + gen_helper_gvec_vsl_ve2); +} + +static DisasJumpType op_vsra(DisasContext *s, DisasOps *o) +{ + return gen_vsh_by_byte(s, o, gen_helper_gvec_vsra, + gen_helper_gvec_vsra_ve2); +} + +static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o) +{ + return gen_vsh_by_byte(s, o, gen_helper_gvec_vsrl, + gen_helper_gvec_vsrl_ve2); +} + +static DisasJumpType op_vsld(DisasContext *s, DisasOps *o) +{ + const bool byte = s->insn->data; + const uint8_t mask = byte ? 15 : 7; + const uint8_t mul = byte ? 8 : 1; + const uint8_t i4 = get_field(s, i4); + const int right_shift = 64 - (i4 & 7) * mul; + TCGv_i64 t0, t1, t2; + + if (i4 & ~mask) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); if ((i4 & 8) == 0) { read_vec_element_i64(t0, get_field(s, v2), 0, ES_64); @@ -2053,8 +2365,10 @@ static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o) read_vec_element_i64(t1, get_field(s, v3), 0, ES_64); read_vec_element_i64(t2, get_field(s, v3), 1, ES_64); } + tcg_gen_extract2_i64(t0, t1, t0, right_shift); tcg_gen_extract2_i64(t1, t2, t1, right_shift); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); @@ -2064,37 +2378,33 @@ static DisasJumpType op_vsldb(DisasContext *s, DisasOps *o) return DISAS_NEXT; } -static DisasJumpType op_vsra(DisasContext *s, DisasOps *o) +static DisasJumpType op_vsrd(DisasContext *s, DisasOps *o) { - TCGv_i64 shift = tcg_temp_new_i64(); + const uint8_t i4 = get_field(s, i4); + TCGv_i64 t0, t1, t2; - read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); - if (s->fields.op2 == 0x7e) { - tcg_gen_andi_i64(shift, shift, 0x7); - } else { - tcg_gen_andi_i64(shift, shift, 0x78); + if (i4 & ~7) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; } - gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), - shift, 0, gen_helper_gvec_vsra); - tcg_temp_free_i64(shift); - return DISAS_NEXT; -} + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); -static DisasJumpType op_vsrl(DisasContext *s, DisasOps *o) -{ - TCGv_i64 shift = tcg_temp_new_i64(); + read_vec_element_i64(t0, get_field(s, v2), 1, ES_64); + read_vec_element_i64(t1, get_field(s, v3), 0, ES_64); + read_vec_element_i64(t2, get_field(s, v3), 1, ES_64); - read_vec_element_i64(shift, get_field(s, v3), 7, ES_8); - if (s->fields.op2 == 0x7c) { - tcg_gen_andi_i64(shift, shift, 0x7); - } else { - tcg_gen_andi_i64(shift, shift, 0x78); - } + tcg_gen_extract2_i64(t0, t1, t0, i4); + tcg_gen_extract2_i64(t1, t2, t1, i4); - gen_gvec_2i_ool(get_field(s, v1), get_field(s, v2), - shift, 0, gen_helper_gvec_vsrl); - tcg_temp_free_i64(shift); + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64); + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64); + + tcg_temp_free(t0); + tcg_temp_free(t1); + tcg_temp_free(t2); return DISAS_NEXT; } @@ -2497,6 +2807,31 @@ static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) return DISAS_NEXT; } +static DisasJumpType op_vstrs(DisasContext *s, DisasOps *o) +{ + typedef void (*helper_vstrs)(TCGv_ptr, TCGv_ptr, TCGv_ptr, + TCGv_ptr, TCGv_ptr, TCGv_i32); + static const helper_vstrs fns[3][2] = { + { gen_helper_gvec_vstrs_8, gen_helper_gvec_vstrs_zs8 }, + { gen_helper_gvec_vstrs_16, gen_helper_gvec_vstrs_zs16 }, + { gen_helper_gvec_vstrs_32, gen_helper_gvec_vstrs_zs32 }, + }; + const uint8_t es = get_field(s, m5); + const uint8_t m6 = get_field(s, m6); + const bool zs = extract32(m6, 1, 1); + + if (es > ES_32 || m6 & ~2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2), + get_field(s, v3), get_field(s, v4), + cpu_env, 0, fns[es][zs]); + set_cc_static(s); + return DISAS_NEXT; +} + static DisasJumpType op_vfa(DisasContext *s, DisasOps *o) { const uint8_t fpf = get_field(s, m4); @@ -2720,23 +3055,59 @@ static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o) switch (s->fields.op2) { case 0xc3: - if (fpf == FPF_LONG) { + switch (fpf) { + case FPF_LONG: fn = gen_helper_gvec_vcdg64; + break; + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) { + fn = gen_helper_gvec_vcdg32; + } + break; + default: + break; } break; case 0xc1: - if (fpf == FPF_LONG) { + switch (fpf) { + case FPF_LONG: fn = gen_helper_gvec_vcdlg64; + break; + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) { + fn = gen_helper_gvec_vcdlg32; + } + break; + default: + break; } break; case 0xc2: - if (fpf == FPF_LONG) { + switch (fpf) { + case FPF_LONG: fn = gen_helper_gvec_vcgd64; + break; + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) { + fn = gen_helper_gvec_vcgd32; + } + break; + default: + break; } break; case 0xc0: - if (fpf == FPF_LONG) { + switch (fpf) { + case FPF_LONG: fn = gen_helper_gvec_vclgd64; + break; + case FPF_SHORT: + if (s390_has_feat(S390_FEAT_VECTOR_ENH2)) { + fn = gen_helper_gvec_vclgd32; + } + break; + default: + break; } break; case 0xc7: diff --git a/target/s390x/tcg/vec_fpu_helper.c b/target/s390x/tcg/vec_fpu_helper.c index aa2cc8e..2a618a1 100644 --- a/target/s390x/tcg/vec_fpu_helper.c +++ b/target/s390x/tcg/vec_fpu_helper.c @@ -175,6 +175,30 @@ static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, *v1 = tmp; } +static float32 vcdg32(float32 a, float_status *s) +{ + return int32_to_float32(a, s); +} + +static float32 vcdlg32(float32 a, float_status *s) +{ + return uint32_to_float32(a, s); +} + +static float32 vcgd32(float32 a, float_status *s) +{ + const float32 tmp = float32_to_int32(a, s); + + return float32_is_any_nan(a) ? INT32_MIN : tmp; +} + +static float32 vclgd32(float32 a, float_status *s) +{ + const float32 tmp = float32_to_uint32(a, s); + + return float32_is_any_nan(a) ? 0 : tmp; +} + static float64 vcdg64(float64 a, float_status *s) { return int64_to_float64(a, s); @@ -210,6 +234,9 @@ void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env, \ vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC()); \ } +#define DEF_GVEC_VOP2_32(NAME) \ +DEF_GVEC_VOP2_FN(NAME, NAME##32, 32) + #define DEF_GVEC_VOP2_64(NAME) \ DEF_GVEC_VOP2_FN(NAME, NAME##64, 64) @@ -218,6 +245,10 @@ DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32) \ DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64) \ DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128) +DEF_GVEC_VOP2_32(vcdg) +DEF_GVEC_VOP2_32(vcdlg) +DEF_GVEC_VOP2_32(vcgd) +DEF_GVEC_VOP2_32(vclgd) DEF_GVEC_VOP2_64(vcdg) DEF_GVEC_VOP2_64(vcdlg) DEF_GVEC_VOP2_64(vcgd) diff --git a/target/s390x/tcg/vec_helper.c b/target/s390x/tcg/vec_helper.c index ededf13..48d8672 100644 --- a/target/s390x/tcg/vec_helper.c +++ b/target/s390x/tcg/vec_helper.c @@ -200,7 +200,6 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr, addr = wrap_address(env, addr + 8); cpu_stq_data_ra(env, addr, s390_vec_read_element64(v1, 1), GETPC()); } else { - S390Vector tmp = {}; int i; for (i = 0; i < bytes; i++) { @@ -209,6 +208,5 @@ void HELPER(vstl)(CPUS390XState *env, const void *v1, uint64_t addr, cpu_stb_data_ra(env, addr, byte, GETPC()); addr = wrap_address(env, addr + 1); } - *(S390Vector *)v1 = tmp; } } diff --git a/target/s390x/tcg/vec_int_helper.c b/target/s390x/tcg/vec_int_helper.c index b44859e..53ab5c5 100644 --- a/target/s390x/tcg/vec_int_helper.c +++ b/target/s390x/tcg/vec_int_helper.c @@ -539,18 +539,73 @@ void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count, s390_vec_shl(v1, v2, count); } +void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3, + uint32_t desc) +{ + S390Vector tmp; + uint32_t sh, e0, e1 = 0; + int i; + + for (i = 15; i >= 0; --i, e1 = e0) { + e0 = s390_vec_read_element8(v2, i); + sh = s390_vec_read_element8(v3, i) & 7; + + s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh)); + } + + *(S390Vector *)v1 = tmp; +} + void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count, uint32_t desc) { s390_vec_sar(v1, v2, count); } +void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3, + uint32_t desc) +{ + S390Vector tmp; + uint32_t sh, e0, e1 = 0; + int i = 0; + + /* Byte 0 is special only. */ + e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i); + sh = s390_vec_read_element8(v3, i) & 7; + s390_vec_write_element8(&tmp, i, e0 >> sh); + + e1 = e0; + for (i = 1; i < 16; ++i, e1 = e0) { + e0 = s390_vec_read_element8(v2, i); + sh = s390_vec_read_element8(v3, i) & 7; + s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh); + } + + *(S390Vector *)v1 = tmp; +} + void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count, uint32_t desc) { s390_vec_shr(v1, v2, count); } +void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3, + uint32_t desc) +{ + S390Vector tmp; + uint32_t sh, e0, e1 = 0; + + for (int i = 0; i < 16; ++i, e1 = e0) { + e0 = s390_vec_read_element8(v2, i); + sh = s390_vec_read_element8(v3, i) & 7; + + s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh); + } + + *(S390Vector *)v1 = tmp; +} + #define DEF_VSCBI(BITS) \ void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3, \ uint32_t desc) \ diff --git a/target/s390x/tcg/vec_string_helper.c b/target/s390x/tcg/vec_string_helper.c index f8b54bb..9b85bec 100644 --- a/target/s390x/tcg/vec_string_helper.c +++ b/target/s390x/tcg/vec_string_helper.c @@ -470,3 +470,102 @@ void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ DEF_VSTRC_CC_RT_HELPER(8) DEF_VSTRC_CC_RT_HELPER(16) DEF_VSTRC_CC_RT_HELPER(32) + +static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, uint8_t es, bool zs) +{ + int substr_elen, substr_0, str_elen, i, j, k, cc; + int nelem = 16 >> es; + bool eos = false; + + substr_elen = s390_vec_read_element8(v4, 7) >> es; + + /* If ZS, bound substr length by min(nelem, strlen(v3)). */ + if (zs) { + substr_elen = MIN(substr_elen, nelem); + for (i = 0; i < substr_elen; i++) { + if (s390_vec_read_element(v3, i, es) == 0) { + substr_elen = i; + break; + } + } + } + + if (substr_elen == 0) { + cc = 2; /* full match for degenerate case of empty substr */ + k = 0; + goto done; + } + + /* If ZS, look for eos in the searched string. */ + if (zs) { + for (k = 0; k < nelem; k++) { + if (s390_vec_read_element(v2, k, es) == 0) { + eos = true; + break; + } + } + str_elen = k; + } else { + str_elen = nelem; + } + + substr_0 = s390_vec_read_element(v3, 0, es); + + for (k = 0; ; k++) { + for (; k < str_elen; k++) { + if (s390_vec_read_element(v2, k, es) == substr_0) { + break; + } + } + + /* If we reached the end of the string, no match. */ + if (k == str_elen) { + cc = eos; /* no match (with or without zero char) */ + goto done; + } + + /* If the substring is only one char, match. */ + if (substr_elen == 1) { + cc = 2; /* full match */ + goto done; + } + + /* If the match begins at the last char, we have a partial match. */ + if (k == str_elen - 1) { + cc = 3; /* partial match */ + goto done; + } + + i = MIN(nelem, k + substr_elen); + for (j = k + 1; j < i; j++) { + uint32_t e2 = s390_vec_read_element(v2, j, es); + uint32_t e3 = s390_vec_read_element(v3, j - k, es); + if (e2 != e3) { + break; + } + } + if (j == i) { + /* Matched up until "end". */ + cc = i - k == substr_elen ? 2 : 3; /* full or partial match */ + goto done; + } + } + + done: + s390_vec_write_element64(v1, 0, k << es); + s390_vec_write_element64(v1, 1, 0); + return cc; +} + +#define DEF_VSTRS_HELPER(BITS) \ +void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2, \ + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); } \ +void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2, \ + const void *v3, const void *v4, CPUS390XState *env, uint32_t desc) \ + { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); } + +DEF_VSTRS_HELPER(8) +DEF_VSTRS_HELPER(16) +DEF_VSTRS_HELPER(32) |