diff options
author | Cornelia Huck <cohuck@redhat.com> | 2019-06-07 16:06:09 +0200 |
---|---|---|
committer | Cornelia Huck <cohuck@redhat.com> | 2019-06-07 16:06:27 +0200 |
commit | c984c4e8e31e57c44b3674d9dd8ed6aa5e68306e (patch) | |
tree | 4247f68a9a12cc89e9c2c7a39e99f271a379cab7 | |
parent | a1c993c3136fd72492d5d33c1087ec32e147c0d4 (diff) | |
parent | 6d88baf18653ff8826db3dd840a6b372d3477280 (diff) | |
download | qemu-c984c4e8e31e57c44b3674d9dd8ed6aa5e68306e.zip qemu-c984c4e8e31e57c44b3674d9dd8ed6aa5e68306e.tar.gz qemu-c984c4e8e31e57c44b3674d9dd8ed6aa5e68306e.tar.bz2 |
Merge tag 's390x-tcg-2019-06-07' into s390-next-staging
Finalize implementation of the "Vector Facility" for s390x TCG. Add it
to the QEMU CPU model, so it is enabled as default.
Also:
- One fix (and one workaround) for the STFLE instruction
- Fix the alignment of vector registers (and change the data type)
- Properly generate ELF_HWCAP for s390x for linux-user
- Use a gvec helper for VECTOR SELECT
# gpg: Signature made Fri 07 Jun 2019 02:58:01 PM CEST
# gpg: using RSA key 1BD9CAAD735C4C3A460DFCCA4DDE10F700FF835A
# gpg: issuer "david@redhat.com"
# gpg: Good signature from "David Hildenbrand <david@redhat.com>" [full]
# gpg: aka "David Hildenbrand <davidhildenbrand@gmail.com>" [full]
* tag 's390x-tcg-2019-06-07': (33 commits)
linux-user: elf: ELF_HWCAP for s390x
s390x/tcg: Use tcg_gen_gvec_bitsel for VECTOR SELECT
s390x: Bump the "qemu" CPU model up to a stripped-down z13
s390x/tcg: We support the Vector Facility
s390x/tcg: Allow linux-user to use vector instructions
s390x/tcg: Implement VECTOR FP TEST DATA CLASS IMMEDIATE
s390x/tcg: Implement VECTOR FP SUBTRACT
s390x/tcg: Implement VECTOR FP SQUARE ROOT
s390x/tcg: Implement VECTOR FP PERFORM SIGN OPERATION
s390x/tcg: Implement VECTOR FP MULTIPLY AND (ADD|SUBTRACT)
s390x/tcg: Implement VECTOR FP MULTIPLY
s390x/tcg: Implement VECTOR LOAD ROUNDED
s390x/tcg: Implement VECTOR LOAD LENGTHENED
s390x/tcg: Implement VECTOR LOAD FP INTEGER
s390x/tcg: Implement VECTOR FP DIVIDE
s390x/tcg: Implement VECTOR FP CONVERT TO LOGICAL 64-BIT
s390x/tcg: Implement VECTOR FP CONVERT TO FIXED 64-BIT
s390x/tcg: Implement VECTOR FP CONVERT FROM LOGICAL 64-BIT
s390x/tcg: Implement VECTOR FP CONVERT FROM FIXED 64-BIT
s390x/tcg: Implement VECTOR FP COMPARE (EQUAL|HIGH|HIGH OR EQUAL)
...
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
-rw-r--r-- | hw/s390x/s390-virtio-ccw.c | 2 | ||||
-rw-r--r-- | include/elf.h | 1 | ||||
-rw-r--r-- | linux-user/elfload.c | 28 | ||||
-rw-r--r-- | linux-user/s390x/signal.c | 4 | ||||
-rw-r--r-- | target/s390x/Makefile.objs | 3 | ||||
-rw-r--r-- | target/s390x/arch_dump.c | 8 | ||||
-rw-r--r-- | target/s390x/cpu.c | 3 | ||||
-rw-r--r-- | target/s390x/cpu.h | 5 | ||||
-rw-r--r-- | target/s390x/cpu_models.c | 4 | ||||
-rw-r--r-- | target/s390x/excp_helper.c | 21 | ||||
-rw-r--r-- | target/s390x/fpu_helper.c | 4 | ||||
-rw-r--r-- | target/s390x/gdbstub.c | 16 | ||||
-rw-r--r-- | target/s390x/gen-features.c | 10 | ||||
-rw-r--r-- | target/s390x/helper.c | 10 | ||||
-rw-r--r-- | target/s390x/helper.h | 84 | ||||
-rw-r--r-- | target/s390x/insn-data.def | 58 | ||||
-rw-r--r-- | target/s390x/internal.h | 4 | ||||
-rw-r--r-- | target/s390x/kvm.c | 16 | ||||
-rw-r--r-- | target/s390x/machine.c | 128 | ||||
-rw-r--r-- | target/s390x/misc_helper.c | 11 | ||||
-rw-r--r-- | target/s390x/tcg_s390x.h | 2 | ||||
-rw-r--r-- | target/s390x/translate.c | 2 | ||||
-rw-r--r-- | target/s390x/translate_vx.inc.c | 507 | ||||
-rw-r--r-- | target/s390x/vec.h | 40 | ||||
-rw-r--r-- | target/s390x/vec_fpu_helper.c | 625 | ||||
-rw-r--r-- | target/s390x/vec_string_helper.c | 473 |
26 files changed, 1930 insertions, 139 deletions
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index bbc6e8f..4d64368 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -669,7 +669,9 @@ DEFINE_CCW_MACHINE(4_1, "4.1", true); static void ccw_machine_4_0_instance_options(MachineState *machine) { + static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_0 }; ccw_machine_4_1_instance_options(machine); + s390_set_qemu_cpu_model(0x2827, 12, 2, qemu_cpu_feat); } static void ccw_machine_4_0_class_options(MachineClass *mc) diff --git a/include/elf.h b/include/elf.h index ea7708a..3501e0c 100644 --- a/include/elf.h +++ b/include/elf.h @@ -598,6 +598,7 @@ typedef struct { #define HWCAP_S390_ETF3EH 256 #define HWCAP_S390_HIGH_GPRS 512 #define HWCAP_S390_TE 1024 +#define HWCAP_S390_VXRS 2048 /* M68K specific definitions. */ /* We use the top 24 bits to encode information about the diff --git a/linux-user/elfload.c b/linux-user/elfload.c index a57b704..5451d26 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1308,6 +1308,34 @@ static inline void init_thread(struct target_pt_regs *regs, #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 +#include "elf.h" + +#define ELF_HWCAP get_elf_hwcap() + +#define GET_FEATURE(_feat, _hwcap) \ + do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0) + +static uint32_t get_elf_hwcap(void) +{ + /* + * Let's assume we always have esan3 and zarch. + * 31-bit processes can use 64-bit registers (high gprs). + */ + uint32_t hwcap = HWCAP_S390_ESAN3 | HWCAP_S390_ZARCH | HWCAP_S390_HIGH_GPRS; + + GET_FEATURE(S390_FEAT_STFLE, HWCAP_S390_STFLE); + GET_FEATURE(S390_FEAT_MSA, HWCAP_S390_MSA); + GET_FEATURE(S390_FEAT_LONG_DISPLACEMENT, HWCAP_S390_LDISP); + GET_FEATURE(S390_FEAT_EXTENDED_IMMEDIATE, HWCAP_S390_EIMM); + if (s390_has_feat(S390_FEAT_EXTENDED_TRANSLATION_3) && + s390_has_feat(S390_FEAT_ETF3_ENH)) { + hwcap |= HWCAP_S390_ETF3EH; + } + GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS); + + return hwcap; +} + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { regs->psw.addr = infop->entry; diff --git a/linux-user/s390x/signal.c b/linux-user/s390x/signal.c index 3d3cb67..ecfa2a1 100644 --- a/linux-user/s390x/signal.c +++ b/linux-user/s390x/signal.c @@ -123,7 +123,7 @@ static void save_sigregs(CPUS390XState *env, target_sigregs *sregs) */ //save_fp_regs(¤t->thread.fp_regs); FIXME for (i = 0; i < 16; i++) { - __put_user(get_freg(env, i)->ll, &sregs->fpregs.fprs[i]); + __put_user(*get_freg(env, i), &sregs->fpregs.fprs[i]); } } @@ -254,7 +254,7 @@ restore_sigregs(CPUS390XState *env, target_sigregs *sc) __get_user(env->aregs[i], &sc->regs.acrs[i]); } for (i = 0; i < 16; i++) { - __get_user(get_freg(env, i)->ll, &sc->fpregs.fprs[i]); + __get_user(*get_freg(env, i), &sc->fpregs.fprs[i]); } return err; diff --git a/target/s390x/Makefile.objs b/target/s390x/Makefile.objs index 0316457..3e27455 100644 --- a/target/s390x/Makefile.objs +++ b/target/s390x/Makefile.objs @@ -1,7 +1,8 @@ obj-y += cpu.o cpu_models.o cpu_features.o gdbstub.o interrupt.o helper.o obj-$(CONFIG_TCG) += translate.o cc_helper.o excp_helper.o fpu_helper.o obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o crypto_helper.o -obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o +obj-$(CONFIG_TCG) += vec_helper.o vec_int_helper.o vec_string_helper.o +obj-$(CONFIG_TCG) += vec_fpu_helper.o obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o arch_dump.o mmu_helper.o diag.o obj-$(CONFIG_SOFTMMU) += sigp.o obj-$(CONFIG_KVM) += kvm.o diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c index c9ef0a6..50fa0ae 100644 --- a/target/s390x/arch_dump.c +++ b/target/s390x/arch_dump.c @@ -104,7 +104,7 @@ static void s390x_write_elf64_fpregset(Note *note, S390CPU *cpu, int id) note->hdr.n_type = cpu_to_be32(NT_FPREGSET); note->contents.fpregset.fpc = cpu_to_be32(cpu->env.fpc); for (i = 0; i <= 15; i++) { - note->contents.fpregset.fprs[i] = cpu_to_be64(get_freg(cs, i)->ll); + note->contents.fpregset.fprs[i] = cpu_to_be64(*get_freg(cs, i)); } } @@ -114,7 +114,7 @@ static void s390x_write_elf64_vregslo(Note *note, S390CPU *cpu, int id) note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_LOW); for (i = 0; i <= 15; i++) { - note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1].ll); + note->contents.vregslo.vregs[i] = cpu_to_be64(cpu->env.vregs[i][1]); } } @@ -127,8 +127,8 @@ static void s390x_write_elf64_vregshi(Note *note, S390CPU *cpu, int id) note->hdr.n_type = cpu_to_be32(NT_S390_VXRS_HIGH); for (i = 0; i <= 15; i++) { - temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0].ll); - temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1].ll); + temp_vregshi->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i + 16][0]); + temp_vregshi->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i + 16][1]); } } diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index b1df63d..6af1a15 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -145,6 +145,9 @@ static void s390_cpu_full_reset(CPUState *s) #if defined(CONFIG_USER_ONLY) /* user mode should always be allowed to use the full FPU */ env->cregs[0] |= CR0_AFP; + if (s390_has_feat(S390_FEAT_VECTOR)) { + env->cregs[0] |= CR0_VECTOR; + } #endif /* architectured initial value for Breaking-Event-Address register */ diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 7305cac..4fc08a2 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -66,7 +66,7 @@ struct CPUS390XState { * The floating point registers are part of the vector registers. * vregs[0][0] -> vregs[15][0] are 16 floating point registers */ - CPU_DoubleU vregs[32][2]; /* vector registers */ + uint64_t vregs[32][2] QEMU_ALIGNED(16); /* vector registers */ uint32_t aregs[16]; /* access registers */ uint8_t riccb[64]; /* runtime instrumentation control */ uint64_t gscb[4]; /* guarded storage control */ @@ -153,7 +153,7 @@ struct CPUS390XState { }; -static inline CPU_DoubleU *get_freg(CPUS390XState *cs, int nr) +static inline uint64_t *get_freg(CPUS390XState *cs, int nr) { return &cs->vregs[nr][0]; } @@ -215,6 +215,7 @@ extern const struct VMStateDescription vmstate_s390_cpu; #define PGM_SPECIAL_OP 0x0013 #define PGM_OPERAND 0x0015 #define PGM_TRACE_TABLE 0x0016 +#define PGM_VECTOR_PROCESSING 0x001b #define PGM_SPACE_SWITCH 0x001c #define PGM_HFP_SQRT 0x001d #define PGM_PC_TRANS_SPEC 0x001f diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 21ea819..b5d16e4 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -86,8 +86,8 @@ static S390CPUDef s390_cpu_defs[] = { CPUDEF_INIT(0x8562, 15, 1, 47, 0x08000000U, "gen15b", "IBM 8562 GA1"), }; -#define QEMU_MAX_CPU_TYPE 0x2827 -#define QEMU_MAX_CPU_GEN 12 +#define QEMU_MAX_CPU_TYPE 0x2964 +#define QEMU_MAX_CPU_GEN 13 #define QEMU_MAX_CPU_EC_GA 2 static const S390FeatInit qemu_max_cpu_feat_init = { S390_FEAT_LIST_QEMU_MAX }; static S390FeatBitmap qemu_max_cpu_feat; diff --git a/target/s390x/excp_helper.c b/target/s390x/excp_helper.c index 3a467b7..f21bcf7 100644 --- a/target/s390x/excp_helper.c +++ b/target/s390x/excp_helper.c @@ -62,6 +62,21 @@ void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, tcg_s390_program_interrupt(env, PGM_DATA, ILEN_AUTO, ra); } +void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc, + uintptr_t ra) +{ + g_assert(vxc <= 0xff); +#if !defined(CONFIG_USER_ONLY) + /* Always store the VXC into the lowcore, without AFP it is undefined */ + stl_phys(CPU(s390_env_get_cpu(env))->as, + env->psa + offsetof(LowCore, data_exc_code), vxc); +#endif + + /* Always store the VXC into the FPC, without AFP it is undefined */ + env->fpc = deposit32(env->fpc, 8, 8, vxc); + tcg_s390_program_interrupt(env, PGM_VECTOR_PROCESSING, ILEN_AUTO, ra); +} + void HELPER(data_exception)(CPUS390XState *env, uint32_t dxc) { tcg_s390_data_exception(env, dxc, GETPC()); @@ -390,8 +405,8 @@ static int mchk_store_vregs(CPUS390XState *env, uint64_t mcesao) } for (i = 0; i < 32; i++) { - sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0].ll); - sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1].ll); + sa->vregs[i][0] = cpu_to_be64(env->vregs[i][0]); + sa->vregs[i][1] = cpu_to_be64(env->vregs[i][1]); } cpu_physical_memory_unmap(sa, len, 1, len); @@ -429,7 +444,7 @@ static void do_mchk_interrupt(CPUS390XState *env) lowcore->ar_access_id = 1; for (i = 0; i < 16; i++) { - lowcore->floating_pt_save_area[i] = cpu_to_be64(get_freg(env, i)->ll); + lowcore->floating_pt_save_area[i] = cpu_to_be64(*get_freg(env, i)); lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]); lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]); lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]); diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c index 1be68ba..d2c17ed 100644 --- a/target/s390x/fpu_helper.c +++ b/target/s390x/fpu_helper.c @@ -112,7 +112,7 @@ static void handle_exceptions(CPUS390XState *env, bool XxC, uintptr_t retaddr) } } -static inline int float_comp_to_cc(CPUS390XState *env, int float_compare) +int float_comp_to_cc(CPUS390XState *env, int float_compare) { S390CPU *cpu = s390_env_get_cpu(env); @@ -746,7 +746,7 @@ static inline uint16_t dcmask(int bit, bool neg) } #define DEF_FLOAT_DCMASK(_TYPE) \ -static uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \ +uint16_t _TYPE##_dcmask(CPUS390XState *env, _TYPE f1) \ { \ const bool neg = _TYPE##_is_neg(f1); \ \ diff --git a/target/s390x/gdbstub.c b/target/s390x/gdbstub.c index df14759..9cfd8fe 100644 --- a/target/s390x/gdbstub.c +++ b/target/s390x/gdbstub.c @@ -116,7 +116,7 @@ static int cpu_read_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n) case S390_FPC_REGNUM: return gdb_get_reg32(mem_buf, env->fpc); case S390_F0_REGNUM ... S390_F15_REGNUM: - return gdb_get_reg64(mem_buf, get_freg(env, n - S390_F0_REGNUM)->ll); + return gdb_get_reg64(mem_buf, *get_freg(env, n - S390_F0_REGNUM)); default: return 0; } @@ -129,7 +129,7 @@ static int cpu_write_fp_reg(CPUS390XState *env, uint8_t *mem_buf, int n) env->fpc = ldl_p(mem_buf); return 4; case S390_F0_REGNUM ... S390_F15_REGNUM: - get_freg(env, n - S390_F0_REGNUM)->ll = ldtul_p(mem_buf); + *get_freg(env, n - S390_F0_REGNUM) = ldtul_p(mem_buf); return 8; default: return 0; @@ -150,11 +150,11 @@ static int cpu_read_vreg(CPUS390XState *env, uint8_t *mem_buf, int n) switch (n) { case S390_V0L_REGNUM ... S390_V15L_REGNUM: - ret = gdb_get_reg64(mem_buf, env->vregs[n][1].ll); + ret = gdb_get_reg64(mem_buf, env->vregs[n][1]); break; case S390_V16_REGNUM ... S390_V31_REGNUM: - ret = gdb_get_reg64(mem_buf, env->vregs[n][0].ll); - ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1].ll); + ret = gdb_get_reg64(mem_buf, env->vregs[n][0]); + ret += gdb_get_reg64(mem_buf + 8, env->vregs[n][1]); break; default: ret = 0; @@ -167,11 +167,11 @@ static int cpu_write_vreg(CPUS390XState *env, uint8_t *mem_buf, int n) { switch (n) { case S390_V0L_REGNUM ... S390_V15L_REGNUM: - env->vregs[n][1].ll = ldtul_p(mem_buf + 8); + env->vregs[n][1] = ldtul_p(mem_buf + 8); return 8; case S390_V16_REGNUM ... S390_V31_REGNUM: - env->vregs[n][0].ll = ldtul_p(mem_buf); - env->vregs[n][1].ll = ldtul_p(mem_buf + 8); + env->vregs[n][0] = ldtul_p(mem_buf); + env->vregs[n][1] = ldtul_p(mem_buf + 8); return 16; default: return 0; diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index c346b76..dc320a0 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -689,7 +689,7 @@ static uint16_t qemu_V3_1[] = { S390_FEAT_MSA_EXT_4, }; -static uint16_t qemu_LATEST[] = { +static uint16_t qemu_V4_0[] = { /* * Only BFP bits are implemented (HFP, DFP, PFPO and DIVIDE TO INTEGER not * implemented yet). @@ -698,10 +698,13 @@ static uint16_t qemu_LATEST[] = { S390_FEAT_ZPCI, }; +static uint16_t qemu_LATEST[] = { + S390_FEAT_STFLE_53, + S390_FEAT_VECTOR, +}; + /* add all new definitions before this point */ static uint16_t qemu_MAX[] = { - /* z13+ features */ - S390_FEAT_STFLE_53, /* generates a dependency warning, leave it out for now */ S390_FEAT_MSA_EXT_5, }; @@ -820,6 +823,7 @@ static FeatGroupDefSpec FeatGroupDef[] = { static FeatGroupDefSpec QemuFeatDef[] = { QEMU_FEAT_INITIALIZER(V2_11), QEMU_FEAT_INITIALIZER(V3_1), + QEMU_FEAT_INITIALIZER(V4_0), QEMU_FEAT_INITIALIZER(LATEST), QEMU_FEAT_INITIALIZER(MAX), }; diff --git a/target/s390x/helper.c b/target/s390x/helper.c index 3c8f0a7..a69e5ab 100644 --- a/target/s390x/helper.c +++ b/target/s390x/helper.c @@ -249,7 +249,7 @@ int s390_store_status(S390CPU *cpu, hwaddr addr, bool store_arch) cpu_physical_memory_write(offsetof(LowCore, ar_access_id), &ar_id, 1); } for (i = 0; i < 16; ++i) { - sa->fprs[i] = cpu_to_be64(get_freg(&cpu->env, i)->ll); + sa->fprs[i] = cpu_to_be64(*get_freg(&cpu->env, i)); } for (i = 0; i < 16; ++i) { sa->grs[i] = cpu_to_be64(cpu->env.regs[i]); @@ -299,8 +299,8 @@ int s390_store_adtl_status(S390CPU *cpu, hwaddr addr, hwaddr len) if (s390_has_feat(S390_FEAT_VECTOR)) { for (i = 0; i < 32; i++) { - sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0].ll); - sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1].ll); + sa->vregs[i][0] = cpu_to_be64(cpu->env.vregs[i][0]); + sa->vregs[i][1] = cpu_to_be64(cpu->env.vregs[i][1]); } } if (s390_has_feat(S390_FEAT_GUARDED_STORAGE) && len >= ADTL_GS_MIN_SIZE) { @@ -341,13 +341,13 @@ void s390_cpu_dump_state(CPUState *cs, FILE *f, int flags) if (s390_has_feat(S390_FEAT_VECTOR)) { for (i = 0; i < 32; i++) { qemu_fprintf(f, "V%02d=%016" PRIx64 "%016" PRIx64 "%c", - i, env->vregs[i][0].ll, env->vregs[i][1].ll, + i, env->vregs[i][0], env->vregs[i][1], i % 2 ? '\n' : ' '); } } else { for (i = 0; i < 16; i++) { qemu_fprintf(f, "F%02d=%016" PRIx64 "%c", - i, get_freg(env, i)->ll, + i, *get_freg(env, i), (i % 4) == 3 ? '\n' : ' '); } } diff --git a/target/s390x/helper.h b/target/s390x/helper.h index 7755a96..e9aff83 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -211,6 +211,90 @@ DEF_HELPER_FLAGS_4(gvec_vscbi8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_FLAGS_4(gvec_vscbi16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) DEF_HELPER_4(gvec_vtm, void, ptr, cptr, env, i32) +/* === Vector String Instructions === */ +DEF_HELPER_FLAGS_4(gvec_vfae8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfae16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfae32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfae_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfae_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfae_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfee8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfee16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfee32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfee_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfee_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfee_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfene8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_4(gvec_vfene32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32) +DEF_HELPER_5(gvec_vfene_cc8, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc16, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfene_cc32, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_3(gvec_vistr8, TCG_CALL_NO_RWG, void, ptr, cptr, i32) +DEF_HELPER_FLAGS_3(gvec_vistr16, TCG_CALL_NO_RWG, void, ptr, cptr, i32) +DEF_HELPER_FLAGS_3(gvec_vistr32, TCG_CALL_NO_RWG, void, ptr, cptr, i32) +DEF_HELPER_4(gvec_vistr_cc8, void, ptr, cptr, env, i32) +DEF_HELPER_4(gvec_vistr_cc16, void, ptr, cptr, env, i32) +DEF_HELPER_4(gvec_vistr_cc32, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc_rt8, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc_rt16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_FLAGS_5(gvec_vstrc_rt32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, cptr, i32) +DEF_HELPER_6(gvec_vstrc_cc8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc32, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc_rt8, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32) + +/* === Vector Floating-Point Instructions */ +DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfa64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32) +DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfce64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfce64_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfce64s_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfch64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfch64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfch64_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfch64s_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfche64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfche64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfche64_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_5(gvec_vfche64s_cc, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcdg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcdlg64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vcgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vclgd64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfd64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfi64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfll32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vflr64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_4(gvec_vfsq64s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_FLAGS_5(gvec_vfs64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32) +DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32) +DEF_HELPER_4(gvec_vftci64s, void, ptr, cptr, env, i32) + #ifndef CONFIG_USER_ONLY DEF_HELPER_3(servc, i32, env, i64, i64) DEF_HELPER_4(diag, void, env, i32, i32, i32) diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def index e61475b..f421184 100644 --- a/target/s390x/insn-data.def +++ b/target/s390x/insn-data.def @@ -1191,6 +1191,64 @@ /* VECTOR TEST UNDER MASK */ F(0xe7d8, VTM, VRR_a, V, 0, 0, 0, 0, vtm, 0, IF_VEC) +/* === Vector String Instructions === */ + +/* VECTOR FIND ANY ELEMENT EQUAL */ + F(0xe782, VFAE, VRR_b, V, 0, 0, 0, 0, vfae, 0, IF_VEC) +/* VECTOR FIND ELEMENT EQUAL */ + F(0xe780, VFEE, VRR_b, V, 0, 0, 0, 0, vfee, 0, IF_VEC) +/* VECTOR FIND ELEMENT NOT EQUAL */ + F(0xe781, VFENE, VRR_b, V, 0, 0, 0, 0, vfene, 0, IF_VEC) +/* VECTOR ISOLATE STRING */ + F(0xe75c, VISTR, VRR_a, V, 0, 0, 0, 0, vistr, 0, IF_VEC) +/* VECTOR STRING RANGE COMPARE */ + F(0xe78a, VSTRC, VRR_d, V, 0, 0, 0, 0, vstrc, 0, IF_VEC) + +/* === Vector Floating-Point Instructions */ + +/* VECTOR FP ADD */ + F(0xe7e3, VFA, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP COMPARE SCALAR */ + F(0xe7cb, WFC, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC) +/* VECTOR FP COMPARE AND SIGNAL SCALAR */ + F(0xe7ca, WFK, VRR_a, V, 0, 0, 0, 0, wfc, 0, IF_VEC) +/* VECTOR FP COMPARE EQUAL */ + F(0xe7e8, VFCE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP COMPARE HIGH */ + F(0xe7eb, VFCH, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP COMPARE HIGH OR EQUAL */ + F(0xe7ea, VFCHE, VRR_c, V, 0, 0, 0, 0, vfc, 0, IF_VEC) +/* VECTOR FP CONVERT FROM FIXED 64-BIT */ + F(0xe7c3, VCDG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT FROM LOGICAL 64-BIT */ + F(0xe7c1, VCDLG, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT TO FIXED 64-BIT */ + F(0xe7c2, VCGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP CONVERT TO LOGICAL 64-BIT */ + F(0xe7c0, VCLGD, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP DIVIDE */ + F(0xe7e5, VFD, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR LOAD FP INTEGER */ + F(0xe7c7, VFI, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR LOAD LENGTHENED */ + F(0xe7c4, VFLL, VRR_a, V, 0, 0, 0, 0, vfll, 0, IF_VEC) +/* VECTOR LOAD ROUNDED */ + F(0xe7c5, VFLR, VRR_a, V, 0, 0, 0, 0, vcdg, 0, IF_VEC) +/* VECTOR FP MULTIPLY */ + F(0xe7e7, VFM, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP MULTIPLY AND ADD */ + F(0xe78f, VFMA, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP MULTIPLY AND SUBTRACT */ + F(0xe78e, VFMS, VRR_e, V, 0, 0, 0, 0, vfma, 0, IF_VEC) +/* VECTOR FP PERFORM SIGN OPERATION */ + F(0xe7cc, VFPSO, VRR_a, V, 0, 0, 0, 0, vfpso, 0, IF_VEC) +/* VECTOR FP SQUARE ROOT */ + F(0xe7ce, VFSQ, VRR_a, V, 0, 0, 0, 0, vfsq, 0, IF_VEC) +/* VECTOR FP SUBTRACT */ + F(0xe7e2, VFS, VRR_c, V, 0, 0, 0, 0, vfa, 0, IF_VEC) +/* VECTOR FP TEST DATA CLASS IMMEDIATE */ + F(0xe74a, VFTCI, VRI_e, V, 0, 0, 0, 0, vftci, 0, IF_VEC) + #ifndef CONFIG_USER_ONLY /* COMPARE AND SWAP AND PURGE */ E(0xb250, CSP, RRE, Z, r1_32u, ra2, r1_P, 0, csp, 0, MO_TEUL, IF_PRIV) diff --git a/target/s390x/internal.h b/target/s390x/internal.h index 9893fc0..c243fa7 100644 --- a/target/s390x/internal.h +++ b/target/s390x/internal.h @@ -285,6 +285,10 @@ uint32_t set_cc_nz_f128(float128 v); uint8_t s390_softfloat_exc_to_ieee(unsigned int exc); int s390_swap_bfp_rounding_mode(CPUS390XState *env, int m3); void s390_restore_bfp_rounding_mode(CPUS390XState *env, int old_mode); +int float_comp_to_cc(CPUS390XState *env, int float_compare); +uint16_t float32_dcmask(CPUS390XState *env, float32 f1); +uint16_t float64_dcmask(CPUS390XState *env, float64 f1); +uint16_t float128_dcmask(CPUS390XState *env, float128 f1); /* gdbstub.c */ diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c index e5e2b69..bcec979 100644 --- a/target/s390x/kvm.c +++ b/target/s390x/kvm.c @@ -418,21 +418,21 @@ int kvm_arch_put_registers(CPUState *cs, int level) if (can_sync_regs(cs, KVM_SYNC_VRS)) { for (i = 0; i < 32; i++) { - cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0].ll; - cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1].ll; + cs->kvm_run->s.regs.vrs[i][0] = env->vregs[i][0]; + cs->kvm_run->s.regs.vrs[i][1] = env->vregs[i][1]; } cs->kvm_run->s.regs.fpc = env->fpc; cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_VRS; } else if (can_sync_regs(cs, KVM_SYNC_FPRS)) { for (i = 0; i < 16; i++) { - cs->kvm_run->s.regs.fprs[i] = get_freg(env, i)->ll; + cs->kvm_run->s.regs.fprs[i] = *get_freg(env, i); } cs->kvm_run->s.regs.fpc = env->fpc; cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_FPRS; } else { /* Floating point */ for (i = 0; i < 16; i++) { - fpu.fprs[i] = get_freg(env, i)->ll; + fpu.fprs[i] = *get_freg(env, i); } fpu.fpc = env->fpc; @@ -586,13 +586,13 @@ int kvm_arch_get_registers(CPUState *cs) /* Floating point and vector registers */ if (can_sync_regs(cs, KVM_SYNC_VRS)) { for (i = 0; i < 32; i++) { - env->vregs[i][0].ll = cs->kvm_run->s.regs.vrs[i][0]; - env->vregs[i][1].ll = cs->kvm_run->s.regs.vrs[i][1]; + env->vregs[i][0] = cs->kvm_run->s.regs.vrs[i][0]; + env->vregs[i][1] = cs->kvm_run->s.regs.vrs[i][1]; } env->fpc = cs->kvm_run->s.regs.fpc; } else if (can_sync_regs(cs, KVM_SYNC_FPRS)) { for (i = 0; i < 16; i++) { - get_freg(env, i)->ll = cs->kvm_run->s.regs.fprs[i]; + *get_freg(env, i) = cs->kvm_run->s.regs.fprs[i]; } env->fpc = cs->kvm_run->s.regs.fpc; } else { @@ -601,7 +601,7 @@ int kvm_arch_get_registers(CPUState *cs) return r; } for (i = 0; i < 16; i++) { - get_freg(env, i)->ll = fpu.fprs[i]; + *get_freg(env, i) = fpu.fprs[i]; } env->fpc = fpu.fpc; } diff --git a/target/s390x/machine.c b/target/s390x/machine.c index cb792aa..e6851a5 100644 --- a/target/s390x/machine.c +++ b/target/s390x/machine.c @@ -66,22 +66,22 @@ static const VMStateDescription vmstate_fpu = { .minimum_version_id = 1, .needed = fpu_needed, .fields = (VMStateField[]) { - VMSTATE_UINT64(env.vregs[0][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[1][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[2][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[3][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[4][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[5][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[6][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[7][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[8][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[9][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[10][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[11][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[12][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[13][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[14][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[15][0].ll, S390CPU), + VMSTATE_UINT64(env.vregs[0][0], S390CPU), + VMSTATE_UINT64(env.vregs[1][0], S390CPU), + VMSTATE_UINT64(env.vregs[2][0], S390CPU), + VMSTATE_UINT64(env.vregs[3][0], S390CPU), + VMSTATE_UINT64(env.vregs[4][0], S390CPU), + VMSTATE_UINT64(env.vregs[5][0], S390CPU), + VMSTATE_UINT64(env.vregs[6][0], S390CPU), + VMSTATE_UINT64(env.vregs[7][0], S390CPU), + VMSTATE_UINT64(env.vregs[8][0], S390CPU), + VMSTATE_UINT64(env.vregs[9][0], S390CPU), + VMSTATE_UINT64(env.vregs[10][0], S390CPU), + VMSTATE_UINT64(env.vregs[11][0], S390CPU), + VMSTATE_UINT64(env.vregs[12][0], S390CPU), + VMSTATE_UINT64(env.vregs[13][0], S390CPU), + VMSTATE_UINT64(env.vregs[14][0], S390CPU), + VMSTATE_UINT64(env.vregs[15][0], S390CPU), VMSTATE_UINT32(env.fpc, S390CPU), VMSTATE_END_OF_LIST() } @@ -99,54 +99,54 @@ static const VMStateDescription vmstate_vregs = { .needed = vregs_needed, .fields = (VMStateField[]) { /* vregs[0][0] -> vregs[15][0] and fregs are overlays */ - VMSTATE_UINT64(env.vregs[16][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[17][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[18][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[19][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[20][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[21][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[22][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[23][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[24][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[25][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[26][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[27][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[28][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[29][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[30][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[31][0].ll, S390CPU), - VMSTATE_UINT64(env.vregs[0][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[1][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[2][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[3][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[4][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[5][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[6][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[7][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[8][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[9][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[10][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[11][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[12][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[13][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[14][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[15][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[16][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[17][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[18][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[19][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[20][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[21][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[22][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[23][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[24][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[25][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[26][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[27][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[28][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[29][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[30][1].ll, S390CPU), - VMSTATE_UINT64(env.vregs[31][1].ll, S390CPU), + VMSTATE_UINT64(env.vregs[16][0], S390CPU), + VMSTATE_UINT64(env.vregs[17][0], S390CPU), + VMSTATE_UINT64(env.vregs[18][0], S390CPU), + VMSTATE_UINT64(env.vregs[19][0], S390CPU), + VMSTATE_UINT64(env.vregs[20][0], S390CPU), + VMSTATE_UINT64(env.vregs[21][0], S390CPU), + VMSTATE_UINT64(env.vregs[22][0], S390CPU), + VMSTATE_UINT64(env.vregs[23][0], S390CPU), + VMSTATE_UINT64(env.vregs[24][0], S390CPU), + VMSTATE_UINT64(env.vregs[25][0], S390CPU), + VMSTATE_UINT64(env.vregs[26][0], S390CPU), + VMSTATE_UINT64(env.vregs[27][0], S390CPU), + VMSTATE_UINT64(env.vregs[28][0], S390CPU), + VMSTATE_UINT64(env.vregs[29][0], S390CPU), + VMSTATE_UINT64(env.vregs[30][0], S390CPU), + VMSTATE_UINT64(env.vregs[31][0], S390CPU), + VMSTATE_UINT64(env.vregs[0][1], S390CPU), + VMSTATE_UINT64(env.vregs[1][1], S390CPU), + VMSTATE_UINT64(env.vregs[2][1], S390CPU), + VMSTATE_UINT64(env.vregs[3][1], S390CPU), + VMSTATE_UINT64(env.vregs[4][1], S390CPU), + VMSTATE_UINT64(env.vregs[5][1], S390CPU), + VMSTATE_UINT64(env.vregs[6][1], S390CPU), + VMSTATE_UINT64(env.vregs[7][1], S390CPU), + VMSTATE_UINT64(env.vregs[8][1], S390CPU), + VMSTATE_UINT64(env.vregs[9][1], S390CPU), + VMSTATE_UINT64(env.vregs[10][1], S390CPU), + VMSTATE_UINT64(env.vregs[11][1], S390CPU), + VMSTATE_UINT64(env.vregs[12][1], S390CPU), + VMSTATE_UINT64(env.vregs[13][1], S390CPU), + VMSTATE_UINT64(env.vregs[14][1], S390CPU), + VMSTATE_UINT64(env.vregs[15][1], S390CPU), + VMSTATE_UINT64(env.vregs[16][1], S390CPU), + VMSTATE_UINT64(env.vregs[17][1], S390CPU), + VMSTATE_UINT64(env.vregs[18][1], S390CPU), + VMSTATE_UINT64(env.vregs[19][1], S390CPU), + VMSTATE_UINT64(env.vregs[20][1], S390CPU), + VMSTATE_UINT64(env.vregs[21][1], S390CPU), + VMSTATE_UINT64(env.vregs[22][1], S390CPU), + VMSTATE_UINT64(env.vregs[23][1], S390CPU), + VMSTATE_UINT64(env.vregs[24][1], S390CPU), + VMSTATE_UINT64(env.vregs[25][1], S390CPU), + VMSTATE_UINT64(env.vregs[26][1], S390CPU), + VMSTATE_UINT64(env.vregs[27][1], S390CPU), + VMSTATE_UINT64(env.vregs[28][1], S390CPU), + VMSTATE_UINT64(env.vregs[29][1], S390CPU), + VMSTATE_UINT64(env.vregs[30][1], S390CPU), + VMSTATE_UINT64(env.vregs[31][1], S390CPU), VMSTATE_END_OF_LIST() } }; diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c index ee67c1f..10aa617 100644 --- a/target/s390x/misc_helper.c +++ b/target/s390x/misc_helper.c @@ -669,7 +669,7 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr) { const uintptr_t ra = GETPC(); const int count_bytes = ((env->regs[0] & 0xff) + 1) * 8; - const int max_bytes = ROUND_UP(used_stfl_bytes, 8); + int max_bytes; int i; if (addr & 0x7) { @@ -677,7 +677,14 @@ uint32_t HELPER(stfle)(CPUS390XState *env, uint64_t addr) } prepare_stfl(); - for (i = 0; i < count_bytes; ++i) { + max_bytes = ROUND_UP(used_stfl_bytes, 8); + + /* + * The PoP says that doublewords beyond the highest-numbered facility + * bit may or may not be stored. However, existing hardware appears to + * not store the words, and existing software depend on that. + */ + for (i = 0; i < MIN(count_bytes, max_bytes); ++i) { cpu_stb_data_ra(env, addr + i, stfl_bytes[i], ra); } diff --git a/target/s390x/tcg_s390x.h b/target/s390x/tcg_s390x.h index ab2c4ba..2813f9d 100644 --- a/target/s390x/tcg_s390x.h +++ b/target/s390x/tcg_s390x.h @@ -18,5 +18,7 @@ void QEMU_NORETURN tcg_s390_program_interrupt(CPUS390XState *env, uint32_t code, int ilen, uintptr_t ra); void QEMU_NORETURN tcg_s390_data_exception(CPUS390XState *env, uint32_t dxc, uintptr_t ra); +void QEMU_NORETURN tcg_s390_vector_exception(CPUS390XState *env, uint32_t vxc, + uintptr_t ra); #endif /* TCG_S390X_H */ diff --git a/target/s390x/translate.c b/target/s390x/translate.c index fa57b75..ac0d8b6 100644 --- a/target/s390x/translate.c +++ b/target/s390x/translate.c @@ -149,7 +149,7 @@ void s390x_translate_init(void) static inline int vec_full_reg_offset(uint8_t reg) { g_assert(reg < 32); - return offsetof(CPUS390XState, vregs[reg][0].d); + return offsetof(CPUS390XState, vregs[reg][0]); } static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp es) diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c index 7e0bfcb..7b1d31c 100644 --- a/target/s390x/translate_vx.inc.c +++ b/target/s390x/translate_vx.inc.c @@ -52,6 +52,11 @@ #define ES_64 MO_64 #define ES_128 4 +/* Floating-Point Format */ +#define FPF_SHORT 2 +#define FPF_LONG 3 +#define FPF_EXT 4 + static inline bool valid_vec_element(uint8_t enr, TCGMemOp es) { return !(enr & ~(NUM_VEC_ELEMENTS(es) - 1)); @@ -188,6 +193,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, #define gen_gvec_2s(v1, v2, c, gen) \ tcg_gen_gvec_2s(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ 16, 16, c, gen) +#define gen_gvec_2_ool(v1, v2, data, fn) \ + tcg_gen_gvec_2_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + 16, 16, data, fn) #define gen_gvec_2i_ool(v1, v2, c, data, fn) \ tcg_gen_gvec_2i_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ c, 16, 16, data, fn) @@ -214,6 +222,10 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, tcg_gen_gvec_4_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ 16, 16, data, fn) +#define gen_gvec_4_ptr(v1, v2, v3, v4, ptr, data, fn) \ + tcg_gen_gvec_4_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), \ + ptr, 16, 16, data, fn) #define gen_gvec_dup_i64(es, v1, c) \ tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c) #define gen_gvec_mov(v1, v2) \ @@ -233,6 +245,9 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr, #define gen_gvec_fn_3(fn, es, v1, v2, v3) \ tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ vec_full_reg_offset(v3), 16, 16) +#define gen_gvec_fn_4(fn, es, v1, v2, v3, v4) \ + tcg_gen_gvec_##fn(es, vec_full_reg_offset(v1), vec_full_reg_offset(v2), \ + vec_full_reg_offset(v3), vec_full_reg_offset(v4), 16, 16) /* * Helper to carry out a 128 bit vector computation using 2 i64 values per @@ -903,40 +918,11 @@ static DisasJumpType op_vsce(DisasContext *s, DisasOps *o) return DISAS_NEXT; } -static void gen_sel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - /* bit in c not set -> copy bit from b */ - tcg_gen_andc_i64(t, b, c); - /* bit in c set -> copy bit from a */ - tcg_gen_and_i64(d, a, c); - /* merge the results */ - tcg_gen_or_i64(d, d, t); - tcg_temp_free_i64(t); -} - -static void gen_sel_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b, - TCGv_vec c) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - - tcg_gen_andc_vec(vece, t, b, c); - tcg_gen_and_vec(vece, d, a, c); - tcg_gen_or_vec(vece, d, d, t); - tcg_temp_free_vec(t); -} - static DisasJumpType op_vsel(DisasContext *s, DisasOps *o) { - static const GVecGen4 gvec_op = { - .fni8 = gen_sel_i64, - .fniv = gen_sel_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - - gen_gvec_4(get_field(s->fields, v1), get_field(s->fields, v2), - get_field(s->fields, v3), get_field(s->fields, v4), &gvec_op); + gen_gvec_fn_4(bitsel, ES_8, get_field(s->fields, v1), + get_field(s->fields, v4), get_field(s->fields, v2), + get_field(s->fields, v3)); return DISAS_NEXT; } @@ -2353,3 +2339,460 @@ static DisasJumpType op_vtm(DisasContext *s, DisasOps *o) set_cc_static(s); return DISAS_NEXT; } + +static DisasJumpType op_vfae(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfae8, + gen_helper_gvec_vfae16, + gen_helper_gvec_vfae32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfae_cc8, + gen_helper_gvec_vfae_cc16, + gen_helper_gvec_vfae_cc32, + }; + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfee(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfee8, + gen_helper_gvec_vfee16, + gen_helper_gvec_vfee32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfee_cc8, + gen_helper_gvec_vfee_cc16, + gen_helper_gvec_vfee_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfene(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_3 * const g[3] = { + gen_helper_gvec_vfene8, + gen_helper_gvec_vfene16, + gen_helper_gvec_vfene32, + }; + static gen_helper_gvec_3_ptr * const g_cc[3] = { + gen_helper_gvec_vfene_cc8, + gen_helper_gvec_vfene_cc16, + gen_helper_gvec_vfene_cc32, + }; + + if (es > ES_32 || m5 & ~0x3) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, m5, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_3_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), m5, g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vistr(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + static gen_helper_gvec_2 * const g[3] = { + gen_helper_gvec_vistr8, + gen_helper_gvec_vistr16, + gen_helper_gvec_vistr32, + }; + static gen_helper_gvec_2_ptr * const g_cc[3] = { + gen_helper_gvec_vistr_cc8, + gen_helper_gvec_vistr_cc16, + gen_helper_gvec_vistr_cc32, + }; + + if (es > ES_32 || m5 & ~0x1) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 0, 1)) { + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + cpu_env, 0, g_cc[es]); + set_cc_static(s); + } else { + gen_gvec_2_ool(get_field(s->fields, v1), get_field(s->fields, v2), 0, + g[es]); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vstrc(DisasContext *s, DisasOps *o) +{ + const uint8_t es = get_field(s->fields, m5); + const uint8_t m6 = get_field(s->fields, m6); + static gen_helper_gvec_4 * const g[3] = { + gen_helper_gvec_vstrc8, + gen_helper_gvec_vstrc16, + gen_helper_gvec_vstrc32, + }; + static gen_helper_gvec_4 * const g_rt[3] = { + gen_helper_gvec_vstrc_rt8, + gen_helper_gvec_vstrc_rt16, + gen_helper_gvec_vstrc_rt32, + }; + static gen_helper_gvec_4_ptr * const g_cc[3] = { + gen_helper_gvec_vstrc_cc8, + gen_helper_gvec_vstrc_cc16, + gen_helper_gvec_vstrc_cc32, + }; + static gen_helper_gvec_4_ptr * const g_cc_rt[3] = { + gen_helper_gvec_vstrc_cc_rt8, + gen_helper_gvec_vstrc_cc_rt16, + gen_helper_gvec_vstrc_cc_rt32, + }; + + if (es > ES_32) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m6, 0, 1)) { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + cpu_env, m6, g_cc_rt[es]); + } else { + gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + cpu_env, m6, g_cc[es]); + } + set_cc_static(s); + } else { + if (extract32(m6, 2, 1)) { + gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + m6, g_rt[es]); + } else { + gen_gvec_4_ool(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), + m6, g[es]); + } + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfa(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + const bool se = extract32(m5, 3, 1); + gen_helper_gvec_3_ptr *fn; + + if (fpf != FPF_LONG || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields->op2) { + case 0xe3: + fn = se ? gen_helper_gvec_vfa64s : gen_helper_gvec_vfa64; + break; + case 0xe5: + fn = se ? gen_helper_gvec_vfd64s : gen_helper_gvec_vfd64; + break; + case 0xe7: + fn = se ? gen_helper_gvec_vfm64s : gen_helper_gvec_vfm64; + break; + case 0xe2: + fn = se ? gen_helper_gvec_vfs64s : gen_helper_gvec_vfs64; + break; + default: + g_assert_not_reached(); + } + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, 0, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_wfc(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s->fields, m3); + const uint8_t m4 = get_field(s->fields, m4); + + if (fpf != FPF_LONG || m4) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (s->fields->op2 == 0xcb) { + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + cpu_env, 0, gen_helper_gvec_wfc64); + } else { + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + cpu_env, 0, gen_helper_gvec_wfk64); + } + set_cc_static(s); + return DISAS_NEXT; +} + +static DisasJumpType op_vfc(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + const uint8_t m6 = get_field(s->fields, m6); + const bool se = extract32(m5, 3, 1); + const bool cs = extract32(m6, 0, 1); + gen_helper_gvec_3_ptr *fn; + + if (fpf != FPF_LONG || extract32(m5, 0, 3) || extract32(m6, 1, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (cs) { + switch (s->fields->op2) { + case 0xe8: + fn = se ? gen_helper_gvec_vfce64s_cc : gen_helper_gvec_vfce64_cc; + break; + case 0xeb: + fn = se ? gen_helper_gvec_vfch64s_cc : gen_helper_gvec_vfch64_cc; + break; + case 0xea: + fn = se ? gen_helper_gvec_vfche64s_cc : gen_helper_gvec_vfche64_cc; + break; + default: + g_assert_not_reached(); + } + } else { + switch (s->fields->op2) { + case 0xe8: + fn = se ? gen_helper_gvec_vfce64s : gen_helper_gvec_vfce64; + break; + case 0xeb: + fn = se ? gen_helper_gvec_vfch64s : gen_helper_gvec_vfch64; + break; + case 0xea: + fn = se ? gen_helper_gvec_vfche64s : gen_helper_gvec_vfche64; + break; + default: + g_assert_not_reached(); + } + } + gen_gvec_3_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), cpu_env, 0, fn); + if (cs) { + set_cc_static(s); + } + return DISAS_NEXT; +} + +static DisasJumpType op_vcdg(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s->fields, m3); + const uint8_t m4 = get_field(s->fields, m4); + const uint8_t erm = get_field(s->fields, m5); + const bool se = extract32(m4, 3, 1); + gen_helper_gvec_2_ptr *fn; + + if (fpf != FPF_LONG || extract32(m4, 0, 2) || erm > 7 || erm == 2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + switch (s->fields->op2) { + case 0xc3: + fn = se ? gen_helper_gvec_vcdg64s : gen_helper_gvec_vcdg64; + break; + case 0xc1: + fn = se ? gen_helper_gvec_vcdlg64s : gen_helper_gvec_vcdlg64; + break; + case 0xc2: + fn = se ? gen_helper_gvec_vcgd64s : gen_helper_gvec_vcgd64; + break; + case 0xc0: + fn = se ? gen_helper_gvec_vclgd64s : gen_helper_gvec_vclgd64; + break; + case 0xc7: + fn = se ? gen_helper_gvec_vfi64s : gen_helper_gvec_vfi64; + break; + case 0xc5: + fn = se ? gen_helper_gvec_vflr64s : gen_helper_gvec_vflr64; + break; + default: + g_assert_not_reached(); + } + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env, + deposit32(m4, 4, 4, erm), fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfll(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s->fields, m3); + const uint8_t m4 = get_field(s->fields, m4); + gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfll32; + + if (fpf != FPF_SHORT || extract32(m4, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m4, 3, 1)) { + fn = gen_helper_gvec_vfll32s; + } + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env, + 0, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfma(DisasContext *s, DisasOps *o) +{ + const uint8_t m5 = get_field(s->fields, m5); + const uint8_t fpf = get_field(s->fields, m6); + const bool se = extract32(m5, 3, 1); + gen_helper_gvec_4_ptr *fn; + + if (fpf != FPF_LONG || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (s->fields->op2 == 0x8f) { + fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64; + } else { + fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64; + } + gen_gvec_4_ptr(get_field(s->fields, v1), get_field(s->fields, v2), + get_field(s->fields, v3), get_field(s->fields, v4), cpu_env, + 0, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vfpso(DisasContext *s, DisasOps *o) +{ + const uint8_t v1 = get_field(s->fields, v1); + const uint8_t v2 = get_field(s->fields, v2); + const uint8_t fpf = get_field(s->fields, m3); + const uint8_t m4 = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + TCGv_i64 tmp; + + if (fpf != FPF_LONG || extract32(m4, 0, 3) || m5 > 2) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m4, 3, 1)) { + tmp = tcg_temp_new_i64(); + read_vec_element_i64(tmp, v2, 0, ES_64); + switch (m5) { + case 0: + /* sign bit is inverted (complement) */ + tcg_gen_xori_i64(tmp, tmp, 1ull << 63); + break; + case 1: + /* sign bit is set to one (negative) */ + tcg_gen_ori_i64(tmp, tmp, 1ull << 63); + break; + case 2: + /* sign bit is set to zero (positive) */ + tcg_gen_andi_i64(tmp, tmp, (1ull << 63) - 1); + break; + } + write_vec_element_i64(tmp, v1, 0, ES_64); + tcg_temp_free_i64(tmp); + } else { + switch (m5) { + case 0: + /* sign bit is inverted (complement) */ + gen_gvec_fn_2i(xori, ES_64, v1, v2, 1ull << 63); + break; + case 1: + /* sign bit is set to one (negative) */ + gen_gvec_fn_2i(ori, ES_64, v1, v2, 1ull << 63); + break; + case 2: + /* sign bit is set to zero (positive) */ + gen_gvec_fn_2i(andi, ES_64, v1, v2, (1ull << 63) - 1); + break; + } + } + return DISAS_NEXT; +} + +static DisasJumpType op_vfsq(DisasContext *s, DisasOps *o) +{ + const uint8_t fpf = get_field(s->fields, m3); + const uint8_t m4 = get_field(s->fields, m4); + gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vfsq64; + + if (fpf != FPF_LONG || extract32(m4, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m4, 3, 1)) { + fn = gen_helper_gvec_vfsq64s; + } + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env, + 0, fn); + return DISAS_NEXT; +} + +static DisasJumpType op_vftci(DisasContext *s, DisasOps *o) +{ + const uint16_t i3 = get_field(s->fields, i3); + const uint8_t fpf = get_field(s->fields, m4); + const uint8_t m5 = get_field(s->fields, m5); + gen_helper_gvec_2_ptr *fn = gen_helper_gvec_vftci64; + + if (fpf != FPF_LONG || extract32(m5, 0, 3)) { + gen_program_exception(s, PGM_SPECIFICATION); + return DISAS_NORETURN; + } + + if (extract32(m5, 3, 1)) { + fn = gen_helper_gvec_vftci64s; + } + gen_gvec_2_ptr(get_field(s->fields, v1), get_field(s->fields, v2), cpu_env, + i3, fn); + set_cc_static(s); + return DISAS_NEXT; +} diff --git a/target/s390x/vec.h b/target/s390x/vec.h index 3313fb4..a6e3618 100644 --- a/target/s390x/vec.h +++ b/target/s390x/vec.h @@ -12,6 +12,8 @@ #ifndef S390X_VEC_H #define S390X_VEC_H +#include "tcg/tcg.h" + typedef union S390Vector { uint64_t doubleword[2]; uint32_t word[4]; @@ -70,6 +72,23 @@ static inline uint64_t s390_vec_read_element64(const S390Vector *v, uint8_t enr) return v->doubleword[enr]; } +static inline uint64_t s390_vec_read_element(const S390Vector *v, uint8_t enr, + uint8_t es) +{ + switch (es) { + case MO_8: + return s390_vec_read_element8(v, enr); + case MO_16: + return s390_vec_read_element16(v, enr); + case MO_32: + return s390_vec_read_element32(v, enr); + case MO_64: + return s390_vec_read_element64(v, enr); + default: + g_assert_not_reached(); + } +} + static inline void s390_vec_write_element8(S390Vector *v, uint8_t enr, uint8_t data) { @@ -98,4 +117,25 @@ static inline void s390_vec_write_element64(S390Vector *v, uint8_t enr, v->doubleword[enr] = data; } +static inline void s390_vec_write_element(S390Vector *v, uint8_t enr, + uint8_t es, uint64_t data) +{ + switch (es) { + case MO_8: + s390_vec_write_element8(v, enr, data); + break; + case MO_16: + s390_vec_write_element16(v, enr, data); + break; + case MO_32: + s390_vec_write_element32(v, enr, data); + break; + case MO_64: + s390_vec_write_element64(v, enr, data); + break; + default: + g_assert_not_reached(); + } +} + #endif /* S390X_VEC_H */ diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c new file mode 100644 index 0000000..a48bd70 --- /dev/null +++ b/target/s390x/vec_fpu_helper.c @@ -0,0 +1,625 @@ +/* + * QEMU TCG support -- s390x vector floating point instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "internal.h" +#include "vec.h" +#include "tcg_s390x.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" + +#define VIC_INVALID 0x1 +#define VIC_DIVBYZERO 0x2 +#define VIC_OVERFLOW 0x3 +#define VIC_UNDERFLOW 0x4 +#define VIC_INEXACT 0x5 + +/* returns the VEX. If the VEX is 0, there is no trap */ +static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC, + uint8_t *vec_exc) +{ + uint8_t vece_exc = 0, trap_exc; + unsigned qemu_exc; + + /* Retrieve and clear the softfloat exceptions */ + qemu_exc = env->fpu_status.float_exception_flags; + if (qemu_exc == 0) { + return 0; + } + env->fpu_status.float_exception_flags = 0; + + vece_exc = s390_softfloat_exc_to_ieee(qemu_exc); + + /* Add them to the vector-wide s390x exception bits */ + *vec_exc |= vece_exc; + + /* Check for traps and construct the VXC */ + trap_exc = vece_exc & env->fpc >> 24; + if (trap_exc) { + if (trap_exc & S390_IEEE_MASK_INVALID) { + return enr << 4 | VIC_INVALID; + } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) { + return enr << 4 | VIC_DIVBYZERO; + } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) { + return enr << 4 | VIC_OVERFLOW; + } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) { + return enr << 4 | VIC_UNDERFLOW; + } else if (!XxC) { + g_assert(trap_exc & S390_IEEE_MASK_INEXACT); + /* inexact has lowest priority on traps */ + return enr << 4 | VIC_INEXACT; + } + } + return 0; +} + +static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc, + uintptr_t retaddr) +{ + if (vxc) { + /* on traps, the fpc flags are not updated, instruction is suppressed */ + tcg_s390_vector_exception(env, vxc, retaddr); + } + if (vec_exc) { + /* indicate exceptions for all elements combined */ + env->fpc |= vec_exc << 16; + } +} + +typedef uint64_t (*vop64_2_fn)(uint64_t a, float_status *s); +static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, vop64_2_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 2; i++) { + const uint64_t a = s390_vec_read_element64(v2, i); + + s390_vec_write_element64(&tmp, i, fn(a, &env->fpu_status)); + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +typedef uint64_t (*vop64_3_fn)(uint64_t a, uint64_t b, float_status *s); +static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vop64_3_fn fn, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + const uint64_t a = s390_vec_read_element64(v2, i); + const uint64_t b = s390_vec_read_element64(v3, i); + + s390_vec_write_element64(&tmp, i, fn(a, b, &env->fpu_status)); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +static uint64_t vfa64(uint64_t a, uint64_t b, float_status *s) +{ + return float64_add(a, b, s); +} + +void HELPER(gvec_vfa64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, false, vfa64, GETPC()); +} + +void HELPER(gvec_vfa64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, true, vfa64, GETPC()); +} + +static int wfc64(const S390Vector *v1, const S390Vector *v2, + CPUS390XState *env, bool signal, uintptr_t retaddr) +{ + /* only the zero-indexed elements are compared */ + const float64 a = s390_vec_read_element64(v1, 0); + const float64 b = s390_vec_read_element64(v2, 0); + uint8_t vxc, vec_exc = 0; + int cmp; + + if (signal) { + cmp = float64_compare(a, b, &env->fpu_status); + } else { + cmp = float64_compare_quiet(a, b, &env->fpu_status); + } + vxc = check_ieee_exc(env, 0, false, &vec_exc); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + + return float_comp_to_cc(env, cmp); +} + +void HELPER(gvec_wfc64)(const void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + env->cc_op = wfc64(v1, v2, env, false, GETPC()); +} + +void HELPER(gvec_wfk64)(const void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + env->cc_op = wfc64(v1, v2, env, true, GETPC()); +} + +typedef int (*vfc64_fn)(float64 a, float64 b, float_status *status); +static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int match = 0; + int i; + + for (i = 0; i < 2; i++) { + const float64 a = s390_vec_read_element64(v2, i); + const float64 b = s390_vec_read_element64(v3, i); + + /* swap the order of the parameters, so we can use existing functions */ + if (fn(b, a, &env->fpu_status)) { + match++; + s390_vec_write_element64(&tmp, i, -1ull); + } + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; + if (match) { + return s || match == 2 ? 0 : 1; + } + return 3; +} + +void HELPER(gvec_vfce64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC()); +} + +void HELPER(gvec_vfce64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC()); +} + +void HELPER(gvec_vfce64_cc)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + env->cc_op = vfc64(v1, v2, v3, env, false, float64_eq_quiet, GETPC()); +} + +void HELPER(gvec_vfce64s_cc)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + env->cc_op = vfc64(v1, v2, v3, env, true, float64_eq_quiet, GETPC()); +} + +void HELPER(gvec_vfch64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC()); +} + +void HELPER(gvec_vfch64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC()); +} + +void HELPER(gvec_vfch64_cc)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + env->cc_op = vfc64(v1, v2, v3, env, false, float64_lt_quiet, GETPC()); +} + +void HELPER(gvec_vfch64s_cc)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + env->cc_op = vfc64(v1, v2, v3, env, true, float64_lt_quiet, GETPC()); +} + +void HELPER(gvec_vfche64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC()); +} + +void HELPER(gvec_vfche64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC()); +} + +void HELPER(gvec_vfche64_cc)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + env->cc_op = vfc64(v1, v2, v3, env, false, float64_le_quiet, GETPC()); +} + +void HELPER(gvec_vfche64s_cc)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + env->cc_op = vfc64(v1, v2, v3, env, true, float64_le_quiet, GETPC()); +} + +static uint64_t vcdg64(uint64_t a, float_status *s) +{ + return int64_to_float64(a, s); +} + +void HELPER(gvec_vcdg64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, false, XxC, erm, vcdg64, GETPC()); +} + +void HELPER(gvec_vcdg64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, true, XxC, erm, vcdg64, GETPC()); +} + +static uint64_t vcdlg64(uint64_t a, float_status *s) +{ + return uint64_to_float64(a, s); +} + +void HELPER(gvec_vcdlg64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, false, XxC, erm, vcdlg64, GETPC()); +} + +void HELPER(gvec_vcdlg64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, true, XxC, erm, vcdlg64, GETPC()); +} + +static uint64_t vcgd64(uint64_t a, float_status *s) +{ + return float64_to_int64(a, s); +} + +void HELPER(gvec_vcgd64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, false, XxC, erm, vcgd64, GETPC()); +} + +void HELPER(gvec_vcgd64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, true, XxC, erm, vcgd64, GETPC()); +} + +static uint64_t vclgd64(uint64_t a, float_status *s) +{ + return float64_to_uint64(a, s); +} + +void HELPER(gvec_vclgd64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, false, XxC, erm, vclgd64, GETPC()); +} + +void HELPER(gvec_vclgd64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, true, XxC, erm, vclgd64, GETPC()); +} + +static uint64_t vfd64(uint64_t a, uint64_t b, float_status *s) +{ + return float64_div(a, b, s); +} + +void HELPER(gvec_vfd64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, false, vfd64, GETPC()); +} + +void HELPER(gvec_vfd64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, true, vfd64, GETPC()); +} + +static uint64_t vfi64(uint64_t a, float_status *s) +{ + return float64_round_to_int(a, s); +} + +void HELPER(gvec_vfi64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, false, XxC, erm, vfi64, GETPC()); +} + +void HELPER(gvec_vfi64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vop64_2(v1, v2, env, true, XxC, erm, vfi64, GETPC()); +} + +static void vfll32(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + /* load from even element */ + const float32 a = s390_vec_read_element32(v2, i * 2); + const uint64_t ret = float32_to_float64(a, &env->fpu_status); + + s390_vec_write_element64(&tmp, i, ret); + /* indicate the source element */ + vxc = check_ieee_exc(env, i * 2, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + vfll32(v1, v2, env, false, GETPC()); +} + +void HELPER(gvec_vfll32s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + vfll32(v1, v2, env, true, GETPC()); +} + +static void vflr64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, bool XxC, uint8_t erm, uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i, old_mode; + + old_mode = s390_swap_bfp_rounding_mode(env, erm); + for (i = 0; i < 2; i++) { + float64 a = s390_vec_read_element64(v2, i); + uint32_t ret = float64_to_float32(a, &env->fpu_status); + + /* place at even element */ + s390_vec_write_element32(&tmp, i * 2, ret); + /* indicate the source element */ + vxc = check_ieee_exc(env, i, XxC, &vec_exc); + if (s || vxc) { + break; + } + } + s390_restore_bfp_rounding_mode(env, old_mode); + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vflr64(v1, v2, env, false, XxC, erm, GETPC()); +} + +void HELPER(gvec_vflr64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + const uint8_t erm = extract32(simd_data(desc), 4, 4); + const bool XxC = extract32(simd_data(desc), 2, 1); + + vflr64(v1, v2, env, true, XxC, erm, GETPC()); +} + +static uint64_t vfm64(uint64_t a, uint64_t b, float_status *s) +{ + return float64_mul(a, b, s); +} + +void HELPER(gvec_vfm64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, false, vfm64, GETPC()); +} + +void HELPER(gvec_vfm64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, true, vfm64, GETPC()); +} + +static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3, + const S390Vector *v4, CPUS390XState *env, bool s, int flags, + uintptr_t retaddr) +{ + uint8_t vxc, vec_exc = 0; + S390Vector tmp = {}; + int i; + + for (i = 0; i < 2; i++) { + const uint64_t a = s390_vec_read_element64(v2, i); + const uint64_t b = s390_vec_read_element64(v3, i); + const uint64_t c = s390_vec_read_element64(v4, i); + uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status); + + s390_vec_write_element64(&tmp, i, ret); + vxc = check_ieee_exc(env, i, false, &vec_exc); + if (s || vxc) { + break; + } + } + handle_ieee_exc(env, vxc, vec_exc, retaddr); + *v1 = tmp; +} + +void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3, + const void *v4, CPUS390XState *env, uint32_t desc) +{ + vfma64(v1, v2, v3, v4, env, false, 0, GETPC()); +} + +void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3, + const void *v4, CPUS390XState *env, uint32_t desc) +{ + vfma64(v1, v2, v3, v4, env, true, 0, GETPC()); +} + +void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3, + const void *v4, CPUS390XState *env, uint32_t desc) +{ + vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC()); +} + +void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3, + const void *v4, CPUS390XState *env, uint32_t desc) +{ + vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC()); +} + +static uint64_t vfsq64(uint64_t a, float_status *s) +{ + return float64_sqrt(a, s); +} + +void HELPER(gvec_vfsq64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + vop64_2(v1, v2, env, false, false, 0, vfsq64, GETPC()); +} + +void HELPER(gvec_vfsq64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + vop64_2(v1, v2, env, true, false, 0, vfsq64, GETPC()); +} + +static uint64_t vfs64(uint64_t a, uint64_t b, float_status *s) +{ + return float64_sub(a, b, s); +} + +void HELPER(gvec_vfs64)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, false, vfs64, GETPC()); +} + +void HELPER(gvec_vfs64s)(void *v1, const void *v2, const void *v3, + CPUS390XState *env, uint32_t desc) +{ + vop64_3(v1, v2, v3, env, true, vfs64, GETPC()); +} + +static int vftci64(S390Vector *v1, const S390Vector *v2, CPUS390XState *env, + bool s, uint16_t i3) +{ + int i, match = 0; + + for (i = 0; i < 2; i++) { + float64 a = s390_vec_read_element64(v2, i); + + if (float64_dcmask(env, a) & i3) { + match++; + s390_vec_write_element64(v1, i, -1ull); + } else { + s390_vec_write_element64(v1, i, 0); + } + if (s) { + break; + } + } + + if (match) { + return s || match == 2 ? 0 : 1; + } + return 3; +} + +void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + env->cc_op = vftci64(v1, v2, env, false, simd_data(desc)); +} + +void HELPER(gvec_vftci64s)(void *v1, const void *v2, CPUS390XState *env, + uint32_t desc) +{ + env->cc_op = vftci64(v1, v2, env, true, simd_data(desc)); +} diff --git a/target/s390x/vec_string_helper.c b/target/s390x/vec_string_helper.c new file mode 100644 index 0000000..c516c0c --- /dev/null +++ b/target/s390x/vec_string_helper.c @@ -0,0 +1,473 @@ +/* + * QEMU TCG support -- s390x vector string instruction support + * + * Copyright (C) 2019 Red Hat Inc + * + * Authors: + * David Hildenbrand <david@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "internal.h" +#include "vec.h" +#include "tcg/tcg.h" +#include "tcg/tcg-gvec-desc.h" +#include "exec/helper-proto.h" + +/* + * Returns a bit set in the MSB of each element that is zero, + * as defined by the mask. + */ +static inline uint64_t zero_search(uint64_t a, uint64_t mask) +{ + return ~(((a & mask) + mask) | a | mask); +} + +/* + * Returns a bit set in the MSB of each element that is not zero, + * as defined by the mask. + */ +static inline uint64_t nonzero_search(uint64_t a, uint64_t mask) +{ + return (((a & mask) + mask) | a) & ~mask; +} + +/* + * Returns the byte offset for the first match, or 16 for no match. + */ +static inline int match_index(uint64_t c0, uint64_t c1) +{ + return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3; +} + +/* + * Returns the number of bits composing one element. + */ +static uint8_t get_element_bits(uint8_t es) +{ + return (1 << es) * BITS_PER_BYTE; +} + +/* + * Returns the bitmask for a single element. + */ +static uint64_t get_single_element_mask(uint8_t es) +{ + return -1ull >> (64 - get_element_bits(es)); +} + +/* + * Returns the bitmask for a single element (excluding the MSB). + */ +static uint64_t get_single_element_lsbs_mask(uint8_t es) +{ + return -1ull >> (65 - get_element_bits(es)); +} + +/* + * Returns the bitmasks for multiple elements (excluding the MSBs). + */ +static uint64_t get_element_lsbs_mask(uint8_t es) +{ + return dup_const(es, get_single_element_lsbs_mask(es)); +} + +static int vfae(void *v1, const void *v2, const void *v3, bool in, + bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + const int bits = get_element_bits(es); + uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + int i; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = 0; + e1 = 0; + /* compare against equality with every other element */ + for (i = 0; i < 64; i += bits) { + t0 = rol64(b0, i); + t1 = rol64(b1, i); + e0 |= zero_search(a0 ^ t0, mask); + e0 |= zero_search(a0 ^ t1, mask); + e1 |= zero_search(a1 ^ t0, mask); + e1 |= zero_search(a1 ^ t1, mask); + } + /* invert the result if requested - invert only the MSBs */ + if (in) { + e0 = ~e0 & ~mask; + e1 = ~e1 & ~mask; + } + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + if (rt) { + e0 = (e0 >> (bits - 1)) * get_single_element_mask(es); + e1 = (e1 >> (bits - 1)) * get_single_element_mask(es); + s390_vec_write_element64(v1, 0, e0); + s390_vec_write_element64(v1, 1, e1); + } else { + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + } + + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFAE_HELPER(BITS) \ +void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_HELPER(8) +DEF_VFAE_HELPER(16) +DEF_VFAE_HELPER(32) + +#define DEF_VFAE_CC_HELPER(BITS) \ +void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool rt = extract32(simd_data(desc), 2, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS); \ +} +DEF_VFAE_CC_HELPER(8) +DEF_VFAE_CC_HELPER(16) +DEF_VFAE_CC_HELPER(32) + +static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_equal; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = zero_search(a0 ^ b0, mask); + e1 = zero_search(a1 ^ b1, mask); + first_equal = match_index(e0, e1); + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_equal == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_equal < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VFEE_HELPER(BITS) \ +void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_HELPER(8) +DEF_VFEE_HELPER(16) +DEF_VFEE_HELPER(32) + +#define DEF_VFEE_CC_HELPER(BITS) \ +void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFEE_CC_HELPER(8) +DEF_VFEE_CC_HELPER(16) +DEF_VFEE_CC_HELPER(32) + +static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0, a1, b0, b1, e0, e1, z0, z1; + uint64_t first_zero = 16; + uint64_t first_inequal; + bool smaller = false; + + a0 = s390_vec_read_element64(v2, 0); + a1 = s390_vec_read_element64(v2, 1); + b0 = s390_vec_read_element64(v3, 0); + b1 = s390_vec_read_element64(v3, 1); + e0 = nonzero_search(a0 ^ b0, mask); + e1 = nonzero_search(a1 ^ b1, mask); + first_inequal = match_index(e0, e1); + + /* identify the smaller element */ + if (first_inequal < 16) { + uint8_t enr = first_inequal / (1 << es); + uint32_t a = s390_vec_read_element(v2, enr, es); + uint32_t b = s390_vec_read_element(v3, enr, es); + + smaller = a < b; + } + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero)); + s390_vec_write_element64(v1, 1, 0); + if (first_zero == 16 && first_inequal == 16) { + return 3; + } else if (first_zero < first_inequal) { + return 0; + } + return smaller ? 1 : 2; +} + +#define DEF_VFENE_HELPER(BITS) \ +void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3, \ + uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_HELPER(8) +DEF_VFENE_HELPER(16) +DEF_VFENE_HELPER(32) + +#define DEF_VFENE_CC_HELPER(BITS) \ +void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3, \ + CPUS390XState *env, uint32_t desc) \ +{ \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS); \ +} +DEF_VFENE_CC_HELPER(8) +DEF_VFENE_CC_HELPER(16) +DEF_VFENE_CC_HELPER(32) + +static int vistr(void *v1, const void *v2, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + uint64_t z; + int cc = 3; + + z = zero_search(a0, mask); + if (z) { + a0 &= ~(-1ull >> clz64(z)); + a1 = 0; + cc = 0; + } else { + z = zero_search(a1, mask); + if (z) { + a1 &= ~(-1ull >> clz64(z)); + cc = 0; + } + } + + s390_vec_write_element64(v1, 0, a0); + s390_vec_write_element64(v1, 1, a1); + return cc; +} + +#define DEF_VISTR_HELPER(BITS) \ +void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc) \ +{ \ + vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_HELPER(8) +DEF_VISTR_HELPER(16) +DEF_VISTR_HELPER(32) + +#define DEF_VISTR_CC_HELPER(BITS) \ +void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + env->cc_op = vistr(v1, v2, MO_##BITS); \ +} +DEF_VISTR_CC_HELPER(8) +DEF_VISTR_CC_HELPER(16) +DEF_VISTR_CC_HELPER(32) + +static bool element_compare(uint32_t data, uint32_t l, uint8_t c) +{ + const bool equal = extract32(c, 7, 1); + const bool lower = extract32(c, 6, 1); + const bool higher = extract32(c, 5, 1); + + if (data < l) { + return lower; + } else if (data > l) { + return higher; + } + return equal; +} + +static int vstrc(void *v1, const void *v2, const void *v3, const void *v4, + bool in, bool rt, bool zs, uint8_t es) +{ + const uint64_t mask = get_element_lsbs_mask(es); + uint64_t a0 = s390_vec_read_element64(v2, 0); + uint64_t a1 = s390_vec_read_element64(v2, 1); + int first_zero = 16, first_match = 16; + S390Vector rt_result = {}; + uint64_t z0, z1; + int i, j; + + if (zs) { + z0 = zero_search(a0, mask); + z1 = zero_search(a1, mask); + first_zero = match_index(z0, z1); + } + + for (i = 0; i < 16 / (1 << es); i++) { + const uint32_t data = s390_vec_read_element(v2, i, es); + const int cur_byte = i * (1 << es); + bool any_match = false; + + /* if we don't need a bit vector, we can stop early */ + if (cur_byte == first_zero && !rt) { + break; + } + + for (j = 0; j < 16 / (1 << es); j += 2) { + const uint32_t l1 = s390_vec_read_element(v3, j, es); + const uint32_t l2 = s390_vec_read_element(v3, j + 1, es); + /* we are only interested in the highest byte of each element */ + const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es)); + const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es)); + + if (element_compare(data, l1, c1) && + element_compare(data, l2, c2)) { + any_match = true; + break; + } + } + /* invert the result if requested */ + any_match = in ^ any_match; + + if (any_match) { + /* indicate bit vector if requested */ + if (rt) { + const uint64_t val = -1ull; + + first_match = MIN(cur_byte, first_match); + s390_vec_write_element(&rt_result, i, es, val); + } else { + /* stop on the first match */ + first_match = cur_byte; + break; + } + } + } + + if (rt) { + *(S390Vector *)v1 = rt_result; + } else { + s390_vec_write_element64(v1, 0, MIN(first_match, first_zero)); + s390_vec_write_element64(v1, 1, 0); + } + + if (first_zero == 16 && first_match == 16) { + return 3; /* no match */ + } else if (first_zero == 16) { + return 1; /* matching elements, no match for zero */ + } else if (first_match < first_zero) { + return 2; /* matching elements before match for zero */ + } + return 0; /* match for zero */ +} + +#define DEF_VSTRC_HELPER(BITS) \ +void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ +} +DEF_VSTRC_HELPER(8) +DEF_VSTRC_HELPER(16) +DEF_VSTRC_HELPER(32) + +#define DEF_VSTRC_RT_HELPER(BITS) \ +void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ +} +DEF_VSTRC_RT_HELPER(8) +DEF_VSTRC_RT_HELPER(16) +DEF_VSTRC_RT_HELPER(32) + +#define DEF_VSTRC_CC_HELPER(BITS) \ +void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS); \ +} +DEF_VSTRC_CC_HELPER(8) +DEF_VSTRC_CC_HELPER(16) +DEF_VSTRC_CC_HELPER(32) + +#define DEF_VSTRC_CC_RT_HELPER(BITS) \ +void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3, \ + const void *v4, CPUS390XState *env, \ + uint32_t desc) \ +{ \ + const bool in = extract32(simd_data(desc), 3, 1); \ + const bool zs = extract32(simd_data(desc), 1, 1); \ + \ + env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS); \ +} +DEF_VSTRC_CC_RT_HELPER(8) +DEF_VSTRC_CC_RT_HELPER(16) +DEF_VSTRC_CC_RT_HELPER(32) |