Diffstat (limited to 'target/riscv/insn_trans/trans_rvv.c.inc')
-rw-r--r-- | target/riscv/insn_trans/trans_rvv.c.inc | 397
1 file changed, 309 insertions, 88 deletions
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index b9883a5..2a48717 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s)
     }
 }
 
-/* Destination vector register group cannot overlap source mask register. */
-static bool require_vm(int vm, int vd)
+/*
+ * Source and destination vector register groups cannot overlap source mask
+ * register:
+ *
+ * A vector register cannot be used to provide source operands with more than
+ * one EEW for a single instruction. A mask register source is considered to
+ * have EEW=1 for this constraint. An encoding that would result in the same
+ * vector register being read with two or more different EEWs, including when
+ * the vector register appears at different positions within two or more vector
+ * register groups, is reserved.
+ * (Section 5.2)
+ *
+ * A destination vector register group can overlap a source vector
+ * register group only if one of the following holds:
+ *   1. The destination EEW equals the source EEW.
+ *   2. The destination EEW is smaller than the source EEW and the overlap
+ *      is in the lowest-numbered part of the source register group.
+ *   3. The destination EEW is greater than the source EEW, the source EMUL
+ *      is at least 1, and the overlap is in the highest-numbered part of
+ *      the destination register group.
+ * For the purpose of determining register group overlap constraints, mask
+ * elements have EEW=1.
+ * (Section 5.2)
+ */
+static bool require_vm(int vm, int v)
 {
-    return (vm != 0 || vd != 0);
+    return (vm != 0 || v != 0);
 }
 
 static bool require_nf(int vd, int nf, int lmul)
@@ -179,7 +202,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
         s1 = get_gpr(s, rs1, EXT_ZERO);
     }
 
-    gen_helper_vsetvl(dst, tcg_env, s1, s2);
+    gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl((int) (rd == 0 && rs1 == 0)));
     gen_set_gpr(s, rd, dst);
     finalize_rvv_inst(s);
 
@@ -199,7 +222,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2)
 
     dst = dest_gpr(s, rd);
 
-    gen_helper_vsetvl(dst, tcg_env, s1, s2);
+    gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0));
    gen_set_gpr(s, rd, dst);
     finalize_rvv_inst(s);
     gen_update_pc(s, s->cur_insn_len);
@@ -356,11 +379,41 @@ static bool vext_check_ld_index(DisasContext *s, int vd, int vs2,
     return ret;
 }
 
+/*
+ * Check whether a vector register is used to provide source operands with
+ * more than one EEW for the vector instruction.
+ * Returns true if the instruction has valid encoding
+ * Returns false if encoding violates the mismatched input EEWs constraint
+ */
+static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1,
+                                 int vs2, uint8_t eew_vs2, int vm)
+{
+    bool is_valid = true;
+    int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul;
+    int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul;
+
+    /* When vm is 0, vs1 & vs2(EEW!=1) group can't overlap v0 (EEW=1) */
+    if ((vs1 != -1 && !require_vm(vm, vs1)) ||
+        (vs2 != -1 && !require_vm(vm, vs2))) {
+        is_valid = false;
+    }
+
+    /* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */
+    if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) &&
+        is_overlapped(vs1, 1 << MAX(emul_vs1, 0),
+                      vs2, 1 << MAX(emul_vs2, 0))) {
+        is_valid = false;
+    }
+
+    return is_valid;
+}
+
 static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
 {
     return require_vm(vm, vd) &&
            require_align(vd, s->lmul) &&
-           require_align(vs, s->lmul);
+           require_align(vs, s->lmul) &&
+           vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm);
 }
 
 /*
@@ -379,6 +432,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
 static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
     return vext_check_ss(s, vd, vs2, vm) &&
+           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
            require_align(vs1, s->lmul);
 }
 
@@ -474,6 +528,7 @@ static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
 static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
 {
     return vext_wide_check_common(s, vd, vm) &&
+           vext_check_input_eew(s, vs, s->sew, -1, 0, vm) &&
            require_align(vs, s->lmul) &&
            require_noover(vd, s->lmul + 1, vs, s->lmul);
 }
 
@@ -481,6 +536,7 @@ static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
 static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
 {
     return vext_wide_check_common(s, vd, vm) &&
+           vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) &&
            require_align(vs, s->lmul + 1);
 }
 
@@ -499,6 +555,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
 static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
     return vext_check_ds(s, vd, vs2, vm) &&
+           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
            require_align(vs1, s->lmul) &&
            require_noover(vd, s->lmul + 1, vs1, s->lmul);
 }
@@ -521,12 +578,14 @@ static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
     return vext_check_ds(s, vd, vs1, vm) &&
+           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
            require_align(vs2, s->lmul + 1);
 }
 
 static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
 {
-    bool ret = vext_narrow_check_common(s, vd, vs, vm);
+    bool ret = vext_narrow_check_common(s, vd, vs, vm) &&
+               vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm);
     if (vd != vs) {
         ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
     }
@@ -549,6 +608,7 @@ static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
 static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
     return vext_check_sd(s, vd, vs2, vm) &&
+           vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
            require_align(vs1, s->lmul);
 }
 
@@ -584,7 +644,9 @@ static bool vext_check_slide(DisasContext *s, int vd, int vs2,
 {
     bool ret = require_align(vs2, s->lmul) &&
                require_align(vd, s->lmul) &&
-               require_vm(vm, vd);
+               require_vm(vm, vd) &&
+               vext_check_input_eew(s, -1, 0, vs2, s->sew, vm);
+
     if (is_over) {
         ret &= (vd != vs2);
     }
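As an illustration of the arithmetic behind vext_check_input_eew() (not part of the patch, with helper names invented for the sketch): EMUL is derived in log2 form as eew - sew + lmul, and two register groups of 2^max(emul, 0) registers conflict when their index ranges intersect while being read at different EEWs.

#include <stdbool.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Simplified stand-in for the is_overlapped() helper the patch calls. */
static bool is_overlapped(int astart, int asize, int bstart, int bsize)
{
    return astart < bstart + bsize && bstart < astart + asize;
}

int main(void)
{
    /*
     * Hypothetical narrowing-style operands: SEW=16 (sew=1), LMUL=1 (lmul=0),
     * vs1 = v4 read at EEW=16 and vs2 = v4 read at EEW=32
     * (cf. vext_check_sds() passing s->sew and s->sew + 1 above).
     */
    int sew = 1, lmul = 0;
    int vs1 = 4, eew_vs1 = 1;       /* EMUL=1 -> group {v4}     */
    int vs2 = 4, eew_vs2 = 2;       /* EMUL=2 -> group {v4, v5} */

    int emul_vs1 = eew_vs1 - sew + lmul;
    int emul_vs2 = eew_vs2 - sew + lmul;

    bool reserved = is_overlapped(vs1, 1 << MAX(emul_vs1, 0),
                                  vs2, 1 << MAX(emul_vs2, 0));

    /* v4 would be read with two different EEWs, so the encoding is reserved */
    printf("emul_vs1=%d emul_vs2=%d reserved=%d\n",
           emul_vs1, emul_vs2, reserved);
    return 0;
}

With these values the sketch reports reserved=1, which is exactly the kind of encoding the new checks refuse to translate.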
@@ -981,7 +1043,8 @@ static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
 {
     return require_rvv(s) &&
            vext_check_isa_ill(s) &&
-           vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew);
+           vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) &&
+           vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm);
 }
 
 GEN_VEXT_TRANS(vlxei8_v, MO_8, rnfvm, ld_index_op, ld_index_check)
@@ -1033,7 +1096,8 @@ static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
 {
     return require_rvv(s) &&
            vext_check_isa_ill(s) &&
-           vext_check_st_index(s, a->rd, a->rs2, a->nf, eew);
+           vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) &&
+           vext_check_input_eew(s, a->rd, s->sew, a->rs2, eew, a->vm);
 }
 
 GEN_VEXT_TRANS(vsxei8_v, MO_8, rnfvm, st_index_op, st_index_check)
@@ -1063,6 +1127,12 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
     fn(dest, mask, base, tcg_env, desc);
     finalize_rvv_inst(s);
+
+    /* vector unit-stride fault-only-first load may modify vl CSR */
+    gen_update_pc(s, s->cur_insn_len);
+    lookup_and_goto_ptr(s);
+    s->base.is_jmp = DISAS_NORETURN;
+
     return true;
 }
@@ -1100,25 +1170,86 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
 typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);
 
 static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
-                             gen_helper_ldst_whole *fn,
-                             DisasContext *s)
+                             uint32_t log2_esz, gen_helper_ldst_whole *fn,
+                             DisasContext *s, bool is_load)
 {
-    TCGv_ptr dest;
-    TCGv base;
-    TCGv_i32 desc;
-
-    uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
-    data = FIELD_DP32(data, VDATA, VM, 1);
-    dest = tcg_temp_new_ptr();
-    desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
-                                      s->cfg_ptr->vlenb, data));
-
-    base = get_gpr(s, rs1, EXT_NONE);
-    tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
-    mark_vs_dirty(s);
-    fn(dest, base, tcg_env, desc);
+    /*
+     * Load/store multiple bytes per iteration.
+     * When possible do this atomically.
+     * Update vstart with the number of processed elements.
+     * Use the helper function if either:
+     * - vstart is not 0.
+     * - the target has 32 bit registers and we are loading/storing 64 bit long
+     *   elements. This is to ensure that we process every element with a single
+     *   memory instruction.
+     */
+
+    bool use_helper_fn = !(s->vstart_eq_zero) ||
+                         (TCG_TARGET_REG_BITS == 32 && log2_esz == 3);
+
+    if (!use_helper_fn) {
+        TCGv addr = tcg_temp_new();
+        uint32_t size = s->cfg_ptr->vlenb * nf;
+        TCGv_i64 t8 = tcg_temp_new_i64();
+        TCGv_i32 t4 = tcg_temp_new_i32();
+        MemOp atomicity = MO_ATOM_NONE;
+        if (log2_esz == 0) {
+            atomicity = MO_ATOM_NONE;
+        } else {
+            atomicity = MO_ATOM_IFALIGN_PAIR;
+        }
+        if (TCG_TARGET_REG_BITS == 64) {
+            for (int i = 0; i < size; i += 8) {
+                addr = get_address(s, rs1, i);
+                if (is_load) {
+                    tcg_gen_qemu_ld_i64(t8, addr, s->mem_idx,
+                                        MO_LE | MO_64 | atomicity);
+                    tcg_gen_st_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
+                } else {
+                    tcg_gen_ld_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
+                    tcg_gen_qemu_st_i64(t8, addr, s->mem_idx,
+                                        MO_LE | MO_64 | atomicity);
+                }
+                if (i == size - 8) {
+                    tcg_gen_movi_tl(cpu_vstart, 0);
+                } else {
+                    tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz);
+                }
+            }
+        } else {
+            for (int i = 0; i < size; i += 4) {
+                addr = get_address(s, rs1, i);
+                if (is_load) {
+                    tcg_gen_qemu_ld_i32(t4, addr, s->mem_idx,
+                                        MO_LE | MO_32 | atomicity);
+                    tcg_gen_st_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
+                } else {
+                    tcg_gen_ld_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
+                    tcg_gen_qemu_st_i32(t4, addr, s->mem_idx,
+                                        MO_LE | MO_32 | atomicity);
+                }
+                if (i == size - 4) {
+                    tcg_gen_movi_tl(cpu_vstart, 0);
+                } else {
+                    tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz);
+                }
+            }
+        }
+    } else {
+        TCGv_ptr dest;
+        TCGv base;
+        TCGv_i32 desc;
+        uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
+        data = FIELD_DP32(data, VDATA, VM, 1);
+        dest = tcg_temp_new_ptr();
+        desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
+                                          s->cfg_ptr->vlenb, data));
+        base = get_gpr(s, rs1, EXT_NONE);
+        tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
+        fn(dest, base, tcg_env, desc);
+    }
 
     finalize_rvv_inst(s);
     return true;
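For reference, and not part of the patch, a small standalone model of the fast path's bookkeeping: it moves vlenb * nf bytes in 8-byte chunks (4-byte on 32-bit hosts) and advances vstart by chunk_size >> log2_esz elements per chunk, clearing it on the last one. The function name and the VLEN=128 (vlenb=16) figures below are assumptions made for the sketch.

#include <stdint.h>
#include <stdio.h>

/* Models the loop bounds and vstart updates of the 64-bit host fast path. */
static void model_whole_reg_access(uint32_t vlenb, uint32_t nf,
                                   uint32_t log2_esz)
{
    uint32_t size = vlenb * nf;     /* bytes touched by one vl<nf>r/vs<nf>r */
    uint32_t step = 8;              /* one 64-bit memory access per step    */
    uint32_t vstart = 0;
    uint32_t accesses = 0;

    for (uint32_t i = 0; i < size; i += step) {
        accesses++;
        if (i == size - step) {
            vstart = 0;                     /* last chunk: clear vstart  */
        } else {
            vstart += step >> log2_esz;     /* elements completed so far */
        }
    }
    printf("nf=%u log2_esz=%u -> %u bytes, %u accesses, final vstart=%u\n",
           nf, log2_esz, size, accesses, vstart);
}

int main(void)
{
    model_whole_reg_access(16, 1, 2);   /* e.g. vl1re32.v with VLEN=128 */
    model_whole_reg_access(16, 8, 3);   /* e.g. vl8re64.v with VLEN=128 */
    return 0;
}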
@@ -1128,42 +1259,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
  * load and store whole register instructions ignore vtype and vl setting.
  * Thus, we don't need to check vill bit. (Section 7.9)
  */
-#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF)                                  \
-static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                   \
-{                                                                           \
-    if (require_rvv(s) &&                                                   \
-        QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                   \
-        return ldst_whole_trans(a->rd, a->rs1, ARG_NF,                      \
-                                gen_helper_##NAME, s);                      \
-    }                                                                       \
-    return false;                                                           \
-}
-
-GEN_LDST_WHOLE_TRANS(vl1re8_v, 1)
-GEN_LDST_WHOLE_TRANS(vl1re16_v, 1)
-GEN_LDST_WHOLE_TRANS(vl1re32_v, 1)
-GEN_LDST_WHOLE_TRANS(vl1re64_v, 1)
-GEN_LDST_WHOLE_TRANS(vl2re8_v, 2)
-GEN_LDST_WHOLE_TRANS(vl2re16_v, 2)
-GEN_LDST_WHOLE_TRANS(vl2re32_v, 2)
-GEN_LDST_WHOLE_TRANS(vl2re64_v, 2)
-GEN_LDST_WHOLE_TRANS(vl4re8_v, 4)
-GEN_LDST_WHOLE_TRANS(vl4re16_v, 4)
-GEN_LDST_WHOLE_TRANS(vl4re32_v, 4)
-GEN_LDST_WHOLE_TRANS(vl4re64_v, 4)
-GEN_LDST_WHOLE_TRANS(vl8re8_v, 8)
-GEN_LDST_WHOLE_TRANS(vl8re16_v, 8)
-GEN_LDST_WHOLE_TRANS(vl8re32_v, 8)
-GEN_LDST_WHOLE_TRANS(vl8re64_v, 8)
+#define GEN_LDST_WHOLE_TRANS(NAME, ETYPE, ARG_NF, IS_LOAD)                  \
+static bool trans_##NAME(DisasContext *s, arg_##NAME * a)                   \
+{                                                                           \
+    if (require_rvv(s) &&                                                   \
+        QEMU_IS_ALIGNED(a->rd, ARG_NF)) {                                   \
+        return ldst_whole_trans(a->rd, a->rs1, ARG_NF, ctzl(sizeof(ETYPE)), \
+                                gen_helper_##NAME, s, IS_LOAD);             \
+    }                                                                       \
+    return false;                                                           \
+}
+
+GEN_LDST_WHOLE_TRANS(vl1re8_v, int8_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl1re16_v, int16_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl1re32_v, int32_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl1re64_v, int64_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl2re8_v, int8_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl2re16_v, int16_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl2re32_v, int32_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl2re64_v, int64_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl4re8_v, int8_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl4re16_v, int16_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl4re32_v, int32_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl4re64_v, int64_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl8re8_v, int8_t, 8, true)
+GEN_LDST_WHOLE_TRANS(vl8re16_v, int16_t, 8, true)
+GEN_LDST_WHOLE_TRANS(vl8re32_v, int32_t, 8, true)
+GEN_LDST_WHOLE_TRANS(vl8re64_v, int64_t, 8, true)
 
 /*
  * The vector whole register store instructions are encoded similar to
  * unmasked unit-stride store of elements with EEW=8.
  */
-GEN_LDST_WHOLE_TRANS(vs1r_v, 1)
-GEN_LDST_WHOLE_TRANS(vs2r_v, 2)
-GEN_LDST_WHOLE_TRANS(vs4r_v, 4)
-GEN_LDST_WHOLE_TRANS(vs8r_v, 8)
+GEN_LDST_WHOLE_TRANS(vs1r_v, int8_t, 1, false)
+GEN_LDST_WHOLE_TRANS(vs2r_v, int8_t, 2, false)
+GEN_LDST_WHOLE_TRANS(vs4r_v, int8_t, 4, false)
+GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false)
 
 /*
  *** Vector Integer Arithmetic Instructions
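The new ETYPE macro argument exists only to derive log2_esz: a count of the trailing zero bits of the element size yields the same 0..3 values as MO_8..MO_64. A quick standalone check, using the GCC/Clang builtin rather than QEMU's ctzl() wrapper:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* log2 of the element size in bytes, as passed for log2_esz */
    printf("int8_t  -> %d\n", __builtin_ctzl(sizeof(int8_t)));   /* 0 */
    printf("int16_t -> %d\n", __builtin_ctzl(sizeof(int16_t)));  /* 1 */
    printf("int32_t -> %d\n", __builtin_ctzl(sizeof(int32_t)));  /* 2 */
    printf("int64_t -> %d\n", __builtin_ctzl(sizeof(int64_t)));  /* 3 */
    return 0;
}

The whole-register stores always pass int8_t because they are encoded like EEW=8 accesses, as the comment above notes.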
@@ -1475,6 +1606,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr *a)
            vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
 }
 
+/* OPIVV with overwrite and WIDEN */
+static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+    return require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+           vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) &&
+           vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
 static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
                            gen_helper_gvec_4_ptr *fn,
                            bool (*checkfn)(DisasContext *, arg_rmrr *))
@@ -1522,6 +1663,14 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
            vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
 
+static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+    return require_rvv(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_ds(s, a->rd, a->rs2, a->vm) &&
+           vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
 #define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK)                                  \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                      \
 {                                                                           \
@@ -1993,13 +2142,13 @@ GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
 GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
 
 /* Vector Widening Integer Multiply-Add Instructions */
-GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check)
 
 /* Vector Integer Merge and Move Instructions */
 static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
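To make the new overwrite-widen condition concrete (an illustrative standalone calculation with names invented here, not QEMU code): a widening multiply-add also reads its destination group at EEW = 2*SEW, so a destination that overlaps a narrow source is exactly the mismatched-EEW case the stricter check now rejects.

#include <stdbool.h>
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

static bool groups_overlap(int a, int asize, int b, int bsize)
{
    return a < b + bsize && b < a + asize;
}

/* vd is also an input at eew = sew + 1 (log2); the narrow source is at sew. */
static bool wmacc_operands_ok(int vd, int vs, int sew, int lmul)
{
    int emul_vd = (sew + 1) - sew + lmul;   /* log2 size of the vd group     */
    int emul_vs = sew - sew + lmul;         /* log2 size of the source group */

    return !groups_overlap(vd, 1 << MAX(emul_vd, 0),
                           vs, 1 << MAX(emul_vs, 0));
}

int main(void)
{
    int sew = 2, lmul = 0;   /* SEW=32, LMUL=1 */

    /* vwmaccu.vv v2, v4, v2: destination group {v2,v3} overlaps source v2 */
    printf("vd=v2, vs2=v2: %s\n",
           wmacc_operands_ok(2, 2, sew, lmul) ? "ok" : "reserved");
    /* vwmaccu.vv v4, v8, v2: {v4,v5} does not overlap v2 */
    printf("vd=v4, vs2=v2: %s\n",
           wmacc_operands_ok(4, 2, sew, lmul) ? "ok" : "reserved");
    return 0;
}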
@@ -2340,6 +2489,17 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
            vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
 }
 
+static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+    return require_rvv(s) &&
+           require_rvf(s) &&
+           require_scale_rvf(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+           vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) &&
+           vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
 /* OPFVV with WIDEN */
 #define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK)                                  \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                      \
@@ -2379,11 +2539,21 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
            vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
 
+static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+    return require_rvv(s) &&
+           require_rvf(s) &&
+           require_scale_rvf(s) &&
+           vext_check_isa_ill(s) &&
+           vext_check_ds(s, a->rd, a->rs2, a->vm) &&
+           vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
 /* OPFVF with WIDEN */
-#define GEN_OPFVF_WIDEN_TRANS(NAME)                                         \
+#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK)                                  \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                      \
 {                                                                           \
-    if (opfvf_widen_check(s, a)) {                                          \
+    if (CHECK(s, a)) {                                                      \
         uint32_t data = 0;                                                  \
         static gen_helper_opfvf *const fns[2] = {                           \
             gen_helper_##NAME##_h, gen_helper_##NAME##_w,                   \
@@ -2399,8 +2569,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                      \
     return false;                                                           \
 }
 
-GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check)
 
 static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
 {
@@ -2482,7 +2652,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check)
 
 /* Vector Widening Floating-Point Multiply */
 GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check)
 
 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
 GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
@@ -2503,14 +2673,14 @@ GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check)
 GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check)
 
 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
-GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
-GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
-GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
-GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
+GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check)
 
 /* Vector Floating-Point Square-Root Instruction */
@@ -3181,19 +3351,19 @@ static void load_element(TCGv_i64 dest, TCGv_ptr base,
 /* offset of the idx element with base register r */
 static uint32_t endian_ofs(DisasContext *s, int r, int idx)
 {
-#if HOST_BIG_ENDIAN
-    return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
-#else
-    return vreg_ofs(s, r) + (idx << s->sew);
-#endif
+    if (HOST_BIG_ENDIAN) {
+        return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
+    } else {
+        return vreg_ofs(s, r) + (idx << s->sew);
+    }
 }
 
 /* adjust the index according to the endian */
 static void endian_adjust(TCGv_i32 ofs, int sew)
 {
-#if HOST_BIG_ENDIAN
-    tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
-#endif
+    if (HOST_BIG_ENDIAN) {
+        tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
+    }
 }
 
 /* Load idx >= VLMAX ? 0 : vreg[idx] */
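As a side note on the rewritten endian_ofs() (a standalone model, not QEMU code): on a big-endian host the element offset is (idx ^ (7 >> sew)) << sew, which swaps elements within each aligned 8-byte unit of the register file. A common reason for turning the preprocessor conditionals into plain if (HOST_BIG_ENDIAN) is that the compiler then checks both branches on every host.

#include <stdio.h>

/* Byte offset of element idx of size 2^sew, mirroring endian_ofs(). */
static int element_ofs(int idx, int sew, int host_big_endian)
{
    if (host_big_endian) {
        return (idx ^ (7 >> sew)) << sew;
    }
    return idx << sew;
}

int main(void)
{
    /* sew=0 (8-bit elements): element 0 sits at byte 7 of its 8-byte unit */
    for (int idx = 0; idx < 4; idx++) {
        printf("sew=0 idx=%d LE=%d BE=%d\n", idx,
               element_ofs(idx, 0, 0), element_ofs(idx, 0, 1));
    }
    /* sew=2 (32-bit elements): the two halves of each 8-byte unit swap */
    for (int idx = 0; idx < 2; idx++) {
        printf("sew=2 idx=%d LE=%d BE=%d\n", idx,
               element_ofs(idx, 2, 0), element_ofs(idx, 2, 1));
    }
    return 0;
}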
@@ -3391,7 +3561,6 @@ static bool slideup_check(DisasContext *s, arg_rmrr *a)
 }
 
 GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
-GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
 GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)
 
 static bool slidedown_check(DisasContext *s, arg_rmrr *a)
@@ -3402,9 +3571,56 @@ static bool slidedown_check(DisasContext *s, arg_rmrr *a)
 }
 
 GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
-GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
 GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)
 
+typedef void gen_helper_vslide1_vx(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr,
+                                   TCGv_env, TCGv_i32);
+
+#define GEN_OPIVX_VSLIDE1_TRANS(NAME, CHECK)                                \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                      \
+{                                                                           \
+    if (CHECK(s, a)) {                                                      \
+        static gen_helper_vslide1_vx * const fns[4] = {                     \
+            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                   \
+            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                   \
+        };                                                                  \
+                                                                            \
+        TCGv_ptr dest, src2, mask;                                          \
+        TCGv_i64 src1;                                                      \
+        TCGv_i32 desc;                                                      \
+        uint32_t data = 0;                                                  \
+                                                                            \
+        dest = tcg_temp_new_ptr();                                          \
+        mask = tcg_temp_new_ptr();                                          \
+        src2 = tcg_temp_new_ptr();                                          \
+        src1 = tcg_temp_new_i64();                                          \
+                                                                            \
+        data = FIELD_DP32(data, VDATA, VM, a->vm);                          \
+        data = FIELD_DP32(data, VDATA, LMUL, s->lmul);                      \
+        data = FIELD_DP32(data, VDATA, VTA, s->vta);                        \
+        data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);      \
+        data = FIELD_DP32(data, VDATA, VMA, s->vma);                        \
+        desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,                \
+                                          s->cfg_ptr->vlenb, data));        \
+                                                                            \
+        tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd));                \
+        tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2));               \
+        tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0));                    \
+        tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN));             \
+                                                                            \
+        fns[s->sew](dest, mask, src1, src2, tcg_env, desc);                 \
+                                                                            \
+        tcg_gen_movi_tl(cpu_vstart, 0);                                     \
+        finalize_rvv_inst(s);                                               \
+                                                                            \
+        return true;                                                        \
+    }                                                                       \
+    return false;                                                           \
+}
+
+GEN_OPIVX_VSLIDE1_TRANS(vslide1up_vx, slideup_check)
+GEN_OPIVX_VSLIDE1_TRANS(vslide1down_vx, slidedown_check)
+
 /* Vector Floating-Point Slide Instructions */
 static bool fslideup_check(DisasContext *s, arg_rmrr *a)
 {
@@ -3426,6 +3642,7 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
 {
     return require_rvv(s) &&
            vext_check_isa_ill(s) &&
+           vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) &&
            require_align(a->rd, s->lmul) &&
           require_align(a->rs1, s->lmul) &&
           require_align(a->rs2, s->lmul) &&
@@ -3438,6 +3655,7 @@ static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
     int8_t emul = MO_16 - s->sew + s->lmul;
     return require_rvv(s) &&
            vext_check_isa_ill(s) &&
+           vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) &&
            (emul >= -3 && emul <= 3) &&
            require_align(a->rd, s->lmul) &&
            require_align(a->rs1, emul) &&
@@ -3457,6 +3675,7 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
 {
     return require_rvv(s) &&
            vext_check_isa_ill(s) &&
+           vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) &&
            require_align(a->rd, s->lmul) &&
            require_align(a->rs2, s->lmul) &&
            (a->rd != a->rs2) &&
@@ -3600,7 +3819,9 @@ static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
            require_align(a->rd, s->lmul) &&
            require_align(a->rs2, s->lmul - div) &&
            require_vm(a->vm, a->rd) &&
-           require_noover(a->rd, s->lmul, a->rs2, s->lmul - div);
+           require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) &&
+           vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm);
+
     return ret;
 }
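For readers unfamiliar with the instructions the new GEN_OPIVX_VSLIDE1_TRANS macro translates, here is a rough standalone model of vslide1up.vx element semantics (masking, tail policy and vstart are ignored; the real work is done by the gen_helper_vslide1up_vx_* helpers selected by SEW above):

#include <stdint.h>
#include <stdio.h>

/* vd[0] = x[rs1], vd[i] = vs2[i - 1] for 0 < i < vl */
static void vslide1up(int64_t *vd, const int64_t *vs2, int64_t rs1, int vl)
{
    for (int i = vl - 1; i > 0; i--) {
        vd[i] = vs2[i - 1];
    }
    if (vl > 0) {
        vd[0] = rs1;
    }
}

int main(void)
{
    int64_t vs2[4] = {10, 11, 12, 13};
    int64_t vd[4] = {0, 0, 0, 0};

    vslide1up(vd, vs2, 99, 4);
    for (int i = 0; i < 4; i++) {
        printf("vd[%d] = %lld\n", i, (long long)vd[i]);
    }
    /* prints 99, 10, 11, 12 */
    return 0;
}

vslide1down.vx is the mirror image: vd[i] = vs2[i + 1], with the scalar written into the last active element.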