Diffstat (limited to 'target/riscv/insn_trans/trans_rvv.c.inc')
 -rw-r--r--  target/riscv/insn_trans/trans_rvv.c.inc | 397
 1 file changed, 309 insertions(+), 88 deletions(-)
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc
index b9883a5..2a48717 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s)
}
}
-/* Destination vector register group cannot overlap source mask register. */
-static bool require_vm(int vm, int vd)
+/*
+ * Source and destination vector register groups cannot overlap source mask
+ * register:
+ *
+ * A vector register cannot be used to provide source operands with more than
+ * one EEW for a single instruction. A mask register source is considered to
+ * have EEW=1 for this constraint. An encoding that would result in the same
+ * vector register being read with two or more different EEWs, including when
+ * the vector register appears at different positions within two or more vector
+ * register groups, is reserved.
+ * (Section 5.2)
+ *
+ * A destination vector register group can overlap a source vector
+ * register group only if one of the following holds:
+ * 1. The destination EEW equals the source EEW.
+ * 2. The destination EEW is smaller than the source EEW and the overlap
+ * is in the lowest-numbered part of the source register group.
+ * 3. The destination EEW is greater than the source EEW, the source EMUL
+ * is at least 1, and the overlap is in the highest-numbered part of
+ * the destination register group.
+ * For the purpose of determining register group overlap constraints, mask
+ * elements have EEW=1.
+ * (Section 5.2)
+ */
+static bool require_vm(int vm, int v)
{
- return (vm != 0 || vd != 0);
+ return (vm != 0 || v != 0);
}
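
The two Section 5.2 rules above are easiest to see with concrete register numbers. Below is a minimal standalone sketch of the group-overlap test, mirroring the shape of QEMU's is_overlapped() helper in this file (not shown in the hunks above); the register numbers in main() are illustrative only.

    #include <stdbool.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Do register groups [astart, astart+asize) and [bstart, bstart+bsize)
     * overlap?  Same shape as is_overlapped() in trans_rvv.c.inc. */
    static bool groups_overlap(int astart, int asize, int bstart, int bsize)
    {
        int aend = astart + asize;
        int bend = bstart + bsize;

        return MAX(aend, bend) - MIN(astart, bstart) < asize + bsize;
    }

    int main(void)
    {
        /*
         * SEW = 16, LMUL = 1, widening op: destination EEW = 32, EMUL = 2,
         * so vd = v2 occupies {v2, v3}; source vs2 has EEW = 16, EMUL = 1.
         */
        bool hi = groups_overlap(2, 2, 3, 1); /* vs2 = v3: highest part, rule 3 allows */
        bool lo = groups_overlap(2, 2, 2, 1); /* vs2 = v2: lowest part, reserved */
        return (hi && lo) ? 0 : 1;  /* both overlap; only their position differs */
    }
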
static bool require_nf(int vd, int nf, int lmul)
@@ -179,7 +202,7 @@ static bool do_vsetvl(DisasContext *s, int rd, int rs1, TCGv s2)
s1 = get_gpr(s, rs1, EXT_ZERO);
}
- gen_helper_vsetvl(dst, tcg_env, s1, s2);
+ gen_helper_vsetvl(dst, tcg_env, s1, s2,
+                   tcg_constant_tl((int) (rd == 0 && rs1 == 0)));
gen_set_gpr(s, rd, dst);
finalize_rvv_inst(s);
@@ -199,7 +222,7 @@ static bool do_vsetivli(DisasContext *s, int rd, TCGv s1, TCGv s2)
dst = dest_gpr(s, rd);
- gen_helper_vsetvl(dst, tcg_env, s1, s2);
+ gen_helper_vsetvl(dst, tcg_env, s1, s2, tcg_constant_tl(0));
gen_set_gpr(s, rd, dst);
finalize_rvv_inst(s);
gen_update_pc(s, s->cur_insn_len);
@@ -356,11 +379,41 @@ static bool vext_check_ld_index(DisasContext *s, int vd, int vs2,
return ret;
}
+/*
+ * Check whether a vector register is used to provide source operands with
+ * more than one EEW for the vector instruction.
+ * Returns true if the instruction has a valid encoding.
+ * Returns false if the encoding violates the mismatched input EEWs
+ * constraint.
+ */
+static bool vext_check_input_eew(DisasContext *s, int vs1, uint8_t eew_vs1,
+ int vs2, uint8_t eew_vs2, int vm)
+{
+ bool is_valid = true;
+ int8_t emul_vs1 = eew_vs1 - s->sew + s->lmul;
+ int8_t emul_vs2 = eew_vs2 - s->sew + s->lmul;
+
+    /* When vm is 0, vs1 & vs2 (EEW != 1) groups can't overlap v0 (EEW = 1) */
+ if ((vs1 != -1 && !require_vm(vm, vs1)) ||
+ (vs2 != -1 && !require_vm(vm, vs2))) {
+ is_valid = false;
+ }
+
+ /* When eew_vs1 != eew_vs2, check whether vs1 and vs2 are overlapped */
+ if ((vs1 != -1 && vs2 != -1) && (eew_vs1 != eew_vs2) &&
+ is_overlapped(vs1, 1 << MAX(emul_vs1, 0),
+ vs2, 1 << MAX(emul_vs2, 0))) {
+ is_valid = false;
+ }
+
+ return is_valid;
+}
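
All quantities here are log2 values (sew = log2(SEW/8), lmul = log2(LMUL), eew likewise), so emul = eew - sew + lmul computes log2(EMUL) = log2((EEW / SEW) * LMUL), and 1 << MAX(emul, 0) is the group size in registers, since a fractional EMUL still occupies one whole register. A self-contained sketch of just that arithmetic (hypothetical helper name):

    /*
     * eew/sew: log2 of the element width in bytes (MO_8..MO_64 = 0..3);
     * lmul: log2(LMUL), -3..3.  Returns the register-group size.
     */
    static int vreg_group_size(int eew, int sew, int lmul)
    {
        int emul = eew - sew + lmul;        /* log2(EMUL) */
        return 1 << (emul > 0 ? emul : 0);  /* fractional EMUL -> 1 register */
    }

For example, SEW = 32 (sew = 2), LMUL = 2 (lmul = 1) and an index EEW = 16 (eew = 1) give emul = 0, i.e. EMUL = 1: a one-register group.
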
+
static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
{
return require_vm(vm, vd) &&
require_align(vd, s->lmul) &&
- require_align(vs, s->lmul);
+ require_align(vs, s->lmul) &&
+ vext_check_input_eew(s, vs, s->sew, -1, s->sew, vm);
}
/*
@@ -379,6 +432,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
return vext_check_ss(s, vd, vs2, vm) &&
+ vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
require_align(vs1, s->lmul);
}
@@ -474,6 +528,7 @@ static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2,
static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
{
return vext_wide_check_common(s, vd, vm) &&
+ vext_check_input_eew(s, vs, s->sew, -1, 0, vm) &&
require_align(vs, s->lmul) &&
require_noover(vd, s->lmul + 1, vs, s->lmul);
}
@@ -481,6 +536,7 @@ static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
{
return vext_wide_check_common(s, vd, vm) &&
+ vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm) &&
require_align(vs, s->lmul + 1);
}
@@ -499,6 +555,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
return vext_check_ds(s, vd, vs2, vm) &&
+ vext_check_input_eew(s, vs1, s->sew, vs2, s->sew, vm) &&
require_align(vs1, s->lmul) &&
require_noover(vd, s->lmul + 1, vs1, s->lmul);
}
@@ -521,12 +578,14 @@ static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
return vext_check_ds(s, vd, vs1, vm) &&
+ vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
require_align(vs2, s->lmul + 1);
}
static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
{
- bool ret = vext_narrow_check_common(s, vd, vs, vm);
+ bool ret = vext_narrow_check_common(s, vd, vs, vm) &&
+ vext_check_input_eew(s, vs, s->sew + 1, -1, 0, vm);
if (vd != vs) {
ret &= require_noover(vd, s->lmul, vs, s->lmul + 1);
}
@@ -549,6 +608,7 @@ static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
static bool vext_check_sds(DisasContext *s, int vd, int vs1, int vs2, int vm)
{
return vext_check_sd(s, vd, vs2, vm) &&
+ vext_check_input_eew(s, vs1, s->sew, vs2, s->sew + 1, vm) &&
require_align(vs1, s->lmul);
}
@@ -584,7 +644,9 @@ static bool vext_check_slide(DisasContext *s, int vd, int vs2,
{
bool ret = require_align(vs2, s->lmul) &&
require_align(vd, s->lmul) &&
- require_vm(vm, vd);
+ require_vm(vm, vd) &&
+ vext_check_input_eew(s, -1, 0, vs2, s->sew, vm);
+
if (is_over) {
ret &= (vd != vs2);
}
@@ -981,7 +1043,8 @@ static bool ld_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
return require_rvv(s) &&
vext_check_isa_ill(s) &&
- vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew);
+ vext_check_ld_index(s, a->rd, a->rs2, a->nf, a->vm, eew) &&
+ vext_check_input_eew(s, -1, 0, a->rs2, eew, a->vm);
}
GEN_VEXT_TRANS(vlxei8_v, MO_8, rnfvm, ld_index_op, ld_index_check)
@@ -1033,7 +1096,8 @@ static bool st_index_check(DisasContext *s, arg_rnfvm* a, uint8_t eew)
{
return require_rvv(s) &&
vext_check_isa_ill(s) &&
- vext_check_st_index(s, a->rd, a->rs2, a->nf, eew);
+ vext_check_st_index(s, a->rd, a->rs2, a->nf, eew) &&
+ vext_check_input_eew(s, a->rd, s->sew, a->rs2, eew, a->vm);
}
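
For indexed stores, vd is really a third source: the store data is read at EEW = SEW, while vs2 supplies indices at the instruction's index EEW, which is why the added check passes a->rd with s->sew alongside a->rs2 with eew. A concrete case the check rejects (illustrative operands):

    /*
     * vsxei16.v v4, (a0), v4 with SEW = 32, LMUL = 1:
     *   store data: v4 read at EEW = 32 (emul = 2 - 2 + 0 =  0 -> 1 register)
     *   indices:    v4 read at EEW = 16 (emul = 1 - 2 + 0 = -1 -> 1 register)
     * The same register would be read with two different EEWs, so the
     * encoding is reserved: eew_vs1 != eew_vs2 and the groups overlap,
     * and vext_check_input_eew() returns false.
     */
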
GEN_VEXT_TRANS(vsxei8_v, MO_8, rnfvm, st_index_op, st_index_check)
@@ -1063,6 +1127,12 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data,
fn(dest, mask, base, tcg_env, desc);
finalize_rvv_inst(s);
+
+    /* A vector unit-stride fault-only-first load may modify the vl CSR. */
+ gen_update_pc(s, s->cur_insn_len);
+ lookup_and_goto_ptr(s);
+ s->base.is_jmp = DISAS_NORETURN;
+
return true;
}
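
The added lines end the translation block because vle*ff.v can shrink vl at run time, while later instructions in the same TB may have been translated under the old vl; execution must therefore continue through a fresh TB lookup. The same pattern (a sketch using the names already used in this file) applies to any instruction whose helper may rewrite vl behind the translator's back:

    gen_update_pc(s, s->cur_insn_len);  /* resume at the next instruction */
    lookup_and_goto_ptr(s);             /* chain via TB lookup, not fallthrough */
    s->base.is_jmp = DISAS_NORETURN;    /* stop translating into this TB */
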
@@ -1100,25 +1170,86 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check)
typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32);
static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
- gen_helper_ldst_whole *fn,
- DisasContext *s)
+ uint32_t log2_esz, gen_helper_ldst_whole *fn,
+ DisasContext *s, bool is_load)
{
- TCGv_ptr dest;
- TCGv base;
- TCGv_i32 desc;
-
- uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
- data = FIELD_DP32(data, VDATA, VM, 1);
- dest = tcg_temp_new_ptr();
- desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
- s->cfg_ptr->vlenb, data));
-
- base = get_gpr(s, rs1, EXT_NONE);
- tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
-
mark_vs_dirty(s);
- fn(dest, base, tcg_env, desc);
+ /*
+ * Load/store multiple bytes per iteration.
+ * When possible do this atomically.
+ * Update vstart with the number of processed elements.
+ * Use the helper function if either:
+ * - vstart is not 0.
+     * - the target has 32-bit registers and we are loading/storing 64-bit
+     *   elements; this ensures that we process every element with a single
+     *   memory instruction.
+ */
+
+ bool use_helper_fn = !(s->vstart_eq_zero) ||
+ (TCG_TARGET_REG_BITS == 32 && log2_esz == 3);
+
+ if (!use_helper_fn) {
+ TCGv addr = tcg_temp_new();
+ uint32_t size = s->cfg_ptr->vlenb * nf;
+ TCGv_i64 t8 = tcg_temp_new_i64();
+ TCGv_i32 t4 = tcg_temp_new_i32();
+ MemOp atomicity = MO_ATOM_NONE;
+ if (log2_esz == 0) {
+ atomicity = MO_ATOM_NONE;
+ } else {
+ atomicity = MO_ATOM_IFALIGN_PAIR;
+ }
+ if (TCG_TARGET_REG_BITS == 64) {
+ for (int i = 0; i < size; i += 8) {
+ addr = get_address(s, rs1, i);
+ if (is_load) {
+ tcg_gen_qemu_ld_i64(t8, addr, s->mem_idx,
+ MO_LE | MO_64 | atomicity);
+ tcg_gen_st_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
+ } else {
+ tcg_gen_ld_i64(t8, tcg_env, vreg_ofs(s, vd) + i);
+ tcg_gen_qemu_st_i64(t8, addr, s->mem_idx,
+ MO_LE | MO_64 | atomicity);
+ }
+ if (i == size - 8) {
+ tcg_gen_movi_tl(cpu_vstart, 0);
+ } else {
+ tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 8 >> log2_esz);
+ }
+ }
+ } else {
+ for (int i = 0; i < size; i += 4) {
+ addr = get_address(s, rs1, i);
+ if (is_load) {
+ tcg_gen_qemu_ld_i32(t4, addr, s->mem_idx,
+ MO_LE | MO_32 | atomicity);
+ tcg_gen_st_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
+ } else {
+ tcg_gen_ld_i32(t4, tcg_env, vreg_ofs(s, vd) + i);
+ tcg_gen_qemu_st_i32(t4, addr, s->mem_idx,
+ MO_LE | MO_32 | atomicity);
+ }
+ if (i == size - 4) {
+ tcg_gen_movi_tl(cpu_vstart, 0);
+ } else {
+ tcg_gen_addi_tl(cpu_vstart, cpu_vstart, 4 >> log2_esz);
+ }
+ }
+ }
+ } else {
+ TCGv_ptr dest;
+ TCGv base;
+ TCGv_i32 desc;
+ uint32_t data = FIELD_DP32(0, VDATA, NF, nf);
+ data = FIELD_DP32(data, VDATA, VM, 1);
+ dest = tcg_temp_new_ptr();
+ desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb,
+ s->cfg_ptr->vlenb, data));
+ base = get_gpr(s, rs1, EXT_NONE);
+ tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, vd));
+ fn(dest, base, tcg_env, desc);
+ }
finalize_rvv_inst(s);
return true;
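
The vstart bookkeeping works because vstart counts elements while the loop counts bytes: each 8-byte (or 4-byte) access advances 8 >> log2_esz (or 4 >> log2_esz) elements. A quick restatement of that conversion (hypothetical helper):

    /* Elements covered by one memory access of (1 << log2_bytes) bytes;
     * only valid when log2_bytes >= log2_esz. */
    static unsigned elems_per_access(unsigned log2_bytes, unsigned log2_esz)
    {
        return 1u << (log2_bytes - log2_esz);  /* == bytes >> log2_esz */
    }

For EEW = 8 (log2_esz = 0) an 8-byte load advances vstart by 8 elements; for EEW = 64 (log2_esz = 3) by exactly 1. That is also why 32-bit TCG hosts fall back to the helper for 64-bit elements: a 4-byte access would cover only half an element, and vstart must never point into the middle of one.
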
@@ -1128,42 +1259,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf,
 * load and store whole register instructions ignore vtype and vl settings.
 * Thus, we don't need to check the vill bit. (Section 7.9)
*/
-#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \
-static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \
-{ \
- if (require_rvv(s) && \
- QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \
- return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \
- gen_helper_##NAME, s); \
- } \
- return false; \
-}
-
-GEN_LDST_WHOLE_TRANS(vl1re8_v, 1)
-GEN_LDST_WHOLE_TRANS(vl1re16_v, 1)
-GEN_LDST_WHOLE_TRANS(vl1re32_v, 1)
-GEN_LDST_WHOLE_TRANS(vl1re64_v, 1)
-GEN_LDST_WHOLE_TRANS(vl2re8_v, 2)
-GEN_LDST_WHOLE_TRANS(vl2re16_v, 2)
-GEN_LDST_WHOLE_TRANS(vl2re32_v, 2)
-GEN_LDST_WHOLE_TRANS(vl2re64_v, 2)
-GEN_LDST_WHOLE_TRANS(vl4re8_v, 4)
-GEN_LDST_WHOLE_TRANS(vl4re16_v, 4)
-GEN_LDST_WHOLE_TRANS(vl4re32_v, 4)
-GEN_LDST_WHOLE_TRANS(vl4re64_v, 4)
-GEN_LDST_WHOLE_TRANS(vl8re8_v, 8)
-GEN_LDST_WHOLE_TRANS(vl8re16_v, 8)
-GEN_LDST_WHOLE_TRANS(vl8re32_v, 8)
-GEN_LDST_WHOLE_TRANS(vl8re64_v, 8)
+#define GEN_LDST_WHOLE_TRANS(NAME, ETYPE, ARG_NF, IS_LOAD) \
+static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \
+{ \
+ if (require_rvv(s) && \
+ QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \
+ return ldst_whole_trans(a->rd, a->rs1, ARG_NF, ctzl(sizeof(ETYPE)), \
+ gen_helper_##NAME, s, IS_LOAD); \
+ } \
+ return false; \
+}
+
+GEN_LDST_WHOLE_TRANS(vl1re8_v, int8_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl1re16_v, int16_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl1re32_v, int32_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl1re64_v, int64_t, 1, true)
+GEN_LDST_WHOLE_TRANS(vl2re8_v, int8_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl2re16_v, int16_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl2re32_v, int32_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl2re64_v, int64_t, 2, true)
+GEN_LDST_WHOLE_TRANS(vl4re8_v, int8_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl4re16_v, int16_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl4re32_v, int32_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl4re64_v, int64_t, 4, true)
+GEN_LDST_WHOLE_TRANS(vl8re8_v, int8_t, 8, true)
+GEN_LDST_WHOLE_TRANS(vl8re16_v, int16_t, 8, true)
+GEN_LDST_WHOLE_TRANS(vl8re32_v, int32_t, 8, true)
+GEN_LDST_WHOLE_TRANS(vl8re64_v, int64_t, 8, true)
/*
* The vector whole register store instructions are encoded similar to
* unmasked unit-stride store of elements with EEW=8.
*/
-GEN_LDST_WHOLE_TRANS(vs1r_v, 1)
-GEN_LDST_WHOLE_TRANS(vs2r_v, 2)
-GEN_LDST_WHOLE_TRANS(vs4r_v, 4)
-GEN_LDST_WHOLE_TRANS(vs8r_v, 8)
+GEN_LDST_WHOLE_TRANS(vs1r_v, int8_t, 1, false)
+GEN_LDST_WHOLE_TRANS(vs2r_v, int8_t, 2, false)
+GEN_LDST_WHOLE_TRANS(vs4r_v, int8_t, 4, false)
+GEN_LDST_WHOLE_TRANS(vs8r_v, int8_t, 8, false)
/*
*** Vector Integer Arithmetic Instructions
@@ -1475,6 +1606,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr *a)
vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
}
+/* OPIVV with overwrite and WIDEN */
+static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+ vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) &&
+ vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
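
The "overwrite" variants exist because the widening multiply-adds read vd as an accumulator: it is an input at EEW = 2 * SEW (s->sew + 1 in log2 terms) and must therefore satisfy the mismatched-input-EEW constraint against both narrow sources, and against v0 when masked. A concrete encoding the constraint forbids (illustrative operands):

    /*
     * vwmacc.vv v2, v2, v4 with SEW = 8, LMUL = 1:
     *   accumulator: v2 read at EEW = 16 (group {v2, v3})
     *   vs1:         v2 read at EEW = 8  (group {v2})
     * v2 would be read with two different EEWs, so the encoding is
     * reserved; vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1,
     * s->sew, a->vm) returns false for it.
     */
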
+
static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
gen_helper_gvec_4_ptr *fn,
bool (*checkfn)(DisasContext *, arg_rmrr *))
@@ -1522,6 +1663,14 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a)
vext_check_ds(s, a->rd, a->rs2, a->vm);
}
+static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+ return require_rvv(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ds(s, a->rd, a->rs2, a->vm) &&
+ vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
@@ -1993,13 +2142,13 @@ GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
/* Vector Widening Integer Multiply-Add Instructions */
-GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check)
/* Vector Integer Merge and Move Instructions */
static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
@@ -2340,6 +2489,17 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a)
vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
}
+static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ require_scale_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+ vext_check_input_eew(s, a->rd, s->sew + 1, a->rs1, s->sew, a->vm) &&
+ vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
/* OPFVV with WIDEN */
#define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
@@ -2379,11 +2539,21 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a)
vext_check_ds(s, a->rd, a->rs2, a->vm);
}
+static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+ return require_rvv(s) &&
+ require_rvf(s) &&
+ require_scale_rvf(s) &&
+ vext_check_isa_ill(s) &&
+ vext_check_ds(s, a->rd, a->rs2, a->vm) &&
+ vext_check_input_eew(s, a->rd, s->sew + 1, a->rs2, s->sew, a->vm);
+}
+
/* OPFVF with WIDEN */
-#define GEN_OPFVF_WIDEN_TRANS(NAME) \
+#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \
static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
{ \
- if (opfvf_widen_check(s, a)) { \
+ if (CHECK(s, a)) { \
uint32_t data = 0; \
static gen_helper_opfvf *const fns[2] = { \
gen_helper_##NAME##_h, gen_helper_##NAME##_w, \
@@ -2399,8 +2569,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
return false; \
}
-GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check)
static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
{
@@ -2482,7 +2652,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check)
/* Vector Widening Floating-Point Multiply */
GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check)
/* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
@@ -2503,14 +2673,14 @@ GEN_OPFVF_TRANS(vfmsub_vf, opfvf_check)
GEN_OPFVF_TRANS(vfnmsub_vf, opfvf_check)
/* Vector Widening Floating-Point Fused Multiply-Add Instructions */
-GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
-GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
-GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
-GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
+GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_overwrite_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_overwrite_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_overwrite_widen_check)
+GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_overwrite_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_overwrite_widen_check)
/* Vector Floating-Point Square-Root Instruction */
@@ -3181,19 +3351,19 @@ static void load_element(TCGv_i64 dest, TCGv_ptr base,
/* offset of the idx element with base register r */
static uint32_t endian_ofs(DisasContext *s, int r, int idx)
{
-#if HOST_BIG_ENDIAN
- return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
-#else
- return vreg_ofs(s, r) + (idx << s->sew);
-#endif
+ if (HOST_BIG_ENDIAN) {
+ return vreg_ofs(s, r) + ((idx ^ (7 >> s->sew)) << s->sew);
+ } else {
+ return vreg_ofs(s, r) + (idx << s->sew);
+ }
}
/* adjust the index according to the endian */
static void endian_adjust(TCGv_i32 ofs, int sew)
{
-#if HOST_BIG_ENDIAN
- tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
-#endif
+ if (HOST_BIG_ENDIAN) {
+ tcg_gen_xori_i32(ofs, ofs, 7 >> sew);
+ }
}
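
Both helpers compensate for QEMU storing vector registers as host-endian 64-bit chunks: on a big-endian host the element order within each 8-byte chunk is reversed, and XOR-ing the index with 7 >> sew re-maps it. A standalone restatement of the offset math (hypothetical helper, without the vreg_ofs() base):

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t elem_ofs(int idx, int sew, bool host_big_endian)
    {
        /* sew = log2 of the element size in bytes (MO_8..MO_64 = 0..3) */
        int adj = host_big_endian ? (idx ^ (7 >> sew)) : idx;
        return (uint32_t)adj << sew;
    }

    /* elem_ofs(0, 0, true) == 7: byte element 0 sits at offset 7        */
    /* elem_ofs(0, 3, true) == 0: 64-bit elements need no adjustment     */
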
/* Load idx >= VLMAX ? 0 : vreg[idx] */
@@ -3391,7 +3561,6 @@ static bool slideup_check(DisasContext *s, arg_rmrr *a)
}
GEN_OPIVX_TRANS(vslideup_vx, slideup_check)
-GEN_OPIVX_TRANS(vslide1up_vx, slideup_check)
GEN_OPIVI_TRANS(vslideup_vi, IMM_ZX, vslideup_vx, slideup_check)
static bool slidedown_check(DisasContext *s, arg_rmrr *a)
@@ -3402,9 +3571,56 @@ static bool slidedown_check(DisasContext *s, arg_rmrr *a)
}
GEN_OPIVX_TRANS(vslidedown_vx, slidedown_check)
-GEN_OPIVX_TRANS(vslide1down_vx, slidedown_check)
GEN_OPIVI_TRANS(vslidedown_vi, IMM_ZX, vslidedown_vx, slidedown_check)
+typedef void gen_helper_vslide1_vx(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_ptr,
+ TCGv_env, TCGv_i32);
+
+#define GEN_OPIVX_VSLIDE1_TRANS(NAME, CHECK) \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \
+{ \
+ if (CHECK(s, a)) { \
+ static gen_helper_vslide1_vx * const fns[4] = { \
+ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \
+ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \
+ }; \
+ \
+ TCGv_ptr dest, src2, mask; \
+ TCGv_i64 src1; \
+ TCGv_i32 desc; \
+ uint32_t data = 0; \
+ \
+ dest = tcg_temp_new_ptr(); \
+ mask = tcg_temp_new_ptr(); \
+ src2 = tcg_temp_new_ptr(); \
+ src1 = tcg_temp_new_i64(); \
+ \
+ data = FIELD_DP32(data, VDATA, VM, a->vm); \
+ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \
+ data = FIELD_DP32(data, VDATA, VTA, s->vta); \
+ data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \
+ data = FIELD_DP32(data, VDATA, VMA, s->vma); \
+ desc = tcg_constant_i32(simd_desc(s->cfg_ptr->vlenb, \
+ s->cfg_ptr->vlenb, data)); \
+ \
+ tcg_gen_addi_ptr(dest, tcg_env, vreg_ofs(s, a->rd)); \
+ tcg_gen_addi_ptr(src2, tcg_env, vreg_ofs(s, a->rs2)); \
+ tcg_gen_addi_ptr(mask, tcg_env, vreg_ofs(s, 0)); \
+ tcg_gen_ext_tl_i64(src1, get_gpr(s, a->rs1, EXT_SIGN)); \
+ \
+ fns[s->sew](dest, mask, src1, src2, tcg_env, desc); \
+ \
+ tcg_gen_movi_tl(cpu_vstart, 0); \
+ finalize_rvv_inst(s); \
+ \
+ return true; \
+ } \
+ return false; \
+}
+
+GEN_OPIVX_VSLIDE1_TRANS(vslide1up_vx, slideup_check)
+GEN_OPIVX_VSLIDE1_TRANS(vslide1down_vx, slidedown_check)
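
Compared with the generic GEN_OPIVX_TRANS path these two instructions previously used, the dedicated expander widens the scalar once with tcg_gen_ext_tl_i64(..., EXT_SIGN) so that a single body serves all four SEW variants, and it zeroes vstart explicitly after the helper returns. The sign extension matters when SEW exceeds XLEN (RV32 with 64-bit elements), where the scalar operand is sign-extended to SEW bits. The signature shared by every fns[] entry follows from the gen_helper_vslide1_vx typedef above, e.g. for the SEW = 8 variant:

    void gen_helper_vslide1up_vx_b(TCGv_ptr dest, TCGv_ptr mask,
                                   TCGv_i64 src1, TCGv_ptr src2,
                                   TCGv_env env, TCGv_i32 desc);
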
+
/* Vector Floating-Point Slide Instructions */
static bool fslideup_check(DisasContext *s, arg_rmrr *a)
{
@@ -3426,6 +3642,7 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a)
{
return require_rvv(s) &&
vext_check_isa_ill(s) &&
+ vext_check_input_eew(s, a->rs1, s->sew, a->rs2, s->sew, a->vm) &&
require_align(a->rd, s->lmul) &&
require_align(a->rs1, s->lmul) &&
require_align(a->rs2, s->lmul) &&
@@ -3438,6 +3655,7 @@ static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
int8_t emul = MO_16 - s->sew + s->lmul;
return require_rvv(s) &&
vext_check_isa_ill(s) &&
+ vext_check_input_eew(s, a->rs1, MO_16, a->rs2, s->sew, a->vm) &&
(emul >= -3 && emul <= 3) &&
require_align(a->rd, s->lmul) &&
require_align(a->rs1, emul) &&
@@ -3457,6 +3675,7 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a)
{
return require_rvv(s) &&
vext_check_isa_ill(s) &&
+ vext_check_input_eew(s, -1, MO_64, a->rs2, s->sew, a->vm) &&
require_align(a->rd, s->lmul) &&
require_align(a->rs2, s->lmul) &&
(a->rd != a->rs2) &&
@@ -3600,7 +3819,9 @@ static bool int_ext_check(DisasContext *s, arg_rmr *a, uint8_t div)
require_align(a->rd, s->lmul) &&
require_align(a->rs2, s->lmul - div) &&
require_vm(a->vm, a->rd) &&
- require_noover(a->rd, s->lmul, a->rs2, s->lmul - div);
+ require_noover(a->rd, s->lmul, a->rs2, s->lmul - div) &&
+ vext_check_input_eew(s, -1, 0, a->rs2, s->sew, a->vm);
+
return ret;
}