diff options
author | Daniel Henrique Barboza <dbarboza@ventanamicro.com> | 2024-03-14 14:56:59 -0300 |
---|---|---|
committer | Alistair Francis <alistair.francis@wdc.com> | 2024-03-22 15:18:28 +1000 |
commit | 929e521a47b7110339526771720bc2096f69dddf (patch) | |
tree | dcb96b2000af54e8113a07e7cb137f3480605d0e | |
parent | 7e53e3ddf6dff200098e112c5370ab16d2d5dbd1 (diff) | |
download | qemu-929e521a47b7110339526771720bc2096f69dddf.zip qemu-929e521a47b7110339526771720bc2096f69dddf.tar.gz qemu-929e521a47b7110339526771720bc2096f69dddf.tar.bz2 |
target/riscv: always clear vstart for ldst_whole insns
Commit 8ff8ac6329 added a conditional to guard the vext_ldst_whole()
helper if vstart >= evl. But by skipping the helper we're also not
setting vstart = 0 at the end of the insns, which is incorrect.
We'll move the conditional to vext_ldst_whole(), following in line with
the removal of all brconds vstart >= vl that the next patch will do. The
idea is to make the helpers responsible for their own vstart management.
Fix ldst_whole isns by:
- remove the brcond that skips the helper if vstart is >= evl;
- vext_ldst_whole() now does an early exit with the same check, where
evl = (vlenb * nf) >> log2_esz, but the early exit will also clear
vstart.
The 'width' param is now unneeded in ldst_whole_trans() and is also
removed. It was used for the evl calculation for the brcond and has no
other use now. The 'width' is reflected in vext_ldst_whole() via
log2_esz, which is encoded by GEN_VEXT_LD_WHOLE() as
"ctzl(sizeof(ETYPE))".
Suggested-by: Max Chou <max.chou@sifive.com>
Fixes: 8ff8ac6329 ("target/riscv: rvv: Add missing early exit condition for whole register load/store")
Signed-off-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Max Chou <max.chou@sifive.com>
Message-ID: <20240314175704.478276-6-dbarboza@ventanamicro.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
-rw-r--r-- | target/riscv/insn_trans/trans_rvv.c.inc | 52 | ||||
-rw-r--r-- | target/riscv/vector_helper.c | 5 |
2 files changed, 28 insertions, 29 deletions
diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 52c26a7..1366445 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1097,13 +1097,9 @@ GEN_VEXT_TRANS(vle64ff_v, MO_64, r2nfvm, ldff_op, ld_us_check) typedef void gen_helper_ldst_whole(TCGv_ptr, TCGv, TCGv_env, TCGv_i32); static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, - uint32_t width, gen_helper_ldst_whole *fn, + gen_helper_ldst_whole *fn, DisasContext *s) { - uint32_t evl = s->cfg_ptr->vlenb * nf / width; - TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_GEU, cpu_vstart, evl, over); - TCGv_ptr dest; TCGv base; TCGv_i32 desc; @@ -1120,8 +1116,6 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, fn(dest, base, tcg_env, desc); - gen_set_label(over); - return true; } @@ -1129,42 +1123,42 @@ static bool ldst_whole_trans(uint32_t vd, uint32_t rs1, uint32_t nf, * load and store whole register instructions ignore vtype and vl setting. * Thus, we don't need to check vill bit. (Section 7.9) */ -#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF, WIDTH) \ +#define GEN_LDST_WHOLE_TRANS(NAME, ARG_NF) \ static bool trans_##NAME(DisasContext *s, arg_##NAME * a) \ { \ if (require_rvv(s) && \ QEMU_IS_ALIGNED(a->rd, ARG_NF)) { \ - return ldst_whole_trans(a->rd, a->rs1, ARG_NF, WIDTH, \ + return ldst_whole_trans(a->rd, a->rs1, ARG_NF, \ gen_helper_##NAME, s); \ } \ return false; \ } -GEN_LDST_WHOLE_TRANS(vl1re8_v, 1, 1) -GEN_LDST_WHOLE_TRANS(vl1re16_v, 1, 2) -GEN_LDST_WHOLE_TRANS(vl1re32_v, 1, 4) -GEN_LDST_WHOLE_TRANS(vl1re64_v, 1, 8) -GEN_LDST_WHOLE_TRANS(vl2re8_v, 2, 1) -GEN_LDST_WHOLE_TRANS(vl2re16_v, 2, 2) -GEN_LDST_WHOLE_TRANS(vl2re32_v, 2, 4) -GEN_LDST_WHOLE_TRANS(vl2re64_v, 2, 8) -GEN_LDST_WHOLE_TRANS(vl4re8_v, 4, 1) -GEN_LDST_WHOLE_TRANS(vl4re16_v, 4, 2) -GEN_LDST_WHOLE_TRANS(vl4re32_v, 4, 4) -GEN_LDST_WHOLE_TRANS(vl4re64_v, 4, 8) -GEN_LDST_WHOLE_TRANS(vl8re8_v, 8, 1) -GEN_LDST_WHOLE_TRANS(vl8re16_v, 8, 2) -GEN_LDST_WHOLE_TRANS(vl8re32_v, 8, 4) -GEN_LDST_WHOLE_TRANS(vl8re64_v, 8, 8) +GEN_LDST_WHOLE_TRANS(vl1re8_v, 1) +GEN_LDST_WHOLE_TRANS(vl1re16_v, 1) +GEN_LDST_WHOLE_TRANS(vl1re32_v, 1) +GEN_LDST_WHOLE_TRANS(vl1re64_v, 1) +GEN_LDST_WHOLE_TRANS(vl2re8_v, 2) +GEN_LDST_WHOLE_TRANS(vl2re16_v, 2) +GEN_LDST_WHOLE_TRANS(vl2re32_v, 2) +GEN_LDST_WHOLE_TRANS(vl2re64_v, 2) +GEN_LDST_WHOLE_TRANS(vl4re8_v, 4) +GEN_LDST_WHOLE_TRANS(vl4re16_v, 4) +GEN_LDST_WHOLE_TRANS(vl4re32_v, 4) +GEN_LDST_WHOLE_TRANS(vl4re64_v, 4) +GEN_LDST_WHOLE_TRANS(vl8re8_v, 8) +GEN_LDST_WHOLE_TRANS(vl8re16_v, 8) +GEN_LDST_WHOLE_TRANS(vl8re32_v, 8) +GEN_LDST_WHOLE_TRANS(vl8re64_v, 8) /* * The vector whole register store instructions are encoded similar to * unmasked unit-stride store of elements with EEW=8. */ -GEN_LDST_WHOLE_TRANS(vs1r_v, 1, 1) -GEN_LDST_WHOLE_TRANS(vs2r_v, 2, 1) -GEN_LDST_WHOLE_TRANS(vs4r_v, 4, 1) -GEN_LDST_WHOLE_TRANS(vs8r_v, 8, 1) +GEN_LDST_WHOLE_TRANS(vs1r_v, 1) +GEN_LDST_WHOLE_TRANS(vs2r_v, 2) +GEN_LDST_WHOLE_TRANS(vs4r_v, 4) +GEN_LDST_WHOLE_TRANS(vs8r_v, 8) /* *** Vector Integer Arithmetic Instructions diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index bcc553c..1f4c276 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -572,6 +572,11 @@ vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, uint32_t vlenb = riscv_cpu_cfg(env)->vlenb; uint32_t max_elems = vlenb >> log2_esz; + if (env->vstart >= ((vlenb * nf) >> log2_esz)) { + env->vstart = 0; + return; + } + k = env->vstart / max_elems; off = env->vstart % max_elems; |