diff options
author | Chih-Min Chao <chihmin.chao@sifive.com> | 2020-05-20 01:59:32 -0700 |
---|---|---|
committer | Chih-Min Chao <chihmin.chao@sifive.com> | 2020-05-20 11:38:45 -0700 |
commit | 4bb17aa37063f0e805cdb8ad410a709c34f6af33 (patch) | |
tree | 17e9dec1ce5fb15732886a934095e96722a73028 /riscv/decode.h | |
parent | 5126f9ac6ba365a949e8125ea0caaa7185a0e542 (diff) | |
download | spike-4bb17aa37063f0e805cdb8ad410a709c34f6af33.zip spike-4bb17aa37063f0e805cdb8ad410a709c34f6af33.tar.gz spike-4bb17aa37063f0e805cdb8ad410a709c34f6af33.tar.bz2 |
rvv: fix index load/store
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Diffstat (limited to 'riscv/decode.h')
-rw-r--r-- | riscv/decode.h | 50 |
1 file changed, 24 insertions, 26 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index 8ece184..e031537 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -450,15 +450,23 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) require_align(insn.rd(), P.VU.vflmul * 2); \ require_vm; \ -#define VI_CHECK_ST_INDEX \ +#define VI_CHECK_ST_INDEX(elt_width) \ require_vector; \ + P.VU.veew = elt_width; \ + P.VU.vemul = ((float)P.VU.veew / P.VU.vsew * P.VU.vflmul); \ + require(P.VU.vemul >= 0.125 && P.VU.vemul <= 8); \ + reg_t flmul = P.VU.vflmul < 1 ? 1 : P.VU.vflmul; \ require_align(insn.rd(), P.VU.vflmul); \ require_align(insn.rs2(), P.VU.vflmul); \ + require((nf * flmul) <= (NVPR / 4) && \ + (insn.rd() + nf * flmul) <= NVPR); \ -#define VI_CHECK_LD_INDEX \ - VI_CHECK_ST_INDEX; \ - if (insn.v_nf() > 0) \ - require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul); \ +#define VI_CHECK_LD_INDEX(elt_width) \ + VI_CHECK_ST_INDEX(elt_width); \ + if (insn.v_nf() > 0) {\ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vemul); \ + require_noover(vd, nf, insn.rs2(), 1); \ + } \ require_vm; \ #define VI_CHECK_MSS(is_vs1) \ @@ -1552,7 +1560,6 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ require(0); \ } - #define VI_LD(stride, offset, elt_width) \ const reg_t nf = insn.v_nf() + 1; \ const reg_t vl = P.VU.vl; \ @@ -1572,20 +1579,13 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ P.VU.vstart = 0; #define VI_LD_INDEX(elt_width, is_seg) \ - VI_CHECK_LD_INDEX; \ const reg_t nf = insn.v_nf() + 1; \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vd = insn.rd(); \ - const reg_t mew = insn.v_mew(); \ - const reg_t width = insn.v_width(); \ - require((nf * P.VU.vlmul) <= (NVPR / 4) && \ - (vd + nf * P.VU.vlmul) <= NVPR); \ if (!is_seg) \ require(nf == 1); \ - if (nf >= 2) \ - require_noover(vd, nf, insn.rs2(), 1); \ - const reg_t vlmul = P.VU.vlmul; \ + VI_CHECK_LD_INDEX(elt_width); \ VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ for (reg_t i 
= 0; i < vl; ++i) { \ VI_ELEMENT_SKIP(i); \ @@ -1594,19 +1594,19 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ for (reg_t fn = 0; fn < nf; ++fn) { \ switch(P.VU.vsew){ \ case e8: \ - P.VU.elt<uint8_t>(vd + fn * vlmul, vreg_inx, true) = \ + P.VU.elt<uint8_t>(vd + fn * flmul, vreg_inx, true) = \ MMU.load_uint8(baseAddr + index[i] + fn * 1); \ break; \ case e16: \ - P.VU.elt<uint16_t>(vd + fn * vlmul, vreg_inx, true) = \ + P.VU.elt<uint16_t>(vd + fn * flmul, vreg_inx, true) = \ MMU.load_uint16(baseAddr + index[i] + fn * 2); \ break; \ case e32: \ - P.VU.elt<uint32_t>(vd + fn * vlmul, vreg_inx, true) = \ + P.VU.elt<uint32_t>(vd + fn * flmul, vreg_inx, true) = \ MMU.load_uint32(baseAddr + index[i] + fn * 4); \ break; \ default: \ - P.VU.elt<uint64_t>(vd + fn * vlmul, vreg_inx, true) = \ + P.VU.elt<uint64_t>(vd + fn * flmul, vreg_inx, true) = \ MMU.load_uint64(baseAddr + index[i] + fn * 8); \ break; \ } \ @@ -1633,37 +1633,35 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ P.VU.vstart = 0; #define VI_ST_INDEX(elt_width, is_seg) \ - VI_CHECK_ST_INDEX; \ const reg_t nf = insn.v_nf() + 1; \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vs3 = insn.rd(); \ - require((nf * P.VU.vlmul) <= (NVPR / 4) && \ - vs3 + nf * P.VU.vlmul <= NVPR); \ if (!is_seg) \ require(nf == 1); \ - const reg_t vlmul = P.VU.vlmul; \ + VI_CHECK_ST_INDEX(elt_width); \ VI_DUPLICATE_VREG(insn.rs2(), elt_width); \ for (reg_t i = 0; i < vl; ++i) { \ VI_STRIP(i) \ VI_ELEMENT_SKIP(i); \ + P.VU.vstart = i; \ for (reg_t fn = 0; fn < nf; ++fn) { \ switch (P.VU.vsew) { \ case e8: \ MMU.store_uint8(baseAddr + index[i] + fn * 1, \ - P.VU.elt<uint8_t>(vs3 + fn * vlmul, vreg_inx)); \ + P.VU.elt<uint8_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ case e16: \ MMU.store_uint16(baseAddr + index[i] + fn * 2, \ - P.VU.elt<uint16_t>(vs3 + fn * vlmul, vreg_inx)); \ + P.VU.elt<uint16_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ case e32: \ MMU.store_uint32(baseAddr + index[i] 
+ fn * 4, \ - P.VU.elt<uint32_t>(vs3 + fn * vlmul, vreg_inx)); \ + P.VU.elt<uint32_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ default: \ MMU.store_uint64(baseAddr + index[i] + fn * 8, \ - P.VU.elt<uint64_t>(vs3 + fn * vlmul, vreg_inx)); \ + P.VU.elt<uint64_t>(vs3 + fn * flmul, vreg_inx)); \ break; \ } \ } \ |