diff options
author | Chih-Min Chao <chihmin.chao@sifive.com> | 2020-05-19 01:57:33 -0700 |
---|---|---|
committer | Chih-Min Chao <chihmin.chao@sifive.com> | 2020-05-19 02:54:52 -0700 |
commit | cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78 (patch) | |
tree | b9650f1edde9c50e5a96cb97a59594d0506db1c2 /riscv/decode.h | |
parent | 62814109390b358e5190bd4789dc1a0cfc3e7253 (diff) | |
download | spike-cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78.zip spike-cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78.tar.gz spike-cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78.tar.bz2 |
rvv: store eew and emul to P.VU for unit/stride load/store
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Diffstat (limited to 'riscv/decode.h')
-rw-r--r-- | riscv/decode.h | 43 |
1 file changed, 21 insertions, 22 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 5731ae1..b0609a5 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -472,16 +472,18 @@ static inline bool is_overlapped(const int astart, const int asize,
     } \
   }
 
-#define VI_CHECK_STORE \
+#define VI_CHECK_STORE(elt_width) \
   require_vector; \
-  reg_t emul = (eew / P.VU.vsew * P.VU.vlmul) + 0.875; \
+  P.VU.veew = sizeof(elt_width##_t) * 8; \
+  P.VU.vemul = ((float)P.VU.veew / P.VU.vsew * P.VU.vlmul); \
+  reg_t emul = P.VU.vemul + 0.875; \
   require(emul >= 1 && emul <= 8); \
   require((insn.rd() & (emul - 1)) == 0); \
   require((nf * emul) <= (NVPR / 4) && \
           (insn.rd() + nf * emul) <= NVPR); \
 
-#define VI_CHECK_LOAD \
-  VI_CHECK_STORE; \
+#define VI_CHECK_LOAD(elt_width) \
+  VI_CHECK_STORE(elt_width); \
   if (insn.v_vm() == 0) \
     require(insn.rd() != 0); \
 
@@ -1546,21 +1548,20 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
   }
 
-#define VI_LD(stride, offset, ld_width) \
+#define VI_LD(stride, offset, elt_width) \
   const reg_t nf = insn.v_nf() + 1; \
   const reg_t vl = P.VU.vl; \
   const reg_t baseAddr = RS1; \
   const reg_t vd = insn.rd(); \
-  const float eew = sizeof(ld_width##_t) * 8; \
-  VI_CHECK_LOAD; \
+  VI_CHECK_LOAD(elt_width); \
   for (reg_t i = 0; i < vl; ++i) { \
     VI_ELEMENT_SKIP(i); \
     VI_STRIP(i); \
     P.VU.vstart = i; \
     for (reg_t fn = 0; fn < nf; ++fn) { \
-      ld_width##_t val = MMU.load_##ld_width( \
-        baseAddr + (stride) + (offset) * sizeof(ld_width##_t)); \
-      P.VU.elt<ld_width##_t>(vd + fn * emul, vreg_inx, true) = val; \
+      elt_width##_t val = MMU.load_##elt_width( \
+        baseAddr + (stride) + (offset) * sizeof(elt_width##_t)); \
+      P.VU.elt<elt_width##_t>(vd + fn * emul, vreg_inx, true) = val; \
     } \
   } \
   P.VU.vstart = 0;
@@ -1605,21 +1606,20 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
   } \
   P.VU.vstart = 0;
 
-#define VI_ST(stride, offset, st_width) \
+#define VI_ST(stride, offset, elt_width) \
   const reg_t nf = insn.v_nf() + 1; \
   const reg_t vl = P.VU.vl; \
   const reg_t baseAddr = RS1; \
   const reg_t vs3 = insn.rd(); \
-  const float eew = sizeof(st_width##_t) * 8; \
-  VI_CHECK_STORE; \
+  VI_CHECK_STORE(elt_width); \
   for (reg_t i = 0; i < vl; ++i) { \
     VI_STRIP(i) \
     VI_ELEMENT_SKIP(i); \
     P.VU.vstart = i; \
     for (reg_t fn = 0; fn < nf; ++fn) { \
-      st_width##_t val = P.VU.elt<st_width##_t>(vs3 + fn * emul, vreg_inx); \
-      MMU.store_##st_width( \
-        baseAddr + (stride) + (offset) * sizeof(st_width##_t), val); \
+      elt_width##_t val = P.VU.elt<elt_width##_t>(vs3 + fn * emul, vreg_inx); \
+      MMU.store_##elt_width( \
+        baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \
     } \
   } \
   P.VU.vstart = 0;
@@ -1660,14 +1660,13 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
   } \
   P.VU.vstart = 0;
 
-#define VI_LDST_FF(ld_type) \
+#define VI_LDST_FF(elt_width) \
   const reg_t nf = insn.v_nf() + 1; \
   const reg_t sew = p->VU.vsew; \
   const reg_t vl = p->VU.vl; \
   const reg_t baseAddr = RS1; \
   const reg_t rd_num = insn.rd(); \
-  const float eew = sizeof(ld_type##_t) * 8; \
-  VI_CHECK_LOAD; \
+  VI_CHECK_LOAD(elt_width); \
   bool early_stop = false; \
   for (reg_t i = p->VU.vstart; i < vl; ++i) { \
     VI_STRIP(i); \
@@ -1676,8 +1675,8 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
     for (reg_t fn = 0; fn < nf; ++fn) { \
       uint64_t val; \
       try { \
-        val = MMU.load_##ld_type( \
-          baseAddr + (i * nf + fn) * sizeof(ld_type##_t)); \
+        val = MMU.load_##elt_width( \
+          baseAddr + (i * nf + fn) * sizeof(elt_width##_t)); \
       } catch (trap_t& t) { \
         if (i == 0) \
           throw; /* Only take exception on zeroth element */ \
@@ -1686,7 +1685,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
         P.VU.vl = i; \
         break; \
       } \
-      p->VU.elt<ld_type##_t>(rd_num + fn * emul, vreg_inx, true) = val; \
+      p->VU.elt<elt_width##_t>(rd_num + fn * emul, vreg_inx, true) = val; \
     } \
 \
     if (early_stop) { \