about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Chih-Min Chao <chihmin.chao@sifive.com> 2020-05-19 01:57:33 -0700
committer Chih-Min Chao <chihmin.chao@sifive.com> 2020-05-19 02:54:52 -0700
commit cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78 (patch)
tree b9650f1edde9c50e5a96cb97a59594d0506db1c2
parent 62814109390b358e5190bd4789dc1a0cfc3e7253 (diff)
download spike-cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78.zip
spike-cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78.tar.gz
spike-cba9c9d7c95219c7fc7c55c85f8ab0d31d9b9d78.tar.bz2
rvv: store eew and emul to P.VU for unit/stride load/store
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
-rw-r--r-- riscv/decode.h 43
-rw-r--r-- riscv/execute.cc 2
2 files changed, 22 insertions, 23 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 5731ae1..b0609a5 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -472,16 +472,18 @@ static inline bool is_overlapped(const int astart, const int asize,
} \
}
-#define VI_CHECK_STORE \
+#define VI_CHECK_STORE(elt_width) \
require_vector; \
- reg_t emul = (eew / P.VU.vsew * P.VU.vlmul) + 0.875; \
+ P.VU.veew = sizeof(elt_width##_t) * 8; \
+ P.VU.vemul = ((float)P.VU.veew / P.VU.vsew * P.VU.vlmul); \
+ reg_t emul = P.VU.vemul + 0.875; \
require(emul >= 1 && emul <= 8); \
require((insn.rd() & (emul - 1)) == 0); \
require((nf * emul) <= (NVPR / 4) && \
(insn.rd() + nf * emul) <= NVPR); \
-#define VI_CHECK_LOAD \
- VI_CHECK_STORE; \
+#define VI_CHECK_LOAD(elt_width) \
+ VI_CHECK_STORE(elt_width); \
if (insn.v_vm() == 0) \
require(insn.rd() != 0); \
@@ -1546,21 +1548,20 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
}
-#define VI_LD(stride, offset, ld_width) \
+#define VI_LD(stride, offset, elt_width) \
const reg_t nf = insn.v_nf() + 1; \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vd = insn.rd(); \
- const float eew = sizeof(ld_width##_t) * 8; \
- VI_CHECK_LOAD; \
+ VI_CHECK_LOAD(elt_width); \
for (reg_t i = 0; i < vl; ++i) { \
VI_ELEMENT_SKIP(i); \
VI_STRIP(i); \
P.VU.vstart = i; \
for (reg_t fn = 0; fn < nf; ++fn) { \
- ld_width##_t val = MMU.load_##ld_width( \
- baseAddr + (stride) + (offset) * sizeof(ld_width##_t)); \
- P.VU.elt<ld_width##_t>(vd + fn * emul, vreg_inx, true) = val; \
+ elt_width##_t val = MMU.load_##elt_width( \
+ baseAddr + (stride) + (offset) * sizeof(elt_width##_t)); \
+ P.VU.elt<elt_width##_t>(vd + fn * emul, vreg_inx, true) = val; \
} \
} \
P.VU.vstart = 0;
@@ -1605,21 +1606,20 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
} \
P.VU.vstart = 0;
-#define VI_ST(stride, offset, st_width) \
+#define VI_ST(stride, offset, elt_width) \
const reg_t nf = insn.v_nf() + 1; \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vs3 = insn.rd(); \
- const float eew = sizeof(st_width##_t) * 8; \
- VI_CHECK_STORE; \
+ VI_CHECK_STORE(elt_width); \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i) \
VI_ELEMENT_SKIP(i); \
P.VU.vstart = i; \
for (reg_t fn = 0; fn < nf; ++fn) { \
- st_width##_t val = P.VU.elt<st_width##_t>(vs3 + fn * emul, vreg_inx); \
- MMU.store_##st_width( \
- baseAddr + (stride) + (offset) * sizeof(st_width##_t), val); \
+ elt_width##_t val = P.VU.elt<elt_width##_t>(vs3 + fn * emul, vreg_inx); \
+ MMU.store_##elt_width( \
+ baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \
} \
} \
P.VU.vstart = 0;
@@ -1660,14 +1660,13 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
} \
P.VU.vstart = 0;
-#define VI_LDST_FF(ld_type) \
+#define VI_LDST_FF(elt_width) \
const reg_t nf = insn.v_nf() + 1; \
const reg_t sew = p->VU.vsew; \
const reg_t vl = p->VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t rd_num = insn.rd(); \
- const float eew = sizeof(ld_type##_t) * 8; \
- VI_CHECK_LOAD; \
+ VI_CHECK_LOAD(elt_width); \
bool early_stop = false; \
for (reg_t i = p->VU.vstart; i < vl; ++i) { \
VI_STRIP(i); \
@@ -1676,8 +1675,8 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
for (reg_t fn = 0; fn < nf; ++fn) { \
uint64_t val; \
try { \
- val = MMU.load_##ld_type( \
- baseAddr + (i * nf + fn) * sizeof(ld_type##_t)); \
+ val = MMU.load_##elt_width( \
+ baseAddr + (i * nf + fn) * sizeof(elt_width##_t)); \
} catch (trap_t& t) { \
if (i == 0) \
throw; /* Only take exception on zeroth element */ \
@@ -1686,7 +1685,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
P.VU.vl = i; \
break; \
} \
- p->VU.elt<ld_type##_t>(rd_num + fn * emul, vreg_inx, true) = val; \
+ p->VU.elt<elt_width##_t>(rd_num + fn * emul, vreg_inx, true) = val; \
} \
\
if (early_stop) { \
diff --git a/riscv/execute.cc b/riscv/execute.cc
index 6c20e99..5cd29b7 100644
--- a/riscv/execute.cc
+++ b/riscv/execute.cc
@@ -282,7 +282,7 @@ void processor_t::step(size_t n)
}
for (reg_t i=0; i<NVPR; ++i) {
if (!VU.reg_referenced[i]) continue;
- fprintf(stderr, "vconfig <- sew=%lu vlmul=%.3f eew=%lu emul=%f vlmax=%lu vl=%lu\n",
+ fprintf(stderr, "vconfig <- sew=%lu vlmul=%.3f eew=%lu emul=%.3f vlmax=%lu vl=%lu\n",
VU.vsew, vlmul, VU.veew, VU.vemul, VU.vlmax, VU.vl);
for (reg_t j=0; j<VU.VLEN/32; ++j) {
uint32_t &old = saved->VU.elt<uint32_t>(i, j);