aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dave.Wen <dave.wen@sifive.com> 2020-05-07 05:59:07 -0700
committer Dave.Wen <dave.wen@sifive.com> 2020-05-07 05:59:07 -0700
commit 3baafbe3559fb62b8a4d3f13288593035e4502d3 (patch)
tree 42f99317b2b11c2fe1e3a8f79de8a1852e4d4cdb
parent f471e0edac1be60e92b96518cb653fa5f173af07 (diff)
download spike-3baafbe3559fb62b8a4d3f13288593035e4502d3.zip
spike-3baafbe3559fb62b8a4d3f13288593035e4502d3.tar.gz
spike-3baafbe3559fb62b8a4d3f13288593035e4502d3.tar.bz2
rvv: add eew and lmul for vle/vse/vleff
-rw-r--r-- riscv/decode.h 25
-rw-r--r-- riscv/insns/vse_v.h 6
-rw-r--r-- riscv/processor.cc 12
-rw-r--r-- riscv/processor.h 3
4 files changed, 39 insertions, 7 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 9c69b48..260956c 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -118,6 +118,11 @@ public:
uint64_t v_zimm11() { return x(20, 11); }
uint64_t v_lmul() { return 1 << x(20, 2); }
uint64_t v_sew() { return 1 << (x(22, 3) + 3); }
+ uint64_t v_width() {return x(12, 3); } // "width" field, read via x(12, 3); passed to VI_EEW as `width` in the hunks of this patch (x() itself is defined outside this hunk)
+ uint64_t v_mop() {return x(26, 2); } // "mop" field, read via x(26, 2); not used by any hunk shown in this patch
+ uint64_t v_lumop() {return x(20, 5); } // "lumop" field, read via x(20, 5); same x() arguments as v_sumop()
+ uint64_t v_sumop() {return x(20, 5); } // "sumop" field, read via x(20, 5); same x() arguments as v_lumop()
+ uint64_t v_mew() {return x(28, 1); } // "mew" field, read via x(28, 1); passed to VI_EEW as `mew`, where it picks the 128-based or 8-based range
private:
insn_bits_t b;
@@ -350,6 +355,9 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
#define e32 32 // 32b elements
#define e64 64 // 64b elements
#define e128 128 // 128b elements
+#define e256 256 // 256b elements
+#define e512 512 // 512b elements
+#define e1024 1024 // 1024b elements
#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew))
#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew))
@@ -1551,11 +1559,21 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
} \
P.VU.vstart = 0;
+#define VI_EEW(mew, width) /* Set P.VU.veew and P.VU.vemul from an instruction's mew and width fields. */ \
+ reg_t base = mew? 128 : 8; /* smallest element width of the range that mew selects */ \
+ reg_t shf = width == 0? 0: width - 4; /* width 0 -> base; 5, 6, 7 -> base*2, *4, *8 (so mew=1, width=7 reaches e1024) */ \
+ P.VU.veew = base << shf; \
+ P.VU.vemul = ((float)P.VU.veew/P.VU.vsew) * P.VU.vlmul; /* divide as float: veew < vsew must give a fractional EMUL, not 0 */ \
+ assert((P.VU.veew/P.VU.vemul) == (P.VU.vsew/P.VU.vlmul));
+
#define VI_LD_COMMON(stride, offset, ld_width, elt_byte, is_seg) \
const reg_t nf = insn.v_nf() + 1; \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vd = insn.rd(); \
+ const reg_t mew = insn.v_mew(); \
+ const reg_t width = insn.v_width(); \
+ VI_EEW(mew, width); \
require((nf * P.VU.vlmul) <= (NVPR / 4) && \
(vd + nf * P.VU.vlmul) <= NVPR); \
if (!is_seg) \
@@ -1566,7 +1584,7 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
VI_STRIP(i); \
for (reg_t fn = 0; fn < nf; ++fn) { \
ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \
- switch(P.VU.vsew){ \
+ switch(P.VU.veew){ \
case e8: \
P.VU.elt<uint8_t>(vd + fn * vlmul, vreg_inx, true) = val; \
break; \
@@ -1613,6 +1631,9 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
const reg_t vl = p->VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t rd_num = insn.rd(); \
+ const reg_t mew = insn.v_mew(); \
+ const reg_t width = insn.v_width(); \
+ VI_EEW(mew, width); \
bool early_stop = false; \
const reg_t vlmul = P.VU.vlmul; \
require(rd_num + nf * P.VU.vlmul <= NVPR); \
@@ -1633,7 +1654,7 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
break; \
} \
\
- switch (sew) { \
+ switch (P.VU.veew) { \
case e8: \
p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx, true) = val; \
break; \
diff --git a/riscv/insns/vse_v.h b/riscv/insns/vse_v.h
index bcb1b21..cdb07bd 100644
--- a/riscv/insns/vse_v.h
+++ b/riscv/insns/vse_v.h
@@ -1,7 +1,11 @@
// vse.v and vsseg[2-8]e.v
reg_t sew = P.VU.vsew;
-if (sew == e8) {
+const reg_t mew = insn.v_mew();
+const reg_t width = insn.v_width();
+VI_EEW(mew, width);
+
+if (P.VU.veew == e8) {
VI_ST(0, (i * nf + fn), uint8, 1, true);
} else if (sew == e16) {
VI_ST(0, (i * nf + fn), uint16, 2, true);
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 926bd37..827ea1d 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -374,10 +374,14 @@ reg_t processor_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newT
vtype = newType;
vsew = 1 << (BITS(newType, 4, 2) + 3);
vlmul = 1 << BITS(newType, 1, 0);
- vediv = 1 << BITS(newType, 6, 5);
- vlmax = VLEN/vsew * vlmul;
- vmlen = vsew / vlmul;
- reg_mask = (NVPR-1) & ~(vlmul-1);
+ vemul = vlmul;
+ veew = vsew;
+ fractional_lmul = BITS(newType, 5, 5);
+ vta = BITS(newType, 6, 6);
+ vma = BITS(newType, 7, 7);
+ vediv = 1 << BITS(newType, 9, 8);
+ vlmax = fractional_lmul? (VLEN/vsew)/vlmul : VLEN/vsew * vlmul;
+ vmlen = fractional_lmul? 1 : vsew / vlmul;
vill = vsew > ELEN || vediv != 1 || (newType >> 7) != 0;
if (vill) {
diff --git a/riscv/processor.h b/riscv/processor.h
index af2fe0f..166086f 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -445,6 +445,9 @@ public:
reg_t vstart, vxrm, vxsat, vl, vtype, vlenb;
reg_t vma, vta;
reg_t vediv, vsew, vlmul;
+ reg_t veew;
+ float vemul;
+ reg_t vmel;
reg_t ELEN, VLEN, SLEN;
reg_t VALU;
bool vill;