author     Dave.Wen <dave.wen@sifive.com>            2020-05-07 05:59:07 -0700
committer  Dave.Wen <dave.wen@sifive.com>            2020-05-07 05:59:07 -0700
commit     3baafbe3559fb62b8a4d3f13288593035e4502d3 (patch)
tree       42f99317b2b11c2fe1e3a8f79de8a1852e4d4cdb /riscv
parent     f471e0edac1be60e92b96518cb653fa5f173af07 (diff)
rvv: add eew and lmul for vle/vse/vleff
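
Note: the new VI_EEW macro derives the effective element width (EEW) from the instruction's mew and width fields and scales LMUL into an effective EMUL, so vle/vse/vleff no longer assume the memory element width equals vtype's SEW. The standalone C++ helpers below are only a sketch of that arithmetic; compute_eew and compute_emul are illustrative names, not code from the patch.

    #include <cassert>
    #include <cstdint>

    // Mirrors VI_EEW: the base width is 8 bits (128 when mew is set),
    // shifted up by (width - 5) for a nonzero width field.
    inline uint64_t compute_eew(uint64_t mew, uint64_t width) {
      uint64_t base = mew ? 128 : 8;
      uint64_t shift = (width == 0) ? 0 : width - 5;
      return base << shift;
    }

    // Mirrors the vemul update; like the macro it assumes EEW is a
    // power-of-two multiple of SEW, so the integer divisions are exact.
    inline uint64_t compute_emul(uint64_t eew, uint64_t sew, uint64_t lmul) {
      uint64_t emul = (eew / sew) * lmul;
      assert(eew / emul == sew / lmul);  // EEW/EMUL must equal SEW/LMUL
      return emul;
    }

For example, compute_eew(0, 7) evaluates to 32 and compute_eew(1, 7) to 512 under this formula; the assert captures the invariant that EEW/EMUL stays equal to SEW/LMUL, which keeps register-group boundaries consistent when the memory element width differs from SEW.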
Diffstat (limited to 'riscv')
-rw-r--r--  riscv/decode.h       | 25
-rw-r--r--  riscv/insns/vse_v.h  |  6
-rw-r--r--  riscv/processor.cc   | 12
-rw-r--r--  riscv/processor.h    |  3
4 files changed, 39 insertions, 7 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 9c69b48..260956c 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -118,6 +118,11 @@ public:
   uint64_t v_zimm11() { return x(20, 11); }
   uint64_t v_lmul() { return 1 << x(20, 2); }
   uint64_t v_sew() { return 1 << (x(22, 3) + 3); }
+  uint64_t v_width() {return x(12, 3); }
+  uint64_t v_mop() {return x(26, 2); }
+  uint64_t v_lumop() {return x(20, 5); }
+  uint64_t v_sumop() {return x(20, 5); }
+  uint64_t v_mew() {return x(28, 1); }
 
 private:
   insn_bits_t b;
@@ -350,6 +355,9 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
 #define e32 32 // 32b elements
 #define e64 64 // 64b elements
 #define e128 128 // 128b elements
+#define e256 256 // 256b elements
+#define e512 512 // 512b elements
+#define e1024 1024 // 1024b elements
 
 #define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew))
 #define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew))
@@ -1551,11 +1559,21 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
   } \
   P.VU.vstart = 0;
 
+#define VI_EEW(mew, width) \
+  reg_t base = mew? 128 : 8; \
+  reg_t shf = width == 0? 0: width - 5; \
+  P.VU.veew = base << shf; \
+  P.VU.vemul = (P.VU.veew/P.VU.vsew) * P.VU.vlmul; \
+  assert((P.VU.veew/P.VU.vemul) == (P.VU.vsew/P.VU.vlmul));
+
 #define VI_LD_COMMON(stride, offset, ld_width, elt_byte, is_seg) \
   const reg_t nf = insn.v_nf() + 1; \
   const reg_t vl = P.VU.vl; \
   const reg_t baseAddr = RS1; \
   const reg_t vd = insn.rd(); \
+  const reg_t mew = insn.v_mew(); \
+  const reg_t width = insn.v_width(); \
+  VI_EEW(mew, width); \
   require((nf * P.VU.vlmul) <= (NVPR / 4) && \
           (vd + nf * P.VU.vlmul) <= NVPR); \
   if (!is_seg) \
@@ -1566,7 +1584,7 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
     VI_STRIP(i); \
     for (reg_t fn = 0; fn < nf; ++fn) { \
       ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \
-      switch(P.VU.vsew){ \
+      switch(P.VU.veew){ \
         case e8: \
           P.VU.elt<uint8_t>(vd + fn * vlmul, vreg_inx, true) = val; \
           break; \
@@ -1613,6 +1631,9 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
   const reg_t vl = p->VU.vl; \
   const reg_t baseAddr = RS1; \
   const reg_t rd_num = insn.rd(); \
+  const reg_t mew = insn.v_mew(); \
+  const reg_t width = insn.v_width(); \
+  VI_EEW(mew, width); \
   bool early_stop = false; \
   const reg_t vlmul = P.VU.vlmul; \
   require(rd_num + nf * P.VU.vlmul <= NVPR); \
@@ -1633,7 +1654,7 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
       break; \
     } \
     \
-    switch (sew) { \
+    switch (P.VU.veew) { \
       case e8: \
         p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx, true) = val; \
         break; \
diff --git a/riscv/insns/vse_v.h b/riscv/insns/vse_v.h
index bcb1b21..cdb07bd 100644
--- a/riscv/insns/vse_v.h
+++ b/riscv/insns/vse_v.h
@@ -1,7 +1,11 @@
 // vse.v and vsseg[2-8]e.v
 reg_t sew = P.VU.vsew;
 
-if (sew == e8) {
+const reg_t mew = insn.v_mew();
+const reg_t width = insn.v_width();
+VI_EEW(mew, width);
+
+if (P.VU.veew == e8) {
   VI_ST(0, (i * nf + fn), uint8, 1, true);
 } else if (sew == e16) {
   VI_ST(0, (i * nf + fn), uint16, 2, true);
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 926bd37..827ea1d 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -374,10 +374,14 @@ reg_t processor_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newT
     vtype = newType;
     vsew = 1 << (BITS(newType, 4, 2) + 3);
     vlmul = 1 << BITS(newType, 1, 0);
-    vediv = 1 << BITS(newType, 6, 5);
-    vlmax = VLEN/vsew * vlmul;
-    vmlen = vsew / vlmul;
-    reg_mask = (NVPR-1) & ~(vlmul-1);
+    vemul = vlmul;
+    veew = vsew;
+    fractional_lmul = BITS(newType, 5, 5);
+    vta = BITS(newType, 6, 6);
+    vma = BITS(newType, 7, 7);
+    vediv = 1 << BITS(newType, 9, 8);
+    vlmax = fractional_lmul? (VLEN/vsew)/vlmul : VLEN/vsew * vlmul;
+    vmlen = fractional_lmul? 1 : vsew / vlmul;
     vill = vsew > ELEN || vediv != 1 || (newType >> 7) != 0;
 
     if (vill) {
diff --git a/riscv/processor.h b/riscv/processor.h
index af2fe0f..166086f 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -445,6 +445,9 @@ public:
     reg_t vstart, vxrm, vxsat, vl, vtype, vlenb;
     reg_t vma, vta;
     reg_t vediv, vsew, vlmul;
+    reg_t veew;
+    float vemul;
+    reg_t vmel;
     reg_t ELEN, VLEN, SLEN;
     reg_t VALU;
     bool vill;
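
For reference, the processor.cc hunk re-reads vtype under a newer field layout (a fractional-LMUL flag in bit 5, vta/vma in bits 6-7, vediv relocated to bits 9-8) and adjusts vlmax and vmlen accordingly. The decode_vtype helper below is a free-standing sketch of that decoding; decode_vtype and SKETCH_BITS are illustrative names standing in for spike's internal state and its BITS macro, and the sketch is not a drop-in replacement for set_vl().

    #include <cstdint>

    // Extract bits hi..lo of x, inclusive (stands in for spike's BITS macro).
    #define SKETCH_BITS(x, hi, lo) (((x) >> (lo)) & ((1ULL << ((hi) - (lo) + 1)) - 1))

    struct vtype_fields {
      uint64_t vsew, vlmul, fractional_lmul, vta, vma, vediv, vlmax;
    };

    // Decode vtype the way the patched set_vl() does, given VLEN in bits.
    inline vtype_fields decode_vtype(uint64_t newType, uint64_t VLEN) {
      vtype_fields f;
      f.vsew = 1ULL << (SKETCH_BITS(newType, 4, 2) + 3);  // standard element width
      f.vlmul = 1ULL << SKETCH_BITS(newType, 1, 0);       // register group multiplier
      f.fractional_lmul = SKETCH_BITS(newType, 5, 5);     // LMUL-less-than-1 flag
      f.vta = SKETCH_BITS(newType, 6, 6);                 // tail-agnostic
      f.vma = SKETCH_BITS(newType, 7, 7);                 // mask-agnostic
      f.vediv = 1ULL << SKETCH_BITS(newType, 9, 8);       // EDIV
      f.vlmax = f.fractional_lmul ? (VLEN / f.vsew) / f.vlmul
                                  : (VLEN / f.vsew) * f.vlmul;
      return f;
    }

Under fractional LMUL the group multiplier divides instead of multiplies, so vlmax shrinks to (VLEN/vsew)/vlmul, matching the ternary in the hunk above.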