From 6e4977abdbe16dc7923a709cf3b3defb871c9061 Mon Sep 17 00:00:00 2001
From: Chih-Min Chao <chihmin.chao@sifive.com>
Date: Mon, 3 Aug 2020 01:25:53 -0700
Subject: rvv: add 'vstartalu' option to --varch argument

   The option applies to all vector instructions except load/store instructions.

   0      : no instruction may execute with a non-zero vstart
   not 0  : an instruction may execute with a non-zero vstart unless the
            spec requires vstart to be zero for that instruction

   The default value is 1.

Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
---
 riscv/decode.h           | 36 ++++++++++++++++++++----------------
 riscv/insns/vfirst_m.h   |  2 +-
 riscv/insns/vfmv_f_s.h   |  2 +-
 riscv/insns/vfmv_s_f.h   |  2 +-
 riscv/insns/vid_v.h      |  2 +-
 riscv/insns/viota_m.h    |  2 +-
 riscv/insns/vmerge_vim.h |  2 +-
 riscv/insns/vmerge_vvm.h |  2 +-
 riscv/insns/vmerge_vxm.h |  2 +-
 riscv/insns/vmsbf_m.h    |  2 +-
 riscv/insns/vmsif_m.h    |  2 +-
 riscv/insns/vmsof_m.h    |  2 +-
 riscv/insns/vmv_s_x.h    |  2 +-
 riscv/insns/vmv_v_i.h    |  2 +-
 riscv/insns/vmv_v_v.h    |  2 +-
 riscv/insns/vmv_v_x.h    |  2 +-
 riscv/insns/vmv_x_s.h    |  2 +-
 riscv/insns/vmvnfr_v.h   |  2 +-
 riscv/insns/vpopc_m.h    |  2 +-
 riscv/insns/vsetvl.h     |  2 +-
 riscv/insns/vsetvli.h    |  2 +-
 riscv/processor.cc       |  4 ++++
 riscv/processor.h        |  1 +
 23 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/riscv/decode.h b/riscv/decode.h
index 9f8f786..c64cacc 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -240,18 +240,22 @@ private:
 #define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0)
 
 #define require_vector_vs require((STATE.mstatus & MSTATUS_VS) != 0);
-#define require_vector \
+#define require_vector(alu) \
   do { \
     require_vector_vs; \
     require_extension('V'); \
     require(!P.VU.vill); \
+    if (alu && !P.VU.vstart_alu) \
+      require(P.VU.vstart == 0); \
     WRITE_VSTATUS; \
     dirty_vs_state; \
   } while (0);
-#define require_vector_novtype(is_log) \
+#define require_vector_novtype(is_log, alu) \
   do {  \
     require_vector_vs; \
     require_extension('V'); \
+    if (alu && !P.VU.vstart_alu) \
+      require(P.VU.vstart == 0); \
     if (is_log) \
       WRITE_VSTATUS; \
     dirty_vs_state; \
@@ -468,7 +472,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
 }
 
 #define VI_NARROW_CHECK_COMMON \
-  require_vector;\
+  require_vector(true);\
   require(P.VU.vflmul <= 4); \
   require(P.VU.vsew * 2 <= P.VU.ELEN); \
   require_align(insn.rs2(), P.VU.vflmul * 2); \
@@ -476,14 +480,14 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
   require_vm; \
 
 #define VI_WIDE_CHECK_COMMON \
-  require_vector;\
+  require_vector(true);\
   require(P.VU.vflmul <= 4); \
   require(P.VU.vsew * 2 <= P.VU.ELEN); \
   require_align(insn.rd(), P.VU.vflmul * 2); \
   require_vm; \
 
 #define VI_CHECK_ST_INDEX(elt_width) \
-  require_vector; \
+  require_vector(false); \
   float vemul = ((float)elt_width / P.VU.vsew * P.VU.vflmul); \
   require(vemul >= 0.125 && vemul <= 8); \
   reg_t emul = vemul < 1 ? 1 : vemul; \
@@ -532,7 +536,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
   }
 
 #define VI_CHECK_STORE(elt_width) \
-  require_vector; \
+  require_vector(false); \
   reg_t veew = sizeof(elt_width##_t) * 8; \
   float vemul = ((float)veew / P.VU.vsew * P.VU.vflmul); \
   reg_t emul = vemul < 1 ? 1 : vemul; \
@@ -563,7 +567,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
   }
 
 #define VI_CHECK_QSS(is_vs1) \
-  require_vector;\
+  require_vector(true);\
   p->supports_extension(EXT_ZVQMAC); \
   require(P.VU.vflmul <= 2); \
   require(P.VU.vsew * 4 <= P.VU.ELEN); \
@@ -604,7 +608,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
     require_align(insn.rs1(), P.VU.vflmul); \
 
 #define VI_CHECK_REDUCTION(is_wide) \
-  require_vector;\
+  require_vector(true);\
   if (is_wide) {\
     require(P.VU.vsew * 2 <= P.VU.ELEN); \
   } \
@@ -624,7 +628,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
 //
 #define VI_GENERAL_LOOP_BASE \
   require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \
-  require_vector;\
+  require_vector(true);\
   reg_t vl = P.VU.vl; \
   reg_t sew = P.VU.vsew; \
   reg_t rd_num = insn.rd(); \
@@ -649,7 +653,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
 
 #define VI_LOOP_CMP_BASE \
   require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \
-  require_vector;\
+  require_vector(true);\
   reg_t vl = P.VU.vl; \
   reg_t sew = P.VU.vsew; \
   reg_t rd_num = insn.rd(); \
@@ -668,7 +672,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
 
 #define VI_LOOP_MASK(op) \
   require(P.VU.vsew <= e64); \
-  require_vector;\
+  require_vector(true);\
   reg_t vl = P.VU.vl; \
   for (reg_t i = P.VU.vstart; i < vl; ++i) { \
     int midx = i / 64; \
@@ -1749,7 +1753,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
   p->VU.vstart = 0;
 
 #define VI_LD_WHOLE(elt_width) \
-  require_vector_novtype(true); \
+  require_vector_novtype(true, false); \
   const reg_t baseAddr = RS1; \
   const reg_t vd = insn.rd(); \
   const reg_t len = insn.v_nf() + 1; \
@@ -1780,7 +1784,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
   P.VU.vstart = 0; \
 
 #define VI_ST_WHOLE \
-  require_vector_novtype(true); \
+  require_vector_novtype(true, false); \
   const reg_t baseAddr = RS1; \
   const reg_t vs3 = insn.rd(); \
   const reg_t len = insn.v_nf() + 1; \
@@ -1812,7 +1816,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
 // vector: amo 
 //
 #define VI_AMO(op, type, idx_type) \
-  require_vector; \
+  require_vector(false); \
   require_align(insn.rd(), P.VU.vflmul); \
   require(P.VU.vsew <= P.get_xlen() && P.VU.vsew >= 32); \
   require_align(insn.rd(), P.VU.vflmul); \
@@ -1912,7 +1916,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
   require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || \
           (P.VU.vsew == e32 && p->supports_extension('F')) || \
           (P.VU.vsew == e64 && p->supports_extension('D'))); \
-  require_vector;\
+  require_vector(true);\
   reg_t vl = P.VU.vl; \
   reg_t rd_num = insn.rd(); \
   reg_t rs1_num = insn.rs1(); \
@@ -2295,7 +2299,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
 
 #define VI_VFP_LOOP_SCALE_BASE \
   require_fp; \
-  require_vector;\
+  require_vector(true);\
   require((P.VU.vsew == e8 && p->supports_extension(EXT_ZFH)) || \
           (P.VU.vsew == e16 && p->supports_extension('F')) || \
           (P.VU.vsew == e32 && p->supports_extension('D'))); \
diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h
index 40e2f09..3095723 100644
--- a/riscv/insns/vfirst_m.h
+++ b/riscv/insns/vfirst_m.h
@@ -1,6 +1,6 @@
 // vmfirst rd, vs2
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 reg_t vl = P.VU.vl;
 reg_t sew = P.VU.vsew;
 reg_t rd_num = insn.rd();
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index 2f82ce8..47e3c3b 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -1,5 +1,5 @@
 // vfmv_f_s: rd = vs2[0] (rs1=0)
-require_vector;
+require_vector(true);
 require_fp;
 require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) ||
         (P.VU.vsew == e32 && p->supports_extension('F')) ||
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index 17c85d3..4a4c105 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -1,5 +1,5 @@
 // vfmv_s_f: vd[0] = rs1 (vs2=0)
-require_vector;
+require_vector(true);
 require_fp;
 require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) ||
         (P.VU.vsew == e32 && p->supports_extension('F')) ||
diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h
index 786432f..012d124 100644
--- a/riscv/insns/vid_v.h
+++ b/riscv/insns/vid_v.h
@@ -1,6 +1,6 @@
 // vmpopc rd, vs2, vm
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 reg_t vl = P.VU.vl;
 reg_t sew = P.VU.vsew;
 reg_t rd_num = insn.rd();
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index 8b019aa..a436825 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -1,6 +1,6 @@
 // vmpopc rd, vs2, vm
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 reg_t vl = P.VU.vl;
 reg_t sew = P.VU.vsew;
 reg_t rd_num = insn.rd();
diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h
index b20bcde..fd6ae1c 100644
--- a/riscv/insns/vmerge_vim.h
+++ b/riscv/insns/vmerge_vim.h
@@ -1,5 +1,5 @@
 // vmerge.vim vd, vs2, simm5
-require_vector;
+require_vector(true);
 VI_CHECK_SSS(false);
 VI_VVXI_MERGE_LOOP
 ({
diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h
index d670554..df416b2 100644
--- a/riscv/insns/vmerge_vvm.h
+++ b/riscv/insns/vmerge_vvm.h
@@ -1,5 +1,5 @@
 // vmerge.vvm vd, vs2, vs1
-require_vector;
+require_vector(true);
 VI_CHECK_SSS(true);
 VI_VVXI_MERGE_LOOP
 ({
diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h
index 3fd68cb..122a7b7 100644
--- a/riscv/insns/vmerge_vxm.h
+++ b/riscv/insns/vmerge_vxm.h
@@ -1,5 +1,5 @@
 // vmerge.vxm vd, vs2, rs1
-require_vector;
+require_vector(true);
 VI_CHECK_SSS(false);
 VI_VVXI_MERGE_LOOP
 ({
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
index d90df5e..a4195cf 100644
--- a/riscv/insns/vmsbf_m.h
+++ b/riscv/insns/vmsbf_m.h
@@ -1,6 +1,6 @@
 // vmsbf.m vd, vs2, vm
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 require(P.VU.vstart == 0);
 require_vm;
 require(insn.rd() != insn.rs2());
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
index 6e941eb..a16ef68 100644
--- a/riscv/insns/vmsif_m.h
+++ b/riscv/insns/vmsif_m.h
@@ -1,6 +1,6 @@
 // vmsif.m rd, vs2, vm
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 require(P.VU.vstart == 0);
 require_vm;
 require(insn.rd() != insn.rs2());
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
index 9dd122d..5ef0bfd 100644
--- a/riscv/insns/vmsof_m.h
+++ b/riscv/insns/vmsof_m.h
@@ -1,6 +1,6 @@
 // vmsof.m rd, vs2, vm
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 require(P.VU.vstart == 0);
 require_vm;
 require(insn.rd() != insn.rs2());
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
index 74ab9e0..0e6a13e 100644
--- a/riscv/insns/vmv_s_x.h
+++ b/riscv/insns/vmv_s_x.h
@@ -1,5 +1,5 @@
 // vmv_s_x: vd[0] = rs1
-require_vector;
+require_vector(true);
 require(insn.v_vm() == 1);
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
 reg_t vl = P.VU.vl;
diff --git a/riscv/insns/vmv_v_i.h b/riscv/insns/vmv_v_i.h
index f6b5b48..a760779 100644
--- a/riscv/insns/vmv_v_i.h
+++ b/riscv/insns/vmv_v_i.h
@@ -1,5 +1,5 @@
 // vmv.v.i vd, simm5
-require_vector;
+require_vector(true);
 VI_CHECK_SSS(false);
 VI_VVXI_MERGE_LOOP
 ({
diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h
index 523f2d9..d7f47d0 100644
--- a/riscv/insns/vmv_v_v.h
+++ b/riscv/insns/vmv_v_v.h
@@ -1,5 +1,5 @@
 // vvmv.v.v vd, vs1
-require_vector;
+require_vector(true);
 VI_CHECK_SSS(true);
 VI_VVXI_MERGE_LOOP
 ({
diff --git a/riscv/insns/vmv_v_x.h b/riscv/insns/vmv_v_x.h
index 7528f41..fa7c920 100644
--- a/riscv/insns/vmv_v_x.h
+++ b/riscv/insns/vmv_v_x.h
@@ -1,5 +1,5 @@
 // vmv.v.x vd, rs1
-require_vector;
+require_vector(true);
 VI_CHECK_SSS(false);
 VI_VVXI_MERGE_LOOP
 ({
diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h
index 04cad1c..2c03e43 100644
--- a/riscv/insns/vmv_x_s.h
+++ b/riscv/insns/vmv_x_s.h
@@ -1,5 +1,5 @@
 // vmv_x_s: rd = vs2[rs1]
-require_vector;
+require_vector(true);
 require(insn.v_vm() == 1);
 uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen());
 reg_t rs1 = RS1;
diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h
index aedb521..96f0074 100644
--- a/riscv/insns/vmvnfr_v.h
+++ b/riscv/insns/vmvnfr_v.h
@@ -1,5 +1,5 @@
 // vmv1r.v vd, vs2
-require_vector_novtype(true);
+require_vector_novtype(true, true);
 const reg_t baseAddr = RS1;
 const reg_t vd = insn.rd();
 const reg_t vs2 = insn.rs2();
diff --git a/riscv/insns/vpopc_m.h b/riscv/insns/vpopc_m.h
index 9eaca1e..c204b2c 100644
--- a/riscv/insns/vpopc_m.h
+++ b/riscv/insns/vpopc_m.h
@@ -1,6 +1,6 @@
 // vmpopc rd, vs2, vm
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
+require_vector(true);
 reg_t vl = P.VU.vl;
 reg_t sew = P.VU.vsew;
 reg_t rd_num = insn.rd();
diff --git a/riscv/insns/vsetvl.h b/riscv/insns/vsetvl.h
index 4d03542..2969edc 100644
--- a/riscv/insns/vsetvl.h
+++ b/riscv/insns/vsetvl.h
@@ -1,2 +1,2 @@
-require_vector_novtype(false);
+require_vector_novtype(false, false);
 WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, RS2));
diff --git a/riscv/insns/vsetvli.h b/riscv/insns/vsetvli.h
index d1f43b5..7b1f1d7 100644
--- a/riscv/insns/vsetvli.h
+++ b/riscv/insns/vsetvli.h
@@ -1,2 +1,2 @@
-require_vector_novtype(false);
+require_vector_novtype(false, false);
 WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, insn.v_zimm11()));
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 50092c8..9f51a3f 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -119,6 +119,7 @@ void processor_t::parse_varch_string(const char* s)
   int vlen = 0;
   int elen = 0;
   int slen = 0;
+  int vstart_alu = 1;
 
   while (pos < len) {
     std::string attr = get_string_token(str, ':', pos);
@@ -131,6 +132,8 @@ void processor_t::parse_varch_string(const char* s)
       slen = get_int_token(str, ',', pos);
     else if (attr == "elen")
       elen = get_int_token(str, ',', pos);
+    else if (attr == "vstartalu")
+      vstart_alu = get_int_token(str, ',', pos);
     else
       bad_varch_string(s, "Unsupported token");
 
@@ -160,6 +163,7 @@ void processor_t::parse_varch_string(const char* s)
   VU.VLEN = vlen;
   VU.ELEN = elen;
   VU.vlenb = vlen / 8;
+  VU.vstart_alu = vstart_alu;
 }
 
 static std::string strtolower(const char* str)
diff --git a/riscv/processor.h b/riscv/processor.h
index 6c16eb9..5d7baeb 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -477,6 +477,7 @@ public:
       float vflmul;
       reg_t ELEN, VLEN;
       bool vill;
+      bool vstart_alu;
 
       // vector element for varies SEW
       template<class T>
-- 
cgit v1.1