author     Andrew Waterman <andrew@sifive.com>  2020-01-13 11:07:25 -0800
committer  GitHub <noreply@github.com>          2020-01-13 11:07:25 -0800
commit     826f05fda033d98c23cfd727ec0a769d1f2a6a46
tree       364c5a84b39f7c3abb8801cecc6ec4b30f8d0928
parent     bb1cd8f9e374f1730d131bfb68462c6133e4c107
parent     e75ba052d42b1af954c09adc815b541124c2ccce
Merge pull request #378 from chihminchao/rvv-0.8-float64
Rvv 0.8 float64
-rw-r--r--  README.md | 2
-rw-r--r--  riscv/decode.h | 109
-rw-r--r--  riscv/encoding.h | 12
-rw-r--r--  riscv/insns/vfadd_vf.h | 5
-rw-r--r--  riscv/insns/vfadd_vv.h | 3
-rw-r--r--  riscv/insns/vfclass_v.h | 3
-rw-r--r--  riscv/insns/vfcvt_f_x_v.h | 4
-rw-r--r--  riscv/insns/vfcvt_f_xu_v.h | 4
-rw-r--r--  riscv/insns/vfcvt_x_f_v.h | 3
-rw-r--r--  riscv/insns/vfcvt_xu_f_v.h | 3
-rw-r--r--  riscv/insns/vfdiv_vf.h | 3
-rw-r--r--  riscv/insns/vfdiv_vv.h | 3
-rw-r--r--  riscv/insns/vfdot_vv.h | 3
-rw-r--r--  riscv/insns/vfmacc_vf.h | 3
-rw-r--r--  riscv/insns/vfmacc_vv.h | 3
-rw-r--r--  riscv/insns/vfmadd_vf.h | 3
-rw-r--r--  riscv/insns/vfmadd_vv.h | 3
-rw-r--r--  riscv/insns/vfmax_vf.h | 3
-rw-r--r--  riscv/insns/vfmax_vv.h | 3
-rw-r--r--  riscv/insns/vfmerge_vfm.h | 37
-rw-r--r--  riscv/insns/vfmin_vf.h | 3
-rw-r--r--  riscv/insns/vfmin_vv.h | 3
-rw-r--r--  riscv/insns/vfmsac_vf.h | 3
-rw-r--r--  riscv/insns/vfmsac_vv.h | 3
-rw-r--r--  riscv/insns/vfmsub_vf.h | 3
-rw-r--r--  riscv/insns/vfmsub_vv.h | 3
-rw-r--r--  riscv/insns/vfmul_vf.h | 3
-rw-r--r--  riscv/insns/vfmul_vv.h | 3
-rw-r--r--  riscv/insns/vfmv_f_s.h | 8
-rw-r--r--  riscv/insns/vfmv_s_f.h | 25
-rw-r--r--  riscv/insns/vfmv_v_f.h | 21
-rw-r--r--  riscv/insns/vfncvt_f_f_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_f_x_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_f_xu_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_rod_f_f_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_x_f_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_xu_f_w.h | 6
-rw-r--r--  riscv/insns/vfnmacc_vf.h | 3
-rw-r--r--  riscv/insns/vfnmacc_vv.h | 3
-rw-r--r--  riscv/insns/vfnmadd_vf.h | 3
-rw-r--r--  riscv/insns/vfnmadd_vv.h | 3
-rw-r--r--  riscv/insns/vfnmsac_vf.h | 3
-rw-r--r--  riscv/insns/vfnmsac_vv.h | 3
-rw-r--r--  riscv/insns/vfnmsub_vf.h | 3
-rw-r--r--  riscv/insns/vfnmsub_vv.h | 3
-rw-r--r--  riscv/insns/vfrdiv_vf.h | 3
-rw-r--r--  riscv/insns/vfredmax_vs.h | 3
-rw-r--r--  riscv/insns/vfredmin_vs.h | 3
-rw-r--r--  riscv/insns/vfredosum_vs.h | 3
-rw-r--r--  riscv/insns/vfredsum_vs.h | 3
-rw-r--r--  riscv/insns/vfrsub_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnj_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnj_vv.h | 3
-rw-r--r--  riscv/insns/vfsgnjn_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnjn_vv.h | 3
-rw-r--r--  riscv/insns/vfsgnjx_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnjx_vv.h | 3
-rw-r--r--  riscv/insns/vfsqrt_v.h | 3
-rw-r--r--  riscv/insns/vfsub_vf.h | 3
-rw-r--r--  riscv/insns/vfsub_vv.h | 3
-rw-r--r--  riscv/insns/vfwcvt_f_f_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_f_x_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_f_xu_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_x_f_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_xu_f_v.h | 5
-rw-r--r--  riscv/insns/vfwredosum_vs.h | 4
-rw-r--r--  riscv/insns/vfwredsum_vs.h | 4
-rw-r--r--  riscv/insns/vl1r_v.h | 2
-rw-r--r--  riscv/insns/vmfeq_vf.h | 8
-rw-r--r--  riscv/insns/vmfeq_vv.h | 8
-rw-r--r--  riscv/insns/vmfge_vf.h | 8
-rw-r--r--  riscv/insns/vmfgt_vf.h | 8
-rw-r--r--  riscv/insns/vmfle_vf.h | 8
-rw-r--r--  riscv/insns/vmfle_vv.h | 8
-rw-r--r--  riscv/insns/vmflt_vf.h | 8
-rw-r--r--  riscv/insns/vmflt_vv.h | 8
-rw-r--r--  riscv/insns/vmfne_vf.h | 8
-rw-r--r--  riscv/insns/vmfne_vv.h | 8
-rw-r--r--  riscv/insns/vmv1r_v.h | 2
-rw-r--r--  riscv/insns/vmv2r_v.h | 2
-rw-r--r--  riscv/insns/vmv4r_v.h | 2
-rw-r--r--  riscv/insns/vmv8r_v.h | 2
-rw-r--r--  riscv/insns/vmvnfr_v.h | 12
-rw-r--r--  riscv/riscv.mk.in | 4
-rw-r--r--  spike_main/disasm.cc | 33
85 files changed, 458 insertions(+), 109 deletions(-)
diff --git a/README.md b/README.md
index 3db8cf7..2e9e0ac 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Spike supports the following RISC-V ISA features:
- D extension, v2.2
- Q extension, v2.2
- C extension, v2.0
- - V extension, v0.8-draft-20191118, w/ Zvlsseg, w/o Zvamo/Zvediv, (_requires a 64-bit host_)
+ - V extension, v0.8, w/ Zvlsseg, w/o Zvamo/Zvediv, (_requires a 64-bit host_)
- Conformance to both RVWMO and RVTSO (Spike is sequentially consistent)
- Machine, Supervisor, and User modes, v1.11
- Debug v0.14
diff --git a/riscv/decode.h b/riscv/decode.h
index 495ffc4..21bb92b 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -441,12 +441,10 @@ static inline bool is_overlapped(const int astart, const int asize,
require(insn.rd() != 0); \
}
-#define VI_CHECK_SD \
- require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
-
#define VI_CHECK_DSS(is_vs1) \
VI_WIDE_CHECK_COMMON; \
require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
+ require((insn.rd() & (P.VU.vlmul * 2 - 1)) == 0); \
require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
if (is_vs1) {\
require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \
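
The alignment requirement added to VI_CHECK_DSS can be read on its own: a widening destination occupies a group of 2*vlmul registers, so its index must be a multiple of that group size. A minimal stand-alone sketch (illustrative names, not simulator API):

    #include <cassert>

    // vlmul is a power of two, so the group-size test reduces to a mask check.
    static bool group_aligned(unsigned reg, unsigned group_size) {
      return (reg & (group_size - 1)) == 0;
    }

    int main() {
      unsigned vlmul = 2;                    // LMUL=2: widening dest spans 4 regs
      assert(group_aligned(4, vlmul * 2));   // vd = v4 is legal
      assert(!group_aligned(6, vlmul * 2));  // vd = v6 is not: 6 % 4 != 0
    }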
@@ -1466,7 +1464,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vs3 = insn.rd(); \
- require(vs3 + nf <= NVPR); \
+ require(vs3 + nf * P.VU.vlmul <= NVPR); \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i) \
@@ -1499,7 +1497,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vd = insn.rd(); \
- require(vd + nf <= NVPR); \
+ require(vd + nf * P.VU.vlmul <= NVPR); \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vl; ++i) { \
VI_ELEMENT_SKIP(i); \
@@ -1551,6 +1549,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t rd_num = insn.rd(); \
bool early_stop = false; \
const reg_t vlmul = P.VU.vlmul; \
+ require(rd_num + nf * P.VU.vlmul <= NVPR); \
p->VU.vstart = 0; \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i); \
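
The three hunks above add the same register-file bound to the store, load, and fault-only-first segment loops: with nf fields at LMUL = vlmul, the access touches nf * vlmul registers starting at the base register, so the old vd + nf bound was too weak. Restated stand-alone (NVPR = 32 vector registers; a sketch, not simulator code):

    #include <cassert>

    static bool segment_fits(unsigned vd, unsigned nf, unsigned vlmul) {
      const unsigned NVPR = 32;        // number of architectural vector registers
      return vd + nf * vlmul <= NVPR;  // the whole nf * vlmul group must exist
    }

    int main() {
      assert(segment_fits(24, 4, 2));   // v24..v31: exactly fits
      assert(!segment_fits(28, 4, 2));  // v28 + 8 registers would run past v31
    }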
@@ -1595,9 +1594,9 @@ for (reg_t i = 0; i < vlmax; ++i) { \
// vector: vfp helper
//
#define VI_VFP_COMMON \
- require_extension('F'); \
require_fp; \
- require(P.VU.vsew == 32); \
+ require((P.VU.vsew == e32 && p->supports_extension('F')) || \
+ (P.VU.vsew == e64 && p->supports_extension('D'))); \
require_vector;\
reg_t vl = P.VU.vl; \
reg_t rd_num = insn.rd(); \
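
VI_VFP_COMMON now ties the element width to the scalar FP extensions instead of hard-coding single precision. Restated as a predicate (a sketch with plain booleans; the macro itself queries p->supports_extension):

    #include <cassert>

    // SEW=32 vector FP needs F, SEW=64 needs D; other SEWs stay illegal.
    static bool vfp_legal(unsigned vsew, bool has_F, bool has_D) {
      return (vsew == 32 && has_F) || (vsew == 64 && has_D);
    }

    int main() {
      assert(vfp_legal(64, true, true));    // FD core: e64 vector FP now allowed
      assert(!vfp_legal(64, true, false));  // F-only core: e64 stays illegal
      assert(!vfp_legal(16, true, true));   // e16 still rejected
    }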
@@ -1621,14 +1620,14 @@ for (reg_t i = 0; i < vlmax; ++i) { \
uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx); \
uint64_t res = 0;
-#define VI_VFP_LOOP_REDUCTION_BASE \
- VI_VFP_COMMON \
- float32_t vd_0 = P.VU.elt<float32_t>(rd_num, 0); \
- float32_t vs1_0 = P.VU.elt<float32_t>(rs1_num, 0); \
+#define VI_VFP_LOOP_REDUCTION_BASE(width) \
+ float##width##_t vd_0 = P.VU.elt<float##width##_t>(rd_num, 0); \
+ float##width##_t vs1_0 = P.VU.elt<float##width##_t>(rs1_num, 0); \
vd_0 = vs1_0;\
for (reg_t i=P.VU.vstart; i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
- int32_t &vd = P.VU.elt<int32_t>(rd_num, i); \
+ int##width##_t &vd = P.VU.elt<int##width##_t>(rd_num, i); \
+ float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); \
#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \
VI_VFP_COMMON \
@@ -1648,19 +1647,18 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
P.VU.vstart = 0; \
- set_fp_exceptions; \
if (vl > 0) { \
P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \
}
#define VI_VFP_LOOP_CMP_END \
switch(P.VU.vsew) { \
- case e32: { \
+ case e32: \
+ case e64: { \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
break; \
} \
case e16: \
- case e8: \
default: \
require(0); \
break; \
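
The compare loop stores one result bit per element into the destination mask register through the read-modify-write visible above. In isolation, assuming a one-bit mask field per element (vmlen = 1; in the macro, mmask and mpos come from VI_VFP_LOOP_CMP_BASE):

    #include <cstdint>

    // Write compare result `res` into bit mpos of the 64-bit mask word vdi,
    // leaving every other bit untouched.
    static void write_mask_bit(uint64_t& vdi, unsigned mpos, bool res) {
      const uint64_t mmask = uint64_t(1) << mpos;
      vdi = (vdi & ~mmask) | ((uint64_t(res) << mpos) & mmask);
    }

    int main() {
      uint64_t vdi = 0xff;
      write_mask_bit(vdi, 3, false);  // clears bit 3 -> 0xf7
      write_mask_bit(vdi, 8, true);   // sets bit 8  -> 0x1f7
    }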
@@ -1669,7 +1667,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
P.VU.vstart = 0; \
set_fp_exceptions;
-#define VI_VFP_VV_LOOP(BODY) \
+#define VI_VFP_VV_LOOP(BODY32, BODY64) \
VI_CHECK_SSS(true); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
@@ -1677,12 +1675,19 @@ for (reg_t i = 0; i < vlmax; ++i) { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- BODY; \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ BODY64; \
set_fp_exceptions; \
break; \
}\
case e16: \
- case e8: \
default: \
require(0); \
break; \
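
Stripped of the macro plumbing, the rewritten VI_VFP_VV_LOOP is a SEW switch over two caller-supplied bodies. A non-macro analogue (illustrative only; the softfloat element types are elided):

    #include <cassert>

    template <typename Body32, typename Body64>
    void vfp_vv_dispatch(unsigned vsew, Body32 body32, Body64 body64) {
      switch (vsew) {
        case 32: body32(); break;          // float32_t path (requires F)
        case 64: body64(); break;          // float64_t path (requires D)
        default: assert(!"reserved SEW");  // mirrors require(0)
      }
    }

    int main() {
      float f = 1.0f; double d = 1.0;
      vfp_vv_dispatch(64, [&]{ f += f; }, [&]{ d += d; });  // runs the f64 body
    }

Each instruction header then supplies one body per element width, as the vfadd_vv.h hunk further down shows.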
@@ -1690,22 +1695,39 @@ for (reg_t i = 0; i < vlmax; ++i) { \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
-#define VI_VFP_VV_LOOP_REDUCTION(BODY) \
+#define VI_VFP_VV_LOOP_REDUCTION(BODY32, BODY64) \
VI_CHECK_REDUCTION(false) \
- VI_VFP_LOOP_REDUCTION_BASE \
- float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- BODY; \
- DEBUG_RVV_FP_VV; \
- VI_VFP_LOOP_REDUCTION_END(e32)
+ VI_VFP_COMMON \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ VI_VFP_LOOP_REDUCTION_BASE(32) \
+ BODY32; \
+ set_fp_exceptions; \
+ VI_VFP_LOOP_REDUCTION_END(e32) \
+ break; \
+ }\
+ case e64: {\
+ VI_VFP_LOOP_REDUCTION_BASE(64) \
+ BODY64; \
+ set_fp_exceptions; \
+ VI_VFP_LOOP_REDUCTION_END(e64) \
+ break; \
+ }\
+ case e16: \
+ default: \
+ require(0); \
+ break; \
+ }; \
#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \
VI_VFP_LOOP_WIDE_REDUCTION_BASE \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
BODY; \
+ set_fp_exceptions; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_REDUCTION_END(e64)
-#define VI_VFP_VF_LOOP(BODY) \
+#define VI_VFP_VF_LOOP(BODY32, BODY64) \
VI_CHECK_SSS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
@@ -1713,7 +1735,15 @@ for (reg_t i = 0; i < vlmax; ++i) { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- BODY; \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t rs1 = f64(READ_FREG(rs1_num)); \
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ BODY64; \
set_fp_exceptions; \
break; \
}\
@@ -1726,12 +1756,31 @@ for (reg_t i = 0; i < vlmax; ++i) { \
DEBUG_RVV_FP_VF; \
VI_VFP_LOOP_END
-#define VI_VFP_LOOP_CMP(BODY, is_vs1) \
+#define VI_VFP_LOOP_CMP(BODY32, BODY64, is_vs1) \
VI_CHECK_MSS(is_vs1); \
VI_VFP_LOOP_CMP_BASE \
- BODY; \
- set_fp_exceptions; \
- DEBUG_RVV_FP_VV; \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
+ float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
+ float32_t rs1 = f32(READ_FREG(rs1_num)); \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \
+ float64_t rs1 = f64(READ_FREG(rs1_num)); \
+ BODY64; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ default: \
+ require(0); \
+ break; \
+ }; \
VI_VFP_LOOP_CMP_END \
#define VI_VFP_VF_LOOP_WIDE(BODY) \
diff --git a/riscv/encoding.h b/riscv/encoding.h
index 6d691e7..26142db 100644
--- a/riscv/encoding.h
+++ b/riscv/encoding.h
@@ -1266,6 +1266,14 @@
#define MASK_VSADD_VI 0xfc00707f
#define MATCH_VSLL_VI 0x94003057
#define MASK_VSLL_VI 0xfc00707f
+#define MATCH_VMV1R_V 0x9e003057
+#define MASK_VMV1R_V 0xfe0ff07f
+#define MATCH_VMV2R_V 0x9e00b057
+#define MASK_VMV2R_V 0xfe0ff07f
+#define MATCH_VMV4R_V 0x9e01b057
+#define MASK_VMV4R_V 0xfe0ff07f
+#define MATCH_VMV8R_V 0x9e03b057
+#define MASK_VMV8R_V 0xfe0ff07f
#define MATCH_VSRL_VI 0xa0003057
#define MASK_VSRL_VI 0xfc00707f
#define MATCH_VSRA_VI 0xa4003057
@@ -2305,6 +2313,10 @@ DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI)
DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI)
DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI)
DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI)
+DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V)
+DECLARE_INSN(vmv2r_v, MATCH_VMV2R_V, MASK_VMV2R_V)
+DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V)
+DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V)
DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI)
DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI)
DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI)
diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h
index 60dec4a..bdb7f75 100644
--- a/riscv/insns/vfadd_vf.h
+++ b/riscv/insns/vfadd_vf.h
@@ -1,5 +1,8 @@
// vfadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
- vd = f32_add(rs1, vs2);
+ vd = f32_add(rs1, vs2);
+},
+{
+ vd = f64_add(rs1, vs2);
})
diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h
index de0ae53..b333a8a 100644
--- a/riscv/insns/vfadd_vv.h
+++ b/riscv/insns/vfadd_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_add(vs1, vs2);
+},
+{
+ vd = f64_add(vs1, vs2);
})
diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h
index 75f29a2..8ee092f 100644
--- a/riscv/insns/vfclass_v.h
+++ b/riscv/insns/vfclass_v.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd.v = f32_classify(vs2);
+},
+{
+ vd.v = f64_classify(vs2);
})
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
index f6604fb..fdaa697 100644
--- a/riscv/insns/vfcvt_f_x_v.h
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -3,4 +3,8 @@ VI_VFP_VF_LOOP
({
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
+},
+{
+ auto vs2_i = P.VU.elt<int64_t>(rs2_num, i);
+ vd = i64_to_f64(vs2_i);
})
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
index 2c845ac..01ea61c 100644
--- a/riscv/insns/vfcvt_f_xu_v.h
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -3,4 +3,8 @@ VI_VFP_VF_LOOP
({
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
+},
+{
+ auto vs2_u = P.VU.elt<uint64_t>(rs2_num, i);
+ vd = ui64_to_f64(vs2_u);
})
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
index a9eedc4..96bc481 100644
--- a/riscv/insns/vfcvt_x_f_v.h
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
+},
+{
+ P.VU.elt<int64_t>(rd_num, i) = f64_to_i64(vs2, STATE.frm, true);
})
diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h
index 76c7735..5f19f90 100644
--- a/riscv/insns/vfcvt_xu_f_v.h
+++ b/riscv/insns/vfcvt_xu_f_v.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
+},
+{
+ P.VU.elt<uint64_t>(rd_num, i) = f64_to_ui64(vs2, STATE.frm, true);
})
diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h
index 2b8124c..ce21730 100644
--- a/riscv/insns/vfdiv_vf.h
+++ b/riscv/insns/vfdiv_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_div(vs2, rs1);
+},
+{
+ vd = f64_div(vs2, rs1);
})
diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h
index c20ff1d..8a49a91 100644
--- a/riscv/insns/vfdiv_vv.h
+++ b/riscv/insns/vfdiv_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_div(vs2, vs1);
+},
+{
+ vd = f64_div(vs2, vs1);
})
diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h
index 11c8bce..85d0b8a 100644
--- a/riscv/insns/vfdot_vv.h
+++ b/riscv/insns/vfdot_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_add(vd, f32_mul(vs2, vs1));
+},
+{
+ vd = f64_add(vd, f64_mul(vs2, vs1));
})
diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h
index 5013d34..fca4184 100644
--- a/riscv/insns/vfmacc_vf.h
+++ b/riscv/insns/vfmacc_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, vs2, vd);
+},
+{
+ vd = f64_mulAdd(rs1, vs2, vd);
})
diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h
index 663a648..f1caf33 100644
--- a/riscv/insns/vfmacc_vv.h
+++ b/riscv/insns/vfmacc_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vs1, vs2, vd);
+},
+{
+ vd = f64_mulAdd(vs1, vs2, vd);
})
diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h
index 920c392..7707dae 100644
--- a/riscv/insns/vfmadd_vf.h
+++ b/riscv/insns/vfmadd_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(vd, rs1, vs2);
+},
+{
+ vd = f64_mulAdd(vd, rs1, vs2);
})
diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h
index c967ec3..a095c38 100644
--- a/riscv/insns/vfmadd_vv.h
+++ b/riscv/insns/vfmadd_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vd, vs1, vs2);
+},
+{
+ vd = f64_mulAdd(vd, vs1, vs2);
})
diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h
index eb70e48..a8df880 100644
--- a/riscv/insns/vfmax_vf.h
+++ b/riscv/insns/vfmax_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_max(vs2, rs1);
+},
+{
+ vd = f64_max(vs2, rs1);
})
diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h
index 6d12f08..2329e74 100644
--- a/riscv/insns/vfmax_vv.h
+++ b/riscv/insns/vfmax_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_max(vs2, vs1);
+},
+{
+ vd = f64_max(vs2, vs1);
})
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index 0ffa49b..639809d 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -3,16 +3,37 @@ require(insn.rd() != 0);
VI_CHECK_SSS(false);
VI_VFP_COMMON;
reg_t sew = P.VU.vsew;
-for (reg_t i=P.VU.vstart; i<vl; ++i) {
- auto &vd = P.VU.elt<float32_t>(rd_num, i);
- auto rs1 = f32(READ_FREG(rs1_num));
- auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
- int midx = (P.VU.vmlen * i) / 64;
- int mpos = (P.VU.vmlen * i) % 64;
- bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+switch(P.VU.vsew) {
+ case 32:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float32_t>(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
- vd = use_first ? rs1 : vs2;
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+ }
+ break;
+ case 64:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float64_t>(rd_num, i);
+ auto rs1 = f64(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
+
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+ }
+ break;
+ default:
+ require(0);
+ break;
}
P.VU.vstart = 0;
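
Both arms of the new switch locate the merge predicate the same way. The mask-bit addressing on its own (a sketch, assuming mask bits packed vmlen apart inside the 64-bit words backing v0):

    #include <cstdint>

    static bool mask_bit(const uint64_t* v0, unsigned vmlen, unsigned i) {
      unsigned midx = (vmlen * i) / 64;  // which 64-bit word holds the bit
      unsigned mpos = (vmlen * i) % 64;  // bit position inside that word
      return (v0[midx] >> mpos) & 0x1;
    }

    int main() {
      uint64_t v0[2] = {0x5, 0x0};
      return mask_bit(v0, 1, 2) ? 0 : 1;  // element 2, vmlen=1 -> bit 2 -> set
    }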
diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h
index bf06638..a55462b 100644
--- a/riscv/insns/vfmin_vf.h
+++ b/riscv/insns/vfmin_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_min(vs2, rs1);
+},
+{
+ vd = f64_min(vs2, rs1);
})
diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h
index 65d20ff..399b563 100644
--- a/riscv/insns/vfmin_vv.h
+++ b/riscv/insns/vfmin_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_min(vs2, vs1);
+},
+{
+ vd = f64_min(vs2, vs1);
})
diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h
index 23661b3..0f42560 100644
--- a/riscv/insns/vfmsac_vf.h
+++ b/riscv/insns/vfmsac_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h
index 952c12e..9b4ed9f 100644
--- a/riscv/insns/vfmsac_vv.h
+++ b/riscv/insns/vfmsac_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h
index 2328d07..bd968e3 100644
--- a/riscv/insns/vfmsub_vf.h
+++ b/riscv/insns/vfmsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(vd, rs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h
index a58f1e3..f8e0b3d 100644
--- a/riscv/insns/vfmsub_vv.h
+++ b/riscv/insns/vfmsub_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(vd, vs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h
index 086b6d8..9e7d481 100644
--- a/riscv/insns/vfmul_vf.h
+++ b/riscv/insns/vfmul_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mul(vs2, rs1);
+},
+{
+ vd = f64_mul(vs2, rs1);
})
diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h
index 259dc01..0e4d499 100644
--- a/riscv/insns/vfmul_vv.h
+++ b/riscv/insns/vfmul_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mul(vs1, vs2);
+},
+{
+ vd = f64_mul(vs1, vs2);
})
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index 066db80..dbfe8f9 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -2,18 +2,12 @@
require_vector;
require_fp;
require_extension('F');
-require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
+require(P.VU.vsew == e32 || P.VU.vsew == e64);
reg_t rs2_num = insn.rs2();
uint64_t vs2_0 = 0;
const reg_t sew = P.VU.vsew;
switch(sew) {
-case e8:
- vs2_0 = P.VU.elt<uint8_t>(rs2_num, 0);
- break;
-case e16:
- vs2_0 = P.VU.elt<uint16_t>(rs2_num, 0);
- break;
case e32:
vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0);
break;
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index 8ff6094..44e9e2e 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -1,19 +1,26 @@
// vfmv_s_f: vd[0] = rs1 (vs2=0)
require_vector;
-require(insn.v_vm() == 1);
require_fp;
require_extension('F');
-require(P.VU.vsew == e32);
+require(P.VU.vsew >= e32 && P.VU.vsew <= e64);
reg_t vl = P.VU.vl;
if (vl > 0) {
reg_t rd_num = insn.rd();
- reg_t sew = P.VU.vsew;
- if (FLEN == 64)
- P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v;
- else
- P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
-
- vl = 0;
+ switch(P.VU.vsew) {
+ case 32:
+ if (FLEN == 64)
+ P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
+ break;
+ case 64:
+ if (FLEN == 64)
+ P.VU.elt<uint64_t>(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt<uint64_t>(rd_num, 0) = f32(FRS1).v;
+ break;
+ }
}
+P.VU.vstart = 0;
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index f323263..75832f9 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -1,12 +1,23 @@
// vfmv_vf vd, vs1
require((insn.rd() & (P.VU.vlmul - 1)) == 0);
VI_VFP_COMMON
-reg_t sew = P.VU.vsew;
-for (reg_t i=P.VU.vstart; i<vl; ++i) {
- auto &vd = P.VU.elt<float32_t>(rd_num, i);
- auto rs1 = f32(READ_FREG(rs1_num));
+switch(P.VU.vsew) {
+ case e32:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float32_t>(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
- vd = rs1;
+ vd = rs1;
+ }
+ break;
+ case e64:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float64_t>(rd_num, i);
+ auto rs1 = f64(READ_FREG(rs1_num));
+
+ vd = rs1;
+ }
+ break;
}
P.VU.vstart = 0;
diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h
index 42c18c7..55a8eac 100644
--- a/riscv/insns/vfncvt_f_f_w.h
+++ b/riscv/insns/vfncvt_f_f_w.h
@@ -1,6 +1,10 @@
// vfncvt.f.f.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h
index 80ebe00..daf2274 100644
--- a/riscv/insns/vfncvt_f_x_w.h
+++ b/riscv/insns/vfncvt_f_x_w.h
@@ -1,6 +1,10 @@
// vfncvt.f.x.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = i64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h
index 013f57c..7f57ec5 100644
--- a/riscv/insns/vfncvt_f_xu_w.h
+++ b/riscv/insns/vfncvt_f_xu_w.h
@@ -1,6 +1,10 @@
// vfncvt.f.xu.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
auto vs2 = P.VU.elt<uint64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = ui64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h
index 77a3873..130c5b5 100644
--- a/riscv/insns/vfncvt_rod_f_f_w.h
+++ b/riscv/insns/vfncvt_rod_f_f_w.h
@@ -1,7 +1,11 @@
// vfncvt.f.f.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
softfloat_roundingMode = softfloat_round_odd;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h
index 8985f1b..cda2fe2 100644
--- a/riscv/insns/vfncvt_x_f_w.h
+++ b/riscv/insns/vfncvt_x_f_w.h
@@ -1,6 +1,10 @@
// vfncvt.x.f.v vd, vs2, vm
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i) = f64_to_i32(vs2, STATE.frm, true);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h
index 2db8d82..a009105 100644
--- a/riscv/insns/vfncvt_xu_f_w.h
+++ b/riscv/insns/vfncvt_xu_f_w.h
@@ -1,6 +1,10 @@
// vfncvt.xu.f.v vd, vs2, vm
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i) = f64_to_ui32(vs2, STATE.frm, true);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h
index 04a31bf..da58d3a 100644
--- a/riscv/insns/vfnmacc_vf.h
+++ b/riscv/insns/vfnmacc_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h
index b950df9..62a1486 100644
--- a/riscv/insns/vfnmacc_vv.h
+++ b/riscv/insns/vfnmacc_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(f64(vs2.v ^ F64_SIGN), vs1, f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h
index f8f3b83..b26f377 100644
--- a/riscv/insns/vfnmadd_vf.h
+++ b/riscv/insns/vfnmadd_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h
index f96d102..fc70574 100644
--- a/riscv/insns/vfnmadd_vv.h
+++ b/riscv/insns/vfnmadd_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h
index c3dc12c..b78d0ca 100644
--- a/riscv/insns/vfnmsac_vf.h
+++ b/riscv/insns/vfnmsac_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
+},
+{
+ vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), vd);
})
diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h
index 0ecd648..795dc38 100644
--- a/riscv/insns/vfnmsac_vv.h
+++ b/riscv/insns/vfnmsac_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
+},
+{
+ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd);
})
diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h
index 1879b9e..6c6dc27 100644
--- a/riscv/insns/vfnmsub_vf.h
+++ b/riscv/insns/vfnmsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, vs2);
})
diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h
index da9f59c..ff4a9b5 100644
--- a/riscv/insns/vfnmsub_vv.h
+++ b/riscv/insns/vfnmsub_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2);
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, vs2);
})
diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h
index 49e4293..73ec534 100644
--- a/riscv/insns/vfrdiv_vf.h
+++ b/riscv/insns/vfrdiv_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_div(rs1, vs2);
+},
+{
+ vd = f64_div(rs1, vs2);
})
diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h
index dca10bf..cb03dbb 100644
--- a/riscv/insns/vfredmax_vs.h
+++ b/riscv/insns/vfredmax_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_max(vd_0, vs2);
+},
+{
+ vd_0 = f64_max(vd_0, vs2);
})
diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h
index b4556bc..51c0bcb 100644
--- a/riscv/insns/vfredmin_vs.h
+++ b/riscv/insns/vfredmin_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_min(vd_0, vs2);
+},
+{
+ vd_0 = f64_min(vd_0, vs2);
})
diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h
index 87422ee..7de6dbb 100644
--- a/riscv/insns/vfredosum_vs.h
+++ b/riscv/insns/vfredosum_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
+},
+{
+ vd_0 = f64_add(vd_0, vs2);
})
diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h
index b50b45f..7b5cccc 100644
--- a/riscv/insns/vfredsum_vs.h
+++ b/riscv/insns/vfredsum_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
+},
+{
+ vd_0 = f64_add(vd_0, vs2);
})
diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h
index ee8ac83..d9a1986 100644
--- a/riscv/insns/vfrsub_vf.h
+++ b/riscv/insns/vfrsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_sub(rs1, vs2);
+},
+{
+ vd = f64_sub(rs1, vs2);
})
diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h
index d93f175..c7f731b 100644
--- a/riscv/insns/vfsgnj_vf.h
+++ b/riscv/insns/vfsgnj_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = fsgnj32(vs2.v, rs1.v, false, false);
+},
+{
+ vd = fsgnj64(vs2.v, rs1.v, false, false);
})
diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h
index 050dd9c..12d3d43 100644
--- a/riscv/insns/vfsgnj_vv.h
+++ b/riscv/insns/vfsgnj_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs2.v, vs1.v, false, false);
+},
+{
+ vd = fsgnj64(vs2.v, vs1.v, false, false);
})
diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h
index 303ec76..4511748 100644
--- a/riscv/insns/vfsgnjn_vf.h
+++ b/riscv/insns/vfsgnjn_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = fsgnj32(vs2.v, rs1.v, true, false);
+},
+{
+ vd = fsgnj64(vs2.v, rs1.v, true, false);
})
diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h
index 6603352..a16acf7 100644
--- a/riscv/insns/vfsgnjn_vv.h
+++ b/riscv/insns/vfsgnjn_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs2.v, vs1.v, true, false);
+},
+{
+ vd = fsgnj64(vs2.v, vs1.v, true, false);
})
diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h
index 93f4303..c423060 100644
--- a/riscv/insns/vfsgnjx_vf.h
+++ b/riscv/insns/vfsgnjx_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = fsgnj32(vs2.v, rs1.v, false, true);
+},
+{
+ vd = fsgnj64(vs2.v, rs1.v, false, true);
})
diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h
index 9cc12dc..9dbe078 100644
--- a/riscv/insns/vfsgnjx_vv.h
+++ b/riscv/insns/vfsgnjx_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs2.v, vs1.v, false, true);
+},
+{
+ vd = fsgnj64(vs2.v, vs1.v, false, true);
})
diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h
index 4931037..4a36932 100644
--- a/riscv/insns/vfsqrt_v.h
+++ b/riscv/insns/vfsqrt_v.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_sqrt(vs2);
+},
+{
+ vd = f64_sqrt(vs2);
})
diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h
index 38d6acc..a4702d0 100644
--- a/riscv/insns/vfsub_vf.h
+++ b/riscv/insns/vfsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_sub(vs2, rs1);
+},
+{
+ vd = f64_sub(vs2, rs1);
})
diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h
index 71e7a43..40545fb 100644
--- a/riscv/insns/vfsub_vv.h
+++ b/riscv/insns/vfsub_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_sub(vs2, vs1);
+},
+{
+ vd = f64_sub(vs2, vs1);
})
diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h
index 4d6b4fc..4bda2bc 100644
--- a/riscv/insns/vfwcvt_f_f_v.h
+++ b/riscv/insns/vfwcvt_f_f_v.h
@@ -1,6 +1,9 @@
// vfwcvt.f.f.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = f32_to_f64(vs2);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h
index ab5d825..346db32 100644
--- a/riscv/insns/vfwcvt_f_x_v.h
+++ b/riscv/insns/vfwcvt_f_x_v.h
@@ -1,6 +1,9 @@
// vfwcvt.f.x.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = i32_to_f64(vs2);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h
index 8af8d7c..c963abb 100644
--- a/riscv/insns/vfwcvt_f_xu_v.h
+++ b/riscv/insns/vfwcvt_f_xu_v.h
@@ -1,6 +1,9 @@
// vfwcvt.f.xu.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = ui32_to_f64(vs2);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h
index 06e81d4..9088a79 100644
--- a/riscv/insns/vfwcvt_x_f_v.h
+++ b/riscv/insns/vfwcvt_x_f_v.h
@@ -1,6 +1,9 @@
// vfwcvt.x.f.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int64_t>(rd_num, i) = f32_to_i64(vs2, STATE.frm, true);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h
index cc82481..266cbca 100644
--- a/riscv/insns/vfwcvt_xu_f_v.h
+++ b/riscv/insns/vfwcvt_xu_f_v.h
@@ -1,6 +1,9 @@
// vfwcvt.xu.f.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint64_t>(rd_num, i) = f32_to_ui64(vs2, STATE.frm, true);
set_fp_exceptions;
diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h
index b47e2c7..d6da222 100644
--- a/riscv/insns/vfwredosum_vs.h
+++ b/riscv/insns/vfwredosum_vs.h
@@ -1,4 +1,8 @@
// vfwredosum.vs vd, vs2, vs1
+require_vector;
+require(P.VU.vsew * 2 <= P.VU.ELEN);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(P.VU.vlmul <= 4);
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f64_add(vd_0, vs2);
diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h
index 3ce591b..13bd1ab 100644
--- a/riscv/insns/vfwredsum_vs.h
+++ b/riscv/insns/vfwredsum_vs.h
@@ -1,4 +1,8 @@
// vfwredsum.vs vd, vs2, vs1
+require_vector;
+require(P.VU.vsew * 2 <= P.VU.ELEN);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(P.VU.vlmul <= 4);
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f64_add(vd_0, vs2);
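
Both widening reductions now get the same three preconditions. Restated as one check (a sketch; reading the vlmul <= 4 cap as the general widening rule that the doubled-width group may not exceed 8 registers is an inference, not something the patch states):

    #include <cassert>

    static void check_wide_reduction(unsigned vsew, unsigned elen,
                                     unsigned vs2, unsigned vlmul) {
      assert(vsew * 2 <= elen);          // the 2*SEW accumulator must fit ELEN
      assert((vs2 & (vlmul - 1)) == 0);  // source group aligned to LMUL
      assert(vlmul <= 4);                // widening ops cap LMUL at 4
    }

    int main() {
      check_wide_reduction(32, 64, 8, 4);  // SEW=32 summed at 64 bits: legal
    }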
diff --git a/riscv/insns/vl1r_v.h b/riscv/insns/vl1r_v.h
index 8dabcb6..eded573 100644
--- a/riscv/insns/vl1r_v.h
+++ b/riscv/insns/vl1r_v.h
@@ -3,9 +3,7 @@ require_vector;
const reg_t baseAddr = RS1;
const reg_t vd = insn.rd();
for (reg_t i = 0; i < P.VU.vlenb; ++i) {
-
auto val = MMU.load_uint8(baseAddr + i);
- fprintf(stderr, "here: %ld: %x\n", i, val);
P.VU.elt<uint8_t>(vd, i) = val;
}
P.VU.vstart = 0;
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
index f0e7109..766f0ab 100644
--- a/riscv/insns/vmfeq_vf.h
+++ b/riscv/insns/vmfeq_vf.h
@@ -1,5 +1,9 @@
-// vfeq.vf vd, vs2, fs1
+// vmfeq.vf vd, vs2, fs1
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, rs1);
-}, false)
+},
+{
+ res = f64_eq(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
index 1be3a69..19117fc 100644
--- a/riscv/insns/vmfeq_vv.h
+++ b/riscv/insns/vmfeq_vv.h
@@ -1,5 +1,9 @@
-// vfeq.vv vd, vs2, vs1
+// vmfeq.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, vs1);
-}, true)
+},
+{
+ res = f64_eq(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
index 1c68366..c5f4c83 100644
--- a/riscv/insns/vmfge_vf.h
+++ b/riscv/insns/vmfge_vf.h
@@ -1,5 +1,9 @@
-// vfge.vf vd, vs2, rs1
+// vmfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(rs1, vs2);
-}, false)
+},
+{
+ res = f64_le(rs1, vs2);
+},
+false)
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
index 0979185..5387300 100644
--- a/riscv/insns/vmfgt_vf.h
+++ b/riscv/insns/vmfgt_vf.h
@@ -1,5 +1,9 @@
-// vfgt.vf vd, vs2, rs1
+// vmfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_lt(rs1, vs2);
-}, false)
+},
+{
+ res = f64_lt(rs1, vs2);
+},
+false)
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
index 90607ec..1a3a7c4 100644
--- a/riscv/insns/vmfle_vf.h
+++ b/riscv/insns/vmfle_vf.h
@@ -1,5 +1,9 @@
-// vfle.vf vd, vs2, rs1
+// vmfle.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, rs1);
-}, false)
+},
+{
+ res = f64_le(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
index 6ccdfec..067f1a9 100644
--- a/riscv/insns/vmfle_vv.h
+++ b/riscv/insns/vmfle_vv.h
@@ -1,5 +1,9 @@
-// vfle.vv vd, vs2, rs1
+// vmfle.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, vs1);
-}, true)
+},
+{
+ res = f64_le(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
index 6b71a4a..248071d 100644
--- a/riscv/insns/vmflt_vf.h
+++ b/riscv/insns/vmflt_vf.h
@@ -1,5 +1,9 @@
-// vflt.vf vd, vs2, rs1
+// vmflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_lt(vs2, rs1);
-}, false)
+},
+{
+ res = f64_lt(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
index a2ed8e3..71895df 100644
--- a/riscv/insns/vmflt_vv.h
+++ b/riscv/insns/vmflt_vv.h
@@ -1,5 +1,9 @@
-// vflt.vv vd, vs2, vs1
+// vmflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_lt(vs2, vs1);
-}, true)
+},
+{
+ res = f64_lt(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
index ef63678..afccbcb 100644
--- a/riscv/insns/vmfne_vf.h
+++ b/riscv/insns/vmfne_vf.h
@@ -1,5 +1,9 @@
-// vfne.vf vd, vs2, rs1
+// vmfne.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, rs1);
-}, false)
+},
+{
+ res = !f64_eq(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
index 8378a23..d5df60c 100644
--- a/riscv/insns/vmfne_vv.h
+++ b/riscv/insns/vmfne_vv.h
@@ -1,5 +1,9 @@
-// vfne.vv vd, vs2, rs1
+// vmfne.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, vs1);
-}, true)
+},
+{
+ res = !f64_eq(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmv1r_v.h b/riscv/insns/vmv1r_v.h
new file mode 100644
index 0000000..bbdeab9
--- /dev/null
+++ b/riscv/insns/vmv1r_v.h
@@ -0,0 +1,2 @@
+// vmv1r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmv2r_v.h b/riscv/insns/vmv2r_v.h
new file mode 100644
index 0000000..1ac8e09
--- /dev/null
+++ b/riscv/insns/vmv2r_v.h
@@ -0,0 +1,2 @@
+// vmv2r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmv4r_v.h b/riscv/insns/vmv4r_v.h
new file mode 100644
index 0000000..2068731
--- /dev/null
+++ b/riscv/insns/vmv4r_v.h
@@ -0,0 +1,2 @@
+// vmv4r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmv8r_v.h b/riscv/insns/vmv8r_v.h
new file mode 100644
index 0000000..2b205fc
--- /dev/null
+++ b/riscv/insns/vmv8r_v.h
@@ -0,0 +1,2 @@
+// vmv8r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h
new file mode 100644
index 0000000..6ae66d5
--- /dev/null
+++ b/riscv/insns/vmvnfr_v.h
@@ -0,0 +1,12 @@
+// vmv<nf>r.v vd, vs2
+require_vector;
+const reg_t baseAddr = RS1;
+const reg_t vd = insn.rd();
+const reg_t vs2 = insn.rs2();
+const reg_t len = insn.rs1() + 1;
+require((vd & (len - 1)) == 0);
+require((vs2 & (len - 1)) == 0);
+if (vd != vs2)
+ memcpy(&P.VU.elt<uint8_t>(vd, 0),
+ &P.VU.elt<uint8_t>(vs2, 0), P.VU.vlenb * len);
+P.VU.vstart = 0;
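
The shared header recovers the group length from the rs1 bit-field, which holds nf - 1; that agrees with the MATCH_* constants added in encoding.h above. A quick cross-check (illustrative, duplicating the constants):

    #include <cassert>
    #include <cstdint>

    int main() {
      // rs1 field (bits 19:15) of each match pattern encodes nf - 1,
      // so insn.rs1() + 1 yields the 1/2/4/8-register group length.
      const uint32_t matches[] = {0x9e003057, 0x9e00b057, 0x9e01b057, 0x9e03b057};
      const unsigned expect[]  = {1, 2, 4, 8};
      for (int k = 0; k < 4; ++k)
        assert(((matches[k] >> 15) & 0x1f) + 1 == expect[k]);
    }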
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index db1bdcc..252b196 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -384,6 +384,10 @@ riscv_insn_ext_v_alu_int = \
vmv_v_v \
vmv_v_x \
vmv_x_s \
+ vmv1r_v \
+ vmv2r_v \
+ vmv4r_v \
+ vmv8r_v \
vmxnor_mm \
vmxor_mm \
vnclip_wi \
diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc
index 373c6bb..addc223 100644
--- a/spike_main/disasm.cc
+++ b/spike_main/disasm.cc
@@ -911,10 +911,10 @@ disassembler_t::disassembler_t(int xlen)
DISASM_OPIV_VX__INSN(vssubu, 0);
DISASM_OPIV_VX__INSN(vssub, 1);
DISASM_OPIV_VXI_INSN(vsll, 1, v);
- DISASM_OPIV_VX__INSN(vaaddu, 0);
- DISASM_OPIV_VX__INSN(vaadd, 0);
- DISASM_OPIV_VX__INSN(vasubu, 0);
- DISASM_OPIV_VX__INSN(vasub, 0);
+ DISASM_INSN("vmv1r.v", vmv1r_v, 0, {&vd, &vs2});
+ DISASM_INSN("vmv2r.v", vmv2r_v, 0, {&vd, &vs2});
+ DISASM_INSN("vmv4r.v", vmv4r_v, 0, {&vd, &vs2});
+ DISASM_INSN("vmv8r.v", vmv8r_v, 0, {&vd, &vs2});
DISASM_OPIV_VX__INSN(vsmul, 1);
DISASM_OPIV_VXI_INSN(vsrl, 0, v);
DISASM_OPIV_VXI_INSN(vsra, 0, v);
@@ -937,6 +937,11 @@ disassembler_t::disassembler_t(int xlen)
//OPMVV/OPMVX
//0b00_0000
+ DISASM_OPIV_VX__INSN(vaaddu, 0);
+ DISASM_OPIV_VX__INSN(vaadd, 0);
+ DISASM_OPIV_VX__INSN(vasubu, 0);
+ DISASM_OPIV_VX__INSN(vasub, 0);
+
DISASM_OPIV_S___INSN(vredsum, 1);
DISASM_OPIV_S___INSN(vredand, 1);
DISASM_OPIV_S___INSN(vredor, 1);
@@ -1037,23 +1042,19 @@ disassembler_t::disassembler_t(int xlen)
add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \
{&vd, &vs2, &frs1, &opt, &vm})); \
- #define DISASM_VFUNARY0_INSN(name, extra, suf) \
+ #define DISASM_VFUNARY0_INSN(name, suf) \
add_insn(new disasm_insn_t(#name "cvt.xu.f." #suf, \
match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
{&vd, &vs2, &opt, &vm})); \
add_insn(new disasm_insn_t(#name "cvt.x.f." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
+ match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \
{&vd, &vs2, &opt, &vm})); \
add_insn(new disasm_insn_t(#name "cvt.f.xu." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
+ match_##name##cvt_f_xu_##suf, mask_##name##cvt_f_xu_##suf, \
{&vd, &vs2, &opt, &vm})); \
add_insn(new disasm_insn_t(#name "cvt.f.x." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
+ match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \
{&vd, &vs2, &opt, &vm})); \
- if (extra) \
- add_insn(new disasm_insn_t(#name "cvt.f.f." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
- {&vd, &vs2, &opt, &vm})); \
//OPFVV/OPFVF
//0b01_0000
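
The removed lines above show the bug this hunk fixes: all four entries pasted the xu_f match/mask, so the cvt.x.f, cvt.f.xu, and cvt.f.x variants disassembled under the wrong pattern; dropping the `extra` parameter also moves the f.f conversions out to explicit DISASM_INSN lines below. A stringified illustration of the corrected per-entry paste (hypothetical macro, not the disassembler's):

    #include <cassert>
    #include <cstring>

    // For name=vfn, suf=w the x.f entry should now reference
    // match_vfncvt_x_f_w rather than reusing match_vfncvt_xu_f_w.
    #define PASTED_NAME(name, suf) "match_" #name "cvt_x_f_" #suf

    int main() {
      assert(strcmp(PASTED_NAME(vfn, w), "match_vfncvt_x_f_w") == 0);
    }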
@@ -1086,11 +1087,13 @@ disassembler_t::disassembler_t(int xlen)
DISASM_OPIV__F_INSN(vfrdiv);
//vfunary0
- DISASM_VFUNARY0_INSN(vf, 0, v);
+ DISASM_VFUNARY0_INSN(vf, v);
- DISASM_VFUNARY0_INSN(vfw, 1, v);
+ DISASM_VFUNARY0_INSN(vfw, v);
+ DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm});
- DISASM_VFUNARY0_INSN(vfn, 1, w);
+ DISASM_VFUNARY0_INSN(vfn, w);
+ DISASM_INSN("vfncvt.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm});
DISASM_INSN("vfncvt.rod.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm});
//vfunary1