author     Andrew Waterman <andrew@sifive.com>  2020-01-13 11:07:25 -0800
committer  GitHub <noreply@github.com>          2020-01-13 11:07:25 -0800
commit     826f05fda033d98c23cfd727ec0a769d1f2a6a46
tree       364c5a84b39f7c3abb8801cecc6ec4b30f8d0928
parent     bb1cd8f9e374f1730d131bfb68462c6133e4c107
parent     e75ba052d42b1af954c09adc815b541124c2ccce
Merge pull request #378 from chihminchao/rvv-0.8-float64
Rvv 0.8 float64
-rw-r--r--  README.md | 2
-rw-r--r--  riscv/decode.h | 109
-rw-r--r--  riscv/encoding.h | 12
-rw-r--r--  riscv/insns/vfadd_vf.h | 5
-rw-r--r--  riscv/insns/vfadd_vv.h | 3
-rw-r--r--  riscv/insns/vfclass_v.h | 3
-rw-r--r--  riscv/insns/vfcvt_f_x_v.h | 4
-rw-r--r--  riscv/insns/vfcvt_f_xu_v.h | 4
-rw-r--r--  riscv/insns/vfcvt_x_f_v.h | 3
-rw-r--r--  riscv/insns/vfcvt_xu_f_v.h | 3
-rw-r--r--  riscv/insns/vfdiv_vf.h | 3
-rw-r--r--  riscv/insns/vfdiv_vv.h | 3
-rw-r--r--  riscv/insns/vfdot_vv.h | 3
-rw-r--r--  riscv/insns/vfmacc_vf.h | 3
-rw-r--r--  riscv/insns/vfmacc_vv.h | 3
-rw-r--r--  riscv/insns/vfmadd_vf.h | 3
-rw-r--r--  riscv/insns/vfmadd_vv.h | 3
-rw-r--r--  riscv/insns/vfmax_vf.h | 3
-rw-r--r--  riscv/insns/vfmax_vv.h | 3
-rw-r--r--  riscv/insns/vfmerge_vfm.h | 37
-rw-r--r--  riscv/insns/vfmin_vf.h | 3
-rw-r--r--  riscv/insns/vfmin_vv.h | 3
-rw-r--r--  riscv/insns/vfmsac_vf.h | 3
-rw-r--r--  riscv/insns/vfmsac_vv.h | 3
-rw-r--r--  riscv/insns/vfmsub_vf.h | 3
-rw-r--r--  riscv/insns/vfmsub_vv.h | 3
-rw-r--r--  riscv/insns/vfmul_vf.h | 3
-rw-r--r--  riscv/insns/vfmul_vv.h | 3
-rw-r--r--  riscv/insns/vfmv_f_s.h | 8
-rw-r--r--  riscv/insns/vfmv_s_f.h | 25
-rw-r--r--  riscv/insns/vfmv_v_f.h | 21
-rw-r--r--  riscv/insns/vfncvt_f_f_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_f_x_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_f_xu_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_rod_f_f_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_x_f_w.h | 6
-rw-r--r--  riscv/insns/vfncvt_xu_f_w.h | 6
-rw-r--r--  riscv/insns/vfnmacc_vf.h | 3
-rw-r--r--  riscv/insns/vfnmacc_vv.h | 3
-rw-r--r--  riscv/insns/vfnmadd_vf.h | 3
-rw-r--r--  riscv/insns/vfnmadd_vv.h | 3
-rw-r--r--  riscv/insns/vfnmsac_vf.h | 3
-rw-r--r--  riscv/insns/vfnmsac_vv.h | 3
-rw-r--r--  riscv/insns/vfnmsub_vf.h | 3
-rw-r--r--  riscv/insns/vfnmsub_vv.h | 3
-rw-r--r--  riscv/insns/vfrdiv_vf.h | 3
-rw-r--r--  riscv/insns/vfredmax_vs.h | 3
-rw-r--r--  riscv/insns/vfredmin_vs.h | 3
-rw-r--r--  riscv/insns/vfredosum_vs.h | 3
-rw-r--r--  riscv/insns/vfredsum_vs.h | 3
-rw-r--r--  riscv/insns/vfrsub_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnj_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnj_vv.h | 3
-rw-r--r--  riscv/insns/vfsgnjn_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnjn_vv.h | 3
-rw-r--r--  riscv/insns/vfsgnjx_vf.h | 3
-rw-r--r--  riscv/insns/vfsgnjx_vv.h | 3
-rw-r--r--  riscv/insns/vfsqrt_v.h | 3
-rw-r--r--  riscv/insns/vfsub_vf.h | 3
-rw-r--r--  riscv/insns/vfsub_vv.h | 3
-rw-r--r--  riscv/insns/vfwcvt_f_f_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_f_x_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_f_xu_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_x_f_v.h | 5
-rw-r--r--  riscv/insns/vfwcvt_xu_f_v.h | 5
-rw-r--r--  riscv/insns/vfwredosum_vs.h | 4
-rw-r--r--  riscv/insns/vfwredsum_vs.h | 4
-rw-r--r--  riscv/insns/vl1r_v.h | 2
-rw-r--r--  riscv/insns/vmfeq_vf.h | 8
-rw-r--r--  riscv/insns/vmfeq_vv.h | 8
-rw-r--r--  riscv/insns/vmfge_vf.h | 8
-rw-r--r--  riscv/insns/vmfgt_vf.h | 8
-rw-r--r--  riscv/insns/vmfle_vf.h | 8
-rw-r--r--  riscv/insns/vmfle_vv.h | 8
-rw-r--r--  riscv/insns/vmflt_vf.h | 8
-rw-r--r--  riscv/insns/vmflt_vv.h | 8
-rw-r--r--  riscv/insns/vmfne_vf.h | 8
-rw-r--r--  riscv/insns/vmfne_vv.h | 8
-rw-r--r--  riscv/insns/vmv1r_v.h | 2
-rw-r--r--  riscv/insns/vmv2r_v.h | 2
-rw-r--r--  riscv/insns/vmv4r_v.h | 2
-rw-r--r--  riscv/insns/vmv8r_v.h | 2
-rw-r--r--  riscv/insns/vmvnfr_v.h | 12
-rw-r--r--  riscv/riscv.mk.in | 4
-rw-r--r--  spike_main/disasm.cc | 33
85 files changed, 458 insertions(+), 109 deletions(-)
diff --git a/README.md b/README.md
index 3db8cf7..2e9e0ac 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,7 @@ Spike supports the following RISC-V ISA features:
- D extension, v2.2
- Q extension, v2.2
- C extension, v2.0
- - V extension, v0.8-draft-20191118, w/ Zvlsseg, w/o Zvamo/Zvediv, (_requires a 64-bit host_)
+ - V extension, v0.8, w/ Zvlsseg, w/o Zvamo/Zvediv, (_requires a 64-bit host_)
- Conformance to both RVWMO and RVTSO (Spike is sequentially consistent)
- Machine, Supervisor, and User modes, v1.11
- Debug v0.14
diff --git a/riscv/decode.h b/riscv/decode.h
index 495ffc4..21bb92b 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -441,12 +441,10 @@ static inline bool is_overlapped(const int astart, const int asize,
require(insn.rd() != 0); \
}
-#define VI_CHECK_SD \
- require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
-
#define VI_CHECK_DSS(is_vs1) \
VI_WIDE_CHECK_COMMON; \
require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
+ require((insn.rd() & (P.VU.vlmul * 2 - 1)) == 0); \
require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
if (is_vs1) {\
require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \
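
The alignment requirement added to VI_CHECK_DSS can be read on its own: a widening destination occupies a group of 2*vlmul registers, so its index must be a multiple of that group size. A minimal stand-alone sketch (illustrative names, not simulator API):

    #include <cassert>

    // vlmul is a power of two, so the group-size test reduces to a mask check.
    static bool group_aligned(unsigned reg, unsigned group_size) {
      return (reg & (group_size - 1)) == 0;
    }

    int main() {
      unsigned vlmul = 2;                    // LMUL=2: widening dest spans 4 regs
      assert(group_aligned(4, vlmul * 2));   // vd = v4 is legal
      assert(!group_aligned(6, vlmul * 2));  // vd = v6 is not: 6 % 4 != 0
    }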
@@ -1466,7 +1464,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vs3 = insn.rd(); \
- require(vs3 + nf <= NVPR); \
+ require(vs3 + nf * P.VU.vlmul <= NVPR); \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i) \
@@ -1499,7 +1497,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vd = insn.rd(); \
- require(vd + nf <= NVPR); \
+ require(vd + nf * P.VU.vlmul <= NVPR); \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vl; ++i) { \
VI_ELEMENT_SKIP(i); \
@@ -1551,6 +1549,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t rd_num = insn.rd(); \
bool early_stop = false; \
const reg_t vlmul = P.VU.vlmul; \
+ require(rd_num + nf * P.VU.vlmul <= NVPR); \
p->VU.vstart = 0; \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i); \
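
The three hunks above add the same register-file bound to the store, load, and fault-only-first segment loops: with nf fields at LMUL = vlmul, the access touches nf * vlmul registers starting at the base register, so the old vd + nf bound was too weak. Restated stand-alone (NVPR = 32 vector registers; a sketch, not simulator code):

    #include <cassert>

    static bool segment_fits(unsigned vd, unsigned nf, unsigned vlmul) {
      const unsigned NVPR = 32;        // number of architectural vector registers
      return vd + nf * vlmul <= NVPR;  // the whole nf * vlmul group must exist
    }

    int main() {
      assert(segment_fits(24, 4, 2));   // v24..v31: exactly fits
      assert(!segment_fits(28, 4, 2));  // v28 + 8 registers would run past v31
    }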
@@ -1595,9 +1594,9 @@ for (reg_t i = 0; i < vlmax; ++i) { \
// vector: vfp helper
//
#define VI_VFP_COMMON \
- require_extension('F'); \
require_fp; \
- require(P.VU.vsew == 32); \
+ require((P.VU.vsew == e32 && p->supports_extension('F')) || \
+ (P.VU.vsew == e64 && p->supports_extension('D'))); \
require_vector;\
reg_t vl = P.VU.vl; \
reg_t rd_num = insn.rd(); \
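
VI_VFP_COMMON now ties the element width to the scalar FP extensions instead of hard-coding single precision. Restated as a predicate (a sketch with plain booleans; the macro itself queries p->supports_extension):

    #include <cassert>

    // SEW=32 vector FP needs F, SEW=64 needs D; other SEWs stay illegal.
    static bool vfp_legal(unsigned vsew, bool has_F, bool has_D) {
      return (vsew == 32 && has_F) || (vsew == 64 && has_D);
    }

    int main() {
      assert(vfp_legal(64, true, true));    // FD core: e64 vector FP now allowed
      assert(!vfp_legal(64, true, false));  // F-only core: e64 stays illegal
      assert(!vfp_legal(16, true, true));   // e16 still rejected
    }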
@@ -1621,14 +1620,14 @@ for (reg_t i = 0; i < vlmax; ++i) { \
uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx); \
uint64_t res = 0;
-#define VI_VFP_LOOP_REDUCTION_BASE \
- VI_VFP_COMMON \
- float32_t vd_0 = P.VU.elt<float32_t>(rd_num, 0); \
- float32_t vs1_0 = P.VU.elt<float32_t>(rs1_num, 0); \
+#define VI_VFP_LOOP_REDUCTION_BASE(width) \
+ float##width##_t vd_0 = P.VU.elt<float##width##_t>(rd_num, 0); \
+ float##width##_t vs1_0 = P.VU.elt<float##width##_t>(rs1_num, 0); \
vd_0 = vs1_0;\
for (reg_t i=P.VU.vstart; i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
- int32_t &vd = P.VU.elt<int32_t>(rd_num, i); \
+ int##width##_t &vd = P.VU.elt<int##width##_t>(rd_num, i); \
+ float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); \
#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \
VI_VFP_COMMON \
@@ -1648,19 +1647,18 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
P.VU.vstart = 0; \
- set_fp_exceptions; \
if (vl > 0) { \
P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \
}
#define VI_VFP_LOOP_CMP_END \
switch(P.VU.vsew) { \
- case e32: { \
+ case e32: \
+ case e64: { \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
break; \
} \
case e16: \
- case e8: \
default: \
require(0); \
break; \
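
The compare loop stores one result bit per element into the destination mask register through the read-modify-write visible above. In isolation, assuming a one-bit mask field per element (vmlen = 1; in the macro, mmask and mpos come from VI_VFP_LOOP_CMP_BASE):

    #include <cstdint>

    // Write compare result `res` into bit mpos of the 64-bit mask word vdi,
    // leaving every other bit untouched.
    static void write_mask_bit(uint64_t& vdi, unsigned mpos, bool res) {
      const uint64_t mmask = uint64_t(1) << mpos;
      vdi = (vdi & ~mmask) | ((uint64_t(res) << mpos) & mmask);
    }

    int main() {
      uint64_t vdi = 0xff;
      write_mask_bit(vdi, 3, false);  // clears bit 3 -> 0xf7
      write_mask_bit(vdi, 8, true);   // sets bit 8  -> 0x1f7
    }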
@@ -1669,7 +1667,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
P.VU.vstart = 0; \
set_fp_exceptions;
-#define VI_VFP_VV_LOOP(BODY) \
+#define VI_VFP_VV_LOOP(BODY32, BODY64) \
VI_CHECK_SSS(true); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
@@ -1677,12 +1675,19 @@ for (reg_t i = 0; i < vlmax; ++i) { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- BODY; \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ BODY64; \
set_fp_exceptions; \
break; \
}\
case e16: \
- case e8: \
default: \
require(0); \
break; \
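
Stripped of the macro plumbing, the rewritten VI_VFP_VV_LOOP is a SEW switch over two caller-supplied bodies. A non-macro analogue (illustrative only; the softfloat element types are elided):

    #include <cassert>

    template <typename Body32, typename Body64>
    void vfp_vv_dispatch(unsigned vsew, Body32 body32, Body64 body64) {
      switch (vsew) {
        case 32: body32(); break;          // float32_t path (requires F)
        case 64: body64(); break;          // float64_t path (requires D)
        default: assert(!"reserved SEW");  // mirrors require(0)
      }
    }

    int main() {
      float f = 1.0f; double d = 1.0;
      vfp_vv_dispatch(64, [&]{ f += f; }, [&]{ d += d; });  // runs the f64 body
    }

Each instruction header then supplies one body per element width, as the vfadd_vv.h hunk further down shows.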
@@ -1690,22 +1695,39 @@ for (reg_t i = 0; i < vlmax; ++i) { \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
-#define VI_VFP_VV_LOOP_REDUCTION(BODY) \
+#define VI_VFP_VV_LOOP_REDUCTION(BODY32, BODY64) \
VI_CHECK_REDUCTION(false) \
- VI_VFP_LOOP_REDUCTION_BASE \
- float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- BODY; \
- DEBUG_RVV_FP_VV; \
- VI_VFP_LOOP_REDUCTION_END(e32)
+ VI_VFP_COMMON \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ VI_VFP_LOOP_REDUCTION_BASE(32) \
+ BODY32; \
+ set_fp_exceptions; \
+ VI_VFP_LOOP_REDUCTION_END(e32) \
+ break; \
+ }\
+ case e64: {\
+ VI_VFP_LOOP_REDUCTION_BASE(64) \
+ BODY64; \
+ set_fp_exceptions; \
+ VI_VFP_LOOP_REDUCTION_END(e64) \
+ break; \
+ }\
+ case e16: \
+ default: \
+ require(0); \
+ break; \
+ }; \
#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \
VI_VFP_LOOP_WIDE_REDUCTION_BASE \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
BODY; \
+ set_fp_exceptions; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_REDUCTION_END(e64)
-#define VI_VFP_VF_LOOP(BODY) \
+#define VI_VFP_VF_LOOP(BODY32, BODY64) \
VI_CHECK_SSS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
@@ -1713,7 +1735,15 @@ for (reg_t i = 0; i < vlmax; ++i) { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
- BODY; \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t rs1 = f64(READ_FREG(rs1_num)); \
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ BODY64; \
set_fp_exceptions; \
break; \
}\
@@ -1726,12 +1756,31 @@ for (reg_t i = 0; i < vlmax; ++i) { \
DEBUG_RVV_FP_VF; \
VI_VFP_LOOP_END
-#define VI_VFP_LOOP_CMP(BODY, is_vs1) \
+#define VI_VFP_LOOP_CMP(BODY32, BODY64, is_vs1) \
VI_CHECK_MSS(is_vs1); \
VI_VFP_LOOP_CMP_BASE \
- BODY; \
- set_fp_exceptions; \
- DEBUG_RVV_FP_VV; \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
+ float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
+ float32_t rs1 = f32(READ_FREG(rs1_num)); \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \
+ float64_t rs1 = f64(READ_FREG(rs1_num)); \
+ BODY64; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ default: \
+ require(0); \
+ break; \
+ }; \
VI_VFP_LOOP_CMP_END \
#define VI_VFP_VF_LOOP_WIDE(BODY) \
diff --git a/riscv/encoding.h b/riscv/encoding.h
index 6d691e7..26142db 100644
--- a/riscv/encoding.h
+++ b/riscv/encoding.h
@@ -1266,6 +1266,14 @@
#define MASK_VSADD_VI 0xfc00707f
#define MATCH_VSLL_VI 0x94003057
#define MASK_VSLL_VI 0xfc00707f
+#define MATCH_VMV1R_V 0x9e003057
+#define MASK_VMV1R_V 0xfe0ff07f
+#define MATCH_VMV2R_V 0x9e00b057
+#define MASK_VMV2R_V 0xfe0ff07f
+#define MATCH_VMV4R_V 0x9e01b057
+#define MASK_VMV4R_V 0xfe0ff07f
+#define MATCH_VMV8R_V 0x9e03b057
+#define MASK_VMV8R_V 0xfe0ff07f
#define MATCH_VSRL_VI 0xa0003057
#define MASK_VSRL_VI 0xfc00707f
#define MATCH_VSRA_VI 0xa4003057
@@ -2305,6 +2313,10 @@ DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI)
DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI)
DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI)
DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI)
+DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V)
+DECLARE_INSN(vmv2r_v, MATCH_VMV2R_V, MASK_VMV2R_V)
+DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V)
+DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V)
DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI)
DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI)
DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI)
diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h
index 60dec4a..bdb7f75 100644
--- a/riscv/insns/vfadd_vf.h
+++ b/riscv/insns/vfadd_vf.h
@@ -1,5 +1,8 @@
// vfadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
- vd = f32_add(rs1, vs2);
+ vd = f32_add(rs1, vs2);
+},
+{
+ vd = f64_add(rs1, vs2);
})
diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h
index de0ae53..b333a8a 100644
--- a/riscv/insns/vfadd_vv.h
+++ b/riscv/insns/vfadd_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_add(vs1, vs2);
+},
+{
+ vd = f64_add(vs1, vs2);
})
diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h
index 75f29a2..8ee092f 100644
--- a/riscv/insns/vfclass_v.h
+++ b/riscv/insns/vfclass_v.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd.v = f32_classify(vs2);
+},
+{
+ vd.v = f64_classify(vs2);
})
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
index f6604fb..fdaa697 100644
--- a/riscv/insns/vfcvt_f_x_v.h
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -3,4 +3,8 @@ VI_VFP_VF_LOOP
({
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
+},
+{
+ auto vs2_i = P.VU.elt<int64_t>(rs2_num, i);
+ vd = i64_to_f64(vs2_i);
})
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
index 2c845ac..01ea61c 100644
--- a/riscv/insns/vfcvt_f_xu_v.h
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -3,4 +3,8 @@ VI_VFP_VF_LOOP
({
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
+},
+{
+ auto vs2_u = P.VU.elt<uint64_t>(rs2_num, i);
+ vd = ui64_to_f64(vs2_u);
})
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
index a9eedc4..96bc481 100644
--- a/riscv/insns/vfcvt_x_f_v.h
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
+},
+{
+ P.VU.elt<int64_t>(rd_num, i) = f64_to_i64(vs2, STATE.frm, true);
})
diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h
index 76c7735..5f19f90 100644
--- a/riscv/insns/vfcvt_xu_f_v.h
+++ b/riscv/insns/vfcvt_xu_f_v.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
+},
+{
+ P.VU.elt<uint64_t>(rd_num, i) = f64_to_ui64(vs2, STATE.frm, true);
})
diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h
index 2b8124c..ce21730 100644
--- a/riscv/insns/vfdiv_vf.h
+++ b/riscv/insns/vfdiv_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_div(vs2, rs1);
+},
+{
+ vd = f64_div(vs2, rs1);
})
diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h
index c20ff1d..8a49a91 100644
--- a/riscv/insns/vfdiv_vv.h
+++ b/riscv/insns/vfdiv_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_div(vs2, vs1);
+},
+{
+ vd = f64_div(vs2, vs1);
})
diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h
index 11c8bce..85d0b8a 100644
--- a/riscv/insns/vfdot_vv.h
+++ b/riscv/insns/vfdot_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_add(vd, f32_mul(vs2, vs1));
+},
+{
+ vd = f64_add(vd, f64_mul(vs2, vs1));
})
diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h
index 5013d34..fca4184 100644
--- a/riscv/insns/vfmacc_vf.h
+++ b/riscv/insns/vfmacc_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, vs2, vd);
+},
+{
+ vd = f64_mulAdd(rs1, vs2, vd);
})
diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h
index 663a648..f1caf33 100644
--- a/riscv/insns/vfmacc_vv.h
+++ b/riscv/insns/vfmacc_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vs1, vs2, vd);
+},
+{
+ vd = f64_mulAdd(vs1, vs2, vd);
})
diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h
index 920c392..7707dae 100644
--- a/riscv/insns/vfmadd_vf.h
+++ b/riscv/insns/vfmadd_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(vd, rs1, vs2);
+},
+{
+ vd = f64_mulAdd(vd, rs1, vs2);
})
diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h
index c967ec3..a095c38 100644
--- a/riscv/insns/vfmadd_vv.h
+++ b/riscv/insns/vfmadd_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vd, vs1, vs2);
+},
+{
+ vd = f64_mulAdd(vd, vs1, vs2);
})
diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h
index eb70e48..a8df880 100644
--- a/riscv/insns/vfmax_vf.h
+++ b/riscv/insns/vfmax_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_max(vs2, rs1);
+},
+{
+ vd = f64_max(vs2, rs1);
})
diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h
index 6d12f08..2329e74 100644
--- a/riscv/insns/vfmax_vv.h
+++ b/riscv/insns/vfmax_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_max(vs2, vs1);
+},
+{
+ vd = f64_max(vs2, vs1);
})
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index 0ffa49b..639809d 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -3,16 +3,37 @@ require(insn.rd() != 0);
VI_CHECK_SSS(false);
VI_VFP_COMMON;
reg_t sew = P.VU.vsew;
-for (reg_t i=P.VU.vstart; i<vl; ++i) {
- auto &vd = P.VU.elt<float32_t>(rd_num, i);
- auto rs1 = f32(READ_FREG(rs1_num));
- auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
- int midx = (P.VU.vmlen * i) / 64;
- int mpos = (P.VU.vmlen * i) % 64;
- bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+switch(P.VU.vsew) {
+ case 32:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float32_t>(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
- vd = use_first ? rs1 : vs2;
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+ }
+ break;
+ case 64:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float64_t>(rd_num, i);
+ auto rs1 = f64(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
+
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+ }
+ break;
+ default:
+ require(0);
+ break;
}
P.VU.vstart = 0;
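
Both arms of the new switch locate the merge predicate the same way. The mask-bit addressing on its own (a sketch, assuming mask bits packed vmlen apart inside the 64-bit words backing v0):

    #include <cstdint>

    static bool mask_bit(const uint64_t* v0, unsigned vmlen, unsigned i) {
      unsigned midx = (vmlen * i) / 64;  // which 64-bit word holds the bit
      unsigned mpos = (vmlen * i) % 64;  // bit position inside that word
      return (v0[midx] >> mpos) & 0x1;
    }

    int main() {
      uint64_t v0[2] = {0x5, 0x0};
      return mask_bit(v0, 1, 2) ? 0 : 1;  // element 2, vmlen=1 -> bit 2 -> set
    }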
diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h
index bf06638..a55462b 100644
--- a/riscv/insns/vfmin_vf.h
+++ b/riscv/insns/vfmin_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_min(vs2, rs1);
+},
+{
+ vd = f64_min(vs2, rs1);
})
diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h
index 65d20ff..399b563 100644
--- a/riscv/insns/vfmin_vv.h
+++ b/riscv/insns/vfmin_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_min(vs2, vs1);
+},
+{
+ vd = f64_min(vs2, vs1);
})
diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h
index 23661b3..0f42560 100644
--- a/riscv/insns/vfmsac_vf.h
+++ b/riscv/insns/vfmsac_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h
index 952c12e..9b4ed9f 100644
--- a/riscv/insns/vfmsac_vv.h
+++ b/riscv/insns/vfmsac_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h
index 2328d07..bd968e3 100644
--- a/riscv/insns/vfmsub_vf.h
+++ b/riscv/insns/vfmsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(vd, rs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h
index a58f1e3..f8e0b3d 100644
--- a/riscv/insns/vfmsub_vv.h
+++ b/riscv/insns/vfmsub_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(vd, vs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h
index 086b6d8..9e7d481 100644
--- a/riscv/insns/vfmul_vf.h
+++ b/riscv/insns/vfmul_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mul(vs2, rs1);
+},
+{
+ vd = f64_mul(vs2, rs1);
})
diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h
index 259dc01..0e4d499 100644
--- a/riscv/insns/vfmul_vv.h
+++ b/riscv/insns/vfmul_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mul(vs1, vs2);
+},
+{
+ vd = f64_mul(vs1, vs2);
})
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index 066db80..dbfe8f9 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -2,18 +2,12 @@
require_vector;
require_fp;
require_extension('F');
-require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
+require(P.VU.vsew == e32 || P.VU.vsew == e64);
reg_t rs2_num = insn.rs2();
uint64_t vs2_0 = 0;
const reg_t sew = P.VU.vsew;
switch(sew) {
-case e8:
- vs2_0 = P.VU.elt<uint8_t>(rs2_num, 0);
- break;
-case e16:
- vs2_0 = P.VU.elt<uint16_t>(rs2_num, 0);
- break;
case e32:
vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0);
break;
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index 8ff6094..44e9e2e 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -1,19 +1,26 @@
// vfmv_s_f: vd[0] = rs1 (vs2=0)
require_vector;
-require(insn.v_vm() == 1);
require_fp;
require_extension('F');
-require(P.VU.vsew == e32);
+require(P.VU.vsew >= e32 && P.VU.vsew <= e64);
reg_t vl = P.VU.vl;
if (vl > 0) {
reg_t rd_num = insn.rd();
- reg_t sew = P.VU.vsew;
- if (FLEN == 64)
- P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v;
- else
- P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
-
- vl = 0;
+ switch(P.VU.vsew) {
+ case 32:
+ if (FLEN == 64)
+ P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
+ break;
+ case 64:
+ if (FLEN == 64)
+ P.VU.elt<uint64_t>(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt<uint64_t>(rd_num, 0) = f32(FRS1).v;
+ break;
+ }
}
+P.VU.vstart = 0;
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index f323263..75832f9 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -1,12 +1,23 @@
// vfmv_vf vd, vs1
require((insn.rd() & (P.VU.vlmul - 1)) == 0);
VI_VFP_COMMON
-reg_t sew = P.VU.vsew;
-for (reg_t i=P.VU.vstart; i<vl; ++i) {
- auto &vd = P.VU.elt<float32_t>(rd_num, i);
- auto rs1 = f32(READ_FREG(rs1_num));
+switch(P.VU.vsew) {
+ case e32:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float32_t>(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
- vd = rs1;
+ vd = rs1;
+ }
+ break;
+ case e64:
+ for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float64_t>(rd_num, i);
+ auto rs1 = f64(READ_FREG(rs1_num));
+
+ vd = rs1;
+ }
+ break;
}
P.VU.vstart = 0;
diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h
index 42c18c7..55a8eac 100644
--- a/riscv/insns/vfncvt_f_f_w.h
+++ b/riscv/insns/vfncvt_f_f_w.h
@@ -1,6 +1,10 @@
// vfncvt.f.f.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h
index 80ebe00..daf2274 100644
--- a/riscv/insns/vfncvt_f_x_w.h
+++ b/riscv/insns/vfncvt_f_x_w.h
@@ -1,6 +1,10 @@
// vfncvt.f.x.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = i64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h
index 013f57c..7f57ec5 100644
--- a/riscv/insns/vfncvt_f_xu_w.h
+++ b/riscv/insns/vfncvt_f_xu_w.h
@@ -1,6 +1,10 @@
// vfncvt.f.xu.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
auto vs2 = P.VU.elt<uint64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = ui64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h
index 77a3873..130c5b5 100644
--- a/riscv/insns/vfncvt_rod_f_f_w.h
+++ b/riscv/insns/vfncvt_rod_f_f_w.h
@@ -1,7 +1,11 @@
// vfncvt.f.f.v vd, vs2, vm
-VI_CHECK_SD;
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
softfloat_roundingMode = softfloat_round_odd;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h
index 8985f1b..cda2fe2 100644
--- a/riscv/insns/vfncvt_x_f_w.h
+++ b/riscv/insns/vfncvt_x_f_w.h
@@ -1,6 +1,10 @@
// vfncvt.x.f.v vd, vs2, vm
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<int32_t>(rd_num, i) = f64_to_i32(vs2, STATE.frm, true);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h
index 2db8d82..a009105 100644
--- a/riscv/insns/vfncvt_xu_f_w.h
+++ b/riscv/insns/vfncvt_xu_f_w.h
@@ -1,6 +1,10 @@
// vfncvt.xu.f.v vd, vs2, vm
+VI_CHECK_SDS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_SD;
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
P.VU.elt<uint32_t>(rd_num, i) = f64_to_ui32(vs2, STATE.frm, true);
+ set_fp_exceptions;
VI_VFP_LOOP_END
diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h
index 04a31bf..da58d3a 100644
--- a/riscv/insns/vfnmacc_vf.h
+++ b/riscv/insns/vfnmacc_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h
index b950df9..62a1486 100644
--- a/riscv/insns/vfnmacc_vv.h
+++ b/riscv/insns/vfnmacc_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(f64(vs2.v ^ F64_SIGN), vs1, f64(vd.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h
index f8f3b83..b26f377 100644
--- a/riscv/insns/vfnmadd_vf.h
+++ b/riscv/insns/vfnmadd_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h
index f96d102..fc70574 100644
--- a/riscv/insns/vfnmadd_vv.h
+++ b/riscv/insns/vfnmadd_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN));
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, f64(vs2.v ^ F64_SIGN));
})
diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h
index c3dc12c..b78d0ca 100644
--- a/riscv/insns/vfnmsac_vf.h
+++ b/riscv/insns/vfnmsac_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
+},
+{
+ vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), vd);
})
diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h
index 0ecd648..795dc38 100644
--- a/riscv/insns/vfnmsac_vv.h
+++ b/riscv/insns/vfnmsac_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
+},
+{
+ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd);
})
diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h
index 1879b9e..6c6dc27 100644
--- a/riscv/insns/vfnmsub_vf.h
+++ b/riscv/insns/vfnmsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, vs2);
})
diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h
index da9f59c..ff4a9b5 100644
--- a/riscv/insns/vfnmsub_vv.h
+++ b/riscv/insns/vfnmsub_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2);
+},
+{
+ vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, vs2);
})
diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h
index 49e4293..73ec534 100644
--- a/riscv/insns/vfrdiv_vf.h
+++ b/riscv/insns/vfrdiv_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_div(rs1, vs2);
+},
+{
+ vd = f64_div(rs1, vs2);
})
diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h
index dca10bf..cb03dbb 100644
--- a/riscv/insns/vfredmax_vs.h
+++ b/riscv/insns/vfredmax_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_max(vd_0, vs2);
+},
+{
+ vd_0 = f64_max(vd_0, vs2);
})
diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h
index b4556bc..51c0bcb 100644
--- a/riscv/insns/vfredmin_vs.h
+++ b/riscv/insns/vfredmin_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_min(vd_0, vs2);
+},
+{
+ vd_0 = f64_min(vd_0, vs2);
})
diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h
index 87422ee..7de6dbb 100644
--- a/riscv/insns/vfredosum_vs.h
+++ b/riscv/insns/vfredosum_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
+},
+{
+ vd_0 = f64_add(vd_0, vs2);
})
diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h
index b50b45f..7b5cccc 100644
--- a/riscv/insns/vfredsum_vs.h
+++ b/riscv/insns/vfredsum_vs.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP_REDUCTION
({
vd_0 = f32_add(vd_0, vs2);
+},
+{
+ vd_0 = f64_add(vd_0, vs2);
})
diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h
index ee8ac83..d9a1986 100644
--- a/riscv/insns/vfrsub_vf.h
+++ b/riscv/insns/vfrsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_sub(rs1, vs2);
+},
+{
+ vd = f64_sub(rs1, vs2);
})
diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h
index d93f175..c7f731b 100644
--- a/riscv/insns/vfsgnj_vf.h
+++ b/riscv/insns/vfsgnj_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = fsgnj32(vs2.v, rs1.v, false, false);
+},
+{
+ vd = fsgnj64(vs2.v, rs1.v, false, false);
})
diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h
index 050dd9c..12d3d43 100644
--- a/riscv/insns/vfsgnj_vv.h
+++ b/riscv/insns/vfsgnj_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs2.v, vs1.v, false, false);
+},
+{
+ vd = fsgnj64(vs2.v, vs1.v, false, false);
})
diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h
index 303ec76..4511748 100644
--- a/riscv/insns/vfsgnjn_vf.h
+++ b/riscv/insns/vfsgnjn_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = fsgnj32(vs2.v, rs1.v, true, false);
+},
+{
+ vd = fsgnj64(vs2.v, rs1.v, true, false);
})
diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h
index 6603352..a16acf7 100644
--- a/riscv/insns/vfsgnjn_vv.h
+++ b/riscv/insns/vfsgnjn_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs2.v, vs1.v, true, false);
+},
+{
+ vd = fsgnj64(vs2.v, vs1.v, true, false);
})
diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h
index 93f4303..c423060 100644
--- a/riscv/insns/vfsgnjx_vf.h
+++ b/riscv/insns/vfsgnjx_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = fsgnj32(vs2.v, rs1.v, false, true);
+},
+{
+ vd = fsgnj64(vs2.v, rs1.v, false, true);
})
diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h
index 9cc12dc..9dbe078 100644
--- a/riscv/insns/vfsgnjx_vv.h
+++ b/riscv/insns/vfsgnjx_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = fsgnj32(vs2.v, vs1.v, false, true);
+},
+{
+ vd = fsgnj64(vs2.v, vs1.v, false, true);
})
diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h
index 4931037..4a36932 100644
--- a/riscv/insns/vfsqrt_v.h
+++ b/riscv/insns/vfsqrt_v.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_sqrt(vs2);
+},
+{
+ vd = f64_sqrt(vs2);
})
diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h
index 38d6acc..a4702d0 100644
--- a/riscv/insns/vfsub_vf.h
+++ b/riscv/insns/vfsub_vf.h
@@ -2,4 +2,7 @@
VI_VFP_VF_LOOP
({
vd = f32_sub(vs2, rs1);
+},
+{
+ vd = f64_sub(vs2, rs1);
})
diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h
index 71e7a43..40545fb 100644
--- a/riscv/insns/vfsub_vv.h
+++ b/riscv/insns/vfsub_vv.h
@@ -2,4 +2,7 @@
VI_VFP_VV_LOOP
({
vd = f32_sub(vs2, vs1);
+},
+{
+ vd = f64_sub(vs2, vs1);
})
diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h
index 4d6b4fc..4bda2bc 100644
--- a/riscv/insns/vfwcvt_f_f_v.h
+++ b/riscv/insns/vfwcvt_f_f_v.h
@@ -1,6 +1,9 @@
// vfwcvt.f.f.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = f32_to_f64(vs2);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h
index ab5d825..346db32 100644
--- a/riscv/insns/vfwcvt_f_x_v.h
+++ b/riscv/insns/vfwcvt_f_x_v.h
@@ -1,6 +1,9 @@
// vfwcvt.f.x.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = i32_to_f64(vs2);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h
index 8af8d7c..c963abb 100644
--- a/riscv/insns/vfwcvt_f_xu_v.h
+++ b/riscv/insns/vfwcvt_f_xu_v.h
@@ -1,6 +1,9 @@
// vfwcvt.f.xu.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
P.VU.elt<float64_t>(rd_num, i) = ui32_to_f64(vs2);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h
index 06e81d4..9088a79 100644
--- a/riscv/insns/vfwcvt_x_f_v.h
+++ b/riscv/insns/vfwcvt_x_f_v.h
@@ -1,6 +1,9 @@
// vfwcvt.x.f.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<int64_t>(rd_num, i) = f32_to_i64(vs2, STATE.frm, true);
set_fp_exceptions;
diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h
index cc82481..266cbca 100644
--- a/riscv/insns/vfwcvt_xu_f_v.h
+++ b/riscv/insns/vfwcvt_xu_f_v.h
@@ -1,6 +1,9 @@
// vfwcvt.xu.f.v vd, vs2, vm
+VI_CHECK_DSS(false);
+if (P.VU.vsew == e32)
+ require(p->supports_extension('D'));
+
VI_VFP_LOOP_BASE
- VI_CHECK_DSS(false);
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
P.VU.elt<uint64_t>(rd_num, i) = f32_to_ui64(vs2, STATE.frm, true);
set_fp_exceptions;
diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h
index b47e2c7..d6da222 100644
--- a/riscv/insns/vfwredosum_vs.h
+++ b/riscv/insns/vfwredosum_vs.h
@@ -1,4 +1,8 @@
// vfwredosum.vs vd, vs2, vs1
+require_vector;
+require(P.VU.vsew * 2 <= P.VU.ELEN);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(P.VU.vlmul <= 4);
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f64_add(vd_0, vs2);
diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h
index 3ce591b..13bd1ab 100644
--- a/riscv/insns/vfwredsum_vs.h
+++ b/riscv/insns/vfwredsum_vs.h
@@ -1,4 +1,8 @@
// vfwredsum.vs vd, vs2, vs1
+require_vector;
+require(P.VU.vsew * 2 <= P.VU.ELEN);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(P.VU.vlmul <= 4);
VI_VFP_VV_LOOP_WIDE_REDUCTION
({
vd_0 = f64_add(vd_0, vs2);
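
Both widening reductions now get the same three preconditions. Restated as one check (a sketch; reading the vlmul <= 4 cap as the general widening rule that the doubled-width group may not exceed 8 registers is an inference, not something the patch states):

    #include <cassert>

    static void check_wide_reduction(unsigned vsew, unsigned elen,
                                     unsigned vs2, unsigned vlmul) {
      assert(vsew * 2 <= elen);          // the 2*SEW accumulator must fit ELEN
      assert((vs2 & (vlmul - 1)) == 0);  // source group aligned to LMUL
      assert(vlmul <= 4);                // widening ops cap LMUL at 4
    }

    int main() {
      check_wide_reduction(32, 64, 8, 4);  // SEW=32 summed at 64 bits: legal
    }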
diff --git a/riscv/insns/vl1r_v.h b/riscv/insns/vl1r_v.h
index 8dabcb6..eded573 100644
--- a/riscv/insns/vl1r_v.h
+++ b/riscv/insns/vl1r_v.h
@@ -3,9 +3,7 @@ require_vector;
const reg_t baseAddr = RS1;
const reg_t vd = insn.rd();
for (reg_t i = 0; i < P.VU.vlenb; ++i) {
-
auto val = MMU.load_uint8(baseAddr + i);
- fprintf(stderr, "here: %ld: %x\n", i, val);
P.VU.elt<uint8_t>(vd, i) = val;
}
P.VU.vstart = 0;
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
index f0e7109..766f0ab 100644
--- a/riscv/insns/vmfeq_vf.h
+++ b/riscv/insns/vmfeq_vf.h
@@ -1,5 +1,9 @@
-// vfeq.vf vd, vs2, fs1
+// vmfeq.vf vd, vs2, fs1
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, rs1);
-}, false)
+},
+{
+ res = f64_eq(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
index 1be3a69..19117fc 100644
--- a/riscv/insns/vmfeq_vv.h
+++ b/riscv/insns/vmfeq_vv.h
@@ -1,5 +1,9 @@
-// vfeq.vv vd, vs2, vs1
+// vmfeq.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, vs1);
-}, true)
+},
+{
+ res = f64_eq(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
index 1c68366..c5f4c83 100644
--- a/riscv/insns/vmfge_vf.h
+++ b/riscv/insns/vmfge_vf.h
@@ -1,5 +1,9 @@
-// vfge.vf vd, vs2, rs1
+// vmfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(rs1, vs2);
-}, false)
+},
+{
+ res = f64_le(rs1, vs2);
+},
+false)
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
index 0979185..5387300 100644
--- a/riscv/insns/vmfgt_vf.h
+++ b/riscv/insns/vmfgt_vf.h
@@ -1,5 +1,9 @@
-// vfgt.vf vd, vs2, rs1
+// vmfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_lt(rs1, vs2);
-}, false)
+},
+{
+ res = f64_lt(rs1, vs2);
+},
+false)
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
index 90607ec..1a3a7c4 100644
--- a/riscv/insns/vmfle_vf.h
+++ b/riscv/insns/vmfle_vf.h
@@ -1,5 +1,9 @@
-// vfle.vf vd, vs2, rs1
+// vmfle.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, rs1);
-}, false)
+},
+{
+ res = f64_le(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
index 6ccdfec..067f1a9 100644
--- a/riscv/insns/vmfle_vv.h
+++ b/riscv/insns/vmfle_vv.h
@@ -1,5 +1,9 @@
-// vfle.vv vd, vs2, rs1
+// vmfle.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, vs1);
-}, true)
+},
+{
+ res = f64_le(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
index 6b71a4a..248071d 100644
--- a/riscv/insns/vmflt_vf.h
+++ b/riscv/insns/vmflt_vf.h
@@ -1,5 +1,9 @@
-// vflt.vf vd, vs2, rs1
+// vmflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = f32_lt(vs2, rs1);
-}, false)
+},
+{
+ res = f64_lt(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
index a2ed8e3..71895df 100644
--- a/riscv/insns/vmflt_vv.h
+++ b/riscv/insns/vmflt_vv.h
@@ -1,5 +1,9 @@
-// vflt.vv vd, vs2, vs1
+// vmflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
res = f32_lt(vs2, vs1);
-}, true)
+},
+{
+ res = f64_lt(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
index ef63678..afccbcb 100644
--- a/riscv/insns/vmfne_vf.h
+++ b/riscv/insns/vmfne_vf.h
@@ -1,5 +1,9 @@
-// vfne.vf vd, vs2, rs1
+// vmfne.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, rs1);
-}, false)
+},
+{
+ res = !f64_eq(vs2, rs1);
+},
+false)
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
index 8378a23..d5df60c 100644
--- a/riscv/insns/vmfne_vv.h
+++ b/riscv/insns/vmfne_vv.h
@@ -1,5 +1,9 @@
-// vfne.vv vd, vs2, rs1
+// vmfne.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, vs1);
-}, true)
+},
+{
+ res = !f64_eq(vs2, vs1);
+},
+true)
diff --git a/riscv/insns/vmv1r_v.h b/riscv/insns/vmv1r_v.h
new file mode 100644
index 0000000..bbdeab9
--- /dev/null
+++ b/riscv/insns/vmv1r_v.h
@@ -0,0 +1,2 @@
+// vmv1r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmv2r_v.h b/riscv/insns/vmv2r_v.h
new file mode 100644
index 0000000..1ac8e09
--- /dev/null
+++ b/riscv/insns/vmv2r_v.h
@@ -0,0 +1,2 @@
+// vmv2r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmv4r_v.h b/riscv/insns/vmv4r_v.h
new file mode 100644
index 0000000..2068731
--- /dev/null
+++ b/riscv/insns/vmv4r_v.h
@@ -0,0 +1,2 @@
+// vmv4r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmv8r_v.h b/riscv/insns/vmv8r_v.h
new file mode 100644
index 0000000..2b205fc
--- /dev/null
+++ b/riscv/insns/vmv8r_v.h
@@ -0,0 +1,2 @@
+// vmv8r.v vd, vs2
+#include "vmvnfr_v.h"
diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h
new file mode 100644
index 0000000..6ae66d5
--- /dev/null
+++ b/riscv/insns/vmvnfr_v.h
@@ -0,0 +1,12 @@
+// vmv<nf>r.v vd, vs2
+require_vector;
+const reg_t baseAddr = RS1;
+const reg_t vd = insn.rd();
+const reg_t vs2 = insn.rs2();
+const reg_t len = insn.rs1() + 1;
+require((vd & (len - 1)) == 0);
+require((vs2 & (len - 1)) == 0);
+if (vd != vs2)
+ memcpy(&P.VU.elt<uint8_t>(vd, 0),
+ &P.VU.elt<uint8_t>(vs2, 0), P.VU.vlenb * len);
+P.VU.vstart = 0;
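
The shared header recovers the group length from the rs1 bit-field, which holds nf - 1; that agrees with the MATCH_* constants added in encoding.h above. A quick cross-check (illustrative, duplicating the constants):

    #include <cassert>
    #include <cstdint>

    int main() {
      // rs1 field (bits 19:15) of each match pattern encodes nf - 1,
      // so insn.rs1() + 1 yields the 1/2/4/8-register group length.
      const uint32_t matches[] = {0x9e003057, 0x9e00b057, 0x9e01b057, 0x9e03b057};
      const unsigned expect[]  = {1, 2, 4, 8};
      for (int k = 0; k < 4; ++k)
        assert(((matches[k] >> 15) & 0x1f) + 1 == expect[k]);
    }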
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index db1bdcc..252b196 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -384,6 +384,10 @@ riscv_insn_ext_v_alu_int = \
vmv_v_v \
vmv_v_x \
vmv_x_s \
+ vmv1r_v \
+ vmv2r_v \
+ vmv4r_v \
+ vmv8r_v \
vmxnor_mm \
vmxor_mm \
vnclip_wi \
diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc
index 373c6bb..addc223 100644
--- a/spike_main/disasm.cc
+++ b/spike_main/disasm.cc
@@ -911,10 +911,10 @@ disassembler_t::disassembler_t(int xlen)
DISASM_OPIV_VX__INSN(vssubu, 0);
DISASM_OPIV_VX__INSN(vssub, 1);
DISASM_OPIV_VXI_INSN(vsll, 1, v);
- DISASM_OPIV_VX__INSN(vaaddu, 0);
- DISASM_OPIV_VX__INSN(vaadd, 0);
- DISASM_OPIV_VX__INSN(vasubu, 0);
- DISASM_OPIV_VX__INSN(vasub, 0);
+ DISASM_INSN("vmv1r.v", vmv1r_v, 0, {&vd, &vs2});
+ DISASM_INSN("vmv2r.v", vmv2r_v, 0, {&vd, &vs2});
+ DISASM_INSN("vmv4r.v", vmv4r_v, 0, {&vd, &vs2});
+ DISASM_INSN("vmv8r.v", vmv8r_v, 0, {&vd, &vs2});
DISASM_OPIV_VX__INSN(vsmul, 1);
DISASM_OPIV_VXI_INSN(vsrl, 0, v);
DISASM_OPIV_VXI_INSN(vsra, 0, v);
@@ -937,6 +937,11 @@ disassembler_t::disassembler_t(int xlen)
//OPMVV/OPMVX
//0b00_0000
+ DISASM_OPIV_VX__INSN(vaaddu, 0);
+ DISASM_OPIV_VX__INSN(vaadd, 0);
+ DISASM_OPIV_VX__INSN(vasubu, 0);
+ DISASM_OPIV_VX__INSN(vasub, 0);
+
DISASM_OPIV_S___INSN(vredsum, 1);
DISASM_OPIV_S___INSN(vredand, 1);
DISASM_OPIV_S___INSN(vredor, 1);
@@ -1037,23 +1042,19 @@ disassembler_t::disassembler_t(int xlen)
add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \
{&vd, &vs2, &frs1, &opt, &vm})); \
- #define DISASM_VFUNARY0_INSN(name, extra, suf) \
+ #define DISASM_VFUNARY0_INSN(name, suf) \
add_insn(new disasm_insn_t(#name "cvt.xu.f." #suf, \
match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
{&vd, &vs2, &opt, &vm})); \
add_insn(new disasm_insn_t(#name "cvt.x.f." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
+ match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \
{&vd, &vs2, &opt, &vm})); \
add_insn(new disasm_insn_t(#name "cvt.f.xu." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
+ match_##name##cvt_f_xu_##suf, mask_##name##cvt_f_xu_##suf, \
{&vd, &vs2, &opt, &vm})); \
add_insn(new disasm_insn_t(#name "cvt.f.x." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
+ match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \
{&vd, &vs2, &opt, &vm})); \
- if (extra) \
- add_insn(new disasm_insn_t(#name "cvt.f.f." #suf, \
- match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \
- {&vd, &vs2, &opt, &vm})); \
//OPFVV/OPFVF
//0b01_0000
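
The removed lines above show the bug this hunk fixes: all four entries pasted the xu_f match/mask, so the cvt.x.f, cvt.f.xu, and cvt.f.x variants disassembled under the wrong pattern; dropping the `extra` parameter also moves the f.f conversions out to explicit DISASM_INSN lines below. A stringified illustration of the corrected per-entry paste (hypothetical macro, not the disassembler's):

    #include <cassert>
    #include <cstring>

    // For name=vfn, suf=w the x.f entry should now reference
    // match_vfncvt_x_f_w rather than reusing match_vfncvt_xu_f_w.
    #define PASTED_NAME(name, suf) "match_" #name "cvt_x_f_" #suf

    int main() {
      assert(strcmp(PASTED_NAME(vfn, w), "match_vfncvt_x_f_w") == 0);
    }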
@@ -1086,11 +1087,13 @@ disassembler_t::disassembler_t(int xlen)
DISASM_OPIV__F_INSN(vfrdiv);
//vfunary0
- DISASM_VFUNARY0_INSN(vf, 0, v);
+ DISASM_VFUNARY0_INSN(vf, v);
- DISASM_VFUNARY0_INSN(vfw, 1, v);
+ DISASM_VFUNARY0_INSN(vfw, v);
+ DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm});
- DISASM_VFUNARY0_INSN(vfn, 1, w);
+ DISASM_VFUNARY0_INSN(vfn, w);
+ DISASM_INSN("vfncvt.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm});
DISASM_INSN("vfncvt.rod.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm});
//vfunary1