aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChih-Min Chao <chihmin.chao@sifive.com>2019-06-06 03:25:04 -0700
committerChih-Min Chao <chihmin.chao@sifive.com>2019-06-18 08:56:11 -0700
commit833b965679f4502f83c66353bfc07a092cfac9f6 (patch)
tree293532625d0f60ec6b149b79b353f1ff1dda98c7
parent80ebc70e43e48c5a851348e898c13a2d8a8148d7 (diff)
downloadspike-833b965679f4502f83c66353bfc07a092cfac9f6.zip
spike-833b965679f4502f83c66353bfc07a092cfac9f6.tar.gz
spike-833b965679f4502f83c66353bfc07a092cfac9f6.tar.bz2
rvv: add floating-point instructions
based on v-spec 0.7.1, support sections: 14/15.3 ~ 15.4 element size: 32 Signed-off-by: Bruce Hoult <bruce@hoult.org> Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com> Signed-off-by: Dave Wen <dave.wen@sifive.com>
-rw-r--r--riscv/decode.h265
-rw-r--r--riscv/insns/vfadd_vf.h5
-rw-r--r--riscv/insns/vfadd_vv.h5
-rw-r--r--riscv/insns/vfclass_v.h5
-rw-r--r--riscv/insns/vfcvt_f_x_v.h6
-rw-r--r--riscv/insns/vfcvt_f_xu_v.h6
-rw-r--r--riscv/insns/vfcvt_x_f_v.h5
-rw-r--r--riscv/insns/vfcvt_xu_f_v.h5
-rw-r--r--riscv/insns/vfdiv_vf.h5
-rw-r--r--riscv/insns/vfdiv_vv.h5
-rw-r--r--riscv/insns/vfdot_vv.h5
-rw-r--r--riscv/insns/vfmacc_vf.h5
-rw-r--r--riscv/insns/vfmacc_vv.h5
-rw-r--r--riscv/insns/vfmadd_vf.h5
-rw-r--r--riscv/insns/vfmadd_vv.h5
-rw-r--r--riscv/insns/vfmax_vf.h5
-rw-r--r--riscv/insns/vfmax_vv.h5
-rw-r--r--riscv/insns/vfmerge_vfm.h25
-rw-r--r--riscv/insns/vfmin_vf.h5
-rw-r--r--riscv/insns/vfmin_vv.h5
-rw-r--r--riscv/insns/vfmsac_vf.h5
-rw-r--r--riscv/insns/vfmsac_vv.h5
-rw-r--r--riscv/insns/vfmsub_vf.h5
-rw-r--r--riscv/insns/vfmsub_vv.h5
-rw-r--r--riscv/insns/vfmul_vf.h5
-rw-r--r--riscv/insns/vfmul_vv.h5
-rw-r--r--riscv/insns/vfmv_f_s.h33
-rw-r--r--riscv/insns/vfmv_s_f.h29
-rw-r--r--riscv/insns/vfmv_v_f.h20
-rw-r--r--riscv/insns/vfncvt_f_f_v.h6
-rw-r--r--riscv/insns/vfncvt_f_x_v.h6
-rw-r--r--riscv/insns/vfncvt_f_xu_v.h6
-rw-r--r--riscv/insns/vfncvt_x_f_v.h6
-rw-r--r--riscv/insns/vfncvt_xu_f_v.h6
-rw-r--r--riscv/insns/vfnmacc_vf.h5
-rw-r--r--riscv/insns/vfnmacc_vv.h5
-rw-r--r--riscv/insns/vfnmadd_vf.h5
-rw-r--r--riscv/insns/vfnmadd_vv.h5
-rw-r--r--riscv/insns/vfnmsac_vf.h5
-rw-r--r--riscv/insns/vfnmsac_vv.h5
-rw-r--r--riscv/insns/vfnmsub_vf.h5
-rw-r--r--riscv/insns/vfnmsub_vv.h5
-rw-r--r--riscv/insns/vfrdiv_vf.h5
-rw-r--r--riscv/insns/vfredmax_vs.h5
-rw-r--r--riscv/insns/vfredmin_vs.h5
-rw-r--r--riscv/insns/vfredosum_vs.h5
-rw-r--r--riscv/insns/vfredsum_vs.h5
-rw-r--r--riscv/insns/vfrsub_vf.h5
-rw-r--r--riscv/insns/vfsgnj_vf.h5
-rw-r--r--riscv/insns/vfsgnj_vv.h5
-rw-r--r--riscv/insns/vfsgnjn_vf.h5
-rw-r--r--riscv/insns/vfsgnjn_vv.h5
-rw-r--r--riscv/insns/vfsgnjx_vf.h5
-rw-r--r--riscv/insns/vfsgnjx_vv.h5
-rw-r--r--riscv/insns/vfsqrt_v.h5
-rw-r--r--riscv/insns/vfsub_vf.h5
-rw-r--r--riscv/insns/vfsub_vv.h5
-rw-r--r--riscv/insns/vfwadd_vf.h5
-rw-r--r--riscv/insns/vfwadd_vv.h5
-rw-r--r--riscv/insns/vfwadd_wf.h5
-rw-r--r--riscv/insns/vfwadd_wv.h5
-rw-r--r--riscv/insns/vfwcvt_f_f_v.h7
-rw-r--r--riscv/insns/vfwcvt_f_x_v.h7
-rw-r--r--riscv/insns/vfwcvt_f_xu_v.h7
-rw-r--r--riscv/insns/vfwcvt_x_f_v.h7
-rw-r--r--riscv/insns/vfwcvt_xu_f_v.h7
-rw-r--r--riscv/insns/vfwmacc_vf.h5
-rw-r--r--riscv/insns/vfwmacc_vv.h5
-rw-r--r--riscv/insns/vfwmsac_vf.h5
-rw-r--r--riscv/insns/vfwmsac_vv.h5
-rw-r--r--riscv/insns/vfwmul_vf.h5
-rw-r--r--riscv/insns/vfwmul_vv.h5
-rw-r--r--riscv/insns/vfwnmacc_vf.h5
-rw-r--r--riscv/insns/vfwnmacc_vv.h5
-rw-r--r--riscv/insns/vfwnmsac_vf.h5
-rw-r--r--riscv/insns/vfwnmsac_vv.h5
-rw-r--r--riscv/insns/vfwredosum_vs.h5
-rw-r--r--riscv/insns/vfwredsum_vs.h5
-rw-r--r--riscv/insns/vfwsub_vf.h5
-rw-r--r--riscv/insns/vfwsub_vv.h5
-rw-r--r--riscv/insns/vfwsub_wf.h5
-rw-r--r--riscv/insns/vfwsub_wv.h5
-rw-r--r--riscv/riscv.mk.in96
83 files changed, 869 insertions, 1 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 86fd799..1f80683 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -148,6 +148,7 @@ private:
#define MMU (*p->get_mmu())
#define STATE (*p->get_state())
#define P (*p)
+#define FLEN (p->get_flen())
#define READ_REG(reg) STATE.XPR[reg]
#define READ_FREG(reg) STATE.FPR[reg]
#define RD READ_REG(insn.rd())
@@ -314,6 +315,24 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew))
#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew))
+#define DEBUG_RVV 0
+
+#if DEBUG_RVV
+#define DEBUG_RVV_FP_VV \
+ printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2));
+#define DEBUG_RVV_FP_VF \
+ printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2));
+#define DEBUG_RVV_FMA_VV \
+ printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2), to_f(vd_old));
+#define DEBUG_RVV_FMA_VF \
+ printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2), to_f(vd_old));
+#else
+#define DEBUG_RVV_FP_VV 0
+#define DEBUG_RVV_FP_VF 0
+#define DEBUG_RVV_FMA_VV 0
+#define DEBUG_RVV_FMA_VF 0
+#endif
+
//
// vector: masking skip helper
//
@@ -1480,8 +1499,252 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
} \
p->VU.vstart = 0;
+
+//
+// vector: vfp helper
+//
+#define VI_VFP_COMMON \
+ require_extension('F'); \
+ require_fp; \
+ require(P.VU.vsew == 32); \
+ require(!P.VU.vill);\
+ reg_t vl = P.VU.vl; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ softfloat_roundingMode = STATE.frm;
+
+#define VI_VFP_LOOP_BASE \
+ VI_VFP_COMMON \
+ for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ VI_LOOP_ELEMENT_SKIP();
+
+#define VI_VFP_LOOP_CMP_BASE \
+ VI_VFP_COMMON \
+ for (reg_t i = P.VU.vstart; i < vl; ++i) { \
+ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
+ float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
+ float32_t rs1 = f32(READ_FREG(rs1_num)); \
+ VI_LOOP_ELEMENT_SKIP(); \
+ uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
+ uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx); \
+ uint64_t res = 0;
+
+#define VI_VFP_LOOP_REDUCTION_BASE \
+ VI_VFP_COMMON \
+ float32_t vd_0 = P.VU.elt<float32_t>(rd_num, 0); \
+ float32_t vs1_0 = P.VU.elt<float32_t>(rs1_num, 0); \
+ vd_0 = vs1_0;\
+ for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ VI_LOOP_ELEMENT_SKIP(); \
+ int32_t &vd = P.VU.elt<int32_t>(rd_num, i); \
+
+#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \
+ VI_VFP_COMMON \
+ float64_t vd_0 = f64(P.VU.elt<float64_t>(rs1_num, 0).v); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i) { \
+ VI_LOOP_ELEMENT_SKIP();
+
+#define VI_VFP_LOOP_END \
+ } \
+ if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 1)); \
+ memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 1)); \
+ }\
+ P.VU.vstart = 0; \
+
+#define VI_VFP_LOOP_WIDE_END \
+ } \
+ if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 2)); \
+ memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 2)); \
+ }\
+ P.VU.vstart = 0; \
+ set_fp_exceptions;
+
+#define VI_VFP_LOOP_REDUCTION_END(x) \
+ } \
+ P.VU.vstart = 0; \
+ set_fp_exceptions; \
+ if (vl > 0 && TAIL_ZEROING) { \
+ P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \
+ for (reg_t i = 1; i < (P.VU.VLEN / x); ++i) { \
+ P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \
+ } \
+ }
+
+#define VI_VFP_LOOP_CMP_END \
+ switch(P.VU.vsew) { \
+ case e32: { \
+ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
+ break; \
+ } \
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ break; \
+ }; \
+ } \
+ if (vl != 0 && TAIL_ZEROING){ \
+ for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
+ const int mlen = P.VU.vmlen; \
+ const int midx = (mlen * i) / 64; \
+ const int mpos = (mlen * i) % 64; \
+ uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
+ uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \
+ vdi = (vdi & ~mmask);\
+ }\
+ }\
+ P.VU.vstart = 0; \
+ set_fp_exceptions;
+
+#define VI_VFP_VV_LOOP(BODY) \
+ VI_VFP_LOOP_BASE \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
+ float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \
+ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
+ BODY; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ break; \
+ }; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_END
+
+#define VI_VFP_VV_LOOP_REDUCTION(BODY) \
+ VI_VFP_LOOP_REDUCTION_BASE \
+ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
+ BODY; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_REDUCTION_END(e32)
+
+#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \
+ VI_VFP_LOOP_WIDE_REDUCTION_BASE \
+ float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
+ BODY; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_REDUCTION_END(e64)
+
+#define VI_VFP_VF_LOOP(BODY) \
+ VI_VFP_LOOP_BASE \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \
+ float32_t rs1 = f32(READ_FREG(rs1_num)); \
+ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
+ BODY; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ break; \
+ }; \
+ DEBUG_RVV_FP_VF; \
+ VI_VFP_LOOP_END
+
+#define VI_VFP_LOOP_CMP(BODY) \
+ VI_VFP_LOOP_CMP_BASE \
+ BODY; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_CMP_END \
+
+#define VI_VFP_VF_LOOP_WIDE(BODY) \
+ VI_VFP_LOOP_BASE \
+ VI_CHECK_DSS(false); \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
+ float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \
+ BODY; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ break; \
+ }; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_WIDE_END
+
+
+#define VI_VFP_VV_LOOP_WIDE(BODY) \
+ VI_VFP_LOOP_BASE \
+ VI_CHECK_DSS(true); \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
+ float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \
+ BODY; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ break; \
+ }; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_WIDE_END
+
+#define VI_VFP_WF_LOOP_WIDE(BODY) \
+ VI_VFP_LOOP_BASE \
+ VI_CHECK_DDS(false); \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \
+ BODY; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ }; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_WIDE_END
+
+#define VI_VFP_WV_LOOP_WIDE(BODY) \
+ VI_VFP_LOOP_BASE \
+ VI_CHECK_DDS(true); \
+ switch(P.VU.vsew) { \
+ case e32: {\
+ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
+ float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \
+ float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \
+ BODY; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e16: \
+ case e8: \
+ default: \
+ require(0); \
+ }; \
+ DEBUG_RVV_FP_VV; \
+ VI_VFP_LOOP_WIDE_END
+
+
// Seems that 0x0 doesn't work.
#define DEBUG_START 0x100
-#define DEBUG_END (0x1000 - 1)
+#define DEBUG_END (0x1000 - 1)
#endif
diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h
new file mode 100644
index 0000000..60dec4a
--- /dev/null
+++ b/riscv/insns/vfadd_vf.h
@@ -0,0 +1,5 @@
+// vfadd.vf vd, vs2, rs1
+VI_VFP_VF_LOOP
+({
+ vd = f32_add(rs1, vs2);
+})
diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h
new file mode 100644
index 0000000..de0ae53
--- /dev/null
+++ b/riscv/insns/vfadd_vv.h
@@ -0,0 +1,5 @@
+// vfadd.vv vd, vs2, vs1
+VI_VFP_VV_LOOP
+({
+ vd = f32_add(vs1, vs2);
+})
diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h
new file mode 100644
index 0000000..75f29a2
--- /dev/null
+++ b/riscv/insns/vfclass_v.h
@@ -0,0 +1,5 @@
+// vfclass.v vd, vs2, vm
+VI_VFP_VV_LOOP
+({
+ vd.v = f32_classify(vs2);
+})
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
new file mode 100644
index 0000000..311f875
--- /dev/null
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -0,0 +1,6 @@
+// vfcvt.f.x.v vd, vs2, vm
+VI_VFP_VV_LOOP
+({
+ auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
+ vd = i32_to_f32(vs2_i);
+})
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
new file mode 100644
index 0000000..ceabea3
--- /dev/null
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -0,0 +1,6 @@
+// vfcvt.f.xu.v vd, vs2, vm
+VI_VFP_VV_LOOP
+({
+ auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
+ vd = ui32_to_f32(vs2_u);
+})
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
new file mode 100644
index 0000000..ee53c6d
--- /dev/null
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -0,0 +1,5 @@
+// vfcvt.x.f.v vd, vs2, vm
+VI_VFP_VV_LOOP
+({
+ P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
+})
diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h
new file mode 100644
index 0000000..76c7735
--- /dev/null
+++ b/riscv/insns/vfcvt_xu_f_v.h
@@ -0,0 +1,5 @@
+// vfcvt.xu.f.v vd, vs2, vm
+VI_VFP_VV_LOOP
+({
+ P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
+})
diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h
new file mode 100644
index 0000000..2b8124c
--- /dev/null
+++ b/riscv/insns/vfdiv_vf.h
@@ -0,0 +1,5 @@
+// vfdiv.vf vd, vs2, rs1
+VI_VFP_VF_LOOP
+({
+ vd = f32_div(vs2, rs1);
+})
diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h
new file mode 100644
index 0000000..c20ff1d
--- /dev/null
+++ b/riscv/insns/vfdiv_vv.h
@@ -0,0 +1,5 @@
+// vfdiv.vv vd, vs2, vs1
+VI_VFP_VV_LOOP
+({
+ vd = f32_div(vs2, vs1);
+})
diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h
new file mode 100644
index 0000000..11c8bce
--- /dev/null
+++ b/riscv/insns/vfdot_vv.h
@@ -0,0 +1,5 @@
+// vfdot.vv vd, vs2, vs1
+VI_VFP_VV_LOOP
+({
+ vd = f32_add(vd, f32_mul(vs2, vs1));
+})
diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h
new file mode 100644
index 0000000..5013d34
--- /dev/null
+++ b/riscv/insns/vfmacc_vf.h
@@ -0,0 +1,5 @@
+// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(rs1, vs2, vd);
+})
diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h
new file mode 100644
index 0000000..663a648
--- /dev/null
+++ b/riscv/insns/vfmacc_vv.h
@@ -0,0 +1,5 @@
+// vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(vs1, vs2, vd);
+})
diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h
new file mode 100644
index 0000000..920c392
--- /dev/null
+++ b/riscv/insns/vfmadd_vf.h
@@ -0,0 +1,5 @@
+// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(vd, rs1, vs2);
+})
diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h
new file mode 100644
index 0000000..c967ec3
--- /dev/null
+++ b/riscv/insns/vfmadd_vv.h
@@ -0,0 +1,5 @@
+// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(vd, vs1, vs2);
+})
diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h
new file mode 100644
index 0000000..eb70e48
--- /dev/null
+++ b/riscv/insns/vfmax_vf.h
@@ -0,0 +1,5 @@
+// vfmax
+VI_VFP_VF_LOOP
+({
+ vd = f32_max(vs2, rs1);
+})
diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h
new file mode 100644
index 0000000..6d12f08
--- /dev/null
+++ b/riscv/insns/vfmax_vv.h
@@ -0,0 +1,5 @@
+// vfmax
+VI_VFP_VV_LOOP
+({
+ vd = f32_max(vs2, vs1);
+})
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
new file mode 100644
index 0000000..73d6cee
--- /dev/null
+++ b/riscv/insns/vfmerge_vfm.h
@@ -0,0 +1,25 @@
+// vfmerge.vfm vd, vs2, rs1, v0
+require_extension('F');
+require_fp;
+require(P.VU.vsew == 32);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float32_t>(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
+
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
+set_fp_exceptions;
diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h
new file mode 100644
index 0000000..bf06638
--- /dev/null
+++ b/riscv/insns/vfmin_vf.h
@@ -0,0 +1,5 @@
+// vfmin vd, vs2, rs1
+VI_VFP_VF_LOOP
+({
+ vd = f32_min(vs2, rs1);
+})
diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h
new file mode 100644
index 0000000..65d20ff
--- /dev/null
+++ b/riscv/insns/vfmin_vv.h
@@ -0,0 +1,5 @@
+// vfmin vd, vs2, vs1
+VI_VFP_VV_LOOP
+({
+ vd = f32_min(vs2, vs1);
+})
diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h
new file mode 100644
index 0000000..23661b3
--- /dev/null
+++ b/riscv/insns/vfmsac_vf.h
@@ -0,0 +1,5 @@
+// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h
new file mode 100644
index 0000000..952c12e
--- /dev/null
+++ b/riscv/insns/vfmsac_vv.h
@@ -0,0 +1,5 @@
+// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h
new file mode 100644
index 0000000..2328d07
--- /dev/null
+++ b/riscv/insns/vfmsub_vf.h
@@ -0,0 +1,5 @@
+// vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h
new file mode 100644
index 0000000..a58f1e3
--- /dev/null
+++ b/riscv/insns/vfmsub_vv.h
@@ -0,0 +1,5 @@
+// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h
new file mode 100644
index 0000000..086b6d8
--- /dev/null
+++ b/riscv/insns/vfmul_vf.h
@@ -0,0 +1,5 @@
+// vfmul.vf vd, vs2, rs1, vm
+VI_VFP_VF_LOOP
+({
+ vd = f32_mul(vs2, rs1);
+})
diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h
new file mode 100644
index 0000000..259dc01
--- /dev/null
+++ b/riscv/insns/vfmul_vv.h
@@ -0,0 +1,5 @@
+// vfmul.vv vd, vs1, vs2, vm
+VI_VFP_VV_LOOP
+({
+ vd = f32_mul(vs1, vs2);
+})
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
new file mode 100644
index 0000000..b956e6f
--- /dev/null
+++ b/riscv/insns/vfmv_f_s.h
@@ -0,0 +1,33 @@
+// vfmv_f_s: rd = vs2[0] (rs1=0)
+require(insn.v_vm() == 1);
+require_fp;
+require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
+
+reg_t rs2_num = insn.rs2();
+uint64_t vs2_0 = 0;
+const reg_t sew = P.VU.vsew;
+switch(sew) {
+case e8:
+ vs2_0 = P.VU.elt<uint8_t>(rs2_num, 0);
+ break;
+case e16:
+ vs2_0 = P.VU.elt<uint16_t>(rs2_num, 0);
+ break;
+case e32:
+ vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0);
+ break;
+default:
+ vs2_0 = P.VU.elt<uint64_t>(rs2_num, 0);
+ break;
+}
+
+// nan_extended
+if (FLEN > sew) {
+ vs2_0 = vs2_0 | ~((1ul << sew) - 1);
+}
+
+if (FLEN == 64) {
+ WRITE_FRD(f64(vs2_0));
+} else {
+ WRITE_FRD(f32(vs2_0));
+}
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
new file mode 100644
index 0000000..13423b1
--- /dev/null
+++ b/riscv/insns/vfmv_s_f.h
@@ -0,0 +1,29 @@
+// vfmv_s_f: vd[0] = rs1 (vs2=0)
+require(insn.v_vm() == 1);
+require_fp;
+require(P.VU.vsew == e32);
+reg_t vl = P.VU.vl;
+
+if (vl > 0) {
+ reg_t rd_num = insn.rd();
+ reg_t sew = P.VU.vsew;
+
+ if (FLEN == 64)
+ P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
+
+ const reg_t max_len = P.VU.VLEN / sew;
+ for (reg_t i = 1; i < max_len; ++i) {
+ switch(sew) {
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, i) = 0;
+ break;
+ default:
+ require(false);
+ break;
+ }
+ }
+
+ vl = 0;
+}
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
new file mode 100644
index 0000000..5b4fb2b
--- /dev/null
+++ b/riscv/insns/vfmv_v_f.h
@@ -0,0 +1,20 @@
+// vfmv.v.f vd, rs1
+require_extension('F');
+require_fp;
+require(P.VU.vsew == 32);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ auto &vd = P.VU.elt<float32_t>(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
+
+ vd = rs1;
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
+set_fp_exceptions;
diff --git a/riscv/insns/vfncvt_f_f_v.h b/riscv/insns/vfncvt_f_f_v.h
new file mode 100644
index 0000000..b35cd60
--- /dev/null
+++ b/riscv/insns/vfncvt_f_f_v.h
@@ -0,0 +1,6 @@
+// vfncvt.f.f.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_SD;
+ auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
+ P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2);
+VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_f_x_v.h b/riscv/insns/vfncvt_f_x_v.h
new file mode 100644
index 0000000..69bdba8
--- /dev/null
+++ b/riscv/insns/vfncvt_f_x_v.h
@@ -0,0 +1,6 @@
+// vfncvt.f.x.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_SD;
+ auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
+ P.VU.elt<float32_t>(rd_num, i) = i64_to_f32(vs2);
+VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_f_xu_v.h b/riscv/insns/vfncvt_f_xu_v.h
new file mode 100644
index 0000000..6f37734
--- /dev/null
+++ b/riscv/insns/vfncvt_f_xu_v.h
@@ -0,0 +1,6 @@
+// vfncvt.f.xu.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_SD;
+ auto vs2 = P.VU.elt<uint64_t>(rs2_num, i);
+ P.VU.elt<float32_t>(rd_num, i) = ui64_to_f32(vs2);
+VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_x_f_v.h b/riscv/insns/vfncvt_x_f_v.h
new file mode 100644
index 0000000..8985f1b
--- /dev/null
+++ b/riscv/insns/vfncvt_x_f_v.h
@@ -0,0 +1,6 @@
+// vfncvt.x.f.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_SD;
+ auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
+ P.VU.elt<int32_t>(rd_num, i) = f64_to_i32(vs2, STATE.frm, true);
+VI_VFP_LOOP_END
diff --git a/riscv/insns/vfncvt_xu_f_v.h b/riscv/insns/vfncvt_xu_f_v.h
new file mode 100644
index 0000000..2db8d82
--- /dev/null
+++ b/riscv/insns/vfncvt_xu_f_v.h
@@ -0,0 +1,6 @@
+// vfncvt.xu.f.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_SD;
+ auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
+ P.VU.elt<uint32_t>(rd_num, i) = f64_to_ui32(vs2, STATE.frm, true);
+VI_VFP_LOOP_END
diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h
new file mode 100644
index 0000000..04a31bf
--- /dev/null
+++ b/riscv/insns/vfnmacc_vf.h
@@ -0,0 +1,5 @@
+// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h
new file mode 100644
index 0000000..b950df9
--- /dev/null
+++ b/riscv/insns/vfnmacc_vv.h
@@ -0,0 +1,5 @@
+// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h
new file mode 100644
index 0000000..f8f3b83
--- /dev/null
+++ b/riscv/insns/vfnmadd_vf.h
@@ -0,0 +1,5 @@
+// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h
new file mode 100644
index 0000000..f96d102
--- /dev/null
+++ b/riscv/insns/vfnmadd_vv.h
@@ -0,0 +1,5 @@
+// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN));
+})
diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h
new file mode 100644
index 0000000..c3dc12c
--- /dev/null
+++ b/riscv/insns/vfnmsac_vf.h
@@ -0,0 +1,5 @@
+// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
+})
diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h
new file mode 100644
index 0000000..0ecd648
--- /dev/null
+++ b/riscv/insns/vfnmsac_vv.h
@@ -0,0 +1,5 @@
+// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd);
+})
diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h
new file mode 100644
index 0000000..1879b9e
--- /dev/null
+++ b/riscv/insns/vfnmsub_vf.h
@@ -0,0 +1,5 @@
+// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
+})
diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h
new file mode 100644
index 0000000..da9f59c
--- /dev/null
+++ b/riscv/insns/vfnmsub_vv.h
@@ -0,0 +1,5 @@
+// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
+VI_VFP_VV_LOOP
+({
+ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2);
+})
diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h
new file mode 100644
index 0000000..49e4293
--- /dev/null
+++ b/riscv/insns/vfrdiv_vf.h
@@ -0,0 +1,5 @@
+// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
+VI_VFP_VF_LOOP
+({
+ vd = f32_div(rs1, vs2);
+})
diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h
new file mode 100644
index 0000000..dca10bf
--- /dev/null
+++ b/riscv/insns/vfredmax_vs.h
@@ -0,0 +1,5 @@
+// vfredmax vd, vs2, vs1
+VI_VFP_VV_LOOP_REDUCTION
+({
+ vd_0 = f32_max(vd_0, vs2);
+})
diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h
new file mode 100644
index 0000000..b4556bc
--- /dev/null
+++ b/riscv/insns/vfredmin_vs.h
@@ -0,0 +1,5 @@
+// vfredmin vd, vs2, vs1
+VI_VFP_VV_LOOP_REDUCTION
+({
+ vd_0 = f32_min(vd_0, vs2);
+})
diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h
new file mode 100644
index 0000000..87422ee
--- /dev/null
+++ b/riscv/insns/vfredosum_vs.h
@@ -0,0 +1,5 @@
+// vfredosum: vd[0] = sum( vs2[*] , vs1[0] )
+VI_VFP_VV_LOOP_REDUCTION
+({
+ vd_0 = f32_add(vd_0, vs2);
+})
diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h
new file mode 100644
index 0000000..b50b45f
--- /dev/null
+++ b/riscv/insns/vfredsum_vs.h
@@ -0,0 +1,5 @@
+// vfredsum: vd[0] = sum( vs2[*] , vs1[0] )
+VI_VFP_VV_LOOP_REDUCTION
+({
+ vd_0 = f32_add(vd_0, vs2);
+})
diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h
new file mode 100644
index 0000000..ee8ac83
--- /dev/null
+++ b/riscv/insns/vfrsub_vf.h
@@ -0,0 +1,5 @@
+// vfrsub.vf vd, vs2, rs1
+VI_VFP_VF_LOOP
+({
+ vd = f32_sub(rs1, vs2);
+})
diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h
new file mode 100644
index 0000000..90d78fa
--- /dev/null
+++ b/riscv/insns/vfsgnj_vf.h
@@ -0,0 +1,5 @@
+// vfsgnj vd, vs2, vs1
+VI_VFP_VF_LOOP
+({
+ vd = fsgnj32(rs1.v, vs2.v, false, false);
+})
diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h
new file mode 100644
index 0000000..1662a4c
--- /dev/null
+++ b/riscv/insns/vfsgnj_vv.h
@@ -0,0 +1,5 @@
+// vfsgnj
+VI_VFP_VV_LOOP
+({
+ vd = fsgnj32(vs1.v, vs2.v, false, false);
+})
diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h
new file mode 100644
index 0000000..af722eb
--- /dev/null
+++ b/riscv/insns/vfsgnjn_vf.h
@@ -0,0 +1,5 @@
+// vfsgnjn
+VI_VFP_VF_LOOP
+({
+ vd = fsgnj32(rs1.v, vs2.v, true, false);
+})
diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h
new file mode 100644
index 0000000..6ae3369
--- /dev/null
+++ b/riscv/insns/vfsgnjn_vv.h
@@ -0,0 +1,5 @@
+// vfsgnjn
+VI_VFP_VV_LOOP
+({
+ vd = fsgnj32(vs1.v, vs2.v, true, false);
+})
diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h
new file mode 100644
index 0000000..1e82369
--- /dev/null
+++ b/riscv/insns/vfsgnjx_vf.h
@@ -0,0 +1,5 @@
+// vfsgnjx
+VI_VFP_VF_LOOP
+({
+ vd = fsgnj32(rs1.v, vs2.v, false, true);
+})
diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h
new file mode 100644
index 0000000..ba1cb9b
--- /dev/null
+++ b/riscv/insns/vfsgnjx_vv.h
@@ -0,0 +1,5 @@
+// vfsgnjx
+VI_VFP_VV_LOOP
+({
+ vd = fsgnj32(vs1.v, vs2.v, false, true);
+})
diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h
new file mode 100644
index 0000000..4931037
--- /dev/null
+++ b/riscv/insns/vfsqrt_v.h
@@ -0,0 +1,5 @@
+// vfsqrt.v vd, vs2, vm
+VI_VFP_VV_LOOP
+({
+ vd = f32_sqrt(vs2);
+})
diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h
new file mode 100644
index 0000000..38d6acc
--- /dev/null
+++ b/riscv/insns/vfsub_vf.h
@@ -0,0 +1,5 @@
+// vfsub.vf vd, vs2, rs1
+VI_VFP_VF_LOOP
+({
+ vd = f32_sub(vs2, rs1);
+})
diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h
new file mode 100644
index 0000000..71e7a43
--- /dev/null
+++ b/riscv/insns/vfsub_vv.h
@@ -0,0 +1,5 @@
+// vfsub.vv vd, vs2, vs1
+VI_VFP_VV_LOOP
+({
+ vd = f32_sub(vs2, vs1);
+})
diff --git a/riscv/insns/vfwadd_vf.h b/riscv/insns/vfwadd_vf.h
new file mode 100644
index 0000000..ecac202
--- /dev/null
+++ b/riscv/insns/vfwadd_vf.h
@@ -0,0 +1,5 @@
+// vfwadd.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_add(vs2, rs1);
+})
diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h
new file mode 100644
index 0000000..0665cdc
--- /dev/null
+++ b/riscv/insns/vfwadd_vv.h
@@ -0,0 +1,5 @@
+// vfwadd.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_add(vs2, vs1);
+})
diff --git a/riscv/insns/vfwadd_wf.h b/riscv/insns/vfwadd_wf.h
new file mode 100644
index 0000000..eb38d0d
--- /dev/null
+++ b/riscv/insns/vfwadd_wf.h
@@ -0,0 +1,5 @@
+// vfwadd.wf vd, vs2, vs1
+VI_VFP_WF_LOOP_WIDE
+({
+ vd = f64_add(vs2, rs1);
+})
diff --git a/riscv/insns/vfwadd_wv.h b/riscv/insns/vfwadd_wv.h
new file mode 100644
index 0000000..675ef22
--- /dev/null
+++ b/riscv/insns/vfwadd_wv.h
@@ -0,0 +1,5 @@
+// vfwadd.wv vd, vs2, vs1
+VI_VFP_WV_LOOP_WIDE
+({
+ vd = f64_add(vs2, vs1);
+})
diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h
new file mode 100644
index 0000000..4d6b4fc
--- /dev/null
+++ b/riscv/insns/vfwcvt_f_f_v.h
@@ -0,0 +1,7 @@
+// vfwcvt.f.f.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_DSS(false);
+ auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
+ P.VU.elt<float64_t>(rd_num, i) = f32_to_f64(vs2);
+ set_fp_exceptions;
+VI_VFP_LOOP_WIDE_END
diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h
new file mode 100644
index 0000000..ab5d825
--- /dev/null
+++ b/riscv/insns/vfwcvt_f_x_v.h
@@ -0,0 +1,7 @@
+// vfwcvt.f.x.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_DSS(false);
+ auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
+ P.VU.elt<float64_t>(rd_num, i) = i32_to_f64(vs2);
+ set_fp_exceptions;
+VI_VFP_LOOP_WIDE_END
diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h
new file mode 100644
index 0000000..8af8d7c
--- /dev/null
+++ b/riscv/insns/vfwcvt_f_xu_v.h
@@ -0,0 +1,7 @@
+// vfwcvt.f.xu.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_DSS(false);
+ auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
+ P.VU.elt<float64_t>(rd_num, i) = ui32_to_f64(vs2);
+ set_fp_exceptions;
+VI_VFP_LOOP_WIDE_END
diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h
new file mode 100644
index 0000000..06e81d4
--- /dev/null
+++ b/riscv/insns/vfwcvt_x_f_v.h
@@ -0,0 +1,7 @@
+// vfwcvt.x.f.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_DSS(false);
+ auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
+ P.VU.elt<int64_t>(rd_num, i) = f32_to_i64(vs2, STATE.frm, true);
+ set_fp_exceptions;
+VI_VFP_LOOP_WIDE_END
diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h
new file mode 100644
index 0000000..cc82481
--- /dev/null
+++ b/riscv/insns/vfwcvt_xu_f_v.h
@@ -0,0 +1,7 @@
+// vfwcvt.xu.f.v vd, vs2, vm
+VI_VFP_LOOP_BASE
+ VI_CHECK_DSS(false);
+ auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
+ P.VU.elt<uint64_t>(rd_num, i) = f32_to_ui64(vs2, STATE.frm, true);
+ set_fp_exceptions;
+VI_VFP_LOOP_WIDE_END
diff --git a/riscv/insns/vfwmacc_vf.h b/riscv/insns/vfwmacc_vf.h
new file mode 100644
index 0000000..6ee011e
--- /dev/null
+++ b/riscv/insns/vfwmacc_vf.h
@@ -0,0 +1,5 @@
+// vfwmacc.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_mulAdd(rs1, vs2, vd);
+})
diff --git a/riscv/insns/vfwmacc_vv.h b/riscv/insns/vfwmacc_vv.h
new file mode 100644
index 0000000..99839af
--- /dev/null
+++ b/riscv/insns/vfwmacc_vv.h
@@ -0,0 +1,5 @@
+// vfwmacc.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_mulAdd(vs1, vs2, vd);
+})
diff --git a/riscv/insns/vfwmsac_vf.h b/riscv/insns/vfwmsac_vf.h
new file mode 100644
index 0000000..ea8f050
--- /dev/null
+++ b/riscv/insns/vfwmsac_vf.h
@@ -0,0 +1,5 @@
+// vfwmsac.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN));
+})
diff --git a/riscv/insns/vfwmsac_vv.h b/riscv/insns/vfwmsac_vv.h
new file mode 100644
index 0000000..8157170
--- /dev/null
+++ b/riscv/insns/vfwmsac_vv.h
@@ -0,0 +1,5 @@
+// vfwmsac.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN));
+})
diff --git a/riscv/insns/vfwmul_vf.h b/riscv/insns/vfwmul_vf.h
new file mode 100644
index 0000000..884e66f
--- /dev/null
+++ b/riscv/insns/vfwmul_vf.h
@@ -0,0 +1,5 @@
+// vfwmul.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_mul(vs2, rs1);
+})
diff --git a/riscv/insns/vfwmul_vv.h b/riscv/insns/vfwmul_vv.h
new file mode 100644
index 0000000..f8e717e
--- /dev/null
+++ b/riscv/insns/vfwmul_vv.h
@@ -0,0 +1,5 @@
+// vfwmul.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_mul(vs2, vs1);
+})
diff --git a/riscv/insns/vfwnmacc_vf.h b/riscv/insns/vfwnmacc_vf.h
new file mode 100644
index 0000000..bccc24f
--- /dev/null
+++ b/riscv/insns/vfwnmacc_vf.h
@@ -0,0 +1,5 @@
+// vfwnmacc.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN));
+})
diff --git a/riscv/insns/vfwnmacc_vv.h b/riscv/insns/vfwnmacc_vv.h
new file mode 100644
index 0000000..3dcba1d
--- /dev/null
+++ b/riscv/insns/vfwnmacc_vv.h
@@ -0,0 +1,5 @@
+// vfwnmacc.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN));
+})
diff --git a/riscv/insns/vfwnmsac_vf.h b/riscv/insns/vfwnmsac_vf.h
new file mode 100644
index 0000000..32ef624
--- /dev/null
+++ b/riscv/insns/vfwnmsac_vf.h
@@ -0,0 +1,5 @@
+// vfwnmsac.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd);
+})
diff --git a/riscv/insns/vfwnmsac_vv.h b/riscv/insns/vfwnmsac_vv.h
new file mode 100644
index 0000000..d2447e1
--- /dev/null
+++ b/riscv/insns/vfwnmsac_vv.h
@@ -0,0 +1,5 @@
+// vfwnmsac.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd);
+})
diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h
new file mode 100644
index 0000000..b47e2c7
--- /dev/null
+++ b/riscv/insns/vfwredosum_vs.h
@@ -0,0 +1,5 @@
+// vfwredosum.vs vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE_REDUCTION
+({
+ vd_0 = f64_add(vd_0, vs2);
+})
diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h
new file mode 100644
index 0000000..3ce591b
--- /dev/null
+++ b/riscv/insns/vfwredsum_vs.h
@@ -0,0 +1,5 @@
+// vfwredsum.vs vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE_REDUCTION
+({
+ vd_0 = f64_add(vd_0, vs2);
+})
diff --git a/riscv/insns/vfwsub_vf.h b/riscv/insns/vfwsub_vf.h
new file mode 100644
index 0000000..1d20c38
--- /dev/null
+++ b/riscv/insns/vfwsub_vf.h
@@ -0,0 +1,5 @@
+// vfwsub.vf vd, vs2, rs1
+VI_VFP_VF_LOOP_WIDE
+({
+ vd = f64_sub(vs2, rs1);
+})
diff --git a/riscv/insns/vfwsub_vv.h b/riscv/insns/vfwsub_vv.h
new file mode 100644
index 0000000..0a72fea
--- /dev/null
+++ b/riscv/insns/vfwsub_vv.h
@@ -0,0 +1,5 @@
+// vfwsub.vv vd, vs2, vs1
+VI_VFP_VV_LOOP_WIDE
+({
+ vd = f64_sub(vs2, vs1);
+})
diff --git a/riscv/insns/vfwsub_wf.h b/riscv/insns/vfwsub_wf.h
new file mode 100644
index 0000000..fa3d747
--- /dev/null
+++ b/riscv/insns/vfwsub_wf.h
@@ -0,0 +1,5 @@
+// vfwsub.wf vd, vs2, rs1
+VI_VFP_WF_LOOP_WIDE
+({
+ vd = f64_sub(vs2, rs1);
+})
diff --git a/riscv/insns/vfwsub_wv.h b/riscv/insns/vfwsub_wv.h
new file mode 100644
index 0000000..4c6fcf6
--- /dev/null
+++ b/riscv/insns/vfwsub_wv.h
@@ -0,0 +1,5 @@
+// vfwsub.wv vd, vs2, vs1
+VI_VFP_WV_LOOP_WIDE
+({
+ vd = f64_sub(vs2, vs1);
+})
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index faf4019..2479ba3 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -496,6 +496,101 @@ riscv_insn_ext_v_alu_int = \
vxor_vv \
vxor_vx \
+riscv_insn_ext_v_alu_fp = \
+ vfadd_vf \
+ vfadd_vv \
+ vfclass_v \
+ vfcvt_f_x_v \
+ vfcvt_f_xu_v \
+ vfcvt_x_f_v \
+ vfcvt_xu_f_v \
+ vfdiv_vf \
+ vfdiv_vv \
+ vfdot_vv \
+ vfmacc_vf \
+ vfmacc_vv \
+ vfmadd_vf \
+ vfmadd_vv \
+ vfmax_vf \
+ vfmax_vv \
+ vfmerge_vfm \
+ vfmin_vf \
+ vfmin_vv \
+ vfmsac_vf \
+ vfmsac_vv \
+ vfmsub_vf \
+ vfmsub_vv \
+ vfmul_vf \
+ vfmul_vv \
+ vfmv_f_s \
+ vfmv_s_f \
+ vfmv_v_f \
+ vfncvt_f_f_v \
+ vfncvt_f_x_v \
+ vfncvt_f_xu_v \
+ vfncvt_x_f_v \
+ vfncvt_xu_f_v \
+ vfnmacc_vf \
+ vfnmacc_vv \
+ vfnmadd_vf \
+ vfnmadd_vv \
+ vfnmsac_vf \
+ vfnmsac_vv \
+ vfnmsub_vf \
+ vfnmsub_vv \
+ vfrdiv_vf \
+ vfredmax_vs \
+ vfredmin_vs \
+ vfredosum_vs \
+ vfredsum_vs \
+ vfrsub_vf \
+ vfsgnj_vf \
+ vfsgnj_vv \
+ vfsgnjn_vf \
+ vfsgnjn_vv \
+ vfsgnjx_vf \
+ vfsgnjx_vv \
+ vfsqrt_v \
+ vfsub_vf \
+ vfsub_vv \
+ vfwadd_vf \
+ vfwadd_vv \
+ vfwadd_wf \
+ vfwadd_wv \
+ vfwcvt_f_f_v \
+ vfwcvt_f_x_v \
+ vfwcvt_f_xu_v \
+ vfwcvt_x_f_v \
+ vfwcvt_xu_f_v \
+ vfwmacc_vf \
+ vfwmacc_vv \
+ vfwmsac_vf \
+ vfwmsac_vv \
+ vfwmul_vf \
+ vfwmul_vv \
+ vfwnmacc_vf \
+ vfwnmacc_vv \
+ vfwnmsac_vf \
+ vfwnmsac_vv \
+ vfwredosum_vs \
+ vfwredsum_vs \
+ vfwsub_vf \
+ vfwsub_vv \
+ vfwsub_wf \
+ vfwsub_wv \
+ vmfeq_vf \
+ vmfeq_vv \
+ vmfge_vf \
+ vmfgt_vf \
+ vmfle_vf \
+ vmfle_vv \
+ vmflt_vf \
+ vmflt_vv \
+ vmfne_vf \
+ vmfne_vv \
+ vmford_vf \
+ vmford_vv \
+
riscv_insn_ext_v_ldst = \
vlb_v \
vlh_v \
@@ -547,6 +642,7 @@ riscv_insn_ext_v_ctrl = \
vsetvl \
riscv_insn_ext_v = \
+ $(riscv_insn_ext_v_alu_fp) \
$(riscv_insn_ext_v_alu_int) \
$(riscv_insn_ext_v_ctrl) \
$(riscv_insn_ext_v_ldst) \