diff options
author | Chih-Min Chao <chihmin.chao@sifive.com> | 2019-06-06 03:25:04 -0700 |
---|---|---|
committer | Chih-Min Chao <chihmin.chao@sifive.com> | 2019-06-18 08:56:11 -0700 |
commit | 833b965679f4502f83c66353bfc07a092cfac9f6 (patch) | |
tree | 293532625d0f60ec6b149b79b353f1ff1dda98c7 | |
parent | 80ebc70e43e48c5a851348e898c13a2d8a8148d7 (diff) | |
download | riscv-isa-sim-833b965679f4502f83c66353bfc07a092cfac9f6.zip riscv-isa-sim-833b965679f4502f83c66353bfc07a092cfac9f6.tar.gz riscv-isa-sim-833b965679f4502f83c66353bfc07a092cfac9f6.tar.bz2 |
rvv: add floating-point instructions
Based on v-spec 0.7.1. Supported:
sections: 14 and 15.3 through 15.4
element size: 32 bits only
Signed-off-by: Bruce Hoult <bruce@hoult.org>
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Signed-off-by: Dave Wen <dave.wen@sifive.com>
83 files changed, 869 insertions, 1 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index 86fd799..1f80683 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -148,6 +148,7 @@ private: #define MMU (*p->get_mmu()) #define STATE (*p->get_state()) #define P (*p) +#define FLEN (p->get_flen()) #define READ_REG(reg) STATE.XPR[reg] #define READ_FREG(reg) STATE.FPR[reg] #define RD READ_REG(insn.rd()) @@ -314,6 +315,24 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); #define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew)) #define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew)) +#define DEBUG_RVV 0 + +#if DEBUG_RVV +#define DEBUG_RVV_FP_VV \ + printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2)); +#define DEBUG_RVV_FP_VF \ + printf("vfp(%lu) vd=%f vs1=%f vs2=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2)); +#define DEBUG_RVV_FMA_VV \ + printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(vs1), to_f(vs2), to_f(vd_old)); +#define DEBUG_RVV_FMA_VF \ + printf("vfma(%lu) vd=%f vs1=%f vs2=%f vd_old=%f\n", i, to_f(vd), to_f(rs1), to_f(vs2), to_f(vd_old)); +#else +#define DEBUG_RVV_FP_VV 0 +#define DEBUG_RVV_FP_VF 0 +#define DEBUG_RVV_FMA_VV 0 +#define DEBUG_RVV_FMA_VF 0 +#endif + // // vector: masking skip helper // @@ -1480,8 +1499,252 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ } \ p->VU.vstart = 0; + +// +// vector: vfp helper +// +#define VI_VFP_COMMON \ + require_extension('F'); \ + require_fp; \ + require(P.VU.vsew == 32); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + softfloat_roundingMode = STATE.frm; + +#define VI_VFP_LOOP_BASE \ + VI_VFP_COMMON \ + for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + VI_LOOP_ELEMENT_SKIP(); + +#define VI_VFP_LOOP_CMP_BASE \ + VI_VFP_COMMON \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \ + 
float32_t rs1 = f32(READ_FREG(rs1_num)); \ + VI_LOOP_ELEMENT_SKIP(); \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx); \ + uint64_t res = 0; + +#define VI_VFP_LOOP_REDUCTION_BASE \ + VI_VFP_COMMON \ + float32_t vd_0 = P.VU.elt<float32_t>(rd_num, 0); \ + float32_t vs1_0 = P.VU.elt<float32_t>(rs1_num, 0); \ + vd_0 = vs1_0;\ + for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + VI_LOOP_ELEMENT_SKIP(); \ + int32_t &vd = P.VU.elt<int32_t>(rd_num, i); \ + +#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \ + VI_VFP_COMMON \ + float64_t vd_0 = f64(P.VU.elt<float64_t>(rs1_num, 0).v); \ + for (reg_t i=P.VU.vstart; i<vl; ++i) { \ + VI_LOOP_ELEMENT_SKIP(); + +#define VI_VFP_LOOP_END \ + } \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ + uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 1)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 1)); \ + }\ + P.VU.vstart = 0; \ + +#define VI_VFP_LOOP_WIDE_END \ + } \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ + uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((P.VU.vsew >> 3) * 2)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((P.VU.vsew >> 3) * 2)); \ + }\ + P.VU.vstart = 0; \ + set_fp_exceptions; + +#define VI_VFP_LOOP_REDUCTION_END(x) \ + } \ + P.VU.vstart = 0; \ + set_fp_exceptions; \ + if (vl > 0 && TAIL_ZEROING) { \ + P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \ + for (reg_t i = 1; i < (P.VU.VLEN / x); ++i) { \ + P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \ + } \ + } + +#define VI_VFP_LOOP_CMP_END \ + switch(P.VU.vsew) { \ + case e32: { \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + break; \ + } \ + case e16: \ + case e8: \ + default: \ + require(0); \ + break; \ + }; \ + } \ + if (vl != 0 && TAIL_ZEROING){ \ + for (reg_t i=vl; i<P.VU.vlmax; ++i){ \ + const int mlen = P.VU.vmlen; \ + const int midx = (mlen * i) / 64; \ + const int mpos = (mlen * i) % 64; \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> 
(64 - mlen - mpos); \ + uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \ + vdi = (vdi & ~mmask);\ + }\ + }\ + P.VU.vstart = 0; \ + set_fp_exceptions; + +#define VI_VFP_VV_LOOP(BODY) \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e32: {\ + float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \ + float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \ + float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + BODY; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + case e8: \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_END + +#define VI_VFP_VV_LOOP_REDUCTION(BODY) \ + VI_VFP_LOOP_REDUCTION_BASE \ + float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + BODY; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_REDUCTION_END(e32) + +#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \ + VI_VFP_LOOP_WIDE_REDUCTION_BASE \ + float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \ + BODY; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_REDUCTION_END(e64) + +#define VI_VFP_VF_LOOP(BODY) \ + VI_VFP_LOOP_BASE \ + switch(P.VU.vsew) { \ + case e32: {\ + float32_t &vd = P.VU.elt<float32_t>(rd_num, i); \ + float32_t rs1 = f32(READ_FREG(rs1_num)); \ + float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + BODY; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + case e8: \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VF; \ + VI_VFP_LOOP_END + +#define VI_VFP_LOOP_CMP(BODY) \ + VI_VFP_LOOP_CMP_BASE \ + BODY; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_CMP_END \ + +#define VI_VFP_VF_LOOP_WIDE(BODY) \ + VI_VFP_LOOP_BASE \ + VI_CHECK_DSS(false); \ + switch(P.VU.vsew) { \ + case e32: {\ + float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ + float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \ + float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ + BODY; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + case e8: \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_WIDE_END + + +#define 
VI_VFP_VV_LOOP_WIDE(BODY) \ + VI_VFP_LOOP_BASE \ + VI_CHECK_DSS(true); \ + switch(P.VU.vsew) { \ + case e32: {\ + float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ + float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \ + float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \ + BODY; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + case e8: \ + default: \ + require(0); \ + break; \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_WIDE_END + +#define VI_VFP_WF_LOOP_WIDE(BODY) \ + VI_VFP_LOOP_BASE \ + VI_CHECK_DDS(false); \ + switch(P.VU.vsew) { \ + case e32: {\ + float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ + float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ + BODY; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + case e8: \ + default: \ + require(0); \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_WIDE_END + +#define VI_VFP_WV_LOOP_WIDE(BODY) \ + VI_VFP_LOOP_BASE \ + VI_CHECK_DDS(true); \ + switch(P.VU.vsew) { \ + case e32: {\ + float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ + float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + float64_t vs1 = f32_to_f64(P.VU.elt<float32_t>(rs1_num, i)); \ + BODY; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + case e8: \ + default: \ + require(0); \ + }; \ + DEBUG_RVV_FP_VV; \ + VI_VFP_LOOP_WIDE_END + + // Seems that 0x0 doesn't work. 
#define DEBUG_START 0x100 -#define DEBUG_END (0x1000 - 1) +#define DEBUG_END (0x1000 - 1) #endif diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h new file mode 100644 index 0000000..60dec4a --- /dev/null +++ b/riscv/insns/vfadd_vf.h @@ -0,0 +1,5 @@ +// vfadd.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f32_add(rs1, vs2); +}) diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h new file mode 100644 index 0000000..de0ae53 --- /dev/null +++ b/riscv/insns/vfadd_vv.h @@ -0,0 +1,5 @@ +// vfadd.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f32_add(vs1, vs2); +}) diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h new file mode 100644 index 0000000..75f29a2 --- /dev/null +++ b/riscv/insns/vfclass_v.h @@ -0,0 +1,5 @@ +// vfclass.v vd, vs2, vm +VI_VFP_VV_LOOP +({ + vd.v = f32_classify(vs2); +}) diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h new file mode 100644 index 0000000..311f875 --- /dev/null +++ b/riscv/insns/vfcvt_f_x_v.h @@ -0,0 +1,6 @@ +// vfcvt.f.x.v vd, vd2, vm +VI_VFP_VV_LOOP +({ + auto vs2_i = P.VU.elt<int32_t>(rs2_num, i); + vd = i32_to_f32(vs2_i); +}) diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h new file mode 100644 index 0000000..ceabea3 --- /dev/null +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -0,0 +1,6 @@ +// vfcvt.f.xu.v vd, vd2, vm +VI_VFP_VV_LOOP +({ + auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i); + vd = ui32_to_f32(vs2_u); +}) diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h new file mode 100644 index 0000000..ee53c6d --- /dev/null +++ b/riscv/insns/vfcvt_x_f_v.h @@ -0,0 +1,5 @@ +// vfcvt.x.f.v vd, vd2, vm +VI_VFP_VV_LOOP +({ + P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); +}) diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h new file mode 100644 index 0000000..76c7735 --- /dev/null +++ b/riscv/insns/vfcvt_xu_f_v.h @@ -0,0 +1,5 @@ +// vfcvt.xu.f.v vd, vd2, vm +VI_VFP_VV_LOOP +({ + P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, 
STATE.frm, true); +}) diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h new file mode 100644 index 0000000..2b8124c --- /dev/null +++ b/riscv/insns/vfdiv_vf.h @@ -0,0 +1,5 @@ +// vfdiv.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f32_div(vs2, rs1); +}) diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h new file mode 100644 index 0000000..c20ff1d --- /dev/null +++ b/riscv/insns/vfdiv_vv.h @@ -0,0 +1,5 @@ +// vfdiv.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f32_div(vs2, vs1); +}) diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h new file mode 100644 index 0000000..11c8bce --- /dev/null +++ b/riscv/insns/vfdot_vv.h @@ -0,0 +1,5 @@ +// vfdot.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f32_add(vd, f32_mul(vs2, vs1)); +}) diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h new file mode 100644 index 0000000..5013d34 --- /dev/null +++ b/riscv/insns/vfmacc_vf.h @@ -0,0 +1,5 @@ +// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h new file mode 100644 index 0000000..663a648 --- /dev/null +++ b/riscv/insns/vfmacc_vv.h @@ -0,0 +1,5 @@ +// vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h new file mode 100644 index 0000000..920c392 --- /dev/null +++ b/riscv/insns/vfmadd_vf.h @@ -0,0 +1,5 @@ +// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(vd, rs1, vs2); +}) diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h new file mode 100644 index 0000000..c967ec3 --- /dev/null +++ b/riscv/insns/vfmadd_vv.h @@ -0,0 +1,5 @@ +// vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(vd, vs1, vs2); +}) diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h new file mode 100644 index 0000000..eb70e48 --- /dev/null 
+++ b/riscv/insns/vfmax_vf.h @@ -0,0 +1,5 @@ +// vfmax +VI_VFP_VF_LOOP +({ + vd = f32_max(vs2, rs1); +}) diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h new file mode 100644 index 0000000..6d12f08 --- /dev/null +++ b/riscv/insns/vfmax_vv.h @@ -0,0 +1,5 @@ +// vfmax +VI_VFP_VV_LOOP +({ + vd = f32_max(vs2, vs1); +}) diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h new file mode 100644 index 0000000..73d6cee --- /dev/null +++ b/riscv/insns/vfmerge_vfm.h @@ -0,0 +1,25 @@ +// vfmerge_vf vd, vs2, vs1, vm +require_extension('F'); +require_fp; +require(P.VU.vsew == 32); +require(!P.VU.vill); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +for (reg_t i=P.VU.vstart; i<vl; ++i) { + auto &vd = P.VU.elt<float32_t>(rd_num, i); + auto rs1 = f32(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt<float32_t>(rs2_num, i); + + int midx = (P.VU.vmlen * i) / 64; + int mpos = (P.VU.vmlen * i) % 64; + bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + + vd = use_first ? 
rs1 : vs2; +} + +VI_TAIL_ZERO(1); +P.VU.vstart = 0; +set_fp_exceptions; diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h new file mode 100644 index 0000000..bf06638 --- /dev/null +++ b/riscv/insns/vfmin_vf.h @@ -0,0 +1,5 @@ +// vfmin vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f32_min(vs2, rs1); +}) diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h new file mode 100644 index 0000000..65d20ff --- /dev/null +++ b/riscv/insns/vfmin_vv.h @@ -0,0 +1,5 @@ +// vfmin vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f32_min(vs2, vs1); +}) diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h new file mode 100644 index 0000000..23661b3 --- /dev/null +++ b/riscv/insns/vfmsac_vf.h @@ -0,0 +1,5 @@ +// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h new file mode 100644 index 0000000..952c12e --- /dev/null +++ b/riscv/insns/vfmsac_vv.h @@ -0,0 +1,5 @@ +// vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h new file mode 100644 index 0000000..2328d07 --- /dev/null +++ b/riscv/insns/vfmsub_vf.h @@ -0,0 +1,5 @@ +// vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h new file mode 100644 index 0000000..a58f1e3 --- /dev/null +++ b/riscv/insns/vfmsub_vv.h @@ -0,0 +1,5 @@ +// vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h new file mode 100644 index 0000000..086b6d8 --- /dev/null +++ b/riscv/insns/vfmul_vf.h @@ -0,0 +1,5 @@ +// vfmul.vf vd, vs2, rs1, vm +VI_VFP_VF_LOOP +({ + vd = f32_mul(vs2, rs1); +}) diff --git a/riscv/insns/vfmul_vv.h 
b/riscv/insns/vfmul_vv.h new file mode 100644 index 0000000..259dc01 --- /dev/null +++ b/riscv/insns/vfmul_vv.h @@ -0,0 +1,5 @@ +// vfmul.vv vd, vs1, vs2, vm +VI_VFP_VV_LOOP +({ + vd = f32_mul(vs1, vs2); +}) diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h new file mode 100644 index 0000000..b956e6f --- /dev/null +++ b/riscv/insns/vfmv_f_s.h @@ -0,0 +1,33 @@ +// vfmv_f_s: rd = vs2[0] (rs1=0) +require(insn.v_vm() == 1); +require_fp; +require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); + +reg_t rs2_num = insn.rs2(); +uint64_t vs2_0 = 0; +const reg_t sew = P.VU.vsew; +switch(sew) { +case e8: + vs2_0 = P.VU.elt<uint8_t>(rs2_num, 0); + break; +case e16: + vs2_0 = P.VU.elt<uint16_t>(rs2_num, 0); + break; +case e32: + vs2_0 = P.VU.elt<uint32_t>(rs2_num, 0); + break; +default: + vs2_0 = P.VU.elt<uint64_t>(rs2_num, 0); + break; +} + +// NaN-extended: fill the bits above SEW with ones +if (FLEN > sew) { + vs2_0 = vs2_0 | ~((1ul << sew) - 1); +} + +if (FLEN == 64) { + WRITE_FRD(f64(vs2_0)); +} else { + WRITE_FRD(f32(vs2_0)); +} diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h new file mode 100644 index 0000000..13423b1 --- /dev/null +++ b/riscv/insns/vfmv_s_f.h @@ -0,0 +1,29 @@ +// vfmv_s_f: vd[0] = rs1 (vs2=0) +require(insn.v_vm() == 1); +require_fp; +require(P.VU.vsew == e32); +reg_t vl = P.VU.vl; + +if (vl > 0) { + reg_t rd_num = insn.rd(); + reg_t sew = P.VU.vsew; + + if (FLEN == 64) + P.VU.elt<uint32_t>(rd_num, 0) = f64(FRS1).v; + else + P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v; + + const reg_t max_len = P.VU.VLEN / sew; + for (reg_t i = 1; i < max_len; ++i) { + switch(sew) { + case e32: + P.VU.elt<uint32_t>(rd_num, i) = 0; + break; + default: + require(false); + break; + } + } + + vl = 0; +} diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h new file mode 100644 index 0000000..5b4fb2b --- /dev/null +++ b/riscv/insns/vfmv_v_f.h @@ -0,0 +1,20 @@ +// vfmv.v.f vd, rs1 # vd[i] = f[rs1] +require_extension('F'); +require_fp; +require(P.VU.vsew == 
32); +require(!P.VU.vill); +reg_t vl = P.VU.vl; +reg_t sew = P.VU.vsew; +reg_t rd_num = insn.rd(); +reg_t rs1_num = insn.rs1(); +reg_t rs2_num = insn.rs2(); +for (reg_t i=P.VU.vstart; i<vl; ++i) { + auto &vd = P.VU.elt<float32_t>(rd_num, i); + auto rs1 = f32(READ_FREG(rs1_num)); + + vd = rs1; +} + +VI_TAIL_ZERO(1); +P.VU.vstart = 0; +set_fp_exceptions; diff --git a/riscv/insns/vfncvt_f_f_v.h b/riscv/insns/vfncvt_f_f_v.h new file mode 100644 index 0000000..b35cd60 --- /dev/null +++ b/riscv/insns/vfncvt_f_f_v.h @@ -0,0 +1,6 @@ +// vfncvt.f.f.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_SD; + auto vs2 = P.VU.elt<float64_t>(rs2_num, i); + P.VU.elt<float32_t>(rd_num, i) = f64_to_f32(vs2); +VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_f_x_v.h b/riscv/insns/vfncvt_f_x_v.h new file mode 100644 index 0000000..69bdba8 --- /dev/null +++ b/riscv/insns/vfncvt_f_x_v.h @@ -0,0 +1,6 @@ +// vfncvt.f.x.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_SD; + auto vs2 = P.VU.elt<int64_t>(rs2_num, i); + P.VU.elt<float32_t>(rd_num, i) = i64_to_f32(vs2); +VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_f_xu_v.h b/riscv/insns/vfncvt_f_xu_v.h new file mode 100644 index 0000000..6f37734 --- /dev/null +++ b/riscv/insns/vfncvt_f_xu_v.h @@ -0,0 +1,6 @@ +// vfncvt.f.xu.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_SD; + auto vs2 = P.VU.elt<uint64_t>(rs2_num, i); + P.VU.elt<float32_t>(rd_num, i) = ui64_to_f32(vs2); +VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_x_f_v.h b/riscv/insns/vfncvt_x_f_v.h new file mode 100644 index 0000000..8985f1b --- /dev/null +++ b/riscv/insns/vfncvt_x_f_v.h @@ -0,0 +1,6 @@ +// vfncvt.x.f.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_SD; + auto vs2 = P.VU.elt<float64_t>(rs2_num, i); + P.VU.elt<int32_t>(rd_num, i) = f64_to_i32(vs2, STATE.frm, true); +VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_xu_f_v.h b/riscv/insns/vfncvt_xu_f_v.h new file mode 100644 index 0000000..2db8d82 --- /dev/null +++ b/riscv/insns/vfncvt_xu_f_v.h @@ -0,0 +1,6 @@ +// vfncvt.xu.f.v vd, vs2, vm 
+VI_VFP_LOOP_BASE + VI_CHECK_SD; + auto vs2 = P.VU.elt<float64_t>(rs2_num, i); + P.VU.elt<uint32_t>(rd_num, i) = f64_to_ui32(vs2, STATE.frm, true); +VI_VFP_LOOP_END diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h new file mode 100644 index 0000000..04a31bf --- /dev/null +++ b/riscv/insns/vfnmacc_vf.h @@ -0,0 +1,5 @@ +// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h new file mode 100644 index 0000000..b950df9 --- /dev/null +++ b/riscv/insns/vfnmacc_vv.h @@ -0,0 +1,5 @@ +// vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h new file mode 100644 index 0000000..f8f3b83 --- /dev/null +++ b/riscv/insns/vfnmadd_vf.h @@ -0,0 +1,5 @@ +// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h new file mode 100644 index 0000000..f96d102 --- /dev/null +++ b/riscv/insns/vfnmadd_vv.h @@ -0,0 +1,5 @@ +// vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); +}) diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h new file mode 100644 index 0000000..c3dc12c --- /dev/null +++ b/riscv/insns/vfnmsac_vf.h @@ -0,0 +1,5 @@ +// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); +}) diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h new file mode 100644 index 0000000..0ecd648 --- /dev/null +++ b/riscv/insns/vfnmsac_vv.h @@ -0,0 +1,5 @@ +// vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(f32(vs1.v ^ 
F32_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h new file mode 100644 index 0000000..1879b9e --- /dev/null +++ b/riscv/insns/vfnmsub_vf.h @@ -0,0 +1,5 @@ +// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i] +VI_VFP_VF_LOOP +({ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); +}) diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h new file mode 100644 index 0000000..da9f59c --- /dev/null +++ b/riscv/insns/vfnmsub_vv.h @@ -0,0 +1,5 @@ +// vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i] +VI_VFP_VV_LOOP +({ + vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); +}) diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h new file mode 100644 index 0000000..49e4293 --- /dev/null +++ b/riscv/insns/vfrdiv_vf.h @@ -0,0 +1,5 @@ +// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] +VI_VFP_VF_LOOP +({ + vd = f32_div(rs1, vs2); +}) diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h new file mode 100644 index 0000000..dca10bf --- /dev/null +++ b/riscv/insns/vfredmax_vs.h @@ -0,0 +1,5 @@ +// vfredmax vd, vs2, vs1 +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f32_max(vd_0, vs2); +}) diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h new file mode 100644 index 0000000..b4556bc --- /dev/null +++ b/riscv/insns/vfredmin_vs.h @@ -0,0 +1,5 @@ +// vfredmin vd, vs2, vs1 +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f32_min(vd_0, vs2); +}) diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h new file mode 100644 index 0000000..87422ee --- /dev/null +++ b/riscv/insns/vfredosum_vs.h @@ -0,0 +1,5 @@ +// vfredosum: vd[0] = sum( vs2[*] , vs1[0] ) +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f32_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h new file mode 100644 index 0000000..b50b45f --- /dev/null +++ b/riscv/insns/vfredsum_vs.h @@ -0,0 +1,5 @@ +// vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) +VI_VFP_VV_LOOP_REDUCTION +({ + vd_0 = f32_add(vd_0, vs2); +}) diff --git 
a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h new file mode 100644 index 0000000..ee8ac83 --- /dev/null +++ b/riscv/insns/vfrsub_vf.h @@ -0,0 +1,5 @@ +// vfrsub.vf vd, vs2, rs1 # vd[i] = f[rs1] - vs2[i] +VI_VFP_VF_LOOP +({ + vd = f32_sub(rs1, vs2); +}) diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h new file mode 100644 index 0000000..90d78fa --- /dev/null +++ b/riscv/insns/vfsgnj_vf.h @@ -0,0 +1,5 @@ +// vfsgnj vd, vs2, vs1 +VI_VFP_VF_LOOP +({ + vd = fsgnj32(rs1.v, vs2.v, false, false); +}) diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h new file mode 100644 index 0000000..1662a4c --- /dev/null +++ b/riscv/insns/vfsgnj_vv.h @@ -0,0 +1,5 @@ +// vfsgnj +VI_VFP_VV_LOOP +({ + vd = fsgnj32(vs1.v, vs2.v, false, false); +}) diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h new file mode 100644 index 0000000..af722eb --- /dev/null +++ b/riscv/insns/vfsgnjn_vf.h @@ -0,0 +1,5 @@ +// vfsgnjn +VI_VFP_VF_LOOP +({ + vd = fsgnj32(rs1.v, vs2.v, true, false); +}) diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h new file mode 100644 index 0000000..6ae3369 --- /dev/null +++ b/riscv/insns/vfsgnjn_vv.h @@ -0,0 +1,5 @@ +// vfsgnjn +VI_VFP_VV_LOOP +({ + vd = fsgnj32(vs1.v, vs2.v, true, false); +}) diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h new file mode 100644 index 0000000..1e82369 --- /dev/null +++ b/riscv/insns/vfsgnjx_vf.h @@ -0,0 +1,5 @@ +// vfsgnjx +VI_VFP_VF_LOOP +({ + vd = fsgnj32(rs1.v, vs2.v, false, true); +}) diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h new file mode 100644 index 0000000..ba1cb9b --- /dev/null +++ b/riscv/insns/vfsgnjx_vv.h @@ -0,0 +1,5 @@ +// vfsgnjx +VI_VFP_VV_LOOP +({ + vd = fsgnj32(vs1.v, vs2.v, false, true); +}) diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h new file mode 100644 index 0000000..4931037 --- /dev/null +++ b/riscv/insns/vfsqrt_v.h @@ -0,0 +1,5 @@ +// vfsqrt.v vd, vs2, vm +VI_VFP_VV_LOOP +({ + vd = f32_sqrt(vs2); +}) diff --git a/riscv/insns/vfsub_vf.h 
b/riscv/insns/vfsub_vf.h new file mode 100644 index 0000000..38d6acc --- /dev/null +++ b/riscv/insns/vfsub_vf.h @@ -0,0 +1,5 @@ +// vfsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP +({ + vd = f32_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h new file mode 100644 index 0000000..71e7a43 --- /dev/null +++ b/riscv/insns/vfsub_vv.h @@ -0,0 +1,5 @@ +// vfsub.vv vd, vs2, vs1 +VI_VFP_VV_LOOP +({ + vd = f32_sub(vs2, vs1); +}) diff --git a/riscv/insns/vfwadd_vf.h b/riscv/insns/vfwadd_vf.h new file mode 100644 index 0000000..ecac202 --- /dev/null +++ b/riscv/insns/vfwadd_vf.h @@ -0,0 +1,5 @@ +// vfwadd.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_add(vs2, rs1); +}) diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h new file mode 100644 index 0000000..0665cdc --- /dev/null +++ b/riscv/insns/vfwadd_vv.h @@ -0,0 +1,5 @@ +// vfwadd.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_add(vs2, vs1); +}) diff --git a/riscv/insns/vfwadd_wf.h b/riscv/insns/vfwadd_wf.h new file mode 100644 index 0000000..eb38d0d --- /dev/null +++ b/riscv/insns/vfwadd_wf.h @@ -0,0 +1,5 @@ +// vfwadd.wf vd, vs2, vs1 +VI_VFP_WF_LOOP_WIDE +({ + vd = f64_add(vs2, rs1); +}) diff --git a/riscv/insns/vfwadd_wv.h b/riscv/insns/vfwadd_wv.h new file mode 100644 index 0000000..675ef22 --- /dev/null +++ b/riscv/insns/vfwadd_wv.h @@ -0,0 +1,5 @@ +// vfwadd.wv vd, vs2, vs1 +VI_VFP_WV_LOOP_WIDE +({ + vd = f64_add(vs2, vs1); +}) diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h new file mode 100644 index 0000000..4d6b4fc --- /dev/null +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -0,0 +1,7 @@ +// vfwcvt.f.f.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_DSS(false); + auto vs2 = P.VU.elt<float32_t>(rs2_num, i); + P.VU.elt<float64_t>(rd_num, i) = f32_to_f64(vs2); + set_fp_exceptions; +VI_VFP_LOOP_WIDE_END diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h new file mode 100644 index 0000000..ab5d825 --- /dev/null +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -0,0 +1,7 
@@ +// vfwcvt.f.x.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_DSS(false); + auto vs2 = P.VU.elt<int32_t>(rs2_num, i); + P.VU.elt<float64_t>(rd_num, i) = i32_to_f64(vs2); + set_fp_exceptions; +VI_VFP_LOOP_WIDE_END diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h new file mode 100644 index 0000000..8af8d7c --- /dev/null +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -0,0 +1,7 @@ +// vfwcvt.f.xu.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_DSS(false); + auto vs2 = P.VU.elt<uint32_t>(rs2_num, i); + P.VU.elt<float64_t>(rd_num, i) = ui32_to_f64(vs2); + set_fp_exceptions; +VI_VFP_LOOP_WIDE_END diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h new file mode 100644 index 0000000..06e81d4 --- /dev/null +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -0,0 +1,7 @@ +// vfwcvt.x.f.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_DSS(false); + auto vs2 = P.VU.elt<float32_t>(rs2_num, i); + P.VU.elt<int64_t>(rd_num, i) = f32_to_i64(vs2, STATE.frm, true); + set_fp_exceptions; +VI_VFP_LOOP_WIDE_END diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h new file mode 100644 index 0000000..cc82481 --- /dev/null +++ b/riscv/insns/vfwcvt_xu_f_v.h @@ -0,0 +1,7 @@ +// vfwcvt.xu.f.v vd, vs2, vm +VI_VFP_LOOP_BASE + VI_CHECK_DSS(false); + auto vs2 = P.VU.elt<float32_t>(rs2_num, i); + P.VU.elt<uint64_t>(rd_num, i) = f32_to_ui64(vs2, STATE.frm, true); + set_fp_exceptions; +VI_VFP_LOOP_WIDE_END diff --git a/riscv/insns/vfwmacc_vf.h b/riscv/insns/vfwmacc_vf.h new file mode 100644 index 0000000..6ee011e --- /dev/null +++ b/riscv/insns/vfwmacc_vf.h @@ -0,0 +1,5 @@ +// vfwmacc.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_mulAdd(rs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmacc_vv.h b/riscv/insns/vfwmacc_vv.h new file mode 100644 index 0000000..99839af --- /dev/null +++ b/riscv/insns/vfwmacc_vv.h @@ -0,0 +1,5 @@ +// vfwmacc.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_mulAdd(vs1, vs2, vd); +}) diff --git a/riscv/insns/vfwmsac_vf.h b/riscv/insns/vfwmsac_vf.h 
new file mode 100644 index 0000000..ea8f050 --- /dev/null +++ b/riscv/insns/vfwmsac_vf.h @@ -0,0 +1,5 @@ +// vfwmsac.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwmsac_vv.h b/riscv/insns/vfwmsac_vv.h new file mode 100644 index 0000000..8157170 --- /dev/null +++ b/riscv/insns/vfwmsac_vv.h @@ -0,0 +1,5 @@ +// vfwmsac.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwmul_vf.h b/riscv/insns/vfwmul_vf.h new file mode 100644 index 0000000..884e66f --- /dev/null +++ b/riscv/insns/vfwmul_vf.h @@ -0,0 +1,5 @@ +// vfwmul.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_mul(vs2, rs1); +}) diff --git a/riscv/insns/vfwmul_vv.h b/riscv/insns/vfwmul_vv.h new file mode 100644 index 0000000..f8e717e --- /dev/null +++ b/riscv/insns/vfwmul_vv.h @@ -0,0 +1,5 @@ +// vfwmul.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_mul(vs2, vs1); +}) diff --git a/riscv/insns/vfwnmacc_vf.h b/riscv/insns/vfwnmacc_vf.h new file mode 100644 index 0000000..bccc24f --- /dev/null +++ b/riscv/insns/vfwnmacc_vf.h @@ -0,0 +1,5 @@ +// vfwnmacc.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwnmacc_vv.h b/riscv/insns/vfwnmacc_vv.h new file mode 100644 index 0000000..3dcba1d --- /dev/null +++ b/riscv/insns/vfwnmacc_vv.h @@ -0,0 +1,5 @@ +// vfwnmacc.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); +}) diff --git a/riscv/insns/vfwnmsac_vf.h b/riscv/insns/vfwnmsac_vf.h new file mode 100644 index 0000000..32ef624 --- /dev/null +++ b/riscv/insns/vfwnmsac_vf.h @@ -0,0 +1,5 @@ +// vfwnmsac.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfwnmsac_vv.h b/riscv/insns/vfwnmsac_vv.h new file mode 100644 index 0000000..d2447e1 --- /dev/null +++ 
b/riscv/insns/vfwnmsac_vv.h @@ -0,0 +1,5 @@ +// vfwnmsac.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); +}) diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h new file mode 100644 index 0000000..b47e2c7 --- /dev/null +++ b/riscv/insns/vfwredosum_vs.h @@ -0,0 +1,5 @@ +// vfwredosum.vs vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE_REDUCTION +({ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h new file mode 100644 index 0000000..3ce591b --- /dev/null +++ b/riscv/insns/vfwredsum_vs.h @@ -0,0 +1,5 @@ +// vfwredsum.vs vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE_REDUCTION +({ + vd_0 = f64_add(vd_0, vs2); +}) diff --git a/riscv/insns/vfwsub_vf.h b/riscv/insns/vfwsub_vf.h new file mode 100644 index 0000000..1d20c38 --- /dev/null +++ b/riscv/insns/vfwsub_vf.h @@ -0,0 +1,5 @@ +// vfwsub.vf vd, vs2, rs1 +VI_VFP_VF_LOOP_WIDE +({ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfwsub_vv.h b/riscv/insns/vfwsub_vv.h new file mode 100644 index 0000000..0a72fea --- /dev/null +++ b/riscv/insns/vfwsub_vv.h @@ -0,0 +1,5 @@ +// vfwsub.vv vd, vs2, vs1 +VI_VFP_VV_LOOP_WIDE +({ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/insns/vfwsub_wf.h b/riscv/insns/vfwsub_wf.h new file mode 100644 index 0000000..fa3d747 --- /dev/null +++ b/riscv/insns/vfwsub_wf.h @@ -0,0 +1,5 @@ +// vfwsub.wf vd, vs2, rs1 +VI_VFP_WF_LOOP_WIDE +({ + vd = f64_sub(vs2, rs1); +}) diff --git a/riscv/insns/vfwsub_wv.h b/riscv/insns/vfwsub_wv.h new file mode 100644 index 0000000..4c6fcf6 --- /dev/null +++ b/riscv/insns/vfwsub_wv.h @@ -0,0 +1,5 @@ +// vfwsub.wv vd, vs2, vs1 +VI_VFP_WV_LOOP_WIDE +({ + vd = f64_sub(vs2, vs1); +}) diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index faf4019..2479ba3 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -496,6 +496,101 @@ riscv_insn_ext_v_alu_int = \ vxor_vv \ vxor_vx \ +riscv_insn_ext_v_alu_fp = \ + vfadd_vf \ + vfadd_vv \ + vfclass_v \ + vfcvt_f_x_v \ + 
vfcvt_f_xu_v \ + vfcvt_x_f_v \ + vfcvt_xu_f_v \ + vfdiv_vf \ + vfdiv_vv \ + vfdot_vv \ + vfmacc_vf \ + vfmacc_vv \ + vfmadd_vf \ + vfmadd_vv \ + vfmax_vf \ + vfmax_vv \ + vfmerge_vfm \ + vfmin_vf \ + vfmin_vv \ + vfmsac_vf \ + vfmsac_vv \ + vfmsub_vf \ + vfmsub_vv \ + vfmul_vf \ + vfmul_vv \ + vfmv_f_s \ + vfmv_s_f \ + vfmv_v_f \ + vfncvt_f_f_v \ + vfncvt_f_x_v \ + vfncvt_f_xu_v \ + vfncvt_x_f_v \ + vfncvt_xu_f_v \ + vfnmacc_vf \ + vfnmacc_vv \ + vfnmadd_vf \ + vfnmadd_vv \ + vfnmsac_vf \ + vfnmsac_vv \ + vfnmsub_vf \ + vfnmsub_vv \ + vfrdiv_vf \ + vfredmax_vs \ + vfredmin_vs \ + vfredosum_vs \ + vfredsum_vs \ + vfrsub_vf \ + vfsgnj_vf \ + vfsgnj_vv \ + vfsgnjn_vf \ + vfsgnjn_vv \ + vfsgnjx_vf \ + vfsgnjx_vv \ + vfsqrt_v \ + vfsub_vf \ + vfsub_vv \ + vfwadd_vf \ + vfwadd_vv \ + vfwadd_wf \ + vfwadd_wv \ + vfwcvt_f_f_v \ + vfwcvt_f_x_v \ + vfwcvt_f_xu_v \ + vfwcvt_x_f_v \ + vfwcvt_xu_f_v \ + vfwmacc_vf \ + vfwmacc_vv \ + vfwmsac_vf \ + vfwmsac_vv \ + vfwmul_vf \ + vfwmul_vv \ + vfwnmacc_vf \ + vfwnmacc_vv \ + vfwnmsac_vf \ + vfwnmsac_vv \ + vfwredosum_vs \ + vfwredsum_vs \ + vfwsub_vf \ + vfwsub_vv \ + vfwsub_wf \ + vfwsub_wv \ + vmfeq_vf \ + vmfeq_vv \ + vmfge_vf \ + vmfgt_vf \ + vmfle_vf \ + vmfle_vv \ + vmflt_vf \ + vmflt_vv \ + vmfne_vf \ + vmfne_vv \ + vmford_vf \ + vmford_vv \ + riscv_insn_ext_v_ldst = \ vlb_v \ vlh_v \ @@ -547,6 +642,7 @@ riscv_insn_ext_v_ctrl = \ vsetvl \ riscv_insn_ext_v = \ + $(riscv_insn_ext_v_alu_fp) \ $(riscv_insn_ext_v_alu_int) \ $(riscv_insn_ext_v_ctrl) \ $(riscv_insn_ext_v_ldst) \ |