author    Chih-Min Chao <chihmin.chao@sifive.com>    2020-04-01 01:06:49 -0700
committer Chih-Min Chao <chihmin.chao@sifive.com>    2020-04-15 09:53:47 -0700
commit    6ae23d6b144e07044d23cdb6bcd3dd4a2406ad89 (patch)
tree      a1071de535ebb4510b6de3a44e8fe64d6f110df6
parent    52cef48b3b3d5e36306552ad5f4c8f0e991a56d9 (diff)
rvv: add .vf fp16 instructions
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
-rw-r--r--  riscv/decode.h                  | 12
-rw-r--r--  riscv/insns/vfadd_vf.h          |  3
-rw-r--r--  riscv/insns/vfcvt_f_x_v.h       |  4
-rw-r--r--  riscv/insns/vfcvt_f_xu_v.h      |  4
-rw-r--r--  riscv/insns/vfcvt_rtz_x_f_v.h   |  4
-rw-r--r--  riscv/insns/vfcvt_rtz_xu_f_v.h  |  4
-rw-r--r--  riscv/insns/vfcvt_x_f_v.h       |  3
-rw-r--r--  riscv/insns/vfdiv_vf.h          |  3
-rw-r--r--  riscv/insns/vfmacc_vf.h         |  3
-rw-r--r--  riscv/insns/vfmadd_vf.h         |  3
-rw-r--r--  riscv/insns/vfmax_vf.h          |  3
-rw-r--r--  riscv/insns/vfmin_vf.h          |  3
-rw-r--r--  riscv/insns/vfmsac_vf.h         |  3
-rw-r--r--  riscv/insns/vfmsub_vf.h         |  3
-rw-r--r--  riscv/insns/vfmul_vf.h          |  3
-rw-r--r--  riscv/insns/vfnmacc_vf.h        |  3
-rw-r--r--  riscv/insns/vfnmadd_vf.h        |  3
-rw-r--r--  riscv/insns/vfnmsac_vf.h        |  3
-rw-r--r--  riscv/insns/vfnmsub_vf.h        |  3
-rw-r--r--  riscv/insns/vfrdiv_vf.h         |  3
-rw-r--r--  riscv/insns/vfrsub_vf.h         |  3
-rw-r--r--  riscv/insns/vfsgnj_vf.h         |  3
-rw-r--r--  riscv/insns/vfsgnjn_vf.h        |  3
-rw-r--r--  riscv/insns/vfsgnjx_vf.h        |  3
-rw-r--r--  riscv/insns/vfsub_vf.h          |  3
25 files changed, 85 insertions, 3 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index c7b4c1a..1a20716 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -1830,10 +1830,18 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_REDUCTION_END(e64)
-#define VI_VFP_VF_LOOP(BODY32, BODY64) \
+#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \
VI_CHECK_SSS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
+ case e16: {\
+ float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \
+ float16_t rs1 = f16(READ_FREG(rs1_num)); \
+ float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \
+ BODY16; \
+ set_fp_exceptions; \
+ break; \
+ }\
case e32: {\
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t rs1 = f32(READ_FREG(rs1_num)); \
@@ -1850,8 +1858,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
set_fp_exceptions; \
break; \
}\
- case e16: \
- case e8: \
default: \
require(0); \
break; \
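The new e16 arm reads its scalar operand with f16(READ_FREG(rs1_num)), i.e. it takes the half-precision value from the low 16 bits of the scalar FP register. A minimal standalone sketch of that assumed unboxing step follows; the helper name is hypothetical, and spike's real f16() also deals with its freg_t register representation and NaN-boxing checks, which this sketch omits.

#include <cstdint>
#include <cstdio>

typedef struct { uint16_t v; } float16_t;  // mirrors softfloat's binary16 container

// Hypothetical stand-in for f16(READ_FREG(...)): keep only the low 16 bits.
static float16_t f16_from_freg_bits(uint64_t freg_bits) {
  return float16_t{ (uint16_t)freg_bits };
}

int main() {
  float16_t h = f16_from_freg_bits(0xFFFFFFFFFFFF3C00ull);  // NaN-boxed fp16 +1.0
  printf("0x%04x\n", h.v);                                  // prints 0x3c00
  return 0;
}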
diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h
index bdb7f75..2b808e0 100644
--- a/riscv/insns/vfadd_vf.h
+++ b/riscv/insns/vfadd_vf.h
@@ -1,6 +1,9 @@
// vfadd.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = f16_add(rs1, vs2);
+},
+{
vd = f32_add(rs1, vs2);
},
{
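With the extra BODY16 argument, every .vf handler now supplies three bodies and the loop macro picks one per element according to SEW, as vfadd.vf above illustrates. A toy, self-contained model of that dispatch shape (names and types are stand-ins, not spike's):

#include <cstdio>

enum vsew_t { e16 = 16, e32 = 32, e64 = 64 };

// Model of the macro's switch: select a body by element width.
template <typename B16, typename B32, typename B64>
void vf_loop_model(vsew_t vsew, B16 body16, B32 body32, B64 body64) {
  switch (vsew) {
    case e16: body16(); break;  // the arm this patch adds
    case e32: body32(); break;
    case e64: body64(); break;
  }
}

int main() {
  vf_loop_model(e16,
                [] { puts("f16_add"); },   // BODY16
                [] { puts("f32_add"); },   // BODY32
                [] { puts("f64_add"); });  // BODY64
  return 0;
}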
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
index fdaa697..dc8363c 100644
--- a/riscv/insns/vfcvt_f_x_v.h
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -2,6 +2,10 @@
VI_VFP_VF_LOOP
({
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
+ vd = i32_to_f16(P.VU.elt<int16_t>(rs2_num, i));
+},
+{
+ auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
},
{
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
index 01ea61c..8619aa2 100644
--- a/riscv/insns/vfcvt_f_xu_v.h
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -2,6 +2,10 @@
VI_VFP_VF_LOOP
({
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
+ vd = ui32_to_f16(P.VU.elt<uint16_t>(rs2_num, i));
+},
+{
+ auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
},
{
diff --git a/riscv/insns/vfcvt_rtz_x_f_v.h b/riscv/insns/vfcvt_rtz_x_f_v.h
index b83365f..5493b09 100644
--- a/riscv/insns/vfcvt_rtz_x_f_v.h
+++ b/riscv/insns/vfcvt_rtz_x_f_v.h
@@ -2,6 +2,10 @@
VI_VFP_VF_LOOP
({
softfloat_roundingMode = softfloat_round_minMag;
+ P.VU.elt<int16_t>(rd_num, i) = f16_to_i16(vs2, STATE.frm, true);
+},
+{
+ softfloat_roundingMode = softfloat_round_minMag;
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
},
{
diff --git a/riscv/insns/vfcvt_rtz_xu_f_v.h b/riscv/insns/vfcvt_rtz_xu_f_v.h
index 60cf8c8..0359b81 100644
--- a/riscv/insns/vfcvt_rtz_xu_f_v.h
+++ b/riscv/insns/vfcvt_rtz_xu_f_v.h
@@ -2,6 +2,10 @@
VI_VFP_VF_LOOP
({
softfloat_roundingMode = softfloat_round_minMag;
+ P.VU.elt<uint16_t>(rd_num, i) = f16_to_ui16(vs2, STATE.frm, true);
+},
+{
+ softfloat_roundingMode = softfloat_round_minMag;
P.VU.elt<uint32_t>(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
},
{
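Both RTZ variants pin softfloat_round_minMag, i.e. rounding toward zero regardless of the dynamic frm. A self-contained sketch of that truncating fp16-to-integer conversion, assuming the standard binary16 layout (1 sign, 5 exponent, 10 fraction bits); spike itself defers to softfloat, and NaN and saturation handling are simplified here:

#include <cstdint>
#include <cmath>
#include <cstdio>

static int32_t f16_bits_to_i32_rtz(uint16_t h) {
  int sign = (h >> 15) ? -1 : 1;
  int exp  = (h >> 10) & 0x1f;
  int frac = h & 0x3ff;
  if (exp == 0x1f) return sign < 0 ? INT32_MIN : INT32_MAX;  // inf/NaN: saturate (NaN simplified)
  if (exp == 0)    return 0;                                 // zero/subnormal truncates to 0
  double mag = std::ldexp(1024 + frac, exp - 25);            // (1 + frac/1024) * 2^(exp - 15)
  return (int32_t)(sign * std::trunc(mag));
}

int main() {
  printf("%d\n", f16_bits_to_i32_rtz(0xC580));  // -5.5 truncates toward zero to -5
  return 0;
}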
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
index 96bc481..2ce19fc 100644
--- a/riscv/insns/vfcvt_x_f_v.h
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -1,6 +1,9 @@
// vfcvt.x.f.v vd, vs2, vm
VI_VFP_VF_LOOP
({
+ P.VU.elt<int16_t>(rd_num, i) = f16_to_i16(vs2, STATE.frm, true);
+},
+{
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
},
{
diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h
index ce21730..a703ef0 100644
--- a/riscv/insns/vfdiv_vf.h
+++ b/riscv/insns/vfdiv_vf.h
@@ -1,6 +1,9 @@
// vfdiv.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = f16_div(vs2, rs1);
+},
+{
vd = f32_div(vs2, rs1);
},
{
diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h
index fca4184..61578d3 100644
--- a/riscv/insns/vfmacc_vf.h
+++ b/riscv/insns/vfmacc_vf.h
@@ -1,6 +1,9 @@
// vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * f[rs1]) + vd[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(rs1, vs2, vd);
+},
+{
vd = f32_mulAdd(rs1, vs2, vd);
},
{
diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h
index 7707dae..2a01429 100644
--- a/riscv/insns/vfmadd_vf.h
+++ b/riscv/insns/vfmadd_vf.h
@@ -1,6 +1,9 @@
// vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(vd, rs1, vs2);
+},
+{
vd = f32_mulAdd(vd, rs1, vs2);
},
{
diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h
index a8df880..c4b74cb 100644
--- a/riscv/insns/vfmax_vf.h
+++ b/riscv/insns/vfmax_vf.h
@@ -1,6 +1,9 @@
// vfmax.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = f16_max(vs2, rs1);
+},
+{
vd = f32_max(vs2, rs1);
},
{
diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h
index a55462b..1560cdf7 100644
--- a/riscv/insns/vfmin_vf.h
+++ b/riscv/insns/vfmin_vf.h
@@ -1,6 +1,9 @@
// vfmin.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = f16_min(vs2, rs1);
+},
+{
vd = f32_min(vs2, rs1);
},
{
diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h
index 0f42560..8af397b 100644
--- a/riscv/insns/vfmsac_vf.h
+++ b/riscv/insns/vfmsac_vf.h
@@ -1,6 +1,9 @@
// vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN));
+},
+{
vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN));
},
{
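In the fused multiply-add family above, an fp16 operand is negated by flipping its sign bit (vd.v ^ F16_SIGN) rather than by an arithmetic subtraction, so the negation is exact and well defined even for NaN and signed zero. A minimal sketch, assuming F16_SIGN is the binary16 sign-bit mask 0x8000:

#include <cstdint>
#include <cstdio>

static const uint16_t F16_SIGN = 0x8000;  // sign-bit mask assumed by the patch

static uint16_t f16_negate(uint16_t bits) {
  return (uint16_t)(bits ^ F16_SIGN);  // flip only the sign bit
}

int main() {
  printf("0x%04x\n", f16_negate(0x3C00));  // +1.0 becomes 0xbc00 (-1.0)
  printf("0x%04x\n", f16_negate(0x8000));  // -0.0 becomes 0x0000 (+0.0)
  return 0;
}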
diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h
index bd968e3..ab77b4c 100644
--- a/riscv/insns/vfmsub_vf.h
+++ b/riscv/insns/vfmsub_vf.h
@@ -1,6 +1,9 @@
// vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN));
+},
+{
vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN));
},
{
diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h
index 9e7d481..f5f63e4 100644
--- a/riscv/insns/vfmul_vf.h
+++ b/riscv/insns/vfmul_vf.h
@@ -1,6 +1,9 @@
// vfmul.vf vd, vs2, rs1, vm
VI_VFP_VF_LOOP
({
+ vd = f16_mul(vs2, rs1);
+},
+{
vd = f32_mul(vs2, rs1);
},
{
diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h
index da58d3a..1b99302 100644
--- a/riscv/insns/vfnmacc_vf.h
+++ b/riscv/insns/vfnmacc_vf.h
@@ -1,6 +1,9 @@
// vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN));
+},
+{
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN));
},
{
diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h
index b26f377..cb9c217 100644
--- a/riscv/insns/vfnmadd_vf.h
+++ b/riscv/insns/vfnmadd_vf.h
@@ -1,6 +1,9 @@
// vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN));
+},
+{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN));
},
{
diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h
index b78d0ca..aa6baa3 100644
--- a/riscv/insns/vfnmsac_vf.h
+++ b/riscv/insns/vfnmsac_vf.h
@@ -1,6 +1,9 @@
// vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd);
+},
+{
vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd);
},
{
diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h
index 6c6dc27..43aa9e2 100644
--- a/riscv/insns/vfnmsub_vf.h
+++ b/riscv/insns/vfnmsub_vf.h
@@ -1,6 +1,9 @@
// vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i]
VI_VFP_VF_LOOP
({
+ vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2);
+},
+{
vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2);
},
{
diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h
index 73ec534..b283343 100644
--- a/riscv/insns/vfrdiv_vf.h
+++ b/riscv/insns/vfrdiv_vf.h
@@ -1,6 +1,9 @@
// vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i]
VI_VFP_VF_LOOP
({
+ vd = f16_div(rs1, vs2);
+},
+{
vd = f32_div(rs1, vs2);
},
{
diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h
index d9a1986..7fb26a5 100644
--- a/riscv/insns/vfrsub_vf.h
+++ b/riscv/insns/vfrsub_vf.h
@@ -1,6 +1,9 @@
// vfrsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = f16_sub(rs1, vs2);
+},
+{
vd = f32_sub(rs1, vs2);
},
{
diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h
index c7f731b..ce06185 100644
--- a/riscv/insns/vfsgnj_vf.h
+++ b/riscv/insns/vfsgnj_vf.h
@@ -1,6 +1,9 @@
// vfsgnj.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = fsgnj16(vs2.v, rs1.v, false, false);
+},
+{
vd = fsgnj32(vs2.v, rs1.v, false, false);
},
{
diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h
index 4511748..e489412 100644
--- a/riscv/insns/vfsgnjn_vf.h
+++ b/riscv/insns/vfsgnjn_vf.h
@@ -1,6 +1,9 @@
// vfsgnjn.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = fsgnj16(vs2.v, rs1.v, true, false);
+},
+{
vd = fsgnj32(vs2.v, rs1.v, true, false);
},
{
diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h
index c423060..7be164c 100644
--- a/riscv/insns/vfsgnjx_vf.h
+++ b/riscv/insns/vfsgnjx_vf.h
@@ -1,6 +1,9 @@
// vfsgnjx.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = fsgnj16(vs2.v, rs1.v, false, true);
+},
+{
vd = fsgnj32(vs2.v, rs1.v, false, true);
},
{
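The three sign-injection forms differ only in how the result sign is chosen: vfsgnj takes f[rs1]'s sign, vfsgnjn its opposite, and vfsgnjx the xor of both signs, while the magnitude always comes from vs2. A self-contained sketch of the fsgnj16 semantics assumed above (the two boolean arguments map to the flags in the calls; the helper name is illustrative, not spike's):

#include <cstdint>
#include <cstdio>

static const uint16_t F16_SIGN = 0x8000;

static uint16_t fsgnj16_sketch(uint16_t a, uint16_t b, bool neg, bool xorSign) {
  uint16_t sign = b & F16_SIGN;
  if (neg)     sign ^= F16_SIGN;                       // vfsgnjn: opposite of b's sign
  if (xorSign) sign = (uint16_t)((a ^ b) & F16_SIGN);  // vfsgnjx: xor of both signs
  return (uint16_t)((a & ~F16_SIGN) | sign);           // a's magnitude, injected sign
}

int main() {
  // magnitude of -1.5 (0xbe00) with the sign of +2.0 (0x4000) gives +1.5 (0x3e00)
  printf("0x%04x\n", fsgnj16_sketch(0xBE00, 0x4000, false, false));
  return 0;
}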
diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h
index a4702d0..fc6877c 100644
--- a/riscv/insns/vfsub_vf.h
+++ b/riscv/insns/vfsub_vf.h
@@ -1,6 +1,9 @@
// vfsub.vf vd, vs2, rs1
VI_VFP_VF_LOOP
({
+ vd = f16_sub(vs2, rs1);
+},
+{
vd = f32_sub(vs2, rs1);
},
{