Simplify vfncvt

author: eopXD <yueh.ting.chen@gmail.com> 2021-12-09 10:33:28 +0800
committer: eopXD <yueh.ting.chen@gmail.com> 2021-12-09 18:59:33 +0800
commit: 95c06fbc24c7745941689dae37f7d94faddfa8f5 (patch)
tree: 65f4e620e2164ab20acdcf39ba34029a88a0797b /riscv
parent: 381b28d6340300e4acbc80b19c8d690fc124450e (diff)
download: spike-95c06fbc24c7745941689dae37f7d94faddfa8f5.zip
spike-95c06fbc24c7745941689dae37f7d94faddfa8f5.tar.gz
spike-95c06fbc24c7745941689dae37f7d94faddfa8f5.tar.bz2
9 files changed, 146 insertions, 182 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 1d6ea9a..5f566ce 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -852,6 +852,18 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
   float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \
   float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i);
 
+#define CVT_FP_TO_FP_PARAMS(from_width, to_width) \
+  auto vs2 = P.VU.elt<float##from_width##_t>(rs2_num, i); \
+  auto &vd = P.VU.elt<float##to_width##_t>(rd_num, i, true);
+// Int->FP: vs2 is a copy typed sign##from_width##_t, vd a reference typed float##to_width##_t.
+#define CVT_INT_TO_FP_PARAMS(from_width, to_width, sign) \
+  auto vs2 = P.VU.elt<sign##from_width##_t>(rs2_num, i); \
+  auto &vd = P.VU.elt<float##to_width##_t>(rd_num, i, true);
+// FP->int: vs2 is a copy typed float##from_width##_t, vd a reference typed sign##to_width##_t.
+#define CVT_FP_TO_INT_PARAMS(from_width, to_width, sign) \
+  auto vs2 = P.VU.elt<float##from_width##_t>(rs2_num, i); \
+  auto &vd = P.VU.elt<sign##to_width##_t>(rd_num, i, true);
+
 //
 // vector: integer and masking operation loop
 //
@@ -2369,6 +2381,64 @@ reg_t index[P.VU.vlmax]; \
   for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
     VI_LOOP_ELEMENT_SKIP();
 
+#define VI_VFP_CVT_LOOP(CVT_PARAMS, CHECK, BODY) \
+  CHECK \
+  VI_VFP_LOOP_SCALE_BASE \
+  CVT_PARAMS \
+  BODY \
+  set_fp_exceptions; \
+  VI_VFP_LOOP_END
+// FP->FP narrowing: BODYn/CHECKn apply when vsew == en (vd: n bits, vs2: 2n bits); BODY8/CHECK8 are unused.
+#define VI_VFP_NCVT_FP_TO_FP(BODY8, BODY16, BODY32, \
+                            CHECK8, CHECK16, CHECK32) \
+  VI_CHECK_SDS(false); \
+  switch(P.VU.vsew) { \
+    case e16: \
+      { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 16), CHECK16, BODY16); } \
+      break; \
+    case e32: \
+      { VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(64, 32), CHECK32, BODY32); } \
+      break; \
+    default: \
+      require(0); \
+      break; \
+  }
+// Int->FP narrowing: BODYn/CHECKn apply when vsew == en (vd: n bits, vs2: 2n bits); BODY8/CHECK8 are unused.
+#define VI_VFP_NCVT_INT_TO_FP(BODY8, BODY16, BODY32, \
+                             CHECK8, CHECK16, CHECK32, \
+                             sign) \
+  VI_CHECK_SDS(false); \
+  switch(P.VU.vsew) { \
+    case e16: \
+      { VI_VFP_CVT_LOOP(CVT_INT_TO_FP_PARAMS(32, 16, sign), CHECK16, BODY16); } \
+      break; \
+    case e32: \
+      { VI_VFP_CVT_LOOP(CVT_INT_TO_FP_PARAMS(64, 32, sign), CHECK32, BODY32); } \
+      break; \
+    default: \
+      require(0); \
+      break; \
+  }
+// FP->int narrowing: BODYn/CHECKn apply when vsew == en (vd: n bits, vs2: 2n bits); e8, e16 and e32 are handled.
+#define VI_VFP_NCVT_FP_TO_INT(BODY8, BODY16, BODY32, \
+                             CHECK8, CHECK16, CHECK32, \
+                             sign) \
+  VI_CHECK_SDS(false); \
+  switch(P.VU.vsew) { \
+    case e8: \
+      { VI_VFP_CVT_LOOP(CVT_FP_TO_INT_PARAMS(16, 8, sign), CHECK8, BODY8); } \
+      break; \
+    case e16: \
+      { VI_VFP_CVT_LOOP(CVT_FP_TO_INT_PARAMS(32, 16, sign), CHECK16, BODY16); } \
+      break; \
+    case e32: \
+      { VI_VFP_CVT_LOOP(CVT_FP_TO_INT_PARAMS(64, 32, sign), CHECK32, BODY32); } \
+      break; \
+    default: \
+      require(0); \
+      break; \
+  }
+
 #define VI_VFP_CVT_SCALE(BODY8, BODY16, BODY32, \
                          CHECK8, CHECK16, CHECK32, \
                          is_widen, eew_check) \
diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h
index 5448eb5..f4996f5 100644
--- a/riscv/insns/vfncvt_f_f_w.h
+++ b/riscv/insns/vfncvt_f_f_w.h
@@ -1,23 +1,9 @@
 // vfncvt.f.f.v vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  ;
-},
-{
-  auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
-  P.VU.elt<float16_t>(rd_num, i, true) = f32_to_f16(vs2);
-},
-{
-  auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
-  P.VU.elt<float32_t>(rd_num, i, true) = f64_to_f32(vs2);
-},
-{
-  ;
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('D'));
-},
-false, (P.VU.vsew >= 16))
+VI_VFP_NCVT_FP_TO_FP(
+  {;},                             // BODY8 (unused by this macro)
+  { vd = f32_to_f16(vs2); },       // BODY16: f32 -> f16
+  { vd = f64_to_f32(vs2); },       // BODY32: f64 -> f32
+  {;},                             // CHECK8 (unused by this macro)
+  { require_extension(EXT_ZFH); }, // CHECK16
+  { require_extension('D'); }      // CHECK32
+)
diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h
index 10a6f7b..d587be2 100644
--- a/riscv/insns/vfncvt_f_x_w.h
+++ b/riscv/insns/vfncvt_f_x_w.h
@@ -1,23 +1,10 @@
 // vfncvt.f.x.v vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  ;
-},
-{
-  auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
-  P.VU.elt<float16_t>(rd_num, i, true) = i32_to_f16(vs2);
-},
-{
-  auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
-  P.VU.elt<float32_t>(rd_num, i, true) = i64_to_f32(vs2);
-},
-{
-  ;
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-false, (P.VU.vsew >= 16))
+VI_VFP_NCVT_INT_TO_FP(
+  {;},                             // BODY8 (unused by this macro)
+  { vd = i32_to_f16(vs2); },       // BODY16: i32 -> f16
+  { vd = i64_to_f32(vs2); },       // BODY32: i64 -> f32
+  {;},                             // CHECK8 (unused by this macro)
+  { require_extension(EXT_ZFH); }, // CHECK16
+  { require_extension('F'); },     // CHECK32
+  int                              // sign: vs2 is read as int32_t / int64_t
+)
diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h
index 32b4b02..5e0e34f 100644
--- a/riscv/insns/vfncvt_f_xu_w.h
+++ b/riscv/insns/vfncvt_f_xu_w.h
@@ -1,23 +1,10 @@
 // vfncvt.f.xu.v vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  ;
-},
-{
-  auto vs2 = P.VU.elt<uint32_t>(rs2_num, i);
-  P.VU.elt<float16_t>(rd_num, i, true) = ui32_to_f16(vs2);
-},
-{
-  auto vs2 = P.VU.elt<uint64_t>(rs2_num, i);
-  P.VU.elt<float32_t>(rd_num, i, true) = ui64_to_f32(vs2);
-},
-{
-  ;
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-false, (P.VU.vsew >= 16))
+VI_VFP_NCVT_INT_TO_FP(
+  {;},                             // BODY8 (unused by this macro)
+  { vd = ui32_to_f16(vs2); },      // BODY16: ui32 -> f16
+  { vd = ui64_to_f32(vs2); },      // BODY32: ui64 -> f32
+  {;},                             // CHECK8 (unused by this macro)
+  { require_extension(EXT_ZFH); }, // CHECK16
+  { require_extension('F'); },     // CHECK32
+  uint                             // sign: vs2 is read as uint32_t / uint64_t
+)
diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h
index 20a14c9..89bdc05 100644
--- a/riscv/insns/vfncvt_rod_f_f_w.h
+++ b/riscv/insns/vfncvt_rod_f_f_w.h
@@ -1,25 +1,15 @@
 // vfncvt.rod.f.f.v vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  ;
-},
-{
-  softfloat_roundingMode = softfloat_round_odd;
-  auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
-  P.VU.elt<float16_t>(rd_num, i, true) = f32_to_f16(vs2);
-},
-{
-  softfloat_roundingMode = softfloat_round_odd;
-  auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
-  P.VU.elt<float32_t>(rd_num, i, true) = f64_to_f32(vs2);
-},
-{
-  ;
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-false, (P.VU.vsew >= 16))
+VI_VFP_NCVT_FP_TO_FP(
+  {;},                             // BODY8 (unused by this macro)
+  {                                // BODY16: f32 -> f16, round-to-odd
+    softfloat_roundingMode = softfloat_round_odd;
+    vd = f32_to_f16(vs2);
+  },
+  {                                // BODY32: f64 -> f32, round-to-odd
+    softfloat_roundingMode = softfloat_round_odd;
+    vd = f64_to_f32(vs2);
+  },
+  {;},                             // CHECK8 (unused by this macro)
+  { require_extension(EXT_ZFH); }, // CHECK16
+  { require_extension('F'); }      // CHECK32 NOTE(review): vfncvt_f_f_w requires 'D' for f64 -> f32; confirm 'F'
+)
diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h
index 0629b8d..23b4d5e 100644
--- a/riscv/insns/vfncvt_rtz_x_f_w.h
+++ b/riscv/insns/vfncvt_rtz_x_f_w.h
@@ -1,24 +1,10 @@
 // vfncvt.rtz.x.f.w vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
-  P.VU.elt<int8_t>(rd_num, i, true) = f16_to_i8(vs2, softfloat_round_minMag, true);
-},
-{
-  auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
-  P.VU.elt<int16_t>(rd_num, i, true) = f32_to_i16(vs2, softfloat_round_minMag, true);
-},
-{
-  auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
-  P.VU.elt<int32_t>(rd_num, i, true) = f64_to_i32(vs2, softfloat_round_minMag, true);
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-{
-  require(p->extension_enabled('D'));
-},
-false, (P.VU.vsew <= 32))
+VI_VFP_NCVT_FP_TO_INT(
+  { vd = f16_to_i8(vs2, softfloat_round_minMag, true); },  // BODY8: f16 -> i8
+  { vd = f32_to_i16(vs2, softfloat_round_minMag, true); }, // BODY16: f32 -> i16
+  { vd = f64_to_i32(vs2, softfloat_round_minMag, true); }, // BODY32: f64 -> i32
+  { require_extension(EXT_ZFH); },                         // CHECK8 (f16 source)
+  { require(p->extension_enabled('F')); },                 // CHECK16 (f32 source)
+  { require(p->extension_enabled('D')); },                 // CHECK32 (f64 source)
+  int                                                      // sign: vd is written as int8_t / int16_t / int32_t
+)
diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h
index 82aa63e..f55c680 100644
--- a/riscv/insns/vfncvt_rtz_xu_f_w.h
+++ b/riscv/insns/vfncvt_rtz_xu_f_w.h
@@ -1,24 +1,10 @@
 // vfncvt.rtz.xu.f.w vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
-  P.VU.elt<uint8_t>(rd_num, i, true) = f16_to_ui8(vs2, softfloat_round_minMag, true);
-},
-{
-  auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
-  P.VU.elt<uint16_t>(rd_num, i, true) = f32_to_ui16(vs2, softfloat_round_minMag, true);
-},
-{
-  auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
-  P.VU.elt<uint32_t>(rd_num, i, true) = f64_to_ui32(vs2, softfloat_round_minMag, true);
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-{
-  require(p->extension_enabled('D'));
-},
-false, (P.VU.vsew <= 32))
+VI_VFP_NCVT_FP_TO_INT(
+  { vd = f16_to_ui8(vs2, softfloat_round_minMag, true); },  // BODY8: f16 -> ui8
+  { vd = f32_to_ui16(vs2, softfloat_round_minMag, true); }, // BODY16: f32 -> ui16
+  { vd = f64_to_ui32(vs2, softfloat_round_minMag, true); }, // BODY32: f64 -> ui32
+  { require_extension(EXT_ZFH); },                          // CHECK8 (f16 source)
+  { require(p->extension_enabled('F')); },                  // CHECK16 (f32 source)
+  { require(p->extension_enabled('D')); },                  // CHECK32 (f64 source)
+  uint                                                      // sign: vd is written as uint8_t / uint16_t / uint32_t
+)
diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h
index a8a6dfb..a7f3c33 100644
--- a/riscv/insns/vfncvt_x_f_w.h
+++ b/riscv/insns/vfncvt_x_f_w.h
@@ -1,24 +1,10 @@
 // vfncvt.x.f.w vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
-  P.VU.elt<int8_t>(rd_num, i, true) = f16_to_i8(vs2, STATE.frm->read(), true);
-},
-{
-  auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
-  P.VU.elt<int16_t>(rd_num, i, true) = f32_to_i16(vs2, STATE.frm->read(), true);
-},
-{
-  auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
-  P.VU.elt<int32_t>(rd_num, i, true) = f64_to_i32(vs2, STATE.frm->read(), true);
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-{
-  require(p->extension_enabled('D'));
-},
-false, (P.VU.vsew <= 32))
+VI_VFP_NCVT_FP_TO_INT(
+  { vd = f16_to_i8(vs2, softfloat_roundingMode, true); },  // BODY8: f16 -> i8
+  { vd = f32_to_i16(vs2, softfloat_roundingMode, true); }, // BODY16: f32 -> i16
+  { vd = f64_to_i32(vs2, softfloat_roundingMode, true); }, // BODY32: f64 -> i32
+  { require_extension(EXT_ZFH); },                         // CHECK8 (f16 source)
+  { require(p->extension_enabled('F')); },                 // CHECK16 (f32 source)
+  { require(p->extension_enabled('D')); },                 // CHECK32 (f64 source)
+  int                                                      // sign: vd is written as int8_t / int16_t / int32_t
+)
diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h
index bff733e..02046e8 100644
--- a/riscv/insns/vfncvt_xu_f_w.h
+++ b/riscv/insns/vfncvt_xu_f_w.h
@@ -1,24 +1,10 @@
 // vfncvt.xu.f.w vd, vs2, vm
-VI_VFP_CVT_SCALE
-({
-  auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
-  P.VU.elt<uint8_t>(rd_num, i, true) = f16_to_ui8(vs2, STATE.frm->read(), true);
-},
-{
-  auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
-  P.VU.elt<uint16_t>(rd_num, i, true) = f32_to_ui16(vs2, STATE.frm->read(), true);
-},
-{
-  auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
-  P.VU.elt<uint32_t>(rd_num, i, true) = f64_to_ui32(vs2, STATE.frm->read(), true);
-},
-{
-  require(p->extension_enabled(EXT_ZFH));
-},
-{
-  require(p->extension_enabled('F'));
-},
-{
-  require(p->extension_enabled('D'));
-},
-false, (P.VU.vsew <= 32))
+VI_VFP_NCVT_FP_TO_INT(
+  { vd = f16_to_ui8(vs2, softfloat_roundingMode, true); },  // BODY8: f16 -> ui8
+  { vd = f32_to_ui16(vs2, softfloat_roundingMode, true); }, // BODY16: f32 -> ui16
+  { vd = f64_to_ui32(vs2, softfloat_roundingMode, true); }, // BODY32: f64 -> ui32
+  { require_extension(EXT_ZFH); },                          // CHECK8 (f16 source)
+  { require(p->extension_enabled('F')); },                  // CHECK16 (f32 source)
+  { require(p->extension_enabled('D')); },                  // CHECK32 (f64 source)
+  uint                                                      // sign: vd is written as uint8_t / uint16_t / uint32_t
+)
author	eopXD <yueh.ting.chen@gmail.com>	2021-12-09 10:33:28 +0800
committer	eopXD <yueh.ting.chen@gmail.com>	2021-12-09 18:59:33 +0800
commit	95c06fbc24c7745941689dae37f7d94faddfa8f5 (patch)
tree	65f4e620e2164ab20acdcf39ba34029a88a0797b /riscv
parent	381b28d6340300e4acbc80b19c8d690fc124450e (diff)
download	spike-95c06fbc24c7745941689dae37f7d94faddfa8f5.zip spike-95c06fbc24c7745941689dae37f7d94faddfa8f5.tar.gz spike-95c06fbc24c7745941689dae37f7d94faddfa8f5.tar.bz2