rvv: update the vector fredsum algorithm

Inactive vector elements should not be treated as zero.
author: Zhen Wei <zhen.wei@sifive.com> 2020-02-26 18:06:28 +0800
committer: Chih-Min Chao <48193236+chihminchao@users.noreply.github.com> 2020-03-05 17:16:19 +0800
commit: d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea (patch)
tree: 73747952fe9d8cfaaea04c1cc1bda79d287ca92e /riscv/decode.h
parent: 621340acc2bbeacfdb9863781ffa0f06b1338344 (diff)
download: spike-d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea.zip
spike-d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea.tar.gz
spike-d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea.tar.bz2
1 files changed, 23 insertions, 15 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index f0c1568..2c8c3c0 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -1739,32 +1739,41 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
   }; \
 
 #define VI_VFP_LOOP_REDUCTIONSUM_WIDEN_INIT /* Dedicated to f32 -> f64 */ \
-  float64_t vd_0 = f64(p->VU.elt<float64_t>(rs1_num, 0).v); \
-  reg_t temp_len = 1, temp_vl = (vl > 0) ? (vl - 1) : 0; \
-  uint64_t valu = p->VU.get_valu(); \
-  float64_t* temp_arr = NULL; \
-  for (; temp_vl > 0; temp_len <<= 1) temp_vl >>= 1; /* Calculate the bit length of vl */ \
-  temp_arr = new float64_t[temp_len](); \
+  float64_t vd_0 = p->VU.elt<float64_t>(rd_num, 0); \
+  float64_t vs1_0 = p->VU.elt<float64_t>(rs1_num, 0); \
+  vd_0 = vs1_0; \
+  size_t temp_len = vl; \
+  if (temp_len > 1) { \
+    size_t leading_zeros = __builtin_clzl(temp_len - 1); \
+    temp_len = 1 << (64 - leading_zeros); \
+  } \
+  std::vector<float64_t> temp_arr(temp_len, f64(0)); \
+  reg_t num_active_element = 0; \
   for (reg_t i=p->VU.vstart; i<vl; ++i) { /* Allocate a temporary array for computing */  \
     VI_LOOP_ELEMENT_SKIP(); \
-    temp_arr[i] = f32_to_f64(p->VU.elt<float32_t>(rs2_num, i)); \
+    temp_arr.at(i) = f32_to_f64(p->VU.elt<float32_t>(rs2_num, i)); \
+    ++num_active_element; \
   }
 
 #define VI_VFP_LOOP_REDUCTIONSUM_INIT(width) \
   float##width##_t vd_0 = p->VU.elt<float##width##_t>(rd_num, 0); \
   float##width##_t vs1_0 = p->VU.elt<float##width##_t>(rs1_num, 0); \
   vd_0 = vs1_0; \
-  reg_t temp_len = 1, temp_vl = (vl > 0) ? (vl - 1) : 0; \
-  uint64_t valu = p->VU.get_valu(); \
-  float##width##_t* temp_arr = NULL; \
-  for (; temp_vl > 0; temp_len <<= 1) temp_vl >>= 1; /* Calculate the bit length of vl */ \
-  temp_arr = new float##width##_t[temp_len](); \
+  size_t temp_len = vl; \
+  if (temp_len > 1) { \
+    size_t leading_zeros = __builtin_clzl(temp_len - 1); \
+    temp_len = 1 << (64 - leading_zeros); \
+  } \
+  std::vector<float##width##_t> temp_arr(temp_len, f##width(0)); \
+  reg_t num_active_element = 0; \
   for (reg_t i=p->VU.vstart; i<vl; ++i) { /* Allocate a temporary array for computing */  \
     VI_LOOP_ELEMENT_SKIP(); \
-    temp_arr[i] = p->VU.elt<float##width##_t>(rs2_num, i); \
+    temp_arr.at(i) = p->VU.elt<float##width##_t>(rs2_num, i); \
+    ++num_active_element; \
   }
 
 #define VI_VFP_LOOP_REDUCTIONSUM_MERGE(width) \
+  uint64_t valu = p->VU.get_valu(); \
   while (valu > 0) { \
     while (valu < temp_len) { \
       uint64_t write_pos = 0; \
@@ -1781,9 +1790,8 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
     } \
     valu >>= 1; \
   } \
-  vd_0 = f##width##_add(vd_0, temp_arr[0]); \
+  vd_0 = (num_active_element > 0) ? f##width##_add(vd_0, temp_arr[0]) : vd_0; \
   set_fp_exceptions; \
-  delete [] temp_arr;
 
 #define VI_VFP_LOOP_REDUCTIONSUM_CLOSE(x) \
   P.VU.vstart = 0; \
author	Zhen Wei <zhen.wei@sifive.com>	2020-02-26 18:06:28 +0800
committer	Chih-Min Chao <48193236+chihminchao@users.noreply.github.com>	2020-03-05 17:16:19 +0800
commit	d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea (patch)
tree	73747952fe9d8cfaaea04c1cc1bda79d287ca92e /riscv/decode.h
parent	621340acc2bbeacfdb9863781ffa0f06b1338344 (diff)
download	spike-d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea.zip spike-d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea.tar.gz spike-d80ff6dfdda2e383e1720a8b9582e6c6515ec5ea.tar.bz2