diff options
author | Max Lin <max.lin@sifive.com> | 2020-02-20 18:25:38 -0800 |
---|---|---|
committer | Max Lin <max.lin@sifive.com> | 2020-02-20 18:28:21 -0800 |
commit | c12f7f6510398d3d259cf52f810137504ff96f7a (patch) | |
tree | adad1a8b9449d78d2dfedf8e5085bea0edfe458e /riscv/insns | |
parent | c7b826eac843620a5293fd947036be3931c70746 (diff) | |
download | spike-c12f7f6510398d3d259cf52f810137504ff96f7a.zip spike-c12f7f6510398d3d259cf52f810137504ff96f7a.tar.gz spike-c12f7f6510398d3d259cf52f810137504ff96f7a.tar.bz2 |
rvv modify the vfredsum.vs behavior with e27 xlen=32
Diffstat (limited to 'riscv/insns')
-rw-r--r-- | riscv/insns/vfredsum_vs.h | 62 |
1 files changed, 54 insertions, 8 deletions
diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h index 7b5cccc..b354bdd 100644 --- a/riscv/insns/vfredsum_vs.h +++ b/riscv/insns/vfredsum_vs.h @@ -1,8 +1,54 @@ -// vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) -VI_VFP_VV_LOOP_REDUCTION -({ - vd_0 = f32_add(vd_0, vs2); -}, -{ - vd_0 = f64_add(vd_0, vs2); -}) +// vfredsum.vs vd, vs2, vs1, vm # Unordered sum +{ VI_CHECK_REDUCTION(false) + VI_VFP_COMMON + reg_t tmp_vl = ((vl>64)?128:(vl>32)?64:(vl>16)?32:(vl>8)?16:(vl>4)?8:(vl>2)?4:(vl>1)?2:1); + float32_t tmpValue[tmp_vl]; + float32_t tmpZero; + tmpZero.v = 0; + reg_t i= 0; + while(i<P.VU.vstart){ + tmpValue[i] = tmpZero; + ++i; + } + for (i=P.VU.vstart; i<tmp_vl; ++i){ + const int mlen = P.VU.vmlen; + const int midx = (mlen * i) / 32; + const int mpos = (mlen * i) % 32; + bool skip = ((P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1) == 0; + if ((insn.v_vm() == 0 && skip)|| i>=vl) + tmpValue[i].v = 0; + else + tmpValue[i] = P.VU.elt<float32_t>(rs2_num, i); + } + while(tmp_vl >4){ + for (reg_t i=0; i< tmp_vl/2;i=i+4){ + tmpValue[i+0] = f32_add(tmpValue[(i*2)+0], tmpValue[(i*2)+0+4]); + set_fp_exceptions; + tmpValue[i+1] = f32_add(tmpValue[(i*2)+1], tmpValue[(i*2)+1+4]); + set_fp_exceptions; + tmpValue[i+2] = f32_add(tmpValue[(i*2)+2], tmpValue[(i*2)+2+4]); + set_fp_exceptions; + tmpValue[i+3] = f32_add(tmpValue[(i*2)+3], tmpValue[(i*2)+3+4]); + set_fp_exceptions; + } + tmp_vl /=2; + } + if(tmp_vl>2){ + tmpValue[0] = f32_add(tmpValue[0], tmpValue[2]); + set_fp_exceptions; + tmpValue[1] = f32_add(tmpValue[1], tmpValue[3]); + set_fp_exceptions; + tmp_vl /=2; + } + if(tmp_vl>1){ + tmpValue[0] = f32_add(tmpValue[0], tmpValue[1]); + set_fp_exceptions; + } + P.VU.vstart = 0; + if (vl > 0) { + float32_t vd_0 = P.VU.elt<float32_t>(rs1_num, 0); + vd_0 = f32_add(vd_0, tmpValue[0]); + set_fp_exceptions; + P.VU.elt<type_sew_t<e32>::type>(rd_num, 0, true) = vd_0.v; + } +} |