diff options
author | Zhen Wei <zhen.wei@sifive.com> | 2020-02-21 15:06:04 +0800 |
---|---|---|
committer | Chih-Min Chao <48193236+chihminchao@users.noreply.github.com> | 2020-03-05 17:16:19 +0800 |
commit | 621340acc2bbeacfdb9863781ffa0f06b1338344 (patch) | |
tree | 3787481592e123a64cef19364500cddb491ecaad /riscv/insns | |
parent | e799cf99af80ebe6849bfda0d0af27906f21f3ce (diff) | |
download | spike-621340acc2bbeacfdb9863781ffa0f06b1338344.zip spike-621340acc2bbeacfdb9863781ffa0f06b1338344.tar.gz spike-621340acc2bbeacfdb9863781ffa0f06b1338344.tar.bz2 |
rvv: import parallel vf(w)redsum hardware impl.
The number of vector FP ALUs and the implementation of vf(w)redsum can be
passed as options, as in the following example:
"--varch=vlen:512,elen:32,slen:512,nalu:4,fredsum-impl:parallel"
By default, 4 vector FP ALUs and the ordered vector FP reduction-sum
implementation are assumed.
Diffstat (limited to 'riscv/insns')
-rw-r--r-- | riscv/insns/vfredsum_vs.h | 14 | ||||
-rw-r--r-- | riscv/insns/vfredsum_vs_parallel.h | 24 | ||||
-rw-r--r-- | riscv/insns/vfwredsum_vs.h | 13 | ||||
-rw-r--r-- | riscv/insns/vfwredsum_vs_parallel.h | 10 |
4 files changed, 47 insertions, 14 deletions
diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h index 7b5cccc..ca8e91b 100644 --- a/riscv/insns/vfredsum_vs.h +++ b/riscv/insns/vfredsum_vs.h @@ -1,8 +1,8 @@ // vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) -VI_VFP_VV_LOOP_REDUCTION -({ - vd_0 = f32_add(vd_0, vs2); -}, -{ - vd_0 = f64_add(vd_0, vs2); -}) + +if(p->VU.FREDSUM_IMPL == "ordered") { + #include "vfredosum_vs.h" +} else if (p->VU.FREDSUM_IMPL == "parallel") { + #include "vfredsum_vs_parallel.h" +} else + require(0); diff --git a/riscv/insns/vfredsum_vs_parallel.h b/riscv/insns/vfredsum_vs_parallel.h new file mode 100644 index 0000000..d611c60 --- /dev/null +++ b/riscv/insns/vfredsum_vs_parallel.h @@ -0,0 +1,24 @@ +// Parallel version of vfredsum +// vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) + +VI_CHECK_REDUCTION(false) +VI_VFP_COMMON + +switch(p->VU.vsew) { + case e32: { + VI_VFP_LOOP_REDUCTIONSUM_INIT(32) + VI_VFP_LOOP_REDUCTIONSUM_MERGE(32) + VI_VFP_LOOP_REDUCTIONSUM_CLOSE(e32) + break; + } + case e64: { + VI_VFP_LOOP_REDUCTIONSUM_INIT(64) + VI_VFP_LOOP_REDUCTIONSUM_MERGE(64) + VI_VFP_LOOP_REDUCTIONSUM_CLOSE(e64) + break; + } + case e16: + default: + require(0); + break; +}; diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h index 3426ef8..bed3c64 100644 --- a/riscv/insns/vfwredsum_vs.h +++ b/riscv/insns/vfwredsum_vs.h @@ -1,8 +1,7 @@ // vfwredsum.vs vd, vs2, vs1 -require_vector; -require(P.VU.vsew * 2 <= P.VU.ELEN); -require((insn.rs2() & (P.VU.vlmul - 1)) == 0); -VI_VFP_VV_LOOP_WIDE_REDUCTION -({ - vd_0 = f64_add(vd_0, vs2); -}) +if(p->VU.FREDSUM_IMPL == "ordered") { + #include "vfwredosum_vs.h" +} else if (p->VU.FREDSUM_IMPL == "parallel") { + #include "vfwredsum_vs_parallel.h" +} else + require(0); diff --git a/riscv/insns/vfwredsum_vs_parallel.h b/riscv/insns/vfwredsum_vs_parallel.h new file mode 100644 index 0000000..236d4d0 --- /dev/null +++ b/riscv/insns/vfwredsum_vs_parallel.h @@ -0,0 +1,10 @@ +// Parallel version of vfwredsum +require_vector; +require(P.VU.vsew * 
2 <= P.VU.ELEN); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); + +VI_VFP_COMMON + +VI_VFP_LOOP_REDUCTIONSUM_WIDEN_INIT +VI_VFP_LOOP_REDUCTIONSUM_MERGE(64) +VI_VFP_LOOP_REDUCTIONSUM_CLOSE(e64) |