aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Chih-Min Chao <chihmin.chao@sifive.com> 2019-05-15 09:33:10 -0700
committer: Chih-Min Chao <chihmin.chao@sifive.com> 2019-05-19 21:22:43 -0700
commit: 23e4c96aec9e13e5bbfc111b57cd00ba7ada9a75 (patch)
tree: 9266435a0b8b6d80932d6edca0e24538b2ee6f66
parent: d5c53ebfb3fcda4d213fb8f0af791929ad3ef934 (diff)
download: spike-23e4c96aec9e13e5bbfc111b57cd00ba7ada9a75.zip
spike-23e4c96aec9e13e5bbfc111b57cd00ba7ada9a75.tar.gz
spike-23e4c96aec9e13e5bbfc111b57cd00ba7ada9a75.tar.bz2
rvv: fix reduction loop
1. Write the result only when vl > 0.
2. Zero the inactive part.
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
-rw-r--r-- riscv/decode.h 58
-rw-r--r-- riscv/insns/vredmaxu_vs.h 2
-rw-r--r-- riscv/insns/vredminu_vs.h 2
3 files changed, 34 insertions, 28 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 53de5cd..f77786b 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -506,6 +506,15 @@ enum VMUNARY0{
}\
P.VU.vstart = 0;
+#define VI_LOOP_REDUCTION_END(x) \
+ } \
+ P.VU.vstart = 0; \
+ if (vl > 0) { \
+ vd_0_des = vd_0_res; \
+ for (reg_t i = 1; i < P.VU.VLEN / sew; ++i) { \
+ P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \
+ } \
+ }
#define VI_LOOP_CMP_END \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
@@ -781,29 +790,27 @@ VI_LOOP_END
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
- type_sew_t<x>::type &vd_0_des = P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
- type_sew_t<x>::type vd_0_res = P.VU.elt<type_sew_t<x>::type>(rs1_num, 0); \
+ auto &vd_0_des = P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
+ auto vd_0_res = P.VU.elt<type_sew_t<x>::type>(rs1_num, 0); \
for (reg_t i=P.VU.vstart; i<vl; ++i){ \
V_LOOP_ELEMENT_SKIP; \
- type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
+ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \
#define REDUCTION_LOOP(x, BODY) \
VI_LOOP_REDUCTION_BASE(x) \
BODY; \
- VI_LOOP_END \
- vd_0_des = vd_0_res;
+ VI_LOOP_REDUCTION_END(x)
#define VI_VV_LOOP_REDUCTION(BODY) \
- require(!P.VU.vill);\
reg_t sew = P.VU.vsew; \
- if (sew == e8){ \
- REDUCTION_LOOP(e8, BODY) \
- }else if(sew == e16){ \
- REDUCTION_LOOP(e16, BODY) \
- }else if(sew == e32){ \
- REDUCTION_LOOP(e32, BODY) \
- }else if(sew == e64){ \
- REDUCTION_LOOP(e64, BODY) \
+ if (sew == e8) { \
+ REDUCTION_LOOP(e8, BODY) \
+ } else if(sew == e16) { \
+ REDUCTION_LOOP(e16, BODY) \
+ } else if(sew == e32) { \
+ REDUCTION_LOOP(e32, BODY) \
+ } else if(sew == e64) { \
+ REDUCTION_LOOP(e64, BODY) \
}
// reduction unsigned loop
@@ -813,28 +820,27 @@ VI_LOOP_END
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
- type_usew_t<x>::type &vdu_0_des = P.VU.elt<type_usew_t<x>::type>(rd_num, 0); \
- type_usew_t<x>::type vdu_0_res = P.VU.elt<type_usew_t<x>::type>(rs1_num, 0); \
+ auto &vd_0_des = P.VU.elt<type_usew_t<x>::type>(rd_num, 0); \
+ auto vd_0_res = P.VU.elt<type_usew_t<x>::type>(rs1_num, 0); \
for (reg_t i=P.VU.vstart; i<vl; ++i){ \
V_LOOP_ELEMENT_SKIP; \
- type_usew_t<x>::type vs2u = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
+ auto vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
#define REDUCTION_ULOOP(x, BODY) \
VI_ULOOP_REDUCTION_BASE(x) \
BODY; \
- VI_LOOP_END \
- vdu_0_des = vdu_0_res;
+ VI_LOOP_REDUCTION_END(x)
#define VI_VV_ULOOP_REDUCTION(BODY) \
reg_t sew = P.VU.vsew; \
if (sew == e8){ \
- REDUCTION_ULOOP(e8, BODY) \
- }else if(sew == e16){ \
- REDUCTION_ULOOP(e16, BODY) \
- }else if(sew == e32){ \
- REDUCTION_ULOOP(e32, BODY) \
- }else if(sew == e64){ \
- REDUCTION_ULOOP(e64, BODY) \
+ REDUCTION_ULOOP(e8, BODY) \
+ } else if(sew == e16) { \
+ REDUCTION_ULOOP(e16, BODY) \
+ } else if(sew == e32) { \
+ REDUCTION_ULOOP(e32, BODY) \
+ } else if(sew == e64) { \
+ REDUCTION_ULOOP(e64, BODY) \
}
#define VI_VX_ULOOP(BODY) \
diff --git a/riscv/insns/vredmaxu_vs.h b/riscv/insns/vredmaxu_vs.h
index ca33a95..960f486 100644
--- a/riscv/insns/vredmaxu_vs.h
+++ b/riscv/insns/vredmaxu_vs.h
@@ -1,5 +1,5 @@
// vredmaxu.vs vd, vs2 ,vs1
VI_VV_ULOOP_REDUCTION
({
- vdu_0_res = (vdu_0_res >= vs2u) ? vdu_0_res : vs2u;
+ vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2;
})
diff --git a/riscv/insns/vredminu_vs.h b/riscv/insns/vredminu_vs.h
index bfb77e6..7082475 100644
--- a/riscv/insns/vredminu_vs.h
+++ b/riscv/insns/vredminu_vs.h
@@ -1,5 +1,5 @@
// vredminu.vs vd, vs2 ,vs1
VI_VV_ULOOP_REDUCTION
({
- vdu_0_res = (vdu_0_res <= vs2u) ? vdu_0_res : vs2u;
+ vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2;
})