diff options
-rw-r--r-- | riscv/decode.h | 166 | ||||
-rw-r--r-- | riscv/insns/vfmerge_vfm.h | 52 | ||||
-rw-r--r-- | riscv/insns/vmerge_vim.h | 8 | ||||
-rw-r--r-- | riscv/insns/vmerge_vvm.h | 8 | ||||
-rw-r--r-- | riscv/insns/vmerge_vxm.h | 8 | ||||
-rw-r--r-- | riscv/insns/vmfeq_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfeq_vv.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfge_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfgt_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfle_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfle_vv.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmflt_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmflt_vv.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfne_vf.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmfne_vv.h | 5 | ||||
-rw-r--r-- | riscv/insns/vmv_v_i.h | 4 | ||||
-rw-r--r-- | riscv/insns/vmv_v_v.h | 4 | ||||
-rw-r--r-- | riscv/insns/vmv_v_x.h | 4 |
18 files changed, 153 insertions, 151 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index c21808f..1d6ea9a 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -838,6 +838,20 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) auto vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \ auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); +#define VFP_V_PARAMS(width) \ + float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ + float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + +#define VFP_VV_PARAMS(width) \ + float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ + float##width##_t vs1 = P.VU.elt<float##width##_t>(rs1_num, i); \ + float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + +#define VFP_VF_PARAMS(width) \ + float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ + float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \ + float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + // // vector: integer and masking operation loop // @@ -952,23 +966,84 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) VI_LOOP_CMP_END // merge and copy loop -#define VI_VVXI_MERGE_LOOP(BODY) \ +#define VI_MERGE_VARS \ + VI_MASK_VARS \ + bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + +#define VI_MERGE_LOOP_BASE \ + require_vector(true); \ VI_GENERAL_LOOP_BASE \ + VI_MERGE_VARS + +#define VI_VV_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(true); \ + VI_MERGE_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(false); \ + VI_MERGE_LOOP_BASE \ if (sew == e8){ \ - VXI_PARAMS(e8); \ + VX_PARAMS(e8); \ BODY; \ }else if(sew == e16){ \ - VXI_PARAMS(e16); \ + VX_PARAMS(e16); \ BODY; \ }else if(sew == e32){ \ - VXI_PARAMS(e32); \ + VX_PARAMS(e32); \ BODY; \ }else if(sew == e64){ \ - VXI_PARAMS(e64); \ + VX_PARAMS(e64); \ BODY; \ } \ VI_LOOP_END +#define VI_VI_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(false); \ + VI_MERGE_LOOP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VF_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(false); \ + VI_MERGE_LOOP_BASE \ + if(sew == e16){ \ + VFP_VF_PARAMS(16); \ + BODY; \ + }else if(sew == e32){ \ + VFP_VF_PARAMS(32); \ + BODY; \ + }else if(sew == e64){ \ + VFP_VF_PARAMS(64); \ + BODY; \ + } \ + VI_LOOP_END + // reduction loop - signed #define VI_LOOP_REDUCTION_BASE(x) \ require(x >= e8 && x <= e64); \ @@ -1879,7 +1954,7 @@ reg_t index[P.VU.vlmax]; \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx, true); \ + uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \ uint64_t res = 0; #define VI_VFP_LOOP_REDUCTION_BASE(width) \ @@ -1958,7 +2033,7 @@ reg_t index[P.VU.vlmax]; \ case e16: \ case e32: \ case e64: { \ - vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + vd = (vd & ~mmask) | (((res) << mpos) & mmask); \ break; \ } \ default: \ @@ -1973,25 +2048,19 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \ - float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ + VFP_VV_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ break; \ }\ case e32: {\ - float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + VFP_VV_PARAMS(32); \ BODY32; \ set_fp_exceptions; \ break; \ }\ case e64: {\ - float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \ - float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + VFP_VV_PARAMS(64); \ BODY64; \ set_fp_exceptions; \ break; \ @@ -2008,20 +2077,17 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ + VFP_V_PARAMS(16); \ BODY16; \ break; \ }\ case e32: {\ - float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + VFP_V_PARAMS(32); \ BODY32; \ break; \ }\ case e64: {\ - float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + VFP_V_PARAMS(64); \ BODY64; \ break; \ }\ @@ -2101,25 +2167,19 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \ - float16_t rs1 = f16(READ_FREG(rs1_num)); \ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ + VFP_VF_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ break; \ }\ case e32: {\ - float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t rs1 = f32(READ_FREG(rs1_num)); \ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + VFP_VF_PARAMS(32); \ BODY32; \ set_fp_exceptions; \ break; \ }\ case e64: {\ - float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \ - float64_t rs1 = f64(READ_FREG(rs1_num)); \ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + VFP_VF_PARAMS(64); \ BODY64; \ set_fp_exceptions; \ break; \ @@ -2131,30 +2191,52 @@ reg_t index[P.VU.vlmax]; \ DEBUG_RVV_FP_VF; \ VI_VFP_LOOP_END -#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ - VI_CHECK_MSS(is_vs1); \ +#define VI_VFP_VV_LOOP_CMP(BODY16, BODY32, BODY64) \ + VI_CHECK_MSS(true); \ VI_VFP_LOOP_CMP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ - float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \ - float16_t rs1 = f16(READ_FREG(rs1_num)); \ + VFP_VV_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ break; \ }\ case e32: {\ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ - float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \ - float32_t rs1 = f32(READ_FREG(rs1_num)); \ + VFP_VV_PARAMS(32); \ BODY32; \ set_fp_exceptions; \ break; \ }\ case e64: {\ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ - float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \ - float64_t rs1 = f64(READ_FREG(rs1_num)); \ + VFP_VV_PARAMS(64); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + VI_VFP_LOOP_CMP_END \ + +#define VI_VFP_VF_LOOP_CMP(BODY16, BODY32, BODY64) \ + VI_CHECK_MSS(false); \ + VI_VFP_LOOP_CMP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + VFP_VF_PARAMS(16); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + VFP_VF_PARAMS(32); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + VFP_VF_PARAMS(64); \ BODY64; \ set_fp_exceptions; \ break; \ diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h index a38cd45..d82dfef 100644 --- a/riscv/insns/vfmerge_vfm.h +++ b/riscv/insns/vfmerge_vfm.h @@ -1,50 +1,4 @@ // vfmerge_vf vd, vs2, vs1, vm -VI_CHECK_SSS(false); -VI_VFP_COMMON; - -switch(P.VU.vsew) { - case e16: - for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { - auto &vd = P.VU.elt<float16_t>(rd_num, i, true); - auto rs1 = f16(READ_FREG(rs1_num)); - auto vs2 = P.VU.elt<float16_t>(rs2_num, i); - - int midx = i / 64; - int mpos = i % 64; - bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - - vd = use_first ? rs1 : vs2; - } - break; - case e32: - for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { - auto &vd = P.VU.elt<float32_t>(rd_num, i, true); - auto rs1 = f32(READ_FREG(rs1_num)); - auto vs2 = P.VU.elt<float32_t>(rs2_num, i); - - int midx = i / 64; - int mpos = i % 64; - bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - - vd = use_first ? rs1 : vs2; - } - break; - case e64: - for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { - auto &vd = P.VU.elt<float64_t>(rd_num, i, true); - auto rs1 = f64(READ_FREG(rs1_num)); - auto vs2 = P.VU.elt<float64_t>(rs2_num, i); - - int midx = i / 64; - int mpos = i % 64; - bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - - vd = use_first ? rs1 : vs2; - } - break; - default: - require(0); - break; -} - -P.VU.vstart->write(0); +VI_VF_MERGE_LOOP({ + vd = use_first ? rs1 : vs2; +}) diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h index fd6ae1c..0b2fac9 100644 --- a/riscv/insns/vmerge_vim.h +++ b/riscv/insns/vmerge_vim.h @@ -1,11 +1,5 @@ // vmerge.vim vd, vs2, simm5 -require_vector(true); -VI_CHECK_SSS(false); -VI_VVXI_MERGE_LOOP +VI_VI_MERGE_LOOP ({ - int midx = i / 64; - int mpos = i % 64; - bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - vd = use_first ? simm5 : vs2; }) diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h index df416b2..b60c152 100644 --- a/riscv/insns/vmerge_vvm.h +++ b/riscv/insns/vmerge_vvm.h @@ -1,11 +1,5 @@ // vmerge.vvm vd, vs2, vs1 -require_vector(true); -VI_CHECK_SSS(true); -VI_VVXI_MERGE_LOOP +VI_VV_MERGE_LOOP ({ - int midx = i / 64; - int mpos = i % 64; - bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - vd = use_first ? vs1 : vs2; }) diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h index 122a7b7..a22da8a 100644 --- a/riscv/insns/vmerge_vxm.h +++ b/riscv/insns/vmerge_vxm.h @@ -1,11 +1,5 @@ // vmerge.vxm vd, vs2, rs1 -require_vector(true); -VI_CHECK_SSS(false); -VI_VVXI_MERGE_LOOP +VI_VX_MERGE_LOOP ({ - int midx = i / 64; - int mpos = i % 64; - bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - vd = use_first ? rs1 : vs2; }) diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h index 040f2b0..a4d7c50 100644 --- a/riscv/insns/vmfeq_vf.h +++ b/riscv/insns/vmfeq_vf.h @@ -1,5 +1,5 @@ // vmfeq.vf vd, vs2, fs1 -VI_VFP_LOOP_CMP +VI_VFP_VF_LOOP_CMP ({ res = f16_eq(vs2, rs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_eq(vs2, rs1); -}, -false) +}) diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h index fb24d13..b08ce98 100644 --- a/riscv/insns/vmfeq_vv.h +++ b/riscv/insns/vmfeq_vv.h @@ -1,5 +1,5 @@ // vmfeq.vv vd, vs2, vs1 -VI_VFP_LOOP_CMP +VI_VFP_VV_LOOP_CMP ({ res = f16_eq(vs2, vs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_eq(vs2, vs1); -}, -true) +}) diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h index 9e69855..ab4df5c 100644 --- a/riscv/insns/vmfge_vf.h +++ b/riscv/insns/vmfge_vf.h @@ -1,5 +1,5 @@ // vmfge.vf vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VF_LOOP_CMP ({ res = f16_le(rs1, vs2); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_le(rs1, vs2); -}, -false) +}) diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h index bd5d99b..dcc3ea3 100644 --- a/riscv/insns/vmfgt_vf.h +++ b/riscv/insns/vmfgt_vf.h @@ -1,5 +1,5 @@ // vmfgt.vf vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VF_LOOP_CMP ({ res = f16_lt(rs1, vs2); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_lt(rs1, vs2); -}, -false) +}) diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h index 3d2852f..a942705 100644 --- a/riscv/insns/vmfle_vf.h +++ b/riscv/insns/vmfle_vf.h @@ -1,5 +1,5 @@ // vmfle.vf vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VF_LOOP_CMP ({ res = f16_le(vs2, rs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_le(vs2, rs1); -}, -false) +}) diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h index 203ef21..dd6f81d 100644 --- a/riscv/insns/vmfle_vv.h +++ b/riscv/insns/vmfle_vv.h @@ -1,5 +1,5 @@ // vmfle.vv vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VV_LOOP_CMP ({ res = f16_le(vs2, vs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_le(vs2, vs1); -}, -true) +}) diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h index 4780adc..110dbd1 100644 --- a/riscv/insns/vmflt_vf.h +++ b/riscv/insns/vmflt_vf.h @@ -1,5 +1,5 @@ // vmflt.vf vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VF_LOOP_CMP ({ res = f16_lt(vs2, rs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_lt(vs2, rs1); -}, -false) +}) diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h index cdfc3fa..35f8d70 100644 --- a/riscv/insns/vmflt_vv.h +++ b/riscv/insns/vmflt_vv.h @@ -1,5 +1,5 @@ // vmflt.vv vd, vs2, vs1 -VI_VFP_LOOP_CMP +VI_VFP_VV_LOOP_CMP ({ res = f16_lt(vs2, vs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = f64_lt(vs2, vs1); -}, -true) +}) diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h index 8401699..1b61d57 100644 --- a/riscv/insns/vmfne_vf.h +++ b/riscv/insns/vmfne_vf.h @@ -1,5 +1,5 @@ // vmfne.vf vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VF_LOOP_CMP ({ res = !f16_eq(vs2, rs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = !f64_eq(vs2, rs1); -}, -false) +}) diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h index 50dfa9c..4447c3c 100644 --- a/riscv/insns/vmfne_vv.h +++ b/riscv/insns/vmfne_vv.h @@ -1,5 +1,5 @@ // vmfne.vv vd, vs2, rs1 -VI_VFP_LOOP_CMP +VI_VFP_VV_LOOP_CMP ({ res = !f16_eq(vs2, vs1); }, @@ -8,5 +8,4 @@ VI_VFP_LOOP_CMP }, { res = !f64_eq(vs2, vs1); -}, -true) +}) diff --git a/riscv/insns/vmv_v_i.h b/riscv/insns/vmv_v_i.h index a760779..3d5737e 100644 --- a/riscv/insns/vmv_v_i.h +++ b/riscv/insns/vmv_v_i.h @@ -1,7 +1,5 @@ // vmv.v.i vd, simm5 -require_vector(true); -VI_CHECK_SSS(false); -VI_VVXI_MERGE_LOOP +VI_VI_MERGE_LOOP ({ vd = simm5; }) diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h index d7f47d0..429f5a3 100644 --- a/riscv/insns/vmv_v_v.h +++ b/riscv/insns/vmv_v_v.h @@ -1,7 +1,5 @@ // vvmv.v.v vd, vs1 -require_vector(true); -VI_CHECK_SSS(true); -VI_VVXI_MERGE_LOOP +VI_VV_MERGE_LOOP ({ vd = vs1; }) diff --git a/riscv/insns/vmv_v_x.h b/riscv/insns/vmv_v_x.h index fa7c920..1eac782 100644 --- a/riscv/insns/vmv_v_x.h +++ b/riscv/insns/vmv_v_x.h @@ -1,7 +1,5 @@ // vmv.v.x vd, rs1 -require_vector(true); -VI_CHECK_SSS(false); -VI_VVXI_MERGE_LOOP +VI_VX_MERGE_LOOP ({ vd = rs1; }) |