diff options
author | Andrew Waterman <andrew@sifive.com> | 2021-12-07 11:19:50 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-07 11:19:50 -0800 |
commit | 9b3b305e42d9427618b08c33b5dfe1b5180b9f43 (patch) | |
tree | ea8a0686bec2b8e0f6f00fde19e9a115ffe8e503 /riscv/decode.h | |
parent | 8f16ed328f13eb00fdae921b2e6adf8d6e528f0b (diff) | |
parent | 1f06e0ecaafb94c595805ad036ccb07affb479f5 (diff) | |
download | spike-9b3b305e42d9427618b08c33b5dfe1b5180b9f43.zip spike-9b3b305e42d9427618b08c33b5dfe1b5180b9f43.tar.gz spike-9b3b305e42d9427618b08c33b5dfe1b5180b9f43.tar.bz2 |
Merge pull request #879 from eopXD/simply-insts
Simply floating point parameters and merge operations
Diffstat (limited to 'riscv/decode.h')
-rw-r--r-- | riscv/decode.h | 166 |
1 files changed, 124 insertions, 42 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index c21808f..1d6ea9a 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -838,6 +838,20 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) auto vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \ auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); +#define VFP_V_PARAMS(width) \ + float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ + float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + +#define VFP_VV_PARAMS(width) \ + float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ + float##width##_t vs1 = P.VU.elt<float##width##_t>(rs1_num, i); \ + float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + +#define VFP_VF_PARAMS(width) \ + float##width##_t &vd = P.VU.elt<float##width##_t>(rd_num, i, true); \ + float##width##_t rs1 = f##width(READ_FREG(rs1_num)); \ + float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); + // // vector: integer and masking operation loop // @@ -952,23 +966,84 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) VI_LOOP_CMP_END // merge and copy loop -#define VI_VVXI_MERGE_LOOP(BODY) \ +#define VI_MERGE_VARS \ + VI_MASK_VARS \ + bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + +#define VI_MERGE_LOOP_BASE \ + require_vector(true); \ VI_GENERAL_LOOP_BASE \ + VI_MERGE_VARS + +#define VI_VV_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(true); \ + VI_MERGE_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(false); \ + VI_MERGE_LOOP_BASE \ if (sew == e8){ \ - VXI_PARAMS(e8); \ + VX_PARAMS(e8); \ BODY; \ }else if(sew == e16){ \ - VXI_PARAMS(e16); \ + VX_PARAMS(e16); \ BODY; \ }else if(sew == e32){ \ - VXI_PARAMS(e32); \ + VX_PARAMS(e32); \ BODY; \ }else if(sew == e64){ \ - VXI_PARAMS(e64); \ + VX_PARAMS(e64); \ BODY; \ } \ VI_LOOP_END +#define VI_VI_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(false); \ + VI_MERGE_LOOP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VF_MERGE_LOOP(BODY) \ + VI_CHECK_SSS(false); \ + VI_MERGE_LOOP_BASE \ + if(sew == e16){ \ + VFP_VF_PARAMS(16); \ + BODY; \ + }else if(sew == e32){ \ + VFP_VF_PARAMS(32); \ + BODY; \ + }else if(sew == e64){ \ + VFP_VF_PARAMS(64); \ + BODY; \ + } \ + VI_LOOP_END + // reduction loop - signed #define VI_LOOP_REDUCTION_BASE(x) \ require(x >= e8 && x <= e64); \ @@ -1879,7 +1954,7 @@ reg_t index[P.VU.vlmax]; \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx, true); \ + uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \ uint64_t res = 0; #define VI_VFP_LOOP_REDUCTION_BASE(width) \ @@ -1958,7 +2033,7 @@ reg_t index[P.VU.vlmax]; \ case e16: \ case e32: \ case e64: { \ - vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + vd = (vd & ~mmask) | (((res) << mpos) & mmask); \ break; \ } \ default: \ @@ -1973,25 +2048,19 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \ - float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ + VFP_VV_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ break; \ }\ case e32: {\ - float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + VFP_VV_PARAMS(32); \ BODY32; \ set_fp_exceptions; \ break; \ }\ case e64: {\ - float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \ - float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + VFP_VV_PARAMS(64); \ BODY64; \ set_fp_exceptions; \ break; \ @@ -2008,20 +2077,17 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ + VFP_V_PARAMS(16); \ BODY16; \ break; \ }\ case e32: {\ - float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + VFP_V_PARAMS(32); \ BODY32; \ break; \ }\ case e64: {\ - float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + VFP_V_PARAMS(64); \ BODY64; \ break; \ }\ @@ -2101,25 +2167,19 @@ reg_t index[P.VU.vlmax]; \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t &vd = P.VU.elt<float16_t>(rd_num, i, true); \ - float16_t rs1 = f16(READ_FREG(rs1_num)); \ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ + VFP_VF_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ break; \ }\ case e32: {\ - float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \ - float32_t rs1 = f32(READ_FREG(rs1_num)); \ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ + VFP_VF_PARAMS(32); \ BODY32; \ set_fp_exceptions; \ break; \ }\ case e64: {\ - float64_t &vd = P.VU.elt<float64_t>(rd_num, i, true); \ - float64_t rs1 = f64(READ_FREG(rs1_num)); \ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ + VFP_VF_PARAMS(64); \ BODY64; \ set_fp_exceptions; \ break; \ @@ -2131,30 +2191,52 @@ reg_t index[P.VU.vlmax]; \ DEBUG_RVV_FP_VF; \ VI_VFP_LOOP_END -#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ - VI_CHECK_MSS(is_vs1); \ +#define VI_VFP_VV_LOOP_CMP(BODY16, BODY32, BODY64) \ + VI_CHECK_MSS(true); \ VI_VFP_LOOP_CMP_BASE \ switch(P.VU.vsew) { \ case e16: {\ - float16_t vs2 = P.VU.elt<float16_t>(rs2_num, i); \ - float16_t vs1 = P.VU.elt<float16_t>(rs1_num, i); \ - float16_t rs1 = f16(READ_FREG(rs1_num)); \ + VFP_VV_PARAMS(16); \ BODY16; \ set_fp_exceptions; \ break; \ }\ case e32: {\ - float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ - float32_t vs1 = P.VU.elt<float32_t>(rs1_num, i); \ - float32_t rs1 = f32(READ_FREG(rs1_num)); \ + VFP_VV_PARAMS(32); \ BODY32; \ set_fp_exceptions; \ break; \ }\ case e64: {\ - float64_t vs2 = P.VU.elt<float64_t>(rs2_num, i); \ - float64_t vs1 = P.VU.elt<float64_t>(rs1_num, i); \ - float64_t rs1 = f64(READ_FREG(rs1_num)); \ + VFP_VV_PARAMS(64); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ + VI_VFP_LOOP_CMP_END \ + +#define VI_VFP_VF_LOOP_CMP(BODY16, BODY32, BODY64) \ + VI_CHECK_MSS(false); \ + VI_VFP_LOOP_CMP_BASE \ + switch(P.VU.vsew) { \ + case e16: {\ + VFP_VF_PARAMS(16); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ + case e32: {\ + VFP_VF_PARAMS(32); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + VFP_VF_PARAMS(64); \ BODY64; \ set_fp_exceptions; \ break; \ |