Diffstat (limited to 'riscv/decode.h')
-rw-r--r-- | riscv/decode.h | 290 |
1 file changed, 130 insertions, 160 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 7ecd74f..a756607 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -65,29 +65,6 @@ const int NCSR = 4096;
 #define MAX_INSN_LENGTH 8
 #define PC_ALIGN 2
 
-#ifndef TAIL_ZEROING
-  #define TAIL_ZEROING true
-#else
-  #define TAIL_ZEROING false
-#endif
-
-#ifdef WORDS_BIGENDIAN
-  // Elements are stored in opposite order, see comment in processor.h
-  #define TAIL_ZERO(x) \
-    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x) - 1); \
-    memset(tail - (P.VU.vlmax - vl) * (x), 0, (P.VU.vlmax - vl) * (x));
-  #define TAIL_ZERO_REDUCTION(x) \
-    uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
-    memset(tail - ((P.VU.get_vlen() - x) >> 3), 0, (P.VU.get_vlen() - x) >> 3);
-#else
-  #define TAIL_ZERO(x) \
-    uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x)); \
-    memset(tail, 0, (P.VU.vlmax - vl) * (x));
-  #define TAIL_ZERO_REDUCTION(x) \
-    uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
-    memset(tail, 0, (P.VU.get_vlen() - x) >> 3);
-#endif
-
 typedef uint64_t insn_bits_t;
 
 class insn_t
 {
@@ -230,7 +207,7 @@ private:
 #define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0)
 
 #define require_vector_vs do { } while (0) // TODO MSTATUS_VS
-#define require_vector do { require_vector_vs; require(!P.VU.vill); } while (0)
+#define require_vector do { require_vector_vs; require_extension('V'); require(!P.VU.vill); } while (0)
 #define require_vector_for_vsetvl do { require_vector_vs; require_extension('V'); } while (0)
 
 #define set_fp_exceptions ({ if (softfloat_exceptionFlags) { \
@@ -368,9 +345,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
 }
 
 #define VI_ELEMENT_SKIP(inx) \
-  if (inx >= vl && TAIL_ZEROING) { \
-    is_valid = false; \
-  } else if (inx >= vl && !TAIL_ZEROING) { \
+  if (inx >= vl) { \
     continue; \
   } else if (inx < P.VU.vstart) { \
     continue; \
@@ -381,7 +356,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
 //
 // vector: operation and register access check helper
 //
-static inline bool is_overlaped(const int astart, const int asize,
+static inline bool is_overlapped(const int astart, const int asize,
                                 const int bstart, const int bsize)
 {
   const int aend = astart + asize;
@@ -390,38 +365,84 @@ static inline bool is_overlaped(const int astart, const int asize,
 }
 
 #define VI_NARROW_CHECK_COMMON \
+  require_vector;\
   require(P.VU.vlmul <= 4); \
   require(P.VU.vsew * 2 <= P.VU.ELEN); \
-  require(insn.rs2() + P.VU.vlmul * 2 <= 32);
+  require((insn.rs2() & (P.VU.vlmul * 2 - 1)) == 0); \
+  require((insn.rd() & (P.VU.vlmul - 1)) == 0); \
+  if (insn.v_vm() == 0 && P.VU.vlmul > 1) \
+    require(insn.rd() != 0);
 
 #define VI_WIDE_CHECK_COMMON \
   require_vector;\
   require(P.VU.vlmul <= 4); \
   require(P.VU.vsew * 2 <= P.VU.ELEN); \
-  require(insn.rd() + P.VU.vlmul * 2 <= 32); \
+  require((insn.rd() & (P.VU.vlmul * 2 - 1)) == 0); \
   if (insn.v_vm() == 0) \
    require(insn.rd() != 0);
 
-#define VI_CHECK_VREG_OVERLAP(v1, v2) \
-  require(!is_overlaped(v1, P.VU.vlmul, v2, P.VU.vlmul));
+#define VI_CHECK_MSS(is_vs1) \
+  if (P.VU.vlmul > 1) { \
+    require(!is_overlapped(insn.rd(), 1, insn.rs2(), P.VU.vlmul)); \
+    require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
+    if (is_vs1) {\
+      require(!is_overlapped(insn.rd(), 1, insn.rs1(), P.VU.vlmul)); \
+      require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+    } \
+  }
 
-#define VI_CHECK_SS \
-  require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul));
+#define VI_CHECK_SSS(is_vs1) \
+  if (P.VU.vlmul > 1) { \
+    require((insn.rd() & (P.VU.vlmul - 1)) == 0); \
+    require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
+    if (is_vs1) { \
+      require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+    } \
+    if (insn.v_vm() == 0) \
+      require(insn.rd() != 0); \
+  }
+
+#define VI_CHECK_SXX \
+  require_vector; \
+  if (P.VU.vlmul > 1) { \
+    require((insn.rd() & (P.VU.vlmul - 1)) == 0); \
+    if (insn.v_vm() == 0) \
+      require(insn.rd() != 0); \
+  }
 
 #define VI_CHECK_SD \
-  require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
+  require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
 
-#define VI_CHECK_DSS(is_rs) \
+#define VI_CHECK_DSS(is_vs1) \
   VI_WIDE_CHECK_COMMON; \
-  require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
-  if (is_rs) \
-    require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul));
+  require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
+  require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
+  if (is_vs1) {\
+    require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \
+    require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+  }
 
 #define VI_CHECK_DDS(is_rs) \
   VI_WIDE_CHECK_COMMON; \
-  require(insn.rs2() + P.VU.vlmul * 2 <= 32); \
-  if (is_rs) \
-    require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul));
+  require((insn.rs2() & (P.VU.vlmul * 2 - 1)) == 0); \
+  if (is_rs) { \
+    require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \
+    require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+  }
+
+#define VI_CHECK_SDS(is_vs1) \
+  VI_NARROW_CHECK_COMMON; \
+  require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); \
+  if (is_vs1) \
+    require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+
+#define VI_CHECK_REDUCTION(is_wide) \
+  require_vector;\
+  if (is_wide) {\
+    require(P.VU.vlmul <= 4); \
+    require(P.VU.vsew * 2 <= P.VU.ELEN); \
+  } \
+  require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 
 //
 // vector: loop header and end helper
@@ -436,50 +457,22 @@ static inline bool is_overlaped(const int astart, const int asize,
   reg_t rs2_num = insn.rs2(); \
   for (reg_t i=P.VU.vstart; i<vl; ++i){
 
-#define VI_TAIL_ZERO(elm) \
-  if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING) { \
-    TAIL_ZERO((sew >> 3) * elm); \
-  }
-
-#define VI_TAIL_ZERO_MASK(dst) \
-  if (vl != 0 && TAIL_ZEROING){ \
-    for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
-      const int mlen = P.VU.vmlen; \
-      const int midx = (mlen * i) / 64; \
-      const int mpos = (mlen * i) % 64; \
-      uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
-      uint64_t &vdi = P.VU.elt<uint64_t>(dst, midx); \
-      vdi = (vdi & ~mmask);\
-    }\
-  }\
-
 #define VI_LOOP_BASE \
   VI_GENERAL_LOOP_BASE \
   VI_LOOP_ELEMENT_SKIP();
 
 #define VI_LOOP_END \
   } \
-  if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
-    TAIL_ZERO((sew >> 3) * 1); \
-  }\
-  P.VU.vstart = 0;
-
-#define VI_LOOP_END_NO_TAIL_ZERO \
-  } \
   P.VU.vstart = 0;
 
 #define VI_LOOP_WIDEN_END \
   } \
-  if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
-    TAIL_ZERO((sew >> 3) * 2); \
-  }\
   P.VU.vstart = 0;
 
 #define VI_LOOP_REDUCTION_END(x) \
   } \
-  if (vl > 0 && TAIL_ZEROING) { \
+  if (vl > 0) { \
    vd_0_des = vd_0_res; \
-    TAIL_ZERO_REDUCTION(x); \
  } \
  P.VU.vstart = 0;
@@ -500,7 +493,6 @@ static inline bool is_overlaped(const int astart, const int asize,
 #define VI_LOOP_CMP_END \
    vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
  } \
-  VI_TAIL_ZERO_MASK(rd_num); \
  P.VU.vstart = 0;
 
 #define VI_LOOP_MASK(op) \
@@ -516,24 +508,9 @@ static inline bool is_overlaped(const int astart, const int asize,
    uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \
    res = (res & ~mmask) | ((op) & (1ULL << mpos)); \
  } \
-  \
-  if (TAIL_ZEROING) {\
-    for (reg_t i = vl; i < P.VU.vlmax && i > 0; ++i) { \
-      int mlen = P.VU.vmlen; \
-      int midx = (mlen * i) / 64; \
-      int mpos = (mlen * i) % 64; \
-      uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
-      uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \
-      res = (res & ~mmask); \
-    } \
-  } \
  P.VU.vstart = 0;
 
 #define VI_LOOP_NSHIFT_BASE \
-  require(P.VU.vsew <= e32); \
-  if (insn.rd() != 0){ \
-    VI_CHECK_SD; \
-  } \
  VI_GENERAL_LOOP_BASE; \
  VI_LOOP_ELEMENT_SKIP({\
    require(!(insn.rd() == 0 && P.VU.vlmul > 1));\
@@ -541,31 +518,27 @@ static inline bool is_overlaped(const int astart, const int asize,
 
 #define INT_ROUNDING(result, xrm, gb) \
-  if (gb > 0) { \
-    switch(xrm) {\
+  do { \
+    const uint64_t lsb = 1UL << (gb); \
+    const uint64_t lsb_half = lsb >> 1; \
+    switch (xrm) {\
      case VRM::RNU:\
-        result += ((uint64_t)1 << ((gb) - 1));\
+        result += lsb_half; \
        break;\
      case VRM::RNE:\
-        if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x1){\
-          result -= ((uint64_t)1 << ((gb) - 1));\
-        }else if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x3){\
-          result += ((uint64_t)1 << ((gb) - 1));\
-        }\
+        if ((result & lsb_half) && ((result & (lsb_half - 1)) || (result & lsb))) \
+          result += lsb; \
        break;\
      case VRM::RDN:\
-        result = (result >> ((gb) - 1)) << ((gb) - 1);\
        break;\
      case VRM::ROD:\
-        result |= ((uint64_t)1ul << (gb)); \
+        if (result & (lsb - 1)) \
+          result |= lsb; \
        break;\
      case VRM::INVALID_RM:\
        assert(true);\
    } \
-  } else if (gb == 0 && xrm == VRM::ROD) { \
-    result |= 1ul; \
-  }
-
+  } while (0)
 
 //
 // vector: integer and masking operand access helper
@@ -654,6 +627,7 @@ static inline bool is_overlaped(const int astart, const int asize,
 
 // comparison result to masking register
 #define VI_VV_LOOP_CMP(BODY) \
+  VI_CHECK_MSS(true); \
  VI_LOOP_CMP_BASE \
  if (sew == e8){ \
    VV_PARAMS(e8); \
@@ -671,6 +645,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_CMP_END
 
 #define VI_VX_LOOP_CMP(BODY) \
+  VI_CHECK_MSS(false); \
  VI_LOOP_CMP_BASE \
  if (sew == e8){ \
    VX_PARAMS(e8); \
@@ -688,6 +663,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_CMP_END
 
 #define VI_VI_LOOP_CMP(BODY) \
+  VI_CHECK_MSS(false); \
  VI_LOOP_CMP_BASE \
  if (sew == e8){ \
    VI_PARAMS(e8); \
@@ -705,6 +681,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_CMP_END
 
 #define VI_VV_ULOOP_CMP(BODY) \
+  VI_CHECK_MSS(true); \
  VI_LOOP_CMP_BASE \
  if (sew == e8){ \
    VV_U_PARAMS(e8); \
@@ -722,6 +699,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_CMP_END
 
 #define VI_VX_ULOOP_CMP(BODY) \
+  VI_CHECK_MSS(false); \
  VI_LOOP_CMP_BASE \
  if (sew == e8){ \
    VX_U_PARAMS(e8); \
@@ -739,6 +717,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_CMP_END
 
 #define VI_VI_ULOOP_CMP(BODY) \
+  VI_CHECK_MSS(false); \
  VI_LOOP_CMP_BASE \
  if (sew == e8){ \
    VI_U_PARAMS(e8); \
@@ -757,6 +736,7 @@ static inline bool is_overlaped(const int astart, const int asize,
 
 // merge and copy loop
 #define VI_VVXI_MERGE_LOOP(BODY) \
+  VI_CHECK_SXX; \
  VI_GENERAL_LOOP_BASE \
  if (sew == e8){ \
    VXI_PARAMS(e8); \
@@ -776,7 +756,6 @@ static inline bool is_overlaped(const int astart, const int asize,
 // reduction loop - signed
 #define VI_LOOP_REDUCTION_BASE(x) \
  require(x == e8 || x == e16 || x == e32 || x == e64); \
-  require_vector;\
  reg_t vl = P.VU.vl; \
  reg_t rd_num = insn.rd(); \
  reg_t rs1_num = insn.rs1(); \
@@ -793,6 +772,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_REDUCTION_END(x)
 
 #define VI_VV_LOOP_REDUCTION(BODY) \
+  VI_CHECK_REDUCTION(false); \
  reg_t sew = P.VU.vsew; \
  if (sew == e8) { \
    REDUCTION_LOOP(e8, BODY) \
@@ -823,6 +803,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_REDUCTION_END(x)
 
 #define VI_VV_ULOOP_REDUCTION(BODY) \
+  VI_CHECK_REDUCTION(false); \
  reg_t sew = P.VU.vsew; \
  if (sew == e8){ \
    REDUCTION_ULOOP(e8, BODY) \
@@ -836,6 +817,7 @@ static inline bool is_overlaped(const int astart, const int asize,
 
 // general VXI signed/unsigned loop
 #define VI_VV_ULOOP(BODY) \
+  VI_CHECK_SSS(true) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VV_U_PARAMS(e8); \
@@ -853,6 +835,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_END
 
 #define VI_VV_LOOP(BODY) \
+  VI_CHECK_SSS(true) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VV_PARAMS(e8); \
@@ -870,6 +853,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_END
 
 #define VI_VX_ULOOP(BODY) \
+  VI_CHECK_SSS(false) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VX_U_PARAMS(e8); \
@@ -887,6 +871,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_END
 
 #define VI_VX_LOOP(BODY) \
+  VI_CHECK_SSS(false) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VX_PARAMS(e8); \
@@ -904,6 +889,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_END
 
 #define VI_VI_ULOOP(BODY) \
+  VI_CHECK_SSS(false) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_U_PARAMS(e8); \
@@ -921,6 +907,7 @@ static inline bool is_overlaped(const int astart, const int asize,
  VI_LOOP_END
 
 #define VI_VI_LOOP(BODY) \
+  VI_CHECK_SSS(false) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_PARAMS(e8); \
@@ -961,8 +948,8 @@ VI_LOOP_END
  type_sew_t<sew1>::type vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \
  type_sew_t<sew1>::type rs1 = (type_sew_t<sew1>::type)RS1;
 
-#define VI_VVXI_LOOP_NARROW(BODY) \
-  require(P.VU.vsew <= e32); \
+#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \
+  VI_CHECK_SDS(is_vs1); \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_NARROW_SHIFT(e8, e16) \
@@ -976,7 +963,8 @@ VI_LOOP_END
  } \
  VI_LOOP_END
 
-#define VI_VI_LOOP_NSHIFT(BODY) \
+#define VI_VI_LOOP_NSHIFT(BODY, is_vs1) \
+  VI_CHECK_SDS(is_vs1); \
  VI_LOOP_NSHIFT_BASE \
  if (sew == e8){ \
    VI_NSHIFT_PARAMS(e8, e16) \
@@ -990,7 +978,8 @@ VI_LOOP_END
  } \
  VI_LOOP_END
 
-#define VI_VX_LOOP_NSHIFT(BODY) \
+#define VI_VX_LOOP_NSHIFT(BODY, is_vs1) \
+  VI_CHECK_SDS(is_vs1); \
  VI_LOOP_NSHIFT_BASE \
  if (sew == e8){ \
    VX_NSHIFT_PARAMS(e8, e16) \
@@ -1004,7 +993,8 @@ VI_LOOP_END
  } \
  VI_LOOP_END
 
-#define VI_VV_LOOP_NSHIFT(BODY) \
+#define VI_VV_LOOP_NSHIFT(BODY, is_vs1) \
+  VI_CHECK_SDS(is_vs1); \
  VI_LOOP_NSHIFT_BASE \
  if (sew == e8){ \
    VV_NSHIFT_PARAMS(e8, e16) \
@@ -1134,8 +1124,8 @@ VI_LOOP_END
  vd = sat_add<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
  P.VU.vxsat |= sat;
 
-#define VI_VVX_LOOP_WIDE_SSMA(opd) \
-  VI_WIDE_CHECK_COMMON \
+#define VI_VVX_LOOP_WIDE_SSMA(opd, is_vs1) \
+  VI_CHECK_DSS(is_vs1) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_WIDE_SSMA(8, 16, opd); \
@@ -1162,8 +1152,8 @@ VI_LOOP_END
  vd = sat_addu<uint##sew2##_t>(vd, res, sat); \
  P.VU.vxsat |= sat;
 
-#define VI_VVX_LOOP_WIDE_USSMA(opd) \
-  VI_WIDE_CHECK_COMMON \
+#define VI_VVX_LOOP_WIDE_USSMA(opd, is_vs1) \
+  VI_CHECK_DSS(is_vs1) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_WIDE_USSMA(8, 16, opd); \
@@ -1190,8 +1180,8 @@ VI_LOOP_END
  vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
  P.VU.vxsat |= sat;
 
-#define VI_VVX_LOOP_WIDE_SU_SSMA(opd) \
-  VI_WIDE_CHECK_COMMON \
+#define VI_VVX_LOOP_WIDE_SU_SSMA(opd, is_vs1) \
+  VI_CHECK_DSS(is_vs1) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_WIDE_SU_SSMA(8, 16, opd); \
@@ -1219,7 +1209,7 @@ VI_LOOP_END
  P.VU.vxsat |= sat;
 
 #define VI_VVX_LOOP_WIDE_US_SSMA(opd) \
-  VI_WIDE_CHECK_COMMON \
+  VI_CHECK_DSS(false) \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VI_WIDE_US_SSMA(8, 16, opd); \
@@ -1232,7 +1222,6 @@ VI_LOOP_END
 
 // wide reduction loop - signed
 #define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
-  VI_CHECK_DSS(false); \
  reg_t vl = P.VU.vl; \
  reg_t rd_num = insn.rd(); \
  reg_t rs1_num = insn.rs1(); \
@@ -1249,7 +1238,7 @@ VI_LOOP_END
  VI_LOOP_REDUCTION_END(sew2)
 
 #define VI_VV_LOOP_WIDE_REDUCTION(BODY) \
-  require_vector;\
+  VI_CHECK_REDUCTION(true); \
  reg_t sew = P.VU.vsew; \
  if (sew == e8){ \
    WIDE_REDUCTION_LOOP(e8, e16, BODY) \
@@ -1261,7 +1250,6 @@ VI_LOOP_END
 
 // wide reduction loop - unsigned
 #define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
-  VI_CHECK_DSS(false); \
  reg_t vl = P.VU.vl; \
  reg_t rd_num = insn.rd(); \
  reg_t rs1_num = insn.rs1(); \
@@ -1278,7 +1266,7 @@ VI_LOOP_END
  VI_LOOP_REDUCTION_END(sew2)
 
 #define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \
-  require_vector;\
+  VI_CHECK_REDUCTION(true); \
  reg_t sew = P.VU.vsew; \
  if (sew == e8){ \
    WIDE_REDUCTION_ULOOP(e8, e16, BODY) \
@@ -1290,6 +1278,7 @@ VI_LOOP_END
 
 // carry/borrow bit loop
 #define VI_VV_LOOP_CARRY(BODY) \
+  VI_CHECK_MSS(true); \
  VI_LOOP_BASE \
  if (sew == e8){ \
    VV_CARRY_PARAMS(e8) \
@@ -1305,9 +1294,9 @@ VI_LOOP_END
      BODY; \
    } \
  } \
-  VI_TAIL_ZERO_MASK(rd_num);
 
 #define VI_XI_LOOP_CARRY(BODY) \
+  VI_CHECK_MSS(false); \
  VI_LOOP_BASE \
  if (sew == e8){ \
    XI_CARRY_PARAMS(e8) \
@@ -1323,10 +1312,10 @@ VI_LOOP_END
      BODY; \
    } \
  } \
-  VI_TAIL_ZERO_MASK(rd_num);
 
 // average loop
-#define VI_VVX_LOOP_AVG(opd, op) \
+#define VI_VVX_LOOP_AVG(opd, op, is_vs1) \
+VI_CHECK_SSS(is_vs1); \
  VRM xrm = p->VU.get_vround_mode(); \
  VI_LOOP_BASE \
  switch(sew) { \
@@ -1399,19 +1388,16 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 
 #define VI_ST(stride, offset, st_width, elt_byte) \
  const reg_t nf = insn.v_nf() + 1; \
-  require_vector; \
  require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+  VI_CHECK_SXX; \
  const reg_t vl = P.VU.vl; \
  const reg_t baseAddr = RS1; \
  const reg_t vs3 = insn.rd(); \
  const reg_t vlmax = P.VU.vlmax; \
  const reg_t vlmul = P.VU.vlmul; \
  for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
-    bool is_valid = true; \
    VI_STRIP(i) \
    VI_ELEMENT_SKIP(i); \
-    if (!is_valid) \
-      continue; \
    for (reg_t fn = 0; fn < nf; ++fn) { \
      st_width##_t val = 0; \
      switch (P.VU.vsew) { \
@@ -1435,19 +1421,18 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 
 #define VI_LD(stride, offset, ld_width, elt_byte) \
  const reg_t nf = insn.v_nf() + 1; \
-  require_vector; \
  require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+  VI_CHECK_SXX; \
  const reg_t vl = P.VU.vl; \
  const reg_t baseAddr = RS1; \
  const reg_t vd = insn.rd(); \
  const reg_t vlmax = P.VU.vlmax; \
  const reg_t vlmul = P.VU.vlmul; \
  for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
-    bool is_valid = true; \
    VI_ELEMENT_SKIP(i); \
    VI_STRIP(i); \
    for (reg_t fn = 0; fn < nf; ++fn) { \
-      ld_width##_t val = is_valid ? MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte) : 0; \
+      ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \
      if (vd + fn >= NVPR){ \
        P.VU.vstart = vreg_inx;\
        require(false); \
@@ -1471,10 +1456,10 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 
 #define VI_LDST_FF(itype, tsew) \
-  require_vector; \
  require(p->VU.vsew >= e##tsew && p->VU.vsew <= e64); \
  const reg_t nf = insn.v_nf() + 1; \
  require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+  VI_CHECK_SXX; \
  const reg_t sew = p->VU.vsew; \
  const reg_t vl = p->VU.vl; \
  const reg_t baseAddr = RS1; \
@@ -1483,7 +1468,6 @@ for (reg_t i = 0; i < vlmax; ++i) { \
  const reg_t vlmax = P.VU.vlmax; \
  const reg_t vlmul = P.VU.vlmul; \
  for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
-    bool is_valid = true; \
    VI_STRIP(i); \
    VI_ELEMENT_SKIP(i); \
    \
@@ -1492,20 +1476,20 @@ for (reg_t i = 0; i < vlmax; ++i) { \
    \
    switch (sew) { \
      case e8: \
-        p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+        p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = val; \
        break; \
      case e16: \
-        p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+        p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = val; \
        break; \
      case e32: \
-        p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+        p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = val; \
        break; \
      case e64: \
-        p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+        p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = val; \
        break; \
    } \
    \
-    if (val == 0 && is_valid) { \
+    if (val == 0) { \
      p->VU.vl = i; \
      early_stop = true; \
      break; \
@@ -1566,16 +1550,10 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 
 #define VI_VFP_LOOP_END \
  } \
-  if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
-    TAIL_ZERO((P.VU.vsew >> 3) * 1); \
-  }\
  P.VU.vstart = 0; \
 
 #define VI_VFP_LOOP_WIDE_END \
  } \
-  if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
-    TAIL_ZERO((P.VU.vsew >> 3) * 2); \
-  }\
  P.VU.vstart = 0; \
  set_fp_exceptions;
 
@@ -1583,11 +1561,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
  } \
  P.VU.vstart = 0; \
  set_fp_exceptions; \
-  if (vl > 0 && TAIL_ZEROING) { \
+  if (vl > 0) { \
    P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \
-    for (reg_t i = 1; i < (P.VU.VLEN / x); ++i) { \
-      P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \
-    } \
  }
 
 #define VI_VFP_LOOP_CMP_END \
@@ -1603,20 +1578,11 @@ for (reg_t i = 0; i < vlmax; ++i) { \
      break; \
    }; \
  } \
-  if (vl != 0 && TAIL_ZEROING){ \
-    for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
-      const int mlen = P.VU.vmlen; \
-      const int midx = (mlen * i) / 64; \
-      const int mpos = (mlen * i) % 64; \
-      uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
-      uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \
-      vdi = (vdi & ~mmask);\
-    }\
-  }\
  P.VU.vstart = 0; \
  set_fp_exceptions;
 
 #define VI_VFP_VV_LOOP(BODY) \
+  VI_CHECK_SSS(true); \
  VI_VFP_LOOP_BASE \
  switch(P.VU.vsew) { \
    case e32: {\
@@ -1637,6 +1603,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
  VI_VFP_LOOP_END
 
 #define VI_VFP_VV_LOOP_REDUCTION(BODY) \
+  VI_CHECK_REDUCTION(false) \
  VI_VFP_LOOP_REDUCTION_BASE \
  float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
  BODY; \
@@ -1651,6 +1618,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
  VI_VFP_LOOP_REDUCTION_END(e64)
 
 #define VI_VFP_VF_LOOP(BODY) \
+  VI_CHECK_SSS(false); \
  VI_VFP_LOOP_BASE \
  switch(P.VU.vsew) { \
    case e32: {\
@@ -1670,15 +1638,17 @@ for (reg_t i = 0; i < vlmax; ++i) { \
  DEBUG_RVV_FP_VF; \
  VI_VFP_LOOP_END
 
-#define VI_VFP_LOOP_CMP(BODY) \
+#define VI_VFP_LOOP_CMP(BODY, is_vs1) \
+  VI_CHECK_MSS(is_vs1); \
  VI_VFP_LOOP_CMP_BASE \
  BODY; \
+  set_fp_exceptions; \
  DEBUG_RVV_FP_VV; \
  VI_VFP_LOOP_CMP_END \
 
 #define VI_VFP_VF_LOOP_WIDE(BODY) \
-  VI_VFP_LOOP_BASE \
  VI_CHECK_DSS(false); \
+  VI_VFP_LOOP_BASE \
  switch(P.VU.vsew) { \
    case e32: {\
      float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
@@ -1699,8 +1669,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 
 #define VI_VFP_VV_LOOP_WIDE(BODY) \
-  VI_VFP_LOOP_BASE \
  VI_CHECK_DSS(true); \
+  VI_VFP_LOOP_BASE \
  switch(P.VU.vsew) { \
    case e32: {\
      float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
@@ -1720,8 +1690,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
  VI_VFP_LOOP_WIDE_END
 
 #define VI_VFP_WF_LOOP_WIDE(BODY) \
-  VI_VFP_LOOP_BASE \
  VI_CHECK_DDS(false); \
+  VI_VFP_LOOP_BASE \
  switch(P.VU.vsew) { \
    case e32: {\
      float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
@@ -1740,8 +1710,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 
 #define VI_VFP_WV_LOOP_WIDE(BODY) \
-  VI_VFP_LOOP_BASE \
  VI_CHECK_DDS(true); \
+  VI_VFP_LOOP_BASE \
  switch(P.VU.vsew) { \
    case e32: {\
      float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
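
The renamed is_overlapped helper and the (reg & (lmul - 1)) == 0 tests that now appear throughout the VI_CHECK_* macros encode two register-group constraints: a group of LMUL registers must start at a register number that is a multiple of LMUL, and a destination group must not overlap a source group of a different effective width. A minimal standalone sketch of the two predicates (the interval form of is_overlapped here is an equivalent restatement for positive sizes, not a copy of the patched function):

    // Sketch of the group-overlap and group-alignment predicates behind the
    // VI_CHECK_* macros (illustrative only).
    #include <cassert>

    // Register groups [astart, astart+asize) and [bstart, bstart+bsize)
    // overlap exactly when each starts before the other ends.
    static bool is_overlapped(int astart, int asize, int bstart, int bsize) {
      return astart < bstart + bsize && bstart < astart + asize;
    }

    // With power-of-two LMUL, "vreg is a multiple of lmul" reduces to a mask
    // test, which is what require((insn.rd() & (P.VU.vlmul - 1)) == 0) checks.
    static bool is_aligned(int vreg, int lmul) {
      return (vreg & (lmul - 1)) == 0;
    }

    int main() {
      // Widening op with LMUL = 2: vd uses 2*LMUL = 4 registers starting at
      // v4 (v4..v7); a source group v6..v7 overlaps it, v8..v9 does not.
      assert(is_overlapped(4, 4, 6, 2));
      assert(!is_overlapped(4, 4, 8, 2));
      assert(is_aligned(8, 4));
      assert(!is_aligned(6, 4));  // v6 cannot start a 4-register group
      return 0;
    }

This is why VI_CHECK_DSS masks rd with vlmul * 2 - 1 (the destination group is twice as wide) but rs1/rs2 with vlmul - 1.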
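The rewritten INT_ROUNDING is the most algorithmic change in the patch: instead of special-casing gb == 0, it derives the weight of the last surviving bit (lsb) and of the first discarded bit (lsb_half) and treats all four RVV fixed-point rounding modes uniformly. A standalone model of that logic (the enum merely mirrors spike's VRM, and the caller is assumed to shift result right by gb afterwards):

    // Standalone model of the patched INT_ROUNDING logic (not spike's code).
    #include <cassert>
    #include <cstdint>

    enum class VRM { RNU, RNE, RDN, ROD };

    // Adjust `result` so that a subsequent `result >> gb` is rounded.
    uint64_t int_rounding(uint64_t result, VRM xrm, unsigned gb) {
      const uint64_t lsb = 1ULL << gb;     // weight of the last surviving bit
      const uint64_t lsb_half = lsb >> 1;  // weight of the first discarded bit
      switch (xrm) {
        case VRM::RNU:  // round to nearest, ties up
          result += lsb_half;
          break;
        case VRM::RNE:  // round to nearest, ties to even
          if ((result & lsb_half) && ((result & (lsb_half - 1)) || (result & lsb)))
            result += lsb;
          break;
        case VRM::RDN:  // round down: let the shift truncate
          break;
        case VRM::ROD:  // round to odd: set the LSB if anything is discarded
          if (result & (lsb - 1))
            result |= lsb;
          break;
      }
      return result;
    }

    int main() {
      // Shifting right by 2 divides by 4, so 6/4 = 1.5 and 10/4 = 2.5 are ties.
      assert(int_rounding( 6, VRM::RNU, 2) >> 2 == 2);  // 1.5 rounds up to 2
      assert(int_rounding( 6, VRM::RNE, 2) >> 2 == 2);  // 1.5 ties-to-even -> 2
      assert(int_rounding(10, VRM::RNE, 2) >> 2 == 2);  // 2.5 ties-to-even -> 2
      assert(int_rounding(10, VRM::ROD, 2) >> 2 == 3);  // 2.5 rounds to odd -> 3
      assert(int_rounding( 5, VRM::RDN, 2) >> 2 == 1);  // 1.25 truncates to 1
      return 0;
    }

With gb == 0, both lsb_half and lsb - 1 are zero, so every mode leaves result unchanged; no bits are discarded, so no rounding is needed, and the old gb == 0 branch disappears.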
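Several of the loops above (compare, mask-logical, carry) write one mlen-bit field per element into the destination mask register, and the recurring midx/mpos/mmask arithmetic locates element i's field inside the register's 64-bit words. A small sketch of that addressing, under this code base's pre-0.8 layout where vmlen = SEW/LMUL bits are reserved per mask element:

    // Sketch of the mask-register addressing used by the compare/mask loops
    // (illustrative; mirrors the midx/mpos/mmask arithmetic in the diff).
    #include <cassert>
    #include <cstdint>

    // Write the mlen-bit result `res` for element i into one 64-bit mask word.
    // The word itself would be selected by midx = (mlen * i) / 64.
    uint64_t write_mask_field(uint64_t word, unsigned mlen, unsigned i,
                              uint64_t res) {
      const unsigned mpos = (mlen * i) % 64;  // bit offset inside the word
      // mlen ones, positioned to cover bits [mpos + mlen - 1 : mpos]
      const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
      return (word & ~mmask) | ((res << mpos) & mmask);
    }

    int main() {
      // mlen = 8: element 2's field occupies bits [23:16] of word 0.
      assert(write_mask_field(0, 8, 2, 1) == (1ULL << 16));
      // Writing element 0 leaves element 2's field untouched.
      uint64_t w = write_mask_field(1ULL << 16, 8, 0, 0xff);
      assert(w == ((1ULL << 16) | 0xff));
      return 0;
    }

The read-modify-write through mmask is what lets the loops update a single element's field without disturbing its neighbors; the patch keeps this addressing but drops the extra pass that used to zero the fields past vl.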