author    | Andrew Waterman <andrew@sifive.com> | 2019-11-12 11:54:33 -0800
committer | GitHub <noreply@github.com>         | 2019-11-12 11:54:33 -0800
commit    | ff81dea8593c6e51b45e7bed230a2cafd56e4caf (patch)
tree      | 62bd0cca8bb49199737a5d5532314cb8080d4031 /riscv
parent    | 3db3d4b1221a145c9703ba5bd82db8b5c6e9ee78 (diff)
parent    | c8da0f2446d1261397965e6268d117bb50004ac9 (diff)
download  | spike-ff81dea8593c6e51b45e7bed230a2cafd56e4caf.zip, spike-ff81dea8593c6e51b45e7bed230a2cafd56e4caf.tar.gz, spike-ff81dea8593c6e51b45e7bed230a2cafd56e4caf.tar.bz2
Merge pull request #355 from chihminchao/rvv-0.8-2019-11
rvv-0.8-2019-11
Diffstat (limited to 'riscv')
106 files changed, 441 insertions(+), 512 deletions(-)
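
The decode.h hunk that opens the diff drops the v0.7-era tail-zeroing macros and replaces the old ad-hoc operand checks with a family of VI_CHECK_* macros: every vector register group must be aligned to its LMUL, and narrowing/widening/mask-producing forms must not overlap their sources illegally. As a rough, self-contained sketch of what those checks amount to (the helper names below are illustrative; only `is_overlapped` matches a helper in the hunk):

```cpp
// Illustrative sketch only -- not Spike's actual macros.
// Two register groups overlap iff their half-open ranges intersect; this is
// equivalent to the is_overlapped() helper the decode.h hunk introduces.
static bool is_overlapped(int astart, int asize, int bstart, int bsize) {
  return astart < bstart + bsize && bstart < astart + asize;
}

// A group of `lmul` registers must start at a multiple of `lmul`
// (lmul is a power of two, so the alignment test is a simple mask).
static bool group_aligned(int vreg, int lmul) {
  return (vreg & (lmul - 1)) == 0;
}

// Example: the widening "DSS" shape (vd spans 2*lmul registers, vs2/vs1 span
// lmul) needs every group aligned and vd disjoint from its narrower sources,
// mirroring what VI_CHECK_DSS requires in the hunk below.
static bool check_dss(int vd, int vs2, int vs1, int lmul, bool has_vs1) {
  if (!group_aligned(vd, 2 * lmul) || !group_aligned(vs2, lmul))
    return false;
  if (is_overlapped(vd, 2 * lmul, vs2, lmul))
    return false;
  if (has_vs1 && (!group_aligned(vs1, lmul) ||
                  is_overlapped(vd, 2 * lmul, vs1, lmul)))
    return false;
  return true;
}
```

The same alignment test, `(vreg & (lmul - 1)) == 0`, is what the per-instruction headers (`vlx*_v.h`, `vslide*`, `vrgather_*`, `vcompress_vm.h`) gain further down in the diff.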
diff --git a/riscv/decode.h b/riscv/decode.h index 7ecd74f..a756607 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -65,29 +65,6 @@ const int NCSR = 4096; #define MAX_INSN_LENGTH 8 #define PC_ALIGN 2 -#ifndef TAIL_ZEROING - #define TAIL_ZEROING true -#else - #define TAIL_ZEROING false -#endif - -#ifdef WORDS_BIGENDIAN - // Elements are stored in opposite order, see comment in processor.h - #define TAIL_ZERO(x) \ - uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x) - 1); \ - memset(tail - (P.VU.vlmax - vl) * (x), 0, (P.VU.vlmax - vl) * (x)); - #define TAIL_ZERO_REDUCTION(x) \ - uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \ - memset(tail - ((P.VU.get_vlen() - x) >> 3), 0, (P.VU.get_vlen() - x) >> 3); -#else - #define TAIL_ZERO(x) \ - uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x)); \ - memset(tail, 0, (P.VU.vlmax - vl) * (x)); - #define TAIL_ZERO_REDUCTION(x) \ - uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \ - memset(tail, 0, (P.VU.get_vlen() - x) >> 3); -#endif - typedef uint64_t insn_bits_t; class insn_t { @@ -230,7 +207,7 @@ private: #define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0) #define require_vector_vs do { } while (0) // TODO MSTATUS_VS -#define require_vector do { require_vector_vs; require(!P.VU.vill); } while (0) +#define require_vector do { require_vector_vs; require_extension('V'); require(!P.VU.vill); } while (0) #define require_vector_for_vsetvl do { require_vector_vs; require_extension('V'); } while (0) #define set_fp_exceptions ({ if (softfloat_exceptionFlags) { \ @@ -368,9 +345,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); } #define VI_ELEMENT_SKIP(inx) \ - if (inx >= vl && TAIL_ZEROING) { \ - is_valid = false; \ - } else if (inx >= vl && !TAIL_ZEROING) { \ + if (inx >= vl) { \ continue; \ } else if (inx < P.VU.vstart) { \ continue; \ @@ -381,7 +356,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); // // vector: operation and register acccess check helper // -static inline bool is_overlaped(const int astart, const int asize, +static inline bool is_overlapped(const int astart, const int asize, const int bstart, const int bsize) { const int aend = astart + asize; @@ -390,38 +365,84 @@ static inline bool is_overlaped(const int astart, const int asize, } #define VI_NARROW_CHECK_COMMON \ + require_vector;\ require(P.VU.vlmul <= 4); \ require(P.VU.vsew * 2 <= P.VU.ELEN); \ - require(insn.rs2() + P.VU.vlmul * 2 <= 32); + require((insn.rs2() & (P.VU.vlmul * 2 - 1)) == 0); \ + require((insn.rd() & (P.VU.vlmul - 1)) == 0); \ + if (insn.v_vm() == 0 && P.VU.vlmul > 1) \ + require(insn.rd() != 0); #define VI_WIDE_CHECK_COMMON \ require_vector;\ require(P.VU.vlmul <= 4); \ require(P.VU.vsew * 2 <= P.VU.ELEN); \ - require(insn.rd() + P.VU.vlmul * 2 <= 32); \ + require((insn.rd() & (P.VU.vlmul * 2 - 1)) == 0); \ if (insn.v_vm() == 0) \ require(insn.rd() != 0); -#define VI_CHECK_VREG_OVERLAP(v1, v2) \ - require(!is_overlaped(v1, P.VU.vlmul, v2, P.VU.vlmul)); +#define VI_CHECK_MSS(is_vs1) \ + if (P.VU.vlmul > 1) { \ + require(!is_overlapped(insn.rd(), 1, insn.rs2(), P.VU.vlmul)); \ + require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \ + if (is_vs1) {\ + require(!is_overlapped(insn.rd(), 1, insn.rs1(), P.VU.vlmul)); \ + require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \ + } \ + } -#define VI_CHECK_SS \ - require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul)); +#define VI_CHECK_SSS(is_vs1) \ + if (P.VU.vlmul > 1) { \ + 
require((insn.rd() & (P.VU.vlmul - 1)) == 0); \ + require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \ + if (is_vs1) { \ + require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \ + } \ + if (insn.v_vm() == 0) \ + require(insn.rd() != 0); \ + } + +#define VI_CHECK_SXX \ + require_vector; \ + if (P.VU.vlmul > 1) { \ + require((insn.rd() & (P.VU.vlmul - 1)) == 0); \ + if (insn.v_vm() == 0) \ + require(insn.rd() != 0); \ + } #define VI_CHECK_SD \ - require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); + require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); -#define VI_CHECK_DSS(is_rs) \ +#define VI_CHECK_DSS(is_vs1) \ VI_WIDE_CHECK_COMMON; \ - require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \ - if (is_rs) \ - require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); + require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \ + require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \ + if (is_vs1) {\ + require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \ + require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \ + } #define VI_CHECK_DDS(is_rs) \ VI_WIDE_CHECK_COMMON; \ - require(insn.rs2() + P.VU.vlmul * 2 <= 32); \ - if (is_rs) \ - require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); + require((insn.rs2() & (P.VU.vlmul * 2 - 1)) == 0); \ + if (is_rs) { \ + require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \ + require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \ + } + +#define VI_CHECK_SDS(is_vs1) \ + VI_NARROW_CHECK_COMMON; \ + require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); \ + if (is_vs1) \ + require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \ + +#define VI_CHECK_REDUCTION(is_wide) \ + require_vector;\ + if (is_wide) {\ + require(P.VU.vlmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + } \ + require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \ // // vector: loop header and end helper @@ -436,50 +457,22 @@ static inline bool is_overlaped(const int astart, const int asize, reg_t rs2_num = insn.rs2(); \ for (reg_t i=P.VU.vstart; i<vl; ++i){ -#define VI_TAIL_ZERO(elm) \ - if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING) { \ - TAIL_ZERO((sew >> 3) * elm); \ - } - -#define VI_TAIL_ZERO_MASK(dst) \ - if (vl != 0 && TAIL_ZEROING){ \ - for (reg_t i=vl; i<P.VU.vlmax; ++i){ \ - const int mlen = P.VU.vmlen; \ - const int midx = (mlen * i) / 64; \ - const int mpos = (mlen * i) % 64; \ - uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ - uint64_t &vdi = P.VU.elt<uint64_t>(dst, midx); \ - vdi = (vdi & ~mmask);\ - }\ - }\ - #define VI_LOOP_BASE \ VI_GENERAL_LOOP_BASE \ VI_LOOP_ELEMENT_SKIP(); #define VI_LOOP_END \ } \ - if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ - TAIL_ZERO((sew >> 3) * 1); \ - }\ - P.VU.vstart = 0; - -#define VI_LOOP_END_NO_TAIL_ZERO \ - } \ P.VU.vstart = 0; #define VI_LOOP_WIDEN_END \ } \ - if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ - TAIL_ZERO((sew >> 3) * 2); \ - }\ P.VU.vstart = 0; #define VI_LOOP_REDUCTION_END(x) \ } \ - if (vl > 0 && TAIL_ZEROING) { \ + if (vl > 0) { \ vd_0_des = vd_0_res; \ - TAIL_ZERO_REDUCTION(x); \ } \ P.VU.vstart = 0; @@ -500,7 +493,6 @@ static inline bool is_overlaped(const int astart, const int asize, #define VI_LOOP_CMP_END \ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ } \ - VI_TAIL_ZERO_MASK(rd_num); \ P.VU.vstart = 0; #define VI_LOOP_MASK(op) \ @@ -516,24 +508,9 @@ static inline bool is_overlaped(const int astart, const int asize, uint64_t 
&res = P.VU.elt<uint64_t>(insn.rd(), midx); \ res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ } \ - \ - if (TAIL_ZEROING) {\ - for (reg_t i = vl; i < P.VU.vlmax && i > 0; ++i) { \ - int mlen = P.VU.vmlen; \ - int midx = (mlen * i) / 64; \ - int mpos = (mlen * i) % 64; \ - uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ - uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \ - res = (res & ~mmask); \ - } \ - } \ P.VU.vstart = 0; #define VI_LOOP_NSHIFT_BASE \ - require(P.VU.vsew <= e32); \ - if (insn.rd() != 0){ \ - VI_CHECK_SD; \ - } \ VI_GENERAL_LOOP_BASE; \ VI_LOOP_ELEMENT_SKIP({\ require(!(insn.rd() == 0 && P.VU.vlmul > 1));\ @@ -541,31 +518,27 @@ static inline bool is_overlaped(const int astart, const int asize, #define INT_ROUNDING(result, xrm, gb) \ - if (gb > 0) { \ - switch(xrm) {\ + do { \ + const uint64_t lsb = 1UL << (gb); \ + const uint64_t lsb_half = lsb >> 1; \ + switch (xrm) {\ case VRM::RNU:\ - result += ((uint64_t)1 << ((gb) - 1));\ + result += lsb_half; \ break;\ case VRM::RNE:\ - if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x1){\ - result -= ((uint64_t)1 << ((gb) - 1));\ - }else if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x3){\ - result += ((uint64_t)1 << ((gb) - 1));\ - }\ + if ((result & lsb_half) && ((result & (lsb_half - 1)) || (result & lsb))) \ + result += lsb; \ break;\ case VRM::RDN:\ - result = (result >> ((gb) - 1)) << ((gb) - 1);\ break;\ case VRM::ROD:\ - result |= ((uint64_t)1ul << (gb)); \ + if (result & (lsb - 1)) \ + result |= lsb; \ break;\ case VRM::INVALID_RM:\ assert(true);\ } \ - } else if (gb == 0 && xrm == VRM::ROD) { \ - result |= 1ul; \ - } - + } while (0) // // vector: integer and masking operand access helper @@ -654,6 +627,7 @@ static inline bool is_overlaped(const int astart, const int asize, // comparision result to masking register #define VI_VV_LOOP_CMP(BODY) \ + VI_CHECK_MSS(true); \ VI_LOOP_CMP_BASE \ if (sew == e8){ \ VV_PARAMS(e8); \ @@ -671,6 +645,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_CMP_END #define VI_VX_LOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ VI_LOOP_CMP_BASE \ if (sew == e8){ \ VX_PARAMS(e8); \ @@ -688,6 +663,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_CMP_END #define VI_VI_LOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ VI_LOOP_CMP_BASE \ if (sew == e8){ \ VI_PARAMS(e8); \ @@ -705,6 +681,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_CMP_END #define VI_VV_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(true); \ VI_LOOP_CMP_BASE \ if (sew == e8){ \ VV_U_PARAMS(e8); \ @@ -722,6 +699,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_CMP_END #define VI_VX_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ VI_LOOP_CMP_BASE \ if (sew == e8){ \ VX_U_PARAMS(e8); \ @@ -739,6 +717,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_CMP_END #define VI_VI_ULOOP_CMP(BODY) \ + VI_CHECK_MSS(false); \ VI_LOOP_CMP_BASE \ if (sew == e8){ \ VI_U_PARAMS(e8); \ @@ -757,6 +736,7 @@ static inline bool is_overlaped(const int astart, const int asize, // merge and copy loop #define VI_VVXI_MERGE_LOOP(BODY) \ + VI_CHECK_SXX; \ VI_GENERAL_LOOP_BASE \ if (sew == e8){ \ VXI_PARAMS(e8); \ @@ -776,7 +756,6 @@ static inline bool is_overlaped(const int astart, const int asize, // reduction loop - signed #define VI_LOOP_REDUCTION_BASE(x) \ require(x == e8 || x == e16 || x == e32 || x == e64); \ - require_vector;\ reg_t vl = P.VU.vl; \ reg_t rd_num = insn.rd(); \ reg_t rs1_num = insn.rs1(); \ 
@@ -793,6 +772,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_REDUCTION_END(x) #define VI_VV_LOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false); \ reg_t sew = P.VU.vsew; \ if (sew == e8) { \ REDUCTION_LOOP(e8, BODY) \ @@ -823,6 +803,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_REDUCTION_END(x) #define VI_VV_ULOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false); \ reg_t sew = P.VU.vsew; \ if (sew == e8){ \ REDUCTION_ULOOP(e8, BODY) \ @@ -836,6 +817,7 @@ static inline bool is_overlaped(const int astart, const int asize, // genearl VXI signed/unsgied loop #define VI_VV_ULOOP(BODY) \ + VI_CHECK_SSS(true) \ VI_LOOP_BASE \ if (sew == e8){ \ VV_U_PARAMS(e8); \ @@ -853,6 +835,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_END #define VI_VV_LOOP(BODY) \ + VI_CHECK_SSS(true) \ VI_LOOP_BASE \ if (sew == e8){ \ VV_PARAMS(e8); \ @@ -870,6 +853,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_END #define VI_VX_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ VI_LOOP_BASE \ if (sew == e8){ \ VX_U_PARAMS(e8); \ @@ -887,6 +871,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_END #define VI_VX_LOOP(BODY) \ + VI_CHECK_SSS(false) \ VI_LOOP_BASE \ if (sew == e8){ \ VX_PARAMS(e8); \ @@ -904,6 +889,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_END #define VI_VI_ULOOP(BODY) \ + VI_CHECK_SSS(false) \ VI_LOOP_BASE \ if (sew == e8){ \ VI_U_PARAMS(e8); \ @@ -921,6 +907,7 @@ static inline bool is_overlaped(const int astart, const int asize, VI_LOOP_END #define VI_VI_LOOP(BODY) \ + VI_CHECK_SSS(false) \ VI_LOOP_BASE \ if (sew == e8){ \ VI_PARAMS(e8); \ @@ -961,8 +948,8 @@ VI_LOOP_END type_sew_t<sew1>::type vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \ type_sew_t<sew1>::type rs1 = (type_sew_t<sew1>::type)RS1; -#define VI_VVXI_LOOP_NARROW(BODY) \ - require(P.VU.vsew <= e32); \ +#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ VI_LOOP_BASE \ if (sew == e8){ \ VI_NARROW_SHIFT(e8, e16) \ @@ -976,7 +963,8 @@ VI_LOOP_END } \ VI_LOOP_END -#define VI_VI_LOOP_NSHIFT(BODY) \ +#define VI_VI_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ VI_LOOP_NSHIFT_BASE \ if (sew == e8){ \ VI_NSHIFT_PARAMS(e8, e16) \ @@ -990,7 +978,8 @@ VI_LOOP_END } \ VI_LOOP_END -#define VI_VX_LOOP_NSHIFT(BODY) \ +#define VI_VX_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ VI_LOOP_NSHIFT_BASE \ if (sew == e8){ \ VX_NSHIFT_PARAMS(e8, e16) \ @@ -1004,7 +993,8 @@ VI_LOOP_END } \ VI_LOOP_END -#define VI_VV_LOOP_NSHIFT(BODY) \ +#define VI_VV_LOOP_NSHIFT(BODY, is_vs1) \ + VI_CHECK_SDS(is_vs1); \ VI_LOOP_NSHIFT_BASE \ if (sew == e8){ \ VV_NSHIFT_PARAMS(e8, e16) \ @@ -1134,8 +1124,8 @@ VI_LOOP_END vd = sat_add<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \ P.VU.vxsat |= sat; -#define VI_VVX_LOOP_WIDE_SSMA(opd) \ - VI_WIDE_CHECK_COMMON \ +#define VI_VVX_LOOP_WIDE_SSMA(opd, is_vs1) \ + VI_CHECK_DSS(is_vs1) \ VI_LOOP_BASE \ if (sew == e8){ \ VI_WIDE_SSMA(8, 16, opd); \ @@ -1162,8 +1152,8 @@ VI_LOOP_END vd = sat_addu<uint##sew2##_t>(vd, res, sat); \ P.VU.vxsat |= sat; -#define VI_VVX_LOOP_WIDE_USSMA(opd) \ - VI_WIDE_CHECK_COMMON \ +#define VI_VVX_LOOP_WIDE_USSMA(opd, is_vs1) \ + VI_CHECK_DSS(is_vs1) \ VI_LOOP_BASE \ if (sew == e8){ \ VI_WIDE_USSMA(8, 16, opd); \ @@ -1190,8 +1180,8 @@ VI_LOOP_END vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \ P.VU.vxsat |= sat; -#define VI_VVX_LOOP_WIDE_SU_SSMA(opd) \ - VI_WIDE_CHECK_COMMON \ 
+#define VI_VVX_LOOP_WIDE_SU_SSMA(opd, is_vs1) \ + VI_CHECK_DSS(is_vs1) \ VI_LOOP_BASE \ if (sew == e8){ \ VI_WIDE_SU_SSMA(8, 16, opd); \ @@ -1219,7 +1209,7 @@ VI_LOOP_END P.VU.vxsat |= sat; #define VI_VVX_LOOP_WIDE_US_SSMA(opd) \ - VI_WIDE_CHECK_COMMON \ + VI_CHECK_DSS(false) \ VI_LOOP_BASE \ if (sew == e8){ \ VI_WIDE_US_SSMA(8, 16, opd); \ @@ -1232,7 +1222,6 @@ VI_LOOP_END // wide reduction loop - signed #define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ - VI_CHECK_DSS(false); \ reg_t vl = P.VU.vl; \ reg_t rd_num = insn.rd(); \ reg_t rs1_num = insn.rs1(); \ @@ -1249,7 +1238,7 @@ VI_LOOP_END VI_LOOP_REDUCTION_END(sew2) #define VI_VV_LOOP_WIDE_REDUCTION(BODY) \ - require_vector;\ + VI_CHECK_REDUCTION(true); \ reg_t sew = P.VU.vsew; \ if (sew == e8){ \ WIDE_REDUCTION_LOOP(e8, e16, BODY) \ @@ -1261,7 +1250,6 @@ VI_LOOP_END // wide reduction loop - unsigned #define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ - VI_CHECK_DSS(false); \ reg_t vl = P.VU.vl; \ reg_t rd_num = insn.rd(); \ reg_t rs1_num = insn.rs1(); \ @@ -1278,7 +1266,7 @@ VI_LOOP_END VI_LOOP_REDUCTION_END(sew2) #define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \ - require_vector;\ + VI_CHECK_REDUCTION(true); \ reg_t sew = P.VU.vsew; \ if (sew == e8){ \ WIDE_REDUCTION_ULOOP(e8, e16, BODY) \ @@ -1290,6 +1278,7 @@ VI_LOOP_END // carry/borrow bit loop #define VI_VV_LOOP_CARRY(BODY) \ + VI_CHECK_MSS(true); \ VI_LOOP_BASE \ if (sew == e8){ \ VV_CARRY_PARAMS(e8) \ @@ -1305,9 +1294,9 @@ VI_LOOP_END BODY; \ } \ } \ - VI_TAIL_ZERO_MASK(rd_num); #define VI_XI_LOOP_CARRY(BODY) \ + VI_CHECK_MSS(false); \ VI_LOOP_BASE \ if (sew == e8){ \ XI_CARRY_PARAMS(e8) \ @@ -1323,10 +1312,10 @@ VI_LOOP_END BODY; \ } \ } \ - VI_TAIL_ZERO_MASK(rd_num); // average loop -#define VI_VVX_LOOP_AVG(opd, op) \ +#define VI_VVX_LOOP_AVG(opd, op, is_vs1) \ +VI_CHECK_SSS(is_vs1); \ VRM xrm = p->VU.get_vround_mode(); \ VI_LOOP_BASE \ switch(sew) { \ @@ -1399,19 +1388,16 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_ST(stride, offset, st_width, elt_byte) \ const reg_t nf = insn.v_nf() + 1; \ - require_vector; \ require((nf * P.VU.vlmul) <= (NVPR / 4)); \ + VI_CHECK_SXX; \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vs3 = insn.rd(); \ const reg_t vlmax = P.VU.vlmax; \ const reg_t vlmul = P.VU.vlmul; \ for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \ - bool is_valid = true; \ VI_STRIP(i) \ VI_ELEMENT_SKIP(i); \ - if (!is_valid) \ - continue; \ for (reg_t fn = 0; fn < nf; ++fn) { \ st_width##_t val = 0; \ switch (P.VU.vsew) { \ @@ -1435,19 +1421,18 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_LD(stride, offset, ld_width, elt_byte) \ const reg_t nf = insn.v_nf() + 1; \ - require_vector; \ require((nf * P.VU.vlmul) <= (NVPR / 4)); \ + VI_CHECK_SXX; \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vd = insn.rd(); \ const reg_t vlmax = P.VU.vlmax; \ const reg_t vlmul = P.VU.vlmul; \ for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \ - bool is_valid = true; \ VI_ELEMENT_SKIP(i); \ VI_STRIP(i); \ for (reg_t fn = 0; fn < nf; ++fn) { \ - ld_width##_t val = is_valid ? 
MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte) : 0; \ + ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \ if (vd + fn >= NVPR){ \ P.VU.vstart = vreg_inx;\ require(false); \ @@ -1471,10 +1456,10 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_LDST_FF(itype, tsew) \ - require_vector; \ require(p->VU.vsew >= e##tsew && p->VU.vsew <= e64); \ const reg_t nf = insn.v_nf() + 1; \ require((nf * P.VU.vlmul) <= (NVPR / 4)); \ + VI_CHECK_SXX; \ const reg_t sew = p->VU.vsew; \ const reg_t vl = p->VU.vl; \ const reg_t baseAddr = RS1; \ @@ -1483,7 +1468,6 @@ for (reg_t i = 0; i < vlmax; ++i) { \ const reg_t vlmax = P.VU.vlmax; \ const reg_t vlmul = P.VU.vlmul; \ for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \ - bool is_valid = true; \ VI_STRIP(i); \ VI_ELEMENT_SKIP(i); \ \ @@ -1492,20 +1476,20 @@ for (reg_t i = 0; i < vlmax; ++i) { \ \ switch (sew) { \ case e8: \ - p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = val; \ break; \ case e16: \ - p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = val; \ break; \ case e32: \ - p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = val; \ break; \ case e64: \ - p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = val; \ break; \ } \ \ - if (val == 0 && is_valid) { \ + if (val == 0) { \ p->VU.vl = i; \ early_stop = true; \ break; \ @@ -1566,16 +1550,10 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_VFP_LOOP_END \ } \ - if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ - TAIL_ZERO((P.VU.vsew >> 3) * 1); \ - }\ P.VU.vstart = 0; \ #define VI_VFP_LOOP_WIDE_END \ } \ - if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ - TAIL_ZERO((P.VU.vsew >> 3) * 2); \ - }\ P.VU.vstart = 0; \ set_fp_exceptions; @@ -1583,11 +1561,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \ } \ P.VU.vstart = 0; \ set_fp_exceptions; \ - if (vl > 0 && TAIL_ZEROING) { \ + if (vl > 0) { \ P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \ - for (reg_t i = 1; i < (P.VU.VLEN / x); ++i) { \ - P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \ - } \ } #define VI_VFP_LOOP_CMP_END \ @@ -1603,20 +1578,11 @@ for (reg_t i = 0; i < vlmax; ++i) { \ break; \ }; \ } \ - if (vl != 0 && TAIL_ZEROING){ \ - for (reg_t i=vl; i<P.VU.vlmax; ++i){ \ - const int mlen = P.VU.vmlen; \ - const int midx = (mlen * i) / 64; \ - const int mpos = (mlen * i) % 64; \ - uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ - uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \ - vdi = (vdi & ~mmask);\ - }\ - }\ P.VU.vstart = 0; \ set_fp_exceptions; #define VI_VFP_VV_LOOP(BODY) \ + VI_CHECK_SSS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e32: {\ @@ -1637,6 +1603,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \ VI_VFP_LOOP_END #define VI_VFP_VV_LOOP_REDUCTION(BODY) \ + VI_CHECK_REDUCTION(false) \ VI_VFP_LOOP_REDUCTION_BASE \ float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \ BODY; \ @@ -1651,6 +1618,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \ VI_VFP_LOOP_REDUCTION_END(e64) #define VI_VFP_VF_LOOP(BODY) \ + VI_CHECK_SSS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e32: {\ @@ -1670,15 +1638,17 @@ for (reg_t i = 0; i < vlmax; ++i) { \ DEBUG_RVV_FP_VF; \ VI_VFP_LOOP_END -#define VI_VFP_LOOP_CMP(BODY) \ +#define VI_VFP_LOOP_CMP(BODY, is_vs1) \ + 
VI_CHECK_MSS(is_vs1); \ VI_VFP_LOOP_CMP_BASE \ BODY; \ + set_fp_exceptions; \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_CMP_END \ #define VI_VFP_VF_LOOP_WIDE(BODY) \ - VI_VFP_LOOP_BASE \ VI_CHECK_DSS(false); \ + VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e32: {\ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ @@ -1699,8 +1669,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_VFP_VV_LOOP_WIDE(BODY) \ - VI_VFP_LOOP_BASE \ VI_CHECK_DSS(true); \ + VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e32: {\ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ @@ -1720,8 +1690,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \ VI_VFP_LOOP_WIDE_END #define VI_VFP_WF_LOOP_WIDE(BODY) \ - VI_VFP_LOOP_BASE \ VI_CHECK_DDS(false); \ + VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e32: {\ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ @@ -1740,8 +1710,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \ VI_VFP_LOOP_WIDE_END #define VI_VFP_WV_LOOP_WIDE(BODY) \ - VI_VFP_LOOP_BASE \ VI_CHECK_DDS(true); \ + VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ case e32: {\ float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \ diff --git a/riscv/encoding.h b/riscv/encoding.h index a18a0c9..17ba2d3 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -882,8 +882,6 @@ #define MASK_VMFEQ_VF 0xfc00707f #define MATCH_VMFLE_VF 0x64005057 #define MASK_VMFLE_VF 0xfc00707f -#define MATCH_VMFORD_VF 0x68005057 -#define MASK_VMFORD_VF 0xfc00707f #define MATCH_VMFLT_VF 0x6c005057 #define MASK_VMFLT_VF 0xfc00707f #define MATCH_VMFNE_VF 0x70005057 @@ -962,8 +960,6 @@ #define MASK_VMFEQ_VV 0xfc00707f #define MATCH_VMFLE_VV 0x64001057 #define MASK_VMFLE_VV 0xfc00707f -#define MATCH_VMFORD_VV 0x68001057 -#define MASK_VMFORD_VV 0xfc00707f #define MATCH_VMFLT_VV 0x6c001057 #define MASK_VMFLT_VV 0xfc00707f #define MATCH_VMFNE_VV 0x70001057 @@ -2103,7 +2099,6 @@ DECLARE_INSN(vfmerge_vfm, MATCH_VFMERGE_VFM, MASK_VFMERGE_VFM) DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F) DECLARE_INSN(vmfeq_vf, MATCH_VMFEQ_VF, MASK_VMFEQ_VF) DECLARE_INSN(vmfle_vf, MATCH_VMFLE_VF, MASK_VMFLE_VF) -DECLARE_INSN(vmford_vf, MATCH_VMFORD_VF, MASK_VMFORD_VF) DECLARE_INSN(vmflt_vf, MATCH_VMFLT_VF, MASK_VMFLT_VF) DECLARE_INSN(vmfne_vf, MATCH_VMFNE_VF, MASK_VMFNE_VF) DECLARE_INSN(vmfgt_vf, MATCH_VMFGT_VF, MASK_VMFGT_VF) @@ -2143,7 +2138,6 @@ DECLARE_INSN(vfsgnjx_vv, MATCH_VFSGNJX_VV, MASK_VFSGNJX_VV) DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S) DECLARE_INSN(vmfeq_vv, MATCH_VMFEQ_VV, MASK_VMFEQ_VV) DECLARE_INSN(vmfle_vv, MATCH_VMFLE_VV, MASK_VMFLE_VV) -DECLARE_INSN(vmford_vv, MATCH_VMFORD_VV, MASK_VMFORD_VV) DECLARE_INSN(vmflt_vv, MATCH_VMFLT_VV, MASK_VMFLT_VV) DECLARE_INSN(vmfne_vv, MATCH_VMFNE_VV, MASK_VMFNE_VV) DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV) diff --git a/riscv/insns/vaadd_vi.h b/riscv/insns/vaadd_vi.h index 5f8d5f5..6bd1a60 100644 --- a/riscv/insns/vaadd_vi.h +++ b/riscv/insns/vaadd_vi.h @@ -1,4 +1,5 @@ // vaadd: Averaging adds of integers +VI_CHECK_SSS(false); VRM xrm = P.VU.get_vround_mode(); VI_VI_LOOP ({ diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h index b479970..0a14467 100644 --- a/riscv/insns/vaadd_vv.h +++ b/riscv/insns/vaadd_vv.h @@ -1,2 +1,2 @@ // vaadd.vv vd, vs2, vs1 -VI_VVX_LOOP_AVG(vs1, +); +VI_VVX_LOOP_AVG(vs1, +, true); diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h index c811a0a..ae00d8e 100644 --- a/riscv/insns/vaadd_vx.h +++ b/riscv/insns/vaadd_vx.h @@ -1,2 +1,2 @@ // vaadd.vx vd, vs2, rs1 -VI_VVX_LOOP_AVG(rs1, +); +VI_VVX_LOOP_AVG(rs1, +, false); diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h 
index 5a5ccc9..a45c18d 100644 --- a/riscv/insns/vasub_vv.h +++ b/riscv/insns/vasub_vv.h @@ -1,2 +1,2 @@ // vasub.vv vd, vs2, vs1 -VI_VVX_LOOP_AVG(vs1, -); +VI_VVX_LOOP_AVG(vs1, -, true); diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h index c3cad4b..4e8dba1 100644 --- a/riscv/insns/vasub_vx.h +++ b/riscv/insns/vasub_vx.h @@ -1,2 +1,2 @@ // vasub.vx vd, vs2, rs1 -VI_VVX_LOOP_AVG(rs1, -); +VI_VVX_LOOP_AVG(rs1, -, false); diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h index b056b0e..77e91bf 100644 --- a/riscv/insns/vcompress_vm.h +++ b/riscv/insns/vcompress_vm.h @@ -1,14 +1,13 @@ // vcompress vd, vs2, vs1 -require(P.VU.vsew >= e8 && P.VU.vsew <= e64); -require_vector; require(P.VU.vstart == 0); -reg_t sew = P.VU.vsew; -reg_t vl = P.VU.vl; -reg_t rd_num = insn.rd(); -reg_t rs1_num = insn.rs1(); -reg_t rs2_num = insn.rs2(); +require((insn.rd() & (P.VU.vlmul - 1)) == 0); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); +require(insn.rd() != insn.rs2()); +require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs1(), 1)); + reg_t pos = 0; -for (reg_t i = P.VU.vstart ; i < vl; ++i) { + +VI_GENERAL_LOOP_BASE const int mlen = P.VU.vmlen; const int midx = (mlen * i) / 64; const int mpos = (mlen * i) % 64; @@ -32,10 +31,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) { ++pos; } -} - -if (vl > 0 && TAIL_ZEROING) { - uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, pos * ((sew >> 3) * 1)); - memset(tail, 0, (P.VU.vlmax - pos) * ((sew >> 3) * 1)); -} - +VI_LOOP_END; diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h index 311f875..f6604fb 100644 --- a/riscv/insns/vfcvt_f_x_v.h +++ b/riscv/insns/vfcvt_f_x_v.h @@ -1,5 +1,5 @@ // vfcvt.f.x.v vd, vd2, vm -VI_VFP_VV_LOOP +VI_VFP_VF_LOOP ({ auto vs2_i = P.VU.elt<int32_t>(rs2_num, i); vd = i32_to_f32(vs2_i); diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h index ceabea3..2c845ac 100644 --- a/riscv/insns/vfcvt_f_xu_v.h +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -1,5 +1,5 @@ // vfcvt.f.xu.v vd, vd2, vm -VI_VFP_VV_LOOP +VI_VFP_VF_LOOP ({ auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i); vd = ui32_to_f32(vs2_u); diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h index ee53c6d..a9eedc4 100644 --- a/riscv/insns/vfcvt_x_f_v.h +++ b/riscv/insns/vfcvt_x_f_v.h @@ -1,5 +1,5 @@ // vfcvt.x.f.v vd, vd2, vm -VI_VFP_VV_LOOP +VI_VFP_VF_LOOP ({ P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); }) diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h index 6d12bce..ea78165 100644 --- a/riscv/insns/vfmerge_vfm.h +++ b/riscv/insns/vfmerge_vfm.h @@ -1,13 +1,7 @@ // vfmerge_vf vd, vs2, vs1, vm -require_extension('F'); -require_fp; -require(P.VU.vsew == 32); -require_vector; -reg_t vl = P.VU.vl; +VI_CHECK_SSS(false); +VI_VFP_COMMON; reg_t sew = P.VU.vsew; -reg_t rd_num = insn.rd(); -reg_t rs1_num = insn.rs1(); -reg_t rs2_num = insn.rs2(); for (reg_t i=P.VU.vstart; i<vl; ++i) { auto &vd = P.VU.elt<float32_t>(rd_num, i); auto rs1 = f32(READ_FREG(rs1_num)); @@ -20,6 +14,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) { vd = use_first ? 
rs1 : vs2; } -VI_TAIL_ZERO(1); P.VU.vstart = 0; -set_fp_exceptions; diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h index c6dbaff..066db80 100644 --- a/riscv/insns/vfmv_f_s.h +++ b/riscv/insns/vfmv_f_s.h @@ -1,6 +1,5 @@ // vfmv_f_s: rd = vs2[0] (rs1=0) require_vector; -require(insn.v_vm() == 1); require_fp; require_extension('F'); require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h index cb81008..8ff6094 100644 --- a/riscv/insns/vfmv_s_f.h +++ b/riscv/insns/vfmv_s_f.h @@ -15,17 +15,5 @@ if (vl > 0) { else P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v; - const reg_t max_len = P.VU.VLEN / sew; - for (reg_t i = 1; i < max_len; ++i) { - switch(sew) { - case e32: - P.VU.elt<uint32_t>(rd_num, i) = 0; - break; - default: - require(false); - break; - } - } - vl = 0; } diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h index c85a3e9..f323263 100644 --- a/riscv/insns/vfmv_v_f.h +++ b/riscv/insns/vfmv_v_f.h @@ -1,13 +1,7 @@ -// vfmerge_vf vd, vs2, vs1, vm -require_extension('F'); -require_fp; -require(P.VU.vsew == 32); -require_vector; -reg_t vl = P.VU.vl; +// vfmv_vf vd, vs1 +require((insn.rd() & (P.VU.vlmul - 1)) == 0); +VI_VFP_COMMON reg_t sew = P.VU.vsew; -reg_t rd_num = insn.rd(); -reg_t rs1_num = insn.rs1(); -reg_t rs2_num = insn.rs2(); for (reg_t i=P.VU.vstart; i<vl; ++i) { auto &vd = P.VU.elt<float32_t>(rd_num, i); auto rs1 = f32(READ_FREG(rs1_num)); @@ -15,6 +9,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) { vd = rs1; } -VI_TAIL_ZERO(1); P.VU.vstart = 0; -set_fp_exceptions; diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h index df6dd04..25422d6 100644 --- a/riscv/insns/vid_v.h +++ b/riscv/insns/vid_v.h @@ -6,6 +6,9 @@ reg_t sew = P.VU.vsew; reg_t rd_num = insn.rd(); reg_t rs1_num = insn.rs1(); reg_t rs2_num = insn.rs2(); +require((rd_num & (P.VU.vlmul - 1)) == 0); +if (insn.v_vm() == 0 && P.VU.vlmul >= 2) \ + require(insn.rd() != 0); for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { VI_LOOP_ELEMENT_SKIP(); @@ -26,5 +29,4 @@ for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { } } -VI_TAIL_ZERO(1); P.VU.vstart = 0; diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h index fde0291..04bfcd8 100644 --- a/riscv/insns/viota_m.h +++ b/riscv/insns/viota_m.h @@ -7,6 +7,10 @@ reg_t rd_num = insn.rd(); reg_t rs1_num = insn.rs1(); reg_t rs2_num = insn.rs2(); require(P.VU.vstart == 0); +require(!is_overlapped(rd_num, P.VU.vlmul, rs2_num, 1)); +if (insn.v_vm() == 0) + require(!is_overlapped(rd_num, P.VU.vlmul, 0, 1)); +require((rd_num & (P.VU.vlmul - 1)) == 0); int cnt = 0; for (reg_t i = 0; i < vl; ++i) { @@ -49,4 +53,3 @@ for (reg_t i = 0; i < vl; ++i) { } } -VI_TAIL_ZERO(1); diff --git a/riscv/insns/vleff_v.h b/riscv/insns/vleff_v.h index ec2777a..e858de9 100644 --- a/riscv/insns/vleff_v.h +++ b/riscv/insns/vleff_v.h @@ -1,7 +1,7 @@ -require_vector; require(P.VU.vsew >= e8 && P.VU.vsew <= e64); const reg_t nf = insn.v_nf() + 1; require((nf * P.VU.vlmul) <= (NVPR / 4)); +VI_CHECK_SXX; const reg_t sew = P.VU.vsew; const reg_t vl = P.VU.vl; const reg_t baseAddr = RS1; @@ -9,7 +9,6 @@ const reg_t rd_num = insn.rd(); bool early_stop = false; const reg_t vlmul = P.VU.vlmul; for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) { - bool is_valid = true; bool is_zero = false; VI_STRIP(i); VI_ELEMENT_SKIP(i); @@ -20,23 +19,23 @@ for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) { switch (sew) { case e8: P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = - is_valid ? 
MMU.load_uint8(baseAddr + (i * nf + fn) * 1) : 0; - is_zero = is_valid && P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0; + MMU.load_uint8(baseAddr + (i * nf + fn) * 1); + is_zero = P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0; break; case e16: P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = - is_valid ? MMU.load_uint16(baseAddr + (i * nf + fn) * 2) : 0; - is_zero = is_valid && P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0; + MMU.load_uint16(baseAddr + (i * nf + fn) * 2); + is_zero = P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0; break; case e32: P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = - is_valid ? MMU.load_uint32(baseAddr + (i * nf + fn) * 4) : 0; - is_zero = is_valid && P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0; + MMU.load_uint32(baseAddr + (i * nf + fn) * 4); + is_zero = P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0; break; case e64: P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = - is_valid ? MMU.load_uint64(baseAddr + (i * nf + fn) * 8) : 0; - is_zero = is_valid && P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0; + MMU.load_uint64(baseAddr + (i * nf + fn) * 8); + is_zero = P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0; break; } diff --git a/riscv/insns/vlxb_v.h b/riscv/insns/vlxb_v.h index 5a99bd3..57ce8c8 100644 --- a/riscv/insns/vlxb_v.h +++ b/riscv/insns/vlxb_v.h @@ -1,4 +1,5 @@ // vlxb.v and vlsseg[2-8]b.v require(P.VU.vsew >= e8); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); VI_LD(index[i], fn, int8, 1); diff --git a/riscv/insns/vlxbu_v.h b/riscv/insns/vlxbu_v.h index daf2d2b..d8e3dd6 100644 --- a/riscv/insns/vlxbu_v.h +++ b/riscv/insns/vlxbu_v.h @@ -1,4 +1,5 @@ // vlxbu.v and vlxseg[2-8]bu.v require(P.VU.vsew >= e8); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); VI_LD(index[i], fn, uint8, 1); diff --git a/riscv/insns/vlxe_v.h b/riscv/insns/vlxe_v.h index b1190a8..1055eca 100644 --- a/riscv/insns/vlxe_v.h +++ b/riscv/insns/vlxe_v.h @@ -1,5 +1,6 @@ // vlxe.v and vlxseg[2-8]e.v reg_t sew = P.VU.vsew; +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); if (sew == e8) { VI_LD(index[i], fn, int8, 1); diff --git a/riscv/insns/vlxh_v.h b/riscv/insns/vlxh_v.h index 98145db..9f4c3a1 100644 --- a/riscv/insns/vlxh_v.h +++ b/riscv/insns/vlxh_v.h @@ -1,4 +1,5 @@ // vlxh.v and vlxseg[2-8]h.v require(P.VU.vsew >= e16); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); VI_LD(index[i], fn, int16, 2); diff --git a/riscv/insns/vlxhu_v.h b/riscv/insns/vlxhu_v.h index 27d549c..9283127 100644 --- a/riscv/insns/vlxhu_v.h +++ b/riscv/insns/vlxhu_v.h @@ -1,4 +1,5 @@ // vlxh.v and vlxseg[2-8]h.v require(P.VU.vsew >= e16); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); VI_LD(index[i], fn, uint16, 2); diff --git a/riscv/insns/vlxw_v.h b/riscv/insns/vlxw_v.h index 83300f0..c1117a2 100644 --- a/riscv/insns/vlxw_v.h +++ b/riscv/insns/vlxw_v.h @@ -1,5 +1,6 @@ // vlxw.v and vlxseg[2-8]w.v require(P.VU.vsew >= e32); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); VI_LD(index[i], fn, int32, 4); diff --git a/riscv/insns/vlxwu_v.h b/riscv/insns/vlxwu_v.h index a2f9913..d3034bd 100644 --- a/riscv/insns/vlxwu_v.h +++ b/riscv/insns/vlxwu_v.h @@ -1,4 +1,5 @@ // vlxwu.v and vlxseg[2-8]wu.v require(P.VU.vsew >= e32); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); 
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax); VI_LD(index[i], fn, uint32, 4); diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h index fd79089..a8185d1 100644 --- a/riscv/insns/vmadc_vim.h +++ b/riscv/insns/vmadc_vim.h @@ -1,5 +1,4 @@ // vmadc.vim vd, vs2, simm5 -require(!(insn.rd() == 0 && P.VU.vlmul > 1)); VI_XI_LOOP_CARRY ({ auto v0 = P.VU.elt<uint64_t>(0, midx); diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h index 82042ca..8d58658 100644 --- a/riscv/insns/vmadc_vvm.h +++ b/riscv/insns/vmadc_vvm.h @@ -1,5 +1,4 @@ // vmadc.vvm vd, vs2, rs1 -require(!(insn.rd() == 0 && P.VU.vlmul > 1)); VI_VV_LOOP_CARRY ({ auto v0 = P.VU.elt<uint64_t>(0, midx); diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h index 8f26584..0b6273a 100644 --- a/riscv/insns/vmadc_vxm.h +++ b/riscv/insns/vmadc_vxm.h @@ -1,5 +1,4 @@ // vadc.vx vd, vs2, rs1 -require(!(insn.rd() == 0 && P.VU.vlmul > 1)); VI_XI_LOOP_CARRY ({ auto v0 = P.VU.elt<uint64_t>(0, midx); diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h index 13354d6..c6c87c7 100644 --- a/riscv/insns/vmerge_vim.h +++ b/riscv/insns/vmerge_vim.h @@ -1,4 +1,5 @@ // vmerge.vim vd, vs2, simm5 +VI_CHECK_SSS(false); VI_VVXI_MERGE_LOOP ({ int midx = (P.VU.vmlen * i) / 64; diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h index 7530b40..97a0182 100644 --- a/riscv/insns/vmerge_vvm.h +++ b/riscv/insns/vmerge_vvm.h @@ -1,4 +1,5 @@ // vmerge.vvm vd, vs2, vs1 +VI_CHECK_SSS(true); VI_VVXI_MERGE_LOOP ({ int midx = (P.VU.vmlen * i) / 64; diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h index b1757fa..de7df91 100644 --- a/riscv/insns/vmerge_vxm.h +++ b/riscv/insns/vmerge_vxm.h @@ -1,4 +1,5 @@ // vmerge.vxm vd, vs2, rs1 +VI_CHECK_SSS(false); VI_VVXI_MERGE_LOOP ({ int midx = (P.VU.vmlen * i) / 64; diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h index cedf4b9..f0e7109 100644 --- a/riscv/insns/vmfeq_vf.h +++ b/riscv/insns/vmfeq_vf.h @@ -2,4 +2,4 @@ VI_VFP_LOOP_CMP ({ res = f32_eq(vs2, rs1); -}) +}, false) diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h index 7e76cac..1be3a69 100644 --- a/riscv/insns/vmfeq_vv.h +++ b/riscv/insns/vmfeq_vv.h @@ -2,4 +2,4 @@ VI_VFP_LOOP_CMP ({ res = f32_eq(vs2, vs1); -}) +}, true) diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h index 7eade89..1c68366 100644 --- a/riscv/insns/vmfge_vf.h +++ b/riscv/insns/vmfge_vf.h @@ -1,5 +1,5 @@ // vfge.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ - res = f32_le_quiet(rs1, vs2); -}) + res = f32_le(rs1, vs2); +}, false) diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h index 6115d06..0979185 100644 --- a/riscv/insns/vmfgt_vf.h +++ b/riscv/insns/vmfgt_vf.h @@ -1,5 +1,5 @@ // vfgt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ - res = f32_lt_quiet(rs1, vs2); -}) + res = f32_lt(rs1, vs2); +}, false) diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h index 998b93b..90607ec 100644 --- a/riscv/insns/vmfle_vf.h +++ b/riscv/insns/vmfle_vf.h @@ -2,4 +2,4 @@ VI_VFP_LOOP_CMP ({ res = f32_le(vs2, rs1); -}) +}, false) diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h index c716312..6ccdfec 100644 --- a/riscv/insns/vmfle_vv.h +++ b/riscv/insns/vmfle_vv.h @@ -1,5 +1,5 @@ // vfle.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ - res = f32_le_quiet(vs2, vs1); -}) + res = f32_le(vs2, vs1); +}, true) diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h index af436e4..6b71a4a 100644 --- a/riscv/insns/vmflt_vf.h +++ b/riscv/insns/vmflt_vf.h @@ -1,5 +1,5 @@ // vflt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ - res = 
f32_lt_quiet(vs2, rs1); -}) + res = f32_lt(vs2, rs1); +}, false) diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h index ded867d..a2ed8e3 100644 --- a/riscv/insns/vmflt_vv.h +++ b/riscv/insns/vmflt_vv.h @@ -1,5 +1,5 @@ // vflt.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ - res = f32_lt_quiet(vs2, vs1); -}) + res = f32_lt(vs2, vs1); +}, true) diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h index ac2eced..ef63678 100644 --- a/riscv/insns/vmfne_vf.h +++ b/riscv/insns/vmfne_vf.h @@ -2,4 +2,4 @@ VI_VFP_LOOP_CMP ({ res = !f32_eq(vs2, rs1); -}) +}, false) diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h index 3fa8beb..8378a23 100644 --- a/riscv/insns/vmfne_vv.h +++ b/riscv/insns/vmfne_vv.h @@ -2,4 +2,4 @@ VI_VFP_LOOP_CMP ({ res = !f32_eq(vs2, vs1); -}) +}, true) diff --git a/riscv/insns/vmford_vf.h b/riscv/insns/vmford_vf.h deleted file mode 100644 index b5e74f2..0000000 --- a/riscv/insns/vmford_vf.h +++ /dev/null @@ -1,5 +0,0 @@ -// vford.vf vd, vs2, rs1, vm -VI_VFP_LOOP_CMP -({ - res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1)); -}) diff --git a/riscv/insns/vmford_vv.h b/riscv/insns/vmford_vv.h deleted file mode 100644 index 2e459c1..0000000 --- a/riscv/insns/vmford_vv.h +++ /dev/null @@ -1,5 +0,0 @@ -// vford.vv vd, vs2, vs1, vm -VI_VFP_LOOP_CMP -({ - res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1)); -}) diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h index 3804ba8..f4ce6f4 100644 --- a/riscv/insns/vmsbc_vvm.h +++ b/riscv/insns/vmsbc_vvm.h @@ -1,5 +1,4 @@ // vmsbc.vvm vd, vs2, rs1 -require(!(insn.rd() == 0 && P.VU.vlmul > 1)); VI_VV_LOOP_CARRY ({ auto v0 = P.VU.elt<uint64_t>(0, midx); diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h index d5332f5..aec4409 100644 --- a/riscv/insns/vmsbc_vxm.h +++ b/riscv/insns/vmsbc_vxm.h @@ -1,5 +1,4 @@ // vmsbc.vxm vd, vs2, rs1 -require(!(insn.rd() == 0 && P.VU.vlmul > 1)); VI_XI_LOOP_CARRY ({ auto &v0 = P.VU.elt<uint64_t>(0, midx); diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h index 3047cca..443fcbb 100644 --- a/riscv/insns/vmsbf_m.h +++ b/riscv/insns/vmsbf_m.h @@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) { } } -VI_TAIL_ZERO_MASK(rd_num); P.VU.vstart = 0; diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h index 826e7cd..381088b 100644 --- a/riscv/insns/vmsif_m.h +++ b/riscv/insns/vmsif_m.h @@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) { } } -VI_TAIL_ZERO_MASK(rd_num); P.VU.vstart = 0; diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h index 48805f7..d66002d 100644 --- a/riscv/insns/vmsof_m.h +++ b/riscv/insns/vmsof_m.h @@ -28,5 +28,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) { } } -VI_TAIL_ZERO_MASK(rd_num); P.VU.vstart = 0; diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h index 59882da..b918551 100644 --- a/riscv/insns/vmulhsu_vv.h +++ b/riscv/insns/vmulhsu_vv.h @@ -1,4 +1,5 @@ // vmulhsu.vv vd, vs2, vs1 +VI_CHECK_SSS(true); VI_LOOP_BASE switch(sew) { case e8: { diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h index d39615a..cb2db3d 100644 --- a/riscv/insns/vmulhsu_vx.h +++ b/riscv/insns/vmulhsu_vx.h @@ -1,4 +1,5 @@ // vmulhsu.vx vd, vs2, rs1 +VI_CHECK_SSS(false); VI_LOOP_BASE switch(sew) { case e8: { diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h index 38b2697..948b5be 100644 --- a/riscv/insns/vmv_s_x.h +++ b/riscv/insns/vmv_s_x.h @@ -24,23 +24,5 @@ if (vl > 0) { break; } - const reg_t max_len = P.VU.VLEN / sew; - for (reg_t i = 1; i < max_len; ++i) { - switch(sew) { - case e8: 
- P.VU.elt<uint8_t>(rd_num, i) = 0; - break; - case e16: - P.VU.elt<uint16_t>(rd_num, i) = 0; - break; - case e32: - P.VU.elt<uint32_t>(rd_num, i) = 0; - break; - default: - P.VU.elt<uint64_t>(rd_num, i) = 0; - break; - } - } - vl = 0; } diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h index 734010b..a4f9a5c 100644 --- a/riscv/insns/vmv_v_v.h +++ b/riscv/insns/vmv_v_v.h @@ -1,4 +1,5 @@ // vvmv.v.v vd, vs1 +require((insn.rs1() & (P.VU.vlmul - 1)) == 0); VI_VVXI_MERGE_LOOP ({ vd = vs1; diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h index f22c2dd..50f2e79 100644 --- a/riscv/insns/vmv_x_s.h +++ b/riscv/insns/vmv_x_s.h @@ -1,25 +1,28 @@ -// vext_x_v: rd = vs2[0] +// vmv_x_s: rd = vs2[rs1] require(insn.v_vm() == 1); uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen()); -VI_LOOP_BASE -VI_LOOP_END_NO_TAIL_ZERO -switch(sew) { -case e8: - WRITE_RD(P.VU.elt<uint8_t>(rs2_num, 0)); - break; -case e16: - WRITE_RD(P.VU.elt<uint16_t>(rs2_num, 0)); - break; -case e32: - if (P.get_max_xlen() == 32) - WRITE_RD(P.VU.elt<int32_t>(rs2_num, 0)); - else - WRITE_RD(P.VU.elt<uint32_t>(rs2_num, 0)); - break; -case e64: - if (P.get_max_xlen() <= sew) - WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0) & xmask); - else - WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0)); - break; +reg_t rs1 = RS1; +reg_t sew = P.VU.vsew; +reg_t rs2_num = insn.rs2(); + +if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) { + WRITE_RD(0); +} else { + switch(sew) { + case e8: + WRITE_RD(P.VU.elt<int8_t>(rs2_num, rs1)); + break; + case e16: + WRITE_RD(P.VU.elt<int16_t>(rs2_num, rs1)); + break; + case e32: + WRITE_RD(P.VU.elt<int32_t>(rs2_num, rs1)); + break; + case e64: + if (P.get_max_xlen() <= sew) + WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1) & xmask); + else + WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1)); + break; + } } diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h index ca27593..eb21710 100644 --- a/riscv/insns/vnclip_vi.h +++ b/riscv/insns/vnclip_vi.h @@ -4,14 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1; int64_t int_min = -(1 << (P.VU.vsew - 1)); VI_VVXI_LOOP_NARROW ({ - int64_t result = vs2; -// rounding - INT_ROUNDING(result, xrm, sew); + unsigned shift = zimm5 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); - result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? 
(sew * 2) - 1: 31)); + result = result >> shift; -// saturation + // saturation if (result < int_min) { result = int_min; P.VU.vxsat = 1; @@ -21,4 +22,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}) +}, false) diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h index 7bcb4cb..92575a6 100644 --- a/riscv/insns/vnclip_vv.h +++ b/riscv/insns/vnclip_vv.h @@ -4,20 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1; int64_t int_min = -(1 << (P.VU.vsew - 1)); VI_VVXI_LOOP_NARROW ({ + int128_t result = vs2; + unsigned shift = vs1 & ((sew * 2) - 1); - int64_t result = vs2; -// rounding - INT_ROUNDING(result, xrm, sew); + // rounding + INT_ROUNDING(result, xrm, shift); -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - unsigned_shift_amount = 2 * sew - 1; - } - - result = (vsext(result, sew * 2)) >> unsigned_shift_amount; + result = result >> shift; -// saturation + // saturation if (result < int_min) { result = int_min; P.VU.vxsat = 1; @@ -27,4 +22,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}) +}, true) diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h index b66e830..96409de 100644 --- a/riscv/insns/vnclip_vx.h +++ b/riscv/insns/vnclip_vx.h @@ -4,19 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1; int64_t int_min = -(1 << (P.VU.vsew - 1)); VI_VVXI_LOOP_NARROW ({ + int128_t result = vs2; + unsigned shift = rs1 & ((sew * 2) - 1); - int64_t result = vs2; -// rounding - INT_ROUNDING(result, xrm, sew); + // rounding + INT_ROUNDING(result, xrm, shift); -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - unsigned_shift_amount = 2 * sew - 1; - } - result = vsext(result, sew * 2) >> unsigned_shift_amount; + result = result >> shift; -// saturation + // saturation if (result < int_min) { result = int_min; P.VU.vxsat = 1; @@ -26,4 +22,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}) +}, false) diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h index 61cb015..b1527f7 100644 --- a/riscv/insns/vnclipu_vi.h +++ b/riscv/insns/vnclipu_vi.h @@ -4,11 +4,13 @@ uint64_t int_max = ~(-1ll << P.VU.vsew); VI_VVXI_LOOP_NARROW ({ uint64_t result = vs2_u; + unsigned shift = zimm5 & ((sew * 2) - 1); + // rounding - INT_ROUNDING(result, xrm, sew); + INT_ROUNDING(result, xrm, shift); // unsigned shifting to rs1 - result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? 
(sew * 2) - 1: 31)); + result = result >> shift; // saturation if (result & (uint64_t)(-1ll << sew)) { @@ -17,4 +19,4 @@ VI_VVXI_LOOP_NARROW } vd = result; -}) +}, false) diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h index 004f24f..217e82f 100644 --- a/riscv/insns/vnclipu_vv.h +++ b/riscv/insns/vnclipu_vv.h @@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t int_max = ~(-1ll << P.VU.vsew); VI_VVXI_LOOP_NARROW ({ + uint128_t result = vs2_u; + unsigned shift = vs1 & ((sew * 2) - 1); - uint64_t result = vs2_u; + // rounding + INT_ROUNDING(result, xrm, shift); -// rounding - INT_ROUNDING(result, xrm, sew); + result = result >> shift; -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - result = 0; - } else { - result = vzext(result, sew * 2) >> unsigned_shift_amount; - } -// saturation + // saturation if (result & (uint64_t)(-1ll << sew)) { result = int_max; P.VU.vxsat = 1; } vd = result; -}) +}, true) diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h index 0507a2b..ce15b55 100644 --- a/riscv/insns/vnclipu_vx.h +++ b/riscv/insns/vnclipu_vx.h @@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t int_max = ~(-1ll << P.VU.vsew); VI_VVXI_LOOP_NARROW ({ - uint64_t result = vs2; + uint128_t result = vs2_u; + unsigned shift = rs1 & ((sew * 2) - 1); -// rounding - INT_ROUNDING(result, xrm, sew); + // rounding + INT_ROUNDING(result, xrm, shift); -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - result = 0; - } else { - result = vzext(result, sew * 2) >> unsigned_shift_amount; - } + result = result >> shift; -// saturation + // saturation if (result & (uint64_t)(-1ll << sew)) { result = int_max; P.VU.vxsat = 1; } vd = result; -}) +}, false) diff --git a/riscv/insns/vnsra_vi.h b/riscv/insns/vnsra_vi.h index 0502ff1..f41979e 100644 --- a/riscv/insns/vnsra_vi.h +++ b/riscv/insns/vnsra_vi.h @@ -2,4 +2,4 @@ VI_VI_LOOP_NSHIFT ({ vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f); -}) +}, false) diff --git a/riscv/insns/vnsra_vv.h b/riscv/insns/vnsra_vv.h index 555ce3f..59f255e 100644 --- a/riscv/insns/vnsra_vv.h +++ b/riscv/insns/vnsra_vv.h @@ -2,4 +2,4 @@ VI_VV_LOOP_NSHIFT ({ vd = vs2 >> (vs1 & (sew * 2 - 1)); -}) +}, true) diff --git a/riscv/insns/vnsra_vx.h b/riscv/insns/vnsra_vx.h index 05a55e3..adaa24c 100644 --- a/riscv/insns/vnsra_vx.h +++ b/riscv/insns/vnsra_vx.h @@ -2,4 +2,4 @@ VI_VX_LOOP_NSHIFT ({ vd = vs2 >> (rs1 & (sew * 2 - 1)); -}) +}, false) diff --git a/riscv/insns/vnsrl_vi.h b/riscv/insns/vnsrl_vi.h index d4dfcf0..91402c0 100644 --- a/riscv/insns/vnsrl_vi.h +++ b/riscv/insns/vnsrl_vi.h @@ -2,4 +2,4 @@ VI_VI_LOOP_NSHIFT ({ vd = vs2_u >> (zimm5 & (sew * 2 - 1)); -}) +}, false) diff --git a/riscv/insns/vnsrl_vv.h b/riscv/insns/vnsrl_vv.h index ab72b84..609299f 100644 --- a/riscv/insns/vnsrl_vv.h +++ b/riscv/insns/vnsrl_vv.h @@ -2,4 +2,4 @@ VI_VV_LOOP_NSHIFT ({ vd = vs2_u >> (vs1 & (sew * 2 - 1)); -}) +}, true) diff --git a/riscv/insns/vnsrl_vx.h b/riscv/insns/vnsrl_vx.h index e149b38..8356a2b 100644 --- a/riscv/insns/vnsrl_vx.h +++ b/riscv/insns/vnsrl_vx.h @@ -2,4 +2,4 @@ VI_VX_LOOP_NSHIFT ({ vd = vs2_u >> (rs1 & (sew * 2 - 1)); -}) +}, false) diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h index eff67b8..cab4a78 100644 --- a/riscv/insns/vrgather_vi.h +++ b/riscv/insns/vrgather_vi.h @@ -1,11 +1,14 @@ // vrgather.vi vd, vs2, zimm5 vm # vd[i] = (zimm5 >= 
VLMAX) ? 0 : vs2[zimm5]; -require(P.VU.vsew >= e8 && P.VU.vsew <= e64); -require_vector; -reg_t vl = P.VU.vl; -reg_t sew = P.VU.vsew; -reg_t rd_num = insn.rd(); -reg_t rs2_num = insn.rs2(); +require((insn.rd() & (P.VU.vlmul - 1)) == 0); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); +require(insn.rd() != insn.rs2()); +if (insn.v_vm() == 0) + require(insn.rd() != 0); + reg_t zimm5 = insn.v_zimm5(); + +VI_LOOP_BASE + for (reg_t i = P.VU.vstart; i < vl; ++i) { VI_LOOP_ELEMENT_SKIP(); @@ -25,5 +28,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) { } } -VI_TAIL_ZERO(1); -P.VU.vstart = 0; +VI_LOOP_END; diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h index ce0c2a6..8266c95 100644 --- a/riscv/insns/vrgather_vv.h +++ b/riscv/insns/vrgather_vv.h @@ -1,15 +1,12 @@ // vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; -require(P.VU.vsew >= e8 && P.VU.vsew <= e64); -require_vector; -reg_t vl = P.VU.vl; -reg_t sew = P.VU.vsew; -reg_t rd_num = insn.rd(); -reg_t rs1_num = insn.rs1(); -reg_t rs2_num = insn.rs2(); -for (reg_t i = P.VU.vstart; i < vl; ++i) { - VI_LOOP_ELEMENT_SKIP(); - VI_CHECK_VREG_OVERLAP(rd_num, rs1_num); - VI_CHECK_VREG_OVERLAP(rd_num, rs2_num); +require((insn.rd() & (P.VU.vlmul - 1)) == 0); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); +require((insn.rs1() & (P.VU.vlmul - 1)) == 0); +require(insn.rd() != insn.rs2() && insn.rd() != insn.rs1()); +if (insn.v_vm() == 0) + require(insn.rd() != 0); + +VI_LOOP_BASE switch (sew) { case e8: { auto vs1 = P.VU.elt<uint8_t>(rs1_num, i); @@ -33,7 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) { break; } } -} - -VI_TAIL_ZERO(1); -P.VU.vstart = 0; +VI_LOOP_END; diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h index e9ff3b1..15e16b7 100644 --- a/riscv/insns/vrgather_vx.h +++ b/riscv/insns/vrgather_vx.h @@ -1,15 +1,13 @@ // vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1]; -require(P.VU.vsew >= e8 && P.VU.vsew <= e64); -require_vector; -reg_t vl = P.VU.vl; -reg_t sew = P.VU.vsew; -reg_t rd_num = insn.rd(); -reg_t rs1_num = insn.rs1(); -reg_t rs2_num = insn.rs2(); +require((insn.rd() & (P.VU.vlmul - 1)) == 0); +require((insn.rs2() & (P.VU.vlmul - 1)) == 0); +require(insn.rd() != insn.rs2()); +if (insn.v_vm() == 0) + require(insn.rd() != 0); + reg_t rs1 = RS1; -for (reg_t i = P.VU.vstart; i < vl; ++i) { - VI_LOOP_ELEMENT_SKIP(); +VI_LOOP_BASE switch (sew) { case e8: P.VU.elt<uint8_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, rs1); @@ -24,7 +22,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) { P.VU.elt<uint64_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 
0 : P.VU.elt<uint64_t>(rs2_num, rs1);
     break;
   }
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h
index de2cb83..c361f08 100644
--- a/riscv/insns/vsadd_vi.h
+++ b/riscv/insns/vsadd_vi.h
@@ -1,4 +1,5 @@
 // vsadd.vi vd, vs2 simm5
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 switch(sew) {
diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h
index 2152bab..ce0ef40 100644
--- a/riscv/insns/vsadd_vv.h
+++ b/riscv/insns/vsadd_vv.h
@@ -1,4 +1,5 @@
 // vsadd.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 bool sat = false;
 switch(sew) {
diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h
index 781e9e8..691f017 100644
--- a/riscv/insns/vsadd_vx.h
+++ b/riscv/insns/vsadd_vx.h
@@ -1,4 +1,5 @@
 // vsadd.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 switch(sew) {
diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h
index 0069df7..04e2540 100644
--- a/riscv/insns/vslide1down_vx.h
+++ b/riscv/insns/vslide1down_vx.h
@@ -1,4 +1,9 @@
 //vslide1down.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 VI_LOOP_BASE
 if (i != vl - 1) {
   switch (sew) {
diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h
index 50cc503..69ce0fd 100644
--- a/riscv/insns/vslide1up_vx.h
+++ b/riscv/insns/vslide1up_vx.h
@@ -1,8 +1,10 @@
 //vslide1up.vx vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
   require(insn.rd() != 0);
-VI_CHECK_SS
 VI_LOOP_BASE
 if (i != 0) {
   if (sew == e8) {
diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h
index c21c5f2..dd58c1e 100644
--- a/riscv/insns/vslidedown_vi.h
+++ b/riscv/insns/vslidedown_vi.h
@@ -1,8 +1,14 @@
 // vslidedown.vi vd, vs2, rs1
-VI_LOOP_BASE
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 const reg_t sh = insn.v_zimm5();
-bool is_valid = (i + sh) < P.VU.vlmax;
+VI_LOOP_BASE
+
 reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
 
 if (is_valid) {
   offset = sh;
diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h
index 251740c..9881e0e 100644
--- a/riscv/insns/vslidedown_vx.h
+++ b/riscv/insns/vslidedown_vx.h
@@ -1,11 +1,17 @@
 //vslidedown.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
+const reg_t sh = RS1;
 VI_LOOP_BASE
-reg_t offset = RS1 == (reg_t)-1 ? ((RS1 & (P.VU.vlmax * 2 - 1)) + i) : RS1;
-bool is_valid = offset < P.VU.vlmax;
+reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
 
-if (!is_valid) {
-  offset = 0;
+if (is_valid) {
+  offset = sh;
 }
 
 switch (sew) {
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
index 4135b20..64b4aca 100644
--- a/riscv/insns/vslideup_vi.h
+++ b/riscv/insns/vslideup_vi.h
@@ -1,8 +1,10 @@
 // vslideup.vi vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
   require(insn.rd() != 0);
-VI_CHECK_SS
 const reg_t offset = insn.v_zimm5();
 VI_LOOP_BASE
 if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
index bf73fcd..063c061 100644
--- a/riscv/insns/vslideup_vx.h
+++ b/riscv/insns/vslideup_vx.h
@@ -1,4 +1,10 @@
 //vslideup.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 const reg_t offset = RS1;
 VI_LOOP_BASE
 if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
index a0c7f99..0807899 100644
--- a/riscv/insns/vsmul_vv.h
+++ b/riscv/insns/vsmul_vv.h
@@ -1,33 +1,32 @@
 // vsmul: Signed saturating and rounding fractional multiply
 VRM xrm = P.VU.get_vround_mode();
-uint64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint64_t int_min = - (1 << (P.VU.vsew - 1));
-uint64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
 
-VI_VV_ULOOP
+VI_VV_LOOP
 ({
-  uint64_t vs1_sign;
-  uint64_t vs2_sign;
-  uint64_t result_sign;
+  int64_t vs1_sign;
+  int64_t vs2_sign;
+  int64_t result_sign;
 
   vs1_sign = vs1 & sign_mask;
   vs2_sign = vs2 & sign_mask;
   bool overflow = vs1 == vs2 && vs1 == int_min;
 
-  uint128_t result = (uint128_t)vs1 * (uint128_t)vs2;
-  result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+  int128_t result = (int128_t)vs1 * (int128_t)vs2;
   result_sign = (vs1_sign ^ vs2_sign) & sign_mask;
+  // rounding
   INT_ROUNDING(result, xrm, sew - 1);
-  // unsigned shifting
+  // remove guard bits
   result = result >> (sew - 1);
 
   // saturation
   if (overflow) {
     result = int_max;
     P.VU.vxsat = 1;
-  } else {
-    result |= result_sign;
   }
+
   vd = result;
 })
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
index c7909c7..4326d8f 100644
--- a/riscv/insns/vsmul_vx.h
+++ b/riscv/insns/vsmul_vx.h
@@ -1,34 +1,33 @@
 // vsmul
 VRM xrm = P.VU.get_vround_mode();
-uint128_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint128_t int_min = - (1 << (P.VU.vsew - 1));
-uint128_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
 
-VI_VX_ULOOP
+VI_VX_LOOP
 ({
-  uint128_t rs1_sign;
-  uint128_t vs2_sign;
-  uint128_t result_sign;
+  int64_t rs1_sign;
+  int64_t vs2_sign;
+  int64_t result_sign;
 
   rs1_sign = rs1 & sign_mask;
   vs2_sign = vs2 & sign_mask;
   bool overflow = rs1 == vs2 && rs1 == int_min;
 
-  uint128_t result = (uint128_t)rs1 * (uint128_t)vs2;
-  result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+  int128_t result = (int128_t)rs1 * (int128_t)vs2;
   result_sign = (rs1_sign ^ vs2_sign) & sign_mask;
+  // rounding
   INT_ROUNDING(result, xrm, sew - 1);
-  // unsigned shifting
+  // remove guard bits
   result = result >> (sew - 1);
 
-  // saturation
+  // max saturation
   if (overflow) {
     result = int_max;
     P.VU.vxsat = 1;
-  } else {
-    result |= result_sign;
   }
+
   vd = result;
 })
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
index ef2390c..c854ca6 100644
--- a/riscv/insns/vssra_vi.h
+++ b/riscv/insns/vssra_vi.h
@@ -3,6 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VI_LOOP
 ({
   int sh = simm5 & (sew - 1) & 0x1f;
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  int64_t val = vs2;
+
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h
index e697b52..7bbc766 100644
--- a/riscv/insns/vssra_vv.h
+++ b/riscv/insns/vssra_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VV_LOOP
 ({
   int sh = vs1 & (sew - 1);
+  int128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h
index 8d7ad20..068a22b 100644
--- a/riscv/insns/vssra_vx.h
+++ b/riscv/insns/vssra_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VX_LOOP
 ({
   int sh = rs1 & (sew - 1);
+  int128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
index 8a10df0..bf554ca 100644
--- a/riscv/insns/vssrl_vi.h
+++ b/riscv/insns/vssrl_vi.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VI_ULOOP
 ({
   int sh = simm5 & (sew - 1) & 0x1f;
+  uint64_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h
index f40cd90..a8e5d16 100644
--- a/riscv/insns/vssrl_vv.h
+++ b/riscv/insns/vssrl_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VV_ULOOP
 ({
   int sh = vs1 & (sew - 1);
+  uint128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h
index 5da3f75..ee3cb34 100644
--- a/riscv/insns/vssrl_vx.h
+++ b/riscv/insns/vssrl_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VX_ULOOP
 ({
   int sh = rs1 & (sew - 1);
+  uint128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h
index fd3ee21..18fe4fb 100644
--- a/riscv/insns/vssub_vv.h
+++ b/riscv/insns/vssub_vv.h
@@ -1,4 +1,5 @@
 // vssub.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h
index 5c5c781..7a01125 100644
--- a/riscv/insns/vssub_vx.h
+++ b/riscv/insns/vssub_vx.h
@@ -1,4 +1,5 @@
 // vssub.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h
index c5c74fe..e58076e 100644
--- a/riscv/insns/vssubu_vv.h
+++ b/riscv/insns/vssubu_vv.h
@@ -1,4 +1,5 @@
 // vssubu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h
index 12cfdbb..556c759 100644
--- a/riscv/insns/vssubu_vx.h
+++ b/riscv/insns/vssubu_vx.h
@@ -1,4 +1,5 @@
 // vssubu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vsuxb_v.h b/riscv/insns/vsuxb_v.h
index cf928f8..03f1980 100644
--- a/riscv/insns/vsuxb_v.h
+++ b/riscv/insns/vsuxb_v.h
@@ -1,6 +1,7 @@
 // vsuxb.v and vsxseg[2-8]b.v
-require_vector;
 require(P.VU.vsew >= e8);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t vl = P.VU.vl;
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
@@ -8,30 +9,25 @@ reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (P.VU.vsew) {
   case e8:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint8_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint8_t>(vs3, vreg_inx));
     break;
   case e16:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint16_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint16_t>(vs3, vreg_inx));
    break;
   case e32:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
+    MMU.store_uint8(baseAddr + index[i],
                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsuxe_v.h b/riscv/insns/vsuxe_v.h
index 8bd7545..22d6fb5 100644
--- a/riscv/insns/vsuxe_v.h
+++ b/riscv/insns/vsuxe_v.h
@@ -1,38 +1,34 @@
 // vsxe.v and vsxseg[2-8]e.v
-require_vector;
 const reg_t sew = P.VU.vsew;
 const reg_t vl = P.VU.vl;
 require(sew >= e8 && sew <= e64);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
 reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (sew) {
   case e8:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint8_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint8_t>(vs3, vreg_inx));
     break;
   case e16:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint16_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint16_t>(vs3, vreg_inx));
     break;
   case e32:
-    if (is_valid)
-      MMU.store_uint32(baseAddr + index[i],
-                       P.VU.elt<uint32_t>(vs3, vreg_inx));
+    MMU.store_uint32(baseAddr + index[i],
+                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint64(baseAddr + index[i],
-                       P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint64(baseAddr + index[i],
+                     P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsuxh_v.h b/riscv/insns/vsuxh_v.h
index 1d5a1bd..a34bc27 100644
--- a/riscv/insns/vsuxh_v.h
+++ b/riscv/insns/vsuxh_v.h
@@ -1,6 +1,7 @@
 // vsxh.v and vsxseg[2-8]h.v
-require_vector;
 require(P.VU.vsew >= e16);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t vl = P.VU.vl;
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
@@ -8,25 +9,21 @@ reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (P.VU.vsew) {
   case e16:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint16_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint16_t>(vs3, vreg_inx));
     break;
   case e32:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint32_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsuxw_v.h b/riscv/insns/vsuxw_v.h
index ec1a8fe..f42092d 100644
--- a/riscv/insns/vsuxw_v.h
+++ b/riscv/insns/vsuxw_v.h
@@ -1,6 +1,7 @@
 // vsxw.v and vsxseg[2-8]w.v
-require_vector;
 require(P.VU.vsew >= e32);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t vl = P.VU.vl;
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
@@ -8,20 +9,17 @@ reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (P.VU.vsew) {
   case e32:
-    if (is_valid)
-      MMU.store_uint32(baseAddr + index[i],
-                       P.VU.elt<uint32_t>(vs3, vreg_inx));
+    MMU.store_uint32(baseAddr + index[i],
+                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint32(baseAddr + index[i],
-                       P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint32(baseAddr + index[i],
+                     P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsxb_v.h b/riscv/insns/vsxb_v.h
index 3e50597..fb567fb 100644
--- a/riscv/insns/vsxb_v.h
+++ b/riscv/insns/vsxb_v.h
@@ -1,4 +1,5 @@
 // vsxb.v and vsxseg[2-8]b.v
 require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxe_v.h b/riscv/insns/vsxe_v.h
index 28984ac..78c6605 100644
--- a/riscv/insns/vsxe_v.h
+++ b/riscv/insns/vsxe_v.h
@@ -1,6 +1,7 @@
 // vsxe.v and vsxseg[2-8]e.v
 reg_t sew = P.VU.vsew;
 require(sew >= e8 && sew <= e64);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 if (sew == e8) {
   VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxh_v.h b/riscv/insns/vsxh_v.h
index 2e5506a..6b0fcfd 100644
--- a/riscv/insns/vsxh_v.h
+++ b/riscv/insns/vsxh_v.h
@@ -1,4 +1,5 @@
 // vsxh.v and vsxseg[2-8]h.v
 require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_ST(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vsxw_v.h b/riscv/insns/vsxw_v.h
index 9a2119f..2223d5b 100644
--- a/riscv/insns/vsxw_v.h
+++ b/riscv/insns/vsxw_v.h
@@ -1,4 +1,5 @@
 // vsxw.v and vsxseg[2-8]w.v
 require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_ST(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vwsmacc_vv.h b/riscv/insns/vwsmacc_vv.h
index 86d588d..42c21db 100644
--- a/riscv/insns/vwsmacc_vv.h
+++ b/riscv/insns/vwsmacc_vv.h
@@ -1,2 +1,2 @@
 // vwsmacc.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmacc_vx.h b/riscv/insns/vwsmacc_vx.h
index f0f04a3..2095665 100644
--- a/riscv/insns/vwsmacc_vx.h
+++ b/riscv/insns/vwsmacc_vx.h
@@ -1,2 +1,2 @@
 // vwsmacc.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccsu_vv.h b/riscv/insns/vwsmaccsu_vv.h
index cf1aa1e..9df7833 100644
--- a/riscv/insns/vwsmaccsu_vv.h
+++ b/riscv/insns/vwsmaccsu_vv.h
@@ -1,2 +1,2 @@
 // vwsmaccsu.vx vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SU_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccsu_vx.h b/riscv/insns/vwsmaccsu_vx.h
index 681c309..8565c98 100644
--- a/riscv/insns/vwsmaccsu_vx.h
+++ b/riscv/insns/vwsmaccsu_vx.h
@@ -1,2 +1,2 @@
 // vwsmaccsu.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SU_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccu_vv.h b/riscv/insns/vwsmaccu_vv.h
index e873d93..7075247 100644
--- a/riscv/insns/vwsmaccu_vv.h
+++ b/riscv/insns/vwsmaccu_vv.h
@@ -1,2 +1,2 @@
 // vwsmaccu.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_USSMA(vs1);
+VI_VVX_LOOP_WIDE_USSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccu_vx.h b/riscv/insns/vwsmaccu_vx.h
index 7318fa7..15027cf 100644
--- a/riscv/insns/vwsmaccu_vx.h
+++ b/riscv/insns/vwsmaccu_vx.h
@@ -1,2 +1,2 @@
 // vwsmaccu vd, vs2, rs1
-VI_VVX_LOOP_WIDE_USSMA(rs1);
+VI_VVX_LOOP_WIDE_USSMA(rs1, false);
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 00d36bc..59fa062 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -208,7 +208,7 @@ void vectorUnit_t::reset(){
   set_vl(-1, 0, -1); // default to illegal configuration
 }
 
-reg_t vectorUnit_t::set_vl(uint64_t regId, reg_t reqVL, reg_t newType){
+reg_t vectorUnit_t::set_vl(int regId, reg_t reqVL, reg_t newType){
   if (vtype != newType){
     vtype = newType;
     vsew = 1 << (BITS(newType, 4, 2) + 3);
@@ -218,11 +218,24 @@ reg_t vectorUnit_t::set_vl(uint64_t regId, reg_t reqVL, reg_t newType){
     vmlen = vsew / vlmul;
     reg_mask = (NVPR-1) & ~(vlmul-1);
 
-    vill = vsew > e64 || vediv != 1 || (newType >> 7) != 0;
-    if (vill)
+    vill = vsew > ELEN || vediv != 1 || (newType >> 7) != 0;
+    if (vill) {
       vlmax = 0;
+      vtype = UINT64_MAX << (p->get_xlen() - 1);
+    }
+  }
+
+  // set vl
+  if (vlmax == 0) {
+    vl = 0;
+  } else if (regId == 0) {
+    vl = vl > vlmax ? vlmax : vl;
+  } else if (regId == -1) {
+    vl = vlmax;
+  } else if (regId >= 0) {
+    vl = reqVL > vlmax ? vlmax : reqVL;
   }
-  vl = reqVL <= vlmax && regId != 0 ? reqVL : vlmax;
+
   vstart = 0;
   setvl_count++;
   return vl;
diff --git a/riscv/processor.h b/riscv/processor.h
index 68e6249..3e72282 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -200,7 +200,7 @@ class vectorUnit_t {
       reg_file = 0;
     }
 
-    reg_t set_vl(uint64_t regId, reg_t reqVL, reg_t newType);
+    reg_t set_vl(int regId, reg_t reqVL, reg_t newType);
 
     reg_t get_vlen() { return VLEN; }
     reg_t get_elen() { return ELEN; }
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index af5bbdc..15ca3b9 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -315,7 +315,6 @@ riscv_insn_ext_v_alu_int = \
 	vdivu_vx \
 	vdot_vv \
 	vdotu_vv \
-	vmv_x_s \
 	vid_v \
 	viota_m \
 	vmacc_vv \
@@ -381,6 +380,7 @@ riscv_insn_ext_v_alu_int = \
 	vmv_v_i \
 	vmv_v_v \
 	vmv_v_x \
+	vmv_x_s \
 	vmxnor_mm \
 	vmxor_mm \
 	vnclip_vi \
@@ -590,8 +590,6 @@ riscv_insn_ext_v_alu_fp = \
 	vmflt_vv \
 	vmfne_vf \
 	vmfne_vv \
-	vmford_vf \
-	vmford_vv \
 
 riscv_insn_ext_v_ldst = \
 	vlb_v \
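Note on the riscv/processor.cc hunk above: the rewritten set_vl no longer folds everything into the single ternary the old code used; it branches explicitly on how the request was made. The following is a minimal standalone sketch of that rule, not Spike's actual vectorUnit_t API (model_set_vl and its free-standing arguments are hypothetical), assuming regId keeps the encoding visible in the diff: -1 requests vlmax (the value reset() passes), 0 keeps the current vl clamped to the new vlmax, and any other value clamps the requested reqVL.

#include <cstdint>
#include <iostream>

// Hypothetical standalone model of the vl update in the new set_vl.
// The real code also rewrites vtype/vill and clears vstart.
uint64_t model_set_vl(int regId, uint64_t reqVL, uint64_t cur_vl, uint64_t vlmax) {
  if (vlmax == 0)
    return 0;                                // illegal vtype: no elements enabled
  if (regId == 0)
    return cur_vl > vlmax ? vlmax : cur_vl;  // keep the current vl, clamped to vlmax
  if (regId == -1)
    return vlmax;                            // explicit request for vlmax (reset path)
  return reqVL > vlmax ? vlmax : reqVL;      // ordinary request, clamped to vlmax
}

int main() {
  const uint64_t vlmax = 16;                           // e.g. 16 elements at the current SEW/LMUL
  std::cout << model_set_vl(5, 10, 0, vlmax) << "\n";  // 10: request fits
  std::cout << model_set_vl(5, 40, 0, vlmax) << "\n";  // 16: clamped to vlmax
  std::cout << model_set_vl(0, 0, 12, vlmax) << "\n";  // 12: regId == 0 keeps vl
  std::cout << model_set_vl(-1, 0, 12, vlmax) << "\n"; // 16: regId == -1 yields vlmax
  return 0;
}

Compared with the removed line `vl = reqVL <= vlmax && regId != 0 ? reqVL : vlmax;`, the explicit branches make each kind of request visible and add the vlmax == 0 (illegal configuration) and "keep current vl" cases that the single comparison could not express.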