diff options
Diffstat (limited to 'riscv/decode.h')
-rw-r--r-- | riscv/decode.h | 1041 |
1 files changed, 1041 insertions, 0 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index 6cbf934..ca6a999 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -11,6 +11,7 @@ # error spike requires a little-endian host #endif +#include <algorithm> #include <cstdint> #include <string.h> #include <strings.h> @@ -23,6 +24,8 @@ typedef int64_t sreg_t; typedef uint64_t reg_t; +typedef __int128 int128_t; +typedef unsigned __int128 uint128_t; const int NXPR = 32; const int NFPR = 32; @@ -63,6 +66,12 @@ const int NCSR = 4096; #define MAX_INSN_LENGTH 8 #define PC_ALIGN 2 +#ifndef TAIL_ZEROING + #define TAIL_ZEROING true +#else + #define TAIL_ZEROING false +#endif + typedef uint64_t insn_bits_t; class insn_t { @@ -141,8 +150,10 @@ private: #define P (*p) #define READ_REG(reg) STATE.XPR[reg] #define READ_FREG(reg) STATE.FPR[reg] +#define RD READ_REG(insn.rd()) #define RS1 READ_REG(insn.rs1()) #define RS2 READ_REG(insn.rs2()) +#define RS3 READ_REG(insn.rs3()) #define WRITE_RD(value) WRITE_REG(insn.rd(), value) #ifndef RISCV_ENABLE_COMMITLOG @@ -288,6 +299,1036 @@ inline freg_t f128_negate(freg_t a) throw trap_illegal_instruction(0); \ (which); }) +/* For debug only. 
This will fail if the native machine's float types are not IEEE */ +inline float to_f(float32_t f){float r; memcpy(&r, &f, sizeof(r)); return r;} +inline double to_f(float64_t f){double r; memcpy(&r, &f, sizeof(r)); return r;} +/* NOTE(review): this overload copies sizeof(long double) bytes out of a float128_t, so it assumes the host's long double is layout-compatible with float128_t -- confirm on the host before trusting the value. */ +inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); return r;} + +// Vector macros +#define e8 8 // 8b elements +#define e16 16 // 16b elements +#define e32 32 // 32b elements +#define e64 64 // 64b elements +#define e128 128 // 128b elements + +/* Sign-extend (vsext) or zero-extend (vzext) the low sew bits of x to 64 bits by shifting left by 64-sew and back right. */ +#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew)) +#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew)) + +// +// vector: masking skip helper +// +/* Declares mlen/midx/mpos for element i (mlen bits per element, midx = 64-bit word index, mpos = bit offset in that word). When insn.v_vm() == 0 it runs BODY, then reads the bit at mpos of P.VU.elt<uint64_t>(0, midx) and `continue`s if that bit is clear. Must be expanded inside a loop that has `i` in scope. */ +#define VI_LOOP_ELEMENT_SKIP(BODY) \ + const int mlen = P.VU.vmlen; \ + const int midx = (mlen * i) / 64; \ + const int mpos = (mlen * i) % 64; \ + if (insn.v_vm() == 0) { \ + BODY; \ + bool skip = ((P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1) == 0; \ + if (skip) \ + continue; \ + } + +/* Per-element filter for index inx: at or past vl it clears is_valid (TAIL_ZEROING true) or skips the element (TAIL_ZEROING false); below P.VU.vstart it skips; otherwise it applies VI_LOOP_ELEMENT_SKIP. NOTE(review): `vl` and `is_valid` must be declared by the expanding code, and the nested mask check indexes with `i`, not `inx`. */ +#define VI_ELEMENT_SKIP(inx) \ + if (inx >= vl && TAIL_ZEROING) { \ + is_valid = false; \ + } else if (inx >= vl && !TAIL_ZEROING) { \ + continue; \ + } else if (inx < P.VU.vstart) { \ + continue; \ + } else { \ + VI_LOOP_ELEMENT_SKIP(); \ + } + +// +// vector: operation and register access check helper +// +/* Returns true when the ranges [astart, astart+asize) and [bstart, bstart+bsize) share at least one index: the span covering both ranges is then shorter than the sum of their sizes. Ranges that merely touch end-to-start yield false. */ +static inline bool is_overlaped(const int astart, const int asize, + const int bstart, const int bsize) +{ + const int aend = astart + asize; + const int bend = bstart + bsize; + return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize; +} + +#define VI_NARROW_CHECK_COMMON \ + require(P.VU.vlmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require(insn.rs2() + P.VU.vlmul * 2 <= 32); + +#define VI_WIDE_CHECK_COMMON \ + require(!P.VU.vill);\ + require(P.VU.vlmul <= 4); \ + require(P.VU.vsew * 2 <= P.VU.ELEN); \ + require(insn.rd() + P.VU.vlmul * 2 <= 32); \ + if (insn.v_vm() == 0) \ + require(insn.rd() != 0); + +#define VI_CHECK_VREG_OVERLAP(v1, v2) \ + require(!is_overlaped(v1, P.VU.vlmul, v2, 
P.VU.vlmul)); + +#define VI_CHECK_SS \ + require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul)); + +#define VI_CHECK_SD \ + require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); + +#define VI_CHECK_DSS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \ + if (is_rs) \ + require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); + +#define VI_CHECK_DDS(is_rs) \ + VI_WIDE_CHECK_COMMON; \ + require(insn.rs2() + P.VU.vlmul * 2 <= 32); \ + if (is_rs) \ + require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); + +// +// vector: loop header and end helper +// +#define VI_GENERAL_LOOP_BASE \ + require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i<vl; ++i){ + +#define VI_TAIL_ZERO(elm) \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING) { \ + uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * elm)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * elm)); \ + } + +#define VI_TAIL_ZERO_MASK(dst) \ + if (vl != 0 && TAIL_ZEROING){ \ + for (reg_t i=vl; i<P.VU.vlmax; ++i){ \ + const int mlen = P.VU.vmlen; \ + const int midx = (mlen * i) / 64; \ + const int mpos = (mlen * i) % 64; \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t &vdi = P.VU.elt<uint64_t>(dst, midx); \ + vdi = (vdi & ~mmask);\ + }\ + }\ + +#define VI_LOOP_BASE \ + VI_GENERAL_LOOP_BASE \ + VI_LOOP_ELEMENT_SKIP(); + +#define VI_LOOP_END \ + } \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ + uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 1)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 1)); \ + }\ + P.VU.vstart = 0; + +#define VI_LOOP_END_NO_TAIL_ZERO \ + } \ + P.VU.vstart = 
0; + +#define VI_LOOP_WIDEN_END \ + } \ + if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \ + uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 2)); \ + memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 2)); \ + }\ + P.VU.vstart = 0; + +#define VI_LOOP_REDUCTION_END(x) \ + } \ + if (vl > 0 && TAIL_ZEROING) { \ + vd_0_des = vd_0_res; \ + uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \ + memset(tail, 0, (P.VU.get_vlen() - x) >> 3); \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_CMP_BASE \ + require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t sew = P.VU.vsew; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + VI_LOOP_ELEMENT_SKIP(); \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \ + uint64_t res = 0; + +#define VI_LOOP_CMP_END \ + vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + } \ + VI_TAIL_ZERO_MASK(rd_num); \ + P.VU.vstart = 0; + +#define VI_LOOP_MASK(op) \ + require(P.VU.vsew <= e64); \ + reg_t vl = P.VU.vl; \ + for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + int mlen = P.VU.vmlen; \ + int midx = (mlen * i) / 64; \ + int mpos = (mlen * i) % 64; \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t vs2 = P.VU.elt<uint64_t>(insn.rs2(), midx); \ + uint64_t vs1 = P.VU.elt<uint64_t>(insn.rs1(), midx); \ + uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \ + res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ + } \ + \ + if (TAIL_ZEROING) {\ + for (reg_t i = vl; i < P.VU.vlmax && i > 0; ++i) { \ + int mlen = P.VU.vmlen; \ + int midx = (mlen * i) / 64; \ + int mpos = (mlen * i) % 64; \ + uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \ + uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \ + res = (res & ~mmask); \ + 
} \ + } \ + P.VU.vstart = 0; + +#define VI_LOOP_NSHIFT_BASE \ + require(P.VU.vsew <= e32); \ + if (insn.rd() != 0){ \ + VI_CHECK_SD; \ + } \ + VI_GENERAL_LOOP_BASE; \ + VI_LOOP_ELEMENT_SKIP({\ + require(!(insn.rd() == 0 && P.VU.vlmul > 1));\ + }); + + +/* Adjusts `result` in place according to rounding mode xrm, where gb is a bit count (presumably the number of low bits the caller shifts out afterwards -- confirm against callers). Does nothing when gb == 0 unless xrm is VRM::ROD, in which case bit 0 is set. NOTE(review): in the RNE case the masked value can only contain bits gb-1 and gb, yet it is compared with the unshifted constants 0x1 and 0x3, so for gb > 1 neither branch can be taken and result is left unchanged. NOTE(review): assert(true) in the INVALID_RM case can never fail, so an invalid mode is silently accepted. */ +#define INT_ROUNDING(result, xrm, gb) \ + if (gb > 0) { \ + switch(xrm) {\ + case VRM::RNU:\ + result += ((uint64_t)1 << ((gb) - 1));\ + break;\ + case VRM::RNE:\ + if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x1){\ + result -= ((uint64_t)1 << ((gb) - 1));\ + }else if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x3){\ + result += ((uint64_t)1 << ((gb) - 1));\ + }\ + break;\ + case VRM::RDN:\ + result = (result >> ((gb) - 1)) << ((gb) - 1);\ + break;\ + case VRM::ROD:\ + result |= ((uint64_t)1ul << (gb)); \ + break;\ + case VRM::INVALID_RM:\ + assert(true);\ + } \ + } else if (gb == 0 && xrm == VRM::ROD) { \ + result |= 1ul; \ + } + + +// +// vector: integer and masking operand access helper +// +#define VXI_PARAMS(x) \ + type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + type_sew_t<x>::type vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \ + type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \ + type_sew_t<x>::type rs1 = (type_sew_t<x>::type)RS1; \ + type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); + +#define VV_U_PARAMS(x) \ + type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i); \ + type_usew_t<x>::type vs1 = P.VU.elt<type_usew_t<x>::type>(rs1_num, i); \ + type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i); + +#define VX_U_PARAMS(x) \ + type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i); \ + type_usew_t<x>::type rs1 = (type_usew_t<x>::type)RS1; \ + type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i); + +#define VI_U_PARAMS(x) \ + type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i); \ + type_usew_t<x>::type simm5 = (type_usew_t<x>::type)insn.v_zimm5(); \ + 
type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i); + +#define VV_PARAMS(x) \ + type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + type_sew_t<x>::type vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \ + type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); + +#define VX_PARAMS(x) \ + type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + type_sew_t<x>::type rs1 = (type_sew_t<x>::type)RS1; \ + type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); + +#define VI_PARAMS(x) \ + type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); \ + type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); + +#define XV_PARAMS(x) \ + type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, RS1); + +#define VI_XI_SLIDEDOWN_PARAMS(x, off) \ + auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i + off); + +#define VI_XI_SLIDEUP_PARAMS(x, offset) \ + auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \ + auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i - offset); + +#define VI_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \ + auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ + auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ + auto zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5(); + +#define VX_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \ + auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ + auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ + auto rs1 = (type_sew_t<sew1>::type)RS1; + +#define VV_NSHIFT_PARAMS(sew1, sew2) \ + auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \ + auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ 
+ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ + auto vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); + +/* Carry-loop operands: vs2 (plus rs1/simm5 or vs1) are read as signed x-bit values, while vd is a reference to the 64-bit word at index midx of rd, so the expanding loop must already have `midx` in scope. */ +#define XI_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \ + auto rs1 = (type_sew_t<x>::type)RS1; \ + auto simm5 = (type_sew_t<x>::type)insn.v_simm5(); \ + auto &vd = P.VU.elt<uint64_t>(rd_num, midx); + +#define VV_CARRY_PARAMS(x) \ + auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \ + auto vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \ + auto &vd = P.VU.elt<uint64_t>(rd_num, midx); + +// +// vector: integer and masking operation loop +// + +// comparison result to masking register +#define VI_VV_LOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_LOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_LOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VV_ULOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VV_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VX_ULOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VX_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_U_PARAMS(e16); \ + 
BODY; \ + }else if(sew == e32){ \ + VX_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +#define VI_VI_ULOOP_CMP(BODY) \ + VI_LOOP_CMP_BASE \ + if (sew == e8){ \ + VI_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_CMP_END + +// merge and copy loop +#define VI_VVXI_MERGE_LOOP(BODY) \ + VI_GENERAL_LOOP_BASE \ + if (sew == e8){ \ + VXI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VXI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VXI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VXI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// reduction loop - signed +/* Opens the reduction loop for signed x-bit elements: vd_0_des references element 0 of rd, vd_0_res starts as a copy of element 0 of rs1, and each unmasked element i of rs2 is exposed to the loop body as vs2. NOTE(review): the last line of this macro ends with a continuation backslash, so the following blank line is part of the macro. */ +#define VI_LOOP_REDUCTION_BASE(x) \ + require(x == e8 || x == e16 || x == e32 || x == e64); \ + require(!P.VU.vill);\ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt<type_sew_t<x>::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + VI_LOOP_ELEMENT_SKIP(); \ + auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \ + +#define REDUCTION_LOOP(x, BODY) \ + VI_LOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_LOOP_REDUCTION(BODY) \ + reg_t sew = P.VU.vsew; \ + if (sew == e8) { \ + REDUCTION_LOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_LOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_LOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_LOOP(e64, BODY) \ + } + +// reduction loop - unsigned +/* NOTE(review): unlike VI_LOOP_REDUCTION_BASE, this unsigned variant contains no require(!P.VU.vill) check -- confirm whether that omission is intended. */ +#define VI_ULOOP_REDUCTION_BASE(x) \ + require(x == e8 || x == e16 || x == e32 || x == e64); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = 
P.VU.elt<type_usew_t<x>::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt<type_usew_t<x>::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + VI_LOOP_ELEMENT_SKIP(); \ + auto vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i); + +#define REDUCTION_ULOOP(x, BODY) \ + VI_ULOOP_REDUCTION_BASE(x) \ + BODY; \ + VI_LOOP_REDUCTION_END(x) + +#define VI_VV_ULOOP_REDUCTION(BODY) \ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + REDUCTION_ULOOP(e8, BODY) \ + } else if(sew == e16) { \ + REDUCTION_ULOOP(e16, BODY) \ + } else if(sew == e32) { \ + REDUCTION_ULOOP(e32, BODY) \ + } else if(sew == e64) { \ + REDUCTION_ULOOP(e64, BODY) \ + } + +// general VXI signed/unsigned loop: each macro dispatches on sew and expands BODY with operands declared at that element width +#define VI_VV_ULOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_ULOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_U_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_ULOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_U_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_U_PARAMS(e16); \ + BODY; \ + }else 
if(sew == e32){ \ + VI_U_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_U_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VI_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VI_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VI_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + +// narrow operation loop +#define VI_VV_LOOP_NARROW(BODY) \ +VI_NARROW_CHECK_COMMON; \ +VI_LOOP_BASE \ +if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ +}else if(sew == e16){ \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ +}else if(sew == e32){ \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ +} \ +VI_LOOP_END + +#define VI_NARROW_SHIFT(sew1, sew2) \ + type_usew_t<sew1>::type &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \ + type_usew_t<sew2>::type vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \ + type_usew_t<sew1>::type zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5(); \ + type_sew_t<sew2>::type vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \ + type_sew_t<sew1>::type vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \ + type_sew_t<sew1>::type rs1 = (type_sew_t<sew1>::type)RS1; + +#define VI_VVXI_LOOP_NARROW(BODY) \ + require(P.VU.vsew <= e32); \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_NARROW_SHIFT(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NARROW_SHIFT(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NARROW_SHIFT(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VI_LOOP_NSHIFT(BODY) \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VI_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VI_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VI_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VX_LOOP_NSHIFT(BODY) \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VX_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VX_NSHIFT_PARAMS(e16, e32) \ + BODY; \ 
+ } else if (sew == e32) { \ + VX_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +#define VI_VV_LOOP_NSHIFT(BODY) \ + VI_LOOP_NSHIFT_BASE \ + if (sew == e8){ \ + VV_NSHIFT_PARAMS(e8, e16) \ + BODY; \ + } else if (sew == e16) { \ + VV_NSHIFT_PARAMS(e16, e32) \ + BODY; \ + } else if (sew == e32) { \ + VV_NSHIFT_PARAMS(e32, e64) \ + BODY; \ + } \ + VI_LOOP_END + +// widen operation loop +#define VI_VV_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VV_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VV_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VV_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_WIDEN_END + +#define VI_VX_LOOP_WIDEN(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VX_PARAMS(e8); \ + BODY; \ + }else if(sew == e16){ \ + VX_PARAMS(e16); \ + BODY; \ + }else if(sew == e32){ \ + VX_PARAMS(e32); \ + BODY; \ + }else if(sew == e64){ \ + VX_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t vd_w = P.VU.elt<sign##16_t>(rd_num, i); \ + P.VU.elt<uint16_t>(rd_num, i) = \ + op1((sign##16_t)(sign##8_t)var0 op0 (sign##16_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign##32_t vd_w = P.VU.elt<sign##32_t>(rd_num, i); \ + P.VU.elt<uint32_t>(rd_num, i) = \ + op1((sign##32_t)(sign##16_t)var0 op0 (sign##32_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t vd_w = P.VU.elt<sign##64_t>(rd_num, i); \ + P.VU.elt<uint64_t>(rd_num, i) = \ + op1((sign##64_t)(sign##32_t)var0 op0 (sign##64_t)(sign##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign_d##16_t vd_w = P.VU.elt<sign_d##16_t>(rd_num, i); \ + P.VU.elt<uint16_t>(rd_num, i) = \ + op1((sign_1##16_t)(sign_1##8_t)var0 op0 (sign_2##16_t)(sign_2##8_t)var1) + var2; \ 
+ } \ + break; \ + case e16: { \ + sign_d##32_t vd_w = P.VU.elt<sign_d##32_t>(rd_num, i); \ + P.VU.elt<uint32_t>(rd_num, i) = \ + op1((sign_1##32_t)(sign_1##16_t)var0 op0 (sign_2##32_t)(sign_2##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign_d##64_t vd_w = P.VU.elt<sign_d##64_t>(rd_num, i); \ + P.VU.elt<uint64_t>(rd_num, i) = \ + op1((sign_1##64_t)(sign_1##32_t)var0 op0 (sign_2##64_t)(sign_2##32_t)var1) + var2; \ + } \ + break; \ + } + +#define VI_WIDE_WVX_OP(var0, op0, sign) \ + switch(P.VU.vsew) { \ + case e8: { \ + sign##16_t &vd_w = P.VU.elt<sign##16_t>(rd_num, i); \ + sign##16_t vs2_w = P.VU.elt<sign##16_t>(rs2_num, i); \ + vd_w = vs2_w op0 (sign##16_t)(sign##8_t)var0; \ + } \ + break; \ + case e16: { \ + sign##32_t &vd_w = P.VU.elt<sign##32_t>(rd_num, i); \ + sign##32_t vs2_w = P.VU.elt<sign##32_t>(rs2_num, i); \ + vd_w = vs2_w op0 (sign##32_t)(sign##16_t)var0; \ + } \ + break; \ + default: { \ + sign##64_t &vd_w = P.VU.elt<sign##64_t>(rd_num, i); \ + sign##64_t vs2_w = P.VU.elt<sign##64_t>(rs2_num, i); \ + vd_w = vs2_w op0 (sign##64_t)(sign##32_t)var0; \ + } \ + break; \ + } + +#define VI_WIDE_SSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt<type_sew_t<sew2>::type>(rd_num, i); \ + auto vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \ + auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i); \ + auto rs1 = (type_sew_t<sew1>::type)RS1; \ + int##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (int##sew2##_t)vs2 * (int##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + res = res >> gb; \ + vd = sat_add<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_SSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_SSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_SSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_SSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_USSMA(sew1, sew2, opd) \ + 
auto &vd = P.VU.elt<type_usew_t<sew2>::type>(rd_num, i); \ + auto vs1 = P.VU.elt<type_usew_t<sew1>::type>(rs1_num, i); \ + auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i); \ + auto rs1 = (type_usew_t<sew1>::type)RS1; \ + uint##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (uint##sew2##_t)vs2 * (uint##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + \ + res = res >> gb; \ + vd = sat_addu<uint##sew2##_t>(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_USSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_USSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_USSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_USSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_SU_SSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt<type_sew_t<sew2>::type>(rd_num, i); \ + auto vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \ + auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i); \ + auto rs1 = (type_sew_t<sew1>::type)RS1; \ + int##sew2##_t res; \ + bool sat = false; \ + const int gb = sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (uint##sew2##_t)vs2 * (int##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + \ + res = res >> gb; \ + vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_SU_SSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_SU_SSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_SU_SSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_SU_SSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +#define VI_WIDE_US_SSMA(sew1, sew2, opd) \ + auto &vd = P.VU.elt<type_sew_t<sew2>::type>(rd_num, i); \ + auto vs1 = P.VU.elt<type_usew_t<sew1>::type>(rs1_num, i); \ + auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i); \ + auto rs1 = (type_usew_t<sew1>::type)RS1; \ + int##sew2##_t res; \ + bool sat = false; \ + const int gb = 
sew1 / 2; \ + VRM vrm = P.VU.get_vround_mode(); \ + res = (int##sew2##_t)vs2 * (uint##sew2##_t)opd; \ + INT_ROUNDING(res, vrm, gb); \ + \ + res = res >> gb; \ + vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \ + P.VU.vxsat |= sat; + +#define VI_VVX_LOOP_WIDE_US_SSMA(opd) \ + VI_WIDE_CHECK_COMMON \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VI_WIDE_US_SSMA(8, 16, opd); \ + } else if(sew == e16){ \ + VI_WIDE_US_SSMA(16, 32, opd); \ + } else if(sew == e32){ \ + VI_WIDE_US_SSMA(32, 64, opd); \ + } \ + VI_LOOP_WIDEN_END + +// wide reduction loop - signed +#define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + VI_CHECK_DSS(false); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt<type_sew_t<sew2>::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt<type_sew_t<sew2>::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + VI_LOOP_ELEMENT_SKIP(); \ + auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i); + +#define WIDE_REDUCTION_LOOP(sew1, sew2, BODY) \ + VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_LOOP_WIDE_REDUCTION(BODY) \ + require(!P.VU.vill);\ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_LOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_LOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_LOOP(e32, e64, BODY) \ + } + +// wide reduction loop - unsigned +#define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + VI_CHECK_DSS(false); \ + reg_t vl = P.VU.vl; \ + reg_t rd_num = insn.rd(); \ + reg_t rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + auto &vd_0_des = P.VU.elt<type_usew_t<sew2>::type>(rd_num, 0); \ + auto vd_0_res = P.VU.elt<type_usew_t<sew2>::type>(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i<vl; ++i) { \ + VI_LOOP_ELEMENT_SKIP(); \ + auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i); + +#define WIDE_REDUCTION_ULOOP(sew1, sew2, BODY) \ + 
VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \ + BODY; \ + VI_LOOP_REDUCTION_END(sew2) + +#define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \ + require(!P.VU.vill);\ + reg_t sew = P.VU.vsew; \ + if (sew == e8){ \ + WIDE_REDUCTION_ULOOP(e8, e16, BODY) \ + } else if(sew == e16){ \ + WIDE_REDUCTION_ULOOP(e16, e32, BODY) \ + } else if(sew == e32){ \ + WIDE_REDUCTION_ULOOP(e32, e64, BODY) \ + } + +// carry/borrow bit loop +#define VI_VV_LOOP_CARRY(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + VV_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + VV_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + VV_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + VV_CARRY_PARAMS(e64) \ + BODY; \ + } \ + } \ + VI_TAIL_ZERO_MASK(rd_num); + +#define VI_XI_LOOP_CARRY(BODY) \ + VI_LOOP_BASE \ + if (sew == e8){ \ + XI_CARRY_PARAMS(e8) \ + BODY; \ + } else if (sew == e16) { \ + XI_CARRY_PARAMS(e16) \ + BODY; \ + } else if (sew == e32) { \ + XI_CARRY_PARAMS(e32) \ + BODY; \ + } else if (sew == e64) { \ + XI_CARRY_PARAMS(e64) \ + BODY; \ + } \ + } \ + VI_TAIL_ZERO_MASK(rd_num); + +// average loop +#define VI_VVX_LOOP_AVG(opd, op) \ +VRM xrm = p->VU.get_vround_mode(); \ +VI_LOOP_BASE \ + switch(sew) { \ + case e8: { \ + VV_PARAMS(e8); \ + type_sew_t<e8>::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e16: { \ + VV_PARAMS(e16); \ + type_sew_t<e16>::type rs1 = RS1; \ + auto res = (int32_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + case e32: { \ + VV_PARAMS(e32); \ + type_sew_t<e32>::type rs1 = RS1; \ + auto res = (int64_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + default: { \ + VV_PARAMS(e64); \ + type_sew_t<e64>::type rs1 = RS1; \ + auto res = (int128_t)vs2 op opd; \ + INT_ROUNDING(res, xrm, 1); \ + vd = res >> 1; \ + break; \ + } \ + } \ +VI_LOOP_END // Seems that 0x0 doesn't work. 
#define DEBUG_START 0x100 #define DEBUG_END (0x1000 - 1) |