author     Andrew Waterman <andrew@sifive.com>   2019-11-12 11:54:33 -0800
committer  GitHub <noreply@github.com>           2019-11-12 11:54:33 -0800
commit     ff81dea8593c6e51b45e7bed230a2cafd56e4caf (patch)
tree       62bd0cca8bb49199737a5d5532314cb8080d4031 /riscv
parent     3db3d4b1221a145c9703ba5bd82db8b5c6e9ee78 (diff)
parent     c8da0f2446d1261397965e6268d117bb50004ac9 (diff)
download   spike-ff81dea8593c6e51b45e7bed230a2cafd56e4caf.zip
           spike-ff81dea8593c6e51b45e7bed230a2cafd56e4caf.tar.gz
           spike-ff81dea8593c6e51b45e7bed230a2cafd56e4caf.tar.bz2
Merge pull request #355 from chihminchao/rvv-0.8-2019-11
rvv-0.8-2019-11
Diffstat (limited to 'riscv')
-rw-r--r--  riscv/decode.h | 290
-rw-r--r--  riscv/encoding.h | 6
-rw-r--r--  riscv/insns/vaadd_vi.h | 1
-rw-r--r--  riscv/insns/vaadd_vv.h | 2
-rw-r--r--  riscv/insns/vaadd_vx.h | 2
-rw-r--r--  riscv/insns/vasub_vv.h | 2
-rw-r--r--  riscv/insns/vasub_vx.h | 2
-rw-r--r--  riscv/insns/vcompress_vm.h | 23
-rw-r--r--  riscv/insns/vfcvt_f_x_v.h | 2
-rw-r--r--  riscv/insns/vfcvt_f_xu_v.h | 2
-rw-r--r--  riscv/insns/vfcvt_x_f_v.h | 2
-rw-r--r--  riscv/insns/vfmerge_vfm.h | 12
-rw-r--r--  riscv/insns/vfmv_f_s.h | 1
-rw-r--r--  riscv/insns/vfmv_s_f.h | 12
-rw-r--r--  riscv/insns/vfmv_v_f.h | 14
-rw-r--r--  riscv/insns/vid_v.h | 4
-rw-r--r--  riscv/insns/viota_m.h | 5
-rw-r--r--  riscv/insns/vleff_v.h | 19
-rw-r--r--  riscv/insns/vlxb_v.h | 1
-rw-r--r--  riscv/insns/vlxbu_v.h | 1
-rw-r--r--  riscv/insns/vlxe_v.h | 1
-rw-r--r--  riscv/insns/vlxh_v.h | 1
-rw-r--r--  riscv/insns/vlxhu_v.h | 1
-rw-r--r--  riscv/insns/vlxw_v.h | 1
-rw-r--r--  riscv/insns/vlxwu_v.h | 1
-rw-r--r--  riscv/insns/vmadc_vim.h | 1
-rw-r--r--  riscv/insns/vmadc_vvm.h | 1
-rw-r--r--  riscv/insns/vmadc_vxm.h | 1
-rw-r--r--  riscv/insns/vmerge_vim.h | 1
-rw-r--r--  riscv/insns/vmerge_vvm.h | 1
-rw-r--r--  riscv/insns/vmerge_vxm.h | 1
-rw-r--r--  riscv/insns/vmfeq_vf.h | 2
-rw-r--r--  riscv/insns/vmfeq_vv.h | 2
-rw-r--r--  riscv/insns/vmfge_vf.h | 4
-rw-r--r--  riscv/insns/vmfgt_vf.h | 4
-rw-r--r--  riscv/insns/vmfle_vf.h | 2
-rw-r--r--  riscv/insns/vmfle_vv.h | 4
-rw-r--r--  riscv/insns/vmflt_vf.h | 4
-rw-r--r--  riscv/insns/vmflt_vv.h | 4
-rw-r--r--  riscv/insns/vmfne_vf.h | 2
-rw-r--r--  riscv/insns/vmfne_vv.h | 2
-rw-r--r--  riscv/insns/vmford_vf.h | 5
-rw-r--r--  riscv/insns/vmford_vv.h | 5
-rw-r--r--  riscv/insns/vmsbc_vvm.h | 1
-rw-r--r--  riscv/insns/vmsbc_vxm.h | 1
-rw-r--r--  riscv/insns/vmsbf_m.h | 1
-rw-r--r--  riscv/insns/vmsif_m.h | 1
-rw-r--r--  riscv/insns/vmsof_m.h | 1
-rw-r--r--  riscv/insns/vmulhsu_vv.h | 1
-rw-r--r--  riscv/insns/vmulhsu_vx.h | 1
-rw-r--r--  riscv/insns/vmv_s_x.h | 18
-rw-r--r--  riscv/insns/vmv_v_v.h | 1
-rw-r--r--  riscv/insns/vmv_x_s.h | 47
-rw-r--r--  riscv/insns/vnclip_vi.h | 13
-rw-r--r--  riscv/insns/vnclip_vv.h | 19
-rw-r--r--  riscv/insns/vnclip_vx.h | 18
-rw-r--r--  riscv/insns/vnclipu_vi.h | 8
-rw-r--r--  riscv/insns/vnclipu_vv.h | 19
-rw-r--r--  riscv/insns/vnclipu_vx.h | 19
-rw-r--r--  riscv/insns/vnsra_vi.h | 2
-rw-r--r--  riscv/insns/vnsra_vv.h | 2
-rw-r--r--  riscv/insns/vnsra_vx.h | 2
-rw-r--r--  riscv/insns/vnsrl_vi.h | 2
-rw-r--r--  riscv/insns/vnsrl_vv.h | 2
-rw-r--r--  riscv/insns/vnsrl_vx.h | 2
-rw-r--r--  riscv/insns/vrgather_vi.h | 18
-rw-r--r--  riscv/insns/vrgather_vv.h | 24
-rw-r--r--  riscv/insns/vrgather_vx.h | 21
-rw-r--r--  riscv/insns/vsadd_vi.h | 1
-rw-r--r--  riscv/insns/vsadd_vv.h | 1
-rw-r--r--  riscv/insns/vsadd_vx.h | 1
-rw-r--r--  riscv/insns/vslide1down_vx.h | 5
-rw-r--r--  riscv/insns/vslide1up_vx.h | 6
-rw-r--r--  riscv/insns/vslidedown_vi.h | 10
-rw-r--r--  riscv/insns/vslidedown_vx.h | 14
-rw-r--r--  riscv/insns/vslideup_vi.h | 6
-rw-r--r--  riscv/insns/vslideup_vx.h | 6
-rw-r--r--  riscv/insns/vsmul_vv.h | 23
-rw-r--r--  riscv/insns/vsmul_vx.h | 25
-rw-r--r--  riscv/insns/vssra_vi.h | 6
-rw-r--r--  riscv/insns/vssra_vv.h | 5
-rw-r--r--  riscv/insns/vssra_vx.h | 5
-rw-r--r--  riscv/insns/vssrl_vi.h | 5
-rw-r--r--  riscv/insns/vssrl_vv.h | 5
-rw-r--r--  riscv/insns/vssrl_vx.h | 5
-rw-r--r--  riscv/insns/vssub_vv.h | 1
-rw-r--r--  riscv/insns/vssub_vx.h | 1
-rw-r--r--  riscv/insns/vssubu_vv.h | 1
-rw-r--r--  riscv/insns/vssubu_vx.h | 1
-rw-r--r--  riscv/insns/vsuxb_v.h | 22
-rw-r--r--  riscv/insns/vsuxe_v.h | 24
-rw-r--r--  riscv/insns/vsuxh_v.h | 19
-rw-r--r--  riscv/insns/vsuxw_v.h | 14
-rw-r--r--  riscv/insns/vsxb_v.h | 1
-rw-r--r--  riscv/insns/vsxe_v.h | 1
-rw-r--r--  riscv/insns/vsxh_v.h | 1
-rw-r--r--  riscv/insns/vsxw_v.h | 1
-rw-r--r--  riscv/insns/vwsmacc_vv.h | 2
-rw-r--r--  riscv/insns/vwsmacc_vx.h | 2
-rw-r--r--  riscv/insns/vwsmaccsu_vv.h | 2
-rw-r--r--  riscv/insns/vwsmaccsu_vx.h | 2
-rw-r--r--  riscv/insns/vwsmaccu_vv.h | 2
-rw-r--r--  riscv/insns/vwsmaccu_vx.h | 2
-rw-r--r--  riscv/processor.cc | 21
-rw-r--r--  riscv/processor.h | 2
-rw-r--r--  riscv/riscv.mk.in | 4
106 files changed, 441 insertions, 512 deletions
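
Two themes recur in the diff that follows. First, the TAIL_ZEROING macros are removed from decode.h, so destination elements past vl are no longer cleared by every instruction. Second, upper-bound checks such as require(insn.rd() + P.VU.vlmul * 2 <= 32) are replaced by alignment checks of the form require((insn.rd() & (P.VU.vlmul - 1)) == 0): a register group of length LMUL must start at a register number that is a multiple of LMUL. The standalone C++ sketch below restates that alignment test for illustration only; it is not part of the commit, and group_aligned and its arguments are names invented for this note.

#include <cassert>
#include <cstdio>

// Illustrative only: true when a vector register group of size 'vlmul'
// (a power of two: 1, 2, 4 or 8) starts at a register number that is a
// multiple of 'vlmul', mirroring the (reg & (vlmul - 1)) == 0 checks
// added throughout decode.h in this change.
static bool group_aligned(unsigned vreg, unsigned vlmul) {
  // For power-of-two vlmul, (vreg % vlmul) equals (vreg & (vlmul - 1)).
  return (vreg & (vlmul - 1)) == 0;
}

int main() {
  assert(group_aligned(8, 4));   // v8 can head a 4-register group
  assert(!group_aligned(6, 4));  // v6 cannot
  assert(group_aligned(6, 2));   // but v6 can head a 2-register group
  std::printf("alignment checks behave as expected\n");
  return 0;
}
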
diff --git a/riscv/decode.h b/riscv/decode.h
index 7ecd74f..a756607 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -65,29 +65,6 @@ const int NCSR = 4096;
#define MAX_INSN_LENGTH 8
#define PC_ALIGN 2
-#ifndef TAIL_ZEROING
- #define TAIL_ZEROING true
-#else
- #define TAIL_ZEROING false
-#endif
-
-#ifdef WORDS_BIGENDIAN
- // Elements are stored in opposite order, see comment in processor.h
- #define TAIL_ZERO(x) \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x) - 1); \
- memset(tail - (P.VU.vlmax - vl) * (x), 0, (P.VU.vlmax - vl) * (x));
- #define TAIL_ZERO_REDUCTION(x) \
- uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
- memset(tail - ((P.VU.get_vlen() - x) >> 3), 0, (P.VU.get_vlen() - x) >> 3);
-#else
- #define TAIL_ZERO(x) \
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * (x)); \
- memset(tail, 0, (P.VU.vlmax - vl) * (x));
- #define TAIL_ZERO_REDUCTION(x) \
- uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
- memset(tail, 0, (P.VU.get_vlen() - x) >> 3);
-#endif
-
typedef uint64_t insn_bits_t;
class insn_t
{
@@ -230,7 +207,7 @@ private:
#define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0)
#define require_vector_vs do { } while (0) // TODO MSTATUS_VS
-#define require_vector do { require_vector_vs; require(!P.VU.vill); } while (0)
+#define require_vector do { require_vector_vs; require_extension('V'); require(!P.VU.vill); } while (0)
#define require_vector_for_vsetvl do { require_vector_vs; require_extension('V'); } while (0)
#define set_fp_exceptions ({ if (softfloat_exceptionFlags) { \
@@ -368,9 +345,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
}
#define VI_ELEMENT_SKIP(inx) \
- if (inx >= vl && TAIL_ZEROING) { \
- is_valid = false; \
- } else if (inx >= vl && !TAIL_ZEROING) { \
+ if (inx >= vl) { \
continue; \
} else if (inx < P.VU.vstart) { \
continue; \
@@ -381,7 +356,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
//
// vector: operation and register acccess check helper
//
-static inline bool is_overlaped(const int astart, const int asize,
+static inline bool is_overlapped(const int astart, const int asize,
const int bstart, const int bsize)
{
const int aend = astart + asize;
@@ -390,38 +365,84 @@ static inline bool is_overlaped(const int astart, const int asize,
}
#define VI_NARROW_CHECK_COMMON \
+ require_vector;\
require(P.VU.vlmul <= 4); \
require(P.VU.vsew * 2 <= P.VU.ELEN); \
- require(insn.rs2() + P.VU.vlmul * 2 <= 32);
+ require((insn.rs2() & (P.VU.vlmul * 2 - 1)) == 0); \
+ require((insn.rd() & (P.VU.vlmul - 1)) == 0); \
+ if (insn.v_vm() == 0 && P.VU.vlmul > 1) \
+ require(insn.rd() != 0);
#define VI_WIDE_CHECK_COMMON \
require_vector;\
require(P.VU.vlmul <= 4); \
require(P.VU.vsew * 2 <= P.VU.ELEN); \
- require(insn.rd() + P.VU.vlmul * 2 <= 32); \
+ require((insn.rd() & (P.VU.vlmul * 2 - 1)) == 0); \
if (insn.v_vm() == 0) \
require(insn.rd() != 0);
-#define VI_CHECK_VREG_OVERLAP(v1, v2) \
- require(!is_overlaped(v1, P.VU.vlmul, v2, P.VU.vlmul));
+#define VI_CHECK_MSS(is_vs1) \
+ if (P.VU.vlmul > 1) { \
+ require(!is_overlapped(insn.rd(), 1, insn.rs2(), P.VU.vlmul)); \
+ require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
+ if (is_vs1) {\
+ require(!is_overlapped(insn.rd(), 1, insn.rs1(), P.VU.vlmul)); \
+ require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+ } \
+ }
-#define VI_CHECK_SS \
- require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul));
+#define VI_CHECK_SSS(is_vs1) \
+ if (P.VU.vlmul > 1) { \
+ require((insn.rd() & (P.VU.vlmul - 1)) == 0); \
+ require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
+ if (is_vs1) { \
+ require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+ } \
+ if (insn.v_vm() == 0) \
+ require(insn.rd() != 0); \
+ }
+
+#define VI_CHECK_SXX \
+ require_vector; \
+ if (P.VU.vlmul > 1) { \
+ require((insn.rd() & (P.VU.vlmul - 1)) == 0); \
+ if (insn.v_vm() == 0) \
+ require(insn.rd() != 0); \
+ }
#define VI_CHECK_SD \
- require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
+ require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
-#define VI_CHECK_DSS(is_rs) \
+#define VI_CHECK_DSS(is_vs1) \
VI_WIDE_CHECK_COMMON; \
- require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
- if (is_rs) \
- require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul));
+ require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
+ require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
+ if (is_vs1) {\
+ require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \
+ require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+ }
#define VI_CHECK_DDS(is_rs) \
VI_WIDE_CHECK_COMMON; \
- require(insn.rs2() + P.VU.vlmul * 2 <= 32); \
- if (is_rs) \
- require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul));
+ require((insn.rs2() & (P.VU.vlmul * 2 - 1)) == 0); \
+ if (is_rs) { \
+ require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \
+ require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+ }
+
+#define VI_CHECK_SDS(is_vs1) \
+ VI_NARROW_CHECK_COMMON; \
+ require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); \
+ if (is_vs1) \
+ require((insn.rs1() & (P.VU.vlmul - 1)) == 0); \
+
+#define VI_CHECK_REDUCTION(is_wide) \
+ require_vector;\
+ if (is_wide) {\
+ require(P.VU.vlmul <= 4); \
+ require(P.VU.vsew * 2 <= P.VU.ELEN); \
+ } \
+ require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
//
// vector: loop header and end helper
@@ -436,50 +457,22 @@ static inline bool is_overlaped(const int astart, const int asize,
reg_t rs2_num = insn.rs2(); \
for (reg_t i=P.VU.vstart; i<vl; ++i){
-#define VI_TAIL_ZERO(elm) \
- if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING) { \
- TAIL_ZERO((sew >> 3) * elm); \
- }
-
-#define VI_TAIL_ZERO_MASK(dst) \
- if (vl != 0 && TAIL_ZEROING){ \
- for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
- const int mlen = P.VU.vmlen; \
- const int midx = (mlen * i) / 64; \
- const int mpos = (mlen * i) % 64; \
- uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
- uint64_t &vdi = P.VU.elt<uint64_t>(dst, midx); \
- vdi = (vdi & ~mmask);\
- }\
- }\
-
#define VI_LOOP_BASE \
VI_GENERAL_LOOP_BASE \
VI_LOOP_ELEMENT_SKIP();
#define VI_LOOP_END \
} \
- if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- TAIL_ZERO((sew >> 3) * 1); \
- }\
- P.VU.vstart = 0;
-
-#define VI_LOOP_END_NO_TAIL_ZERO \
- } \
P.VU.vstart = 0;
#define VI_LOOP_WIDEN_END \
} \
- if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- TAIL_ZERO((sew >> 3) * 2); \
- }\
P.VU.vstart = 0;
#define VI_LOOP_REDUCTION_END(x) \
} \
- if (vl > 0 && TAIL_ZEROING) { \
+ if (vl > 0) { \
vd_0_des = vd_0_res; \
- TAIL_ZERO_REDUCTION(x); \
} \
P.VU.vstart = 0;
@@ -500,7 +493,6 @@ static inline bool is_overlaped(const int astart, const int asize,
#define VI_LOOP_CMP_END \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
} \
- VI_TAIL_ZERO_MASK(rd_num); \
P.VU.vstart = 0;
#define VI_LOOP_MASK(op) \
@@ -516,24 +508,9 @@ static inline bool is_overlaped(const int astart, const int asize,
uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \
res = (res & ~mmask) | ((op) & (1ULL << mpos)); \
} \
- \
- if (TAIL_ZEROING) {\
- for (reg_t i = vl; i < P.VU.vlmax && i > 0; ++i) { \
- int mlen = P.VU.vmlen; \
- int midx = (mlen * i) / 64; \
- int mpos = (mlen * i) % 64; \
- uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
- uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \
- res = (res & ~mmask); \
- } \
- } \
P.VU.vstart = 0;
#define VI_LOOP_NSHIFT_BASE \
- require(P.VU.vsew <= e32); \
- if (insn.rd() != 0){ \
- VI_CHECK_SD; \
- } \
VI_GENERAL_LOOP_BASE; \
VI_LOOP_ELEMENT_SKIP({\
require(!(insn.rd() == 0 && P.VU.vlmul > 1));\
@@ -541,31 +518,27 @@ static inline bool is_overlaped(const int astart, const int asize,
#define INT_ROUNDING(result, xrm, gb) \
- if (gb > 0) { \
- switch(xrm) {\
+ do { \
+ const uint64_t lsb = 1UL << (gb); \
+ const uint64_t lsb_half = lsb >> 1; \
+ switch (xrm) {\
case VRM::RNU:\
- result += ((uint64_t)1 << ((gb) - 1));\
+ result += lsb_half; \
break;\
case VRM::RNE:\
- if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x1){\
- result -= ((uint64_t)1 << ((gb) - 1));\
- }else if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x3){\
- result += ((uint64_t)1 << ((gb) - 1));\
- }\
+ if ((result & lsb_half) && ((result & (lsb_half - 1)) || (result & lsb))) \
+ result += lsb; \
break;\
case VRM::RDN:\
- result = (result >> ((gb) - 1)) << ((gb) - 1);\
break;\
case VRM::ROD:\
- result |= ((uint64_t)1ul << (gb)); \
+ if (result & (lsb - 1)) \
+ result |= lsb; \
break;\
case VRM::INVALID_RM:\
assert(true);\
} \
- } else if (gb == 0 && xrm == VRM::ROD) { \
- result |= 1ul; \
- }
-
+ } while (0)
//
// vector: integer and masking operand access helper
@@ -654,6 +627,7 @@ static inline bool is_overlaped(const int astart, const int asize,
// comparision result to masking register
#define VI_VV_LOOP_CMP(BODY) \
+ VI_CHECK_MSS(true); \
VI_LOOP_CMP_BASE \
if (sew == e8){ \
VV_PARAMS(e8); \
@@ -671,6 +645,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_CMP_END
#define VI_VX_LOOP_CMP(BODY) \
+ VI_CHECK_MSS(false); \
VI_LOOP_CMP_BASE \
if (sew == e8){ \
VX_PARAMS(e8); \
@@ -688,6 +663,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_CMP_END
#define VI_VI_LOOP_CMP(BODY) \
+ VI_CHECK_MSS(false); \
VI_LOOP_CMP_BASE \
if (sew == e8){ \
VI_PARAMS(e8); \
@@ -705,6 +681,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_CMP_END
#define VI_VV_ULOOP_CMP(BODY) \
+ VI_CHECK_MSS(true); \
VI_LOOP_CMP_BASE \
if (sew == e8){ \
VV_U_PARAMS(e8); \
@@ -722,6 +699,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_CMP_END
#define VI_VX_ULOOP_CMP(BODY) \
+ VI_CHECK_MSS(false); \
VI_LOOP_CMP_BASE \
if (sew == e8){ \
VX_U_PARAMS(e8); \
@@ -739,6 +717,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_CMP_END
#define VI_VI_ULOOP_CMP(BODY) \
+ VI_CHECK_MSS(false); \
VI_LOOP_CMP_BASE \
if (sew == e8){ \
VI_U_PARAMS(e8); \
@@ -757,6 +736,7 @@ static inline bool is_overlaped(const int astart, const int asize,
// merge and copy loop
#define VI_VVXI_MERGE_LOOP(BODY) \
+ VI_CHECK_SXX; \
VI_GENERAL_LOOP_BASE \
if (sew == e8){ \
VXI_PARAMS(e8); \
@@ -776,7 +756,6 @@ static inline bool is_overlaped(const int astart, const int asize,
// reduction loop - signed
#define VI_LOOP_REDUCTION_BASE(x) \
require(x == e8 || x == e16 || x == e32 || x == e64); \
- require_vector;\
reg_t vl = P.VU.vl; \
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
@@ -793,6 +772,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_REDUCTION_END(x)
#define VI_VV_LOOP_REDUCTION(BODY) \
+ VI_CHECK_REDUCTION(false); \
reg_t sew = P.VU.vsew; \
if (sew == e8) { \
REDUCTION_LOOP(e8, BODY) \
@@ -823,6 +803,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_REDUCTION_END(x)
#define VI_VV_ULOOP_REDUCTION(BODY) \
+ VI_CHECK_REDUCTION(false); \
reg_t sew = P.VU.vsew; \
if (sew == e8){ \
REDUCTION_ULOOP(e8, BODY) \
@@ -836,6 +817,7 @@ static inline bool is_overlaped(const int astart, const int asize,
// genearl VXI signed/unsgied loop
#define VI_VV_ULOOP(BODY) \
+ VI_CHECK_SSS(true) \
VI_LOOP_BASE \
if (sew == e8){ \
VV_U_PARAMS(e8); \
@@ -853,6 +835,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_END
#define VI_VV_LOOP(BODY) \
+ VI_CHECK_SSS(true) \
VI_LOOP_BASE \
if (sew == e8){ \
VV_PARAMS(e8); \
@@ -870,6 +853,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_END
#define VI_VX_ULOOP(BODY) \
+ VI_CHECK_SSS(false) \
VI_LOOP_BASE \
if (sew == e8){ \
VX_U_PARAMS(e8); \
@@ -887,6 +871,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_END
#define VI_VX_LOOP(BODY) \
+ VI_CHECK_SSS(false) \
VI_LOOP_BASE \
if (sew == e8){ \
VX_PARAMS(e8); \
@@ -904,6 +889,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_END
#define VI_VI_ULOOP(BODY) \
+ VI_CHECK_SSS(false) \
VI_LOOP_BASE \
if (sew == e8){ \
VI_U_PARAMS(e8); \
@@ -921,6 +907,7 @@ static inline bool is_overlaped(const int astart, const int asize,
VI_LOOP_END
#define VI_VI_LOOP(BODY) \
+ VI_CHECK_SSS(false) \
VI_LOOP_BASE \
if (sew == e8){ \
VI_PARAMS(e8); \
@@ -961,8 +948,8 @@ VI_LOOP_END
type_sew_t<sew1>::type vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \
type_sew_t<sew1>::type rs1 = (type_sew_t<sew1>::type)RS1;
-#define VI_VVXI_LOOP_NARROW(BODY) \
- require(P.VU.vsew <= e32); \
+#define VI_VVXI_LOOP_NARROW(BODY, is_vs1) \
+ VI_CHECK_SDS(is_vs1); \
VI_LOOP_BASE \
if (sew == e8){ \
VI_NARROW_SHIFT(e8, e16) \
@@ -976,7 +963,8 @@ VI_LOOP_END
} \
VI_LOOP_END
-#define VI_VI_LOOP_NSHIFT(BODY) \
+#define VI_VI_LOOP_NSHIFT(BODY, is_vs1) \
+ VI_CHECK_SDS(is_vs1); \
VI_LOOP_NSHIFT_BASE \
if (sew == e8){ \
VI_NSHIFT_PARAMS(e8, e16) \
@@ -990,7 +978,8 @@ VI_LOOP_END
} \
VI_LOOP_END
-#define VI_VX_LOOP_NSHIFT(BODY) \
+#define VI_VX_LOOP_NSHIFT(BODY, is_vs1) \
+ VI_CHECK_SDS(is_vs1); \
VI_LOOP_NSHIFT_BASE \
if (sew == e8){ \
VX_NSHIFT_PARAMS(e8, e16) \
@@ -1004,7 +993,8 @@ VI_LOOP_END
} \
VI_LOOP_END
-#define VI_VV_LOOP_NSHIFT(BODY) \
+#define VI_VV_LOOP_NSHIFT(BODY, is_vs1) \
+ VI_CHECK_SDS(is_vs1); \
VI_LOOP_NSHIFT_BASE \
if (sew == e8){ \
VV_NSHIFT_PARAMS(e8, e16) \
@@ -1134,8 +1124,8 @@ VI_LOOP_END
vd = sat_add<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
P.VU.vxsat |= sat;
-#define VI_VVX_LOOP_WIDE_SSMA(opd) \
- VI_WIDE_CHECK_COMMON \
+#define VI_VVX_LOOP_WIDE_SSMA(opd, is_vs1) \
+ VI_CHECK_DSS(is_vs1) \
VI_LOOP_BASE \
if (sew == e8){ \
VI_WIDE_SSMA(8, 16, opd); \
@@ -1162,8 +1152,8 @@ VI_LOOP_END
vd = sat_addu<uint##sew2##_t>(vd, res, sat); \
P.VU.vxsat |= sat;
-#define VI_VVX_LOOP_WIDE_USSMA(opd) \
- VI_WIDE_CHECK_COMMON \
+#define VI_VVX_LOOP_WIDE_USSMA(opd, is_vs1) \
+ VI_CHECK_DSS(is_vs1) \
VI_LOOP_BASE \
if (sew == e8){ \
VI_WIDE_USSMA(8, 16, opd); \
@@ -1190,8 +1180,8 @@ VI_LOOP_END
vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
P.VU.vxsat |= sat;
-#define VI_VVX_LOOP_WIDE_SU_SSMA(opd) \
- VI_WIDE_CHECK_COMMON \
+#define VI_VVX_LOOP_WIDE_SU_SSMA(opd, is_vs1) \
+ VI_CHECK_DSS(is_vs1) \
VI_LOOP_BASE \
if (sew == e8){ \
VI_WIDE_SU_SSMA(8, 16, opd); \
@@ -1219,7 +1209,7 @@ VI_LOOP_END
P.VU.vxsat |= sat;
#define VI_VVX_LOOP_WIDE_US_SSMA(opd) \
- VI_WIDE_CHECK_COMMON \
+ VI_CHECK_DSS(false) \
VI_LOOP_BASE \
if (sew == e8){ \
VI_WIDE_US_SSMA(8, 16, opd); \
@@ -1232,7 +1222,6 @@ VI_LOOP_END
// wide reduction loop - signed
#define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
- VI_CHECK_DSS(false); \
reg_t vl = P.VU.vl; \
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
@@ -1249,7 +1238,7 @@ VI_LOOP_END
VI_LOOP_REDUCTION_END(sew2)
#define VI_VV_LOOP_WIDE_REDUCTION(BODY) \
- require_vector;\
+ VI_CHECK_REDUCTION(true); \
reg_t sew = P.VU.vsew; \
if (sew == e8){ \
WIDE_REDUCTION_LOOP(e8, e16, BODY) \
@@ -1261,7 +1250,6 @@ VI_LOOP_END
// wide reduction loop - unsigned
#define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
- VI_CHECK_DSS(false); \
reg_t vl = P.VU.vl; \
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
@@ -1278,7 +1266,7 @@ VI_LOOP_END
VI_LOOP_REDUCTION_END(sew2)
#define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \
- require_vector;\
+ VI_CHECK_REDUCTION(true); \
reg_t sew = P.VU.vsew; \
if (sew == e8){ \
WIDE_REDUCTION_ULOOP(e8, e16, BODY) \
@@ -1290,6 +1278,7 @@ VI_LOOP_END
// carry/borrow bit loop
#define VI_VV_LOOP_CARRY(BODY) \
+ VI_CHECK_MSS(true); \
VI_LOOP_BASE \
if (sew == e8){ \
VV_CARRY_PARAMS(e8) \
@@ -1305,9 +1294,9 @@ VI_LOOP_END
BODY; \
} \
} \
- VI_TAIL_ZERO_MASK(rd_num);
#define VI_XI_LOOP_CARRY(BODY) \
+ VI_CHECK_MSS(false); \
VI_LOOP_BASE \
if (sew == e8){ \
XI_CARRY_PARAMS(e8) \
@@ -1323,10 +1312,10 @@ VI_LOOP_END
BODY; \
} \
} \
- VI_TAIL_ZERO_MASK(rd_num);
// average loop
-#define VI_VVX_LOOP_AVG(opd, op) \
+#define VI_VVX_LOOP_AVG(opd, op, is_vs1) \
+VI_CHECK_SSS(is_vs1); \
VRM xrm = p->VU.get_vround_mode(); \
VI_LOOP_BASE \
switch(sew) { \
@@ -1399,19 +1388,16 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_ST(stride, offset, st_width, elt_byte) \
const reg_t nf = insn.v_nf() + 1; \
- require_vector; \
require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+ VI_CHECK_SXX; \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vs3 = insn.rd(); \
const reg_t vlmax = P.VU.vlmax; \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
- bool is_valid = true; \
VI_STRIP(i) \
VI_ELEMENT_SKIP(i); \
- if (!is_valid) \
- continue; \
for (reg_t fn = 0; fn < nf; ++fn) { \
st_width##_t val = 0; \
switch (P.VU.vsew) { \
@@ -1435,19 +1421,18 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_LD(stride, offset, ld_width, elt_byte) \
const reg_t nf = insn.v_nf() + 1; \
- require_vector; \
require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+ VI_CHECK_SXX; \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vd = insn.rd(); \
const reg_t vlmax = P.VU.vlmax; \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
- bool is_valid = true; \
VI_ELEMENT_SKIP(i); \
VI_STRIP(i); \
for (reg_t fn = 0; fn < nf; ++fn) { \
- ld_width##_t val = is_valid ? MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte) : 0; \
+ ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \
if (vd + fn >= NVPR){ \
P.VU.vstart = vreg_inx;\
require(false); \
@@ -1471,10 +1456,10 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_LDST_FF(itype, tsew) \
- require_vector; \
require(p->VU.vsew >= e##tsew && p->VU.vsew <= e64); \
const reg_t nf = insn.v_nf() + 1; \
require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+ VI_CHECK_SXX; \
const reg_t sew = p->VU.vsew; \
const reg_t vl = p->VU.vl; \
const reg_t baseAddr = RS1; \
@@ -1483,7 +1468,6 @@ for (reg_t i = 0; i < vlmax; ++i) { \
const reg_t vlmax = P.VU.vlmax; \
const reg_t vlmul = P.VU.vlmul; \
for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
- bool is_valid = true; \
VI_STRIP(i); \
VI_ELEMENT_SKIP(i); \
\
@@ -1492,20 +1476,20 @@ for (reg_t i = 0; i < vlmax; ++i) { \
\
switch (sew) { \
case e8: \
- p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = val; \
break; \
case e16: \
- p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = val; \
break; \
case e32: \
- p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = val; \
break; \
case e64: \
- p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = val; \
break; \
} \
\
- if (val == 0 && is_valid) { \
+ if (val == 0) { \
p->VU.vl = i; \
early_stop = true; \
break; \
@@ -1566,16 +1550,10 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_VFP_LOOP_END \
} \
- if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- TAIL_ZERO((P.VU.vsew >> 3) * 1); \
- }\
P.VU.vstart = 0; \
#define VI_VFP_LOOP_WIDE_END \
} \
- if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
- TAIL_ZERO((P.VU.vsew >> 3) * 2); \
- }\
P.VU.vstart = 0; \
set_fp_exceptions;
@@ -1583,11 +1561,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
} \
P.VU.vstart = 0; \
set_fp_exceptions; \
- if (vl > 0 && TAIL_ZEROING) { \
+ if (vl > 0) { \
P.VU.elt<type_sew_t<x>::type>(rd_num, 0) = vd_0.v; \
- for (reg_t i = 1; i < (P.VU.VLEN / x); ++i) { \
- P.VU.elt<type_sew_t<x>::type>(rd_num, i) = 0; \
- } \
}
#define VI_VFP_LOOP_CMP_END \
@@ -1603,20 +1578,11 @@ for (reg_t i = 0; i < vlmax; ++i) { \
break; \
}; \
} \
- if (vl != 0 && TAIL_ZEROING){ \
- for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
- const int mlen = P.VU.vmlen; \
- const int midx = (mlen * i) / 64; \
- const int mpos = (mlen * i) % 64; \
- uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
- uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \
- vdi = (vdi & ~mmask);\
- }\
- }\
P.VU.vstart = 0; \
set_fp_exceptions;
#define VI_VFP_VV_LOOP(BODY) \
+ VI_CHECK_SSS(true); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
@@ -1637,6 +1603,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
VI_VFP_LOOP_END
#define VI_VFP_VV_LOOP_REDUCTION(BODY) \
+ VI_CHECK_REDUCTION(false) \
VI_VFP_LOOP_REDUCTION_BASE \
float32_t vs2 = P.VU.elt<float32_t>(rs2_num, i); \
BODY; \
@@ -1651,6 +1618,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
VI_VFP_LOOP_REDUCTION_END(e64)
#define VI_VFP_VF_LOOP(BODY) \
+ VI_CHECK_SSS(false); \
VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
@@ -1670,15 +1638,17 @@ for (reg_t i = 0; i < vlmax; ++i) { \
DEBUG_RVV_FP_VF; \
VI_VFP_LOOP_END
-#define VI_VFP_LOOP_CMP(BODY) \
+#define VI_VFP_LOOP_CMP(BODY, is_vs1) \
+ VI_CHECK_MSS(is_vs1); \
VI_VFP_LOOP_CMP_BASE \
BODY; \
+ set_fp_exceptions; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_CMP_END \
#define VI_VFP_VF_LOOP_WIDE(BODY) \
- VI_VFP_LOOP_BASE \
VI_CHECK_DSS(false); \
+ VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
@@ -1699,8 +1669,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
#define VI_VFP_VV_LOOP_WIDE(BODY) \
- VI_VFP_LOOP_BASE \
VI_CHECK_DSS(true); \
+ VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
@@ -1720,8 +1690,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
VI_VFP_LOOP_WIDE_END
#define VI_VFP_WF_LOOP_WIDE(BODY) \
- VI_VFP_LOOP_BASE \
VI_CHECK_DDS(false); \
+ VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
@@ -1740,8 +1710,8 @@ for (reg_t i = 0; i < vlmax; ++i) { \
VI_VFP_LOOP_WIDE_END
#define VI_VFP_WV_LOOP_WIDE(BODY) \
- VI_VFP_LOOP_BASE \
VI_CHECK_DDS(true); \
+ VI_VFP_LOOP_BASE \
switch(P.VU.vsew) { \
case e32: {\
float64_t &vd = P.VU.elt<float64_t>(rd_num, i); \
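
The decode.h hunks above also rewrite the INT_ROUNDING helper used by the averaging, scaled-shift, and narrowing-clip instructions: rather than special-casing a zero guard-bit count, the macro now pre-biases result according to the rounding mode and lets the caller shift right by gb bits afterwards. The sketch below restates that pre-bias-then-shift behaviour as a standalone function, assuming gb counts the discarded low bits; the enum and function names here are invented for this note and are not Spike APIs.

#include <cassert>
#include <cstdint>

// Hypothetical stand-ins for the VRM rounding modes (illustration only).
enum class Round { RNU, RNE, RDN, ROD };

// Shift 'value' right by 'gb' bits, rounding the discarded bits according
// to 'mode', using the same pre-bias scheme as the new INT_ROUNDING macro.
static uint64_t round_shift(uint64_t value, unsigned gb, Round mode) {
  const uint64_t lsb = uint64_t(1) << gb;  // weight of the lowest kept bit
  const uint64_t lsb_half = lsb >> 1;      // weight of the highest discarded bit
  switch (mode) {
    case Round::RNU:  // round to nearest, ties up
      value += lsb_half;
      break;
    case Round::RNE:  // round to nearest, ties to even
      if ((value & lsb_half) && ((value & (lsb_half - 1)) || (value & lsb)))
        value += lsb;
      break;
    case Round::RDN:  // round down (truncate)
      break;
    case Round::ROD:  // round to odd (jamming)
      if (value & (lsb - 1))
        value |= lsb;
      break;
  }
  return value >> gb;
}

int main() {
  assert(round_shift(0b0110, 2, Round::RNU) == 2);  // 1.5 rounds up to 2
  assert(round_shift(0b0010, 2, Round::RNE) == 0);  // 0.5 ties to even 0
  assert(round_shift(0b0111, 2, Round::RDN) == 1);  // 1.75 truncates to 1
  assert(round_shift(0b1010, 2, Round::ROD) == 3);  // 2.5 rounds to odd 3
  return 0;
}
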
diff --git a/riscv/encoding.h b/riscv/encoding.h
index a18a0c9..17ba2d3 100644
--- a/riscv/encoding.h
+++ b/riscv/encoding.h
@@ -882,8 +882,6 @@
#define MASK_VMFEQ_VF 0xfc00707f
#define MATCH_VMFLE_VF 0x64005057
#define MASK_VMFLE_VF 0xfc00707f
-#define MATCH_VMFORD_VF 0x68005057
-#define MASK_VMFORD_VF 0xfc00707f
#define MATCH_VMFLT_VF 0x6c005057
#define MASK_VMFLT_VF 0xfc00707f
#define MATCH_VMFNE_VF 0x70005057
@@ -962,8 +960,6 @@
#define MASK_VMFEQ_VV 0xfc00707f
#define MATCH_VMFLE_VV 0x64001057
#define MASK_VMFLE_VV 0xfc00707f
-#define MATCH_VMFORD_VV 0x68001057
-#define MASK_VMFORD_VV 0xfc00707f
#define MATCH_VMFLT_VV 0x6c001057
#define MASK_VMFLT_VV 0xfc00707f
#define MATCH_VMFNE_VV 0x70001057
@@ -2103,7 +2099,6 @@ DECLARE_INSN(vfmerge_vfm, MATCH_VFMERGE_VFM, MASK_VFMERGE_VFM)
DECLARE_INSN(vfmv_v_f, MATCH_VFMV_V_F, MASK_VFMV_V_F)
DECLARE_INSN(vmfeq_vf, MATCH_VMFEQ_VF, MASK_VMFEQ_VF)
DECLARE_INSN(vmfle_vf, MATCH_VMFLE_VF, MASK_VMFLE_VF)
-DECLARE_INSN(vmford_vf, MATCH_VMFORD_VF, MASK_VMFORD_VF)
DECLARE_INSN(vmflt_vf, MATCH_VMFLT_VF, MASK_VMFLT_VF)
DECLARE_INSN(vmfne_vf, MATCH_VMFNE_VF, MASK_VMFNE_VF)
DECLARE_INSN(vmfgt_vf, MATCH_VMFGT_VF, MASK_VMFGT_VF)
@@ -2143,7 +2138,6 @@ DECLARE_INSN(vfsgnjx_vv, MATCH_VFSGNJX_VV, MASK_VFSGNJX_VV)
DECLARE_INSN(vfmv_f_s, MATCH_VFMV_F_S, MASK_VFMV_F_S)
DECLARE_INSN(vmfeq_vv, MATCH_VMFEQ_VV, MASK_VMFEQ_VV)
DECLARE_INSN(vmfle_vv, MATCH_VMFLE_VV, MASK_VMFLE_VV)
-DECLARE_INSN(vmford_vv, MATCH_VMFORD_VV, MASK_VMFORD_VV)
DECLARE_INSN(vmflt_vv, MATCH_VMFLT_VV, MASK_VMFLT_VV)
DECLARE_INSN(vmfne_vv, MATCH_VMFNE_VV, MASK_VMFNE_VV)
DECLARE_INSN(vfdiv_vv, MATCH_VFDIV_VV, MASK_VFDIV_VV)
diff --git a/riscv/insns/vaadd_vi.h b/riscv/insns/vaadd_vi.h
index 5f8d5f5..6bd1a60 100644
--- a/riscv/insns/vaadd_vi.h
+++ b/riscv/insns/vaadd_vi.h
@@ -1,4 +1,5 @@
// vaadd: Averaging adds of integers
+VI_CHECK_SSS(false);
VRM xrm = P.VU.get_vround_mode();
VI_VI_LOOP
({
diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h
index b479970..0a14467 100644
--- a/riscv/insns/vaadd_vv.h
+++ b/riscv/insns/vaadd_vv.h
@@ -1,2 +1,2 @@
// vaadd.vv vd, vs2, vs1
-VI_VVX_LOOP_AVG(vs1, +);
+VI_VVX_LOOP_AVG(vs1, +, true);
diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h
index c811a0a..ae00d8e 100644
--- a/riscv/insns/vaadd_vx.h
+++ b/riscv/insns/vaadd_vx.h
@@ -1,2 +1,2 @@
// vaadd.vx vd, vs2, rs1
-VI_VVX_LOOP_AVG(rs1, +);
+VI_VVX_LOOP_AVG(rs1, +, false);
diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h
index 5a5ccc9..a45c18d 100644
--- a/riscv/insns/vasub_vv.h
+++ b/riscv/insns/vasub_vv.h
@@ -1,2 +1,2 @@
// vasub.vv vd, vs2, vs1
-VI_VVX_LOOP_AVG(vs1, -);
+VI_VVX_LOOP_AVG(vs1, -, true);
diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h
index c3cad4b..4e8dba1 100644
--- a/riscv/insns/vasub_vx.h
+++ b/riscv/insns/vasub_vx.h
@@ -1,2 +1,2 @@
// vasub.vx vd, vs2, rs1
-VI_VVX_LOOP_AVG(rs1, -);
+VI_VVX_LOOP_AVG(rs1, -, false);
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h
index b056b0e..77e91bf 100644
--- a/riscv/insns/vcompress_vm.h
+++ b/riscv/insns/vcompress_vm.h
@@ -1,14 +1,13 @@
// vcompress vd, vs2, vs1
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
require(P.VU.vstart == 0);
-reg_t sew = P.VU.vsew;
-reg_t vl = P.VU.vl;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs1(), 1));
+
reg_t pos = 0;
-for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+
+VI_GENERAL_LOOP_BASE
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
@@ -32,10 +31,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
++pos;
}
-}
-
-if (vl > 0 && TAIL_ZEROING) {
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, pos * ((sew >> 3) * 1));
- memset(tail, 0, (P.VU.vlmax - pos) * ((sew >> 3) * 1));
-}
-
+VI_LOOP_END;
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
index 311f875..f6604fb 100644
--- a/riscv/insns/vfcvt_f_x_v.h
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -1,5 +1,5 @@
// vfcvt.f.x.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
({
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
index ceabea3..2c845ac 100644
--- a/riscv/insns/vfcvt_f_xu_v.h
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -1,5 +1,5 @@
// vfcvt.f.xu.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
({
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
index ee53c6d..a9eedc4 100644
--- a/riscv/insns/vfcvt_x_f_v.h
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -1,5 +1,5 @@
// vfcvt.x.f.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
({
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
})
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index 6d12bce..ea78165 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -1,13 +1,7 @@
// vfmerge_vf vd, vs2, vs1, vm
-require_extension('F');
-require_fp;
-require(P.VU.vsew == 32);
-require_vector;
-reg_t vl = P.VU.vl;
+VI_CHECK_SSS(false);
+VI_VFP_COMMON;
reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i);
auto rs1 = f32(READ_FREG(rs1_num));
@@ -20,6 +14,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
vd = use_first ? rs1 : vs2;
}
-VI_TAIL_ZERO(1);
P.VU.vstart = 0;
-set_fp_exceptions;
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index c6dbaff..066db80 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -1,6 +1,5 @@
// vfmv_f_s: rd = vs2[0] (rs1=0)
require_vector;
-require(insn.v_vm() == 1);
require_fp;
require_extension('F');
require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index cb81008..8ff6094 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -15,17 +15,5 @@ if (vl > 0) {
else
P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
- const reg_t max_len = P.VU.VLEN / sew;
- for (reg_t i = 1; i < max_len; ++i) {
- switch(sew) {
- case e32:
- P.VU.elt<uint32_t>(rd_num, i) = 0;
- break;
- default:
- require(false);
- break;
- }
- }
-
vl = 0;
}
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index c85a3e9..f323263 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -1,13 +1,7 @@
-// vfmerge_vf vd, vs2, vs1, vm
-require_extension('F');
-require_fp;
-require(P.VU.vsew == 32);
-require_vector;
-reg_t vl = P.VU.vl;
+// vfmv_vf vd, vs1
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+VI_VFP_COMMON
reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i);
auto rs1 = f32(READ_FREG(rs1_num));
@@ -15,6 +9,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
vd = rs1;
}
-VI_TAIL_ZERO(1);
P.VU.vstart = 0;
-set_fp_exceptions;
diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h
index df6dd04..25422d6 100644
--- a/riscv/insns/vid_v.h
+++ b/riscv/insns/vid_v.h
@@ -6,6 +6,9 @@ reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
+require((rd_num & (P.VU.vlmul - 1)) == 0);
+if (insn.v_vm() == 0 && P.VU.vlmul >= 2) \
+ require(insn.rd() != 0);
for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
@@ -26,5 +29,4 @@ for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
}
}
-VI_TAIL_ZERO(1);
P.VU.vstart = 0;
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index fde0291..04bfcd8 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -7,6 +7,10 @@ reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
require(P.VU.vstart == 0);
+require(!is_overlapped(rd_num, P.VU.vlmul, rs2_num, 1));
+if (insn.v_vm() == 0)
+ require(!is_overlapped(rd_num, P.VU.vlmul, 0, 1));
+require((rd_num & (P.VU.vlmul - 1)) == 0);
int cnt = 0;
for (reg_t i = 0; i < vl; ++i) {
@@ -49,4 +53,3 @@ for (reg_t i = 0; i < vl; ++i) {
}
}
-VI_TAIL_ZERO(1);
diff --git a/riscv/insns/vleff_v.h b/riscv/insns/vleff_v.h
index ec2777a..e858de9 100644
--- a/riscv/insns/vleff_v.h
+++ b/riscv/insns/vleff_v.h
@@ -1,7 +1,7 @@
-require_vector;
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
const reg_t nf = insn.v_nf() + 1;
require((nf * P.VU.vlmul) <= (NVPR / 4));
+VI_CHECK_SXX;
const reg_t sew = P.VU.vsew;
const reg_t vl = P.VU.vl;
const reg_t baseAddr = RS1;
@@ -9,7 +9,6 @@ const reg_t rd_num = insn.rd();
bool early_stop = false;
const reg_t vlmul = P.VU.vlmul;
for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
- bool is_valid = true;
bool is_zero = false;
VI_STRIP(i);
VI_ELEMENT_SKIP(i);
@@ -20,23 +19,23 @@ for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint8(baseAddr + (i * nf + fn) * 1) : 0;
- is_zero = is_valid && P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint8(baseAddr + (i * nf + fn) * 1);
+ is_zero = P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e16:
P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint16(baseAddr + (i * nf + fn) * 2) : 0;
- is_zero = is_valid && P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint16(baseAddr + (i * nf + fn) * 2);
+ is_zero = P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e32:
P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint32(baseAddr + (i * nf + fn) * 4) : 0;
- is_zero = is_valid && P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint32(baseAddr + (i * nf + fn) * 4);
+ is_zero = P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e64:
P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint64(baseAddr + (i * nf + fn) * 8) : 0;
- is_zero = is_valid && P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint64(baseAddr + (i * nf + fn) * 8);
+ is_zero = P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
}
diff --git a/riscv/insns/vlxb_v.h b/riscv/insns/vlxb_v.h
index 5a99bd3..57ce8c8 100644
--- a/riscv/insns/vlxb_v.h
+++ b/riscv/insns/vlxb_v.h
@@ -1,4 +1,5 @@
// vlxb.v and vlsseg[2-8]b.v
require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxbu_v.h b/riscv/insns/vlxbu_v.h
index daf2d2b..d8e3dd6 100644
--- a/riscv/insns/vlxbu_v.h
+++ b/riscv/insns/vlxbu_v.h
@@ -1,4 +1,5 @@
// vlxbu.v and vlxseg[2-8]bu.v
require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vlxe_v.h b/riscv/insns/vlxe_v.h
index b1190a8..1055eca 100644
--- a/riscv/insns/vlxe_v.h
+++ b/riscv/insns/vlxe_v.h
@@ -1,5 +1,6 @@
// vlxe.v and vlxseg[2-8]e.v
reg_t sew = P.VU.vsew;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
if (sew == e8) {
VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxh_v.h b/riscv/insns/vlxh_v.h
index 98145db..9f4c3a1 100644
--- a/riscv/insns/vlxh_v.h
+++ b/riscv/insns/vlxh_v.h
@@ -1,4 +1,5 @@
// vlxh.v and vlxseg[2-8]h.v
require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, int16, 2);
diff --git a/riscv/insns/vlxhu_v.h b/riscv/insns/vlxhu_v.h
index 27d549c..9283127 100644
--- a/riscv/insns/vlxhu_v.h
+++ b/riscv/insns/vlxhu_v.h
@@ -1,4 +1,5 @@
// vlxh.v and vlxseg[2-8]h.v
require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vlxw_v.h b/riscv/insns/vlxw_v.h
index 83300f0..c1117a2 100644
--- a/riscv/insns/vlxw_v.h
+++ b/riscv/insns/vlxw_v.h
@@ -1,5 +1,6 @@
// vlxw.v and vlxseg[2-8]w.v
require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, int32, 4);
diff --git a/riscv/insns/vlxwu_v.h b/riscv/insns/vlxwu_v.h
index a2f9913..d3034bd 100644
--- a/riscv/insns/vlxwu_v.h
+++ b/riscv/insns/vlxwu_v.h
@@ -1,4 +1,5 @@
// vlxwu.v and vlxseg[2-8]wu.v
require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h
index fd79089..a8185d1 100644
--- a/riscv/insns/vmadc_vim.h
+++ b/riscv/insns/vmadc_vim.h
@@ -1,5 +1,4 @@
// vmadc.vim vd, vs2, simm5
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h
index 82042ca..8d58658 100644
--- a/riscv/insns/vmadc_vvm.h
+++ b/riscv/insns/vmadc_vvm.h
@@ -1,5 +1,4 @@
// vmadc.vvm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h
index 8f26584..0b6273a 100644
--- a/riscv/insns/vmadc_vxm.h
+++ b/riscv/insns/vmadc_vxm.h
@@ -1,5 +1,4 @@
// vadc.vx vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h
index 13354d6..c6c87c7 100644
--- a/riscv/insns/vmerge_vim.h
+++ b/riscv/insns/vmerge_vim.h
@@ -1,4 +1,5 @@
// vmerge.vim vd, vs2, simm5
+VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h
index 7530b40..97a0182 100644
--- a/riscv/insns/vmerge_vvm.h
+++ b/riscv/insns/vmerge_vvm.h
@@ -1,4 +1,5 @@
// vmerge.vvm vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h
index b1757fa..de7df91 100644
--- a/riscv/insns/vmerge_vxm.h
+++ b/riscv/insns/vmerge_vxm.h
@@ -1,4 +1,5 @@
// vmerge.vxm vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
index cedf4b9..f0e7109 100644
--- a/riscv/insns/vmfeq_vf.h
+++ b/riscv/insns/vmfeq_vf.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
index 7e76cac..1be3a69 100644
--- a/riscv/insns/vmfeq_vv.h
+++ b/riscv/insns/vmfeq_vv.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, vs1);
-})
+}, true)
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
index 7eade89..1c68366 100644
--- a/riscv/insns/vmfge_vf.h
+++ b/riscv/insns/vmfge_vf.h
@@ -1,5 +1,5 @@
// vfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_le_quiet(rs1, vs2);
-})
+ res = f32_le(rs1, vs2);
+}, false)
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
index 6115d06..0979185 100644
--- a/riscv/insns/vmfgt_vf.h
+++ b/riscv/insns/vmfgt_vf.h
@@ -1,5 +1,5 @@
// vfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_lt_quiet(rs1, vs2);
-})
+ res = f32_lt(rs1, vs2);
+}, false)
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
index 998b93b..90607ec 100644
--- a/riscv/insns/vmfle_vf.h
+++ b/riscv/insns/vmfle_vf.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
index c716312..6ccdfec 100644
--- a/riscv/insns/vmfle_vv.h
+++ b/riscv/insns/vmfle_vv.h
@@ -1,5 +1,5 @@
// vfle.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_le_quiet(vs2, vs1);
-})
+ res = f32_le(vs2, vs1);
+}, true)
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
index af436e4..6b71a4a 100644
--- a/riscv/insns/vmflt_vf.h
+++ b/riscv/insns/vmflt_vf.h
@@ -1,5 +1,5 @@
// vflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_lt_quiet(vs2, rs1);
-})
+ res = f32_lt(vs2, rs1);
+}, false)
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
index ded867d..a2ed8e3 100644
--- a/riscv/insns/vmflt_vv.h
+++ b/riscv/insns/vmflt_vv.h
@@ -1,5 +1,5 @@
// vflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
- res = f32_lt_quiet(vs2, vs1);
-})
+ res = f32_lt(vs2, vs1);
+}, true)
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
index ac2eced..ef63678 100644
--- a/riscv/insns/vmfne_vf.h
+++ b/riscv/insns/vmfne_vf.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
index 3fa8beb..8378a23 100644
--- a/riscv/insns/vmfne_vv.h
+++ b/riscv/insns/vmfne_vv.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, vs1);
-})
+}, true)
diff --git a/riscv/insns/vmford_vf.h b/riscv/insns/vmford_vf.h
deleted file mode 100644
index b5e74f2..0000000
--- a/riscv/insns/vmford_vf.h
+++ /dev/null
@@ -1,5 +0,0 @@
-// vford.vf vd, vs2, rs1, vm
-VI_VFP_LOOP_CMP
-({
- res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1));
-})
diff --git a/riscv/insns/vmford_vv.h b/riscv/insns/vmford_vv.h
deleted file mode 100644
index 2e459c1..0000000
--- a/riscv/insns/vmford_vv.h
+++ /dev/null
@@ -1,5 +0,0 @@
-// vford.vv vd, vs2, vs1, vm
-VI_VFP_LOOP_CMP
-({
- res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1));
-})
diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h
index 3804ba8..f4ce6f4 100644
--- a/riscv/insns/vmsbc_vvm.h
+++ b/riscv/insns/vmsbc_vvm.h
@@ -1,5 +1,4 @@
// vmsbc.vvm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h
index d5332f5..aec4409 100644
--- a/riscv/insns/vmsbc_vxm.h
+++ b/riscv/insns/vmsbc_vxm.h
@@ -1,5 +1,4 @@
// vmsbc.vxm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto &v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
index 3047cca..443fcbb 100644
--- a/riscv/insns/vmsbf_m.h
+++ b/riscv/insns/vmsbf_m.h
@@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
}
}
-VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
index 826e7cd..381088b 100644
--- a/riscv/insns/vmsif_m.h
+++ b/riscv/insns/vmsif_m.h
@@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
}
}
-VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
index 48805f7..d66002d 100644
--- a/riscv/insns/vmsof_m.h
+++ b/riscv/insns/vmsof_m.h
@@ -28,5 +28,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
}
}
-VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
index 59882da..b918551 100644
--- a/riscv/insns/vmulhsu_vv.h
+++ b/riscv/insns/vmulhsu_vv.h
@@ -1,4 +1,5 @@
// vmulhsu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
switch(sew) {
case e8: {
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
index d39615a..cb2db3d 100644
--- a/riscv/insns/vmulhsu_vx.h
+++ b/riscv/insns/vmulhsu_vx.h
@@ -1,4 +1,5 @@
// vmulhsu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
switch(sew) {
case e8: {
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
index 38b2697..948b5be 100644
--- a/riscv/insns/vmv_s_x.h
+++ b/riscv/insns/vmv_s_x.h
@@ -24,23 +24,5 @@ if (vl > 0) {
break;
}
- const reg_t max_len = P.VU.VLEN / sew;
- for (reg_t i = 1; i < max_len; ++i) {
- switch(sew) {
- case e8:
- P.VU.elt<uint8_t>(rd_num, i) = 0;
- break;
- case e16:
- P.VU.elt<uint16_t>(rd_num, i) = 0;
- break;
- case e32:
- P.VU.elt<uint32_t>(rd_num, i) = 0;
- break;
- default:
- P.VU.elt<uint64_t>(rd_num, i) = 0;
- break;
- }
- }
-
vl = 0;
}
diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h
index 734010b..a4f9a5c 100644
--- a/riscv/insns/vmv_v_v.h
+++ b/riscv/insns/vmv_v_v.h
@@ -1,4 +1,5 @@
// vvmv.v.v vd, vs1
+require((insn.rs1() & (P.VU.vlmul - 1)) == 0);
VI_VVXI_MERGE_LOOP
({
vd = vs1;
diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h
index f22c2dd..50f2e79 100644
--- a/riscv/insns/vmv_x_s.h
+++ b/riscv/insns/vmv_x_s.h
@@ -1,25 +1,28 @@
-// vext_x_v: rd = vs2[0]
+// vmv_x_s: rd = vs2[rs1]
require(insn.v_vm() == 1);
uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen());
-VI_LOOP_BASE
-VI_LOOP_END_NO_TAIL_ZERO
-switch(sew) {
-case e8:
- WRITE_RD(P.VU.elt<uint8_t>(rs2_num, 0));
- break;
-case e16:
- WRITE_RD(P.VU.elt<uint16_t>(rs2_num, 0));
- break;
-case e32:
- if (P.get_max_xlen() == 32)
- WRITE_RD(P.VU.elt<int32_t>(rs2_num, 0));
- else
- WRITE_RD(P.VU.elt<uint32_t>(rs2_num, 0));
- break;
-case e64:
- if (P.get_max_xlen() <= sew)
- WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0) & xmask);
- else
- WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0));
- break;
+reg_t rs1 = RS1;
+reg_t sew = P.VU.vsew;
+reg_t rs2_num = insn.rs2();
+
+if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) {
+ WRITE_RD(0);
+} else {
+ switch(sew) {
+ case e8:
+ WRITE_RD(P.VU.elt<int8_t>(rs2_num, rs1));
+ break;
+ case e16:
+ WRITE_RD(P.VU.elt<int16_t>(rs2_num, rs1));
+ break;
+ case e32:
+ WRITE_RD(P.VU.elt<int32_t>(rs2_num, rs1));
+ break;
+ case e64:
+ if (P.get_max_xlen() <= sew)
+ WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1) & xmask);
+ else
+ WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1));
+ break;
+ }
}
diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h
index ca27593..eb21710 100644
--- a/riscv/insns/vnclip_vi.h
+++ b/riscv/insns/vnclip_vi.h
@@ -4,14 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
int64_t int_min = -(1 << (P.VU.vsew - 1));
VI_VVXI_LOOP_NARROW
({
-
int64_t result = vs2;
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ unsigned shift = zimm5 & ((sew * 2) - 1);
+
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
- result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+ result = result >> shift;
-// saturation
+ // saturation
if (result < int_min) {
result = int_min;
P.VU.vxsat = 1;
@@ -21,4 +22,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h
index 7bcb4cb..92575a6 100644
--- a/riscv/insns/vnclip_vv.h
+++ b/riscv/insns/vnclip_vv.h
@@ -4,20 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
int64_t int_min = -(1 << (P.VU.vsew - 1));
VI_VVXI_LOOP_NARROW
({
+ int128_t result = vs2;
+ unsigned shift = vs1 & ((sew * 2) - 1);
- int64_t result = vs2;
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- unsigned_shift_amount = 2 * sew - 1;
- }
-
- result = (vsext(result, sew * 2)) >> unsigned_shift_amount;
+ result = result >> shift;
-// saturation
+ // saturation
if (result < int_min) {
result = int_min;
P.VU.vxsat = 1;
@@ -27,4 +22,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, true)
diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h
index b66e830..96409de 100644
--- a/riscv/insns/vnclip_vx.h
+++ b/riscv/insns/vnclip_vx.h
@@ -4,19 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
int64_t int_min = -(1 << (P.VU.vsew - 1));
VI_VVXI_LOOP_NARROW
({
+ int128_t result = vs2;
+ unsigned shift = rs1 & ((sew * 2) - 1);
- int64_t result = vs2;
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- unsigned_shift_amount = 2 * sew - 1;
- }
- result = vsext(result, sew * 2) >> unsigned_shift_amount;
+ result = result >> shift;
-// saturation
+ // saturation
if (result < int_min) {
result = int_min;
P.VU.vxsat = 1;
@@ -26,4 +22,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h
index 61cb015..b1527f7 100644
--- a/riscv/insns/vnclipu_vi.h
+++ b/riscv/insns/vnclipu_vi.h
@@ -4,11 +4,13 @@ uint64_t int_max = ~(-1ll << P.VU.vsew);
VI_VVXI_LOOP_NARROW
({
uint64_t result = vs2_u;
+ unsigned shift = zimm5 & ((sew * 2) - 1);
+
// rounding
- INT_ROUNDING(result, xrm, sew);
+ INT_ROUNDING(result, xrm, shift);
// unsigned shifting to rs1
- result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+ result = result >> shift;
// saturation
if (result & (uint64_t)(-1ll << sew)) {
@@ -17,4 +19,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h
index 004f24f..217e82f 100644
--- a/riscv/insns/vnclipu_vv.h
+++ b/riscv/insns/vnclipu_vv.h
@@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode();
uint64_t int_max = ~(-1ll << P.VU.vsew);
VI_VVXI_LOOP_NARROW
({
+ uint128_t result = vs2_u;
+ unsigned shift = vs1 & ((sew * 2) - 1);
- uint64_t result = vs2_u;
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ result = result >> shift;
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- result = 0;
- } else {
- result = vzext(result, sew * 2) >> unsigned_shift_amount;
- }
-// saturation
+ // saturation
if (result & (uint64_t)(-1ll << sew)) {
result = int_max;
P.VU.vxsat = 1;
}
vd = result;
-})
+}, true)
diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h
index 0507a2b..ce15b55 100644
--- a/riscv/insns/vnclipu_vx.h
+++ b/riscv/insns/vnclipu_vx.h
@@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode();
uint64_t int_max = ~(-1ll << P.VU.vsew);
VI_VVXI_LOOP_NARROW
({
- uint64_t result = vs2;
+ uint128_t result = vs2_u;
+ unsigned shift = rs1 & ((sew * 2) - 1);
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- result = 0;
- } else {
- result = vzext(result, sew * 2) >> unsigned_shift_amount;
- }
+ result = result >> shift;
-// saturation
+ // saturation
if (result & (uint64_t)(-1ll << sew)) {
result = int_max;
P.VU.vxsat = 1;
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnsra_vi.h b/riscv/insns/vnsra_vi.h
index 0502ff1..f41979e 100644
--- a/riscv/insns/vnsra_vi.h
+++ b/riscv/insns/vnsra_vi.h
@@ -2,4 +2,4 @@
VI_VI_LOOP_NSHIFT
({
vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f);
-})
+}, false)
diff --git a/riscv/insns/vnsra_vv.h b/riscv/insns/vnsra_vv.h
index 555ce3f..59f255e 100644
--- a/riscv/insns/vnsra_vv.h
+++ b/riscv/insns/vnsra_vv.h
@@ -2,4 +2,4 @@
VI_VV_LOOP_NSHIFT
({
vd = vs2 >> (vs1 & (sew * 2 - 1));
-})
+}, true)
diff --git a/riscv/insns/vnsra_vx.h b/riscv/insns/vnsra_vx.h
index 05a55e3..adaa24c 100644
--- a/riscv/insns/vnsra_vx.h
+++ b/riscv/insns/vnsra_vx.h
@@ -2,4 +2,4 @@
VI_VX_LOOP_NSHIFT
({
vd = vs2 >> (rs1 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vnsrl_vi.h b/riscv/insns/vnsrl_vi.h
index d4dfcf0..91402c0 100644
--- a/riscv/insns/vnsrl_vi.h
+++ b/riscv/insns/vnsrl_vi.h
@@ -2,4 +2,4 @@
VI_VI_LOOP_NSHIFT
({
vd = vs2_u >> (zimm5 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vnsrl_vv.h b/riscv/insns/vnsrl_vv.h
index ab72b84..609299f 100644
--- a/riscv/insns/vnsrl_vv.h
+++ b/riscv/insns/vnsrl_vv.h
@@ -2,4 +2,4 @@
VI_VV_LOOP_NSHIFT
({
vd = vs2_u >> (vs1 & (sew * 2 - 1));
-})
+}, true)
diff --git a/riscv/insns/vnsrl_vx.h b/riscv/insns/vnsrl_vx.h
index e149b38..8356a2b 100644
--- a/riscv/insns/vnsrl_vx.h
+++ b/riscv/insns/vnsrl_vx.h
@@ -2,4 +2,4 @@
VI_VX_LOOP_NSHIFT
({
vd = vs2_u >> (rs1 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h
index eff67b8..cab4a78 100644
--- a/riscv/insns/vrgather_vi.h
+++ b/riscv/insns/vrgather_vi.h
@@ -1,11 +1,14 @@
// vrgather.vi vd, vs2, zimm5 vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
reg_t zimm5 = insn.v_zimm5();
+
+VI_LOOP_BASE
+
for (reg_t i = P.VU.vstart; i < vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
@@ -25,5 +28,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
}
}
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h
index ce0c2a6..8266c95 100644
--- a/riscv/insns/vrgather_vv.h
+++ b/riscv/insns/vrgather_vv.h
@@ -1,15 +1,12 @@
// vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
- VI_LOOP_ELEMENT_SKIP();
- VI_CHECK_VREG_OVERLAP(rd_num, rs1_num);
- VI_CHECK_VREG_OVERLAP(rd_num, rs2_num);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs1() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2() && insn.rd() != insn.rs1());
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
+VI_LOOP_BASE
switch (sew) {
case e8: {
auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
@@ -33,7 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
break;
}
}
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h
index e9ff3b1..15e16b7 100644
--- a/riscv/insns/vrgather_vx.h
+++ b/riscv/insns/vrgather_vx.h
@@ -1,15 +1,13 @@
// vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
reg_t rs1 = RS1;
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
- VI_LOOP_ELEMENT_SKIP();
+VI_LOOP_BASE
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, rs1);
@@ -24,7 +22,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
P.VU.elt<uint64_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint64_t>(rs2_num, rs1);
break;
}
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h
index de2cb83..c361f08 100644
--- a/riscv/insns/vsadd_vi.h
+++ b/riscv/insns/vsadd_vi.h
@@ -1,4 +1,5 @@
// vsadd.vi vd, vs2 simm5
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
switch(sew) {
diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h
index 2152bab..ce0ef40 100644
--- a/riscv/insns/vsadd_vv.h
+++ b/riscv/insns/vsadd_vv.h
@@ -1,4 +1,5 @@
// vsadd.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
bool sat = false;
switch(sew) {
diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h
index 781e9e8..691f017 100644
--- a/riscv/insns/vsadd_vx.h
+++ b/riscv/insns/vsadd_vx.h
@@ -1,4 +1,5 @@
// vsadd.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
switch(sew) {
diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h
index 0069df7..04e2540 100644
--- a/riscv/insns/vslide1down_vx.h
+++ b/riscv/insns/vslide1down_vx.h
@@ -1,4 +1,9 @@
//vslide1down.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
VI_LOOP_BASE
if (i != vl - 1) {
switch (sew) {
diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h
index 50cc503..69ce0fd 100644
--- a/riscv/insns/vslide1up_vx.h
+++ b/riscv/insns/vslide1up_vx.h
@@ -1,8 +1,10 @@
//vslide1up.vx vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
require(insn.rd() != 0);
-VI_CHECK_SS
VI_LOOP_BASE
if (i != 0) {
if (sew == e8) {
diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h
index c21c5f2..dd58c1e 100644
--- a/riscv/insns/vslidedown_vi.h
+++ b/riscv/insns/vslidedown_vi.h
@@ -1,8 +1,14 @@
// vslidedown.vi vd, vs2, rs1
-VI_LOOP_BASE
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
const reg_t sh = insn.v_zimm5();
-bool is_valid = (i + sh) < P.VU.vlmax;
+VI_LOOP_BASE
+
reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
if (is_valid) {
offset = sh;
diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h
index 251740c..9881e0e 100644
--- a/riscv/insns/vslidedown_vx.h
+++ b/riscv/insns/vslidedown_vx.h
@@ -1,11 +1,17 @@
//vslidedown.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
+const reg_t sh = RS1;
VI_LOOP_BASE
-reg_t offset = RS1 == (reg_t)-1 ? ((RS1 & (P.VU.vlmax * 2 - 1)) + i) : RS1;
-bool is_valid = offset < P.VU.vlmax;
+reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
-if (!is_valid) {
- offset = 0;
+if (is_valid) {
+ offset = sh;
}
switch (sew) {
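
The corrected indexing above follows the spec's vd[i] = (i + OFFSET < VLMAX) ? vs2[i + OFFSET] : 0 rule instead of re-deriving the offset from RS1 on every iteration. A minimal per-element sketch, with masking and tail handling omitted and slidedown_elt as an illustrative name:

#include <cstdint>

// Per-element model of the slide-down read: element i of vd takes
// vs2[i + offset] while that index stays below vlmax, and 0 otherwise.
template <typename T>
static T slidedown_elt(const T *vs2, uint64_t i, uint64_t offset,
                       uint64_t vlmax)
{
  return (i + offset) < vlmax ? vs2[i + offset] : T(0);
}
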
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
index 4135b20..64b4aca 100644
--- a/riscv/insns/vslideup_vi.h
+++ b/riscv/insns/vslideup_vi.h
@@ -1,8 +1,10 @@
// vslideup.vi vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
require(insn.rd() != 0);
-VI_CHECK_SS
const reg_t offset = insn.v_zimm5();
VI_LOOP_BASE
if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
index bf73fcd..063c061 100644
--- a/riscv/insns/vslideup_vx.h
+++ b/riscv/insns/vslideup_vx.h
@@ -1,4 +1,10 @@
//vslideup.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
const reg_t offset = RS1;
VI_LOOP_BASE
if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
index a0c7f99..0807899 100644
--- a/riscv/insns/vsmul_vv.h
+++ b/riscv/insns/vsmul_vv.h
@@ -1,33 +1,32 @@
// vsmul: Signed saturating and rounding fractional multiply
VRM xrm = P.VU.get_vround_mode();
-uint64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint64_t int_min = - (1 << (P.VU.vsew - 1));
-uint64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
-VI_VV_ULOOP
+VI_VV_LOOP
({
- uint64_t vs1_sign;
- uint64_t vs2_sign;
- uint64_t result_sign;
+ int64_t vs1_sign;
+ int64_t vs2_sign;
+ int64_t result_sign;
vs1_sign = vs1 & sign_mask;
vs2_sign = vs2 & sign_mask;
bool overflow = vs1 == vs2 && vs1 == int_min;
- uint128_t result = (uint128_t)vs1 * (uint128_t)vs2;
- result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+ int128_t result = (int128_t)vs1 * (int128_t)vs2;
result_sign = (vs1_sign ^ vs2_sign) & sign_mask;
+
// rounding
INT_ROUNDING(result, xrm, sew - 1);
- // unsigned shifting
+ // remove guard bits
result = result >> (sew - 1);
// saturation
if (overflow) {
result = int_max;
P.VU.vxsat = 1;
- } else {
- result |= result_sign;
}
+
vd = result;
})
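
A scalar model of the reworked arithmetic, for SEW = 8 and round-to-nearest-up only, may help: the product is formed in a wider signed type, rounded at bit SEW-1, shifted down by SEW-1, and saturated only in the single -1.0 x -1.0 overflow case. vsmul_e8_rnu is an illustrative name; Spike's version is parameterised by VRM/INT_ROUNDING inside VI_VV_LOOP.

#include <cstdint>

// Scalar sketch of signed saturating fractional multiply (SEW = 8, rnu).
static int8_t vsmul_e8_rnu(int8_t a, int8_t b)
{
  const int sew = 8;
  if (a == INT8_MIN && b == INT8_MIN)  // only product that cannot be represented
    return INT8_MAX;                   // saturate (Spike also sets vxsat)
  int32_t prod = (int32_t)a * (int32_t)b;
  prod += (int32_t)1 << (sew - 2);     // rnu: round the sew-1 discarded bits
  return (int8_t)(prod >> (sew - 1));  // drop the redundant sign bit
}
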
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
index c7909c7..4326d8f 100644
--- a/riscv/insns/vsmul_vx.h
+++ b/riscv/insns/vsmul_vx.h
@@ -1,34 +1,33 @@
// vsmul
VRM xrm = P.VU.get_vround_mode();
-uint128_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint128_t int_min = - (1 << (P.VU.vsew - 1));
-uint128_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
-VI_VX_ULOOP
+VI_VX_LOOP
({
- uint128_t rs1_sign;
- uint128_t vs2_sign;
- uint128_t result_sign;
+ int64_t rs1_sign;
+ int64_t vs2_sign;
+ int64_t result_sign;
rs1_sign = rs1 & sign_mask;
vs2_sign = vs2 & sign_mask;
bool overflow = rs1 == vs2 && rs1 == int_min;
- uint128_t result = (uint128_t)rs1 * (uint128_t)vs2;
- result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+ int128_t result = (int128_t)rs1 * (int128_t)vs2;
result_sign = (rs1_sign ^ vs2_sign) & sign_mask;
+
// rounding
INT_ROUNDING(result, xrm, sew - 1);
- // unsigned shifting
+ // remove guard bits
result = result >> (sew - 1);
- // saturation
+ // max saturation
if (overflow) {
result = int_max;
P.VU.vxsat = 1;
- } else {
- result |= result_sign;
}
+
vd = result;
})
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
index ef2390c..c854ca6 100644
--- a/riscv/insns/vssra_vi.h
+++ b/riscv/insns/vssra_vi.h
@@ -3,6 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VI_LOOP
({
int sh = simm5 & (sew - 1) & 0x1f;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ int64_t val = vs2;
+
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
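
The point of copying vs2 into a separate, wider val before INT_ROUNDING is that the rounding increment must not overflow or clobber the source element. A standalone sketch for the rnu mode only; ssra_rnu is an illustrative name, and __int128 (a GCC/Clang extension) stands in for the simulator's int128_t.

#include <cstdint>

// Scaled arithmetic shift right with round-to-nearest-up: widen, add the
// rounding increment for the discarded bits, then shift.
static int64_t ssra_rnu(int64_t vs2, int sh)
{
  if (sh == 0)
    return vs2;                        // nothing discarded, nothing to round
  __int128 val = vs2;                  // wider temporary, as in the diff
  val += (__int128)1 << (sh - 1);      // rnu rounding of the low sh bits
  return (int64_t)(val >> sh);
}
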
diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h
index e697b52..7bbc766 100644
--- a/riscv/insns/vssra_vv.h
+++ b/riscv/insns/vssra_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VV_LOOP
({
int sh = vs1 & (sew - 1);
+ int128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h
index 8d7ad20..068a22b 100644
--- a/riscv/insns/vssra_vx.h
+++ b/riscv/insns/vssra_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VX_LOOP
({
int sh = rs1 & (sew - 1);
+ int128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
index 8a10df0..bf554ca 100644
--- a/riscv/insns/vssrl_vi.h
+++ b/riscv/insns/vssrl_vi.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VI_ULOOP
({
int sh = simm5 & (sew - 1) & 0x1f;
+ uint64_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h
index f40cd90..a8e5d16 100644
--- a/riscv/insns/vssrl_vv.h
+++ b/riscv/insns/vssrl_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VV_ULOOP
({
int sh = vs1 & (sew - 1);
+ uint128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h
index 5da3f75..ee3cb34 100644
--- a/riscv/insns/vssrl_vx.h
+++ b/riscv/insns/vssrl_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VX_ULOOP
({
int sh = rs1 & (sew - 1);
+ uint128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h
index fd3ee21..18fe4fb 100644
--- a/riscv/insns/vssub_vv.h
+++ b/riscv/insns/vssub_vv.h
@@ -1,4 +1,5 @@
// vssub.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h
index 5c5c781..7a01125 100644
--- a/riscv/insns/vssub_vx.h
+++ b/riscv/insns/vssub_vx.h
@@ -1,4 +1,5 @@
// vssub.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h
index c5c74fe..e58076e 100644
--- a/riscv/insns/vssubu_vv.h
+++ b/riscv/insns/vssubu_vv.h
@@ -1,4 +1,5 @@
// vssubu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h
index 12cfdbb..556c759 100644
--- a/riscv/insns/vssubu_vx.h
+++ b/riscv/insns/vssubu_vx.h
@@ -1,4 +1,5 @@
// vssubu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vsuxb_v.h b/riscv/insns/vsuxb_v.h
index cf928f8..03f1980 100644
--- a/riscv/insns/vsuxb_v.h
+++ b/riscv/insns/vsuxb_v.h
@@ -1,6 +1,7 @@
// vsuxb.v and vsxseg[2-8]b.v
-require_vector;
require(P.VU.vsew >= e8);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
reg_t vl = P.VU.vl;
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
@@ -8,30 +9,25 @@ reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (P.VU.vsew) {
case e8:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint8_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint8_t>(vs3, vreg_inx));
break;
case e16:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint16_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
break;
case e32:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
+ MMU.store_uint8(baseAddr + index[i],
P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsuxe_v.h b/riscv/insns/vsuxe_v.h
index 8bd7545..22d6fb5 100644
--- a/riscv/insns/vsuxe_v.h
+++ b/riscv/insns/vsuxe_v.h
@@ -1,38 +1,34 @@
// vsxe.v and vsxseg[2-8]e.v
-require_vector;
const reg_t sew = P.VU.vsew;
const reg_t vl = P.VU.vl;
require(sew >= e8 && sew <= e64);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (sew) {
case e8:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint8_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint8_t>(vs3, vreg_inx));
break;
case e16:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint16_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
break;
case e32:
- if (is_valid)
- MMU.store_uint32(baseAddr + index[i],
- P.VU.elt<uint32_t>(vs3, vreg_inx));
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint64(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint64(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsuxh_v.h b/riscv/insns/vsuxh_v.h
index 1d5a1bd..a34bc27 100644
--- a/riscv/insns/vsuxh_v.h
+++ b/riscv/insns/vsuxh_v.h
@@ -1,6 +1,7 @@
// vsxh.v and vsxseg[2-8]h.v
-require_vector;
require(P.VU.vsew >= e16);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
reg_t vl = P.VU.vl;
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
@@ -8,25 +9,21 @@ reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (P.VU.vsew) {
case e16:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint16_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
break;
case e32:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint32_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsuxw_v.h b/riscv/insns/vsuxw_v.h
index ec1a8fe..f42092d 100644
--- a/riscv/insns/vsuxw_v.h
+++ b/riscv/insns/vsuxw_v.h
@@ -1,6 +1,7 @@
// vsxw.v and vsxseg[2-8]w.v
-require_vector;
require(P.VU.vsew >= e32);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
reg_t vl = P.VU.vl;
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
@@ -8,20 +9,17 @@ reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (P.VU.vsew) {
case e32:
- if (is_valid)
- MMU.store_uint32(baseAddr + index[i],
- P.VU.elt<uint32_t>(vs3, vreg_inx));
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint32(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsxb_v.h b/riscv/insns/vsxb_v.h
index 3e50597..fb567fb 100644
--- a/riscv/insns/vsxb_v.h
+++ b/riscv/insns/vsxb_v.h
@@ -1,4 +1,5 @@
// vsxb.v and vsxseg[2-8]b.v
require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxe_v.h b/riscv/insns/vsxe_v.h
index 28984ac..78c6605 100644
--- a/riscv/insns/vsxe_v.h
+++ b/riscv/insns/vsxe_v.h
@@ -1,6 +1,7 @@
// vsxe.v and vsxseg[2-8]e.v
reg_t sew = P.VU.vsew;
require(sew >= e8 && sew <= e64);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
if (sew == e8) {
VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxh_v.h b/riscv/insns/vsxh_v.h
index 2e5506a..6b0fcfd 100644
--- a/riscv/insns/vsxh_v.h
+++ b/riscv/insns/vsxh_v.h
@@ -1,4 +1,5 @@
// vsxh.v and vsxseg[2-8]h.v
require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_ST(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vsxw_v.h b/riscv/insns/vsxw_v.h
index 9a2119f..2223d5b 100644
--- a/riscv/insns/vsxw_v.h
+++ b/riscv/insns/vsxw_v.h
@@ -1,4 +1,5 @@
// vsxw.v and vsxseg[2-8]w.v
require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_ST(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vwsmacc_vv.h b/riscv/insns/vwsmacc_vv.h
index 86d588d..42c21db 100644
--- a/riscv/insns/vwsmacc_vv.h
+++ b/riscv/insns/vwsmacc_vv.h
@@ -1,2 +1,2 @@
// vwsmacc.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmacc_vx.h b/riscv/insns/vwsmacc_vx.h
index f0f04a3..2095665 100644
--- a/riscv/insns/vwsmacc_vx.h
+++ b/riscv/insns/vwsmacc_vx.h
@@ -1,2 +1,2 @@
// vwsmacc.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccsu_vv.h b/riscv/insns/vwsmaccsu_vv.h
index cf1aa1e..9df7833 100644
--- a/riscv/insns/vwsmaccsu_vv.h
+++ b/riscv/insns/vwsmaccsu_vv.h
@@ -1,2 +1,2 @@
// vwsmaccsu.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SU_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccsu_vx.h b/riscv/insns/vwsmaccsu_vx.h
index 681c309..8565c98 100644
--- a/riscv/insns/vwsmaccsu_vx.h
+++ b/riscv/insns/vwsmaccsu_vx.h
@@ -1,2 +1,2 @@
// vwsmaccsu.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SU_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccu_vv.h b/riscv/insns/vwsmaccu_vv.h
index e873d93..7075247 100644
--- a/riscv/insns/vwsmaccu_vv.h
+++ b/riscv/insns/vwsmaccu_vv.h
@@ -1,2 +1,2 @@
// vwsmaccu.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_USSMA(vs1);
+VI_VVX_LOOP_WIDE_USSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccu_vx.h b/riscv/insns/vwsmaccu_vx.h
index 7318fa7..15027cf 100644
--- a/riscv/insns/vwsmaccu_vx.h
+++ b/riscv/insns/vwsmaccu_vx.h
@@ -1,2 +1,2 @@
// vwsmaccu vd, vs2, rs1
-VI_VVX_LOOP_WIDE_USSMA(rs1);
+VI_VVX_LOOP_WIDE_USSMA(rs1, false);
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 00d36bc..59fa062 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -208,7 +208,7 @@ void vectorUnit_t::reset(){
set_vl(-1, 0, -1); // default to illegal configuration
}
-reg_t vectorUnit_t::set_vl(uint64_t regId, reg_t reqVL, reg_t newType){
+reg_t vectorUnit_t::set_vl(int regId, reg_t reqVL, reg_t newType){
if (vtype != newType){
vtype = newType;
vsew = 1 << (BITS(newType, 4, 2) + 3);
@@ -218,11 +218,24 @@ reg_t vectorUnit_t::set_vl(uint64_t regId, reg_t reqVL, reg_t newType){
vmlen = vsew / vlmul;
reg_mask = (NVPR-1) & ~(vlmul-1);
- vill = vsew > e64 || vediv != 1 || (newType >> 7) != 0;
- if (vill)
+ vill = vsew > ELEN || vediv != 1 || (newType >> 7) != 0;
+ if (vill) {
vlmax = 0;
+ vtype = UINT64_MAX << (p->get_xlen() - 1);
+ }
+ }
+
+ // set vl
+ if (vlmax == 0) {
+ vl = 0;
+ } else if (regId == 0) {
+ vl = vl > vlmax ? vlmax : vl;
+ } else if (regId == -1) {
+ vl = vlmax;
+ } else if (regId >= 0) {
+ vl = reqVL > vlmax ? vlmax : reqVL;
}
- vl = reqVL <= vlmax && regId != 0 ? reqVL : vlmax;
+
vstart = 0;
setvl_count++;
return vl;
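
Condensed, the new vl-selection rule is the function below. The sentinel values of regId (-1 and 0) are taken directly from the branches above; pick_vl is just an illustrative standalone form of that logic, not part of the simulator.

#include <cstdint>

// Mirror of the vl update added to set_vl(): an illegal configuration
// (vlmax == 0) forces vl to 0, regId == 0 keeps the current vl clamped
// to vlmax, regId == -1 selects vlmax, and any other regId takes the
// requested length clamped to vlmax.
static uint64_t pick_vl(int regId, uint64_t reqVL,
                        uint64_t cur_vl, uint64_t vlmax)
{
  if (vlmax == 0)   return 0;
  if (regId == 0)   return cur_vl > vlmax ? vlmax : cur_vl;
  if (regId == -1)  return vlmax;
  return reqVL > vlmax ? vlmax : reqVL;   // regId >= 0
}
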
diff --git a/riscv/processor.h b/riscv/processor.h
index 68e6249..3e72282 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -200,7 +200,7 @@ class vectorUnit_t {
reg_file = 0;
}
- reg_t set_vl(uint64_t regId, reg_t reqVL, reg_t newType);
+ reg_t set_vl(int regId, reg_t reqVL, reg_t newType);
reg_t get_vlen() { return VLEN; }
reg_t get_elen() { return ELEN; }
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index af5bbdc..15ca3b9 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -315,7 +315,6 @@ riscv_insn_ext_v_alu_int = \
vdivu_vx \
vdot_vv \
vdotu_vv \
- vmv_x_s \
vid_v \
viota_m \
vmacc_vv \
@@ -381,6 +380,7 @@ riscv_insn_ext_v_alu_int = \
vmv_v_i \
vmv_v_v \
vmv_v_x \
+ vmv_x_s \
vmxnor_mm \
vmxor_mm \
vnclip_vi \
@@ -590,8 +590,6 @@ riscv_insn_ext_v_alu_fp = \
vmflt_vv \
vmfne_vf \
vmfne_vv \
- vmford_vf \
- vmford_vv \
riscv_insn_ext_v_ldst = \
vlb_v \