Diffstat (limited to 'riscv/insns')
101 files changed, 292 insertions, 338 deletions
diff --git a/riscv/insns/vaadd_vi.h b/riscv/insns/vaadd_vi.h
index 5f8d5f5..6bd1a60 100644
--- a/riscv/insns/vaadd_vi.h
+++ b/riscv/insns/vaadd_vi.h
@@ -1,4 +1,5 @@
 // vaadd: Averaging adds of integers
+VI_CHECK_SSS(false);
 VRM xrm = P.VU.get_vround_mode();
 VI_VI_LOOP
 ({
diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h
index b479970..0a14467 100644
--- a/riscv/insns/vaadd_vv.h
+++ b/riscv/insns/vaadd_vv.h
@@ -1,2 +1,2 @@
 // vaadd.vv vd, vs2, vs1
-VI_VVX_LOOP_AVG(vs1, +);
+VI_VVX_LOOP_AVG(vs1, +, true);
diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h
index c811a0a..ae00d8e 100644
--- a/riscv/insns/vaadd_vx.h
+++ b/riscv/insns/vaadd_vx.h
@@ -1,2 +1,2 @@
 // vaadd.vx vd, vs2, rs1
-VI_VVX_LOOP_AVG(rs1, +);
+VI_VVX_LOOP_AVG(rs1, +, false);
diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h
index 5a5ccc9..a45c18d 100644
--- a/riscv/insns/vasub_vv.h
+++ b/riscv/insns/vasub_vv.h
@@ -1,2 +1,2 @@
 // vasub.vv vd, vs2, vs1
-VI_VVX_LOOP_AVG(vs1, -);
+VI_VVX_LOOP_AVG(vs1, -, true);
diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h
index c3cad4b..4e8dba1 100644
--- a/riscv/insns/vasub_vx.h
+++ b/riscv/insns/vasub_vx.h
@@ -1,2 +1,2 @@
 // vasub.vx vd, vs2, rs1
-VI_VVX_LOOP_AVG(rs1, -);
+VI_VVX_LOOP_AVG(rs1, -, false);
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h
index b056b0e..77e91bf 100644
--- a/riscv/insns/vcompress_vm.h
+++ b/riscv/insns/vcompress_vm.h
@@ -1,14 +1,13 @@
 // vcompress vd, vs2, vs1
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
 require(P.VU.vstart == 0);
-reg_t sew = P.VU.vsew;
-reg_t vl = P.VU.vl;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs1(), 1));
+
 reg_t pos = 0;
-for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+
+VI_GENERAL_LOOP_BASE
   const int mlen = P.VU.vmlen;
   const int midx = (mlen * i) / 64;
   const int mpos = (mlen * i) % 64;
@@ -32,10 +31,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
     ++pos;
   }
-}
-
-if (vl > 0 && TAIL_ZEROING) {
-  uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, pos * ((sew >> 3) * 1));
-  memset(tail, 0, (P.VU.vlmax - pos) * ((sew >> 3) * 1));
-}
-
+VI_LOOP_END;
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
index 311f875..f6604fb 100644
--- a/riscv/insns/vfcvt_f_x_v.h
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -1,5 +1,5 @@
 // vfcvt.f.x.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
 ({
   auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
   vd = i32_to_f32(vs2_i);
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
index ceabea3..2c845ac 100644
--- a/riscv/insns/vfcvt_f_xu_v.h
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -1,5 +1,5 @@
 // vfcvt.f.xu.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
 ({
   auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
   vd = ui32_to_f32(vs2_u);
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
index ee53c6d..a9eedc4 100644
--- a/riscv/insns/vfcvt_x_f_v.h
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -1,5 +1,5 @@
 // vfcvt.x.f.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
 ({
   P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
 })
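The vaadd/vasub hunks above only pass an extra operand-kind flag to VI_VVX_LOOP_AVG; the averaging operation itself is unchanged: form the sum or difference with one extra bit, round the bit about to be discarded, and shift right by one. A minimal scalar sketch, assuming round-to-nearest-up (the real macro takes the rounding mode from vxrm):

#include <cstdint>

// Illustrative only: averaging add of two signed elements with
// round-to-nearest-up applied to the single bit shifted out.
static int64_t avg_add_rnu(int64_t a, int64_t b)
{
  int64_t sum = a + b;        // needs one extra bit; int64_t is wide enough for SEW <= 32
  int64_t lost = sum & 1;     // the bit discarded by the >> 1
  return (sum >> 1) + lost;   // vd = (a + b + rnd) >> 1
}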
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index 6d12bce..ea78165 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -1,13 +1,7 @@
 // vfmerge_vf vd, vs2, vs1, vm
-require_extension('F');
-require_fp;
-require(P.VU.vsew == 32);
-require_vector;
-reg_t vl = P.VU.vl;
+VI_CHECK_SSS(false);
+VI_VFP_COMMON;
 reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
 for (reg_t i=P.VU.vstart; i<vl; ++i) {
   auto &vd = P.VU.elt<float32_t>(rd_num, i);
   auto rs1 = f32(READ_FREG(rs1_num));
@@ -20,6 +14,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
   vd = use_first ? rs1 : vs2;
 }
 
-VI_TAIL_ZERO(1);
 P.VU.vstart = 0;
-set_fp_exceptions;
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index c6dbaff..066db80 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -1,6 +1,5 @@
 // vfmv_f_s: rd = vs2[0] (rs1=0)
 require_vector;
-require(insn.v_vm() == 1);
 require_fp;
 require_extension('F');
 require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index cb81008..8ff6094 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -15,17 +15,5 @@ if (vl > 0) {
   else
     P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
 
-  const reg_t max_len = P.VU.VLEN / sew;
-  for (reg_t i = 1; i < max_len; ++i) {
-    switch(sew) {
-    case e32:
-      P.VU.elt<uint32_t>(rd_num, i) = 0;
-      break;
-    default:
-      require(false);
-      break;
-    }
-  }
-
   vl = 0;
 }
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index c85a3e9..f323263 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -1,13 +1,7 @@
-// vfmerge_vf vd, vs2, vs1, vm
-require_extension('F');
-require_fp;
-require(P.VU.vsew == 32);
-require_vector;
-reg_t vl = P.VU.vl;
+// vfmv_vf vd, vs1
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+VI_VFP_COMMON
 reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
 for (reg_t i=P.VU.vstart; i<vl; ++i) {
   auto &vd = P.VU.elt<float32_t>(rd_num, i);
   auto rs1 = f32(READ_FREG(rs1_num));
@@ -15,6 +9,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
   vd = rs1;
 }
 
-VI_TAIL_ZERO(1);
 P.VU.vstart = 0;
-set_fp_exceptions;
diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h
index df6dd04..25422d6 100644
--- a/riscv/insns/vid_v.h
+++ b/riscv/insns/vid_v.h
@@ -6,6 +6,9 @@ reg_t sew = P.VU.vsew;
 reg_t rd_num = insn.rd();
 reg_t rs1_num = insn.rs1();
 reg_t rs2_num = insn.rs2();
+require((rd_num & (P.VU.vlmul - 1)) == 0);
+if (insn.v_vm() == 0 && P.VU.vlmul >= 2) \
+  require(insn.rd() != 0);
 
 for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
   VI_LOOP_ELEMENT_SKIP();
@@ -26,5 +29,4 @@ for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
   }
 }
 
-VI_TAIL_ZERO(1);
 P.VU.vstart = 0;
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index fde0291..04bfcd8 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -7,6 +7,10 @@ reg_t rd_num = insn.rd();
 reg_t rs1_num = insn.rs1();
 reg_t rs2_num = insn.rs2();
 require(P.VU.vstart == 0);
+require(!is_overlapped(rd_num, P.VU.vlmul, rs2_num, 1));
+if (insn.v_vm() == 0)
+  require(!is_overlapped(rd_num, P.VU.vlmul, 0, 1));
+require((rd_num & (P.VU.vlmul - 1)) == 0);
 
 int cnt = 0;
 for (reg_t i = 0; i < vl; ++i) {
@@ -49,4 +53,3 @@ for (reg_t i = 0; i < vl; ++i) {
   }
 }
 
-VI_TAIL_ZERO(1);
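Several hunks above (vcompress, vfmv.v.f, vid.v, viota.m) add the same legality checks: a register that names an LMUL-sized group must be a multiple of LMUL, and a destination group may not overlap the mask register or a mask/index source. A rough sketch of the two conditions being tested (spike has its own is_overlapped helper; this standalone version is only an assumption for illustration):

#include <cstdint>

// Does group [a, a+asize) share a vector register with group [b, b+bsize)?
static bool groups_overlap(unsigned a, unsigned asize, unsigned b, unsigned bsize)
{
  return (a < b + bsize) && (b < a + asize);
}

// A group base register must be LMUL-aligned, e.g. with LMUL=4 only v0, v4, v8, ... are legal.
static bool group_aligned(unsigned vreg, unsigned vlmul)
{
  return (vreg & (vlmul - 1)) == 0;   // same test as require((insn.rd() & (P.VU.vlmul - 1)) == 0)
}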
diff --git a/riscv/insns/vleff_v.h b/riscv/insns/vleff_v.h
index ec2777a..e858de9 100644
--- a/riscv/insns/vleff_v.h
+++ b/riscv/insns/vleff_v.h
@@ -1,7 +1,7 @@
-require_vector;
 require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
 const reg_t nf = insn.v_nf() + 1;
 require((nf * P.VU.vlmul) <= (NVPR / 4));
+VI_CHECK_SXX;
 const reg_t sew = P.VU.vsew;
 const reg_t vl = P.VU.vl;
 const reg_t baseAddr = RS1;
@@ -9,7 +9,6 @@ const reg_t rd_num = insn.rd();
 bool early_stop = false;
 const reg_t vlmul = P.VU.vlmul;
 for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   bool is_zero = false;
   VI_STRIP(i);
   VI_ELEMENT_SKIP(i);
@@ -20,23 +19,23 @@ for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
     switch (sew) {
     case e8:
       P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) =
-        is_valid ? MMU.load_uint8(baseAddr + (i * nf + fn) * 1) : 0;
-      is_zero = is_valid && P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+        MMU.load_uint8(baseAddr + (i * nf + fn) * 1);
+      is_zero = P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
       break;
     case e16:
       P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) =
-        is_valid ? MMU.load_uint16(baseAddr + (i * nf + fn) * 2) : 0;
-      is_zero = is_valid && P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+        MMU.load_uint16(baseAddr + (i * nf + fn) * 2);
+      is_zero = P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
       break;
     case e32:
       P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) =
-        is_valid ? MMU.load_uint32(baseAddr + (i * nf + fn) * 4) : 0;
-      is_zero = is_valid && P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+        MMU.load_uint32(baseAddr + (i * nf + fn) * 4);
+      is_zero = P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
       break;
     case e64:
       P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) =
-        is_valid ? MMU.load_uint64(baseAddr + (i * nf + fn) * 8) : 0;
-      is_zero = is_valid && P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+        MMU.load_uint64(baseAddr + (i * nf + fn) * 8);
+      is_zero = P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
       break;
     }
diff --git a/riscv/insns/vlxb_v.h b/riscv/insns/vlxb_v.h
index 5a99bd3..57ce8c8 100644
--- a/riscv/insns/vlxb_v.h
+++ b/riscv/insns/vlxb_v.h
@@ -1,4 +1,5 @@
 // vlxb.v and vlsseg[2-8]b.v
 require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxbu_v.h b/riscv/insns/vlxbu_v.h
index daf2d2b..d8e3dd6 100644
--- a/riscv/insns/vlxbu_v.h
+++ b/riscv/insns/vlxbu_v.h
@@ -1,4 +1,5 @@
 // vlxbu.v and vlxseg[2-8]bu.v
 require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_LD(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vlxe_v.h b/riscv/insns/vlxe_v.h
index b1190a8..1055eca 100644
--- a/riscv/insns/vlxe_v.h
+++ b/riscv/insns/vlxe_v.h
@@ -1,5 +1,6 @@
 // vlxe.v and vlxseg[2-8]e.v
 reg_t sew = P.VU.vsew;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 if (sew == e8) {
   VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxh_v.h b/riscv/insns/vlxh_v.h
index 98145db..9f4c3a1 100644
--- a/riscv/insns/vlxh_v.h
+++ b/riscv/insns/vlxh_v.h
@@ -1,4 +1,5 @@
 // vlxh.v and vlxseg[2-8]h.v
 require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_LD(index[i], fn, int16, 2);
diff --git a/riscv/insns/vlxhu_v.h b/riscv/insns/vlxhu_v.h
index 27d549c..9283127 100644
--- a/riscv/insns/vlxhu_v.h
+++ b/riscv/insns/vlxhu_v.h
@@ -1,4 +1,5 @@
 // vlxh.v and vlxseg[2-8]h.v
 require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_LD(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vlxw_v.h b/riscv/insns/vlxw_v.h
index 83300f0..c1117a2 100644
--- a/riscv/insns/vlxw_v.h
+++ b/riscv/insns/vlxw_v.h
@@ -1,5 +1,6 @@
 // vlxw.v and vlxseg[2-8]w.v
 require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_LD(index[i], fn, int32, 4);
diff --git a/riscv/insns/vlxwu_v.h b/riscv/insns/vlxwu_v.h
index a2f9913..d3034bd 100644
--- a/riscv/insns/vlxwu_v.h
+++ b/riscv/insns/vlxwu_v.h
@@ -1,4 +1,5 @@
 // vlxwu.v and vlxseg[2-8]wu.v
 require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_LD(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h
index fd79089..a8185d1 100644
--- a/riscv/insns/vmadc_vim.h
+++ b/riscv/insns/vmadc_vim.h
@@ -1,5 +1,4 @@
 // vmadc.vim vd, vs2, simm5
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
 VI_XI_LOOP_CARRY
 ({
   auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h
index 82042ca..8d58658 100644
--- a/riscv/insns/vmadc_vvm.h
+++ b/riscv/insns/vmadc_vvm.h
@@ -1,5 +1,4 @@
 // vmadc.vvm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
 VI_VV_LOOP_CARRY
 ({
   auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h
index 8f26584..0b6273a 100644
--- a/riscv/insns/vmadc_vxm.h
+++ b/riscv/insns/vmadc_vxm.h
@@ -1,5 +1,4 @@
 // vadc.vx vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
 VI_XI_LOOP_CARRY
 ({
   auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h
index 13354d6..c6c87c7 100644
--- a/riscv/insns/vmerge_vim.h
+++ b/riscv/insns/vmerge_vim.h
@@ -1,4 +1,5 @@
 // vmerge.vim vd, vs2, simm5
+VI_CHECK_SSS(false);
 VI_VVXI_MERGE_LOOP
 ({
   int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h
index 7530b40..97a0182 100644
--- a/riscv/insns/vmerge_vvm.h
+++ b/riscv/insns/vmerge_vvm.h
@@ -1,4 +1,5 @@
 // vmerge.vvm vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_VVXI_MERGE_LOOP
 ({
   int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h
index b1757fa..de7df91 100644
--- a/riscv/insns/vmerge_vxm.h
+++ b/riscv/insns/vmerge_vxm.h
@@ -1,4 +1,5 @@
 // vmerge.vxm vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_VVXI_MERGE_LOOP
 ({
   int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
index cedf4b9..f0e7109 100644
--- a/riscv/insns/vmfeq_vf.h
+++ b/riscv/insns/vmfeq_vf.h
@@ -2,4 +2,4 @@
 VI_VFP_LOOP_CMP
 ({
   res = f32_eq(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
index 7e76cac..1be3a69 100644
--- a/riscv/insns/vmfeq_vv.h
+++ b/riscv/insns/vmfeq_vv.h
@@ -2,4 +2,4 @@
 VI_VFP_LOOP_CMP
 ({
   res = f32_eq(vs2, vs1);
-})
+}, true)
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
index 7eade89..1c68366 100644
--- a/riscv/insns/vmfge_vf.h
+++ b/riscv/insns/vmfge_vf.h
@@ -1,5 +1,5 @@
 // vfge.vf vd, vs2, rs1
 VI_VFP_LOOP_CMP
 ({
-  res = f32_le_quiet(rs1, vs2);
-})
+  res = f32_le(rs1, vs2);
+}, false)
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
index 6115d06..0979185 100644
--- a/riscv/insns/vmfgt_vf.h
+++ b/riscv/insns/vmfgt_vf.h
@@ -1,5 +1,5 @@
 // vfgt.vf vd, vs2, rs1
 VI_VFP_LOOP_CMP
 ({
-  res = f32_lt_quiet(rs1, vs2);
-})
+  res = f32_lt(rs1, vs2);
+}, false)
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
index 998b93b..90607ec 100644
--- a/riscv/insns/vmfle_vf.h
+++ b/riscv/insns/vmfle_vf.h
@@ -2,4 +2,4 @@
 VI_VFP_LOOP_CMP
 ({
   res = f32_le(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
index c716312..6ccdfec 100644
--- a/riscv/insns/vmfle_vv.h
+++ b/riscv/insns/vmfle_vv.h
@@ -1,5 +1,5 @@
 // vfle.vv vd, vs2, rs1
 VI_VFP_LOOP_CMP
 ({
-  res = f32_le_quiet(vs2, vs1);
-})
+  res = f32_le(vs2, vs1);
+}, true)
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
index af436e4..6b71a4a 100644
--- a/riscv/insns/vmflt_vf.h
+++ b/riscv/insns/vmflt_vf.h
@@ -1,5 +1,5 @@
 // vflt.vf vd, vs2, rs1
 VI_VFP_LOOP_CMP
 ({
-  res = f32_lt_quiet(vs2, rs1);
-})
+  res = f32_lt(vs2, rs1);
+}, false)
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
index ded867d..a2ed8e3 100644
--- a/riscv/insns/vmflt_vv.h
+++ b/riscv/insns/vmflt_vv.h
@@ -1,5 +1,5 @@
 // vflt.vv vd, vs2, vs1
 VI_VFP_LOOP_CMP
 ({
-  res = f32_lt_quiet(vs2, vs1);
-})
+  res = f32_lt(vs2, vs1);
+}, true)
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
index ac2eced..ef63678 100644
--- a/riscv/insns/vmfne_vf.h
+++ b/riscv/insns/vmfne_vf.h
@@ -2,4 +2,4 @@
 VI_VFP_LOOP_CMP
 ({
   res = !f32_eq(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
index 3fa8beb..8378a23 100644
--- a/riscv/insns/vmfne_vv.h
+++ b/riscv/insns/vmfne_vv.h
@@ -2,4 +2,4 @@
 VI_VFP_LOOP_CMP
 ({
   res = !f32_eq(vs2, vs1);
-})
+}, true)
diff --git a/riscv/insns/vmford_vf.h b/riscv/insns/vmford_vf.h
deleted file mode 100644
index b5e74f2..0000000
--- a/riscv/insns/vmford_vf.h
+++ /dev/null
@@ -1,5 +0,0 @@
-// vford.vf vd, vs2, rs1, vm
-VI_VFP_LOOP_CMP
-({
-  res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1));
-})
diff --git a/riscv/insns/vmford_vv.h b/riscv/insns/vmford_vv.h
deleted file mode 100644
index 2e459c1..0000000
--- a/riscv/insns/vmford_vv.h
+++ /dev/null
@@ -1,5 +0,0 @@
-// vford.vv vd, vs2, vs1, vm
-VI_VFP_LOOP_CMP
-({
-  res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1));
-})
diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h
index 3804ba8..f4ce6f4 100644
--- a/riscv/insns/vmsbc_vvm.h
+++ b/riscv/insns/vmsbc_vvm.h
@@ -1,5 +1,4 @@
 // vmsbc.vvm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
 VI_VV_LOOP_CARRY
 ({
   auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h
index d5332f5..aec4409 100644
--- a/riscv/insns/vmsbc_vxm.h
+++ b/riscv/insns/vmsbc_vxm.h
@@ -1,5 +1,4 @@
 // vmsbc.vxm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
 VI_XI_LOOP_CARRY
 ({
   auto &v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
index 3047cca..443fcbb 100644
--- a/riscv/insns/vmsbf_m.h
+++ b/riscv/insns/vmsbf_m.h
@@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
   }
 }
 
-VI_TAIL_ZERO_MASK(rd_num);
 P.VU.vstart = 0;
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
index 826e7cd..381088b 100644
--- a/riscv/insns/vmsif_m.h
+++ b/riscv/insns/vmsif_m.h
@@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
   }
 }
 
-VI_TAIL_ZERO_MASK(rd_num);
 P.VU.vstart = 0;
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
index 48805f7..d66002d 100644
--- a/riscv/insns/vmsof_m.h
+++ b/riscv/insns/vmsof_m.h
@@ -28,5 +28,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
   }
 }
 
-VI_TAIL_ZERO_MASK(rd_num);
 P.VU.vstart = 0;
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
index 59882da..b918551 100644
--- a/riscv/insns/vmulhsu_vv.h
+++ b/riscv/insns/vmulhsu_vv.h
@@ -1,4 +1,5 @@
 // vmulhsu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 switch(sew) {
 case e8: {
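The floating-point compare hunks above replace SoftFloat's quiet predicates with f32_lt/f32_le. In Berkeley SoftFloat 3 those are the signaling comparisons, which raise the invalid-operation flag whenever either operand is NaN, while the *_quiet forms only do so for signaling NaNs; f32_eq stays a quiet comparison in both versions. A sketch of a single element test under that assumption:

#include "softfloat.h"   // Berkeley SoftFloat 3 headers, as used by spike

// One vmflt.vv element: unordered operands compare false, and with
// f32_lt (unlike f32_lt_quiet) a quiet NaN operand also sets the invalid flag.
static bool vmflt_element(float32_t vs2, float32_t vs1)
{
  return f32_lt(vs2, vs1);
}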
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
index d39615a..cb2db3d 100644
--- a/riscv/insns/vmulhsu_vx.h
+++ b/riscv/insns/vmulhsu_vx.h
@@ -1,4 +1,5 @@
 // vmulhsu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 switch(sew) {
 case e8: {
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
index 38b2697..948b5be 100644
--- a/riscv/insns/vmv_s_x.h
+++ b/riscv/insns/vmv_s_x.h
@@ -24,23 +24,5 @@ if (vl > 0) {
     break;
   }
 
-  const reg_t max_len = P.VU.VLEN / sew;
-  for (reg_t i = 1; i < max_len; ++i) {
-    switch(sew) {
-    case e8:
-      P.VU.elt<uint8_t>(rd_num, i) = 0;
-      break;
-    case e16:
-      P.VU.elt<uint16_t>(rd_num, i) = 0;
-      break;
-    case e32:
-      P.VU.elt<uint32_t>(rd_num, i) = 0;
-      break;
-    default:
-      P.VU.elt<uint64_t>(rd_num, i) = 0;
-      break;
-    }
-  }
-
   vl = 0;
 }
diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h
index 734010b..a4f9a5c 100644
--- a/riscv/insns/vmv_v_v.h
+++ b/riscv/insns/vmv_v_v.h
@@ -1,4 +1,5 @@
 // vvmv.v.v vd, vs1
+require((insn.rs1() & (P.VU.vlmul - 1)) == 0);
 VI_VVXI_MERGE_LOOP
 ({
   vd = vs1;
diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h
index f22c2dd..50f2e79 100644
--- a/riscv/insns/vmv_x_s.h
+++ b/riscv/insns/vmv_x_s.h
@@ -1,25 +1,28 @@
-// vext_x_v: rd = vs2[0]
+// vmv_x_s: rd = vs2[rs1]
 require(insn.v_vm() == 1);
 uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen());
-VI_LOOP_BASE
-VI_LOOP_END_NO_TAIL_ZERO
-switch(sew) {
-case e8:
-  WRITE_RD(P.VU.elt<uint8_t>(rs2_num, 0));
-  break;
-case e16:
-  WRITE_RD(P.VU.elt<uint16_t>(rs2_num, 0));
-  break;
-case e32:
-  if (P.get_max_xlen() == 32)
-    WRITE_RD(P.VU.elt<int32_t>(rs2_num, 0));
-  else
-    WRITE_RD(P.VU.elt<uint32_t>(rs2_num, 0));
-  break;
-case e64:
-  if (P.get_max_xlen() <= sew)
-    WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0) & xmask);
-  else
-    WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0));
-  break;
+reg_t rs1 = RS1;
+reg_t sew = P.VU.vsew;
+reg_t rs2_num = insn.rs2();
+
+if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) {
+  WRITE_RD(0);
+} else {
+  switch(sew) {
+  case e8:
+    WRITE_RD(P.VU.elt<int8_t>(rs2_num, rs1));
+    break;
+  case e16:
+    WRITE_RD(P.VU.elt<int16_t>(rs2_num, rs1));
+    break;
+  case e32:
+    WRITE_RD(P.VU.elt<int32_t>(rs2_num, rs1));
+    break;
+  case e64:
+    if (P.get_max_xlen() <= sew)
+      WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1) & xmask);
+    else
+      WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1));
+    break;
+  }
 }
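With the rewrite above, vmv_x_s no longer runs a vector loop: it reads the element of vs2 selected by the value in rs1 (the old vext.x.v behaviour), sign-extends it into x[rd], and writes 0 when the index is outside VLEN/SEW. A simplified model of the element read, using a hypothetical container in place of the real register file and showing only the SEW=32 case:

#include <cstdint>
#include <vector>

// x[rd] = (idx < VLEN/SEW) ? sign_extend(vs2[idx]) : 0
static int64_t vmv_x_s_model(const std::vector<int32_t>& vs2, uint64_t idx)
{
  if (idx >= vs2.size())
    return 0;                               // out-of-range index reads as zero
  return static_cast<int64_t>(vs2[idx]);    // int32_t -> int64_t sign-extends
}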
diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h
index ca27593..eb21710 100644
--- a/riscv/insns/vnclip_vi.h
+++ b/riscv/insns/vnclip_vi.h
@@ -4,14 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
 int64_t int_min = -(1 << (P.VU.vsew - 1));
 VI_VVXI_LOOP_NARROW
 ({
-  int64_t result = vs2;
-// rounding
-  INT_ROUNDING(result, xrm, sew);
+  unsigned shift = zimm5 & ((sew * 2) - 1);
+
+  // rounding
+  INT_ROUNDING(result, xrm, shift);
 
-  result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+  result = result >> shift;
 
-// saturation
+  // saturation
   if (result < int_min) {
     result = int_min;
     P.VU.vxsat = 1;
@@ -21,4 +22,4 @@
   }
   vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h
index 7bcb4cb..92575a6 100644
--- a/riscv/insns/vnclip_vv.h
+++ b/riscv/insns/vnclip_vv.h
@@ -4,20 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
 int64_t int_min = -(1 << (P.VU.vsew - 1));
 VI_VVXI_LOOP_NARROW
 ({
+  int128_t result = vs2;
+  unsigned shift = vs1 & ((sew * 2) - 1);
 
-  int64_t result = vs2;
-// rounding
-  INT_ROUNDING(result, xrm, sew);
+  // rounding
+  INT_ROUNDING(result, xrm, shift);
 
-// unsigned shifting to rs1
-  uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
-  if (unsigned_shift_amount >= (2 * sew)) {
-    unsigned_shift_amount = 2 * sew - 1;
-  }
-
-  result = (vsext(result, sew * 2)) >> unsigned_shift_amount;
+  result = result >> shift;
 
-// saturation
+  // saturation
   if (result < int_min) {
     result = int_min;
     P.VU.vxsat = 1;
@@ -27,4 +22,4 @@
   }
   vd = result;
-})
+}, true)
diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h
index b66e830..96409de 100644
--- a/riscv/insns/vnclip_vx.h
+++ b/riscv/insns/vnclip_vx.h
@@ -4,19 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
 int64_t int_min = -(1 << (P.VU.vsew - 1));
 VI_VVXI_LOOP_NARROW
 ({
+  int128_t result = vs2;
+  unsigned shift = rs1 & ((sew * 2) - 1);
 
-  int64_t result = vs2;
-// rounding
-  INT_ROUNDING(result, xrm, sew);
+  // rounding
+  INT_ROUNDING(result, xrm, shift);
 
-// unsigned shifting to rs1
-  uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
-  if (unsigned_shift_amount >= (2 * sew)) {
-    unsigned_shift_amount = 2 * sew - 1;
-  }
-  result = vsext(result, sew * 2) >> unsigned_shift_amount;
+  result = result >> shift;
 
-// saturation
+  // saturation
   if (result < int_min) {
     result = int_min;
     P.VU.vxsat = 1;
@@ -26,4 +22,4 @@
   }
   vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h
index 61cb015..b1527f7 100644
--- a/riscv/insns/vnclipu_vi.h
+++ b/riscv/insns/vnclipu_vi.h
@@ -4,11 +4,13 @@ uint64_t int_max = ~(-1ll << P.VU.vsew);
 VI_VVXI_LOOP_NARROW
 ({
   uint64_t result = vs2_u;
+  unsigned shift = zimm5 & ((sew * 2) - 1);
+
   // rounding
-  INT_ROUNDING(result, xrm, sew);
+  INT_ROUNDING(result, xrm, shift);
 
   // unsigned shifting to rs1
-  result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+  result = result >> shift;
 
   // saturation
   if (result & (uint64_t)(-1ll << sew)) {
@@ -17,4 +19,4 @@
   }
   vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h
index 004f24f..217e82f 100644
--- a/riscv/insns/vnclipu_vv.h
+++ b/riscv/insns/vnclipu_vv.h
@@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode();
 uint64_t int_max = ~(-1ll << P.VU.vsew);
 VI_VVXI_LOOP_NARROW
 ({
+  uint128_t result = vs2_u;
+  unsigned shift = vs1 & ((sew * 2) - 1);
 
-  uint64_t result = vs2_u;
+  // rounding
+  INT_ROUNDING(result, xrm, shift);
 
-// rounding
-  INT_ROUNDING(result, xrm, sew);
+  result = result >> shift;
 
-// unsigned shifting to rs1
-  uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
-  if (unsigned_shift_amount >= (2 * sew)) {
-    result = 0;
-  } else {
-    result = vzext(result, sew * 2) >> unsigned_shift_amount;
-  }
-// saturation
+  // saturation
   if (result & (uint64_t)(-1ll << sew)) {
     result = int_max;
     P.VU.vxsat = 1;
   }
 
   vd = result;
-})
+}, true)
diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h
index 0507a2b..ce15b55 100644
--- a/riscv/insns/vnclipu_vx.h
+++ b/riscv/insns/vnclipu_vx.h
@@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode();
 uint64_t int_max = ~(-1ll << P.VU.vsew);
 VI_VVXI_LOOP_NARROW
 ({
-  uint64_t result = vs2;
+  uint128_t result = vs2_u;
+  unsigned shift = rs1 & ((sew * 2) - 1);
 
-// rounding
-  INT_ROUNDING(result, xrm, sew);
+  // rounding
+  INT_ROUNDING(result, xrm, shift);
 
-// unsigned shifting to rs1
-  uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
-  if (unsigned_shift_amount >= (2 * sew)) {
-    result = 0;
-  } else {
-    result = vzext(result, sew * 2) >> unsigned_shift_amount;
-  }
+  result = result >> shift;
 
-// saturation
+  // saturation
   if (result & (uint64_t)(-1ll << sew)) {
     result = int_max;
     P.VU.vxsat = 1;
   }
 
   vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnsra_vi.h b/riscv/insns/vnsra_vi.h
index 0502ff1..f41979e 100644
--- a/riscv/insns/vnsra_vi.h
+++ b/riscv/insns/vnsra_vi.h
@@ -2,4 +2,4 @@
 VI_VI_LOOP_NSHIFT
 ({
   vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f);
-})
+}, false)
diff --git a/riscv/insns/vnsra_vv.h b/riscv/insns/vnsra_vv.h
index 555ce3f..59f255e 100644
--- a/riscv/insns/vnsra_vv.h
+++ b/riscv/insns/vnsra_vv.h
@@ -2,4 +2,4 @@
 VI_VV_LOOP_NSHIFT
 ({
   vd = vs2 >> (vs1 & (sew * 2 - 1));
-})
+}, true)
diff --git a/riscv/insns/vnsra_vx.h b/riscv/insns/vnsra_vx.h
index 05a55e3..adaa24c 100644
--- a/riscv/insns/vnsra_vx.h
+++ b/riscv/insns/vnsra_vx.h
@@ -2,4 +2,4 @@
 VI_VX_LOOP_NSHIFT
 ({
   vd = vs2 >> (rs1 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vnsrl_vi.h b/riscv/insns/vnsrl_vi.h
index d4dfcf0..91402c0 100644
--- a/riscv/insns/vnsrl_vi.h
+++ b/riscv/insns/vnsrl_vi.h
@@ -2,4 +2,4 @@
 VI_VI_LOOP_NSHIFT
 ({
   vd = vs2_u >> (zimm5 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vnsrl_vv.h b/riscv/insns/vnsrl_vv.h
index ab72b84..609299f 100644
--- a/riscv/insns/vnsrl_vv.h
+++ b/riscv/insns/vnsrl_vv.h
@@ -2,4 +2,4 @@
 VI_VV_LOOP_NSHIFT
 ({
   vd = vs2_u >> (vs1 & (sew * 2 - 1));
-})
+}, true)
diff --git a/riscv/insns/vnsrl_vx.h b/riscv/insns/vnsrl_vx.h
index e149b38..8356a2b 100644
--- a/riscv/insns/vnsrl_vx.h
+++ b/riscv/insns/vnsrl_vx.h
@@ -2,4 +2,4 @@
 VI_VX_LOOP_NSHIFT
 ({
   vd = vs2_u >> (rs1 & (sew * 2 - 1));
-})
+}, false)
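The vnclip/vnclipu rewrites above all follow the same pattern: take the 2*SEW source, add the rounding increment for the bits about to be shifted out, shift right by the masked shift amount, then saturate into SEW bits. A scalar sketch of the signed case with round-to-nearest-up assumed (the macro takes the actual mode from vxrm):

#include <cstdint>

// Hypothetical scalar vnclip for SEW=8 (16-bit source), rnu rounding.
static int8_t vnclip_e8(int16_t wide, unsigned shamt, bool* vxsat)
{
  shamt &= 15;                                      // shift amount is masked to 2*SEW - 1
  int32_t result = wide;
  if (shamt)
    result += int32_t{1} << (shamt - 1);            // round-to-nearest-up increment
  result >>= shamt;
  if (result > INT8_MAX) { result = INT8_MAX; *vxsat = true; }   // saturate high
  if (result < INT8_MIN) { result = INT8_MIN; *vxsat = true; }   // saturate low
  return static_cast<int8_t>(result);
}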
diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h
index eff67b8..cab4a78 100644
--- a/riscv/insns/vrgather_vi.h
+++ b/riscv/insns/vrgather_vi.h
@@ -1,11 +1,14 @@
 // vrgather.vi vd, vs2, zimm5 vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 reg_t zimm5 = insn.v_zimm5();
+
+VI_LOOP_BASE
+
 for (reg_t i = P.VU.vstart; i < vl; ++i) {
   VI_LOOP_ELEMENT_SKIP();
@@ -25,5 +28,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
   }
 }
 
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h
index ce0c2a6..8266c95 100644
--- a/riscv/insns/vrgather_vv.h
+++ b/riscv/insns/vrgather_vv.h
@@ -1,15 +1,12 @@
 // vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
-  VI_LOOP_ELEMENT_SKIP();
-  VI_CHECK_VREG_OVERLAP(rd_num, rs1_num);
-  VI_CHECK_VREG_OVERLAP(rd_num, rs2_num);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs1() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2() && insn.rd() != insn.rs1());
+if (insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
+VI_LOOP_BASE
   switch (sew) {
   case e8: {
     auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
@@ -33,7 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
     break;
   }
   }
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h
index e9ff3b1..15e16b7 100644
--- a/riscv/insns/vrgather_vx.h
+++ b/riscv/insns/vrgather_vx.h
@@ -1,15 +1,13 @@
 // vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 reg_t rs1 = RS1;
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
-  VI_LOOP_ELEMENT_SKIP();
+VI_LOOP_BASE
   switch (sew) {
   case e8:
     P.VU.elt<uint8_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, rs1);
     break;
@@ -24,7 +22,4 @@
   P.VU.elt<uint64_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint64_t>(rs2_num, rs1);
   break;
   }
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h
index de2cb83..c361f08 100644
--- a/riscv/insns/vsadd_vi.h
+++ b/riscv/insns/vsadd_vi.h
@@ -1,4 +1,5 @@
 // vsadd.vi vd, vs2 simm5
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 switch(sew) {
diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h
index 2152bab..ce0ef40 100644
--- a/riscv/insns/vsadd_vv.h
+++ b/riscv/insns/vsadd_vv.h
@@ -1,4 +1,5 @@
 // vsadd.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 bool sat = false;
 switch(sew) {
diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h
index 781e9e8..691f017 100644
--- a/riscv/insns/vsadd_vx.h
+++ b/riscv/insns/vsadd_vx.h
@@ -1,4 +1,5 @@
 // vsadd.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 switch(sew) {
diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h
index 0069df7..04e2540 100644
--- a/riscv/insns/vslide1down_vx.h
+++ b/riscv/insns/vslide1down_vx.h
@@ -1,4 +1,9 @@
 //vslide1down.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 VI_LOOP_BASE
 if (i != vl - 1) {
   switch (sew) {
diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h
index 50cc503..69ce0fd 100644
--- a/riscv/insns/vslide1up_vx.h
+++ b/riscv/insns/vslide1up_vx.h
@@ -1,8 +1,10 @@
 //vslide1up.vx vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
   require(insn.rd() != 0);
-VI_CHECK_SS
 
 VI_LOOP_BASE
 if (i != 0) {
   if (sew == e8) {
diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h
index c21c5f2..dd58c1e 100644
--- a/riscv/insns/vslidedown_vi.h
+++ b/riscv/insns/vslidedown_vi.h
@@ -1,8 +1,14 @@
 // vslidedown.vi vd, vs2, rs1
-VI_LOOP_BASE
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 const reg_t sh = insn.v_zimm5();
-bool is_valid = (i + sh) < P.VU.vlmax;
+VI_LOOP_BASE
+
 reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
 
 if (is_valid) {
   offset = sh;
diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h
index 251740c..9881e0e 100644
--- a/riscv/insns/vslidedown_vx.h
+++ b/riscv/insns/vslidedown_vx.h
@@ -1,11 +1,17 @@
 //vslidedown.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
+const reg_t sh = RS1;
 VI_LOOP_BASE
-reg_t offset = RS1 == (reg_t)-1 ? ((RS1 & (P.VU.vlmax * 2 - 1)) + i) : RS1;
-bool is_valid = offset < P.VU.vlmax;
+reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
 
-if (!is_valid) {
-  offset = 0;
+if (is_valid) {
+  offset = sh;
 }
 
 switch (sew) {
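The vslidedown hunks above drop the old wrap-around special case and simply read vs2[i + OFFSET] when that index still lies inside the register group, writing 0 otherwise. Per element (unmasked, SEW=32 shown), that is roughly:

#include <cstdint>
#include <vector>

// Sketch of one vslidedown element: vd[i] = (i + offset < vlmax) ? vs2[i + offset] : 0
static uint32_t slidedown_elem(const std::vector<uint32_t>& vs2, size_t i,
                               uint64_t offset, size_t vlmax)
{
  return (i + offset < vlmax) ? vs2[i + offset] : 0;
}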
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
index 4135b20..64b4aca 100644
--- a/riscv/insns/vslideup_vi.h
+++ b/riscv/insns/vslideup_vi.h
@@ -1,8 +1,10 @@
 // vslideup.vi vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
   require(insn.rd() != 0);
-VI_CHECK_SS
 
 const reg_t offset = insn.v_zimm5();
 VI_LOOP_BASE
 if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
index bf73fcd..063c061 100644
--- a/riscv/insns/vslideup_vx.h
+++ b/riscv/insns/vslideup_vx.h
@@ -1,4 +1,10 @@
 //vslideup.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+  require(insn.rd() != 0);
+
 const reg_t offset = RS1;
 VI_LOOP_BASE
 if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
index a0c7f99..0807899 100644
--- a/riscv/insns/vsmul_vv.h
+++ b/riscv/insns/vsmul_vv.h
@@ -1,33 +1,32 @@
 // vsmul: Signed saturating and rounding fractional multiply
 VRM xrm = P.VU.get_vround_mode();
-uint64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint64_t int_min = - (1 << (P.VU.vsew - 1));
-uint64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
 
-VI_VV_ULOOP
+VI_VV_LOOP
 ({
-  uint64_t vs1_sign;
-  uint64_t vs2_sign;
-  uint64_t result_sign;
+  int64_t vs1_sign;
+  int64_t vs2_sign;
+  int64_t result_sign;
 
   vs1_sign = vs1 & sign_mask;
   vs2_sign = vs2 & sign_mask;
   bool overflow = vs1 == vs2 && vs1 == int_min;
 
-  uint128_t result = (uint128_t)vs1 * (uint128_t)vs2;
-  result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+  int128_t result = (int128_t)vs1 * (int128_t)vs2;
   result_sign = (vs1_sign ^ vs2_sign) & sign_mask;
+
   // rounding
   INT_ROUNDING(result, xrm, sew - 1);
 
-  // unsigned shifting
+  // remove guard bits
   result = result >> (sew - 1);
 
-  // saturation
+  // saturation
   if (overflow) {
     result = int_max;
     P.VU.vxsat = 1;
-  } else {
-    result |= result_sign;
   }
+
   vd = result;
 })
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
index c7909c7..4326d8f 100644
--- a/riscv/insns/vsmul_vx.h
+++ b/riscv/insns/vsmul_vx.h
@@ -1,34 +1,33 @@
 // vsmul
 VRM xrm = P.VU.get_vround_mode();
-uint128_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint128_t int_min = - (1 << (P.VU.vsew - 1));
-uint128_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
 
-VI_VX_ULOOP
+VI_VX_LOOP
 ({
-  uint128_t rs1_sign;
-  uint128_t vs2_sign;
-  uint128_t result_sign;
+  int64_t rs1_sign;
+  int64_t vs2_sign;
+  int64_t result_sign;
 
   rs1_sign = rs1 & sign_mask;
   vs2_sign = vs2 & sign_mask;
   bool overflow = rs1 == vs2 && rs1 == int_min;
 
-  uint128_t result = (uint128_t)rs1 * (uint128_t)vs2;
-  result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+  int128_t result = (int128_t)rs1 * (int128_t)vs2;
   result_sign = (rs1_sign ^ vs2_sign) & sign_mask;
+
  // rounding
  INT_ROUNDING(result, xrm, sew - 1);
 
-  // unsigned shifting
+  // remove guard bits
   result = result >> (sew - 1);
 
-  // saturation
+  // max saturation
   if (overflow) {
     result = int_max;
     P.VU.vxsat = 1;
-  } else {
-    result |= result_sign;
   }
+
   vd = result;
 })
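The vsmul hunks above compute a signed fractional multiply: the full 2*SEW product is rounded at bit SEW-1, shifted right by SEW-1, and saturated to the maximum only for the single overflowing case (-1.0 times -1.0). A scalar sketch for SEW=32, with round-to-nearest-up assumed in place of the vxrm-selected mode:

#include <cstdint>

// Hypothetical scalar vsmul for SEW=32 (Q31 x Q31 -> Q31), rnu rounding.
static int32_t vsmul_e32(int32_t a, int32_t b, bool* vxsat)
{
  if (a == INT32_MIN && b == INT32_MIN) {   // the only product that overflows Q31
    *vxsat = true;
    return INT32_MAX;
  }
  int64_t prod = static_cast<int64_t>(a) * b;
  prod += int64_t{1} << 30;                 // rounding increment at bit shift-1 (= 30)
  return static_cast<int32_t>(prod >> 31);  // drop the redundant sign bit
}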
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
index ef2390c..c854ca6 100644
--- a/riscv/insns/vssra_vi.h
+++ b/riscv/insns/vssra_vi.h
@@ -3,6 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VI_LOOP
 ({
   int sh = simm5 & (sew - 1) & 0x1f;
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  int64_t val = vs2;
+
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h
index e697b52..7bbc766 100644
--- a/riscv/insns/vssra_vv.h
+++ b/riscv/insns/vssra_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VV_LOOP
 ({
   int sh = vs1 & (sew - 1);
+  int128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h
index 8d7ad20..068a22b 100644
--- a/riscv/insns/vssra_vx.h
+++ b/riscv/insns/vssra_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VX_LOOP
 ({
   int sh = rs1 & (sew - 1);
+  int128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
index 8a10df0..bf554ca 100644
--- a/riscv/insns/vssrl_vi.h
+++ b/riscv/insns/vssrl_vi.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VI_ULOOP
 ({
   int sh = simm5 & (sew - 1) & 0x1f;
+  uint64_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h
index f40cd90..a8e5d16 100644
--- a/riscv/insns/vssrl_vv.h
+++ b/riscv/insns/vssrl_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VV_ULOOP
 ({
   int sh = vs1 & (sew - 1);
+  uint128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h
index 5da3f75..ee3cb34 100644
--- a/riscv/insns/vssrl_vx.h
+++ b/riscv/insns/vssrl_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
 VI_VX_ULOOP
 ({
   int sh = rs1 & (sew - 1);
+  uint128_t val = vs2;
 
-  INT_ROUNDING(vs2, xrm, sh);
-  vd = vs2 >> sh;
+  INT_ROUNDING(val, xrm, sh);
+  vd = val >> sh;
 })
diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h
index fd3ee21..18fe4fb 100644
--- a/riscv/insns/vssub_vv.h
+++ b/riscv/insns/vssub_vv.h
@@ -1,4 +1,5 @@
 // vssub.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h
index 5c5c781..7a01125 100644
--- a/riscv/insns/vssub_vx.h
+++ b/riscv/insns/vssub_vx.h
@@ -1,4 +1,5 @@
 // vssub.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h
index c5c74fe..e58076e 100644
--- a/riscv/insns/vssubu_vv.h
+++ b/riscv/insns/vssubu_vv.h
@@ -1,4 +1,5 @@
 // vssubu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
 VI_LOOP_BASE
 bool sat = false;
 
diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h
index 12cfdbb..556c759 100644
--- a/riscv/insns/vssubu_vx.h
+++ b/riscv/insns/vssubu_vx.h
@@ -1,4 +1,5 @@
 // vssubu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
 VI_LOOP_BASE
 bool sat = false;
 
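The vssra/vssrl hunks above copy vs2 into a wider temporary before applying INT_ROUNDING, so the rounding increment cannot overflow the element type. For reference, the four fixed-point rounding modes selected by vxrm behave roughly as follows; this is a sketch of the rounded shift, not spike's actual INT_ROUNDING macro:

#include <cstdint>

enum Vxrm { RNU = 0, RNE = 1, RDN = 2, ROD = 3 };

// Rounded arithmetic right shift by 'sh' bits, following the v-ext
// fixed-point rounding definitions.
static int64_t round_shift(int64_t v, unsigned sh, Vxrm rm)
{
  if (sh == 0)
    return v;
  int64_t lsb    = (v >> sh) & 1;                                   // LSB of the kept part
  int64_t guard  = (v >> (sh - 1)) & 1;                             // first discarded bit
  int64_t sticky = (sh > 1) && (v & ((int64_t{1} << (sh - 1)) - 1));
  int64_t r = v >> sh;
  switch (rm) {
    case RNU: r += guard; break;                    // round to nearest, ties up
    case RNE: r += guard & (sticky | lsb); break;   // round to nearest, ties to even
    case RDN: break;                                // truncate (round down)
    case ROD: r |= (guard | sticky); break;         // round to odd (jamming)
  }
  return r;
}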
diff --git a/riscv/insns/vsuxb_v.h b/riscv/insns/vsuxb_v.h
index cf928f8..03f1980 100644
--- a/riscv/insns/vsuxb_v.h
+++ b/riscv/insns/vsuxb_v.h
@@ -1,6 +1,7 @@
 // vsuxb.v and vsxseg[2-8]b.v
-require_vector;
 require(P.VU.vsew >= e8);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t vl = P.VU.vl;
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
@@ -8,30 +9,25 @@ reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (P.VU.vsew) {
   case e8:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint8_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint8_t>(vs3, vreg_inx));
     break;
   case e16:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint16_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint16_t>(vs3, vreg_inx));
     break;
   case e32:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
+    MMU.store_uint8(baseAddr + index[i],
                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint64_t>(vs3, vreg_inx));
    break;
   }
 }
diff --git a/riscv/insns/vsuxe_v.h b/riscv/insns/vsuxe_v.h
index 8bd7545..22d6fb5 100644
--- a/riscv/insns/vsuxe_v.h
+++ b/riscv/insns/vsuxe_v.h
@@ -1,38 +1,34 @@
 // vsxe.v and vsxseg[2-8]e.v
-require_vector;
 const reg_t sew = P.VU.vsew;
 const reg_t vl = P.VU.vl;
 require(sew >= e8 && sew <= e64);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
 reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (sew) {
   case e8:
-    if (is_valid)
-      MMU.store_uint8(baseAddr + index[i],
-                      P.VU.elt<uint8_t>(vs3, vreg_inx));
+    MMU.store_uint8(baseAddr + index[i],
+                    P.VU.elt<uint8_t>(vs3, vreg_inx));
     break;
   case e16:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint16_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint16_t>(vs3, vreg_inx));
     break;
   case e32:
-    if (is_valid)
-      MMU.store_uint32(baseAddr + index[i],
-                       P.VU.elt<uint32_t>(vs3, vreg_inx));
+    MMU.store_uint32(baseAddr + index[i],
+                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint64(baseAddr + index[i],
-                       P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint64(baseAddr + index[i],
+                     P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsuxh_v.h b/riscv/insns/vsuxh_v.h
index 1d5a1bd..a34bc27 100644
--- a/riscv/insns/vsuxh_v.h
+++ b/riscv/insns/vsuxh_v.h
@@ -1,6 +1,7 @@
 // vsxh.v and vsxseg[2-8]h.v
-require_vector;
 require(P.VU.vsew >= e16);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t vl = P.VU.vl;
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
@@ -8,25 +9,21 @@ reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (P.VU.vsew) {
   case e16:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint16_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint16_t>(vs3, vreg_inx));
     break;
   case e32:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint32_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint16(baseAddr + index[i],
-                       P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint16(baseAddr + index[i],
+                     P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsuxw_v.h b/riscv/insns/vsuxw_v.h
index ec1a8fe..f42092d 100644
--- a/riscv/insns/vsuxw_v.h
+++ b/riscv/insns/vsuxw_v.h
@@ -1,6 +1,7 @@
 // vsxw.v and vsxseg[2-8]w.v
-require_vector;
 require(P.VU.vsew >= e32);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
 reg_t vl = P.VU.vl;
 reg_t baseAddr = RS1;
 reg_t stride = insn.rs2();
@@ -8,20 +9,17 @@ reg_t vs3 = insn.rd();
 reg_t vlmax = P.VU.vlmax;
 VI_DUPLICATE_VREG(stride, vlmax);
 for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
-  bool is_valid = true;
   VI_ELEMENT_SKIP(i);
   VI_STRIP(i)
 
   switch (P.VU.vsew) {
   case e32:
-    if (is_valid)
-      MMU.store_uint32(baseAddr + index[i],
-                       P.VU.elt<uint32_t>(vs3, vreg_inx));
+    MMU.store_uint32(baseAddr + index[i],
+                     P.VU.elt<uint32_t>(vs3, vreg_inx));
     break;
   case e64:
-    if (is_valid)
-      MMU.store_uint32(baseAddr + index[i],
-                       P.VU.elt<uint64_t>(vs3, vreg_inx));
+    MMU.store_uint32(baseAddr + index[i],
+                     P.VU.elt<uint64_t>(vs3, vreg_inx));
     break;
   }
 }
diff --git a/riscv/insns/vsxb_v.h b/riscv/insns/vsxb_v.h
index 3e50597..fb567fb 100644
--- a/riscv/insns/vsxb_v.h
+++ b/riscv/insns/vsxb_v.h
@@ -1,4 +1,5 @@
 // vsxb.v and vsxseg[2-8]b.v
 require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxe_v.h b/riscv/insns/vsxe_v.h
index 28984ac..78c6605 100644
--- a/riscv/insns/vsxe_v.h
+++ b/riscv/insns/vsxe_v.h
@@ -1,6 +1,7 @@
 // vsxe.v and vsxseg[2-8]e.v
 reg_t sew = P.VU.vsew;
 require(sew >= e8 && sew <= e64);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 if (sew == e8) {
   VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxh_v.h b/riscv/insns/vsxh_v.h
index 2e5506a..6b0fcfd 100644
--- a/riscv/insns/vsxh_v.h
+++ b/riscv/insns/vsxh_v.h
@@ -1,4 +1,5 @@
 // vsxh.v and vsxseg[2-8]h.v
 require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_ST(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vsxw_v.h b/riscv/insns/vsxw_v.h
index 9a2119f..2223d5b 100644
--- a/riscv/insns/vsxw_v.h
+++ b/riscv/insns/vsxw_v.h
@@ -1,4 +1,5 @@
 // vsxw.v and vsxseg[2-8]w.v
 require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
 VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
 VI_ST(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vwsmacc_vv.h b/riscv/insns/vwsmacc_vv.h
index 86d588d..42c21db 100644
--- a/riscv/insns/vwsmacc_vv.h
+++ b/riscv/insns/vwsmacc_vv.h
@@ -1,2 +1,2 @@
 // vwsmacc.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmacc_vx.h b/riscv/insns/vwsmacc_vx.h
index f0f04a3..2095665 100644
--- a/riscv/insns/vwsmacc_vx.h
+++ b/riscv/insns/vwsmacc_vx.h
@@ -1,2 +1,2 @@
 // vwsmacc.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccsu_vv.h b/riscv/insns/vwsmaccsu_vv.h
index cf1aa1e..9df7833 100644
--- a/riscv/insns/vwsmaccsu_vv.h
+++ b/riscv/insns/vwsmaccsu_vv.h
@@ -1,2 +1,2 @@
 // vwsmaccsu.vx vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SU_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccsu_vx.h b/riscv/insns/vwsmaccsu_vx.h
index 681c309..8565c98 100644
--- a/riscv/insns/vwsmaccsu_vx.h
+++ b/riscv/insns/vwsmaccsu_vx.h
@@ -1,2 +1,2 @@
 // vwsmaccsu.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SU_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccu_vv.h b/riscv/insns/vwsmaccu_vv.h
index e873d93..7075247 100644
--- a/riscv/insns/vwsmaccu_vv.h
+++ b/riscv/insns/vwsmaccu_vv.h
@@ -1,2 +1,2 @@
 // vwsmaccu.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_USSMA(vs1);
+VI_VVX_LOOP_WIDE_USSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccu_vx.h b/riscv/insns/vwsmaccu_vx.h
index 7318fa7..15027cf 100644
--- a/riscv/insns/vwsmaccu_vx.h
+++ b/riscv/insns/vwsmaccu_vx.h
@@ -1,2 +1,2 @@
 // vwsmaccu vd, vs2, rs1
-VI_VVX_LOOP_WIDE_USSMA(rs1);
+VI_VVX_LOOP_WIDE_USSMA(rs1, false);