Diffstat (limited to 'riscv/insns')
-rw-r--r--  riscv/insns/vaadd_vi.h        |  1
-rw-r--r--  riscv/insns/vaadd_vv.h        |  2
-rw-r--r--  riscv/insns/vaadd_vx.h        |  2
-rw-r--r--  riscv/insns/vasub_vv.h        |  2
-rw-r--r--  riscv/insns/vasub_vx.h        |  2
-rw-r--r--  riscv/insns/vcompress_vm.h    | 23
-rw-r--r--  riscv/insns/vfcvt_f_x_v.h     |  2
-rw-r--r--  riscv/insns/vfcvt_f_xu_v.h    |  2
-rw-r--r--  riscv/insns/vfcvt_x_f_v.h     |  2
-rw-r--r--  riscv/insns/vfmerge_vfm.h     | 12
-rw-r--r--  riscv/insns/vfmv_f_s.h        |  1
-rw-r--r--  riscv/insns/vfmv_s_f.h        | 12
-rw-r--r--  riscv/insns/vfmv_v_f.h        | 14
-rw-r--r--  riscv/insns/vid_v.h           |  4
-rw-r--r--  riscv/insns/viota_m.h         |  5
-rw-r--r--  riscv/insns/vleff_v.h         | 19
-rw-r--r--  riscv/insns/vlxb_v.h          |  1
-rw-r--r--  riscv/insns/vlxbu_v.h         |  1
-rw-r--r--  riscv/insns/vlxe_v.h          |  1
-rw-r--r--  riscv/insns/vlxh_v.h          |  1
-rw-r--r--  riscv/insns/vlxhu_v.h         |  1
-rw-r--r--  riscv/insns/vlxw_v.h          |  1
-rw-r--r--  riscv/insns/vlxwu_v.h         |  1
-rw-r--r--  riscv/insns/vmadc_vim.h       |  1
-rw-r--r--  riscv/insns/vmadc_vvm.h       |  1
-rw-r--r--  riscv/insns/vmadc_vxm.h       |  1
-rw-r--r--  riscv/insns/vmerge_vim.h      |  1
-rw-r--r--  riscv/insns/vmerge_vvm.h      |  1
-rw-r--r--  riscv/insns/vmerge_vxm.h      |  1
-rw-r--r--  riscv/insns/vmfeq_vf.h        |  2
-rw-r--r--  riscv/insns/vmfeq_vv.h        |  2
-rw-r--r--  riscv/insns/vmfge_vf.h        |  4
-rw-r--r--  riscv/insns/vmfgt_vf.h        |  4
-rw-r--r--  riscv/insns/vmfle_vf.h        |  2
-rw-r--r--  riscv/insns/vmfle_vv.h        |  4
-rw-r--r--  riscv/insns/vmflt_vf.h        |  4
-rw-r--r--  riscv/insns/vmflt_vv.h        |  4
-rw-r--r--  riscv/insns/vmfne_vf.h        |  2
-rw-r--r--  riscv/insns/vmfne_vv.h        |  2
-rw-r--r--  riscv/insns/vmford_vf.h       |  5
-rw-r--r--  riscv/insns/vmford_vv.h       |  5
-rw-r--r--  riscv/insns/vmsbc_vvm.h       |  1
-rw-r--r--  riscv/insns/vmsbc_vxm.h       |  1
-rw-r--r--  riscv/insns/vmsbf_m.h         |  1
-rw-r--r--  riscv/insns/vmsif_m.h         |  1
-rw-r--r--  riscv/insns/vmsof_m.h         |  1
-rw-r--r--  riscv/insns/vmulhsu_vv.h      |  1
-rw-r--r--  riscv/insns/vmulhsu_vx.h      |  1
-rw-r--r--  riscv/insns/vmv_s_x.h         | 18
-rw-r--r--  riscv/insns/vmv_v_v.h         |  1
-rw-r--r--  riscv/insns/vmv_x_s.h         | 47
-rw-r--r--  riscv/insns/vnclip_vi.h       | 13
-rw-r--r--  riscv/insns/vnclip_vv.h       | 19
-rw-r--r--  riscv/insns/vnclip_vx.h       | 18
-rw-r--r--  riscv/insns/vnclipu_vi.h      |  8
-rw-r--r--  riscv/insns/vnclipu_vv.h      | 19
-rw-r--r--  riscv/insns/vnclipu_vx.h      | 19
-rw-r--r--  riscv/insns/vnsra_vi.h        |  2
-rw-r--r--  riscv/insns/vnsra_vv.h        |  2
-rw-r--r--  riscv/insns/vnsra_vx.h        |  2
-rw-r--r--  riscv/insns/vnsrl_vi.h        |  2
-rw-r--r--  riscv/insns/vnsrl_vv.h        |  2
-rw-r--r--  riscv/insns/vnsrl_vx.h        |  2
-rw-r--r--  riscv/insns/vrgather_vi.h     | 18
-rw-r--r--  riscv/insns/vrgather_vv.h     | 24
-rw-r--r--  riscv/insns/vrgather_vx.h     | 21
-rw-r--r--  riscv/insns/vsadd_vi.h        |  1
-rw-r--r--  riscv/insns/vsadd_vv.h        |  1
-rw-r--r--  riscv/insns/vsadd_vx.h        |  1
-rw-r--r--  riscv/insns/vslide1down_vx.h  |  5
-rw-r--r--  riscv/insns/vslide1up_vx.h    |  6
-rw-r--r--  riscv/insns/vslidedown_vi.h   | 10
-rw-r--r--  riscv/insns/vslidedown_vx.h   | 14
-rw-r--r--  riscv/insns/vslideup_vi.h     |  6
-rw-r--r--  riscv/insns/vslideup_vx.h     |  6
-rw-r--r--  riscv/insns/vsmul_vv.h        | 23
-rw-r--r--  riscv/insns/vsmul_vx.h        | 25
-rw-r--r--  riscv/insns/vssra_vi.h        |  6
-rw-r--r--  riscv/insns/vssra_vv.h        |  5
-rw-r--r--  riscv/insns/vssra_vx.h        |  5
-rw-r--r--  riscv/insns/vssrl_vi.h        |  5
-rw-r--r--  riscv/insns/vssrl_vv.h        |  5
-rw-r--r--  riscv/insns/vssrl_vx.h        |  5
-rw-r--r--  riscv/insns/vssub_vv.h        |  1
-rw-r--r--  riscv/insns/vssub_vx.h        |  1
-rw-r--r--  riscv/insns/vssubu_vv.h       |  1
-rw-r--r--  riscv/insns/vssubu_vx.h       |  1
-rw-r--r--  riscv/insns/vsuxb_v.h         | 22
-rw-r--r--  riscv/insns/vsuxe_v.h         | 24
-rw-r--r--  riscv/insns/vsuxh_v.h         | 19
-rw-r--r--  riscv/insns/vsuxw_v.h         | 14
-rw-r--r--  riscv/insns/vsxb_v.h          |  1
-rw-r--r--  riscv/insns/vsxe_v.h          |  1
-rw-r--r--  riscv/insns/vsxh_v.h          |  1
-rw-r--r--  riscv/insns/vsxw_v.h          |  1
-rw-r--r--  riscv/insns/vwsmacc_vv.h      |  2
-rw-r--r--  riscv/insns/vwsmacc_vx.h      |  2
-rw-r--r--  riscv/insns/vwsmaccsu_vv.h    |  2
-rw-r--r--  riscv/insns/vwsmaccsu_vx.h    |  2
-rw-r--r--  riscv/insns/vwsmaccu_vv.h     |  2
-rw-r--r--  riscv/insns/vwsmaccu_vx.h     |  2
101 files changed, 292 insertions(+), 338 deletions(-)
diff --git a/riscv/insns/vaadd_vi.h b/riscv/insns/vaadd_vi.h
index 5f8d5f5..6bd1a60 100644
--- a/riscv/insns/vaadd_vi.h
+++ b/riscv/insns/vaadd_vi.h
@@ -1,4 +1,5 @@
// vaadd: Averaging adds of integers
+VI_CHECK_SSS(false);
VRM xrm = P.VU.get_vround_mode();
VI_VI_LOOP
({
diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h
index b479970..0a14467 100644
--- a/riscv/insns/vaadd_vv.h
+++ b/riscv/insns/vaadd_vv.h
@@ -1,2 +1,2 @@
// vaadd.vv vd, vs2, vs1
-VI_VVX_LOOP_AVG(vs1, +);
+VI_VVX_LOOP_AVG(vs1, +, true);
diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h
index c811a0a..ae00d8e 100644
--- a/riscv/insns/vaadd_vx.h
+++ b/riscv/insns/vaadd_vx.h
@@ -1,2 +1,2 @@
// vaadd.vx vd, vs2, rs1
-VI_VVX_LOOP_AVG(rs1, +);
+VI_VVX_LOOP_AVG(rs1, +, false);
diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h
index 5a5ccc9..a45c18d 100644
--- a/riscv/insns/vasub_vv.h
+++ b/riscv/insns/vasub_vv.h
@@ -1,2 +1,2 @@
// vasub.vv vd, vs2, vs1
-VI_VVX_LOOP_AVG(vs1, -);
+VI_VVX_LOOP_AVG(vs1, -, true);
diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h
index c3cad4b..4e8dba1 100644
--- a/riscv/insns/vasub_vx.h
+++ b/riscv/insns/vasub_vx.h
@@ -1,2 +1,2 @@
// vasub.vx vd, vs2, rs1
-VI_VVX_LOOP_AVG(rs1, -);
+VI_VVX_LOOP_AVG(rs1, -, false);
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h
index b056b0e..77e91bf 100644
--- a/riscv/insns/vcompress_vm.h
+++ b/riscv/insns/vcompress_vm.h
@@ -1,14 +1,13 @@
// vcompress vd, vs2, vs1
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
require(P.VU.vstart == 0);
-reg_t sew = P.VU.vsew;
-reg_t vl = P.VU.vl;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs1(), 1));
+
reg_t pos = 0;
-for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+
+VI_GENERAL_LOOP_BASE
const int mlen = P.VU.vmlen;
const int midx = (mlen * i) / 64;
const int mpos = (mlen * i) % 64;
@@ -32,10 +31,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
++pos;
}
-}
-
-if (vl > 0 && TAIL_ZEROING) {
- uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, pos * ((sew >> 3) * 1));
- memset(tail, 0, (P.VU.vlmax - pos) * ((sew >> 3) * 1));
-}
-
+VI_LOOP_END;
diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h
index 311f875..f6604fb 100644
--- a/riscv/insns/vfcvt_f_x_v.h
+++ b/riscv/insns/vfcvt_f_x_v.h
@@ -1,5 +1,5 @@
// vfcvt.f.x.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
({
auto vs2_i = P.VU.elt<int32_t>(rs2_num, i);
vd = i32_to_f32(vs2_i);
diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h
index ceabea3..2c845ac 100644
--- a/riscv/insns/vfcvt_f_xu_v.h
+++ b/riscv/insns/vfcvt_f_xu_v.h
@@ -1,5 +1,5 @@
// vfcvt.f.xu.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
({
auto vs2_u = P.VU.elt<uint32_t>(rs2_num, i);
vd = ui32_to_f32(vs2_u);
diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h
index ee53c6d..a9eedc4 100644
--- a/riscv/insns/vfcvt_x_f_v.h
+++ b/riscv/insns/vfcvt_x_f_v.h
@@ -1,5 +1,5 @@
// vfcvt.x.f.v vd, vd2, vm
-VI_VFP_VV_LOOP
+VI_VFP_VF_LOOP
({
P.VU.elt<int32_t>(rd_num, i) = f32_to_i32(vs2, STATE.frm, true);
})
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index 6d12bce..ea78165 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -1,13 +1,7 @@
// vfmerge_vf vd, vs2, vs1, vm
-require_extension('F');
-require_fp;
-require(P.VU.vsew == 32);
-require_vector;
-reg_t vl = P.VU.vl;
+VI_CHECK_SSS(false);
+VI_VFP_COMMON;
reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i);
auto rs1 = f32(READ_FREG(rs1_num));
@@ -20,6 +14,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
vd = use_first ? rs1 : vs2;
}
-VI_TAIL_ZERO(1);
P.VU.vstart = 0;
-set_fp_exceptions;
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index c6dbaff..066db80 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -1,6 +1,5 @@
// vfmv_f_s: rd = vs2[0] (rs1=0)
require_vector;
-require(insn.v_vm() == 1);
require_fp;
require_extension('F');
require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index cb81008..8ff6094 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -15,17 +15,5 @@ if (vl > 0) {
else
P.VU.elt<uint32_t>(rd_num, 0) = f32(FRS1).v;
- const reg_t max_len = P.VU.VLEN / sew;
- for (reg_t i = 1; i < max_len; ++i) {
- switch(sew) {
- case e32:
- P.VU.elt<uint32_t>(rd_num, i) = 0;
- break;
- default:
- require(false);
- break;
- }
- }
-
vl = 0;
}
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index c85a3e9..f323263 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -1,13 +1,7 @@
-// vfmerge_vf vd, vs2, vs1, vm
-require_extension('F');
-require_fp;
-require(P.VU.vsew == 32);
-require_vector;
-reg_t vl = P.VU.vl;
+// vfmv_vf vd, vs1
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+VI_VFP_COMMON
reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
for (reg_t i=P.VU.vstart; i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i);
auto rs1 = f32(READ_FREG(rs1_num));
@@ -15,6 +9,4 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
vd = rs1;
}
-VI_TAIL_ZERO(1);
P.VU.vstart = 0;
-set_fp_exceptions;
diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h
index df6dd04..25422d6 100644
--- a/riscv/insns/vid_v.h
+++ b/riscv/insns/vid_v.h
@@ -6,6 +6,9 @@ reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
+require((rd_num & (P.VU.vlmul - 1)) == 0);
+if (insn.v_vm() == 0 && P.VU.vlmul >= 2) \
+ require(insn.rd() != 0);
for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
@@ -26,5 +29,4 @@ for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
}
}
-VI_TAIL_ZERO(1);
P.VU.vstart = 0;
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index fde0291..04bfcd8 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -7,6 +7,10 @@ reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
require(P.VU.vstart == 0);
+require(!is_overlapped(rd_num, P.VU.vlmul, rs2_num, 1));
+if (insn.v_vm() == 0)
+ require(!is_overlapped(rd_num, P.VU.vlmul, 0, 1));
+require((rd_num & (P.VU.vlmul - 1)) == 0);
int cnt = 0;
for (reg_t i = 0; i < vl; ++i) {
@@ -49,4 +53,3 @@ for (reg_t i = 0; i < vl; ++i) {
}
}
-VI_TAIL_ZERO(1);
diff --git a/riscv/insns/vleff_v.h b/riscv/insns/vleff_v.h
index ec2777a..e858de9 100644
--- a/riscv/insns/vleff_v.h
+++ b/riscv/insns/vleff_v.h
@@ -1,7 +1,7 @@
-require_vector;
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
const reg_t nf = insn.v_nf() + 1;
require((nf * P.VU.vlmul) <= (NVPR / 4));
+VI_CHECK_SXX;
const reg_t sew = P.VU.vsew;
const reg_t vl = P.VU.vl;
const reg_t baseAddr = RS1;
@@ -9,7 +9,6 @@ const reg_t rd_num = insn.rd();
bool early_stop = false;
const reg_t vlmul = P.VU.vlmul;
for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
- bool is_valid = true;
bool is_zero = false;
VI_STRIP(i);
VI_ELEMENT_SKIP(i);
@@ -20,23 +19,23 @@ for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint8(baseAddr + (i * nf + fn) * 1) : 0;
- is_zero = is_valid && P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint8(baseAddr + (i * nf + fn) * 1);
+ is_zero = P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e16:
P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint16(baseAddr + (i * nf + fn) * 2) : 0;
- is_zero = is_valid && P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint16(baseAddr + (i * nf + fn) * 2);
+ is_zero = P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e32:
P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint32(baseAddr + (i * nf + fn) * 4) : 0;
- is_zero = is_valid && P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint32(baseAddr + (i * nf + fn) * 4);
+ is_zero = P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e64:
P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) =
- is_valid ? MMU.load_uint64(baseAddr + (i * nf + fn) * 8) : 0;
- is_zero = is_valid && P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ MMU.load_uint64(baseAddr + (i * nf + fn) * 8);
+ is_zero = P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
}
diff --git a/riscv/insns/vlxb_v.h b/riscv/insns/vlxb_v.h
index 5a99bd3..57ce8c8 100644
--- a/riscv/insns/vlxb_v.h
+++ b/riscv/insns/vlxb_v.h
@@ -1,4 +1,5 @@
// vlxb.v and vlsseg[2-8]b.v
require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxbu_v.h b/riscv/insns/vlxbu_v.h
index daf2d2b..d8e3dd6 100644
--- a/riscv/insns/vlxbu_v.h
+++ b/riscv/insns/vlxbu_v.h
@@ -1,4 +1,5 @@
// vlxbu.v and vlxseg[2-8]bu.v
require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vlxe_v.h b/riscv/insns/vlxe_v.h
index b1190a8..1055eca 100644
--- a/riscv/insns/vlxe_v.h
+++ b/riscv/insns/vlxe_v.h
@@ -1,5 +1,6 @@
// vlxe.v and vlxseg[2-8]e.v
reg_t sew = P.VU.vsew;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
if (sew == e8) {
VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxh_v.h b/riscv/insns/vlxh_v.h
index 98145db..9f4c3a1 100644
--- a/riscv/insns/vlxh_v.h
+++ b/riscv/insns/vlxh_v.h
@@ -1,4 +1,5 @@
// vlxh.v and vlxseg[2-8]h.v
require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, int16, 2);
diff --git a/riscv/insns/vlxhu_v.h b/riscv/insns/vlxhu_v.h
index 27d549c..9283127 100644
--- a/riscv/insns/vlxhu_v.h
+++ b/riscv/insns/vlxhu_v.h
@@ -1,4 +1,5 @@
// vlxh.v and vlxseg[2-8]h.v
require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vlxw_v.h b/riscv/insns/vlxw_v.h
index 83300f0..c1117a2 100644
--- a/riscv/insns/vlxw_v.h
+++ b/riscv/insns/vlxw_v.h
@@ -1,5 +1,6 @@
// vlxw.v and vlxseg[2-8]w.v
require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, int32, 4);
diff --git a/riscv/insns/vlxwu_v.h b/riscv/insns/vlxwu_v.h
index a2f9913..d3034bd 100644
--- a/riscv/insns/vlxwu_v.h
+++ b/riscv/insns/vlxwu_v.h
@@ -1,4 +1,5 @@
// vlxwu.v and vlxseg[2-8]wu.v
require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_LD(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h
index fd79089..a8185d1 100644
--- a/riscv/insns/vmadc_vim.h
+++ b/riscv/insns/vmadc_vim.h
@@ -1,5 +1,4 @@
// vmadc.vim vd, vs2, simm5
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h
index 82042ca..8d58658 100644
--- a/riscv/insns/vmadc_vvm.h
+++ b/riscv/insns/vmadc_vvm.h
@@ -1,5 +1,4 @@
// vmadc.vvm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h
index 8f26584..0b6273a 100644
--- a/riscv/insns/vmadc_vxm.h
+++ b/riscv/insns/vmadc_vxm.h
@@ -1,5 +1,4 @@
// vadc.vx vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h
index 13354d6..c6c87c7 100644
--- a/riscv/insns/vmerge_vim.h
+++ b/riscv/insns/vmerge_vim.h
@@ -1,4 +1,5 @@
// vmerge.vim vd, vs2, simm5
+VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h
index 7530b40..97a0182 100644
--- a/riscv/insns/vmerge_vvm.h
+++ b/riscv/insns/vmerge_vvm.h
@@ -1,4 +1,5 @@
// vmerge.vvm vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h
index b1757fa..de7df91 100644
--- a/riscv/insns/vmerge_vxm.h
+++ b/riscv/insns/vmerge_vxm.h
@@ -1,4 +1,5 @@
// vmerge.vxm vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_VVXI_MERGE_LOOP
({
int midx = (P.VU.vmlen * i) / 64;
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
index cedf4b9..f0e7109 100644
--- a/riscv/insns/vmfeq_vf.h
+++ b/riscv/insns/vmfeq_vf.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
index 7e76cac..1be3a69 100644
--- a/riscv/insns/vmfeq_vv.h
+++ b/riscv/insns/vmfeq_vv.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = f32_eq(vs2, vs1);
-})
+}, true)
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
index 7eade89..1c68366 100644
--- a/riscv/insns/vmfge_vf.h
+++ b/riscv/insns/vmfge_vf.h
@@ -1,5 +1,5 @@
// vfge.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_le_quiet(rs1, vs2);
-})
+ res = f32_le(rs1, vs2);
+}, false)
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
index 6115d06..0979185 100644
--- a/riscv/insns/vmfgt_vf.h
+++ b/riscv/insns/vmfgt_vf.h
@@ -1,5 +1,5 @@
// vfgt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_lt_quiet(rs1, vs2);
-})
+ res = f32_lt(rs1, vs2);
+}, false)
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
index 998b93b..90607ec 100644
--- a/riscv/insns/vmfle_vf.h
+++ b/riscv/insns/vmfle_vf.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = f32_le(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
index c716312..6ccdfec 100644
--- a/riscv/insns/vmfle_vv.h
+++ b/riscv/insns/vmfle_vv.h
@@ -1,5 +1,5 @@
// vfle.vv vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_le_quiet(vs2, vs1);
-})
+ res = f32_le(vs2, vs1);
+}, true)
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
index af436e4..6b71a4a 100644
--- a/riscv/insns/vmflt_vf.h
+++ b/riscv/insns/vmflt_vf.h
@@ -1,5 +1,5 @@
// vflt.vf vd, vs2, rs1
VI_VFP_LOOP_CMP
({
- res = f32_lt_quiet(vs2, rs1);
-})
+ res = f32_lt(vs2, rs1);
+}, false)
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
index ded867d..a2ed8e3 100644
--- a/riscv/insns/vmflt_vv.h
+++ b/riscv/insns/vmflt_vv.h
@@ -1,5 +1,5 @@
// vflt.vv vd, vs2, vs1
VI_VFP_LOOP_CMP
({
- res = f32_lt_quiet(vs2, vs1);
-})
+ res = f32_lt(vs2, vs1);
+}, true)
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
index ac2eced..ef63678 100644
--- a/riscv/insns/vmfne_vf.h
+++ b/riscv/insns/vmfne_vf.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, rs1);
-})
+}, false)
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
index 3fa8beb..8378a23 100644
--- a/riscv/insns/vmfne_vv.h
+++ b/riscv/insns/vmfne_vv.h
@@ -2,4 +2,4 @@
VI_VFP_LOOP_CMP
({
res = !f32_eq(vs2, vs1);
-})
+}, true)
diff --git a/riscv/insns/vmford_vf.h b/riscv/insns/vmford_vf.h
deleted file mode 100644
index b5e74f2..0000000
--- a/riscv/insns/vmford_vf.h
+++ /dev/null
@@ -1,5 +0,0 @@
-// vford.vf vd, vs2, rs1, vm
-VI_VFP_LOOP_CMP
-({
- res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1));
-})
diff --git a/riscv/insns/vmford_vv.h b/riscv/insns/vmford_vv.h
deleted file mode 100644
index 2e459c1..0000000
--- a/riscv/insns/vmford_vv.h
+++ /dev/null
@@ -1,5 +0,0 @@
-// vford.vv vd, vs2, vs1, vm
-VI_VFP_LOOP_CMP
-({
- res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1));
-})
diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h
index 3804ba8..f4ce6f4 100644
--- a/riscv/insns/vmsbc_vvm.h
+++ b/riscv/insns/vmsbc_vvm.h
@@ -1,5 +1,4 @@
// vmsbc.vvm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_VV_LOOP_CARRY
({
auto v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h
index d5332f5..aec4409 100644
--- a/riscv/insns/vmsbc_vxm.h
+++ b/riscv/insns/vmsbc_vxm.h
@@ -1,5 +1,4 @@
// vmsbc.vxm vd, vs2, rs1
-require(!(insn.rd() == 0 && P.VU.vlmul > 1));
VI_XI_LOOP_CARRY
({
auto &v0 = P.VU.elt<uint64_t>(0, midx);
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
index 3047cca..443fcbb 100644
--- a/riscv/insns/vmsbf_m.h
+++ b/riscv/insns/vmsbf_m.h
@@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
}
}
-VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
index 826e7cd..381088b 100644
--- a/riscv/insns/vmsif_m.h
+++ b/riscv/insns/vmsif_m.h
@@ -30,5 +30,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
}
}
-VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
index 48805f7..d66002d 100644
--- a/riscv/insns/vmsof_m.h
+++ b/riscv/insns/vmsof_m.h
@@ -28,5 +28,4 @@ for (reg_t i = P.VU.vstart ; i < vl; ++i) {
}
}
-VI_TAIL_ZERO_MASK(rd_num);
P.VU.vstart = 0;
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
index 59882da..b918551 100644
--- a/riscv/insns/vmulhsu_vv.h
+++ b/riscv/insns/vmulhsu_vv.h
@@ -1,4 +1,5 @@
// vmulhsu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
switch(sew) {
case e8: {
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
index d39615a..cb2db3d 100644
--- a/riscv/insns/vmulhsu_vx.h
+++ b/riscv/insns/vmulhsu_vx.h
@@ -1,4 +1,5 @@
// vmulhsu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
switch(sew) {
case e8: {
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
index 38b2697..948b5be 100644
--- a/riscv/insns/vmv_s_x.h
+++ b/riscv/insns/vmv_s_x.h
@@ -24,23 +24,5 @@ if (vl > 0) {
break;
}
- const reg_t max_len = P.VU.VLEN / sew;
- for (reg_t i = 1; i < max_len; ++i) {
- switch(sew) {
- case e8:
- P.VU.elt<uint8_t>(rd_num, i) = 0;
- break;
- case e16:
- P.VU.elt<uint16_t>(rd_num, i) = 0;
- break;
- case e32:
- P.VU.elt<uint32_t>(rd_num, i) = 0;
- break;
- default:
- P.VU.elt<uint64_t>(rd_num, i) = 0;
- break;
- }
- }
-
vl = 0;
}
diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h
index 734010b..a4f9a5c 100644
--- a/riscv/insns/vmv_v_v.h
+++ b/riscv/insns/vmv_v_v.h
@@ -1,4 +1,5 @@
// vvmv.v.v vd, vs1
+require((insn.rs1() & (P.VU.vlmul - 1)) == 0);
VI_VVXI_MERGE_LOOP
({
vd = vs1;
diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h
index f22c2dd..50f2e79 100644
--- a/riscv/insns/vmv_x_s.h
+++ b/riscv/insns/vmv_x_s.h
@@ -1,25 +1,28 @@
-// vext_x_v: rd = vs2[0]
+// vmv_x_s: rd = vs2[rs1]
require(insn.v_vm() == 1);
uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen());
-VI_LOOP_BASE
-VI_LOOP_END_NO_TAIL_ZERO
-switch(sew) {
-case e8:
- WRITE_RD(P.VU.elt<uint8_t>(rs2_num, 0));
- break;
-case e16:
- WRITE_RD(P.VU.elt<uint16_t>(rs2_num, 0));
- break;
-case e32:
- if (P.get_max_xlen() == 32)
- WRITE_RD(P.VU.elt<int32_t>(rs2_num, 0));
- else
- WRITE_RD(P.VU.elt<uint32_t>(rs2_num, 0));
- break;
-case e64:
- if (P.get_max_xlen() <= sew)
- WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0) & xmask);
- else
- WRITE_RD(P.VU.elt<uint64_t>(rs2_num, 0));
- break;
+reg_t rs1 = RS1;
+reg_t sew = P.VU.vsew;
+reg_t rs2_num = insn.rs2();
+
+if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) {
+ WRITE_RD(0);
+} else {
+ switch(sew) {
+ case e8:
+ WRITE_RD(P.VU.elt<int8_t>(rs2_num, rs1));
+ break;
+ case e16:
+ WRITE_RD(P.VU.elt<int16_t>(rs2_num, rs1));
+ break;
+ case e32:
+ WRITE_RD(P.VU.elt<int32_t>(rs2_num, rs1));
+ break;
+ case e64:
+ if (P.get_max_xlen() <= sew)
+ WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1) & xmask);
+ else
+ WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1));
+ break;
+ }
}
diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h
index ca27593..eb21710 100644
--- a/riscv/insns/vnclip_vi.h
+++ b/riscv/insns/vnclip_vi.h
@@ -4,14 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
int64_t int_min = -(1 << (P.VU.vsew - 1));
VI_VVXI_LOOP_NARROW
({
-
int64_t result = vs2;
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ unsigned shift = zimm5 & ((sew * 2) - 1);
+
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
- result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+ result = result >> shift;
-// saturation
+ // saturation
if (result < int_min) {
result = int_min;
P.VU.vxsat = 1;
@@ -21,4 +22,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h
index 7bcb4cb..92575a6 100644
--- a/riscv/insns/vnclip_vv.h
+++ b/riscv/insns/vnclip_vv.h
@@ -4,20 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
int64_t int_min = -(1 << (P.VU.vsew - 1));
VI_VVXI_LOOP_NARROW
({
+ int128_t result = vs2;
+ unsigned shift = vs1 & ((sew * 2) - 1);
- int64_t result = vs2;
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- unsigned_shift_amount = 2 * sew - 1;
- }
-
- result = (vsext(result, sew * 2)) >> unsigned_shift_amount;
+ result = result >> shift;
-// saturation
+ // saturation
if (result < int_min) {
result = int_min;
P.VU.vxsat = 1;
@@ -27,4 +22,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, true)
diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h
index b66e830..96409de 100644
--- a/riscv/insns/vnclip_vx.h
+++ b/riscv/insns/vnclip_vx.h
@@ -4,19 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
int64_t int_min = -(1 << (P.VU.vsew - 1));
VI_VVXI_LOOP_NARROW
({
+ int128_t result = vs2;
+ unsigned shift = rs1 & ((sew * 2) - 1);
- int64_t result = vs2;
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- unsigned_shift_amount = 2 * sew - 1;
- }
- result = vsext(result, sew * 2) >> unsigned_shift_amount;
+ result = result >> shift;
-// saturation
+ // saturation
if (result < int_min) {
result = int_min;
P.VU.vxsat = 1;
@@ -26,4 +22,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h
index 61cb015..b1527f7 100644
--- a/riscv/insns/vnclipu_vi.h
+++ b/riscv/insns/vnclipu_vi.h
@@ -4,11 +4,13 @@ uint64_t int_max = ~(-1ll << P.VU.vsew);
VI_VVXI_LOOP_NARROW
({
uint64_t result = vs2_u;
+ unsigned shift = zimm5 & ((sew * 2) - 1);
+
// rounding
- INT_ROUNDING(result, xrm, sew);
+ INT_ROUNDING(result, xrm, shift);
// unsigned shifting to rs1
- result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+ result = result >> shift;
// saturation
if (result & (uint64_t)(-1ll << sew)) {
@@ -17,4 +19,4 @@ VI_VVXI_LOOP_NARROW
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h
index 004f24f..217e82f 100644
--- a/riscv/insns/vnclipu_vv.h
+++ b/riscv/insns/vnclipu_vv.h
@@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode();
uint64_t int_max = ~(-1ll << P.VU.vsew);
VI_VVXI_LOOP_NARROW
({
+ uint128_t result = vs2_u;
+ unsigned shift = vs1 & ((sew * 2) - 1);
- uint64_t result = vs2_u;
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ result = result >> shift;
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- result = 0;
- } else {
- result = vzext(result, sew * 2) >> unsigned_shift_amount;
- }
-// saturation
+ // saturation
if (result & (uint64_t)(-1ll << sew)) {
result = int_max;
P.VU.vxsat = 1;
}
vd = result;
-})
+}, true)
diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h
index 0507a2b..ce15b55 100644
--- a/riscv/insns/vnclipu_vx.h
+++ b/riscv/insns/vnclipu_vx.h
@@ -3,24 +3,19 @@ VRM xrm = P.VU.get_vround_mode();
uint64_t int_max = ~(-1ll << P.VU.vsew);
VI_VVXI_LOOP_NARROW
({
- uint64_t result = vs2;
+ uint128_t result = vs2_u;
+ unsigned shift = rs1 & ((sew * 2) - 1);
-// rounding
- INT_ROUNDING(result, xrm, sew);
+ // rounding
+ INT_ROUNDING(result, xrm, shift);
-// unsigned shifting to rs1
- uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
- if (unsigned_shift_amount >= (2 * sew)) {
- result = 0;
- } else {
- result = vzext(result, sew * 2) >> unsigned_shift_amount;
- }
+ result = result >> shift;
-// saturation
+ // saturation
if (result & (uint64_t)(-1ll << sew)) {
result = int_max;
P.VU.vxsat = 1;
}
vd = result;
-})
+}, false)
diff --git a/riscv/insns/vnsra_vi.h b/riscv/insns/vnsra_vi.h
index 0502ff1..f41979e 100644
--- a/riscv/insns/vnsra_vi.h
+++ b/riscv/insns/vnsra_vi.h
@@ -2,4 +2,4 @@
VI_VI_LOOP_NSHIFT
({
vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f);
-})
+}, false)
diff --git a/riscv/insns/vnsra_vv.h b/riscv/insns/vnsra_vv.h
index 555ce3f..59f255e 100644
--- a/riscv/insns/vnsra_vv.h
+++ b/riscv/insns/vnsra_vv.h
@@ -2,4 +2,4 @@
VI_VV_LOOP_NSHIFT
({
vd = vs2 >> (vs1 & (sew * 2 - 1));
-})
+}, true)
diff --git a/riscv/insns/vnsra_vx.h b/riscv/insns/vnsra_vx.h
index 05a55e3..adaa24c 100644
--- a/riscv/insns/vnsra_vx.h
+++ b/riscv/insns/vnsra_vx.h
@@ -2,4 +2,4 @@
VI_VX_LOOP_NSHIFT
({
vd = vs2 >> (rs1 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vnsrl_vi.h b/riscv/insns/vnsrl_vi.h
index d4dfcf0..91402c0 100644
--- a/riscv/insns/vnsrl_vi.h
+++ b/riscv/insns/vnsrl_vi.h
@@ -2,4 +2,4 @@
VI_VI_LOOP_NSHIFT
({
vd = vs2_u >> (zimm5 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vnsrl_vv.h b/riscv/insns/vnsrl_vv.h
index ab72b84..609299f 100644
--- a/riscv/insns/vnsrl_vv.h
+++ b/riscv/insns/vnsrl_vv.h
@@ -2,4 +2,4 @@
VI_VV_LOOP_NSHIFT
({
vd = vs2_u >> (vs1 & (sew * 2 - 1));
-})
+}, true)
diff --git a/riscv/insns/vnsrl_vx.h b/riscv/insns/vnsrl_vx.h
index e149b38..8356a2b 100644
--- a/riscv/insns/vnsrl_vx.h
+++ b/riscv/insns/vnsrl_vx.h
@@ -2,4 +2,4 @@
VI_VX_LOOP_NSHIFT
({
vd = vs2_u >> (rs1 & (sew * 2 - 1));
-})
+}, false)
diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h
index eff67b8..cab4a78 100644
--- a/riscv/insns/vrgather_vi.h
+++ b/riscv/insns/vrgather_vi.h
@@ -1,11 +1,14 @@
// vrgather.vi vd, vs2, zimm5 vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
reg_t zimm5 = insn.v_zimm5();
+
+VI_LOOP_BASE
+
for (reg_t i = P.VU.vstart; i < vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
@@ -25,5 +28,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
}
}
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h
index ce0c2a6..8266c95 100644
--- a/riscv/insns/vrgather_vv.h
+++ b/riscv/insns/vrgather_vv.h
@@ -1,15 +1,12 @@
// vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
- VI_LOOP_ELEMENT_SKIP();
- VI_CHECK_VREG_OVERLAP(rd_num, rs1_num);
- VI_CHECK_VREG_OVERLAP(rd_num, rs2_num);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs1() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2() && insn.rd() != insn.rs1());
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
+VI_LOOP_BASE
switch (sew) {
case e8: {
auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
@@ -33,7 +30,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
break;
}
}
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h
index e9ff3b1..15e16b7 100644
--- a/riscv/insns/vrgather_vx.h
+++ b/riscv/insns/vrgather_vx.h
@@ -1,15 +1,13 @@
// vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1];
-require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
-require_vector;
-reg_t vl = P.VU.vl;
-reg_t sew = P.VU.vsew;
-reg_t rd_num = insn.rd();
-reg_t rs1_num = insn.rs1();
-reg_t rs2_num = insn.rs2();
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
reg_t rs1 = RS1;
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
- VI_LOOP_ELEMENT_SKIP();
+VI_LOOP_BASE
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, rs1);
@@ -24,7 +22,4 @@ for (reg_t i = P.VU.vstart; i < vl; ++i) {
P.VU.elt<uint64_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint64_t>(rs2_num, rs1);
break;
}
-}
-
-VI_TAIL_ZERO(1);
-P.VU.vstart = 0;
+VI_LOOP_END;
diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h
index de2cb83..c361f08 100644
--- a/riscv/insns/vsadd_vi.h
+++ b/riscv/insns/vsadd_vi.h
@@ -1,4 +1,5 @@
// vsadd.vi vd, vs2 simm5
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
switch(sew) {
diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h
index 2152bab..ce0ef40 100644
--- a/riscv/insns/vsadd_vv.h
+++ b/riscv/insns/vsadd_vv.h
@@ -1,4 +1,5 @@
// vsadd.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
bool sat = false;
switch(sew) {
diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h
index 781e9e8..691f017 100644
--- a/riscv/insns/vsadd_vx.h
+++ b/riscv/insns/vsadd_vx.h
@@ -1,4 +1,5 @@
// vsadd.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
switch(sew) {
diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h
index 0069df7..04e2540 100644
--- a/riscv/insns/vslide1down_vx.h
+++ b/riscv/insns/vslide1down_vx.h
@@ -1,4 +1,9 @@
//vslide1down.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
VI_LOOP_BASE
if (i != vl - 1) {
switch (sew) {
diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h
index 50cc503..69ce0fd 100644
--- a/riscv/insns/vslide1up_vx.h
+++ b/riscv/insns/vslide1up_vx.h
@@ -1,8 +1,10 @@
//vslide1up.vx vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
require(insn.rd() != 0);
-VI_CHECK_SS
VI_LOOP_BASE
if (i != 0) {
if (sew == e8) {
diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h
index c21c5f2..dd58c1e 100644
--- a/riscv/insns/vslidedown_vi.h
+++ b/riscv/insns/vslidedown_vi.h
@@ -1,8 +1,14 @@
// vslidedown.vi vd, vs2, rs1
-VI_LOOP_BASE
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
const reg_t sh = insn.v_zimm5();
-bool is_valid = (i + sh) < P.VU.vlmax;
+VI_LOOP_BASE
+
reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
if (is_valid) {
offset = sh;
diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h
index 251740c..9881e0e 100644
--- a/riscv/insns/vslidedown_vx.h
+++ b/riscv/insns/vslidedown_vx.h
@@ -1,11 +1,17 @@
//vslidedown.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
+const reg_t sh = RS1;
VI_LOOP_BASE
-reg_t offset = RS1 == (reg_t)-1 ? ((RS1 & (P.VU.vlmax * 2 - 1)) + i) : RS1;
-bool is_valid = offset < P.VU.vlmax;
+reg_t offset = 0;
+bool is_valid = (i + sh) < P.VU.vlmax;
-if (!is_valid) {
- offset = 0;
+if (is_valid) {
+ offset = sh;
}
switch (sew) {
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
index 4135b20..64b4aca 100644
--- a/riscv/insns/vslideup_vi.h
+++ b/riscv/insns/vslideup_vi.h
@@ -1,8 +1,10 @@
// vslideup.vi vd, vs2, rs1
-if (insn.v_vm() == 0)
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
require(insn.rd() != 0);
-VI_CHECK_SS
const reg_t offset = insn.v_zimm5();
VI_LOOP_BASE
if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
index bf73fcd..063c061 100644
--- a/riscv/insns/vslideup_vx.h
+++ b/riscv/insns/vslideup_vx.h
@@ -1,4 +1,10 @@
//vslideup.vx vd, vs2, rs1
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require((insn.rd() & (P.VU.vlmul - 1)) == 0);
+require(insn.rd() != insn.rs2());
+if (P.VU.vlmul > 1 && insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
const reg_t offset = RS1;
VI_LOOP_BASE
if (P.VU.vstart < offset && i < offset)
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
index a0c7f99..0807899 100644
--- a/riscv/insns/vsmul_vv.h
+++ b/riscv/insns/vsmul_vv.h
@@ -1,33 +1,32 @@
// vsmul: Signed saturating and rounding fractional multiply
VRM xrm = P.VU.get_vround_mode();
-uint64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint64_t int_min = - (1 << (P.VU.vsew - 1));
-uint64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
-VI_VV_ULOOP
+VI_VV_LOOP
({
- uint64_t vs1_sign;
- uint64_t vs2_sign;
- uint64_t result_sign;
+ int64_t vs1_sign;
+ int64_t vs2_sign;
+ int64_t result_sign;
vs1_sign = vs1 & sign_mask;
vs2_sign = vs2 & sign_mask;
bool overflow = vs1 == vs2 && vs1 == int_min;
- uint128_t result = (uint128_t)vs1 * (uint128_t)vs2;
- result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+ int128_t result = (int128_t)vs1 * (int128_t)vs2;
result_sign = (vs1_sign ^ vs2_sign) & sign_mask;
+
// rounding
INT_ROUNDING(result, xrm, sew - 1);
- // unsigned shifting
+ // remove guard bits
result = result >> (sew - 1);
// saturation
if (overflow) {
result = int_max;
P.VU.vxsat = 1;
- } else {
- result |= result_sign;
}
+
vd = result;
})
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
index c7909c7..4326d8f 100644
--- a/riscv/insns/vsmul_vx.h
+++ b/riscv/insns/vsmul_vx.h
@@ -1,34 +1,33 @@
// vsmul
VRM xrm = P.VU.get_vround_mode();
-uint128_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
-uint128_t int_min = - (1 << (P.VU.vsew - 1));
-uint128_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
+int64_t int_max = (uint64_t(1) << (P.VU.vsew - 1)) - 1;
+int64_t int_min = - (1 << (P.VU.vsew - 1));
+int64_t sign_mask = uint64_t(1) << (P.VU.vsew - 1);
-VI_VX_ULOOP
+VI_VX_LOOP
({
- uint128_t rs1_sign;
- uint128_t vs2_sign;
- uint128_t result_sign;
+ int64_t rs1_sign;
+ int64_t vs2_sign;
+ int64_t result_sign;
rs1_sign = rs1 & sign_mask;
vs2_sign = vs2 & sign_mask;
bool overflow = rs1 == vs2 && rs1 == int_min;
- uint128_t result = (uint128_t)rs1 * (uint128_t)vs2;
- result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+ int128_t result = (int128_t)rs1 * (int128_t)vs2;
result_sign = (rs1_sign ^ vs2_sign) & sign_mask;
+
// rounding
INT_ROUNDING(result, xrm, sew - 1);
- // unsigned shifting
+ // remove guard bits
result = result >> (sew - 1);
- // saturation
+ // max saturation
if (overflow) {
result = int_max;
P.VU.vxsat = 1;
- } else {
- result |= result_sign;
}
+
vd = result;
})
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
index ef2390c..c854ca6 100644
--- a/riscv/insns/vssra_vi.h
+++ b/riscv/insns/vssra_vi.h
@@ -3,6 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VI_LOOP
({
int sh = simm5 & (sew - 1) & 0x1f;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ int64_t val = vs2;
+
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h
index e697b52..7bbc766 100644
--- a/riscv/insns/vssra_vv.h
+++ b/riscv/insns/vssra_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VV_LOOP
({
int sh = vs1 & (sew - 1);
+ int128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h
index 8d7ad20..068a22b 100644
--- a/riscv/insns/vssra_vx.h
+++ b/riscv/insns/vssra_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VX_LOOP
({
int sh = rs1 & (sew - 1);
+ int128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
index 8a10df0..bf554ca 100644
--- a/riscv/insns/vssrl_vi.h
+++ b/riscv/insns/vssrl_vi.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VI_ULOOP
({
int sh = simm5 & (sew - 1) & 0x1f;
+ uint64_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h
index f40cd90..a8e5d16 100644
--- a/riscv/insns/vssrl_vv.h
+++ b/riscv/insns/vssrl_vv.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VV_ULOOP
({
int sh = vs1 & (sew - 1);
+ uint128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h
index 5da3f75..ee3cb34 100644
--- a/riscv/insns/vssrl_vx.h
+++ b/riscv/insns/vssrl_vx.h
@@ -3,7 +3,8 @@ VRM xrm = P.VU.get_vround_mode();
VI_VX_ULOOP
({
int sh = rs1 & (sew - 1);
+ uint128_t val = vs2;
- INT_ROUNDING(vs2, xrm, sh);
- vd = vs2 >> sh;
+ INT_ROUNDING(val, xrm, sh);
+ vd = val >> sh;
})
diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h
index fd3ee21..18fe4fb 100644
--- a/riscv/insns/vssub_vv.h
+++ b/riscv/insns/vssub_vv.h
@@ -1,4 +1,5 @@
// vssub.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h
index 5c5c781..7a01125 100644
--- a/riscv/insns/vssub_vx.h
+++ b/riscv/insns/vssub_vx.h
@@ -1,4 +1,5 @@
// vssub.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h
index c5c74fe..e58076e 100644
--- a/riscv/insns/vssubu_vv.h
+++ b/riscv/insns/vssubu_vv.h
@@ -1,4 +1,5 @@
// vssubu.vv vd, vs2, vs1
+VI_CHECK_SSS(true);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h
index 12cfdbb..556c759 100644
--- a/riscv/insns/vssubu_vx.h
+++ b/riscv/insns/vssubu_vx.h
@@ -1,4 +1,5 @@
// vssubu.vx vd, vs2, rs1
+VI_CHECK_SSS(false);
VI_LOOP_BASE
bool sat = false;
diff --git a/riscv/insns/vsuxb_v.h b/riscv/insns/vsuxb_v.h
index cf928f8..03f1980 100644
--- a/riscv/insns/vsuxb_v.h
+++ b/riscv/insns/vsuxb_v.h
@@ -1,6 +1,7 @@
// vsuxb.v and vsxseg[2-8]b.v
-require_vector;
require(P.VU.vsew >= e8);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
reg_t vl = P.VU.vl;
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
@@ -8,30 +9,25 @@ reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (P.VU.vsew) {
case e8:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint8_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint8_t>(vs3, vreg_inx));
break;
case e16:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint16_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
break;
case e32:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
+ MMU.store_uint8(baseAddr + index[i],
P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsuxe_v.h b/riscv/insns/vsuxe_v.h
index 8bd7545..22d6fb5 100644
--- a/riscv/insns/vsuxe_v.h
+++ b/riscv/insns/vsuxe_v.h
@@ -1,38 +1,34 @@
// vsxe.v and vsxseg[2-8]e.v
-require_vector;
const reg_t sew = P.VU.vsew;
const reg_t vl = P.VU.vl;
require(sew >= e8 && sew <= e64);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (sew) {
case e8:
- if (is_valid)
- MMU.store_uint8(baseAddr + index[i],
- P.VU.elt<uint8_t>(vs3, vreg_inx));
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint8_t>(vs3, vreg_inx));
break;
case e16:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint16_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
break;
case e32:
- if (is_valid)
- MMU.store_uint32(baseAddr + index[i],
- P.VU.elt<uint32_t>(vs3, vreg_inx));
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint64(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint64(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsuxh_v.h b/riscv/insns/vsuxh_v.h
index 1d5a1bd..a34bc27 100644
--- a/riscv/insns/vsuxh_v.h
+++ b/riscv/insns/vsuxh_v.h
@@ -1,6 +1,7 @@
// vsxh.v and vsxseg[2-8]h.v
-require_vector;
require(P.VU.vsew >= e16);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
reg_t vl = P.VU.vl;
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
@@ -8,25 +9,21 @@ reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (P.VU.vsew) {
case e16:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint16_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
break;
case e32:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint32_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint16(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsuxw_v.h b/riscv/insns/vsuxw_v.h
index ec1a8fe..f42092d 100644
--- a/riscv/insns/vsuxw_v.h
+++ b/riscv/insns/vsuxw_v.h
@@ -1,6 +1,7 @@
// vsxw.v and vsxseg[2-8]w.v
-require_vector;
require(P.VU.vsew >= e32);
+VI_CHECK_SXX;
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \
reg_t vl = P.VU.vl;
reg_t baseAddr = RS1;
reg_t stride = insn.rs2();
@@ -8,20 +9,17 @@ reg_t vs3 = insn.rd();
reg_t vlmax = P.VU.vlmax;
VI_DUPLICATE_VREG(stride, vlmax);
for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
- bool is_valid = true;
VI_ELEMENT_SKIP(i);
VI_STRIP(i)
switch (P.VU.vsew) {
case e32:
- if (is_valid)
- MMU.store_uint32(baseAddr + index[i],
- P.VU.elt<uint32_t>(vs3, vreg_inx));
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
break;
case e64:
- if (is_valid)
- MMU.store_uint32(baseAddr + index[i],
- P.VU.elt<uint64_t>(vs3, vreg_inx));
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
break;
}
}
diff --git a/riscv/insns/vsxb_v.h b/riscv/insns/vsxb_v.h
index 3e50597..fb567fb 100644
--- a/riscv/insns/vsxb_v.h
+++ b/riscv/insns/vsxb_v.h
@@ -1,4 +1,5 @@
// vsxb.v and vsxseg[2-8]b.v
require(P.VU.vsew >= e8);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxe_v.h b/riscv/insns/vsxe_v.h
index 28984ac..78c6605 100644
--- a/riscv/insns/vsxe_v.h
+++ b/riscv/insns/vsxe_v.h
@@ -1,6 +1,7 @@
// vsxe.v and vsxseg[2-8]e.v
reg_t sew = P.VU.vsew;
require(sew >= e8 && sew <= e64);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
if (sew == e8) {
VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxh_v.h b/riscv/insns/vsxh_v.h
index 2e5506a..6b0fcfd 100644
--- a/riscv/insns/vsxh_v.h
+++ b/riscv/insns/vsxh_v.h
@@ -1,4 +1,5 @@
// vsxh.v and vsxseg[2-8]h.v
require(P.VU.vsew >= e16);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_ST(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vsxw_v.h b/riscv/insns/vsxw_v.h
index 9a2119f..2223d5b 100644
--- a/riscv/insns/vsxw_v.h
+++ b/riscv/insns/vsxw_v.h
@@ -1,4 +1,5 @@
// vsxw.v and vsxseg[2-8]w.v
require(P.VU.vsew >= e32);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
VI_ST(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vwsmacc_vv.h b/riscv/insns/vwsmacc_vv.h
index 86d588d..42c21db 100644
--- a/riscv/insns/vwsmacc_vv.h
+++ b/riscv/insns/vwsmacc_vv.h
@@ -1,2 +1,2 @@
// vwsmacc.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmacc_vx.h b/riscv/insns/vwsmacc_vx.h
index f0f04a3..2095665 100644
--- a/riscv/insns/vwsmacc_vx.h
+++ b/riscv/insns/vwsmacc_vx.h
@@ -1,2 +1,2 @@
// vwsmacc.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccsu_vv.h b/riscv/insns/vwsmaccsu_vv.h
index cf1aa1e..9df7833 100644
--- a/riscv/insns/vwsmaccsu_vv.h
+++ b/riscv/insns/vwsmaccsu_vv.h
@@ -1,2 +1,2 @@
// vwsmaccsu.vx vd, vs2, vs1
-VI_VVX_LOOP_WIDE_SU_SSMA(vs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccsu_vx.h b/riscv/insns/vwsmaccsu_vx.h
index 681c309..8565c98 100644
--- a/riscv/insns/vwsmaccsu_vx.h
+++ b/riscv/insns/vwsmaccsu_vx.h
@@ -1,2 +1,2 @@
// vwsmaccsu.vx vd, vs2, rs1
-VI_VVX_LOOP_WIDE_SU_SSMA(rs1);
+VI_VVX_LOOP_WIDE_SU_SSMA(rs1, false);
diff --git a/riscv/insns/vwsmaccu_vv.h b/riscv/insns/vwsmaccu_vv.h
index e873d93..7075247 100644
--- a/riscv/insns/vwsmaccu_vv.h
+++ b/riscv/insns/vwsmaccu_vv.h
@@ -1,2 +1,2 @@
// vwsmaccu.vv vd, vs2, vs1
-VI_VVX_LOOP_WIDE_USSMA(vs1);
+VI_VVX_LOOP_WIDE_USSMA(vs1, true);
diff --git a/riscv/insns/vwsmaccu_vx.h b/riscv/insns/vwsmaccu_vx.h
index 7318fa7..15027cf 100644
--- a/riscv/insns/vwsmaccu_vx.h
+++ b/riscv/insns/vwsmaccu_vx.h
@@ -1,2 +1,2 @@
// vwsmaccu vd, vs2, rs1
-VI_VVX_LOOP_WIDE_USSMA(rs1);
+VI_VVX_LOOP_WIDE_USSMA(rs1, false);