author    Chih-Min Chao <chihmin.chao@sifive.com>  2019-06-06 03:24:27 -0700
committer Chih-Min Chao <chihmin.chao@sifive.com>  2019-06-18 08:56:11 -0700
commit    655aedc0ebd2326d69d389bc714c2d622bf2cb08 (patch)
tree      aa2cf79905906cde9ff6d10c63d1499fb4a484a1
parent    235aa58bfb439c9782defe8bdd21f792e40aac31 (diff)
rvv: add integer/fixed-point/mask/reduction/permutation instructions
based on v-spec 0.7.1
support sections: 12/13/15.1 ~ 15.2/16/17
element size: 8/16/32/64
support ediv: 1

Signed-off-by: Bruce Hoult <bruce@hoult.org>
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Signed-off-by: Dave Wen <dave.wen@sifive.com>
-rw-r--r--riscv/decode.h1041
-rw-r--r--riscv/insns/vaadd_vi.h9
-rw-r--r--riscv/insns/vaadd_vv.h2
-rw-r--r--riscv/insns/vaadd_vx.h2
-rw-r--r--riscv/insns/vadc_vim.h11
-rw-r--r--riscv/insns/vadc_vvm.h11
-rw-r--r--riscv/insns/vadc_vxm.h11
-rw-r--r--riscv/insns/vadd_vi.h5
-rw-r--r--riscv/insns/vadd_vv.h5
-rw-r--r--riscv/insns/vadd_vx.h5
-rw-r--r--riscv/insns/vand_vi.h5
-rw-r--r--riscv/insns/vand_vv.h5
-rw-r--r--riscv/insns/vand_vx.h5
-rw-r--r--riscv/insns/vasub_vv.h2
-rw-r--r--riscv/insns/vasub_vx.h2
-rw-r--r--riscv/insns/vcompress_vm.h41
-rw-r--r--riscv/insns/vdiv_vv.h10
-rw-r--r--riscv/insns/vdiv_vx.h10
-rw-r--r--riscv/insns/vdivu_vv.h8
-rw-r--r--riscv/insns/vdivu_vx.h8
-rw-r--r--riscv/insns/vdot_vv.h5
-rw-r--r--riscv/insns/vdotu_vv.h5
-rw-r--r--riscv/insns/vext_x_v.h30
-rw-r--r--riscv/insns/vid_v.h30
-rw-r--r--riscv/insns/viota_m.h52
-rw-r--r--riscv/insns/vmacc_vv.h5
-rw-r--r--riscv/insns/vmacc_vx.h5
-rw-r--r--riscv/insns/vmadc_vim.h14
-rw-r--r--riscv/insns/vmadc_vvm.h14
-rw-r--r--riscv/insns/vmadc_vxm.h14
-rw-r--r--riscv/insns/vmadd_vv.h5
-rw-r--r--riscv/insns/vmadd_vx.h5
-rw-r--r--riscv/insns/vmand_mm.h2
-rw-r--r--riscv/insns/vmandnot_mm.h2
-rw-r--r--riscv/insns/vmax_vv.h10
-rw-r--r--riscv/insns/vmax_vx.h10
-rw-r--r--riscv/insns/vmaxu_vv.h9
-rw-r--r--riscv/insns/vmaxu_vx.h9
-rw-r--r--riscv/insns/vmerge_vim.h9
-rw-r--r--riscv/insns/vmerge_vvm.h9
-rw-r--r--riscv/insns/vmerge_vxm.h9
-rw-r--r--riscv/insns/vmfeq_vf.h5
-rw-r--r--riscv/insns/vmfeq_vv.h5
-rw-r--r--riscv/insns/vmfge_vf.h5
-rw-r--r--riscv/insns/vmfgt_vf.h5
-rw-r--r--riscv/insns/vmfirst_m.h20
-rw-r--r--riscv/insns/vmfle_vf.h5
-rw-r--r--riscv/insns/vmfle_vv.h5
-rw-r--r--riscv/insns/vmflt_vf.h5
-rw-r--r--riscv/insns/vmflt_vv.h5
-rw-r--r--riscv/insns/vmfne_vf.h5
-rw-r--r--riscv/insns/vmfne_vv.h5
-rw-r--r--riscv/insns/vmford_vf.h5
-rw-r--r--riscv/insns/vmford_vv.h5
-rw-r--r--riscv/insns/vmin_vv.h11
-rw-r--r--riscv/insns/vmin_vx.h11
-rw-r--r--riscv/insns/vminu_vv.h9
-rw-r--r--riscv/insns/vminu_vx.h10
-rw-r--r--riscv/insns/vmnand_mm.h2
-rw-r--r--riscv/insns/vmnor_mm.h2
-rw-r--r--riscv/insns/vmor_mm.h2
-rw-r--r--riscv/insns/vmornot_mm.h2
-rw-r--r--riscv/insns/vmpopc_m.h24
-rw-r--r--riscv/insns/vmsbc_vvm.h14
-rw-r--r--riscv/insns/vmsbc_vxm.h14
-rw-r--r--riscv/insns/vmsbf_m.h34
-rw-r--r--riscv/insns/vmseq_vi.h5
-rw-r--r--riscv/insns/vmseq_vv.h6
-rw-r--r--riscv/insns/vmseq_vx.h5
-rw-r--r--riscv/insns/vmsgt_vi.h5
-rw-r--r--riscv/insns/vmsgt_vx.h5
-rw-r--r--riscv/insns/vmsgtu_vi.h5
-rw-r--r--riscv/insns/vmsgtu_vx.h5
-rw-r--r--riscv/insns/vmsif_m.h34
-rw-r--r--riscv/insns/vmsle_vi.h5
-rw-r--r--riscv/insns/vmsle_vv.h5
-rw-r--r--riscv/insns/vmsle_vx.h5
-rw-r--r--riscv/insns/vmsleu_vi.h5
-rw-r--r--riscv/insns/vmsleu_vv.h5
-rw-r--r--riscv/insns/vmsleu_vx.h5
-rw-r--r--riscv/insns/vmslt_vv.h5
-rw-r--r--riscv/insns/vmslt_vx.h5
-rw-r--r--riscv/insns/vmsltu_vv.h5
-rw-r--r--riscv/insns/vmsltu_vx.h5
-rw-r--r--riscv/insns/vmsne_vi.h5
-rw-r--r--riscv/insns/vmsne_vv.h5
-rw-r--r--riscv/insns/vmsne_vx.h5
-rw-r--r--riscv/insns/vmsof_m.h32
-rw-r--r--riscv/insns/vmul_vv.h5
-rw-r--r--riscv/insns/vmul_vx.h5
-rw-r--r--riscv/insns/vmulh_vv.h5
-rw-r--r--riscv/insns/vmulh_vx.h5
-rw-r--r--riscv/insns/vmulhsu_vv.h37
-rw-r--r--riscv/insns/vmulhsu_vx.h37
-rw-r--r--riscv/insns/vmulhu_vv.h5
-rw-r--r--riscv/insns/vmulhu_vx.h5
-rw-r--r--riscv/insns/vmv_s_x.h45
-rw-r--r--riscv/insns/vmv_v_i.h5
-rw-r--r--riscv/insns/vmv_v_v.h5
-rw-r--r--riscv/insns/vmv_v_x.h5
-rw-r--r--riscv/insns/vmxnor_mm.h2
-rw-r--r--riscv/insns/vmxor_mm.h2
-rw-r--r--riscv/insns/vnclip_vi.h24
-rw-r--r--riscv/insns/vnclip_vv.h30
-rw-r--r--riscv/insns/vnclip_vx.h29
-rw-r--r--riscv/insns/vnclipu_vi.h20
-rw-r--r--riscv/insns/vnclipu_vv.h26
-rw-r--r--riscv/insns/vnclipu_vx.h26
-rw-r--r--riscv/insns/vnmsac_vv.h5
-rw-r--r--riscv/insns/vnmsac_vx.h5
-rw-r--r--riscv/insns/vnmsub_vv.h5
-rw-r--r--riscv/insns/vnmsub_vx.h5
-rw-r--r--riscv/insns/vnsra_vi.h5
-rw-r--r--riscv/insns/vnsra_vv.h5
-rw-r--r--riscv/insns/vnsra_vx.h5
-rw-r--r--riscv/insns/vnsrl_vi.h5
-rw-r--r--riscv/insns/vnsrl_vv.h5
-rw-r--r--riscv/insns/vnsrl_vx.h5
-rw-r--r--riscv/insns/vor_vi.h5
-rw-r--r--riscv/insns/vor_vv.h5
-rw-r--r--riscv/insns/vor_vx.h5
-rw-r--r--riscv/insns/vredand_vs.h5
-rw-r--r--riscv/insns/vredmax_vs.h5
-rw-r--r--riscv/insns/vredmaxu_vs.h5
-rw-r--r--riscv/insns/vredmin_vs.h5
-rw-r--r--riscv/insns/vredminu_vs.h5
-rw-r--r--riscv/insns/vredor_vs.h5
-rw-r--r--riscv/insns/vredsum_vs.h5
-rw-r--r--riscv/insns/vredxor_vs.h5
-rw-r--r--riscv/insns/vrem_vv.h11
-rw-r--r--riscv/insns/vrem_vx.h10
-rw-r--r--riscv/insns/vremu_vv.h8
-rw-r--r--riscv/insns/vremu_vx.h8
-rw-r--r--riscv/insns/vrgather_vi.h29
-rw-r--r--riscv/insns/vrgather_vv.h39
-rw-r--r--riscv/insns/vrgather_vx.h30
-rw-r--r--riscv/insns/vrsub_vi.h5
-rw-r--r--riscv/insns/vrsub_vx.h5
-rw-r--r--riscv/insns/vsadd_vi.h27
-rw-r--r--riscv/insns/vsadd_vv.h28
-rw-r--r--riscv/insns/vsadd_vx.h27
-rw-r--r--riscv/insns/vsaddu_vi.h11
-rw-r--r--riscv/insns/vsaddu_vv.h11
-rw-r--r--riscv/insns/vsaddu_vx.h12
-rw-r--r--riscv/insns/vsbc_vvm.h11
-rw-r--r--riscv/insns/vsbc_vxm.h11
-rw-r--r--riscv/insns/vslide1down_vx.h42
-rw-r--r--riscv/insns/vslide1up_vx.h32
-rw-r--r--riscv/insns/vslidedown_vi.h33
-rw-r--r--riscv/insns/vslidedown_vx.h33
-rw-r--r--riscv/insns/vslideup_vi.h33
-rw-r--r--riscv/insns/vslideup_vx.h29
-rw-r--r--riscv/insns/vsll_vi.h5
-rw-r--r--riscv/insns/vsll_vv.h5
-rw-r--r--riscv/insns/vsll_vx.h5
-rw-r--r--riscv/insns/vsmul_vv.h33
-rw-r--r--riscv/insns/vsmul_vx.h34
-rw-r--r--riscv/insns/vsra_vi.h5
-rw-r--r--riscv/insns/vsra_vv.h5
-rw-r--r--riscv/insns/vsra_vx.h5
-rw-r--r--riscv/insns/vsrl_vi.h5
-rw-r--r--riscv/insns/vsrl_vv.h5
-rw-r--r--riscv/insns/vsrl_vx.h5
-rw-r--r--riscv/insns/vssra_vi.h8
-rw-r--r--riscv/insns/vssra_vv.h9
-rw-r--r--riscv/insns/vssra_vx.h9
-rw-r--r--riscv/insns/vssrl_vi.h9
-rw-r--r--riscv/insns/vssrl_vv.h9
-rw-r--r--riscv/insns/vssrl_vx.h9
-rw-r--r--riscv/insns/vssub_vv.h28
-rw-r--r--riscv/insns/vssub_vx.h28
-rw-r--r--riscv/insns/vssubu_vv.h29
-rw-r--r--riscv/insns/vssubu_vx.h28
-rw-r--r--riscv/insns/vsub_vv.h5
-rw-r--r--riscv/insns/vsub_vx.h5
-rw-r--r--riscv/insns/vwadd_vv.h6
-rw-r--r--riscv/insns/vwadd_vx.h6
-rw-r--r--riscv/insns/vwadd_wv.h6
-rw-r--r--riscv/insns/vwadd_wx.h6
-rw-r--r--riscv/insns/vwaddu_vv.h6
-rw-r--r--riscv/insns/vwaddu_vx.h6
-rw-r--r--riscv/insns/vwaddu_wv.h6
-rw-r--r--riscv/insns/vwaddu_wx.h6
-rw-r--r--riscv/insns/vwmacc_vv.h6
-rw-r--r--riscv/insns/vwmacc_vx.h6
-rw-r--r--riscv/insns/vwmaccsu_vv.h6
-rw-r--r--riscv/insns/vwmaccsu_vx.h6
-rw-r--r--riscv/insns/vwmaccu_vv.h6
-rw-r--r--riscv/insns/vwmaccu_vx.h6
-rw-r--r--riscv/insns/vwmaccus_vx.h6
-rw-r--r--riscv/insns/vwmul_vv.h6
-rw-r--r--riscv/insns/vwmul_vx.h6
-rw-r--r--riscv/insns/vwmulsu_vv.h16
-rw-r--r--riscv/insns/vwmulsu_vx.h16
-rw-r--r--riscv/insns/vwmulu_vv.h6
-rw-r--r--riscv/insns/vwmulu_vx.h6
-rw-r--r--riscv/insns/vwredsum_vs.h5
-rw-r--r--riscv/insns/vwredsumu_vs.h5
-rw-r--r--riscv/insns/vwsmacc_vv.h2
-rw-r--r--riscv/insns/vwsmacc_vx.h2
-rw-r--r--riscv/insns/vwsmaccsu_vv.h2
-rw-r--r--riscv/insns/vwsmaccsu_vx.h2
-rw-r--r--riscv/insns/vwsmaccu_vv.h2
-rw-r--r--riscv/insns/vwsmaccu_vx.h2
-rw-r--r--riscv/insns/vwsmaccus_vx.h2
-rw-r--r--riscv/insns/vwsub_vv.h6
-rw-r--r--riscv/insns/vwsub_vx.h6
-rw-r--r--riscv/insns/vwsub_wv.h6
-rw-r--r--riscv/insns/vwsub_wx.h6
-rw-r--r--riscv/insns/vwsubu_vv.h6
-rw-r--r--riscv/insns/vwsubu_vx.h6
-rw-r--r--riscv/insns/vwsubu_wv.h6
-rw-r--r--riscv/insns/vwsubu_wx.h6
-rw-r--r--riscv/insns/vxor_vi.h5
-rw-r--r--riscv/insns/vxor_vv.h5
-rw-r--r--riscv/insns/vxor_vx.h5
-rw-r--r--riscv/riscv.mk.in206
217 files changed, 3473 insertions, 0 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index 6cbf934..ca6a999 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -11,6 +11,7 @@
# error spike requires a little-endian host
#endif
+#include <algorithm>
#include <cstdint>
#include <string.h>
#include <strings.h>
@@ -23,6 +24,8 @@
typedef int64_t sreg_t;
typedef uint64_t reg_t;
+typedef __int128 int128_t;
+typedef unsigned __int128 uint128_t;
const int NXPR = 32;
const int NFPR = 32;
@@ -63,6 +66,12 @@ const int NCSR = 4096;
#define MAX_INSN_LENGTH 8
#define PC_ALIGN 2
+#ifndef TAIL_ZEROING
+ #define TAIL_ZEROING true
+#else
+ #define TAIL_ZEROING false
+#endif
+
typedef uint64_t insn_bits_t;
class insn_t
{
@@ -141,8 +150,10 @@ private:
#define P (*p)
#define READ_REG(reg) STATE.XPR[reg]
#define READ_FREG(reg) STATE.FPR[reg]
+#define RD READ_REG(insn.rd())
#define RS1 READ_REG(insn.rs1())
#define RS2 READ_REG(insn.rs2())
+#define RS3 READ_REG(insn.rs3())
#define WRITE_RD(value) WRITE_REG(insn.rd(), value)
#ifndef RISCV_ENABLE_COMMITLOG
@@ -288,6 +299,1036 @@ inline freg_t f128_negate(freg_t a)
throw trap_illegal_instruction(0); \
(which); })
+/* For debug only. This will fail if the native machine's float types are not IEEE */
+inline float to_f(float32_t f){float r; memcpy(&r, &f, sizeof(r)); return r;}
+inline double to_f(float64_t f){double r; memcpy(&r, &f, sizeof(r)); return r;}
+inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); return r;}
+
+// Vector macros
+#define e8 8 // 8b elements
+#define e16 16 // 16b elements
+#define e32 32 // 32b elements
+#define e64 64 // 64b elements
+#define e128 128 // 128b elements
+
+#define vsext(x, sew) (((sreg_t)(x) << (64-sew)) >> (64-sew))
+#define vzext(x, sew) (((reg_t)(x) << (64-sew)) >> (64-sew))
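// A minimal worked sketch of the two helpers above (editor's example): both keep only
// the low `sew` bits of x; vsext then sign-extends them to 64 bits, vzext zero-extends.
//   vsext(0xFFu, 8) -> (sreg_t)-1   (0xFF read as a signed 8-bit value)
//   vzext(0xFFu, 8) -> 0xFF         (upper bits cleared)
//   vsext(0x7Fu, 8) -> 0x7F         (positive values pass through unchanged)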
+
+//
+// vector: masking skip helper
+//
+#define VI_LOOP_ELEMENT_SKIP(BODY) \
+ const int mlen = P.VU.vmlen; \
+ const int midx = (mlen * i) / 64; \
+ const int mpos = (mlen * i) % 64; \
+ if (insn.v_vm() == 0) { \
+ BODY; \
+ bool skip = ((P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1) == 0; \
+ if (skip) \
+ continue; \
+ }
+
+#define VI_ELEMENT_SKIP(inx) \
+ if (inx >= vl && TAIL_ZEROING) { \
+ is_valid = false; \
+ } else if (inx >= vl && !TAIL_ZEROING) { \
+ continue; \
+ } else if (inx < P.VU.vstart) { \
+ continue; \
+ } else { \
+ VI_LOOP_ELEMENT_SKIP(); \
+ }
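// Editor's sketch of the mask addressing used by the helpers above: with vmlen mask
// bits per element, element i's mask bit sits in 64-bit word (vmlen*i)/64 of v0 at
// bit offset (vmlen*i)%64. The hypothetical helper below only restates that arithmetic.
static inline bool sketch_mask_bit(const uint64_t *v0_words, int i, int vmlen)
{
  const int midx = (vmlen * i) / 64;      // which 64-bit chunk of the mask register
  const int mpos = (vmlen * i) % 64;      // which bit inside that chunk
  return (v0_words[midx] >> mpos) & 0x1;  // 1 means the element is active
}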
+
+//
+// vector: operation and register access check helper
+//
+static inline bool is_overlaped(const int astart, const int asize,
+ const int bstart, const int bsize)
+{
+ const int aend = astart + asize;
+ const int bend = bstart + bsize;
+ return std::max(aend, bend) - std::min(astart, bstart) < asize + bsize;
+}
+
+#define VI_NARROW_CHECK_COMMON \
+ require(P.VU.vlmul <= 4); \
+ require(P.VU.vsew * 2 <= P.VU.ELEN); \
+ require(insn.rs2() + P.VU.vlmul * 2 <= 32);
+
+#define VI_WIDE_CHECK_COMMON \
+ require(!P.VU.vill);\
+ require(P.VU.vlmul <= 4); \
+ require(P.VU.vsew * 2 <= P.VU.ELEN); \
+ require(insn.rd() + P.VU.vlmul * 2 <= 32); \
+ if (insn.v_vm() == 0) \
+ require(insn.rd() != 0);
+
+#define VI_CHECK_VREG_OVERLAP(v1, v2) \
+ require(!is_overlaped(v1, P.VU.vlmul, v2, P.VU.vlmul));
+
+#define VI_CHECK_SS \
+ require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul));
+
+#define VI_CHECK_SD \
+ require(!is_overlaped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2));
+
+#define VI_CHECK_DSS(is_rs) \
+ VI_WIDE_CHECK_COMMON; \
+ require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \
+ if (is_rs) \
+ require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul));
+
+#define VI_CHECK_DDS(is_rs) \
+ VI_WIDE_CHECK_COMMON; \
+ require(insn.rs2() + P.VU.vlmul * 2 <= 32); \
+ if (is_rs) \
+ require(!is_overlaped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul));
+
+//
+// vector: loop header and end helper
+//
+#define VI_GENERAL_LOOP_BASE \
+ require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); \
+ require(!P.VU.vill);\
+ reg_t vl = P.VU.vl; \
+ reg_t sew = P.VU.vsew; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i){
+
+#define VI_TAIL_ZERO(elm) \
+ if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING) { \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * elm)); \
+ memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * elm)); \
+ }
+
+#define VI_TAIL_ZERO_MASK(dst) \
+ if (vl != 0 && TAIL_ZEROING){ \
+ for (reg_t i=vl; i<P.VU.vlmax; ++i){ \
+ const int mlen = P.VU.vmlen; \
+ const int midx = (mlen * i) / 64; \
+ const int mpos = (mlen * i) % 64; \
+ uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
+ uint64_t &vdi = P.VU.elt<uint64_t>(dst, midx); \
+ vdi = (vdi & ~mmask);\
+ }\
+ }\
+
+#define VI_LOOP_BASE \
+ VI_GENERAL_LOOP_BASE \
+ VI_LOOP_ELEMENT_SKIP();
+
+#define VI_LOOP_END \
+ } \
+ if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 1)); \
+ memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 1)); \
+ }\
+ P.VU.vstart = 0;
+
+#define VI_LOOP_END_NO_TAIL_ZERO \
+ } \
+ P.VU.vstart = 0;
+
+#define VI_LOOP_WIDEN_END \
+ } \
+ if (vl != 0 && vl < P.VU.vlmax && TAIL_ZEROING){ \
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, vl * ((sew >> 3) * 2)); \
+ memset(tail, 0, (P.VU.vlmax - vl) * ((sew >> 3) * 2)); \
+ }\
+ P.VU.vstart = 0;
+
+#define VI_LOOP_REDUCTION_END(x) \
+ } \
+ if (vl > 0 && TAIL_ZEROING) { \
+ vd_0_des = vd_0_res; \
+ uint8_t *tail = (uint8_t *)&P.VU.elt<type_sew_t<x>::type>(rd_num, 1); \
+ memset(tail, 0, (P.VU.get_vlen() - x) >> 3); \
+ } \
+ P.VU.vstart = 0;
+
+#define VI_LOOP_CMP_BASE \
+ require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64); \
+ require(!P.VU.vill);\
+ reg_t vl = P.VU.vl; \
+ reg_t sew = P.VU.vsew; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ VI_LOOP_ELEMENT_SKIP(); \
+ uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
+ uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx); \
+ uint64_t res = 0;
+
+#define VI_LOOP_CMP_END \
+ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
+ } \
+ VI_TAIL_ZERO_MASK(rd_num); \
+ P.VU.vstart = 0;
+
+#define VI_LOOP_MASK(op) \
+ require(P.VU.vsew <= e64); \
+ reg_t vl = P.VU.vl; \
+ for (reg_t i = P.VU.vstart; i < vl; ++i) { \
+ int mlen = P.VU.vmlen; \
+ int midx = (mlen * i) / 64; \
+ int mpos = (mlen * i) % 64; \
+ uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
+ uint64_t vs2 = P.VU.elt<uint64_t>(insn.rs2(), midx); \
+ uint64_t vs1 = P.VU.elt<uint64_t>(insn.rs1(), midx); \
+ uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \
+ res = (res & ~mmask) | ((op) & (1ULL << mpos)); \
+ } \
+ \
+ if (TAIL_ZEROING) {\
+ for (reg_t i = vl; i < P.VU.vlmax && i > 0; ++i) { \
+ int mlen = P.VU.vmlen; \
+ int midx = (mlen * i) / 64; \
+ int mpos = (mlen * i) % 64; \
+ uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos); \
+ uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx); \
+ res = (res & ~mmask); \
+ } \
+ } \
+ P.VU.vstart = 0;
+
+#define VI_LOOP_NSHIFT_BASE \
+ require(P.VU.vsew <= e32); \
+ if (insn.rd() != 0){ \
+ VI_CHECK_SD; \
+ } \
+ VI_GENERAL_LOOP_BASE; \
+ VI_LOOP_ELEMENT_SKIP({\
+ require(!(insn.rd() == 0 && P.VU.vlmul > 1));\
+ });
+
+
+#define INT_ROUNDING(result, xrm, gb) \
+ if (gb > 0) { \
+ switch(xrm) {\
+ case VRM::RNU:\
+ result += ((uint64_t)1 << ((gb) - 1));\
+ break;\
+ case VRM::RNE:\
+ if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x1){\
+ result -= ((uint64_t)1 << ((gb) - 1));\
+ }else if ((result & ((uint64_t)0x3 << ((gb) - 1))) == 0x3){\
+ result += ((uint64_t)1 << ((gb) - 1));\
+ }\
+ break;\
+ case VRM::RDN:\
+ result = (result >> ((gb) - 1)) << ((gb) - 1);\
+ break;\
+ case VRM::ROD:\
+ result |= ((uint64_t)1ul << (gb)); \
+ break;\
+ case VRM::INVALID_RM:\
+        assert(false);\
+ } \
+ } else if (gb == 0 && xrm == VRM::ROD) { \
+ result |= 1ul; \
+ }
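// Editor's worked sketch of the rounding step above: `result` still carries gb guard
// bits that the caller shifts out afterwards.  With result = 11 (0b1011) and gb = 2:
//   RNU: 11 + (1 << 1) = 13, then 13 >> 2 = 3   (round to nearest, ties upward)
//   RDN: guard bits truncated, 11 >> 2 = 2
//   ROD: 11 | (1 << 2) = 15, then 15 >> 2 = 3   (low result bit jammed to 1)
// The hypothetical helper below repeats only the RNU adjustment plus the final shift.
static inline uint64_t sketch_round_rnu(uint64_t result, unsigned gb)
{
  if (gb > 0)
    result += (uint64_t)1 << (gb - 1);  // same adjustment as the VRM::RNU case
  return result >> gb;                  // the shift each caller applies afterwards
}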
+
+
+//
+// vector: integer and masking operand access helper
+//
+#define VXI_PARAMS(x) \
+ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ type_sew_t<x>::type vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \
+ type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \
+ type_sew_t<x>::type rs1 = (type_sew_t<x>::type)RS1; \
+ type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5();
+
+#define VV_U_PARAMS(x) \
+ type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i); \
+ type_usew_t<x>::type vs1 = P.VU.elt<type_usew_t<x>::type>(rs1_num, i); \
+ type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
+
+#define VX_U_PARAMS(x) \
+ type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i); \
+ type_usew_t<x>::type rs1 = (type_usew_t<x>::type)RS1; \
+ type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
+
+#define VI_U_PARAMS(x) \
+ type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i); \
+ type_usew_t<x>::type simm5 = (type_usew_t<x>::type)insn.v_zimm5(); \
+ type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
+
+#define VV_PARAMS(x) \
+ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ type_sew_t<x>::type vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \
+ type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
+
+#define VX_PARAMS(x) \
+ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ type_sew_t<x>::type rs1 = (type_sew_t<x>::type)RS1; \
+ type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
+
+#define VI_PARAMS(x) \
+ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ type_sew_t<x>::type simm5 = (type_sew_t<x>::type)insn.v_simm5(); \
+ type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
+
+#define XV_PARAMS(x) \
+ type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, RS1);
+
+#define VI_XI_SLIDEDOWN_PARAMS(x, off) \
+ auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i + off);
+
+#define VI_XI_SLIDEUP_PARAMS(x, offset) \
+ auto &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i - offset);
+
+#define VI_NSHIFT_PARAMS(sew1, sew2) \
+ auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \
+ auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
+ auto zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5();
+
+#define VX_NSHIFT_PARAMS(sew1, sew2) \
+ auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \
+ auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
+ auto rs1 = (type_sew_t<sew1>::type)RS1;
+
+#define VV_NSHIFT_PARAMS(sew1, sew2) \
+ auto &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \
+ auto vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
+ auto vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i);
+
+#define XI_CARRY_PARAMS(x) \
+ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \
+ auto rs1 = (type_sew_t<x>::type)RS1; \
+ auto simm5 = (type_sew_t<x>::type)insn.v_simm5(); \
+ auto &vd = P.VU.elt<uint64_t>(rd_num, midx);
+
+#define VV_CARRY_PARAMS(x) \
+ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \
+ auto vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \
+ auto &vd = P.VU.elt<uint64_t>(rd_num, midx);
+
+//
+// vector: integer and masking operation loop
+//
+
+// comparison result to mask register
+#define VI_VV_LOOP_CMP(BODY) \
+ VI_LOOP_CMP_BASE \
+ if (sew == e8){ \
+ VV_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VV_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VV_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VV_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_CMP_END
+
+#define VI_VX_LOOP_CMP(BODY) \
+ VI_LOOP_CMP_BASE \
+ if (sew == e8){ \
+ VX_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VX_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VX_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VX_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_CMP_END
+
+#define VI_VI_LOOP_CMP(BODY) \
+ VI_LOOP_CMP_BASE \
+ if (sew == e8){ \
+ VI_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VI_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VI_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VI_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_CMP_END
+
+#define VI_VV_ULOOP_CMP(BODY) \
+ VI_LOOP_CMP_BASE \
+ if (sew == e8){ \
+ VV_U_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VV_U_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VV_U_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VV_U_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_CMP_END
+
+#define VI_VX_ULOOP_CMP(BODY) \
+ VI_LOOP_CMP_BASE \
+ if (sew == e8){ \
+ VX_U_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VX_U_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VX_U_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VX_U_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_CMP_END
+
+#define VI_VI_ULOOP_CMP(BODY) \
+ VI_LOOP_CMP_BASE \
+ if (sew == e8){ \
+ VI_U_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VI_U_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VI_U_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VI_U_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_CMP_END
+
+// merge and copy loop
+#define VI_VVXI_MERGE_LOOP(BODY) \
+ VI_GENERAL_LOOP_BASE \
+ if (sew == e8){ \
+ VXI_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VXI_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VXI_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VXI_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+// reduction loop - signed
+#define VI_LOOP_REDUCTION_BASE(x) \
+ require(x == e8 || x == e16 || x == e32 || x == e64); \
+ require(!P.VU.vill);\
+ reg_t vl = P.VU.vl; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ auto &vd_0_des = P.VU.elt<type_sew_t<x>::type>(rd_num, 0); \
+ auto vd_0_res = P.VU.elt<type_sew_t<x>::type>(rs1_num, 0); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ VI_LOOP_ELEMENT_SKIP(); \
+ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \
+
+#define REDUCTION_LOOP(x, BODY) \
+ VI_LOOP_REDUCTION_BASE(x) \
+ BODY; \
+ VI_LOOP_REDUCTION_END(x)
+
+#define VI_VV_LOOP_REDUCTION(BODY) \
+ reg_t sew = P.VU.vsew; \
+ if (sew == e8) { \
+ REDUCTION_LOOP(e8, BODY) \
+ } else if(sew == e16) { \
+ REDUCTION_LOOP(e16, BODY) \
+ } else if(sew == e32) { \
+ REDUCTION_LOOP(e32, BODY) \
+ } else if(sew == e64) { \
+ REDUCTION_LOOP(e64, BODY) \
+ }
+
+// reduction loop - unsigned
+#define VI_ULOOP_REDUCTION_BASE(x) \
+ require(x == e8 || x == e16 || x == e32 || x == e64); \
+ reg_t vl = P.VU.vl; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ auto &vd_0_des = P.VU.elt<type_usew_t<x>::type>(rd_num, 0); \
+ auto vd_0_res = P.VU.elt<type_usew_t<x>::type>(rs1_num, 0); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ VI_LOOP_ELEMENT_SKIP(); \
+ auto vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
+
+#define REDUCTION_ULOOP(x, BODY) \
+ VI_ULOOP_REDUCTION_BASE(x) \
+ BODY; \
+ VI_LOOP_REDUCTION_END(x)
+
+#define VI_VV_ULOOP_REDUCTION(BODY) \
+ reg_t sew = P.VU.vsew; \
+ if (sew == e8){ \
+ REDUCTION_ULOOP(e8, BODY) \
+ } else if(sew == e16) { \
+ REDUCTION_ULOOP(e16, BODY) \
+ } else if(sew == e32) { \
+ REDUCTION_ULOOP(e32, BODY) \
+ } else if(sew == e64) { \
+ REDUCTION_ULOOP(e64, BODY) \
+ }
+
+// general VXI signed/unsigned loop
+#define VI_VV_ULOOP(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VV_U_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VV_U_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VV_U_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VV_U_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VV_LOOP(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VV_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VV_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VV_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VV_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VX_ULOOP(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VX_U_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VX_U_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VX_U_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VX_U_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VX_LOOP(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VX_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VX_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VX_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VX_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VI_ULOOP(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_U_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VI_U_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VI_U_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VI_U_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VI_LOOP(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VI_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VI_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VI_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+// narrow operation loop
+#define VI_VV_LOOP_NARROW(BODY) \
+VI_NARROW_CHECK_COMMON; \
+VI_LOOP_BASE \
+if (sew == e8){ \
+ VI_NARROW_SHIFT(e8, e16) \
+ BODY; \
+}else if(sew == e16){ \
+ VI_NARROW_SHIFT(e16, e32) \
+ BODY; \
+}else if(sew == e32){ \
+ VI_NARROW_SHIFT(e32, e64) \
+ BODY; \
+} \
+VI_LOOP_END
+
+#define VI_NARROW_SHIFT(sew1, sew2) \
+ type_usew_t<sew1>::type &vd = P.VU.elt<type_usew_t<sew1>::type>(rd_num, i); \
+ type_usew_t<sew2>::type vs2_u = P.VU.elt<type_usew_t<sew2>::type>(rs2_num, i); \
+ type_usew_t<sew1>::type zimm5 = (type_usew_t<sew1>::type)insn.v_zimm5(); \
+ type_sew_t<sew2>::type vs2 = P.VU.elt<type_sew_t<sew2>::type>(rs2_num, i); \
+ type_sew_t<sew1>::type vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \
+ type_sew_t<sew1>::type rs1 = (type_sew_t<sew1>::type)RS1;
+
+#define VI_VVXI_LOOP_NARROW(BODY) \
+ require(P.VU.vsew <= e32); \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_NARROW_SHIFT(e8, e16) \
+ BODY; \
+ } else if (sew == e16) { \
+ VI_NARROW_SHIFT(e16, e32) \
+ BODY; \
+ } else if (sew == e32) { \
+ VI_NARROW_SHIFT(e32, e64) \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VI_LOOP_NSHIFT(BODY) \
+ VI_LOOP_NSHIFT_BASE \
+ if (sew == e8){ \
+ VI_NSHIFT_PARAMS(e8, e16) \
+ BODY; \
+ } else if (sew == e16) { \
+ VI_NSHIFT_PARAMS(e16, e32) \
+ BODY; \
+ } else if (sew == e32) { \
+ VI_NSHIFT_PARAMS(e32, e64) \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VX_LOOP_NSHIFT(BODY) \
+ VI_LOOP_NSHIFT_BASE \
+ if (sew == e8){ \
+ VX_NSHIFT_PARAMS(e8, e16) \
+ BODY; \
+ } else if (sew == e16) { \
+ VX_NSHIFT_PARAMS(e16, e32) \
+ BODY; \
+ } else if (sew == e32) { \
+ VX_NSHIFT_PARAMS(e32, e64) \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+#define VI_VV_LOOP_NSHIFT(BODY) \
+ VI_LOOP_NSHIFT_BASE \
+ if (sew == e8){ \
+ VV_NSHIFT_PARAMS(e8, e16) \
+ BODY; \
+ } else if (sew == e16) { \
+ VV_NSHIFT_PARAMS(e16, e32) \
+ BODY; \
+ } else if (sew == e32) { \
+ VV_NSHIFT_PARAMS(e32, e64) \
+ BODY; \
+ } \
+ VI_LOOP_END
+
+// widen operation loop
+#define VI_VV_LOOP_WIDEN(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VV_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VV_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VV_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VV_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_WIDEN_END
+
+#define VI_VX_LOOP_WIDEN(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VX_PARAMS(e8); \
+ BODY; \
+ }else if(sew == e16){ \
+ VX_PARAMS(e16); \
+ BODY; \
+ }else if(sew == e32){ \
+ VX_PARAMS(e32); \
+ BODY; \
+ }else if(sew == e64){ \
+ VX_PARAMS(e64); \
+ BODY; \
+ } \
+ VI_LOOP_WIDEN_END
+
+#define VI_WIDE_OP_AND_ASSIGN(var0, var1, var2, op0, op1, sign) \
+ switch(P.VU.vsew) { \
+ case e8: { \
+ sign##16_t vd_w = P.VU.elt<sign##16_t>(rd_num, i); \
+ P.VU.elt<uint16_t>(rd_num, i) = \
+ op1((sign##16_t)(sign##8_t)var0 op0 (sign##16_t)(sign##8_t)var1) + var2; \
+ } \
+ break; \
+ case e16: { \
+ sign##32_t vd_w = P.VU.elt<sign##32_t>(rd_num, i); \
+ P.VU.elt<uint32_t>(rd_num, i) = \
+ op1((sign##32_t)(sign##16_t)var0 op0 (sign##32_t)(sign##16_t)var1) + var2; \
+ } \
+ break; \
+ default: { \
+ sign##64_t vd_w = P.VU.elt<sign##64_t>(rd_num, i); \
+ P.VU.elt<uint64_t>(rd_num, i) = \
+ op1((sign##64_t)(sign##32_t)var0 op0 (sign##64_t)(sign##32_t)var1) + var2; \
+ } \
+ break; \
+ }
+
+#define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \
+ switch(P.VU.vsew) { \
+ case e8: { \
+ sign_d##16_t vd_w = P.VU.elt<sign_d##16_t>(rd_num, i); \
+ P.VU.elt<uint16_t>(rd_num, i) = \
+ op1((sign_1##16_t)(sign_1##8_t)var0 op0 (sign_2##16_t)(sign_2##8_t)var1) + var2; \
+ } \
+ break; \
+ case e16: { \
+ sign_d##32_t vd_w = P.VU.elt<sign_d##32_t>(rd_num, i); \
+ P.VU.elt<uint32_t>(rd_num, i) = \
+ op1((sign_1##32_t)(sign_1##16_t)var0 op0 (sign_2##32_t)(sign_2##16_t)var1) + var2; \
+ } \
+ break; \
+ default: { \
+ sign_d##64_t vd_w = P.VU.elt<sign_d##64_t>(rd_num, i); \
+ P.VU.elt<uint64_t>(rd_num, i) = \
+ op1((sign_1##64_t)(sign_1##32_t)var0 op0 (sign_2##64_t)(sign_2##32_t)var1) + var2; \
+ } \
+ break; \
+ }
+
+#define VI_WIDE_WVX_OP(var0, op0, sign) \
+ switch(P.VU.vsew) { \
+ case e8: { \
+ sign##16_t &vd_w = P.VU.elt<sign##16_t>(rd_num, i); \
+ sign##16_t vs2_w = P.VU.elt<sign##16_t>(rs2_num, i); \
+ vd_w = vs2_w op0 (sign##16_t)(sign##8_t)var0; \
+ } \
+ break; \
+ case e16: { \
+ sign##32_t &vd_w = P.VU.elt<sign##32_t>(rd_num, i); \
+ sign##32_t vs2_w = P.VU.elt<sign##32_t>(rs2_num, i); \
+ vd_w = vs2_w op0 (sign##32_t)(sign##16_t)var0; \
+ } \
+ break; \
+ default: { \
+ sign##64_t &vd_w = P.VU.elt<sign##64_t>(rd_num, i); \
+ sign##64_t vs2_w = P.VU.elt<sign##64_t>(rs2_num, i); \
+ vd_w = vs2_w op0 (sign##64_t)(sign##32_t)var0; \
+ } \
+ break; \
+ }
+
+#define VI_WIDE_SSMA(sew1, sew2, opd) \
+ auto &vd = P.VU.elt<type_sew_t<sew2>::type>(rd_num, i); \
+ auto vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i); \
+ auto rs1 = (type_sew_t<sew1>::type)RS1; \
+ int##sew2##_t res; \
+ bool sat = false; \
+ const int gb = sew1 / 2; \
+ VRM vrm = P.VU.get_vround_mode(); \
+ res = (int##sew2##_t)vs2 * (int##sew2##_t)opd; \
+ INT_ROUNDING(res, vrm, gb); \
+ res = res >> gb; \
+ vd = sat_add<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
+ P.VU.vxsat |= sat;
+
+#define VI_VVX_LOOP_WIDE_SSMA(opd) \
+ VI_WIDE_CHECK_COMMON \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_WIDE_SSMA(8, 16, opd); \
+ } else if(sew == e16){ \
+ VI_WIDE_SSMA(16, 32, opd); \
+ } else if(sew == e32){ \
+ VI_WIDE_SSMA(32, 64, opd); \
+ } \
+ VI_LOOP_WIDEN_END
+
+#define VI_WIDE_USSMA(sew1, sew2, opd) \
+ auto &vd = P.VU.elt<type_usew_t<sew2>::type>(rd_num, i); \
+ auto vs1 = P.VU.elt<type_usew_t<sew1>::type>(rs1_num, i); \
+ auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i); \
+ auto rs1 = (type_usew_t<sew1>::type)RS1; \
+ uint##sew2##_t res; \
+ bool sat = false; \
+ const int gb = sew1 / 2; \
+ VRM vrm = P.VU.get_vround_mode(); \
+ res = (uint##sew2##_t)vs2 * (uint##sew2##_t)opd; \
+ INT_ROUNDING(res, vrm, gb); \
+ \
+ res = res >> gb; \
+ vd = sat_addu<uint##sew2##_t>(vd, res, sat); \
+ P.VU.vxsat |= sat;
+
+#define VI_VVX_LOOP_WIDE_USSMA(opd) \
+ VI_WIDE_CHECK_COMMON \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_WIDE_USSMA(8, 16, opd); \
+ } else if(sew == e16){ \
+ VI_WIDE_USSMA(16, 32, opd); \
+ } else if(sew == e32){ \
+ VI_WIDE_USSMA(32, 64, opd); \
+ } \
+ VI_LOOP_WIDEN_END
+
+#define VI_WIDE_SU_SSMA(sew1, sew2, opd) \
+ auto &vd = P.VU.elt<type_sew_t<sew2>::type>(rd_num, i); \
+ auto vs1 = P.VU.elt<type_sew_t<sew1>::type>(rs1_num, i); \
+ auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i); \
+ auto rs1 = (type_sew_t<sew1>::type)RS1; \
+ int##sew2##_t res; \
+ bool sat = false; \
+ const int gb = sew1 / 2; \
+ VRM vrm = P.VU.get_vround_mode(); \
+ res = (uint##sew2##_t)vs2 * (int##sew2##_t)opd; \
+ INT_ROUNDING(res, vrm, gb); \
+ \
+ res = res >> gb; \
+ vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
+ P.VU.vxsat |= sat;
+
+#define VI_VVX_LOOP_WIDE_SU_SSMA(opd) \
+ VI_WIDE_CHECK_COMMON \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_WIDE_SU_SSMA(8, 16, opd); \
+ } else if(sew == e16){ \
+ VI_WIDE_SU_SSMA(16, 32, opd); \
+ } else if(sew == e32){ \
+ VI_WIDE_SU_SSMA(32, 64, opd); \
+ } \
+ VI_LOOP_WIDEN_END
+
+#define VI_WIDE_US_SSMA(sew1, sew2, opd) \
+ auto &vd = P.VU.elt<type_sew_t<sew2>::type>(rd_num, i); \
+ auto vs1 = P.VU.elt<type_usew_t<sew1>::type>(rs1_num, i); \
+ auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i); \
+ auto rs1 = (type_usew_t<sew1>::type)RS1; \
+ int##sew2##_t res; \
+ bool sat = false; \
+ const int gb = sew1 / 2; \
+ VRM vrm = P.VU.get_vround_mode(); \
+ res = (int##sew2##_t)vs2 * (uint##sew2##_t)opd; \
+ INT_ROUNDING(res, vrm, gb); \
+ \
+ res = res >> gb; \
+ vd = sat_sub<int##sew2##_t, uint##sew2##_t>(vd, res, sat); \
+ P.VU.vxsat |= sat;
+
+#define VI_VVX_LOOP_WIDE_US_SSMA(opd) \
+ VI_WIDE_CHECK_COMMON \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VI_WIDE_US_SSMA(8, 16, opd); \
+ } else if(sew == e16){ \
+ VI_WIDE_US_SSMA(16, 32, opd); \
+ } else if(sew == e32){ \
+ VI_WIDE_US_SSMA(32, 64, opd); \
+ } \
+ VI_LOOP_WIDEN_END
+
+// wide reduction loop - signed
+#define VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
+ VI_CHECK_DSS(false); \
+ reg_t vl = P.VU.vl; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ auto &vd_0_des = P.VU.elt<type_sew_t<sew2>::type>(rd_num, 0); \
+ auto vd_0_res = P.VU.elt<type_sew_t<sew2>::type>(rs1_num, 0); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ VI_LOOP_ELEMENT_SKIP(); \
+ auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i);
+
+#define WIDE_REDUCTION_LOOP(sew1, sew2, BODY) \
+ VI_LOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
+ BODY; \
+ VI_LOOP_REDUCTION_END(sew2)
+
+#define VI_VV_LOOP_WIDE_REDUCTION(BODY) \
+ require(!P.VU.vill);\
+ reg_t sew = P.VU.vsew; \
+ if (sew == e8){ \
+ WIDE_REDUCTION_LOOP(e8, e16, BODY) \
+ } else if(sew == e16){ \
+ WIDE_REDUCTION_LOOP(e16, e32, BODY) \
+ } else if(sew == e32){ \
+ WIDE_REDUCTION_LOOP(e32, e64, BODY) \
+ }
+
+// wide reduction loop - unsigned
+#define VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
+ VI_CHECK_DSS(false); \
+ reg_t vl = P.VU.vl; \
+ reg_t rd_num = insn.rd(); \
+ reg_t rs1_num = insn.rs1(); \
+ reg_t rs2_num = insn.rs2(); \
+ auto &vd_0_des = P.VU.elt<type_usew_t<sew2>::type>(rd_num, 0); \
+ auto vd_0_res = P.VU.elt<type_usew_t<sew2>::type>(rs1_num, 0); \
+ for (reg_t i=P.VU.vstart; i<vl; ++i) { \
+ VI_LOOP_ELEMENT_SKIP(); \
+ auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i);
+
+#define WIDE_REDUCTION_ULOOP(sew1, sew2, BODY) \
+ VI_ULOOP_WIDE_REDUCTION_BASE(sew1, sew2) \
+ BODY; \
+ VI_LOOP_REDUCTION_END(sew2)
+
+#define VI_VV_ULOOP_WIDE_REDUCTION(BODY) \
+ require(!P.VU.vill);\
+ reg_t sew = P.VU.vsew; \
+ if (sew == e8){ \
+ WIDE_REDUCTION_ULOOP(e8, e16, BODY) \
+ } else if(sew == e16){ \
+ WIDE_REDUCTION_ULOOP(e16, e32, BODY) \
+ } else if(sew == e32){ \
+ WIDE_REDUCTION_ULOOP(e32, e64, BODY) \
+ }
+
+// carry/borrow bit loop
+#define VI_VV_LOOP_CARRY(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ VV_CARRY_PARAMS(e8) \
+ BODY; \
+ } else if (sew == e16) { \
+ VV_CARRY_PARAMS(e16) \
+ BODY; \
+ } else if (sew == e32) { \
+ VV_CARRY_PARAMS(e32) \
+ BODY; \
+ } else if (sew == e64) { \
+ VV_CARRY_PARAMS(e64) \
+ BODY; \
+ } \
+ } \
+ VI_TAIL_ZERO_MASK(rd_num);
+
+#define VI_XI_LOOP_CARRY(BODY) \
+ VI_LOOP_BASE \
+ if (sew == e8){ \
+ XI_CARRY_PARAMS(e8) \
+ BODY; \
+ } else if (sew == e16) { \
+ XI_CARRY_PARAMS(e16) \
+ BODY; \
+ } else if (sew == e32) { \
+ XI_CARRY_PARAMS(e32) \
+ BODY; \
+ } else if (sew == e64) { \
+ XI_CARRY_PARAMS(e64) \
+ BODY; \
+ } \
+ } \
+ VI_TAIL_ZERO_MASK(rd_num);
+
+// average loop
+#define VI_VVX_LOOP_AVG(opd, op) \
+VRM xrm = p->VU.get_vround_mode(); \
+VI_LOOP_BASE \
+ switch(sew) { \
+ case e8: { \
+ VV_PARAMS(e8); \
+ type_sew_t<e8>::type rs1 = RS1; \
+ auto res = (int32_t)vs2 op opd; \
+ INT_ROUNDING(res, xrm, 1); \
+ vd = res >> 1; \
+ break; \
+ } \
+ case e16: { \
+ VV_PARAMS(e16); \
+ type_sew_t<e16>::type rs1 = RS1; \
+ auto res = (int32_t)vs2 op opd; \
+ INT_ROUNDING(res, xrm, 1); \
+ vd = res >> 1; \
+ break; \
+ } \
+ case e32: { \
+ VV_PARAMS(e32); \
+ type_sew_t<e32>::type rs1 = RS1; \
+ auto res = (int64_t)vs2 op opd; \
+ INT_ROUNDING(res, xrm, 1); \
+ vd = res >> 1; \
+ break; \
+ } \
+ default: { \
+ VV_PARAMS(e64); \
+ type_sew_t<e64>::type rs1 = RS1; \
+ auto res = (int128_t)vs2 op opd; \
+ INT_ROUNDING(res, xrm, 1); \
+ vd = res >> 1; \
+ break; \
+ } \
+ } \
+VI_LOOP_END
// Seems that 0x0 doesn't work.
#define DEBUG_START 0x100
#define DEBUG_END (0x1000 - 1)
diff --git a/riscv/insns/vaadd_vi.h b/riscv/insns/vaadd_vi.h
new file mode 100644
index 0000000..5f8d5f5
--- /dev/null
+++ b/riscv/insns/vaadd_vi.h
@@ -0,0 +1,9 @@
+// vaadd: averaging add of integers
+VRM xrm = P.VU.get_vround_mode();
+VI_VI_LOOP
+({
+ int64_t result = simm5 + vs2;
+ INT_ROUNDING(result, xrm, 1);
+ result = vzext(result >> 1, sew);
+ vd = result;
+})
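A quick worked example of the averaging add above (editor's sketch, assuming round-to-nearest-up and sew = 8): for simm5 = 5 and vs2 = 6 the sum is 11, INT_ROUNDING adds 1 << 0 = 1, and the single-bit shift gives vd = (5 + 6 + 1) >> 1 = 6.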
diff --git a/riscv/insns/vaadd_vv.h b/riscv/insns/vaadd_vv.h
new file mode 100644
index 0000000..b479970
--- /dev/null
+++ b/riscv/insns/vaadd_vv.h
@@ -0,0 +1,2 @@
+// vaadd.vv vd, vs2, vs1
+VI_VVX_LOOP_AVG(vs1, +);
diff --git a/riscv/insns/vaadd_vx.h b/riscv/insns/vaadd_vx.h
new file mode 100644
index 0000000..c811a0a
--- /dev/null
+++ b/riscv/insns/vaadd_vx.h
@@ -0,0 +1,2 @@
+// vaadd.vx vd, vs2, rs1
+VI_VVX_LOOP_AVG(rs1, +);
diff --git a/riscv/insns/vadc_vim.h b/riscv/insns/vadc_vim.h
new file mode 100644
index 0000000..e21e2f8
--- /dev/null
+++ b/riscv/insns/vadc_vim.h
@@ -0,0 +1,11 @@
+// vadc.vim vd, vs2, simm5
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VI_LOOP
+({
+ auto &v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry;
+ vd = res;
+})
diff --git a/riscv/insns/vadc_vvm.h b/riscv/insns/vadc_vvm.h
new file mode 100644
index 0000000..b708ac1
--- /dev/null
+++ b/riscv/insns/vadc_vvm.h
@@ -0,0 +1,11 @@
+// vadc.vvm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VV_LOOP
+({
+ auto &v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry;
+ vd = res;
+})
diff --git a/riscv/insns/vadc_vxm.h b/riscv/insns/vadc_vxm.h
new file mode 100644
index 0000000..6c6e6dc
--- /dev/null
+++ b/riscv/insns/vadc_vxm.h
@@ -0,0 +1,11 @@
+// vadc.vxm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VX_LOOP
+({
+ auto &v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry;
+ vd = res;
+})
diff --git a/riscv/insns/vadd_vi.h b/riscv/insns/vadd_vi.h
new file mode 100644
index 0000000..45fc6b7
--- /dev/null
+++ b/riscv/insns/vadd_vi.h
@@ -0,0 +1,5 @@
+// vadd.vi vd, simm5, vs2, vm
+VI_VI_LOOP
+({
+ vd = simm5 + vs2;
+})
diff --git a/riscv/insns/vadd_vv.h b/riscv/insns/vadd_vv.h
new file mode 100644
index 0000000..45c6bdc
--- /dev/null
+++ b/riscv/insns/vadd_vv.h
@@ -0,0 +1,5 @@
+// vadd.vv vd, vs1, vs2, vm
+VI_VV_LOOP
+({
+ vd = vs1 + vs2;
+})
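For orientation, a minimal sketch (editor's example, assuming type_sew_t<e8>::type is int8_t) of roughly what VI_VV_LOOP({ vd = vs1 + vs2; }) performs per element for SEW=8, with the masking, SEW dispatch and tail zeroing of VI_LOOP_BASE/VI_LOOP_END omitted:

for (reg_t i = P.VU.vstart; i < P.VU.vl; ++i) {
  int8_t &vd  = P.VU.elt<int8_t>(insn.rd(),  i);  // VV_PARAMS(e8): destination element
  int8_t  vs1 = P.VU.elt<int8_t>(insn.rs1(), i);  // source elements
  int8_t  vs2 = P.VU.elt<int8_t>(insn.rs2(), i);
  vd = vs1 + vs2;                                 // the body supplied by vadd_vv.h
}
P.VU.vstart = 0;                                  // VI_LOOP_END resets vstart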
diff --git a/riscv/insns/vadd_vx.h b/riscv/insns/vadd_vx.h
new file mode 100644
index 0000000..33e72ee
--- /dev/null
+++ b/riscv/insns/vadd_vx.h
@@ -0,0 +1,5 @@
+// vadd.vx vd, rs1, vs2, vm
+VI_VX_LOOP
+({
+ vd = rs1 + vs2;
+})
diff --git a/riscv/insns/vand_vi.h b/riscv/insns/vand_vi.h
new file mode 100644
index 0000000..dd9618b
--- /dev/null
+++ b/riscv/insns/vand_vi.h
@@ -0,0 +1,5 @@
+// vand.vi vd, simm5, vs2, vm
+VI_VI_LOOP
+({
+ vd = simm5 & vs2;
+})
diff --git a/riscv/insns/vand_vv.h b/riscv/insns/vand_vv.h
new file mode 100644
index 0000000..65558e4
--- /dev/null
+++ b/riscv/insns/vand_vv.h
@@ -0,0 +1,5 @@
+// vand.vv vd, vs1, vs2, vm
+VI_VV_LOOP
+({
+ vd = vs1 & vs2;
+})
diff --git a/riscv/insns/vand_vx.h b/riscv/insns/vand_vx.h
new file mode 100644
index 0000000..8eea1ed
--- /dev/null
+++ b/riscv/insns/vand_vx.h
@@ -0,0 +1,5 @@
+// vand.vx vd, rs1, vs2, vm
+VI_VX_LOOP
+({
+ vd = rs1 & vs2;
+})
diff --git a/riscv/insns/vasub_vv.h b/riscv/insns/vasub_vv.h
new file mode 100644
index 0000000..5a5ccc9
--- /dev/null
+++ b/riscv/insns/vasub_vv.h
@@ -0,0 +1,2 @@
+// vasub.vv vd, vs2, vs1
+VI_VVX_LOOP_AVG(vs1, -);
diff --git a/riscv/insns/vasub_vx.h b/riscv/insns/vasub_vx.h
new file mode 100644
index 0000000..c3cad4b
--- /dev/null
+++ b/riscv/insns/vasub_vx.h
@@ -0,0 +1,2 @@
+// vasub.vx vd, vs2, rs1
+VI_VVX_LOOP_AVG(rs1, -);
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h
new file mode 100644
index 0000000..2e0784c
--- /dev/null
+++ b/riscv/insns/vcompress_vm.h
@@ -0,0 +1,41 @@
+// vcompress vd, vs2, vs1
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+require(P.VU.vstart == 0);
+reg_t sew = P.VU.vsew;
+reg_t vl = P.VU.vl;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+reg_t pos = 0;
+for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+ const int mlen = P.VU.vmlen;
+ const int midx = (mlen * i) / 64;
+ const int mpos = (mlen * i) % 64;
+
+ bool do_mask = (P.VU.elt<uint64_t>(rs1_num, midx) >> mpos) & 0x1;
+ if (do_mask) {
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, pos) = P.VU.elt<uint8_t>(rs2_num, i);
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, pos) = P.VU.elt<uint16_t>(rs2_num, i);
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, pos) = P.VU.elt<uint32_t>(rs2_num, i);
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, pos) = P.VU.elt<uint64_t>(rs2_num, i);
+ break;
+ }
+
+ ++pos;
+ }
+}
+
+if (vl > 0 && TAIL_ZEROING) {
+ uint8_t *tail = &P.VU.elt<uint8_t>(rd_num, pos * ((sew >> 3) * 1));
+ memset(tail, 0, (P.VU.vlmax - pos) * ((sew >> 3) * 1));
+}
+
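As a small worked example of the compress loop above (editor's sketch): with vl = 4, mask bits in vs1 = 0b1010 and source elements vs2 = {10, 20, 30, 40}, only elements 1 and 3 are selected, so vd is packed as {20, 40} at positions 0 and 1, and the remaining tail is zeroed when TAIL_ZEROING is in effect.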
diff --git a/riscv/insns/vdiv_vv.h b/riscv/insns/vdiv_vv.h
new file mode 100644
index 0000000..67da162
--- /dev/null
+++ b/riscv/insns/vdiv_vv.h
@@ -0,0 +1,10 @@
+// vdiv.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+ if (vs1 == 0)
+ vd = -1;
+ else if (vs2 == -(1 << (sew - 1)) && vs1 == -1)
+ vd = vs2;
+ else
+ vd = vs2 / vs1;
+})
diff --git a/riscv/insns/vdiv_vx.h b/riscv/insns/vdiv_vx.h
new file mode 100644
index 0000000..1a152bd
--- /dev/null
+++ b/riscv/insns/vdiv_vx.h
@@ -0,0 +1,10 @@
+// vdiv.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+ if(rs1 == 0)
+ vd = -1;
+ else if(vs2 == -(1 << (sew - 1)) && rs1 == -1)
+ vd = vs2;
+ else
+ vd = vs2 / rs1;
+})
diff --git a/riscv/insns/vdivu_vv.h b/riscv/insns/vdivu_vv.h
new file mode 100644
index 0000000..ef6e777
--- /dev/null
+++ b/riscv/insns/vdivu_vv.h
@@ -0,0 +1,8 @@
+// vdivu.vv vd, vs2, vs1
+VI_VV_ULOOP
+({
+ if(vs1 == 0)
+ vd = -1;
+ else
+ vd = vs2 / vs1;
+})
diff --git a/riscv/insns/vdivu_vx.h b/riscv/insns/vdivu_vx.h
new file mode 100644
index 0000000..7ffe1c6
--- /dev/null
+++ b/riscv/insns/vdivu_vx.h
@@ -0,0 +1,8 @@
+// vdivu.vx vd, vs2, rs1
+VI_VX_ULOOP
+({
+ if(rs1 == 0)
+ vd = -1;
+ else
+ vd = vs2 / rs1;
+})
diff --git a/riscv/insns/vdot_vv.h b/riscv/insns/vdot_vv.h
new file mode 100644
index 0000000..7685230
--- /dev/null
+++ b/riscv/insns/vdot_vv.h
@@ -0,0 +1,5 @@
+// vdot vd, vs2, vs1
+VI_VV_LOOP
+({
+ vd += vs2 * vs1;
+})
diff --git a/riscv/insns/vdotu_vv.h b/riscv/insns/vdotu_vv.h
new file mode 100644
index 0000000..9c4c59d
--- /dev/null
+++ b/riscv/insns/vdotu_vv.h
@@ -0,0 +1,5 @@
+// vdotu vd, vs2, vs1
+VI_VV_ULOOP
+({
+ vd += vs2 * vs1;
+})
diff --git a/riscv/insns/vext_x_v.h b/riscv/insns/vext_x_v.h
new file mode 100644
index 0000000..837cc22
--- /dev/null
+++ b/riscv/insns/vext_x_v.h
@@ -0,0 +1,30 @@
+// vext_x_v: rd = vs2[rs1]
+require(insn.v_vm() == 1);
+uint64_t xmask = UINT64_MAX >> (64 - P.get_max_xlen());
+reg_t rs1 = RS1;
+VI_LOOP_BASE
+VI_LOOP_END_NO_TAIL_ZERO
+if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen()/sew))) {
+ WRITE_RD(0);
+} else {
+ switch(sew) {
+ case e8:
+ WRITE_RD(P.VU.elt<uint8_t>(rs2_num, rs1));
+ break;
+ case e16:
+ WRITE_RD(P.VU.elt<uint16_t>(rs2_num, rs1));
+ break;
+ case e32:
+ if (P.get_max_xlen() == 32)
+ WRITE_RD(P.VU.elt<int32_t>(rs2_num, rs1));
+ else
+ WRITE_RD(P.VU.elt<uint32_t>(rs2_num, rs1));
+ break;
+ case e64:
+ if (P.get_max_xlen() <= sew)
+ WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1) & xmask);
+ else
+ WRITE_RD(P.VU.elt<uint64_t>(rs2_num, rs1));
+ break;
+ }
+}
diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h
new file mode 100644
index 0000000..ac111d0
--- /dev/null
+++ b/riscv/insns/vid_v.h
@@ -0,0 +1,30 @@
+// vid.v vd, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
+ VI_LOOP_ELEMENT_SKIP();
+
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, i) = i;
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, i) = i;
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, i) = i;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = i;
+ break;
+ }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
new file mode 100644
index 0000000..c7b831a
--- /dev/null
+++ b/riscv/insns/viota_m.h
@@ -0,0 +1,52 @@
+// viota.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+require(P.VU.vstart == 0);
+
+int cnt = 0;
+for (reg_t i = 0; i < vl; ++i) {
+ const int mlen = P.VU.vmlen;
+ const int midx = (mlen * i) / 64;
+ const int mpos = (mlen * i) % 64;
+
+ bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
+ bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ bool has_one = false;
+ if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+ if (vs2_lsb) {
+ has_one = true;
+ }
+ }
+
+ bool use_ori = (insn.v_vm() == 0) && !do_mask;
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, i) = use_ori ?
+ P.VU.elt<uint8_t>(rd_num, i) : cnt;
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, i) = use_ori ?
+ P.VU.elt<uint16_t>(rd_num, i) : cnt;
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, i) = use_ori ?
+ P.VU.elt<uint32_t>(rd_num, i) : cnt;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = use_ori ?
+ P.VU.elt<uint64_t>(rd_num, i) : cnt;
+ break;
+ }
+
+ if (has_one) {
+ cnt++;
+ }
+}
+
+VI_TAIL_ZERO(1);
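A short worked example of the running count above (editor's sketch, unmasked): for vl = 5 and source bits vs2 = 1,0,1,1,0, each destination element receives the number of 1s seen in earlier positions, so vd = {0, 1, 1, 2, 3}.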
diff --git a/riscv/insns/vmacc_vv.h b/riscv/insns/vmacc_vv.h
new file mode 100644
index 0000000..e6ec93f
--- /dev/null
+++ b/riscv/insns/vmacc_vv.h
@@ -0,0 +1,5 @@
+// vmacc.vv: vd[i] = +(vs1[i] * vs2[i]) + vd[i]
+VI_VV_LOOP
+({
+ vd = vs1 * vs2 + vd;
+})
diff --git a/riscv/insns/vmacc_vx.h b/riscv/insns/vmacc_vx.h
new file mode 100644
index 0000000..d40b264
--- /dev/null
+++ b/riscv/insns/vmacc_vx.h
@@ -0,0 +1,5 @@
+// vmacc.vx: vd[i] = +(x[rs1] * vs2[i]) + vd[i]
+VI_VX_LOOP
+({
+ vd = rs1 * vs2 + vd;
+})
diff --git a/riscv/insns/vmadc_vim.h b/riscv/insns/vmadc_vim.h
new file mode 100644
index 0000000..fd79089
--- /dev/null
+++ b/riscv/insns/vmadc_vim.h
@@ -0,0 +1,14 @@
+// vmadc.vim vd, vs2, simm5
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_XI_LOOP_CARRY
+({
+ auto v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & simm5) + (op_mask & vs2) + carry;
+
+ carry = (res >> sew) & 0x1u;
+ vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
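For instance (editor's sketch, sew = 8, carry-in 0): vs2 = 0xFF and simm5 = 1 give res = 0x100, so bit 8 is the carry-out and the corresponding mask bit of vd is set to 1.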
diff --git a/riscv/insns/vmadc_vvm.h b/riscv/insns/vmadc_vvm.h
new file mode 100644
index 0000000..82042ca
--- /dev/null
+++ b/riscv/insns/vmadc_vvm.h
@@ -0,0 +1,14 @@
+// vmadc.vvm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VV_LOOP_CARRY
+({
+ auto v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & vs1) + (op_mask & vs2) + carry;
+
+ carry = (res >> sew) & 0x1u;
+ vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
diff --git a/riscv/insns/vmadc_vxm.h b/riscv/insns/vmadc_vxm.h
new file mode 100644
index 0000000..8f26584
--- /dev/null
+++ b/riscv/insns/vmadc_vxm.h
@@ -0,0 +1,14 @@
+// vmadc.vxm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_XI_LOOP_CARRY
+({
+ auto v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & rs1) + (op_mask & vs2) + carry;
+
+ carry = (res >> sew) & 0x1u;
+ vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
diff --git a/riscv/insns/vmadd_vv.h b/riscv/insns/vmadd_vv.h
new file mode 100644
index 0000000..a1c0d2e
--- /dev/null
+++ b/riscv/insns/vmadd_vv.h
@@ -0,0 +1,5 @@
+// vmadd: vd[i] = (vd[i] * vs1[i]) + vs2[i]
+VI_VV_LOOP
+({
+ vd = vd * vs1 + vs2;
+})
diff --git a/riscv/insns/vmadd_vx.h b/riscv/insns/vmadd_vx.h
new file mode 100644
index 0000000..1a8a001
--- /dev/null
+++ b/riscv/insns/vmadd_vx.h
@@ -0,0 +1,5 @@
+// vmadd: vd[i] = (vd[i] * x[rs1]) + vs2[i]
+VI_VX_LOOP
+({
+ vd = vd * rs1 + vs2;
+})
diff --git a/riscv/insns/vmand_mm.h b/riscv/insns/vmand_mm.h
new file mode 100644
index 0000000..04615c6
--- /dev/null
+++ b/riscv/insns/vmand_mm.h
@@ -0,0 +1,2 @@
+// vmand.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 & vs1);
diff --git a/riscv/insns/vmandnot_mm.h b/riscv/insns/vmandnot_mm.h
new file mode 100644
index 0000000..4c26469
--- /dev/null
+++ b/riscv/insns/vmandnot_mm.h
@@ -0,0 +1,2 @@
+// vmandnot.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 & ~vs1);
diff --git a/riscv/insns/vmax_vv.h b/riscv/insns/vmax_vv.h
new file mode 100644
index 0000000..b9f15c5
--- /dev/null
+++ b/riscv/insns/vmax_vv.h
@@ -0,0 +1,10 @@
+// vmax.vv vd, vs2, vs1, vm # Vector-vector
+VI_VV_LOOP
+({
+ if (vs1 >= vs2) {
+ vd = vs1;
+ } else {
+ vd = vs2;
+ }
+
+})
diff --git a/riscv/insns/vmax_vx.h b/riscv/insns/vmax_vx.h
new file mode 100644
index 0000000..06f3f43
--- /dev/null
+++ b/riscv/insns/vmax_vx.h
@@ -0,0 +1,10 @@
+// vmax.vx vd, vs2, rs1, vm # vector-scalar
+VI_VX_LOOP
+({
+ if (rs1 >= vs2) {
+ vd = rs1;
+ } else {
+ vd = vs2;
+ }
+
+})
diff --git a/riscv/insns/vmaxu_vv.h b/riscv/insns/vmaxu_vv.h
new file mode 100644
index 0000000..4e6868d
--- /dev/null
+++ b/riscv/insns/vmaxu_vv.h
@@ -0,0 +1,9 @@
+// vmaxu.vv vd, vs2, vs1, vm # Vector-vector
+VI_VV_ULOOP
+({
+ if (vs1 >= vs2) {
+ vd = vs1;
+ } else {
+ vd = vs2;
+ }
+})
diff --git a/riscv/insns/vmaxu_vx.h b/riscv/insns/vmaxu_vx.h
new file mode 100644
index 0000000..cab8918
--- /dev/null
+++ b/riscv/insns/vmaxu_vx.h
@@ -0,0 +1,9 @@
+// vmaxu.vx vd, vs2, rs1, vm # vector-scalar
+VI_VX_ULOOP
+({
+ if (rs1 >= vs2) {
+ vd = rs1;
+ } else {
+ vd = vs2;
+ }
+})
diff --git a/riscv/insns/vmerge_vim.h b/riscv/insns/vmerge_vim.h
new file mode 100644
index 0000000..13354d6
--- /dev/null
+++ b/riscv/insns/vmerge_vim.h
@@ -0,0 +1,9 @@
+// vmerge.vim vd, vs2, simm5
+VI_VVXI_MERGE_LOOP
+({
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? simm5 : vs2;
+})
diff --git a/riscv/insns/vmerge_vvm.h b/riscv/insns/vmerge_vvm.h
new file mode 100644
index 0000000..7530b40
--- /dev/null
+++ b/riscv/insns/vmerge_vvm.h
@@ -0,0 +1,9 @@
+// vmerge.vvm vd, vs2, vs1
+VI_VVXI_MERGE_LOOP
+({
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? vs1 : vs2;
+})
diff --git a/riscv/insns/vmerge_vxm.h b/riscv/insns/vmerge_vxm.h
new file mode 100644
index 0000000..b1757fa
--- /dev/null
+++ b/riscv/insns/vmerge_vxm.h
@@ -0,0 +1,9 @@
+// vmerge.vxm vd, vs2, rs1
+VI_VVXI_MERGE_LOOP
+({
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+})
diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h
new file mode 100644
index 0000000..cedf4b9
--- /dev/null
+++ b/riscv/insns/vmfeq_vf.h
@@ -0,0 +1,5 @@
+// vfeq.vf vd, vs2, fs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_eq(vs2, rs1);
+})
diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h
new file mode 100644
index 0000000..7e76cac
--- /dev/null
+++ b/riscv/insns/vmfeq_vv.h
@@ -0,0 +1,5 @@
+// vfeq.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_eq(vs2, vs1);
+})
diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h
new file mode 100644
index 0000000..7eade89
--- /dev/null
+++ b/riscv/insns/vmfge_vf.h
@@ -0,0 +1,5 @@
+// vfge.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_le_quiet(rs1, vs2);
+})
diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h
new file mode 100644
index 0000000..6115d06
--- /dev/null
+++ b/riscv/insns/vmfgt_vf.h
@@ -0,0 +1,5 @@
+// vfgt.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_lt_quiet(rs1, vs2);
+})
diff --git a/riscv/insns/vmfirst_m.h b/riscv/insns/vmfirst_m.h
new file mode 100644
index 0000000..8c216c0
--- /dev/null
+++ b/riscv/insns/vmfirst_m.h
@@ -0,0 +1,20 @@
+// vmfirst rd, vs2
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2();
+require(P.VU.vstart == 0);
+reg_t pos = -1;
+for (reg_t i=P.VU.vstart; i < vl; ++i) {
+ VI_LOOP_ELEMENT_SKIP()
+
+ bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
+ if (vs2_lsb) {
+ pos = i;
+ break;
+ }
+}
+P.VU.vstart = 0;
+WRITE_RD(pos);
diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h
new file mode 100644
index 0000000..998b93b
--- /dev/null
+++ b/riscv/insns/vmfle_vf.h
@@ -0,0 +1,5 @@
+// vfle.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_le(vs2, rs1);
+})
diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h
new file mode 100644
index 0000000..c716312
--- /dev/null
+++ b/riscv/insns/vmfle_vv.h
@@ -0,0 +1,5 @@
+// vfle.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_le_quiet(vs2, vs1);
+})
diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h
new file mode 100644
index 0000000..af436e4
--- /dev/null
+++ b/riscv/insns/vmflt_vf.h
@@ -0,0 +1,5 @@
+// vflt.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_lt_quiet(vs2, rs1);
+})
diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h
new file mode 100644
index 0000000..ded867d
--- /dev/null
+++ b/riscv/insns/vmflt_vv.h
@@ -0,0 +1,5 @@
+// vflt.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+ res = f32_lt_quiet(vs2, vs1);
+})
diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h
new file mode 100644
index 0000000..ac2eced
--- /dev/null
+++ b/riscv/insns/vmfne_vf.h
@@ -0,0 +1,5 @@
+// vfne.vf vd, vs2, rs1
+VI_VFP_LOOP_CMP
+({
+ res = !f32_eq(vs2, rs1);
+})
diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h
new file mode 100644
index 0000000..3fa8beb
--- /dev/null
+++ b/riscv/insns/vmfne_vv.h
@@ -0,0 +1,5 @@
+// vfne.vv vd, vs2, vs1
+VI_VFP_LOOP_CMP
+({
+ res = !f32_eq(vs2, vs1);
+})
diff --git a/riscv/insns/vmford_vf.h b/riscv/insns/vmford_vf.h
new file mode 100644
index 0000000..b5e74f2
--- /dev/null
+++ b/riscv/insns/vmford_vf.h
@@ -0,0 +1,5 @@
+// vford.vf vd, vs2, rs1, vm
+VI_VFP_LOOP_CMP
+({
+ res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(rs1));
+})
diff --git a/riscv/insns/vmford_vv.h b/riscv/insns/vmford_vv.h
new file mode 100644
index 0000000..2e459c1
--- /dev/null
+++ b/riscv/insns/vmford_vv.h
@@ -0,0 +1,5 @@
+// vford.vv vd, vs2, vs1, vm
+VI_VFP_LOOP_CMP
+({
+ res = !(f32_isSignalingNaN(vs2) || f32_isSignalingNaN(vs1));
+})
diff --git a/riscv/insns/vmin_vv.h b/riscv/insns/vmin_vv.h
new file mode 100644
index 0000000..21da0b3
--- /dev/null
+++ b/riscv/insns/vmin_vv.h
@@ -0,0 +1,11 @@
+// vmin.vv vd, vs2, vs1, vm # Vector-vector
+VI_VV_LOOP
+({
+ if (vs1 <= vs2) {
+ vd = vs1;
+ } else {
+ vd = vs2;
+ }
+
+
+})
diff --git a/riscv/insns/vmin_vx.h b/riscv/insns/vmin_vx.h
new file mode 100644
index 0000000..3291776
--- /dev/null
+++ b/riscv/insns/vmin_vx.h
@@ -0,0 +1,11 @@
+// vminx.vx vd, vs2, rs1, vm # vector-scalar
+VI_VX_LOOP
+({
+ if (rs1 <= vs2) {
+ vd = rs1;
+ } else {
+ vd = vs2;
+ }
+
+
+})
diff --git a/riscv/insns/vminu_vv.h b/riscv/insns/vminu_vv.h
new file mode 100644
index 0000000..c0ab195
--- /dev/null
+++ b/riscv/insns/vminu_vv.h
@@ -0,0 +1,9 @@
+// vminu.vv vd, vs2, vs1, vm # Vector-vector
+VI_VV_ULOOP
+({
+ if (vs1 <= vs2) {
+ vd = vs1;
+ } else {
+ vd = vs2;
+ }
+})
diff --git a/riscv/insns/vminu_vx.h b/riscv/insns/vminu_vx.h
new file mode 100644
index 0000000..1055895
--- /dev/null
+++ b/riscv/insns/vminu_vx.h
@@ -0,0 +1,10 @@
+// vminu.vx vd, vs2, rs1, vm # vector-scalar
+VI_VX_ULOOP
+({
+ if (rs1 <= vs2) {
+ vd = rs1;
+ } else {
+ vd = vs2;
+ }
+
+})
diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h
new file mode 100644
index 0000000..5a3ab09
--- /dev/null
+++ b/riscv/insns/vmnand_mm.h
@@ -0,0 +1,2 @@
+// vmnand.mm vd, vs2, vs1
+VI_LOOP_MASK(~(vs2 & vs1));
diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h
new file mode 100644
index 0000000..ab93378
--- /dev/null
+++ b/riscv/insns/vmnor_mm.h
@@ -0,0 +1,2 @@
+// vmnor.mm vd, vs2, vs1
+VI_LOOP_MASK(~(vs2 | vs1));
diff --git a/riscv/insns/vmor_mm.h b/riscv/insns/vmor_mm.h
new file mode 100644
index 0000000..32e71b9
--- /dev/null
+++ b/riscv/insns/vmor_mm.h
@@ -0,0 +1,2 @@
+// vmor.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 | vs1);
diff --git a/riscv/insns/vmornot_mm.h b/riscv/insns/vmornot_mm.h
new file mode 100644
index 0000000..bdc1d8b
--- /dev/null
+++ b/riscv/insns/vmornot_mm.h
@@ -0,0 +1,2 @@
+// vmornot.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 | ~vs1);
diff --git a/riscv/insns/vmpopc_m.h b/riscv/insns/vmpopc_m.h
new file mode 100644
index 0000000..9e22b2b
--- /dev/null
+++ b/riscv/insns/vmpopc_m.h
@@ -0,0 +1,24 @@
+// vmpopc rd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2();
+require(P.VU.vstart == 0);
+reg_t popcount = 0;
+for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ const int mlen = P.VU.vmlen;
+ const int midx = (mlen * i) / 32;
+ const int mpos = (mlen * i) % 32;
+
+ bool vs2_lsb = ((P.VU.elt<uint32_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
+ if (insn.v_vm() == 1) {
+ popcount += vs2_lsb;
+ } else {
+ bool do_mask = (P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1;
+ popcount += (vs2_lsb && do_mask);
+ }
+}
+P.VU.vstart = 0;
+WRITE_RD(popcount);
diff --git a/riscv/insns/vmsbc_vvm.h b/riscv/insns/vmsbc_vvm.h
new file mode 100644
index 0000000..3804ba8
--- /dev/null
+++ b/riscv/insns/vmsbc_vvm.h
@@ -0,0 +1,14 @@
+// vmsbc.vvm vd, vs2, vs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VV_LOOP_CARRY
+({
+ auto v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & vs1) - (op_mask & vs2) - carry;
+
+ carry = (res >> sew) & 0x1u;
+ vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
diff --git a/riscv/insns/vmsbc_vxm.h b/riscv/insns/vmsbc_vxm.h
new file mode 100644
index 0000000..d5332f5
--- /dev/null
+++ b/riscv/insns/vmsbc_vxm.h
@@ -0,0 +1,14 @@
+// vmsbc.vxm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_XI_LOOP_CARRY
+({
+ auto &v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & rs1) - (op_mask & vs2) - carry;
+
+ carry = (res >> sew) & 0x1u;
+ vd = (vd & ~mmask) | ((carry << mpos) & mmask);
+})
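
The vmsbc handlers above recover the per-element borrow by doing the SEW-wide subtraction in a wider type and reading bit SEW of the result. A minimal sketch of that idea, assuming sew <= 64 and illustrative names:

    #include <cstdint>

    // Borrow out of (a - b - borrow_in) at an element width of 'sew' bits.
    bool borrow_out(uint64_t a, uint64_t b, bool borrow_in, unsigned sew) {
      unsigned __int128 mask = (~(unsigned __int128)0) >> (128 - sew);
      unsigned __int128 res  = (mask & a) - (mask & b) - borrow_in;
      return (res >> sew) & 1;   // the subtraction wrapped below zero iff this bit is set
    }
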
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
new file mode 100644
index 0000000..a014900
--- /dev/null
+++ b/riscv/insns/vmsbf_m.h
@@ -0,0 +1,34 @@
+// vmsbf.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+bool has_one = false;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+ const int mlen = P.VU.vmlen;
+ const int midx = (mlen * i) / 64;
+ const int mpos = (mlen * i) % 64;
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+
+ bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
+ bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ auto &vd = P.VU.elt<uint64_t>(rd_num, midx);
+
+
+ if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+ uint64_t res = 0;
+ if (!has_one && !vs2_lsb) {
+ res = 1;
+ } else if(!has_one && vs2_lsb) {
+ has_one = true;
+ }
+ vd = (vd & ~mmask) | ((res << mpos) & mmask);
+ }
+}
+
+VI_TAIL_ZERO_MASK(rd_num);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vmseq_vi.h b/riscv/insns/vmseq_vi.h
new file mode 100644
index 0000000..cfc1682
--- /dev/null
+++ b/riscv/insns/vmseq_vi.h
@@ -0,0 +1,5 @@
+// vseq.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+ res = simm5 == vs2;
+})
diff --git a/riscv/insns/vmseq_vv.h b/riscv/insns/vmseq_vv.h
new file mode 100644
index 0000000..91fd204
--- /dev/null
+++ b/riscv/insns/vmseq_vv.h
@@ -0,0 +1,6 @@
+// vseq.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+ res = vs2 == vs1;
+})
+
diff --git a/riscv/insns/vmseq_vx.h b/riscv/insns/vmseq_vx.h
new file mode 100644
index 0000000..ab63323
--- /dev/null
+++ b/riscv/insns/vmseq_vx.h
@@ -0,0 +1,5 @@
+// vseq.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+ res = rs1 == vs2;
+})
diff --git a/riscv/insns/vmsgt_vi.h b/riscv/insns/vmsgt_vi.h
new file mode 100644
index 0000000..4f7dea8
--- /dev/null
+++ b/riscv/insns/vmsgt_vi.h
@@ -0,0 +1,5 @@
+// vsgt.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+ res = vs2 > simm5;
+})
diff --git a/riscv/insns/vmsgt_vx.h b/riscv/insns/vmsgt_vx.h
new file mode 100644
index 0000000..5f24db6
--- /dev/null
+++ b/riscv/insns/vmsgt_vx.h
@@ -0,0 +1,5 @@
+// vsgt.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+ res = vs2 > rs1;
+})
diff --git a/riscv/insns/vmsgtu_vi.h b/riscv/insns/vmsgtu_vi.h
new file mode 100644
index 0000000..268d437
--- /dev/null
+++ b/riscv/insns/vmsgtu_vi.h
@@ -0,0 +1,5 @@
+// vsgtu.vi vd, vs2, zimm5
+VI_VI_ULOOP_CMP
+({
+ res = vs2 > simm5;
+})
diff --git a/riscv/insns/vmsgtu_vx.h b/riscv/insns/vmsgtu_vx.h
new file mode 100644
index 0000000..7f39800
--- /dev/null
+++ b/riscv/insns/vmsgtu_vx.h
@@ -0,0 +1,5 @@
+// vsgtu.vx vd, vs2, rs1
+VI_VX_ULOOP_CMP
+({
+ res = vs2 > rs1;
+})
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
new file mode 100644
index 0000000..144b67c
--- /dev/null
+++ b/riscv/insns/vmsif_m.h
@@ -0,0 +1,34 @@
+// vmsif.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+bool has_one = false;
+for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+ const int mlen = P.VU.vmlen;
+ const int midx = (mlen * i) / 64;
+ const int mpos = (mlen * i) % 64;
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+
+ bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
+ bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ auto &vd = P.VU.elt<uint64_t>(rd_num, midx);
+
+ if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+ uint64_t res = 0;
+ if (!has_one && !vs2_lsb) {
+ res = 1;
+ } else if(!has_one && vs2_lsb) {
+ has_one = true;
+ res = 1;
+ }
+ vd = (vd & ~mmask) | ((res << mpos) & mmask);
+ }
+}
+
+VI_TAIL_ZERO_MASK(rd_num);
+P.VU.vstart = 0;
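
vmsbf.m, vmsif.m and vmsof.m above differ only in how they treat the first set bit: before-first excludes it, including-first keeps it, only-first keeps nothing else. A reference sketch over a plain bit vector, ignoring masking and tail zeroing, with illustrative names:

    #include <cstddef>
    #include <vector>

    // Outputs are assumed to be pre-sized to src.size().
    void first_bit_ops(const std::vector<bool> &src,
                       std::vector<bool> &sbf,   // set-before-first
                       std::vector<bool> &sif,   // set-including-first
                       std::vector<bool> &sof) { // set-only-first
      bool seen = false;
      for (std::size_t i = 0; i < src.size(); ++i) {
        sbf[i] = !seen && !src[i];
        sif[i] = !seen;
        sof[i] = !seen && src[i];
        if (src[i]) seen = true;
      }
    }
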
diff --git a/riscv/insns/vmsle_vi.h b/riscv/insns/vmsle_vi.h
new file mode 100644
index 0000000..f0f67d0
--- /dev/null
+++ b/riscv/insns/vmsle_vi.h
@@ -0,0 +1,5 @@
+// vsle.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+ res = vs2 <= simm5;
+})
diff --git a/riscv/insns/vmsle_vv.h b/riscv/insns/vmsle_vv.h
new file mode 100644
index 0000000..30aba06
--- /dev/null
+++ b/riscv/insns/vmsle_vv.h
@@ -0,0 +1,5 @@
+// vsle.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+ res = vs2 <= vs1;
+})
diff --git a/riscv/insns/vmsle_vx.h b/riscv/insns/vmsle_vx.h
new file mode 100644
index 0000000..c26d596
--- /dev/null
+++ b/riscv/insns/vmsle_vx.h
@@ -0,0 +1,5 @@
+// vsle.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+ res = vs2 <= rs1;
+})
diff --git a/riscv/insns/vmsleu_vi.h b/riscv/insns/vmsleu_vi.h
new file mode 100644
index 0000000..dc4fd18
--- /dev/null
+++ b/riscv/insns/vmsleu_vi.h
@@ -0,0 +1,5 @@
+// vsleu.vi vd, vs2, zimm5
+VI_VI_ULOOP_CMP
+({
+ res = vs2 <= simm5;
+})
diff --git a/riscv/insns/vmsleu_vv.h b/riscv/insns/vmsleu_vv.h
new file mode 100644
index 0000000..0e46032
--- /dev/null
+++ b/riscv/insns/vmsleu_vv.h
@@ -0,0 +1,5 @@
+// vsleu.vv vd, vs2, vs1
+VI_VV_ULOOP_CMP
+({
+ res = vs2 <= vs1;
+})
diff --git a/riscv/insns/vmsleu_vx.h b/riscv/insns/vmsleu_vx.h
new file mode 100644
index 0000000..935b176
--- /dev/null
+++ b/riscv/insns/vmsleu_vx.h
@@ -0,0 +1,5 @@
+// vsleu.vx vd, vs2, rs1
+VI_VX_ULOOP_CMP
+({
+ res = vs2 <= rs1;
+})
diff --git a/riscv/insns/vmslt_vv.h b/riscv/insns/vmslt_vv.h
new file mode 100644
index 0000000..71e6f87
--- /dev/null
+++ b/riscv/insns/vmslt_vv.h
@@ -0,0 +1,5 @@
+// vslt.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+ res = vs2 < vs1;
+})
diff --git a/riscv/insns/vmslt_vx.h b/riscv/insns/vmslt_vx.h
new file mode 100644
index 0000000..b32bb14
--- /dev/null
+++ b/riscv/insns/vmslt_vx.h
@@ -0,0 +1,5 @@
+// vslt.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+ res = vs2 < rs1;
+})
diff --git a/riscv/insns/vmsltu_vv.h b/riscv/insns/vmsltu_vv.h
new file mode 100644
index 0000000..53a570a
--- /dev/null
+++ b/riscv/insns/vmsltu_vv.h
@@ -0,0 +1,5 @@
+// vsltu.vv vd, vs2, vs1
+VI_VV_ULOOP_CMP
+({
+ res = vs2 < vs1;
+})
diff --git a/riscv/insns/vmsltu_vx.h b/riscv/insns/vmsltu_vx.h
new file mode 100644
index 0000000..8082544
--- /dev/null
+++ b/riscv/insns/vmsltu_vx.h
@@ -0,0 +1,5 @@
+// vsltu.vx vd, vs2, rs1
+VI_VX_ULOOP_CMP
+({
+ res = vs2 < rs1;
+})
diff --git a/riscv/insns/vmsne_vi.h b/riscv/insns/vmsne_vi.h
new file mode 100644
index 0000000..5e9758e
--- /dev/null
+++ b/riscv/insns/vmsne_vi.h
@@ -0,0 +1,5 @@
+// vsne.vi vd, vs2, simm5
+VI_VI_LOOP_CMP
+({
+ res = vs2 != simm5;
+})
diff --git a/riscv/insns/vmsne_vv.h b/riscv/insns/vmsne_vv.h
new file mode 100644
index 0000000..e6a7174
--- /dev/null
+++ b/riscv/insns/vmsne_vv.h
@@ -0,0 +1,5 @@
+// vsne.vv vd, vs2, vs1
+VI_VV_LOOP_CMP
+({
+ res = vs2 != vs1;
+})
diff --git a/riscv/insns/vmsne_vx.h b/riscv/insns/vmsne_vx.h
new file mode 100644
index 0000000..9e4c155
--- /dev/null
+++ b/riscv/insns/vmsne_vx.h
@@ -0,0 +1,5 @@
+// vsne.vx vd, vs2, rs1
+VI_VX_LOOP_CMP
+({
+ res = vs2 != rs1;
+})
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
new file mode 100644
index 0000000..b4cbbce
--- /dev/null
+++ b/riscv/insns/vmsof_m.h
@@ -0,0 +1,32 @@
+// vmsof.m vd, vs2, vm
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+
+bool has_one = false;
+for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+ const int mlen = P.VU.vmlen;
+ const int midx = (mlen * i) / 64;
+ const int mpos = (mlen * i) % 64;
+ const uint64_t mmask = (UINT64_MAX << (64 - mlen)) >> (64 - mlen - mpos);
+
+ bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
+ bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx);
+
+ if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
+ uint64_t res = 0;
+ if(!has_one && vs2_lsb) {
+ has_one = true;
+ res = 1;
+ }
+ vd = (vd & ~mmask) | ((res << mpos) & mmask);
+ }
+}
+
+VI_TAIL_ZERO_MASK(rd_num);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vmul_vv.h b/riscv/insns/vmul_vv.h
new file mode 100644
index 0000000..a327817
--- /dev/null
+++ b/riscv/insns/vmul_vv.h
@@ -0,0 +1,5 @@
+// vmul vd, vs2, vs1
+VI_VV_LOOP
+({
+ vd = vs2 * vs1;
+})
diff --git a/riscv/insns/vmul_vx.h b/riscv/insns/vmul_vx.h
new file mode 100644
index 0000000..8d68390
--- /dev/null
+++ b/riscv/insns/vmul_vx.h
@@ -0,0 +1,5 @@
+// vmul vd, vs2, rs1
+VI_VX_LOOP
+({
+ vd = vs2 * rs1;
+})
diff --git a/riscv/insns/vmulh_vv.h b/riscv/insns/vmulh_vv.h
new file mode 100644
index 0000000..e861a33
--- /dev/null
+++ b/riscv/insns/vmulh_vv.h
@@ -0,0 +1,5 @@
+// vmulh vd, vs2, vs1
+VI_VV_LOOP
+({
+ vd = ((int128_t)vs2 * vs1) >> sew;
+})
diff --git a/riscv/insns/vmulh_vx.h b/riscv/insns/vmulh_vx.h
new file mode 100644
index 0000000..b6b5503
--- /dev/null
+++ b/riscv/insns/vmulh_vx.h
@@ -0,0 +1,5 @@
+// vmulh vd, vs2, rs1
+VI_VX_LOOP
+({
+ vd = ((int128_t)vs2 * rs1) >> sew;
+})
diff --git a/riscv/insns/vmulhsu_vv.h b/riscv/insns/vmulhsu_vv.h
new file mode 100644
index 0000000..59882da
--- /dev/null
+++ b/riscv/insns/vmulhsu_vv.h
@@ -0,0 +1,37 @@
+// vmulhsu.vv vd, vs2, vs1
+VI_LOOP_BASE
+switch(sew) {
+case e8: {
+ auto &vd = P.VU.elt<int8_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int8_t>(rs2_num, i);
+ auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
+
+ vd = ((int16_t)vs2 * (uint16_t)vs1) >> sew;
+ break;
+}
+case e16: {
+ auto &vd = P.VU.elt<int16_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
+ auto vs1 = P.VU.elt<uint16_t>(rs1_num, i);
+
+ vd = ((int32_t)vs2 * (uint32_t)vs1) >> sew;
+ break;
+}
+case e32: {
+ auto &vd = P.VU.elt<int32_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
+ auto vs1 = P.VU.elt<uint32_t>(rs1_num, i);
+
+ vd = ((int64_t)vs2 * (uint64_t)vs1) >> sew;
+ break;
+}
+default: {
+ auto &vd = P.VU.elt<int64_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
+ auto vs1 = P.VU.elt<uint64_t>(rs1_num, i);
+
+ vd = ((int128_t)vs2 * (uint128_t)vs1) >> sew;
+ break;
+}
+}
+VI_LOOP_END
diff --git a/riscv/insns/vmulhsu_vx.h b/riscv/insns/vmulhsu_vx.h
new file mode 100644
index 0000000..d39615a
--- /dev/null
+++ b/riscv/insns/vmulhsu_vx.h
@@ -0,0 +1,37 @@
+// vmulhsu.vx vd, vs2, rs1
+VI_LOOP_BASE
+switch(sew) {
+case e8: {
+ auto &vd = P.VU.elt<int8_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int8_t>(rs2_num, i);
+ uint8_t rs1 = RS1;
+
+ vd = ((int16_t)vs2 * (uint16_t)rs1) >> sew;
+ break;
+}
+case e16: {
+ auto &vd = P.VU.elt<int16_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int16_t>(rs2_num, i);
+ uint16_t rs1 = RS1;
+
+ vd = ((int32_t)vs2 * (uint32_t)rs1) >> sew;
+ break;
+}
+case e32: {
+ auto &vd = P.VU.elt<int32_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int32_t>(rs2_num, i);
+ uint32_t rs1 = RS1;
+
+ vd = ((int64_t)vs2 * (uint64_t)rs1) >> sew;
+ break;
+}
+default: {
+ auto &vd = P.VU.elt<int64_t>(rd_num, i);
+ auto vs2 = P.VU.elt<int64_t>(rs2_num, i);
+ uint64_t rs1 = RS1;
+
+ vd = ((int128_t)vs2 * (uint128_t)rs1) >> sew;
+ break;
+}
+}
+VI_LOOP_END
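
vmulhsu returns the high half of a signed-by-unsigned product; each case above widens both operands to the next element width and shifts right by SEW. A one-case sketch (32-bit elements, illustrative name):

    #include <cstdint>

    // High 32 bits of a (signed a) * (unsigned b) product.
    int32_t mulhsu32(int32_t a, uint32_t b) {
      return (int32_t)(((int64_t)a * (uint64_t)b) >> 32);
    }
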
diff --git a/riscv/insns/vmulhu_vv.h b/riscv/insns/vmulhu_vv.h
new file mode 100644
index 0000000..8e318ed
--- /dev/null
+++ b/riscv/insns/vmulhu_vv.h
@@ -0,0 +1,5 @@
+// vmulhu vd, vs2, vs1
+VI_VV_ULOOP
+({
+ vd = ((uint128_t)vs2 * vs1) >> sew;
+})
diff --git a/riscv/insns/vmulhu_vx.h b/riscv/insns/vmulhu_vx.h
new file mode 100644
index 0000000..672ad32
--- /dev/null
+++ b/riscv/insns/vmulhu_vx.h
@@ -0,0 +1,5 @@
+// vmulhu vd, vs2, rs1
+VI_VX_ULOOP
+({
+ vd = ((uint128_t)vs2 * rs1) >> sew;
+})
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
new file mode 100644
index 0000000..1c4ffb2
--- /dev/null
+++ b/riscv/insns/vmv_s_x.h
@@ -0,0 +1,45 @@
+// vmv_s_x: vd[0] = rs1
+require(insn.v_vm() == 1);
+require(P.VU.vsew == e8 || P.VU.vsew == e16 ||
+ P.VU.vsew == e32 || P.VU.vsew == e64);
+reg_t vl = P.VU.vl;
+
+if (vl > 0) {
+ reg_t rd_num = insn.rd();
+ reg_t sew = P.VU.vsew;
+
+ switch(sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, 0) = RS1;
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, 0) = RS1;
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, 0) = RS1;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, 0) = RS1;
+ break;
+ }
+
+ const reg_t max_len = P.VU.VLEN / sew;
+ for (reg_t i = 1; i < max_len; ++i) {
+ switch(sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, i) = 0;
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, i) = 0;
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, i) = 0;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = 0;
+ break;
+ }
+ }
+
+ vl = 0;
+}
diff --git a/riscv/insns/vmv_v_i.h b/riscv/insns/vmv_v_i.h
new file mode 100644
index 0000000..31e9877
--- /dev/null
+++ b/riscv/insns/vmv_v_i.h
@@ -0,0 +1,5 @@
+// vmv.v.i vd, simm5
+VI_VVXI_MERGE_LOOP
+({
+ vd = simm5;
+})
diff --git a/riscv/insns/vmv_v_v.h b/riscv/insns/vmv_v_v.h
new file mode 100644
index 0000000..734010b
--- /dev/null
+++ b/riscv/insns/vmv_v_v.h
@@ -0,0 +1,5 @@
+// vmv.v.v vd, vs1
+VI_VVXI_MERGE_LOOP
+({
+ vd = vs1;
+})
diff --git a/riscv/insns/vmv_v_x.h b/riscv/insns/vmv_v_x.h
new file mode 100644
index 0000000..4688b3f
--- /dev/null
+++ b/riscv/insns/vmv_v_x.h
@@ -0,0 +1,5 @@
+// vmv.v.x vd, rs1
+VI_VVXI_MERGE_LOOP
+({
+ vd = rs1;
+})
diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h
new file mode 100644
index 0000000..0736d5b
--- /dev/null
+++ b/riscv/insns/vmxnor_mm.h
@@ -0,0 +1,2 @@
+// vmxnor.mm vd, vs2, vs1
+VI_LOOP_MASK(~(vs2 ^ vs1));
diff --git a/riscv/insns/vmxor_mm.h b/riscv/insns/vmxor_mm.h
new file mode 100644
index 0000000..7f0c576
--- /dev/null
+++ b/riscv/insns/vmxor_mm.h
@@ -0,0 +1,2 @@
+// vmxor.mm vd, vs2, vs1
+VI_LOOP_MASK(vs2 ^ vs1);
diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h
new file mode 100644
index 0000000..ca27593
--- /dev/null
+++ b/riscv/insns/vnclip_vi.h
@@ -0,0 +1,24 @@
+// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> simm)
+VRM xrm = P.VU.get_vround_mode();
+int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
+int64_t int_min = -(1 << (P.VU.vsew - 1));
+VI_VVXI_LOOP_NARROW
+({
+
+ int64_t result = vs2;
+// rounding
+ INT_ROUNDING(result, xrm, sew);
+
+ result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+
+// saturation
+ if (result < int_min) {
+ result = int_min;
+ P.VU.vxsat = 1;
+ } else if (result > int_max) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ }
+
+ vd = result;
+})
diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h
new file mode 100644
index 0000000..7bcb4cb
--- /dev/null
+++ b/riscv/insns/vnclip_vv.h
@@ -0,0 +1,30 @@
+// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i])
+VRM xrm = P.VU.get_vround_mode();
+int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
+int64_t int_min = -(1 << (P.VU.vsew - 1));
+VI_VVXI_LOOP_NARROW
+({
+
+ int64_t result = vs2;
+// rounding
+ INT_ROUNDING(result, xrm, sew);
+
+// unsigned shifting to rs1
+ uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
+ if (unsigned_shift_amount >= (2 * sew)) {
+ unsigned_shift_amount = 2 * sew - 1;
+ }
+
+ result = (vsext(result, sew * 2)) >> unsigned_shift_amount;
+
+// saturation
+ if (result < int_min) {
+ result = int_min;
+ P.VU.vxsat = 1;
+ } else if (result > int_max) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ }
+
+ vd = result;
+})
diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h
new file mode 100644
index 0000000..b66e830
--- /dev/null
+++ b/riscv/insns/vnclip_vx.h
@@ -0,0 +1,29 @@
+// vnclip: vd[i] = clip(round(vs2[i] + rnd) >> rs1[i])
+VRM xrm = P.VU.get_vround_mode();
+int64_t int_max = (1 << (P.VU.vsew - 1)) - 1;
+int64_t int_min = -(1 << (P.VU.vsew - 1));
+VI_VVXI_LOOP_NARROW
+({
+
+ int64_t result = vs2;
+// rounding
+ INT_ROUNDING(result, xrm, sew);
+
+// unsigned shifting to rs1
+ uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
+ if (unsigned_shift_amount >= (2 * sew)) {
+ unsigned_shift_amount = 2 * sew - 1;
+ }
+ result = vsext(result, sew * 2) >> unsigned_shift_amount;
+
+// saturation
+ if (result < int_min) {
+ result = int_min;
+ P.VU.vxsat = 1;
+ } else if (result > int_max) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ }
+
+ vd = result;
+})
diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h
new file mode 100644
index 0000000..61cb015
--- /dev/null
+++ b/riscv/insns/vnclipu_vi.h
@@ -0,0 +1,20 @@
+// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> simm)
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = ~(-1ll << P.VU.vsew);
+VI_VVXI_LOOP_NARROW
+({
+ uint64_t result = vs2_u;
+ // rounding
+ INT_ROUNDING(result, xrm, sew);
+
+ // unsigned shifting to rs1
+ result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31));
+
+ // saturation
+ if (result & (uint64_t)(-1ll << sew)) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ }
+
+ vd = result;
+})
diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h
new file mode 100644
index 0000000..004f24f
--- /dev/null
+++ b/riscv/insns/vnclipu_vv.h
@@ -0,0 +1,26 @@
+// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> vs1[i])
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = ~(-1ll << P.VU.vsew);
+VI_VVXI_LOOP_NARROW
+({
+
+ uint64_t result = vs2_u;
+
+// rounding
+ INT_ROUNDING(result, xrm, sew);
+
+// unsigned shifting to rs1
+ uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1));
+ if (unsigned_shift_amount >= (2 * sew)) {
+ result = 0;
+ } else {
+ result = vzext(result, sew * 2) >> unsigned_shift_amount;
+ }
+// saturation
+ if (result & (uint64_t)(-1ll << sew)) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ }
+
+ vd = result;
+})
diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h
new file mode 100644
index 0000000..0507a2b
--- /dev/null
+++ b/riscv/insns/vnclipu_vx.h
@@ -0,0 +1,26 @@
+// vnclipu: vd[i] = clip(round(vs2[i] + rnd) >> rs1[i])
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = ~(-1ll << P.VU.vsew);
+VI_VVXI_LOOP_NARROW
+({
+ uint64_t result = vs2;
+
+// rounding
+ INT_ROUNDING(result, xrm, sew);
+
+// unsigned shifting to rs1
+ uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1));
+ if (unsigned_shift_amount >= (2 * sew)) {
+ result = 0;
+ } else {
+ result = vzext(result, sew * 2) >> unsigned_shift_amount;
+ }
+
+// saturation
+ if (result & (uint64_t)(-1ll << sew)) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ }
+
+ vd = result;
+})
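
The vnclip/vnclipu handlers narrow a 2*SEW source to SEW bits: round according to vxrm, shift right, then saturate. A sketch of the unsigned path, assuming the round-to-nearest-up mode only (the INT_ROUNDING macro in decode.h covers the full vxrm set) and a destination width of at most 32 bits:

    #include <cstdint>

    uint64_t vnclipu_ref(uint64_t wide_src, unsigned shift, unsigned sew,
                         bool &vxsat) {
      shift &= 2 * sew - 1;                        // shift uses log2(2*SEW) bits
      unsigned __int128 v = wide_src;
      if (shift)
        v += (unsigned __int128)1 << (shift - 1);  // rnu: add half an LSB
      uint64_t res = (uint64_t)(v >> shift);
      uint64_t max = ((uint64_t)1 << sew) - 1;
      if (res > max) { res = max; vxsat = true; }  // clip to SEW bits
      return res;
    }
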
diff --git a/riscv/insns/vnmsac_vv.h b/riscv/insns/vnmsac_vv.h
new file mode 100644
index 0000000..7c10f29
--- /dev/null
+++ b/riscv/insns/vnmsac_vv.h
@@ -0,0 +1,5 @@
+// vnmsac.vv: vd[i] = -(vs1[i] * vs2[i]) + vd[i]
+VI_VV_LOOP
+({
+ vd = -(vs1 * vs2) + vd;
+})
diff --git a/riscv/insns/vnmsac_vx.h b/riscv/insns/vnmsac_vx.h
new file mode 100644
index 0000000..44920be
--- /dev/null
+++ b/riscv/insns/vnmsac_vx.h
@@ -0,0 +1,5 @@
+// vnmsac.vx: vd[i] = -(x[rs1] * vs2[i]) + vd[i]
+VI_VX_LOOP
+({
+ vd = -(rs1 * vs2) + vd;
+})
diff --git a/riscv/insns/vnmsub_vv.h b/riscv/insns/vnmsub_vv.h
new file mode 100644
index 0000000..37f8228
--- /dev/null
+++ b/riscv/insns/vnmsub_vv.h
@@ -0,0 +1,5 @@
+// vnmsub.vv: vd[i] = -(vd[i] * vs1[i]) + vs2[i]
+VI_VV_LOOP
+({
+ vd = -(vd * vs1) + vs2;
+})
diff --git a/riscv/insns/vnmsub_vx.h b/riscv/insns/vnmsub_vx.h
new file mode 100644
index 0000000..2e00d22
--- /dev/null
+++ b/riscv/insns/vnmsub_vx.h
@@ -0,0 +1,5 @@
+// vnmsub.vx: vd[i] = -(vd[i] * x[rs1]) + vs2[i]
+VI_VX_LOOP
+({
+ vd = -(vd * rs1) + vs2;
+})
diff --git a/riscv/insns/vnsra_vi.h b/riscv/insns/vnsra_vi.h
new file mode 100644
index 0000000..0502ff1
--- /dev/null
+++ b/riscv/insns/vnsra_vi.h
@@ -0,0 +1,5 @@
+// vnsra.vi vd, vs2, zimm5
+VI_VI_LOOP_NSHIFT
+({
+ vd = vs2 >> (zimm5 & (sew * 2 - 1) & 0x1f);
+})
diff --git a/riscv/insns/vnsra_vv.h b/riscv/insns/vnsra_vv.h
new file mode 100644
index 0000000..555ce3f
--- /dev/null
+++ b/riscv/insns/vnsra_vv.h
@@ -0,0 +1,5 @@
+// vnsra.vv vd, vs2, vs1
+VI_VV_LOOP_NSHIFT
+({
+ vd = vs2 >> (vs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsra_vx.h b/riscv/insns/vnsra_vx.h
new file mode 100644
index 0000000..05a55e3
--- /dev/null
+++ b/riscv/insns/vnsra_vx.h
@@ -0,0 +1,5 @@
+// vnsra.vx vd, vs2, rs1
+VI_VX_LOOP_NSHIFT
+({
+ vd = vs2 >> (rs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsrl_vi.h b/riscv/insns/vnsrl_vi.h
new file mode 100644
index 0000000..d4dfcf0
--- /dev/null
+++ b/riscv/insns/vnsrl_vi.h
@@ -0,0 +1,5 @@
+// vnsrl.vi vd, vs2, zimm5
+VI_VI_LOOP_NSHIFT
+({
+ vd = vs2_u >> (zimm5 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsrl_vv.h b/riscv/insns/vnsrl_vv.h
new file mode 100644
index 0000000..ab72b84
--- /dev/null
+++ b/riscv/insns/vnsrl_vv.h
@@ -0,0 +1,5 @@
+// vnsrl.vv vd, vs2, vs1
+VI_VV_LOOP_NSHIFT
+({
+ vd = vs2_u >> (vs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vnsrl_vx.h b/riscv/insns/vnsrl_vx.h
new file mode 100644
index 0000000..e149b38
--- /dev/null
+++ b/riscv/insns/vnsrl_vx.h
@@ -0,0 +1,5 @@
+// vnsrl.vx vd, vs2, rs1
+VI_VX_LOOP_NSHIFT
+({
+ vd = vs2_u >> (rs1 & (sew * 2 - 1));
+})
diff --git a/riscv/insns/vor_vi.h b/riscv/insns/vor_vi.h
new file mode 100644
index 0000000..f759607
--- /dev/null
+++ b/riscv/insns/vor_vi.h
@@ -0,0 +1,5 @@
+// vor
+VI_VI_LOOP
+({
+ vd = simm5 | vs2;
+})
diff --git a/riscv/insns/vor_vv.h b/riscv/insns/vor_vv.h
new file mode 100644
index 0000000..0c46066
--- /dev/null
+++ b/riscv/insns/vor_vv.h
@@ -0,0 +1,5 @@
+// vor
+VI_VV_LOOP
+({
+ vd = vs1 | vs2;
+})
diff --git a/riscv/insns/vor_vx.h b/riscv/insns/vor_vx.h
new file mode 100644
index 0000000..01c003a
--- /dev/null
+++ b/riscv/insns/vor_vx.h
@@ -0,0 +1,5 @@
+// vor
+VI_VX_LOOP
+({
+ vd = rs1 | vs2;
+})
diff --git a/riscv/insns/vredand_vs.h b/riscv/insns/vredand_vs.h
new file mode 100644
index 0000000..6c2d908
--- /dev/null
+++ b/riscv/insns/vredand_vs.h
@@ -0,0 +1,5 @@
+// vredand.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+ vd_0_res &= vs2;
+})
diff --git a/riscv/insns/vredmax_vs.h b/riscv/insns/vredmax_vs.h
new file mode 100644
index 0000000..be2e76a
--- /dev/null
+++ b/riscv/insns/vredmax_vs.h
@@ -0,0 +1,5 @@
+// vredmax.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+ vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredmaxu_vs.h b/riscv/insns/vredmaxu_vs.h
new file mode 100644
index 0000000..960f486
--- /dev/null
+++ b/riscv/insns/vredmaxu_vs.h
@@ -0,0 +1,5 @@
+// vredmaxu.vs vd, vs2, vs1
+VI_VV_ULOOP_REDUCTION
+({
+ vd_0_res = (vd_0_res >= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredmin_vs.h b/riscv/insns/vredmin_vs.h
new file mode 100644
index 0000000..50359b7
--- /dev/null
+++ b/riscv/insns/vredmin_vs.h
@@ -0,0 +1,5 @@
+// vredmin.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+ vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredminu_vs.h b/riscv/insns/vredminu_vs.h
new file mode 100644
index 0000000..7082475
--- /dev/null
+++ b/riscv/insns/vredminu_vs.h
@@ -0,0 +1,5 @@
+// vredminu.vs vd, vs2, vs1
+VI_VV_ULOOP_REDUCTION
+({
+ vd_0_res = (vd_0_res <= vs2) ? vd_0_res : vs2;
+})
diff --git a/riscv/insns/vredor_vs.h b/riscv/insns/vredor_vs.h
new file mode 100644
index 0000000..f7acd9a
--- /dev/null
+++ b/riscv/insns/vredor_vs.h
@@ -0,0 +1,5 @@
+// vredor.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+ vd_0_res |= vs2;
+})
diff --git a/riscv/insns/vredsum_vs.h b/riscv/insns/vredsum_vs.h
new file mode 100644
index 0000000..c4fefe5
--- /dev/null
+++ b/riscv/insns/vredsum_vs.h
@@ -0,0 +1,5 @@
+// vredsum.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+ vd_0_res += vs2;
+})
diff --git a/riscv/insns/vredxor_vs.h b/riscv/insns/vredxor_vs.h
new file mode 100644
index 0000000..bb81ad9
--- /dev/null
+++ b/riscv/insns/vredxor_vs.h
@@ -0,0 +1,5 @@
+// vredxor.vs vd, vs2, vs1
+VI_VV_LOOP_REDUCTION
+({
+ vd_0_res ^= vs2;
+})
diff --git a/riscv/insns/vrem_vv.h b/riscv/insns/vrem_vv.h
new file mode 100644
index 0000000..da477f0
--- /dev/null
+++ b/riscv/insns/vrem_vv.h
@@ -0,0 +1,11 @@
+// vrem.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+ if (vs1 == 0)
+ vd = vs2;
+ else if(vs2 == -(1 << (sew - 1)) && vs1 == -1)
+ vd = 0;
+ else {
+ vd = vs2 % vs1;
+ }
+})
diff --git a/riscv/insns/vrem_vx.h b/riscv/insns/vrem_vx.h
new file mode 100644
index 0000000..f068842
--- /dev/null
+++ b/riscv/insns/vrem_vx.h
@@ -0,0 +1,10 @@
+// vrem.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+ if (rs1 == 0)
+ vd = vs2;
+ else if (vs2 == -(1 << (sew - 1)) && rs1 == -1)
+ vd = 0;
+ else
+ vd = vs2 % rs1;
+})
diff --git a/riscv/insns/vremu_vv.h b/riscv/insns/vremu_vv.h
new file mode 100644
index 0000000..7e15072
--- /dev/null
+++ b/riscv/insns/vremu_vv.h
@@ -0,0 +1,8 @@
+// vremu.vv vd, vs2, vs1
+VI_VV_ULOOP
+({
+ if (vs1 == 0)
+ vd = vs2;
+ else
+ vd = vs2 % vs1;
+})
diff --git a/riscv/insns/vremu_vx.h b/riscv/insns/vremu_vx.h
new file mode 100644
index 0000000..a87a820
--- /dev/null
+++ b/riscv/insns/vremu_vx.h
@@ -0,0 +1,8 @@
+// vremu.vx vd, vs2, rs1
+VI_VX_ULOOP
+({
+ if (rs1 == 0)
+ vd = vs2;
+ else
+ vd = vs2 % rs1;
+})
diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h
new file mode 100644
index 0000000..a9be102
--- /dev/null
+++ b/riscv/insns/vrgather_vi.h
@@ -0,0 +1,29 @@
+// vrgather.vi vd, vs2, zimm5, vm # vd[i] = (zimm5 >= VLMAX) ? 0 : vs2[zimm5];
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs2_num = insn.rs2();
+reg_t zimm5 = insn.v_zimm5();
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+ VI_LOOP_ELEMENT_SKIP();
+
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, zimm5);
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt<uint16_t>(rs2_num, zimm5);
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt<uint32_t>(rs2_num, zimm5);
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = zimm5 >= P.VU.vlmax ? 0 : P.VU.elt<uint64_t>(rs2_num, zimm5);
+ break;
+ }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vrgather_vv.h b/riscv/insns/vrgather_vv.h
new file mode 100644
index 0000000..da8dc81
--- /dev/null
+++ b/riscv/insns/vrgather_vv.h
@@ -0,0 +1,39 @@
+// vrgather.vv vd, vs2, vs1, vm # vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]];
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+ VI_LOOP_ELEMENT_SKIP();
+ VI_CHECK_VREG_OVERLAP(rd_num, rs1_num);
+ VI_CHECK_VREG_OVERLAP(rd_num, rs2_num);
+ switch (sew) {
+ case e8: {
+ auto vs1 = P.VU.elt<uint8_t>(rs1_num, i);
+ //if (i > 255) continue;
+ P.VU.elt<uint8_t>(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, vs1);
+ break;
+ }
+ case e16: {
+ auto vs1 = P.VU.elt<uint16_t>(rs1_num, i);
+ P.VU.elt<uint16_t>(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint16_t>(rs2_num, vs1);
+ break;
+ }
+ case e32: {
+ auto vs1 = P.VU.elt<uint32_t>(rs1_num, i);
+ P.VU.elt<uint32_t>(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint32_t>(rs2_num, vs1);
+ break;
+ }
+ default: {
+ auto vs1 = P.VU.elt<uint64_t>(rs1_num, i);
+ P.VU.elt<uint64_t>(rd_num, i) = vs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint64_t>(rs2_num, vs1);
+ break;
+ }
+ }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
diff --git a/riscv/insns/vrgather_vx.h b/riscv/insns/vrgather_vx.h
new file mode 100644
index 0000000..d6c2e38
--- /dev/null
+++ b/riscv/insns/vrgather_vx.h
@@ -0,0 +1,30 @@
+// vrgather.vx vd, vs2, rs1, vm # vd[i] = (rs1 >= VLMAX) ? 0 : vs2[rs1];
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+require(!P.VU.vill);
+reg_t vl = P.VU.vl;
+reg_t sew = P.VU.vsew;
+reg_t rd_num = insn.rd();
+reg_t rs1_num = insn.rs1();
+reg_t rs2_num = insn.rs2();
+reg_t rs1 = RS1;
+for (reg_t i = P.VU.vstart; i < vl; ++i) {
+ VI_LOOP_ELEMENT_SKIP();
+
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint8_t>(rs2_num, rs1);
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint16_t>(rs2_num, rs1);
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint32_t>(rs2_num, rs1);
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = rs1 >= P.VU.vlmax ? 0 : P.VU.elt<uint64_t>(rs2_num, rs1);
+ break;
+ }
+}
+
+VI_TAIL_ZERO(1);
+P.VU.vstart = 0;
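
vrgather builds each destination element by indexing into vs2, with out-of-range indices reading as zero. A reference sketch for 32-bit elements (illustrative names; vs2 is assumed to hold vlmax elements):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<uint32_t> vrgather_ref(const std::vector<uint32_t> &vs2,
                                       const std::vector<uint32_t> &index,
                                       std::size_t vlmax) {
      std::vector<uint32_t> vd(index.size());
      for (std::size_t i = 0; i < index.size(); ++i)
        vd[i] = (index[i] >= vlmax) ? 0 : vs2[index[i]];
      return vd;
    }
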
diff --git a/riscv/insns/vrsub_vi.h b/riscv/insns/vrsub_vi.h
new file mode 100644
index 0000000..198c33f
--- /dev/null
+++ b/riscv/insns/vrsub_vi.h
@@ -0,0 +1,5 @@
+// vrsub.vi vd, vs2, imm, vm # vd[i] = imm - vs2[i]
+VI_VI_LOOP
+({
+ vd = simm5 - vs2;
+})
diff --git a/riscv/insns/vrsub_vx.h b/riscv/insns/vrsub_vx.h
new file mode 100644
index 0000000..bfd6259
--- /dev/null
+++ b/riscv/insns/vrsub_vx.h
@@ -0,0 +1,5 @@
+// vrsub.vx vd, vs2, rs1, vm # vd[i] = rs1 - vs2[i]
+VI_VX_LOOP
+({
+ vd = rs1 - vs2;
+})
diff --git a/riscv/insns/vsadd_vi.h b/riscv/insns/vsadd_vi.h
new file mode 100644
index 0000000..de2cb83
--- /dev/null
+++ b/riscv/insns/vsadd_vi.h
@@ -0,0 +1,27 @@
+// vsadd.vi vd, vs2, simm5
+VI_LOOP_BASE
+bool sat = false;
+switch(sew) {
+case e8: {
+ VI_PARAMS(e8);
+ vd = sat_add<int8_t, uint8_t>(vs2, vsext(simm5, sew), sat);
+ break;
+}
+case e16: {
+ VI_PARAMS(e16);
+ vd = sat_add<int16_t, uint16_t>(vs2, vsext(simm5, sew), sat);
+ break;
+}
+case e32: {
+ VI_PARAMS(e32);
+ vd = sat_add<int32_t, uint32_t>(vs2, vsext(simm5, sew), sat);
+ break;
+}
+default: {
+ VI_PARAMS(e64);
+ vd = sat_add<int64_t, uint64_t>(vs2, vsext(simm5, sew), sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vsadd_vv.h b/riscv/insns/vsadd_vv.h
new file mode 100644
index 0000000..2152bab
--- /dev/null
+++ b/riscv/insns/vsadd_vv.h
@@ -0,0 +1,28 @@
+// vsadd.vv vd, vs2, vs1
+VI_LOOP_BASE
+bool sat = false;
+switch(sew) {
+case e8: {
+ VV_PARAMS(e8);
+ vd = sat_add<int8_t, uint8_t>(vs2, vs1, sat);
+ break;
+}
+case e16: {
+ VV_PARAMS(e16);
+ vd = sat_add<int16_t, uint16_t>(vs2, vs1, sat);
+ break;
+}
+case e32: {
+ VV_PARAMS(e32);
+ vd = sat_add<int32_t, uint32_t>(vs2, vs1, sat);
+ break;
+}
+default: {
+ VV_PARAMS(e64);
+ vd = sat_add<int64_t, uint64_t>(vs2, vs1, sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
+
diff --git a/riscv/insns/vsadd_vx.h b/riscv/insns/vsadd_vx.h
new file mode 100644
index 0000000..781e9e8
--- /dev/null
+++ b/riscv/insns/vsadd_vx.h
@@ -0,0 +1,27 @@
+// vsadd.vx vd, vs2, rs1
+VI_LOOP_BASE
+bool sat = false;
+switch(sew) {
+case e8: {
+ VX_PARAMS(e8);
+ vd = sat_add<int8_t, uint8_t>(vs2, rs1, sat);
+ break;
+}
+case e16: {
+ VX_PARAMS(e16);
+ vd = sat_add<int16_t, uint16_t>(vs2, rs1, sat);
+ break;
+}
+case e32: {
+ VX_PARAMS(e32);
+ vd = sat_add<int32_t, uint32_t>(vs2, rs1, sat);
+ break;
+}
+default: {
+ VX_PARAMS(e64);
+ vd = sat_add<int64_t, uint64_t>(vs2, rs1, sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vsaddu_vi.h b/riscv/insns/vsaddu_vi.h
new file mode 100644
index 0000000..9d376cc
--- /dev/null
+++ b/riscv/insns/vsaddu_vi.h
@@ -0,0 +1,11 @@
+// vsaddu vd, vs2, zimm5
+VI_VI_ULOOP
+({
+ bool sat = false;
+ vd = vs2 + simm5;
+
+ sat = vd < vs2;
+ vd |= -(vd < vs2);
+
+ P.VU.vxsat |= sat;
+})
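
The vsaddu variants use a branch-free saturation: after the wrapping add, (vd < vs2) detects overflow, and OR-ing in its negation forces the result to all ones. A standalone sketch of that trick (illustrative name):

    #include <cstdint>

    uint32_t sat_addu32(uint32_t a, uint32_t b, bool &vxsat) {
      uint32_t sum = a + b;           // may wrap
      bool sat = sum < a;             // wrapped iff the sum lost a carry
      sum |= -(uint32_t)sat;          // 0xFFFFFFFF when saturated, unchanged otherwise
      vxsat |= sat;
      return sum;
    }
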
diff --git a/riscv/insns/vsaddu_vv.h b/riscv/insns/vsaddu_vv.h
new file mode 100644
index 0000000..e5d7025
--- /dev/null
+++ b/riscv/insns/vsaddu_vv.h
@@ -0,0 +1,11 @@
+// vsaddu vd, vs2, vs1
+VI_VV_ULOOP
+({
+ bool sat = false;
+ vd = vs2 + vs1;
+
+ sat = vd < vs2;
+ vd |= -(vd < vs2);
+
+ P.VU.vxsat |= sat;
+})
diff --git a/riscv/insns/vsaddu_vx.h b/riscv/insns/vsaddu_vx.h
new file mode 100644
index 0000000..46ec29d
--- /dev/null
+++ b/riscv/insns/vsaddu_vx.h
@@ -0,0 +1,12 @@
+// vsaddu vd, vs2, rs1
+VI_VX_ULOOP
+({
+ bool sat = false;
+ vd = vs2 + rs1;
+
+ sat = vd < vs2;
+ vd |= -(vd < vs2);
+
+ P.VU.vxsat |= sat;
+
+})
diff --git a/riscv/insns/vsbc_vvm.h b/riscv/insns/vsbc_vvm.h
new file mode 100644
index 0000000..4cd58ba
--- /dev/null
+++ b/riscv/insns/vsbc_vvm.h
@@ -0,0 +1,11 @@
+// vsbc.vvm vd, vs2, vs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VV_LOOP
+({
+ auto &v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & vs1) - (op_mask & vs2) - carry;
+ vd = res;
+})
diff --git a/riscv/insns/vsbc_vxm.h b/riscv/insns/vsbc_vxm.h
new file mode 100644
index 0000000..12551b8
--- /dev/null
+++ b/riscv/insns/vsbc_vxm.h
@@ -0,0 +1,11 @@
+// vsbc.vxm vd, vs2, rs1
+require(!(insn.rd() == 0 && P.VU.vlmul > 1));
+VI_VX_ULOOP
+({
+ auto &v0 = P.VU.elt<uint64_t>(0, midx);
+ const uint128_t op_mask = (UINT64_MAX >> (64 - sew));
+ uint64_t carry = (v0 >> mpos) & 0x1;
+
+ uint128_t res = (op_mask & rs1) - (op_mask & vs2) - carry;
+ vd = res;
+})
diff --git a/riscv/insns/vslide1down_vx.h b/riscv/insns/vslide1down_vx.h
new file mode 100644
index 0000000..0069df7
--- /dev/null
+++ b/riscv/insns/vslide1down_vx.h
@@ -0,0 +1,42 @@
+//vslide1down.vx vd, vs2, rs1
+VI_LOOP_BASE
+if (i != vl - 1) {
+ switch (sew) {
+ case e8: {
+ VI_XI_SLIDEDOWN_PARAMS(e8, 1);
+ vd = vs2;
+ }
+ break;
+ case e16: {
+ VI_XI_SLIDEDOWN_PARAMS(e16, 1);
+ vd = vs2;
+ }
+ break;
+ case e32: {
+ VI_XI_SLIDEDOWN_PARAMS(e32, 1);
+ vd = vs2;
+ }
+ break;
+ default: {
+ VI_XI_SLIDEDOWN_PARAMS(e64, 1);
+ vd = vs2;
+ }
+ break;
+ }
+} else {
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num, vl - 1) = RS1;
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num, vl - 1) = RS1;
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num, vl - 1) = RS1;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, vl - 1) = RS1;
+ break;
+ }
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslide1up_vx.h b/riscv/insns/vslide1up_vx.h
new file mode 100644
index 0000000..50cc503
--- /dev/null
+++ b/riscv/insns/vslide1up_vx.h
@@ -0,0 +1,32 @@
+//vslide1up.vx vd, vs2, rs1
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
+VI_CHECK_SS
+VI_LOOP_BASE
+if (i != 0) {
+ if (sew == e8) {
+ VI_XI_SLIDEUP_PARAMS(e8, 1);
+ vd = vs2;
+ } else if(sew == e16) {
+ VI_XI_SLIDEUP_PARAMS(e16, 1);
+ vd = vs2;
+ } else if(sew == e32) {
+ VI_XI_SLIDEUP_PARAMS(e32, 1);
+ vd = vs2;
+ } else if(sew == e64) {
+ VI_XI_SLIDEUP_PARAMS(e64, 1);
+ vd = vs2;
+ }
+} else {
+ if (sew == e8) {
+ P.VU.elt<uint8_t>(rd_num, 0) = RS1;
+ } else if(sew == e16) {
+ P.VU.elt<uint16_t>(rd_num, 0) = RS1;
+ } else if(sew == e32) {
+ P.VU.elt<uint32_t>(rd_num, 0) = RS1;
+ } else if(sew == e64) {
+ P.VU.elt<uint64_t>(rd_num, 0) = RS1;
+ }
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslidedown_vi.h b/riscv/insns/vslidedown_vi.h
new file mode 100644
index 0000000..c21c5f2
--- /dev/null
+++ b/riscv/insns/vslidedown_vi.h
@@ -0,0 +1,33 @@
+// vslidedown.vi vd, vs2, zimm5
+VI_LOOP_BASE
+const reg_t sh = insn.v_zimm5();
+bool is_valid = (i + sh) < P.VU.vlmax;
+reg_t offset = 0;
+
+if (is_valid) {
+ offset = sh;
+}
+
+switch (sew) {
+case e8: {
+ VI_XI_SLIDEDOWN_PARAMS(e8, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+case e16: {
+ VI_XI_SLIDEDOWN_PARAMS(e16, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+case e32: {
+ VI_XI_SLIDEDOWN_PARAMS(e32, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+default: {
+ VI_XI_SLIDEDOWN_PARAMS(e64, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslidedown_vx.h b/riscv/insns/vslidedown_vx.h
new file mode 100644
index 0000000..251740c
--- /dev/null
+++ b/riscv/insns/vslidedown_vx.h
@@ -0,0 +1,33 @@
+//vslidedown.vx vd, vs2, rs1
+VI_LOOP_BASE
+
+reg_t offset = RS1 == (reg_t)-1 ? ((RS1 & (P.VU.vlmax * 2 - 1)) + i) : RS1;
+bool is_valid = offset < P.VU.vlmax;
+
+if (!is_valid) {
+ offset = 0;
+}
+
+switch (sew) {
+case e8: {
+ VI_XI_SLIDEDOWN_PARAMS(e8, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+case e16: {
+ VI_XI_SLIDEDOWN_PARAMS(e16, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+case e32: {
+ VI_XI_SLIDEDOWN_PARAMS(e32, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+default: {
+ VI_XI_SLIDEDOWN_PARAMS(e64, offset);
+ vd = is_valid ? vs2 : 0;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
new file mode 100644
index 0000000..4135b20
--- /dev/null
+++ b/riscv/insns/vslideup_vi.h
@@ -0,0 +1,33 @@
+// vslideup.vi vd, vs2, zimm5
+if (insn.v_vm() == 0)
+ require(insn.rd() != 0);
+
+VI_CHECK_SS
+const reg_t offset = insn.v_zimm5();
+VI_LOOP_BASE
+if (P.VU.vstart < offset && i < offset)
+ continue;
+
+switch (sew) {
+case e8: {
+ VI_XI_SLIDEUP_PARAMS(e8, offset);
+ vd = vs2;
+}
+break;
+case e16: {
+ VI_XI_SLIDEUP_PARAMS(e16, offset);
+ vd = vs2;
+}
+break;
+case e32: {
+ VI_XI_SLIDEUP_PARAMS(e32, offset);
+ vd = vs2;
+}
+break;
+default: {
+ VI_XI_SLIDEUP_PARAMS(e64, offset);
+ vd = vs2;
+}
+break;
+}
+VI_LOOP_END
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
new file mode 100644
index 0000000..bf73fcd
--- /dev/null
+++ b/riscv/insns/vslideup_vx.h
@@ -0,0 +1,29 @@
+//vslideup.vx vd, vs2, rs1
+const reg_t offset = RS1;
+VI_LOOP_BASE
+if (P.VU.vstart < offset && i < offset)
+ continue;
+
+switch (sew) {
+case e8: {
+ VI_XI_SLIDEUP_PARAMS(e8, offset);
+ vd = vs2;
+}
+break;
+case e16: {
+ VI_XI_SLIDEUP_PARAMS(e16, offset);
+ vd = vs2;
+}
+break;
+case e32: {
+ VI_XI_SLIDEUP_PARAMS(e32, offset);
+ vd = vs2;
+}
+break;
+default: {
+ VI_XI_SLIDEUP_PARAMS(e64, offset);
+ vd = vs2;
+}
+break;
+}
+VI_LOOP_END
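
vslideup copies vs2[i - offset] into vd[i] for i >= offset, while vslidedown copies vs2[i + offset] and reads zero past VLMAX; the vslide1up/vslide1down forms above are the offset-1 cases with x[rs1] filling the vacated element. A reference sketch, ignoring masking and tails (illustrative names; vs2 is assumed to hold vlmax elements):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    void slideup_ref(std::vector<uint32_t> &vd,
                     const std::vector<uint32_t> &vs2, std::size_t offset) {
      for (std::size_t i = offset; i < vd.size(); ++i)
        vd[i] = vs2[i - offset];      // elements below 'offset' keep their value
    }

    void slidedown_ref(std::vector<uint32_t> &vd,
                       const std::vector<uint32_t> &vs2,
                       std::size_t offset, std::size_t vlmax) {
      for (std::size_t i = 0; i < vd.size(); ++i)
        vd[i] = (i + offset < vlmax) ? vs2[i + offset] : 0;
    }
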
diff --git a/riscv/insns/vsll_vi.h b/riscv/insns/vsll_vi.h
new file mode 100644
index 0000000..be46506
--- /dev/null
+++ b/riscv/insns/vsll_vi.h
@@ -0,0 +1,5 @@
+// vsll.vi vd, vs2, zimm5
+VI_VI_LOOP
+({
+ vd = vs2 << (simm5 & (sew - 1) & 0x1f);
+})
diff --git a/riscv/insns/vsll_vv.h b/riscv/insns/vsll_vv.h
new file mode 100644
index 0000000..ce82022
--- /dev/null
+++ b/riscv/insns/vsll_vv.h
@@ -0,0 +1,5 @@
+// vsll
+VI_VV_LOOP
+({
+ vd = vs2 << (vs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsll_vx.h b/riscv/insns/vsll_vx.h
new file mode 100644
index 0000000..823510b
--- /dev/null
+++ b/riscv/insns/vsll_vx.h
@@ -0,0 +1,5 @@
+// vsll
+VI_VX_LOOP
+({
+ vd = vs2 << (rs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsmul_vv.h b/riscv/insns/vsmul_vv.h
new file mode 100644
index 0000000..70d22ae
--- /dev/null
+++ b/riscv/insns/vsmul_vv.h
@@ -0,0 +1,33 @@
+// vsmul: Signed saturating and rounding fractional multiply
+VRM xrm = P.VU.get_vround_mode();
+uint64_t int_max = (1ul << (P.VU.vsew - 1)) - 1;
+uint64_t int_min = - (1 << (P.VU.vsew - 1));
+uint64_t sign_mask = ((1ul << (P.VU.vsew - 1)));
+
+VI_VV_ULOOP
+({
+ uint64_t vs1_sign;
+ uint64_t vs2_sign;
+ uint64_t result_sign;
+
+ vs1_sign = vs1 & sign_mask;
+ vs2_sign = vs2 & sign_mask;
+ bool overflow = vs1 == vs2 && vs1 == int_min;
+
+ uint128_t result = (uint128_t)vs1 * (uint128_t)vs2;
+ result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+ result_sign = (vs1_sign ^ vs2_sign) & sign_mask;
+ // rounding
+ INT_ROUNDING(result, xrm, sew - 1);
+ // unsigned shifting
+ result = result >> (sew - 1);
+
+ // saturation
+ if (overflow) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ } else {
+ result |= result_sign;
+ }
+ vd = result;
+})
diff --git a/riscv/insns/vsmul_vx.h b/riscv/insns/vsmul_vx.h
new file mode 100644
index 0000000..ef3751b
--- /dev/null
+++ b/riscv/insns/vsmul_vx.h
@@ -0,0 +1,34 @@
+// vsmul
+VRM xrm = P.VU.get_vround_mode();
+uint128_t int_max = (1ul << (P.VU.vsew - 1)) - 1;
+uint128_t int_min = - (1 << (P.VU.vsew - 1));
+uint128_t sign_mask = ((1ul << (P.VU.vsew - 1)));
+
+VI_VX_ULOOP
+({
+ uint128_t rs1_sign;
+ uint128_t vs2_sign;
+ uint128_t result_sign;
+
+ rs1_sign = rs1 & sign_mask;
+ vs2_sign = vs2 & sign_mask;
+ bool overflow = rs1 == vs2 && rs1 == int_min;
+
+ uint128_t result = (uint128_t)rs1 * (uint128_t)vs2;
+ result &= ((uint128_t)1llu << ((sew * 2) - 2)) - 1;
+ result_sign = (rs1_sign ^ vs2_sign) & sign_mask;
+ // rounding
+ INT_ROUNDING(result, xrm, sew - 1);
+
+ // unsigned shifting
+ result = result >> (sew - 1);
+
+ // saturation
+ if (overflow) {
+ result = int_max;
+ P.VU.vxsat = 1;
+ } else {
+ result |= result_sign;
+ }
+ vd = result;
+})
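
vsmul is the signed fractional multiply: (vs2 * vs1) >> (SEW-1) with rounding, where the only overflowing input pair is most-negative times most-negative, which saturates. A reference sketch for 32-bit elements, assuming the round-to-nearest-up mode only:

    #include <cstdint>

    int32_t vsmul32_ref(int32_t a, int32_t b, bool &vxsat) {
      if (a == INT32_MIN && b == INT32_MIN) {   // the single overflowing case
        vxsat = true;
        return INT32_MAX;
      }
      int64_t prod = (int64_t)a * (int64_t)b;
      prod += (int64_t)1 << 30;                 // rnu: add half of 2^(SEW-1)
      return (int32_t)(prod >> 31);             // drop the redundant sign bit
    }
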
diff --git a/riscv/insns/vsra_vi.h b/riscv/insns/vsra_vi.h
new file mode 100644
index 0000000..5c58927
--- /dev/null
+++ b/riscv/insns/vsra_vi.h
@@ -0,0 +1,5 @@
+// vsra.vi vd, vs2, zimm5
+VI_VI_LOOP
+({
+ vd = vs2 >> (simm5 & (sew - 1) & 0x1f);
+})
diff --git a/riscv/insns/vsra_vv.h b/riscv/insns/vsra_vv.h
new file mode 100644
index 0000000..8889af9
--- /dev/null
+++ b/riscv/insns/vsra_vv.h
@@ -0,0 +1,5 @@
+// vsra.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+ vd = vs2 >> (vs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsra_vx.h b/riscv/insns/vsra_vx.h
new file mode 100644
index 0000000..c1b0c10
--- /dev/null
+++ b/riscv/insns/vsra_vx.h
@@ -0,0 +1,5 @@
+// vsra.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+ vd = vs2 >> (rs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsrl_vi.h b/riscv/insns/vsrl_vi.h
new file mode 100644
index 0000000..5006854
--- /dev/null
+++ b/riscv/insns/vsrl_vi.h
@@ -0,0 +1,5 @@
+// vsrl.vi vd, vs2, zimm5
+VI_VI_ULOOP
+({
+ vd = vs2 >> (simm5 & (sew - 1) & 0x1f);
+})
diff --git a/riscv/insns/vsrl_vv.h b/riscv/insns/vsrl_vv.h
new file mode 100644
index 0000000..6376af3
--- /dev/null
+++ b/riscv/insns/vsrl_vv.h
@@ -0,0 +1,5 @@
+// vsrl.vv vd, vs2, vs1
+VI_VV_ULOOP
+({
+ vd = vs2 >> (vs1 & (sew - 1));
+})
diff --git a/riscv/insns/vsrl_vx.h b/riscv/insns/vsrl_vx.h
new file mode 100644
index 0000000..a4f899c
--- /dev/null
+++ b/riscv/insns/vsrl_vx.h
@@ -0,0 +1,5 @@
+// vsrl.vx vd, vs2, rs1
+VI_VX_ULOOP
+({
+ vd = vs2 >> (rs1 & (sew - 1));
+})
diff --git a/riscv/insns/vssra_vi.h b/riscv/insns/vssra_vi.h
new file mode 100644
index 0000000..ef2390c
--- /dev/null
+++ b/riscv/insns/vssra_vi.h
@@ -0,0 +1,8 @@
+// vssra.vi vd, vs2, simm5
+VRM xrm = P.VU.get_vround_mode();
+VI_VI_LOOP
+({
+ int sh = simm5 & (sew - 1) & 0x1f;
+ INT_ROUNDING(vs2, xrm, sh);
+ vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssra_vv.h b/riscv/insns/vssra_vv.h
new file mode 100644
index 0000000..e697b52
--- /dev/null
+++ b/riscv/insns/vssra_vv.h
@@ -0,0 +1,9 @@
+// vssra.vv vd, vs2, vs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VV_LOOP
+({
+ int sh = vs1 & (sew - 1);
+
+ INT_ROUNDING(vs2, xrm, sh);
+ vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssra_vx.h b/riscv/insns/vssra_vx.h
new file mode 100644
index 0000000..8d7ad20
--- /dev/null
+++ b/riscv/insns/vssra_vx.h
@@ -0,0 +1,9 @@
+// vssra.vx vd, vs2, rs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VX_LOOP
+({
+ int sh = rs1 & (sew - 1);
+
+ INT_ROUNDING(vs2, xrm, sh);
+ vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssrl_vi.h b/riscv/insns/vssrl_vi.h
new file mode 100644
index 0000000..8a10df0
--- /dev/null
+++ b/riscv/insns/vssrl_vi.h
@@ -0,0 +1,9 @@
+// vssrl.vi vd, vs2, simm5
+VRM xrm = P.VU.get_vround_mode();
+VI_VI_ULOOP
+({
+ int sh = simm5 & (sew - 1) & 0x1f;
+
+ INT_ROUNDING(vs2, xrm, sh);
+ vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssrl_vv.h b/riscv/insns/vssrl_vv.h
new file mode 100644
index 0000000..f40cd90
--- /dev/null
+++ b/riscv/insns/vssrl_vv.h
@@ -0,0 +1,9 @@
+// vssrl.vv vd, vs2, vs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VV_ULOOP
+({
+ int sh = vs1 & (sew - 1);
+
+ INT_ROUNDING(vs2, xrm, sh);
+ vd = vs2 >> sh;
+})
diff --git a/riscv/insns/vssrl_vx.h b/riscv/insns/vssrl_vx.h
new file mode 100644
index 0000000..5da3f75
--- /dev/null
+++ b/riscv/insns/vssrl_vx.h
@@ -0,0 +1,9 @@
+// vssrl.vx vd, vs2, rs1
+VRM xrm = P.VU.get_vround_mode();
+VI_VX_ULOOP
+({
+ int sh = rs1 & (sew - 1);
+
+ INT_ROUNDING(vs2, xrm, sh);
+ vd = vs2 >> sh;
+})
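
The scaling shifts above apply INT_ROUNDING to the operand before shifting it right by the low bits of the shift amount. A sketch of the round-then-shift step for the unsigned case, round-to-nearest-up only (sh is assumed already masked to SEW-1):

    #include <cstdint>

    uint32_t round_shift_rnu(uint32_t v, unsigned sh) {
      if (sh == 0) return v;
      uint64_t w = (uint64_t)v + ((uint64_t)1 << (sh - 1)); // add half an LSB
      return (uint32_t)(w >> sh);
    }
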
diff --git a/riscv/insns/vssub_vv.h b/riscv/insns/vssub_vv.h
new file mode 100644
index 0000000..fd3ee21
--- /dev/null
+++ b/riscv/insns/vssub_vv.h
@@ -0,0 +1,28 @@
+// vssub.vv vd, vs2, vs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+ VV_PARAMS(e8);
+ vd = sat_sub<int8_t, uint8_t>(vs2, vs1, sat);
+ break;
+}
+case e16: {
+ VV_PARAMS(e16);
+ vd = sat_sub<int16_t, uint16_t>(vs2, vs1, sat);
+ break;
+}
+case e32: {
+ VV_PARAMS(e32);
+ vd = sat_sub<int32_t, uint32_t>(vs2, vs1, sat);
+ break;
+}
+default: {
+ VV_PARAMS(e64);
+ vd = sat_sub<int64_t, uint64_t>(vs2, vs1, sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vssub_vx.h b/riscv/insns/vssub_vx.h
new file mode 100644
index 0000000..5c5c781
--- /dev/null
+++ b/riscv/insns/vssub_vx.h
@@ -0,0 +1,28 @@
+// vssub.vx vd, vs2, rs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+ VX_PARAMS(e8);
+ vd = sat_sub<int8_t, uint8_t>(vs2, rs1, sat);
+ break;
+}
+case e16: {
+ VX_PARAMS(e16);
+ vd = sat_sub<int16_t, uint16_t>(vs2, rs1, sat);
+ break;
+}
+case e32: {
+ VX_PARAMS(e32);
+ vd = sat_sub<int32_t, uint32_t>(vs2, rs1, sat);
+ break;
+}
+default: {
+ VX_PARAMS(e64);
+ vd = sat_sub<int64_t, uint64_t>(vs2, rs1, sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vssubu_vv.h b/riscv/insns/vssubu_vv.h
new file mode 100644
index 0000000..c5c74fe
--- /dev/null
+++ b/riscv/insns/vssubu_vv.h
@@ -0,0 +1,29 @@
+// vssubu.vv vd, vs2, vs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+ VV_U_PARAMS(e8);
+ vd = sat_subu<uint8_t>(vs2, vs1, sat);
+ break;
+}
+case e16: {
+ VV_U_PARAMS(e16);
+ vd = sat_subu<uint16_t>(vs2, vs1, sat);
+ break;
+}
+case e32: {
+ VV_U_PARAMS(e32);
+ vd = sat_subu<uint32_t>(vs2, vs1, sat);
+ break;
+}
+default: {
+ VV_U_PARAMS(e64);
+ vd = sat_subu<uint64_t>(vs2, vs1, sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+
+VI_LOOP_END
diff --git a/riscv/insns/vssubu_vx.h b/riscv/insns/vssubu_vx.h
new file mode 100644
index 0000000..12cfdbb
--- /dev/null
+++ b/riscv/insns/vssubu_vx.h
@@ -0,0 +1,28 @@
+// vssubu.vx vd, vs2, rs1
+VI_LOOP_BASE
+bool sat = false;
+
+switch (sew) {
+case e8: {
+ VX_U_PARAMS(e8);
+ vd = sat_subu<uint8_t>(vs2, rs1, sat);
+ break;
+}
+case e16: {
+ VX_U_PARAMS(e16);
+ vd = sat_subu<uint16_t>(vs2, rs1, sat);
+ break;
+}
+case e32: {
+ VX_U_PARAMS(e32);
+ vd = sat_subu<uint32_t>(vs2, rs1, sat);
+ break;
+}
+default: {
+ VX_U_PARAMS(e64);
+ vd = sat_subu<uint64_t>(vs2, rs1, sat);
+ break;
+}
+}
+P.VU.vxsat |= sat;
+VI_LOOP_END
diff --git a/riscv/insns/vsub_vv.h b/riscv/insns/vsub_vv.h
new file mode 100644
index 0000000..7d119d5
--- /dev/null
+++ b/riscv/insns/vsub_vv.h
@@ -0,0 +1,5 @@
+// vsub
+VI_VV_LOOP
+({
+ vd = vs2 - vs1;
+})
diff --git a/riscv/insns/vsub_vx.h b/riscv/insns/vsub_vx.h
new file mode 100644
index 0000000..e075b42
--- /dev/null
+++ b/riscv/insns/vsub_vx.h
@@ -0,0 +1,5 @@
+// vsub.vx: vd[i] = vs2[i] - x[rs1]
+VI_VX_LOOP
+({
+ vd = vs2 - rs1;
+})
diff --git a/riscv/insns/vwadd_vv.h b/riscv/insns/vwadd_vv.h
new file mode 100644
index 0000000..df4a135
--- /dev/null
+++ b/riscv/insns/vwadd_vv.h
@@ -0,0 +1,6 @@
+// vwadd.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, int);
+})
diff --git a/riscv/insns/vwadd_vx.h b/riscv/insns/vwadd_vx.h
new file mode 100644
index 0000000..c226389
--- /dev/null
+++ b/riscv/insns/vwadd_vx.h
@@ -0,0 +1,6 @@
+// vwadd.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, int);
+})
diff --git a/riscv/insns/vwadd_wv.h b/riscv/insns/vwadd_wv.h
new file mode 100644
index 0000000..54d2ba4
--- /dev/null
+++ b/riscv/insns/vwadd_wv.h
@@ -0,0 +1,6 @@
+// vwadd.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(vs1, +, int);
+})
diff --git a/riscv/insns/vwadd_wx.h b/riscv/insns/vwadd_wx.h
new file mode 100644
index 0000000..bb4cee5
--- /dev/null
+++ b/riscv/insns/vwadd_wx.h
@@ -0,0 +1,6 @@
+// vwadd.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(rs1, +, int);
+})
diff --git a/riscv/insns/vwaddu_vv.h b/riscv/insns/vwaddu_vv.h
new file mode 100644
index 0000000..286ebc8
--- /dev/null
+++ b/riscv/insns/vwaddu_vv.h
@@ -0,0 +1,6 @@
+// vwaddu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, +, +, uint);
+})
diff --git a/riscv/insns/vwaddu_vx.h b/riscv/insns/vwaddu_vx.h
new file mode 100644
index 0000000..61cddfc
--- /dev/null
+++ b/riscv/insns/vwaddu_vx.h
@@ -0,0 +1,6 @@
+// vwaddu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, +, +, uint);
+})
diff --git a/riscv/insns/vwaddu_wv.h b/riscv/insns/vwaddu_wv.h
new file mode 100644
index 0000000..fee8136
--- /dev/null
+++ b/riscv/insns/vwaddu_wv.h
@@ -0,0 +1,6 @@
+// vwaddu.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(vs1, +, uint);
+})
diff --git a/riscv/insns/vwaddu_wx.h b/riscv/insns/vwaddu_wx.h
new file mode 100644
index 0000000..0073ac3
--- /dev/null
+++ b/riscv/insns/vwaddu_wx.h
@@ -0,0 +1,6 @@
+// vwaddu.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(rs1, +, uint);
+})
diff --git a/riscv/insns/vwmacc_vv.h b/riscv/insns/vwmacc_vv.h
new file mode 100644
index 0000000..7208c6d
--- /dev/null
+++ b/riscv/insns/vwmacc_vv.h
@@ -0,0 +1,6 @@
+// vwmacc.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, int);
+})
diff --git a/riscv/insns/vwmacc_vx.h b/riscv/insns/vwmacc_vx.h
new file mode 100644
index 0000000..5ae597a
--- /dev/null
+++ b/riscv/insns/vwmacc_vx.h
@@ -0,0 +1,6 @@
+// vwmacc.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, int);
+})
diff --git a/riscv/insns/vwmaccsu_vv.h b/riscv/insns/vwmaccsu_vv.h
new file mode 100644
index 0000000..3aa43ef
--- /dev/null
+++ b/riscv/insns/vwmaccsu_vv.h
@@ -0,0 +1,6 @@
+// vwmaccsu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN_MIX(vs2, vs1, vd_w, *, +, int, uint, int);
+})
diff --git a/riscv/insns/vwmaccsu_vx.h b/riscv/insns/vwmaccsu_vx.h
new file mode 100644
index 0000000..e00a21d
--- /dev/null
+++ b/riscv/insns/vwmaccsu_vx.h
@@ -0,0 +1,6 @@
+// vwmaccsu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, uint, int);
+})
diff --git a/riscv/insns/vwmaccu_vv.h b/riscv/insns/vwmaccu_vv.h
new file mode 100644
index 0000000..2cbdaa3
--- /dev/null
+++ b/riscv/insns/vwmaccu_vv.h
@@ -0,0 +1,6 @@
+// vwmaccu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, vd_w, *, +, uint);
+})
diff --git a/riscv/insns/vwmaccu_vx.h b/riscv/insns/vwmaccu_vx.h
new file mode 100644
index 0000000..533297f
--- /dev/null
+++ b/riscv/insns/vwmaccu_vx.h
@@ -0,0 +1,6 @@
+// vwmaccu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, vd_w, *, +, uint);
+})
diff --git a/riscv/insns/vwmaccus_vx.h b/riscv/insns/vwmaccus_vx.h
new file mode 100644
index 0000000..5310f0e
--- /dev/null
+++ b/riscv/insns/vwmaccus_vx.h
@@ -0,0 +1,6 @@
+// vwmaccus.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN_MIX(vs2, rs1, vd_w, *, +, int, int, uint);
+})
diff --git a/riscv/insns/vwmul_vv.h b/riscv/insns/vwmul_vv.h
new file mode 100644
index 0000000..2197edb
--- /dev/null
+++ b/riscv/insns/vwmul_vv.h
@@ -0,0 +1,6 @@
+// vwmul.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, int);
+})
diff --git a/riscv/insns/vwmul_vx.h b/riscv/insns/vwmul_vx.h
new file mode 100644
index 0000000..bc1422d
--- /dev/null
+++ b/riscv/insns/vwmul_vx.h
@@ -0,0 +1,6 @@
+// vwmul.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, int);
+})
diff --git a/riscv/insns/vwmulsu_vv.h b/riscv/insns/vwmulsu_vv.h
new file mode 100644
index 0000000..9786adb
--- /dev/null
+++ b/riscv/insns/vwmulsu_vv.h
@@ -0,0 +1,16 @@
+// vwmulsu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ switch(P.VU.vsew) {
+ case e8:
+ P.VU.elt<uint16_t>(rd_num, i) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)vs1;
+ break;
+ case e16:
+ P.VU.elt<uint32_t>(rd_num, i) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)vs1;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)vs1;
+ break;
+ }
+})
diff --git a/riscv/insns/vwmulsu_vx.h b/riscv/insns/vwmulsu_vx.h
new file mode 100644
index 0000000..feb1fd1
--- /dev/null
+++ b/riscv/insns/vwmulsu_vx.h
@@ -0,0 +1,16 @@
+// vwmulsu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ switch(P.VU.vsew) {
+ case e8:
+ P.VU.elt<uint16_t>(rd_num, i) = (int16_t)(int8_t)vs2 * (int16_t)(uint8_t)rs1;
+ break;
+ case e16:
+ P.VU.elt<uint32_t>(rd_num, i) = (int32_t)(int16_t)vs2 * (int32_t)(uint16_t)rs1;
+ break;
+ default:
+ P.VU.elt<uint64_t>(rd_num, i) = (int64_t)(int32_t)vs2 * (int64_t)(uint32_t)rs1;
+ break;
+ }
+})
diff --git a/riscv/insns/vwmulu_vv.h b/riscv/insns/vwmulu_vv.h
new file mode 100644
index 0000000..8ddbb4b
--- /dev/null
+++ b/riscv/insns/vwmulu_vv.h
@@ -0,0 +1,6 @@
+// vwmulu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, *, +, uint);
+})
diff --git a/riscv/insns/vwmulu_vx.h b/riscv/insns/vwmulu_vx.h
new file mode 100644
index 0000000..1ce77ee
--- /dev/null
+++ b/riscv/insns/vwmulu_vx.h
@@ -0,0 +1,6 @@
+// vwmulu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, *, +, uint);
+})
diff --git a/riscv/insns/vwredsum_vs.h b/riscv/insns/vwredsum_vs.h
new file mode 100644
index 0000000..c7a87db
--- /dev/null
+++ b/riscv/insns/vwredsum_vs.h
@@ -0,0 +1,5 @@
+// vwredsum.vs vd, vs2, vs1
+VI_VV_LOOP_WIDE_REDUCTION
+({
+ vd_0_res += vs2;
+})
diff --git a/riscv/insns/vwredsumu_vs.h b/riscv/insns/vwredsumu_vs.h
new file mode 100644
index 0000000..889a77d
--- /dev/null
+++ b/riscv/insns/vwredsumu_vs.h
@@ -0,0 +1,5 @@
+// vwredsumu.vs vd, vs2, vs1
+VI_VV_ULOOP_WIDE_REDUCTION
+({
+ vd_0_res += vs2;
+})
diff --git a/riscv/insns/vwsmacc_vv.h b/riscv/insns/vwsmacc_vv.h
new file mode 100644
index 0000000..86d588d
--- /dev/null
+++ b/riscv/insns/vwsmacc_vv.h
@@ -0,0 +1,2 @@
+// vwsmacc.vv vd, vs2, vs1
+VI_VVX_LOOP_WIDE_SSMA(vs1);
diff --git a/riscv/insns/vwsmacc_vx.h b/riscv/insns/vwsmacc_vx.h
new file mode 100644
index 0000000..f0f04a3
--- /dev/null
+++ b/riscv/insns/vwsmacc_vx.h
@@ -0,0 +1,2 @@
+// vwsmacc.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_SSMA(rs1);
diff --git a/riscv/insns/vwsmaccsu_vv.h b/riscv/insns/vwsmaccsu_vv.h
new file mode 100644
index 0000000..cf1aa1e
--- /dev/null
+++ b/riscv/insns/vwsmaccsu_vv.h
@@ -0,0 +1,2 @@
+// vwsmaccsu.vv vd, vs2, vs1
+VI_VVX_LOOP_WIDE_SU_SSMA(vs1);
diff --git a/riscv/insns/vwsmaccsu_vx.h b/riscv/insns/vwsmaccsu_vx.h
new file mode 100644
index 0000000..681c309
--- /dev/null
+++ b/riscv/insns/vwsmaccsu_vx.h
@@ -0,0 +1,2 @@
+// vwsmaccsu.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_SU_SSMA(rs1);
diff --git a/riscv/insns/vwsmaccu_vv.h b/riscv/insns/vwsmaccu_vv.h
new file mode 100644
index 0000000..e873d93
--- /dev/null
+++ b/riscv/insns/vwsmaccu_vv.h
@@ -0,0 +1,2 @@
+// vwsmaccu.vv vd, vs2, vs1
+VI_VVX_LOOP_WIDE_USSMA(vs1);
diff --git a/riscv/insns/vwsmaccu_vx.h b/riscv/insns/vwsmaccu_vx.h
new file mode 100644
index 0000000..7318fa7
--- /dev/null
+++ b/riscv/insns/vwsmaccu_vx.h
@@ -0,0 +1,2 @@
+// vwsmaccu.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_USSMA(rs1);
diff --git a/riscv/insns/vwsmaccus_vx.h b/riscv/insns/vwsmaccus_vx.h
new file mode 100644
index 0000000..da1a1c8
--- /dev/null
+++ b/riscv/insns/vwsmaccus_vx.h
@@ -0,0 +1,2 @@
+// vwsmaccus.vx vd, vs2, rs1
+VI_VVX_LOOP_WIDE_US_SSMA(rs1);
diff --git a/riscv/insns/vwsub_vv.h b/riscv/insns/vwsub_vv.h
new file mode 100644
index 0000000..99f9348
--- /dev/null
+++ b/riscv/insns/vwsub_vv.h
@@ -0,0 +1,6 @@
+// vwsub.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, int);
+})
diff --git a/riscv/insns/vwsub_vx.h b/riscv/insns/vwsub_vx.h
new file mode 100644
index 0000000..affdf62
--- /dev/null
+++ b/riscv/insns/vwsub_vx.h
@@ -0,0 +1,6 @@
+// vwsub.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, int);
+})
diff --git a/riscv/insns/vwsub_wv.h b/riscv/insns/vwsub_wv.h
new file mode 100644
index 0000000..10db730
--- /dev/null
+++ b/riscv/insns/vwsub_wv.h
@@ -0,0 +1,6 @@
+// vwsub.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(vs1, -, int);
+})
diff --git a/riscv/insns/vwsub_wx.h b/riscv/insns/vwsub_wx.h
new file mode 100644
index 0000000..f72341b
--- /dev/null
+++ b/riscv/insns/vwsub_wx.h
@@ -0,0 +1,6 @@
+// vwsub.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(rs1, -, int);
+})
diff --git a/riscv/insns/vwsubu_vv.h b/riscv/insns/vwsubu_vv.h
new file mode 100644
index 0000000..cf68adb
--- /dev/null
+++ b/riscv/insns/vwsubu_vv.h
@@ -0,0 +1,6 @@
+// vwsubu.vv vd, vs2, vs1
+VI_CHECK_DSS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, vs1, 0, -, +, uint);
+})
diff --git a/riscv/insns/vwsubu_vx.h b/riscv/insns/vwsubu_vx.h
new file mode 100644
index 0000000..3e972dd
--- /dev/null
+++ b/riscv/insns/vwsubu_vx.h
@@ -0,0 +1,6 @@
+// vwsubu.vx vd, vs2, rs1
+VI_CHECK_DSS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_OP_AND_ASSIGN(vs2, rs1, 0, -, +, uint);
+})
diff --git a/riscv/insns/vwsubu_wv.h b/riscv/insns/vwsubu_wv.h
new file mode 100644
index 0000000..3687c3d
--- /dev/null
+++ b/riscv/insns/vwsubu_wv.h
@@ -0,0 +1,6 @@
+// vwsubu.wv vd, vs2, vs1
+VI_CHECK_DDS(true);
+VI_VV_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(vs1, -, uint);
+})
diff --git a/riscv/insns/vwsubu_wx.h b/riscv/insns/vwsubu_wx.h
new file mode 100644
index 0000000..c7f20ed
--- /dev/null
+++ b/riscv/insns/vwsubu_wx.h
@@ -0,0 +1,6 @@
+// vwsubu.wx vd, vs2, rs1
+VI_CHECK_DDS(false);
+VI_VX_LOOP_WIDEN
+({
+ VI_WIDE_WVX_OP(rs1, -, uint);
+})
diff --git a/riscv/insns/vxor_vi.h b/riscv/insns/vxor_vi.h
new file mode 100644
index 0000000..b2dcf94
--- /dev/null
+++ b/riscv/insns/vxor_vi.h
@@ -0,0 +1,5 @@
+// vxor.vi vd, vs2, simm5
+VI_VI_LOOP
+({
+ vd = simm5 ^ vs2;
+})
diff --git a/riscv/insns/vxor_vv.h b/riscv/insns/vxor_vv.h
new file mode 100644
index 0000000..c37b6ab
--- /dev/null
+++ b/riscv/insns/vxor_vv.h
@@ -0,0 +1,5 @@
+// vxor.vv vd, vs2, vs1
+VI_VV_LOOP
+({
+ vd = vs1 ^ vs2;
+})
diff --git a/riscv/insns/vxor_vx.h b/riscv/insns/vxor_vx.h
new file mode 100644
index 0000000..8021e0e
--- /dev/null
+++ b/riscv/insns/vxor_vx.h
@@ -0,0 +1,5 @@
+// vxor.vx vd, vs2, rs1
+VI_VX_LOOP
+({
+ vd = rs1 ^ vs2;
+})
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index 4d538c8..e8c7f04 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -291,11 +291,217 @@ riscv_insn_ext_q = \
fsqrt_q \
fsub_q \
+riscv_insn_ext_v_alu_int = \
+ vaadd_vi \
+ vaadd_vv \
+ vaadd_vx \
+ vadc_vim \
+ vadc_vvm \
+ vadc_vxm \
+ vadd_vi \
+ vadd_vv \
+ vadd_vx \
+ vand_vi \
+ vand_vv \
+ vand_vx \
+ vasub_vv \
+ vasub_vx \
+ vcompress_vm \
+ vdiv_vv \
+ vdiv_vx \
+ vdivu_vv \
+ vdivu_vx \
+ vdot_vv \
+ vdotu_vv \
+ vext_x_v \
+ vid_v \
+ viota_m \
+ vmacc_vv \
+ vmacc_vx \
+ vmadc_vim \
+ vmadc_vvm \
+ vmadc_vxm \
+ vmadd_vv \
+ vmadd_vx \
+ vmand_mm \
+ vmandnot_mm \
+ vmax_vv \
+ vmax_vx \
+ vmaxu_vv \
+ vmaxu_vx \
+ vmerge_vim \
+ vmerge_vvm \
+ vmerge_vxm \
+ vmfirst_m \
+ vmin_vv \
+ vmin_vx \
+ vminu_vv \
+ vminu_vx \
+ vmnand_mm \
+ vmnor_mm \
+ vmor_mm \
+ vmornot_mm \
+ vmpopc_m \
+ vmsbc_vvm \
+ vmsbc_vxm \
+ vmsbf_m \
+ vmseq_vi \
+ vmseq_vv \
+ vmseq_vx \
+ vmsgt_vi \
+ vmsgt_vx \
+ vmsgtu_vi \
+ vmsgtu_vx \
+ vmsif_m \
+ vmsle_vi \
+ vmsle_vv \
+ vmsle_vx \
+ vmsleu_vi \
+ vmsleu_vv \
+ vmsleu_vx \
+ vmslt_vv \
+ vmslt_vx \
+ vmsltu_vv \
+ vmsltu_vx \
+ vmsne_vi \
+ vmsne_vv \
+ vmsne_vx \
+ vmsof_m \
+ vmul_vv \
+ vmul_vx \
+ vmulh_vv \
+ vmulh_vx \
+ vmulhsu_vv \
+ vmulhsu_vx \
+ vmulhu_vv \
+ vmulhu_vx \
+ vmv_s_x \
+ vmv_v_i \
+ vmv_v_v \
+ vmv_v_x \
+ vmxnor_mm \
+ vmxor_mm \
+ vnclip_vi \
+ vnclip_vv \
+ vnclip_vx \
+ vnclipu_vi \
+ vnclipu_vv \
+ vnclipu_vx \
+ vnmsac_vv \
+ vnmsac_vx \
+ vnmsub_vv \
+ vnmsub_vx \
+ vnsra_vi \
+ vnsra_vv \
+ vnsra_vx \
+ vnsrl_vi \
+ vnsrl_vv \
+ vnsrl_vx \
+ vor_vi \
+ vor_vv \
+ vor_vx \
+ vredand_vs \
+ vredmax_vs \
+ vredmaxu_vs \
+ vredmin_vs \
+ vredminu_vs \
+ vredor_vs \
+ vredsum_vs \
+ vredxor_vs \
+ vrem_vv \
+ vrem_vx \
+ vremu_vv \
+ vremu_vx \
+ vrgather_vi \
+ vrgather_vv \
+ vrgather_vx \
+ vrsub_vi \
+ vrsub_vx \
+ vsadd_vi \
+ vsadd_vv \
+ vsadd_vx \
+ vsaddu_vi \
+ vsaddu_vv \
+ vsaddu_vx \
+ vsbc_vvm \
+ vsbc_vxm \
+ vslide1down_vx \
+ vslide1up_vx \
+ vslidedown_vi \
+ vslidedown_vx \
+ vslideup_vi \
+ vslideup_vx \
+ vsll_vi \
+ vsll_vv \
+ vsll_vx \
+ vsmul_vv \
+ vsmul_vx \
+ vsra_vi \
+ vsra_vv \
+ vsra_vx \
+ vsrl_vi \
+ vsrl_vv \
+ vsrl_vx \
+ vssra_vi \
+ vssra_vv \
+ vssra_vx \
+ vssrl_vi \
+ vssrl_vv \
+ vssrl_vx \
+ vssub_vv \
+ vssub_vx \
+ vssubu_vv \
+ vssubu_vx \
+ vsub_vv \
+ vsub_vx \
+ vwadd_vv \
+ vwadd_vx \
+ vwadd_wv \
+ vwadd_wx \
+ vwaddu_vv \
+ vwaddu_vx \
+ vwaddu_wv \
+ vwaddu_wx \
+ vwmacc_vv \
+ vwmacc_vx \
+ vwmaccsu_vv \
+ vwmaccsu_vx \
+ vwmaccu_vv \
+ vwmaccu_vx \
+ vwmaccus_vx \
+ vwmul_vv \
+ vwmul_vx \
+ vwmulsu_vv \
+ vwmulsu_vx \
+ vwmulu_vv \
+ vwmulu_vx \
+ vwredsum_vs \
+ vwredsumu_vs \
+ vwsmacc_vv \
+ vwsmacc_vx \
+ vwsmaccsu_vv \
+ vwsmaccsu_vx \
+ vwsmaccu_vv \
+ vwsmaccu_vx \
+ vwsmaccus_vx \
+ vwsub_vv \
+ vwsub_vx \
+ vwsub_wv \
+ vwsub_wx \
+ vwsubu_vv \
+ vwsubu_vx \
+ vwsubu_wv \
+ vwsubu_wx \
+ vxor_vi \
+ vxor_vv \
+ vxor_vx \
+
riscv_insn_ext_v_ctrl = \
vsetvli \
vsetvl \
riscv_insn_ext_v = \
+ $(riscv_insn_ext_v_alu_int) \
$(riscv_insn_ext_v_ctrl) \
riscv_insn_priv = \