aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChih-Min Chao <chihmin.chao@sifive.com>2019-06-06 03:24:52 -0700
committerChih-Min Chao <chihmin.chao@sifive.com>2019-06-18 08:56:11 -0700
commit80ebc70e43e48c5a851348e898c13a2d8a8148d7 (patch)
treea11e7fa87d5c71ea23b4c410cd2c26e7855dcce6
parent655aedc0ebd2326d69d389bc714c2d622bf2cb08 (diff)
downloadspike-80ebc70e43e48c5a851348e898c13a2d8a8148d7.zip
spike-80ebc70e43e48c5a851348e898c13a2d8a8148d7.tar.gz
spike-80ebc70e43e48c5a851348e898c13a2d8a8148d7.tar.bz2
rvv: add load/store instructions
based on v-spec 0.7.1, support section: 7 element size: 8/16/32/64 Signed-off-by: Bruce Hoult <bruce@hoult.org> Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com> Signed-off-by: Dave Wen <dave.wen@sifive.com> Signed-off-by: Zakk Chen <zakk.chen@sifive.com>
-rw-r--r--riscv/decode.h151
-rw-r--r--riscv/insns/vlb_v.h3
-rw-r--r--riscv/insns/vlbff_v.h2
-rw-r--r--riscv/insns/vlbu_v.h3
-rw-r--r--riscv/insns/vlbuff_v.h2
-rw-r--r--riscv/insns/vle_v.h13
-rw-r--r--riscv/insns/vleff_v.h54
-rw-r--r--riscv/insns/vlh_v.h3
-rw-r--r--riscv/insns/vlhff_v.h2
-rw-r--r--riscv/insns/vlhu_v.h3
-rw-r--r--riscv/insns/vlhuff_v.h2
-rw-r--r--riscv/insns/vlsb_v.h3
-rw-r--r--riscv/insns/vlsbu_v.h3
-rw-r--r--riscv/insns/vlse_v.h13
-rw-r--r--riscv/insns/vlsh_v.h3
-rw-r--r--riscv/insns/vlshu_v.h3
-rw-r--r--riscv/insns/vlsw_v.h3
-rw-r--r--riscv/insns/vlswu_v.h3
-rw-r--r--riscv/insns/vlw_v.h3
-rw-r--r--riscv/insns/vlwff_v.h3
-rw-r--r--riscv/insns/vlwu_v.h3
-rw-r--r--riscv/insns/vlwuff_v.h2
-rw-r--r--riscv/insns/vlxb_v.h4
-rw-r--r--riscv/insns/vlxbu_v.h4
-rw-r--r--riscv/insns/vlxe_v.h13
-rw-r--r--riscv/insns/vlxh_v.h4
-rw-r--r--riscv/insns/vlxhu_v.h4
-rw-r--r--riscv/insns/vlxw_v.h5
-rw-r--r--riscv/insns/vlxwu_v.h4
-rw-r--r--riscv/insns/vsb_v.h3
-rw-r--r--riscv/insns/vse_v.h13
-rw-r--r--riscv/insns/vsh_v.h3
-rw-r--r--riscv/insns/vssb_v.h3
-rw-r--r--riscv/insns/vsse_v.h13
-rw-r--r--riscv/insns/vssh_v.h3
-rw-r--r--riscv/insns/vssw_v.h3
-rw-r--r--riscv/insns/vsuxb_v.h37
-rw-r--r--riscv/insns/vsuxe_v.h38
-rw-r--r--riscv/insns/vsuxh_v.h32
-rw-r--r--riscv/insns/vsuxw_v.h27
-rw-r--r--riscv/insns/vsw_v.h3
-rw-r--r--riscv/insns/vsxb_v.h4
-rw-r--r--riscv/insns/vsxe_v.h14
-rw-r--r--riscv/insns/vsxh_v.h4
-rw-r--r--riscv/insns/vsxw_v.h4
-rw-r--r--riscv/riscv.mk.in47
46 files changed, 569 insertions, 0 deletions
diff --git a/riscv/decode.h b/riscv/decode.h
index ca6a999..86fd799 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -1329,6 +1329,157 @@ VI_LOOP_BASE \
} \
} \
VI_LOOP_END
+
+//
+// vector: load/store helper
+//
+#define VI_STRIP(inx) \
+ reg_t elems_per_strip = P.VU.get_slen()/P.VU.vsew; \
+ reg_t elems_per_vreg = P.VU.get_vlen()/P.VU.vsew; \
+ reg_t elems_per_lane = P.VU.vlmul * elems_per_strip; \
+ reg_t strip_index = (inx) / elems_per_lane; \
+ reg_t index_in_strip = (inx) % elems_per_strip; \
+ int32_t lmul_inx = (int32_t)(((inx) % elems_per_lane) / elems_per_strip); \
+ reg_t vreg_inx = lmul_inx * elems_per_vreg + strip_index * elems_per_strip + index_in_strip;
+
+
+#define VI_DUPLICATE_VREG(v, vlmax) \
+reg_t index[vlmax] = {0}; \
+for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \
+ switch(P.VU.vsew) { \
+ case e8: \
+ index[i] = P.VU.elt<int8_t>(v, i); \
+ break; \
+ case e16: \
+ index[i] = P.VU.elt<int16_t>(v, i); \
+ break; \
+ case e32: \
+ index[i] = P.VU.elt<int32_t>(v, i); \
+ break; \
+ case e64: \
+ index[i] = P.VU.elt<int64_t>(v, i); \
+ break; \
+ } \
+}
+
+#define VI_ST(stride, offset, st_width, elt_byte) \
+ const reg_t nf = insn.v_nf() + 1; \
+ require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+ const reg_t vl = P.VU.vl; \
+ const reg_t baseAddr = RS1; \
+ const reg_t vs3 = insn.rd(); \
+ const reg_t vlmax = P.VU.vlmax; \
+ const reg_t vlmul = P.VU.vlmul; \
+ for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
+ bool is_valid = true; \
+ VI_STRIP(i) \
+ VI_ELEMENT_SKIP(i); \
+ if (!is_valid) \
+ continue; \
+ for (reg_t fn = 0; fn < nf; ++fn) { \
+ st_width##_t val = 0; \
+ switch (P.VU.vsew) { \
+ case e8: \
+ val = P.VU.elt<uint8_t>(vs3 + fn * vlmul, vreg_inx); \
+ break; \
+ case e16: \
+ val = P.VU.elt<uint16_t>(vs3 + fn * vlmul, vreg_inx); \
+ break; \
+ case e32: \
+ val = P.VU.elt<uint32_t>(vs3 + fn * vlmul, vreg_inx); \
+ break; \
+ default: \
+ val = P.VU.elt<uint64_t>(vs3 + fn * vlmul, vreg_inx); \
+ break; \
+ } \
+ MMU.store_##st_width(baseAddr + (stride) + (offset) * elt_byte, val); \
+ } \
+ } \
+ P.VU.vstart = 0;
+
+#define VI_LD(stride, offset, ld_width, elt_byte) \
+ const reg_t nf = insn.v_nf() + 1; \
+ require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+ const reg_t vl = P.VU.vl; \
+ const reg_t baseAddr = RS1; \
+ const reg_t vd = insn.rd(); \
+ const reg_t vlmax = P.VU.vlmax; \
+ const reg_t vlmul = P.VU.vlmul; \
+ for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
+ bool is_valid = true; \
+ VI_ELEMENT_SKIP(i); \
+ VI_STRIP(i); \
+ for (reg_t fn = 0; fn < nf; ++fn) { \
+ ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \
+ if (vd + fn >= NVPR){ \
+ P.VU.vstart = vreg_inx;\
+ require(false); \
+ } \
+ switch(P.VU.vsew){ \
+ case e8: \
+ P.VU.elt<uint8_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ case e16: \
+ P.VU.elt<uint16_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ case e32: \
+ P.VU.elt<uint32_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ default: \
+ P.VU.elt<uint64_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ } \
+ } \
+ } \
+ P.VU.vstart = 0;
+
+
+#define VI_LDST_FF(itype, tsew) \
+ require(p->VU.vsew >= e##tsew && p->VU.vsew <= e64); \
+ const reg_t nf = insn.v_nf() + 1; \
+ require((nf * P.VU.vlmul) <= (NVPR / 4)); \
+ const reg_t sew = p->VU.vsew; \
+ const reg_t vl = p->VU.vl; \
+ const reg_t baseAddr = RS1; \
+ const reg_t rd_num = insn.rd(); \
+ bool early_stop = false; \
+ const reg_t vlmax = P.VU.vlmax; \
+ const reg_t vlmul = P.VU.vlmul; \
+ for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \
+ bool is_valid = true; \
+ VI_STRIP(i); \
+ VI_ELEMENT_SKIP(i); \
+ \
+ for (reg_t fn = 0; fn < nf; ++fn) { \
+ itype##64_t val = MMU.load_##itype##tsew(baseAddr + (i * nf + fn) * (tsew / 8)); \
+ \
+ switch (sew) { \
+ case e8: \
+ p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ case e16: \
+ p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ case e32: \
+ p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ case e64: \
+ p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \
+ break; \
+ } \
+ \
+ if (val == 0 && is_valid) { \
+ p->VU.vl = i; \
+ early_stop = true; \
+ break; \
+ } \
+ } \
+ \
+ if (early_stop) { \
+ break; \
+ } \
+ } \
+ p->VU.vstart = 0;
+
// Seems that 0x0 doesn't work.
#define DEBUG_START 0x100
#define DEBUG_END (0x1000 - 1)
diff --git a/riscv/insns/vlb_v.h b/riscv/insns/vlb_v.h
new file mode 100644
index 0000000..a83587d
--- /dev/null
+++ b/riscv/insns/vlb_v.h
@@ -0,0 +1,3 @@
+// vlb.v and vlseg[2-8]b.v
+require(P.VU.vsew >= e8);
+VI_LD(0, i * nf + fn, int8, 1);
diff --git a/riscv/insns/vlbff_v.h b/riscv/insns/vlbff_v.h
new file mode 100644
index 0000000..6517315
--- /dev/null
+++ b/riscv/insns/vlbff_v.h
@@ -0,0 +1,2 @@
+// vlbff.v and vlseg[2-8]bff.v
+VI_LDST_FF(int, 8);
diff --git a/riscv/insns/vlbu_v.h b/riscv/insns/vlbu_v.h
new file mode 100644
index 0000000..c1e3ea7
--- /dev/null
+++ b/riscv/insns/vlbu_v.h
@@ -0,0 +1,3 @@
+// vlbu.v and vlseg[2-8]bu.v
+require(P.VU.vsew >= e8);
+VI_LD(0, i * nf + fn, uint8, 1);
diff --git a/riscv/insns/vlbuff_v.h b/riscv/insns/vlbuff_v.h
new file mode 100644
index 0000000..53a0685
--- /dev/null
+++ b/riscv/insns/vlbuff_v.h
@@ -0,0 +1,2 @@
+// vlbuff.v and vlseg[2-8]buff.v
+VI_LDST_FF(uint, 8);
diff --git a/riscv/insns/vle_v.h b/riscv/insns/vle_v.h
new file mode 100644
index 0000000..67261cc
--- /dev/null
+++ b/riscv/insns/vle_v.h
@@ -0,0 +1,13 @@
+// vle.v and vlseg[2-8]e.v
+reg_t sew = P.VU.vsew;
+
+if (sew == e8) {
+ VI_LD(0, (i * nf + fn), int8, 1);
+} else if (sew == e16) {
+ VI_LD(0, (i * nf + fn), int16, 2);
+} else if (sew == e32) {
+ VI_LD(0, (i * nf + fn), int32, 4);
+} else if (sew == e64) {
+ VI_LD(0, (i * nf + fn), int64, 8);
+}
+
diff --git a/riscv/insns/vleff_v.h b/riscv/insns/vleff_v.h
new file mode 100644
index 0000000..4fae43a
--- /dev/null
+++ b/riscv/insns/vleff_v.h
@@ -0,0 +1,54 @@
+require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
+const reg_t nf = insn.v_nf() + 1;
+require((nf * P.VU.vlmul) <= (NVPR / 4));
+const reg_t sew = P.VU.vsew;
+const reg_t vl = P.VU.vl;
+const reg_t baseAddr = RS1;
+const reg_t rd_num = insn.rd();
+bool early_stop = false;
+const reg_t vlmul = P.VU.vlmul;
+for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
+ bool is_valid = true;
+ bool is_zero = false;
+ VI_STRIP(i);
+ VI_ELEMENT_SKIP(i);
+
+ for (reg_t fn = 0; fn < nf; ++fn) {
+ MMU.load_uint8(baseAddr + (i * nf + fn) * 1);
+
+ switch (sew) {
+ case e8:
+ P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) =
+ is_valid ? MMU.load_uint8(baseAddr + (i * nf + fn) * 1) : 0;
+ is_zero = is_valid && P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ break;
+ case e16:
+ P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) =
+ is_valid ? MMU.load_uint16(baseAddr + (i * nf + fn) * 2) : 0;
+ is_zero = is_valid && P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ break;
+ case e32:
+ P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) =
+ is_valid ? MMU.load_uint32(baseAddr + (i * nf + fn) * 4) : 0;
+ is_zero = is_valid && P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ break;
+ case e64:
+ P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) =
+ is_valid ? MMU.load_uint64(baseAddr + (i * nf + fn) * 8) : 0;
+ is_zero = is_valid && P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
+ break;
+ }
+
+ if (is_zero) {
+ P.VU.vl = i;
+ early_stop = true;
+ break;
+ }
+ }
+
+ if (early_stop) {
+ break;
+ }
+}
+
+P.VU.vstart = 0;
diff --git a/riscv/insns/vlh_v.h b/riscv/insns/vlh_v.h
new file mode 100644
index 0000000..d976693
--- /dev/null
+++ b/riscv/insns/vlh_v.h
@@ -0,0 +1,3 @@
+// vlh.v and vlseg[2-8]h.v
+require(P.VU.vsew >= e16);
+VI_LD(0, i * nf + fn, int16, 2);
diff --git a/riscv/insns/vlhff_v.h b/riscv/insns/vlhff_v.h
new file mode 100644
index 0000000..c4c2d8e
--- /dev/null
+++ b/riscv/insns/vlhff_v.h
@@ -0,0 +1,2 @@
+// vlhff.v and vlseg[2-8]hff.v
+VI_LDST_FF(int, 16);
diff --git a/riscv/insns/vlhu_v.h b/riscv/insns/vlhu_v.h
new file mode 100644
index 0000000..5b936dd
--- /dev/null
+++ b/riscv/insns/vlhu_v.h
@@ -0,0 +1,3 @@
+// vlhu.v and vlseg[2-8]hu.v
+require(P.VU.vsew >= e16);
+VI_LD(0, i * nf + fn, uint16, 2);
diff --git a/riscv/insns/vlhuff_v.h b/riscv/insns/vlhuff_v.h
new file mode 100644
index 0000000..f23f82d
--- /dev/null
+++ b/riscv/insns/vlhuff_v.h
@@ -0,0 +1,2 @@
+// vlhuff.v and vlseg[2-8]huff.v
+VI_LDST_FF(uint, 16);
diff --git a/riscv/insns/vlsb_v.h b/riscv/insns/vlsb_v.h
new file mode 100644
index 0000000..fd57bbe
--- /dev/null
+++ b/riscv/insns/vlsb_v.h
@@ -0,0 +1,3 @@
+// vlsb.v and vlsseg[2-8]b.v
+require(P.VU.vsew >= e8);
+VI_LD(i * RS2, fn, int8, 1);
diff --git a/riscv/insns/vlsbu_v.h b/riscv/insns/vlsbu_v.h
new file mode 100644
index 0000000..4376833
--- /dev/null
+++ b/riscv/insns/vlsbu_v.h
@@ -0,0 +1,3 @@
+// vlsbu.v and vlsseg[2-8]bu.v
+require(P.VU.vsew >= e8);
+VI_LD(i * RS2, fn, uint8, 1);
diff --git a/riscv/insns/vlse_v.h b/riscv/insns/vlse_v.h
new file mode 100644
index 0000000..2ac2f29
--- /dev/null
+++ b/riscv/insns/vlse_v.h
@@ -0,0 +1,13 @@
+// vlse.v and vlsseg[2-8]e.v
+reg_t sew = P.VU.vsew;
+
+if (sew == e8) {
+ VI_LD(i * RS2, fn, int8, 1);
+} else if (sew == e16) {
+ VI_LD(i * RS2, fn, int16, 2);
+} else if (sew == e32) {
+ VI_LD(i * RS2, fn, int32, 4);
+} else if (sew == e64) {
+ VI_LD(i * RS2, fn, int64, 8);
+}
+
diff --git a/riscv/insns/vlsh_v.h b/riscv/insns/vlsh_v.h
new file mode 100644
index 0000000..2834353
--- /dev/null
+++ b/riscv/insns/vlsh_v.h
@@ -0,0 +1,3 @@
+// vlsh.v and vlsseg[2-8]h.v
+require(P.VU.vsew >= e16);
+VI_LD(i * RS2, fn, int16, 2);
diff --git a/riscv/insns/vlshu_v.h b/riscv/insns/vlshu_v.h
new file mode 100644
index 0000000..9b11b66
--- /dev/null
+++ b/riscv/insns/vlshu_v.h
@@ -0,0 +1,3 @@
+// vlshu.v and vlsseg[2-8]hu.v
+require(P.VU.vsew >= e16);
+VI_LD(i * RS2, fn, uint16, 2);
diff --git a/riscv/insns/vlsw_v.h b/riscv/insns/vlsw_v.h
new file mode 100644
index 0000000..6681acb
--- /dev/null
+++ b/riscv/insns/vlsw_v.h
@@ -0,0 +1,3 @@
+// vlsw.v and vlsseg[2-8]w.v
+require(P.VU.vsew >= e32);
+VI_LD(i * RS2, fn, int32, 4);
diff --git a/riscv/insns/vlswu_v.h b/riscv/insns/vlswu_v.h
new file mode 100644
index 0000000..865af22
--- /dev/null
+++ b/riscv/insns/vlswu_v.h
@@ -0,0 +1,3 @@
+// vlswu.v and vlsseg[2-8]wu.v
+require(P.VU.vsew >= e32);
+VI_LD(i * RS2, fn, uint32, 4);
diff --git a/riscv/insns/vlw_v.h b/riscv/insns/vlw_v.h
new file mode 100644
index 0000000..b62f3d0
--- /dev/null
+++ b/riscv/insns/vlw_v.h
@@ -0,0 +1,3 @@
+// vlw.v and vlseg[2-8]w.v
+require(P.VU.vsew >= e32);
+VI_LD(0, i * nf + fn, int32, 4);
diff --git a/riscv/insns/vlwff_v.h b/riscv/insns/vlwff_v.h
new file mode 100644
index 0000000..b671b01
--- /dev/null
+++ b/riscv/insns/vlwff_v.h
@@ -0,0 +1,3 @@
+// vlwff.v
+// vlseg[2-8]wff.v
+VI_LDST_FF(int, 32);
diff --git a/riscv/insns/vlwu_v.h b/riscv/insns/vlwu_v.h
new file mode 100644
index 0000000..a4f8329
--- /dev/null
+++ b/riscv/insns/vlwu_v.h
@@ -0,0 +1,3 @@
+// vlwu.v and vlseg[2-8]wu.v
+require(P.VU.vsew >= e32);
+VI_LD(0, i * nf + fn, uint32, 4);
diff --git a/riscv/insns/vlwuff_v.h b/riscv/insns/vlwuff_v.h
new file mode 100644
index 0000000..d50cb69
--- /dev/null
+++ b/riscv/insns/vlwuff_v.h
@@ -0,0 +1,2 @@
+// vlwuff.v and vlseg[2-8]wuff.v
+VI_LDST_FF(uint, 32);
diff --git a/riscv/insns/vlxb_v.h b/riscv/insns/vlxb_v.h
new file mode 100644
index 0000000..5a99bd3
--- /dev/null
+++ b/riscv/insns/vlxb_v.h
@@ -0,0 +1,4 @@
+// vlxb.v and vlxseg[2-8]b.v
+require(P.VU.vsew >= e8);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_LD(index[i], fn, int8, 1);
diff --git a/riscv/insns/vlxbu_v.h b/riscv/insns/vlxbu_v.h
new file mode 100644
index 0000000..daf2d2b
--- /dev/null
+++ b/riscv/insns/vlxbu_v.h
@@ -0,0 +1,4 @@
+// vlxbu.v and vlxseg[2-8]bu.v
+require(P.VU.vsew >= e8);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_LD(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vlxe_v.h b/riscv/insns/vlxe_v.h
new file mode 100644
index 0000000..b1190a8
--- /dev/null
+++ b/riscv/insns/vlxe_v.h
@@ -0,0 +1,13 @@
+// vlxe.v and vlxseg[2-8]e.v
+reg_t sew = P.VU.vsew;
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+if (sew == e8) {
+ VI_LD(index[i], fn, int8, 1);
+} else if (sew == e16) {
+ VI_LD(index[i], fn, int16, 2);
+} else if (sew == e32) {
+ VI_LD(index[i], fn, int32, 4);
+} else if (sew == e64) {
+ VI_LD(index[i], fn, int64, 8);
+}
+
diff --git a/riscv/insns/vlxh_v.h b/riscv/insns/vlxh_v.h
new file mode 100644
index 0000000..98145db
--- /dev/null
+++ b/riscv/insns/vlxh_v.h
@@ -0,0 +1,4 @@
+// vlxh.v and vlxseg[2-8]h.v
+require(P.VU.vsew >= e16);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_LD(index[i], fn, int16, 2);
diff --git a/riscv/insns/vlxhu_v.h b/riscv/insns/vlxhu_v.h
new file mode 100644
index 0000000..27d549c
--- /dev/null
+++ b/riscv/insns/vlxhu_v.h
@@ -0,0 +1,4 @@
+// vlxhu.v and vlxseg[2-8]hu.v
+require(P.VU.vsew >= e16);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_LD(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vlxw_v.h b/riscv/insns/vlxw_v.h
new file mode 100644
index 0000000..83300f0
--- /dev/null
+++ b/riscv/insns/vlxw_v.h
@@ -0,0 +1,5 @@
+// vlxw.v and vlxseg[2-8]w.v
+require(P.VU.vsew >= e32);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_LD(index[i], fn, int32, 4);
+
diff --git a/riscv/insns/vlxwu_v.h b/riscv/insns/vlxwu_v.h
new file mode 100644
index 0000000..a2f9913
--- /dev/null
+++ b/riscv/insns/vlxwu_v.h
@@ -0,0 +1,4 @@
+// vlxwu.v and vlxseg[2-8]wu.v
+require(P.VU.vsew >= e32);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_LD(index[i], fn, uint32, 4);
diff --git a/riscv/insns/vsb_v.h b/riscv/insns/vsb_v.h
new file mode 100644
index 0000000..d8c9090
--- /dev/null
+++ b/riscv/insns/vsb_v.h
@@ -0,0 +1,3 @@
+// vsb.v and vsseg[2-8]b.v
+require(P.VU.vsew >= e8);
+VI_ST(0, i * nf + fn, uint8, 1);
diff --git a/riscv/insns/vse_v.h b/riscv/insns/vse_v.h
new file mode 100644
index 0000000..1e0dac3
--- /dev/null
+++ b/riscv/insns/vse_v.h
@@ -0,0 +1,13 @@
+// vse.v and vsseg[2-8]e.v
+reg_t sew = P.VU.vsew;
+
+if (sew == e8) {
+ VI_ST(0, (i * nf + fn), uint8, 1);
+} else if (sew == e16) {
+ VI_ST(0, (i * nf + fn), uint16, 2);
+} else if (sew == e32) {
+ VI_ST(0, (i * nf + fn), uint32, 4);
+} else if (sew == e64) {
+ VI_ST(0, (i * nf + fn), uint64, 8);
+}
+
diff --git a/riscv/insns/vsh_v.h b/riscv/insns/vsh_v.h
new file mode 100644
index 0000000..a38bc90
--- /dev/null
+++ b/riscv/insns/vsh_v.h
@@ -0,0 +1,3 @@
+// vsh.v and vsseg[2-8]h.v
+require(P.VU.vsew >= e16);
+VI_ST(0, i * nf + fn, uint16, 2);
diff --git a/riscv/insns/vssb_v.h b/riscv/insns/vssb_v.h
new file mode 100644
index 0000000..1bf0ecf
--- /dev/null
+++ b/riscv/insns/vssb_v.h
@@ -0,0 +1,3 @@
+// vssb.v and vssseg[2-8]b.v
+require(P.VU.vsew >= e8);
+VI_ST(i * RS2, fn, uint8, 1);
diff --git a/riscv/insns/vsse_v.h b/riscv/insns/vsse_v.h
new file mode 100644
index 0000000..2242759
--- /dev/null
+++ b/riscv/insns/vsse_v.h
@@ -0,0 +1,13 @@
+// vsse.v and vssseg[2-8]e.v
+reg_t sew = P.VU.vsew;
+
+if (sew == e8) {
+ VI_ST(i * RS2, fn, uint8, 1);
+} else if (sew == e16) {
+ VI_ST(i * RS2, fn, uint16, 2);
+} else if (sew == e32) {
+ VI_ST(i * RS2, fn, uint32, 4);
+} else if (sew == e64) {
+ VI_ST(i * RS2, fn, uint64, 8);
+}
+
diff --git a/riscv/insns/vssh_v.h b/riscv/insns/vssh_v.h
new file mode 100644
index 0000000..e0ebed2
--- /dev/null
+++ b/riscv/insns/vssh_v.h
@@ -0,0 +1,3 @@
+// vssh.v and vssseg[2-8]h.v
+require(P.VU.vsew >= e16);
+VI_ST(i * RS2, fn, uint16, 2);
diff --git a/riscv/insns/vssw_v.h b/riscv/insns/vssw_v.h
new file mode 100644
index 0000000..c191d2e
--- /dev/null
+++ b/riscv/insns/vssw_v.h
@@ -0,0 +1,3 @@
+// vssw.v and vssseg[2-8]w.v
+require(P.VU.vsew >= e32);
+VI_ST(i * RS2, fn, uint32, 4);
diff --git a/riscv/insns/vsuxb_v.h b/riscv/insns/vsuxb_v.h
new file mode 100644
index 0000000..9c78154
--- /dev/null
+++ b/riscv/insns/vsuxb_v.h
@@ -0,0 +1,37 @@
+// vsuxb.v and vsuxseg[2-8]b.v
+require(P.VU.vsew >= e8);
+reg_t vl = P.VU.vl;
+reg_t baseAddr = RS1;
+reg_t stride = insn.rs2();
+reg_t vs3 = insn.rd();
+reg_t vlmax = P.VU.vlmax;
+VI_DUPLICATE_VREG(stride, vlmax);
+for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
+ bool is_valid = true;
+ VI_ELEMENT_SKIP(i);
+ VI_STRIP(i)
+
+ switch (P.VU.vsew) {
+ case e8:
+ if (is_valid)
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint8_t>(vs3, vreg_inx));
+ break;
+ case e16:
+ if (is_valid)
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
+ break;
+ case e32:
+ if (is_valid)
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
+ break;
+ case e64:
+ if (is_valid)
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
+ break;
+ }
+}
+P.VU.vstart = 0;
diff --git a/riscv/insns/vsuxe_v.h b/riscv/insns/vsuxe_v.h
new file mode 100644
index 0000000..940d8ad
--- /dev/null
+++ b/riscv/insns/vsuxe_v.h
@@ -0,0 +1,38 @@
+// vsuxe.v and vsuxseg[2-8]e.v
+const reg_t sew = P.VU.vsew;
+const reg_t vl = P.VU.vl;
+require(sew >= e8 && sew <= e64);
+reg_t baseAddr = RS1;
+reg_t stride = insn.rs2();
+reg_t vs3 = insn.rd();
+reg_t vlmax = P.VU.vlmax;
+VI_DUPLICATE_VREG(stride, vlmax);
+for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
+ bool is_valid = true;
+ VI_ELEMENT_SKIP(i);
+ VI_STRIP(i)
+
+ switch (sew) {
+ case e8:
+ if (is_valid)
+ MMU.store_uint8(baseAddr + index[i],
+ P.VU.elt<uint8_t>(vs3, vreg_inx));
+ break;
+ case e16:
+ if (is_valid)
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
+ break;
+ case e32:
+ if (is_valid)
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
+ break;
+ case e64:
+ if (is_valid)
+ MMU.store_uint64(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
+ break;
+ }
+}
+P.VU.vstart = 0;
diff --git a/riscv/insns/vsuxh_v.h b/riscv/insns/vsuxh_v.h
new file mode 100644
index 0000000..a5113b7
--- /dev/null
+++ b/riscv/insns/vsuxh_v.h
@@ -0,0 +1,32 @@
+// vsuxh.v and vsuxseg[2-8]h.v
+require(P.VU.vsew >= e16);
+reg_t vl = P.VU.vl;
+reg_t baseAddr = RS1;
+reg_t stride = insn.rs2();
+reg_t vs3 = insn.rd();
+reg_t vlmax = P.VU.vlmax;
+VI_DUPLICATE_VREG(stride, vlmax);
+for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
+ bool is_valid = true;
+ VI_ELEMENT_SKIP(i);
+ VI_STRIP(i)
+
+ switch (P.VU.vsew) {
+ case e16:
+ if (is_valid)
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint16_t>(vs3, vreg_inx));
+ break;
+ case e32:
+ if (is_valid)
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
+ break;
+ case e64:
+ if (is_valid)
+ MMU.store_uint16(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
+ break;
+ }
+}
+P.VU.vstart = 0;
diff --git a/riscv/insns/vsuxw_v.h b/riscv/insns/vsuxw_v.h
new file mode 100644
index 0000000..5320156
--- /dev/null
+++ b/riscv/insns/vsuxw_v.h
@@ -0,0 +1,27 @@
+// vsuxw.v and vsuxseg[2-8]w.v
+require(P.VU.vsew >= e32);
+reg_t vl = P.VU.vl;
+reg_t baseAddr = RS1;
+reg_t stride = insn.rs2();
+reg_t vs3 = insn.rd();
+reg_t vlmax = P.VU.vlmax;
+VI_DUPLICATE_VREG(stride, vlmax);
+for (reg_t i = 0; i < vlmax && vl != 0; ++i) {
+ bool is_valid = true;
+ VI_ELEMENT_SKIP(i);
+ VI_STRIP(i)
+
+ switch (P.VU.vsew) {
+ case e32:
+ if (is_valid)
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint32_t>(vs3, vreg_inx));
+ break;
+ case e64:
+ if (is_valid)
+ MMU.store_uint32(baseAddr + index[i],
+ P.VU.elt<uint64_t>(vs3, vreg_inx));
+ break;
+ }
+}
+P.VU.vstart = 0;
diff --git a/riscv/insns/vsw_v.h b/riscv/insns/vsw_v.h
new file mode 100644
index 0000000..5066657
--- /dev/null
+++ b/riscv/insns/vsw_v.h
@@ -0,0 +1,3 @@
+// vsw.v and vsseg[2-8]w.v
+require(P.VU.vsew >= e32);
+VI_ST(0, i * nf + fn, uint32, 4);
diff --git a/riscv/insns/vsxb_v.h b/riscv/insns/vsxb_v.h
new file mode 100644
index 0000000..3e50597
--- /dev/null
+++ b/riscv/insns/vsxb_v.h
@@ -0,0 +1,4 @@
+// vsxb.v and vsxseg[2-8]b.v
+require(P.VU.vsew >= e8);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_ST(index[i], fn, uint8, 1);
diff --git a/riscv/insns/vsxe_v.h b/riscv/insns/vsxe_v.h
new file mode 100644
index 0000000..28984ac
--- /dev/null
+++ b/riscv/insns/vsxe_v.h
@@ -0,0 +1,14 @@
+// vsxe.v and vsxseg[2-8]e.v
+reg_t sew = P.VU.vsew;
+require(sew >= e8 && sew <= e64);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+if (sew == e8) {
+ VI_ST(index[i], fn, uint8, 1);
+} else if (sew == e16) {
+ VI_ST(index[i], fn, uint16, 2);
+} else if (sew == e32) {
+ VI_ST(index[i], fn, uint32, 4);
+} else if (sew == e64) {
+ VI_ST(index[i], fn, uint64, 8);
+}
+
diff --git a/riscv/insns/vsxh_v.h b/riscv/insns/vsxh_v.h
new file mode 100644
index 0000000..2e5506a
--- /dev/null
+++ b/riscv/insns/vsxh_v.h
@@ -0,0 +1,4 @@
+// vsxh.v and vsxseg[2-8]h.v
+require(P.VU.vsew >= e16);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_ST(index[i], fn, uint16, 2);
diff --git a/riscv/insns/vsxw_v.h b/riscv/insns/vsxw_v.h
new file mode 100644
index 0000000..9a2119f
--- /dev/null
+++ b/riscv/insns/vsxw_v.h
@@ -0,0 +1,4 @@
+// vsxw.v and vsxseg[2-8]w.v
+require(P.VU.vsew >= e32);
+VI_DUPLICATE_VREG(insn.rs2(), P.VU.vlmax);
+VI_ST(index[i], fn, uint32, 4);
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index e8c7f04..faf4019 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -496,6 +496,52 @@ riscv_insn_ext_v_alu_int = \
vxor_vv \
vxor_vx \
+riscv_insn_ext_v_ldst = \
+ vlb_v \
+ vlh_v \
+ vlw_v \
+ vle_v \
+ vlbu_v \
+ vlhu_v \
+ vlwu_v \
+ vlsb_v \
+ vlsh_v \
+ vlsw_v \
+ vlse_v \
+ vlxb_v \
+ vlxh_v \
+ vlxw_v \
+ vlxe_v \
+ vlsbu_v \
+ vlshu_v \
+ vlswu_v \
+ vlxbu_v \
+ vlxhu_v \
+ vlxwu_v \
+ vlbff_v \
+ vlhff_v \
+ vlwff_v \
+ vleff_v \
+ vlbuff_v \
+ vlhuff_v \
+ vlwuff_v \
+ vsb_v \
+ vsh_v \
+ vsw_v \
+ vse_v \
+ vssb_v \
+ vssh_v \
+ vssw_v \
+ vsse_v \
+ vsxb_v \
+ vsxh_v \
+ vsxw_v \
+ vsxe_v \
+ vsuxb_v \
+ vsuxh_v \
+ vsuxw_v \
+ vsuxe_v \
+
riscv_insn_ext_v_ctrl = \
vsetvli \
vsetvl \
@@ -503,6 +549,7 @@ riscv_insn_ext_v_ctrl = \
riscv_insn_ext_v = \
$(riscv_insn_ext_v_alu_int) \
$(riscv_insn_ext_v_ctrl) \
+ $(riscv_insn_ext_v_ldst) \
riscv_insn_priv = \
csrrc \