diff options
author | Chih-Min Chao <chihmin.chao@sifive.com> | 2019-06-06 03:24:52 -0700 |
---|---|---|
committer | Chih-Min Chao <chihmin.chao@sifive.com> | 2019-06-18 08:56:11 -0700 |
commit | 80ebc70e43e48c5a851348e898c13a2d8a8148d7 (patch) | |
tree | a11e7fa87d5c71ea23b4c410cd2c26e7855dcce6 /riscv/decode.h | |
parent | 655aedc0ebd2326d69d389bc714c2d622bf2cb08 (diff) | |
download | riscv-isa-sim-80ebc70e43e48c5a851348e898c13a2d8a8148d7.zip riscv-isa-sim-80ebc70e43e48c5a851348e898c13a2d8a8148d7.tar.gz riscv-isa-sim-80ebc70e43e48c5a851348e898c13a2d8a8148d7.tar.bz2 |
rvv: add load/store instructions
based on v-spec 0.7.1, support
section: 7
element size: 8/16/32/64
Signed-off-by: Bruce Hoult <bruce@hoult.org>
Signed-off-by: Chih-Min Chao <chihmin.chao@sifive.com>
Signed-off-by: Dave Wen <dave.wen@sifive.com>
Signed-off-by: Zakk Chen <zakk.chen@sifive.com>
Diffstat (limited to 'riscv/decode.h')
-rw-r--r-- | riscv/decode.h | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/riscv/decode.h b/riscv/decode.h index ca6a999..86fd799 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1329,6 +1329,157 @@ VI_LOOP_BASE \ } \ } \ VI_LOOP_END + +// +// vector: load/store helper +// +#define VI_STRIP(inx) \ + reg_t elems_per_strip = P.VU.get_slen()/P.VU.vsew; \ + reg_t elems_per_vreg = P.VU.get_vlen()/P.VU.vsew; \ + reg_t elems_per_lane = P.VU.vlmul * elems_per_strip; \ + reg_t strip_index = (inx) / elems_per_lane; \ + reg_t index_in_strip = (inx) % elems_per_strip; \ + int32_t lmul_inx = (int32_t)(((inx) % elems_per_lane) / elems_per_strip); \ + reg_t vreg_inx = lmul_inx * elems_per_vreg + strip_index * elems_per_strip + index_in_strip; + + +#define VI_DUPLICATE_VREG(v, vlmax) \ +reg_t index[vlmax] = {0}; \ +for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ + switch(P.VU.vsew) { \ + case e8: \ + index[i] = P.VU.elt<int8_t>(v, i); \ + break; \ + case e16: \ + index[i] = P.VU.elt<int16_t>(v, i); \ + break; \ + case e32: \ + index[i] = P.VU.elt<int32_t>(v, i); \ + break; \ + case e64: \ + index[i] = P.VU.elt<int64_t>(v, i); \ + break; \ + } \ +} + +#define VI_ST(stride, offset, st_width, elt_byte) \ + const reg_t nf = insn.v_nf() + 1; \ + require((nf * P.VU.vlmul) <= (NVPR / 4)); \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vs3 = insn.rd(); \ + const reg_t vlmax = P.VU.vlmax; \ + const reg_t vlmul = P.VU.vlmul; \ + for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \ + bool is_valid = true; \ + VI_STRIP(i) \ + VI_ELEMENT_SKIP(i); \ + if (!is_valid) \ + continue; \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + st_width##_t val = 0; \ + switch (P.VU.vsew) { \ + case e8: \ + val = P.VU.elt<uint8_t>(vs3 + fn * vlmul, vreg_inx); \ + break; \ + case e16: \ + val = P.VU.elt<uint16_t>(vs3 + fn * vlmul, vreg_inx); \ + break; \ + case e32: \ + val = P.VU.elt<uint32_t>(vs3 + fn * vlmul, vreg_inx); \ + break; \ + default: \ + val = P.VU.elt<uint64_t>(vs3 + fn * vlmul, vreg_inx); \ + break; \ + } \ + MMU.store_##st_width(baseAddr + (stride) + (offset) * elt_byte, val); \ + } \ + } \ + P.VU.vstart = 0; + +#define VI_LD(stride, offset, ld_width, elt_byte) \ + const reg_t nf = insn.v_nf() + 1; \ + require((nf * P.VU.vlmul) <= (NVPR / 4)); \ + const reg_t vl = P.VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t vd = insn.rd(); \ + const reg_t vlmax = P.VU.vlmax; \ + const reg_t vlmul = P.VU.vlmul; \ + for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \ + bool is_valid = true; \ + VI_ELEMENT_SKIP(i); \ + VI_STRIP(i); \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + ld_width##_t val = MMU.load_##ld_width(baseAddr + (stride) + (offset) * elt_byte); \ + if (vd + fn >= NVPR){ \ + P.VU.vstart = vreg_inx;\ + require(false); \ + } \ + switch(P.VU.vsew){ \ + case e8: \ + P.VU.elt<uint8_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + case e16: \ + P.VU.elt<uint16_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + case e32: \ + P.VU.elt<uint32_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + default: \ + P.VU.elt<uint64_t>(vd + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + } \ + } \ + } \ + P.VU.vstart = 0; + + +#define VI_LDST_FF(itype, tsew) \ + require(p->VU.vsew >= e##tsew && p->VU.vsew <= e64); \ + const reg_t nf = insn.v_nf() + 1; \ + require((nf * P.VU.vlmul) <= (NVPR / 4)); \ + const reg_t sew = p->VU.vsew; \ + const reg_t vl = p->VU.vl; \ + const reg_t baseAddr = RS1; \ + const reg_t rd_num = insn.rd(); \ + bool early_stop = false; \ + const reg_t vlmax = P.VU.vlmax; \ + const reg_t vlmul = P.VU.vlmul; \ + for (reg_t i = 0; i < vlmax && vl != 0; ++i) { \ + bool is_valid = true; \ + VI_STRIP(i); \ + VI_ELEMENT_SKIP(i); \ + \ + for (reg_t fn = 0; fn < nf; ++fn) { \ + itype##64_t val = MMU.load_##itype##tsew(baseAddr + (i * nf + fn) * (tsew / 8)); \ + \ + switch (sew) { \ + case e8: \ + p->VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + case e16: \ + p->VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + case e32: \ + p->VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + case e64: \ + p->VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) = is_valid ? val : 0; \ + break; \ + } \ + \ + if (val == 0 && is_valid) { \ + p->VU.vl = i; \ + early_stop = true; \ + break; \ + } \ + } \ + \ + if (early_stop) { \ + break; \ + } \ + } \ + p->VU.vstart = 0; + // Seems that 0x0 doesn't work. #define DEBUG_START 0x100 #define DEBUG_END (0x1000 - 1) |