about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  riscv/csrs.cc                  6
-rw-r--r--  riscv/csrs.h                   5
-rw-r--r--  riscv/decode.h               106
-rw-r--r--  riscv/insns/vcompress_vm.h     2
-rw-r--r--  riscv/insns/vcpop_m.h          6
-rw-r--r--  riscv/insns/vfirst_m.h         6
-rw-r--r--  riscv/insns/vfmerge_vfm.h      8
-rw-r--r--  riscv/insns/vfmv_f_s.h         2
-rw-r--r--  riscv/insns/vfmv_s_f.h         4
-rw-r--r--  riscv/insns/vfmv_v_f.h         8
-rw-r--r--  riscv/insns/vid_v.h            4
-rw-r--r--  riscv/insns/viota_m.h          2
-rw-r--r--  riscv/insns/vmsbf_m.h          4
-rw-r--r--  riscv/insns/vmsif_m.h          4
-rw-r--r--  riscv/insns/vmsof_m.h          4
-rw-r--r--  riscv/insns/vmv_s_x.h          4
-rw-r--r--  riscv/insns/vmv_x_s.h          2
-rw-r--r--  riscv/insns/vmvnfr_v.h         8
-rw-r--r--  riscv/insns/vrgather_vi.h      2
-rw-r--r--  riscv/insns/vslideup_vi.h      2
-rw-r--r--  riscv/insns/vslideup_vx.h      2
-rw-r--r--  riscv/processor.cc            16
-rw-r--r--  riscv/processor.h              6
23 files changed, 106 insertions(+), 107 deletions(-)
diff --git a/riscv/csrs.cc b/riscv/csrs.cc
index 4fff9cb..ef247bd 100644
--- a/riscv/csrs.cc
+++ b/riscv/csrs.cc
@@ -1200,6 +1200,12 @@ void vector_csr_t::verify_permissions(insn_t insn, bool write) const {
basic_csr_t::verify_permissions(insn, write);
}
+void vector_csr_t::write_raw(const reg_t val) noexcept {
+ const bool success = basic_csr_t::unlogged_write(val);
+ if (success)
+ log_write();
+}
+
bool vector_csr_t::unlogged_write(const reg_t val) noexcept {
if (mask == 0) return false;
dirty_vs_state;
diff --git a/riscv/csrs.h b/riscv/csrs.h
index 6095b94..ff84246 100644
--- a/riscv/csrs.h
+++ b/riscv/csrs.h
@@ -618,11 +618,16 @@ class vector_csr_t: public basic_csr_t {
public:
vector_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask);
virtual void verify_permissions(insn_t insn, bool write) const override;
+ // Write without regard to mask, and without touching mstatus.VS
+ void write_raw(const reg_t val) noexcept;
protected:
virtual bool unlogged_write(const reg_t val) noexcept override;
private:
reg_t mask;
};
+typedef std::shared_ptr<vector_csr_t> vector_csr_t_p;
+
+
#endif
diff --git a/riscv/decode.h b/riscv/decode.h
index b9af04a..e5c67a9 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -258,7 +258,7 @@ private:
require_extension('V'); \
require(!P.VU.vill); \
if (alu && !P.VU.vstart_alu) \
- require(P.VU.vstart == 0); \
+ require(P.VU.vstart->read() == 0); \
WRITE_VSTATUS; \
dirty_vs_state; \
} while (0);
@@ -267,7 +267,7 @@ private:
require_vector_vs; \
require_extension('V'); \
if (alu && !P.VU.vstart_alu) \
- require(P.VU.vstart == 0); \
+ require(P.VU.vstart->read() == 0); \
if (is_log) \
WRITE_VSTATUS; \
dirty_vs_state; \
@@ -431,7 +431,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r));
#define VI_ELEMENT_SKIP(inx) \
if (inx >= vl) { \
continue; \
- } else if (inx < P.VU.vstart) { \
+ } else if (inx < P.VU.vstart->read()) { \
continue; \
} else { \
VI_LOOP_ELEMENT_SKIP(); \
@@ -598,7 +598,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
require(P.VU.vsew * 2 <= P.VU.ELEN); \
} \
require_align(insn.rs2(), P.VU.vflmul); \
- require(P.VU.vstart == 0); \
+ require(P.VU.vstart->read() == 0); \
#define VI_CHECK_SLIDE(is_over) \
require_align(insn.rs2(), P.VU.vflmul); \
@@ -619,7 +619,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
- for (reg_t i=P.VU.vstart; i<vl; ++i){
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){
#define VI_LOOP_BASE \
VI_GENERAL_LOOP_BASE \
@@ -627,14 +627,14 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
#define VI_LOOP_END \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_LOOP_REDUCTION_END(x) \
} \
if (vl > 0) { \
vd_0_des = vd_0_res; \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_LOOP_CMP_BASE \
require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \
@@ -644,7 +644,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
reg_t rd_num = insn.rd(); \
reg_t rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
uint64_t mmask = UINT64_C(1) << mpos; \
uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx, true); \
@@ -653,13 +653,13 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
#define VI_LOOP_CMP_END \
vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_LOOP_MASK(op) \
require(P.VU.vsew <= e64); \
require_vector(true);\
reg_t vl = P.VU.vl; \
- for (reg_t i = P.VU.vstart; i < vl; ++i) { \
+ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
int midx = i / 64; \
int mpos = i % 64; \
uint64_t mmask = UINT64_C(1) << mpos; \
@@ -668,7 +668,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx, true); \
res = (res & ~mmask) | ((op) & (1ULL << mpos)); \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_LOOP_NSHIFT_BASE \
VI_GENERAL_LOOP_BASE; \
@@ -955,7 +955,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
reg_t rs2_num = insn.rs2(); \
auto &vd_0_des = P.VU.elt<type_sew_t<x>::type>(rd_num, 0, true); \
auto vd_0_res = P.VU.elt<type_sew_t<x>::type>(rs1_num, 0); \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \
@@ -986,7 +986,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos)
reg_t rs2_num = insn.rs2(); \
auto &vd_0_des = P.VU.elt<type_usew_t<x>::type>(rd_num, 0, true); \
auto vd_0_res = P.VU.elt<type_usew_t<x>::type>(rs1_num, 0); \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
auto vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
@@ -1305,7 +1305,7 @@ VI_LOOP_END
reg_t rs2_num = insn.rs2(); \
auto &vd_0_des = P.VU.elt<type_sew_t<sew2>::type>(rd_num, 0, true); \
auto vd_0_res = P.VU.elt<type_sew_t<sew2>::type>(rs1_num, 0); \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i);
@@ -1333,7 +1333,7 @@ VI_LOOP_END
reg_t rs2_num = insn.rs2(); \
auto &vd_0_des = P.VU.elt<type_usew_t<sew2>::type>(rd_num, 0, true); \
auto vd_0_res = P.VU.elt<type_usew_t<sew2>::type>(rs1_num, 0); \
- for (reg_t i=P.VU.vstart; i<vl; ++i) { \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i);
@@ -1547,14 +1547,14 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
for (reg_t i = 0; i < vl; ++i) { \
VI_ELEMENT_SKIP(i); \
VI_STRIP(i); \
- P.VU.vstart = i; \
+ P.VU.vstart->write(i); \
for (reg_t fn = 0; fn < nf; ++fn) { \
elt_width##_t val = MMU.load_##elt_width( \
baseAddr + (stride) + (offset) * sizeof(elt_width##_t)); \
P.VU.elt<elt_width##_t>(vd + fn * emul, vreg_inx, true) = val; \
} \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_LD_INDEX(elt_width, is_seg) \
const reg_t nf = insn.v_nf() + 1; \
@@ -1568,7 +1568,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
for (reg_t i = 0; i < vl; ++i) { \
VI_ELEMENT_SKIP(i); \
VI_STRIP(i); \
- P.VU.vstart = i; \
+ P.VU.vstart->write(i); \
for (reg_t fn = 0; fn < nf; ++fn) { \
switch(P.VU.vsew){ \
case e8: \
@@ -1590,7 +1590,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
} \
} \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_ST(stride, offset, elt_width, is_mask_ldst) \
const reg_t nf = insn.v_nf() + 1; \
@@ -1601,14 +1601,14 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i) \
VI_ELEMENT_SKIP(i); \
- P.VU.vstart = i; \
+ P.VU.vstart->write(i); \
for (reg_t fn = 0; fn < nf; ++fn) { \
elt_width##_t val = P.VU.elt<elt_width##_t>(vs3 + fn * emul, vreg_inx); \
MMU.store_##elt_width( \
baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \
} \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_ST_INDEX(elt_width, is_seg) \
const reg_t nf = insn.v_nf() + 1; \
@@ -1622,7 +1622,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
for (reg_t i = 0; i < vl; ++i) { \
VI_STRIP(i) \
VI_ELEMENT_SKIP(i); \
- P.VU.vstart = i; \
+ P.VU.vstart->write(i); \
for (reg_t fn = 0; fn < nf; ++fn) { \
switch (P.VU.vsew) { \
case e8: \
@@ -1644,7 +1644,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
} \
} \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_LDST_FF(elt_width) \
const reg_t nf = insn.v_nf() + 1; \
@@ -1654,7 +1654,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
const reg_t rd_num = insn.rd(); \
VI_CHECK_LOAD(elt_width, false); \
bool early_stop = false; \
- for (reg_t i = p->VU.vstart; i < vl; ++i) { \
+ for (reg_t i = p->VU.vstart->read(); i < vl; ++i) { \
VI_STRIP(i); \
VI_ELEMENT_SKIP(i); \
\
@@ -1678,7 +1678,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
break; \
} \
} \
- p->VU.vstart = 0;
+ p->VU.vstart->write(0);
#define VI_LD_WHOLE(elt_width) \
require_vector_novtype(true, false); \
@@ -1688,24 +1688,24 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
require_align(vd, len); \
const reg_t elt_per_reg = P.VU.vlenb / sizeof(elt_width ## _t); \
const reg_t size = len * elt_per_reg; \
- if (P.VU.vstart < size) { \
- reg_t i = P.VU.vstart / elt_per_reg; \
- reg_t off = P.VU.vstart % elt_per_reg; \
+ if (P.VU.vstart->read() < size) { \
+ reg_t i = P.VU.vstart->read() / elt_per_reg; \
+ reg_t off = P.VU.vstart->read() % elt_per_reg; \
if (off) { \
for (reg_t pos = off; pos < elt_per_reg; ++pos) { \
auto val = MMU.load_## elt_width(baseAddr + \
- P.VU.vstart * sizeof(elt_width ## _t)); \
+ P.VU.vstart->read() * sizeof(elt_width ## _t)); \
P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \
- P.VU.vstart++; \
+ P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
++i; \
} \
for (; i < len; ++i) { \
for (reg_t pos = 0; pos < elt_per_reg; ++pos) { \
auto val = MMU.load_## elt_width(baseAddr + \
- P.VU.vstart * sizeof(elt_width ## _t)); \
+ P.VU.vstart->read() * sizeof(elt_width ## _t)); \
P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \
- P.VU.vstart++; \
+ P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
} \
} \
@@ -1719,26 +1719,26 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
require_align(vs3, len); \
const reg_t size = len * P.VU.vlenb; \
\
- if (P.VU.vstart < size) { \
- reg_t i = P.VU.vstart / P.VU.vlenb; \
- reg_t off = P.VU.vstart % P.VU.vlenb; \
+ if (P.VU.vstart->read() < size) { \
+ reg_t i = P.VU.vstart->read() / P.VU.vlenb; \
+ reg_t off = P.VU.vstart->read() % P.VU.vlenb; \
if (off) { \
for (reg_t pos = off; pos < P.VU.vlenb; ++pos) { \
auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \
- MMU.store_uint8(baseAddr + P.VU.vstart, val); \
- P.VU.vstart++; \
+ MMU.store_uint8(baseAddr + P.VU.vstart->read(), val); \
+ P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
i++; \
} \
for (; i < len; ++i) { \
for (reg_t pos = 0; pos < P.VU.vlenb; ++pos) { \
auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \
- MMU.store_uint8(baseAddr + P.VU.vstart, val); \
- P.VU.vstart++; \
+ MMU.store_uint8(baseAddr + P.VU.vstart->read(), val); \
+ P.VU.vstart->write(P.VU.vstart->read() + 1); \
} \
} \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
//
// vector: amo
@@ -1768,10 +1768,10 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
const reg_t vl = P.VU.vl; \
const reg_t baseAddr = RS1; \
const reg_t vd = insn.rd(); \
- for (reg_t i = P.VU.vstart; i < vl; ++i) { \
+ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_ELEMENT_SKIP(i); \
VI_STRIP(i); \
- P.VU.vstart = i; \
+ P.VU.vstart->write(i); \
switch (P.VU.vsew) { \
case e32: {\
auto vs3 = P.VU.elt< type ## 32_t>(vd, vreg_inx); \
@@ -1792,7 +1792,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
break; \
} \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
// vector: sign/unsiged extension
#define VI_VV_EXT(div, type) \
@@ -1856,12 +1856,12 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
#define VI_VFP_LOOP_BASE \
VI_VFP_COMMON \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP();
#define VI_VFP_LOOP_CMP_BASE \
VI_VFP_COMMON \
- for (reg_t i = P.VU.vstart; i < vl; ++i) { \
+ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
uint64_t mmask = UINT64_C(1) << mpos; \
uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx, true); \
@@ -1872,7 +1872,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
float##width##_t vs1_0 = P.VU.elt<float##width##_t>(rs1_num, 0); \
vd_0 = vs1_0; \
bool is_active = false; \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP(); \
float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); \
is_active = true; \
@@ -1880,16 +1880,16 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
#define VI_VFP_LOOP_WIDE_REDUCTION_BASE \
VI_VFP_COMMON \
float64_t vd_0 = f64(P.VU.elt<float64_t>(rs1_num, 0).v); \
- for (reg_t i=P.VU.vstart; i<vl; ++i) { \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP();
#define VI_VFP_LOOP_END \
} \
- P.VU.vstart = 0; \
+ P.VU.vstart->write(0); \
#define VI_VFP_LOOP_REDUCTION_END(x) \
} \
- P.VU.vstart = 0; \
+ P.VU.vstart->write(0); \
if (vl > 0) { \
if (is_propagate && !is_active) { \
switch (x) { \
@@ -1951,7 +1951,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
break; \
}; \
} \
- P.VU.vstart = 0;
+ P.VU.vstart->write(0);
#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \
VI_CHECK_SSS(true); \
@@ -2056,7 +2056,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
switch(P.VU.vsew) { \
case e16: {\
float32_t vd_0 = P.VU.elt<float32_t>(rs1_num, 0); \
- for (reg_t i=P.VU.vstart; i<vl; ++i) { \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
is_active = true; \
float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \
@@ -2067,7 +2067,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
}\
case e32: {\
float64_t vd_0 = P.VU.elt<float64_t>(rs1_num, 0); \
- for (reg_t i=P.VU.vstart; i<vl; ++i) { \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
is_active = true; \
float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \
@@ -2269,7 +2269,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \
reg_t rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
softfloat_roundingMode = STATE.frm->read(); \
- for (reg_t i=P.VU.vstart; i<vl; ++i){ \
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \
VI_LOOP_ELEMENT_SKIP();
#define VI_VFP_CVT_SCALE(BODY8, BODY16, BODY32, \
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h
index 325e40a..7195345 100644
--- a/riscv/insns/vcompress_vm.h
+++ b/riscv/insns/vcompress_vm.h
@@ -1,5 +1,5 @@
// vcompress vd, vs2, vs1
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
require_align(insn.rd(), P.VU.vflmul);
require_align(insn.rs2(), P.VU.vflmul);
require(insn.rd() != insn.rs2());
diff --git a/riscv/insns/vcpop_m.h b/riscv/insns/vcpop_m.h
index c204b2c..fb3e620 100644
--- a/riscv/insns/vcpop_m.h
+++ b/riscv/insns/vcpop_m.h
@@ -5,9 +5,9 @@ reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
reg_t popcount = 0;
-for (reg_t i=P.VU.vstart; i<vl; ++i) {
+for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
const int midx = i / 32;
const int mpos = i % 32;
@@ -19,5 +19,5 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) {
popcount += (vs2_lsb && do_mask);
}
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
WRITE_RD(popcount);
diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h
index 3095723..71e8379 100644
--- a/riscv/insns/vfirst_m.h
+++ b/riscv/insns/vfirst_m.h
@@ -5,9 +5,9 @@ reg_t vl = P.VU.vl;
reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
reg_t pos = -1;
-for (reg_t i=P.VU.vstart; i < vl; ++i) {
+for (reg_t i=P.VU.vstart->read(); i < vl; ++i) {
VI_LOOP_ELEMENT_SKIP()
bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
@@ -16,5 +16,5 @@ for (reg_t i=P.VU.vstart; i < vl; ++i) {
break;
}
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
WRITE_RD(pos);
diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index c9b39fe..a38cd45 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -4,7 +4,7 @@ VI_VFP_COMMON;
switch(P.VU.vsew) {
case e16:
- for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float16_t>(rd_num, i, true);
auto rs1 = f16(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float16_t>(rs2_num, i);
@@ -17,7 +17,7 @@ switch(P.VU.vsew) {
}
break;
case e32:
- for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i, true);
auto rs1 = f32(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float32_t>(rs2_num, i);
@@ -30,7 +30,7 @@ switch(P.VU.vsew) {
}
break;
case e64:
- for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float64_t>(rd_num, i, true);
auto rs1 = f64(READ_FREG(rs1_num));
auto vs2 = P.VU.elt<float64_t>(rs2_num, i);
@@ -47,4 +47,4 @@ switch(P.VU.vsew) {
break;
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index 06d93b2..81605ea 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -35,4 +35,4 @@ if (FLEN == 64) {
WRITE_FRD(f32(vs2_0));
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index 4e7f82e..116ed45 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -8,7 +8,7 @@ require(STATE.frm->read() < 0x5);
reg_t vl = P.VU.vl;
-if (vl > 0 && P.VU.vstart < vl) {
+if (vl > 0 && P.VU.vstart->read() < vl) {
reg_t rd_num = insn.rd();
switch(P.VU.vsew) {
@@ -26,4 +26,4 @@ if (vl > 0 && P.VU.vstart < vl) {
break;
}
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index fb9c788..9f66004 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -3,7 +3,7 @@ require_align(insn.rd(), P.VU.vflmul);
VI_VFP_COMMON
switch(P.VU.vsew) {
case e16:
- for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float16_t>(rd_num, i, true);
auto rs1 = f16(READ_FREG(rs1_num));
@@ -11,7 +11,7 @@ switch(P.VU.vsew) {
}
break;
case e32:
- for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float32_t>(rd_num, i, true);
auto rs1 = f32(READ_FREG(rs1_num));
@@ -19,7 +19,7 @@ switch(P.VU.vsew) {
}
break;
case e64:
- for (reg_t i=P.VU.vstart; i<vl; ++i) {
+ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
auto &vd = P.VU.elt<float64_t>(rd_num, i, true);
auto rs1 = f64(READ_FREG(rs1_num));
@@ -28,4 +28,4 @@ switch(P.VU.vsew) {
break;
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h
index 012d124..0b5c89c 100644
--- a/riscv/insns/vid_v.h
+++ b/riscv/insns/vid_v.h
@@ -9,7 +9,7 @@ reg_t rs2_num = insn.rs2();
require_align(rd_num, P.VU.vflmul);
require_vm;
-for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
+for (reg_t i = P.VU.vstart->read() ; i < P.VU.vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
switch (sew) {
@@ -28,4 +28,4 @@ for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) {
}
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index a436825..68926e4 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -6,7 +6,7 @@ reg_t sew = P.VU.vsew;
reg_t rd_num = insn.rd();
reg_t rs1_num = insn.rs1();
reg_t rs2_num = insn.rs2();
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
require_vm;
require_align(rd_num, P.VU.vflmul);
require_noover(rd_num, P.VU.vflmul, rs2_num, 1);
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
index a4195cf..9e32531 100644
--- a/riscv/insns/vmsbf_m.h
+++ b/riscv/insns/vmsbf_m.h
@@ -1,7 +1,7 @@
// vmsbf.m vd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require_vector(true);
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
require_vm;
require(insn.rd() != insn.rs2());
@@ -10,7 +10,7 @@ reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
bool has_one = false;
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
+for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
const int midx = i / 64;
const int mpos = i % 64;
const uint64_t mmask = UINT64_C(1) << mpos; \
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
index a16ef68..8867646 100644
--- a/riscv/insns/vmsif_m.h
+++ b/riscv/insns/vmsif_m.h
@@ -1,7 +1,7 @@
// vmsif.m rd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require_vector(true);
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
require_vm;
require(insn.rd() != insn.rs2());
@@ -10,7 +10,7 @@ reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
bool has_one = false;
-for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
const int midx = i / 64;
const int mpos = i % 64;
const uint64_t mmask = UINT64_C(1) << mpos; \
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
index 5ef0bfd..a2f247f 100644
--- a/riscv/insns/vmsof_m.h
+++ b/riscv/insns/vmsof_m.h
@@ -1,7 +1,7 @@
// vmsof.m rd, vs2, vm
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
require_vector(true);
-require(P.VU.vstart == 0);
+require(P.VU.vstart->read() == 0);
require_vm;
require(insn.rd() != insn.rs2());
@@ -10,7 +10,7 @@ reg_t rd_num = insn.rd();
reg_t rs2_num = insn.rs2();
bool has_one = false;
-for (reg_t i = P.VU.vstart ; i < vl; ++i) {
+for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) {
const int midx = i / 64;
const int mpos = i % 64;
const uint64_t mmask = UINT64_C(1) << mpos; \
diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h
index 0e6a13e..cc2d6f0 100644
--- a/riscv/insns/vmv_s_x.h
+++ b/riscv/insns/vmv_s_x.h
@@ -4,7 +4,7 @@ require(insn.v_vm() == 1);
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
reg_t vl = P.VU.vl;
-if (vl > 0 && P.VU.vstart < vl) {
+if (vl > 0 && P.VU.vstart->read() < vl) {
reg_t rd_num = insn.rd();
reg_t sew = P.VU.vsew;
@@ -26,4 +26,4 @@ if (vl > 0 && P.VU.vstart < vl) {
vl = 0;
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h
index 2c03e43..39752f9 100644
--- a/riscv/insns/vmv_x_s.h
+++ b/riscv/insns/vmv_x_s.h
@@ -28,4 +28,4 @@ if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) {
}
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h
index 96f0074..3604935 100644
--- a/riscv/insns/vmvnfr_v.h
+++ b/riscv/insns/vmvnfr_v.h
@@ -9,9 +9,9 @@ require_align(vs2, len);
const reg_t size = len * P.VU.vlenb;
//register needs one-by-one copy to keep commitlog correct
-if (vd != vs2 && P.VU.vstart < size) {
- reg_t i = P.VU.vstart / P.VU.vlenb;
- reg_t off = P.VU.vstart % P.VU.vlenb;
+if (vd != vs2 && P.VU.vstart->read() < size) {
+ reg_t i = P.VU.vstart->read() / P.VU.vlenb;
+ reg_t off = P.VU.vstart->read() % P.VU.vlenb;
if (off) {
memcpy(&P.VU.elt<uint8_t>(vd + i, off, true),
&P.VU.elt<uint8_t>(vs2 + i, off), P.VU.vlenb - off);
@@ -24,4 +24,4 @@ if (vd != vs2 && P.VU.vstart < size) {
}
}
-P.VU.vstart = 0;
+P.VU.vstart->write(0);
diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h
index 385e9be..56e11e1 100644
--- a/riscv/insns/vrgather_vi.h
+++ b/riscv/insns/vrgather_vi.h
@@ -8,7 +8,7 @@ reg_t zimm5 = insn.v_zimm5();
VI_LOOP_BASE
-for (reg_t i = P.VU.vstart; i < vl; ++i) {
+for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
VI_LOOP_ELEMENT_SKIP();
switch (sew) {
diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h
index 4265789..3d53794 100644
--- a/riscv/insns/vslideup_vi.h
+++ b/riscv/insns/vslideup_vi.h
@@ -3,7 +3,7 @@ VI_CHECK_SLIDE(true);
const reg_t offset = insn.v_zimm5();
VI_LOOP_BASE
-if (P.VU.vstart < offset && i < offset)
+if (P.VU.vstart->read() < offset && i < offset)
continue;
switch (sew) {
diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h
index 720d2ab..43d41fb 100644
--- a/riscv/insns/vslideup_vx.h
+++ b/riscv/insns/vslideup_vx.h
@@ -3,7 +3,7 @@ VI_CHECK_SLIDE(true);
const reg_t offset = RS1;
VI_LOOP_BASE
-if (P.VU.vstart < offset && i < offset)
+if (P.VU.vstart->read() < offset && i < offset)
continue;
switch (sew) {
diff --git a/riscv/processor.cc b/riscv/processor.cc
index 39f04ff..7c12253 100644
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@@ -554,6 +554,7 @@ void processor_t::vectorUnit_t::reset(){
auto& csrmap = p->get_state()->csrmap;
csrmap[CSR_VXSAT] = vxsat = std::make_shared<vector_csr_t>(p, CSR_VXSAT, /*mask*/ 0x1ul);
+ csrmap[CSR_VSTART] = vstart = std::make_shared<vector_csr_t>(p, CSR_VSTART, /*mask*/ VLEN - 1);
vtype = 0;
set_vl(0, 0, 0, -1); // default to illegal configuration
@@ -591,7 +592,7 @@ reg_t processor_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newT
vl = reqVL > vlmax ? vlmax : reqVL;
}
- vstart = 0;
+ vstart->write_raw(0);
setvl_count++;
return vl;
}
@@ -992,10 +993,6 @@ void processor_t::set_csr(int which, reg_t val)
VU.vxsat->write((val & VCSR_VXSAT) >> VCSR_VXSAT_SHIFT);
VU.vxrm = (val & VCSR_VXRM) >> VCSR_VXRM_SHIFT;
break;
- case CSR_VSTART:
- dirty_vs_state;
- VU.vstart = val & (VU.get_vlen() - 1);
- break;
case CSR_VXRM:
dirty_vs_state;
VU.vxrm = val & 0x3ul;
@@ -1008,10 +1005,6 @@ void processor_t::set_csr(int which, reg_t val)
case CSR_VCSR:
LOG_CSR(CSR_VXRM);
break;
-
- case CSR_VSTART:
- LOG_CSR(CSR_VSTART);
- break;
case CSR_VXRM:
LOG_CSR(CSR_VXRM);
break;
@@ -1044,11 +1037,6 @@ reg_t processor_t::get_csr(int which, insn_t insn, bool write, bool peek)
if (!extension_enabled('V'))
break;
ret((VU.vxsat->read() << VCSR_VXSAT_SHIFT) | (VU.vxrm << VCSR_VXRM_SHIFT));
- case CSR_VSTART:
- require_vector_vs;
- if (!extension_enabled('V'))
- break;
- ret(VU.vstart);
case CSR_VXRM:
require_vector_vs;
if (!extension_enabled('V'))
diff --git a/riscv/processor.h b/riscv/processor.h
index dc8c987..f2b942f 100644
--- a/riscv/processor.h
+++ b/riscv/processor.h
@@ -511,8 +511,8 @@ public:
char reg_referenced[NVPR];
int setvl_count;
reg_t vlmax;
- reg_t vstart, vxrm, vl, vtype, vlenb;
- csr_t_p vxsat;
+ reg_t vxrm, vl, vtype, vlenb;
+ vector_csr_t_p vstart, vxsat;
reg_t vma, vta;
reg_t vsew;
float vflmul;
@@ -553,11 +553,11 @@ public:
reg_referenced{0},
setvl_count(0),
vlmax(0),
- vstart(0),
vxrm(0),
vl(0),
vtype(0),
vlenb(0),
+ vstart(0),
vxsat(0),
vma(0),
vta(0),