diff options
-rw-r--r-- | riscv/csrs.cc | 6 | ||||
-rw-r--r-- | riscv/csrs.h | 5 | ||||
-rw-r--r-- | riscv/decode.h | 106 | ||||
-rw-r--r-- | riscv/insns/vcompress_vm.h | 2 | ||||
-rw-r--r-- | riscv/insns/vcpop_m.h | 6 | ||||
-rw-r--r-- | riscv/insns/vfirst_m.h | 6 | ||||
-rw-r--r-- | riscv/insns/vfmerge_vfm.h | 8 | ||||
-rw-r--r-- | riscv/insns/vfmv_f_s.h | 2 | ||||
-rw-r--r-- | riscv/insns/vfmv_s_f.h | 4 | ||||
-rw-r--r-- | riscv/insns/vfmv_v_f.h | 8 | ||||
-rw-r--r-- | riscv/insns/vid_v.h | 4 | ||||
-rw-r--r-- | riscv/insns/viota_m.h | 2 | ||||
-rw-r--r-- | riscv/insns/vmsbf_m.h | 4 | ||||
-rw-r--r-- | riscv/insns/vmsif_m.h | 4 | ||||
-rw-r--r-- | riscv/insns/vmsof_m.h | 4 | ||||
-rw-r--r-- | riscv/insns/vmv_s_x.h | 4 | ||||
-rw-r--r-- | riscv/insns/vmv_x_s.h | 2 | ||||
-rw-r--r-- | riscv/insns/vmvnfr_v.h | 8 | ||||
-rw-r--r-- | riscv/insns/vrgather_vi.h | 2 | ||||
-rw-r--r-- | riscv/insns/vslideup_vi.h | 2 | ||||
-rw-r--r-- | riscv/insns/vslideup_vx.h | 2 | ||||
-rw-r--r-- | riscv/processor.cc | 16 | ||||
-rw-r--r-- | riscv/processor.h | 6 |
23 files changed, 106 insertions, 107 deletions
diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 4fff9cb..ef247bd 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -1200,6 +1200,12 @@ void vector_csr_t::verify_permissions(insn_t insn, bool write) const { basic_csr_t::verify_permissions(insn, write); } +void vector_csr_t::write_raw(const reg_t val) noexcept { + const bool success = basic_csr_t::unlogged_write(val); + if (success) + log_write(); +} + bool vector_csr_t::unlogged_write(const reg_t val) noexcept { if (mask == 0) return false; dirty_vs_state; diff --git a/riscv/csrs.h b/riscv/csrs.h index 6095b94..ff84246 100644 --- a/riscv/csrs.h +++ b/riscv/csrs.h @@ -618,11 +618,16 @@ class vector_csr_t: public basic_csr_t { public: vector_csr_t(processor_t* const proc, const reg_t addr, const reg_t mask); virtual void verify_permissions(insn_t insn, bool write) const override; + // Write without regard to mask, and without touching mstatus.VS + void write_raw(const reg_t val) noexcept; protected: virtual bool unlogged_write(const reg_t val) noexcept override; private: reg_t mask; }; +typedef std::shared_ptr<vector_csr_t> vector_csr_t_p; + + #endif diff --git a/riscv/decode.h b/riscv/decode.h index b9af04a..e5c67a9 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -258,7 +258,7 @@ private: require_extension('V'); \ require(!P.VU.vill); \ if (alu && !P.VU.vstart_alu) \ - require(P.VU.vstart == 0); \ + require(P.VU.vstart->read() == 0); \ WRITE_VSTATUS; \ dirty_vs_state; \ } while (0); @@ -267,7 +267,7 @@ private: require_vector_vs; \ require_extension('V'); \ if (alu && !P.VU.vstart_alu) \ - require(P.VU.vstart == 0); \ + require(P.VU.vstart->read() == 0); \ if (is_log) \ WRITE_VSTATUS; \ dirty_vs_state; \ @@ -431,7 +431,7 @@ inline long double to_f(float128_t f){long double r; memcpy(&r, &f, sizeof(r)); #define VI_ELEMENT_SKIP(inx) \ if (inx >= vl) { \ continue; \ - } else if (inx < P.VU.vstart) { \ + } else if (inx < P.VU.vstart->read()) { \ continue; \ } else { \ VI_LOOP_ELEMENT_SKIP(); \ @@ -598,7 +598,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) require(P.VU.vsew * 2 <= P.VU.ELEN); \ } \ require_align(insn.rs2(), P.VU.vflmul); \ - require(P.VU.vstart == 0); \ + require(P.VU.vstart->read() == 0); \ #define VI_CHECK_SLIDE(is_over) \ require_align(insn.rs2(), P.VU.vflmul); \ @@ -619,7 +619,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) reg_t rd_num = insn.rd(); \ reg_t rs1_num = insn.rs1(); \ reg_t rs2_num = insn.rs2(); \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ #define VI_LOOP_BASE \ VI_GENERAL_LOOP_BASE \ @@ -627,14 +627,14 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) #define VI_LOOP_END \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_LOOP_REDUCTION_END(x) \ } \ if (vl > 0) { \ vd_0_des = vd_0_res; \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_LOOP_CMP_BASE \ require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ @@ -644,7 +644,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) reg_t rd_num = insn.rd(); \ reg_t rs1_num = insn.rs1(); \ reg_t rs2_num = insn.rs2(); \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); \ uint64_t mmask = UINT64_C(1) << mpos; \ uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx, true); \ @@ -653,13 +653,13 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) #define VI_LOOP_CMP_END \ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_LOOP_MASK(op) \ require(P.VU.vsew <= e64); \ require_vector(true);\ reg_t vl = P.VU.vl; \ - for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ int midx = i / 64; \ int mpos = i % 64; \ uint64_t mmask = UINT64_C(1) << mpos; \ @@ -668,7 +668,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx, true); \ res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_LOOP_NSHIFT_BASE \ VI_GENERAL_LOOP_BASE; \ @@ -955,7 +955,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) reg_t rs2_num = insn.rs2(); \ auto &vd_0_des = P.VU.elt<type_sew_t<x>::type>(rd_num, 0, true); \ auto vd_0_res = P.VU.elt<type_sew_t<x>::type>(rs1_num, 0); \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); \ auto vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); \ @@ -986,7 +986,7 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) reg_t rs2_num = insn.rs2(); \ auto &vd_0_des = P.VU.elt<type_usew_t<x>::type>(rd_num, 0, true); \ auto vd_0_res = P.VU.elt<type_usew_t<x>::type>(rs1_num, 0); \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); \ auto vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i); @@ -1305,7 +1305,7 @@ VI_LOOP_END reg_t rs2_num = insn.rs2(); \ auto &vd_0_des = P.VU.elt<type_sew_t<sew2>::type>(rd_num, 0, true); \ auto vd_0_res = P.VU.elt<type_sew_t<sew2>::type>(rs1_num, 0); \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); \ auto vs2 = P.VU.elt<type_sew_t<sew1>::type>(rs2_num, i); @@ -1333,7 +1333,7 @@ VI_LOOP_END reg_t rs2_num = insn.rs2(); \ auto &vd_0_des = P.VU.elt<type_usew_t<sew2>::type>(rd_num, 0, true); \ auto vd_0_res = P.VU.elt<type_usew_t<sew2>::type>(rs1_num, 0); \ - for (reg_t i=P.VU.vstart; i<vl; ++i) { \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ auto vs2 = P.VU.elt<type_usew_t<sew1>::type>(rs2_num, i); @@ -1547,14 +1547,14 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ for (reg_t i = 0; i < vl; ++i) { \ VI_ELEMENT_SKIP(i); \ VI_STRIP(i); \ - P.VU.vstart = i; \ + P.VU.vstart->write(i); \ for (reg_t fn = 0; fn < nf; ++fn) { \ elt_width##_t val = MMU.load_##elt_width( \ baseAddr + (stride) + (offset) * sizeof(elt_width##_t)); \ P.VU.elt<elt_width##_t>(vd + fn * emul, vreg_inx, true) = val; \ } \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_LD_INDEX(elt_width, is_seg) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1568,7 +1568,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ for (reg_t i = 0; i < vl; ++i) { \ VI_ELEMENT_SKIP(i); \ VI_STRIP(i); \ - P.VU.vstart = i; \ + P.VU.vstart->write(i); \ for (reg_t fn = 0; fn < nf; ++fn) { \ switch(P.VU.vsew){ \ case e8: \ @@ -1590,7 +1590,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ } \ } \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_ST(stride, offset, elt_width, is_mask_ldst) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1601,14 +1601,14 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ for (reg_t i = 0; i < vl; ++i) { \ VI_STRIP(i) \ VI_ELEMENT_SKIP(i); \ - P.VU.vstart = i; \ + P.VU.vstart->write(i); \ for (reg_t fn = 0; fn < nf; ++fn) { \ elt_width##_t val = P.VU.elt<elt_width##_t>(vs3 + fn * emul, vreg_inx); \ MMU.store_##elt_width( \ baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \ } \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_ST_INDEX(elt_width, is_seg) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1622,7 +1622,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ for (reg_t i = 0; i < vl; ++i) { \ VI_STRIP(i) \ VI_ELEMENT_SKIP(i); \ - P.VU.vstart = i; \ + P.VU.vstart->write(i); \ for (reg_t fn = 0; fn < nf; ++fn) { \ switch (P.VU.vsew) { \ case e8: \ @@ -1644,7 +1644,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ } \ } \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_LDST_FF(elt_width) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1654,7 +1654,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ const reg_t rd_num = insn.rd(); \ VI_CHECK_LOAD(elt_width, false); \ bool early_stop = false; \ - for (reg_t i = p->VU.vstart; i < vl; ++i) { \ + for (reg_t i = p->VU.vstart->read(); i < vl; ++i) { \ VI_STRIP(i); \ VI_ELEMENT_SKIP(i); \ \ @@ -1678,7 +1678,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ break; \ } \ } \ - p->VU.vstart = 0; + p->VU.vstart->write(0); #define VI_LD_WHOLE(elt_width) \ require_vector_novtype(true, false); \ @@ -1688,24 +1688,24 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ require_align(vd, len); \ const reg_t elt_per_reg = P.VU.vlenb / sizeof(elt_width ## _t); \ const reg_t size = len * elt_per_reg; \ - if (P.VU.vstart < size) { \ - reg_t i = P.VU.vstart / elt_per_reg; \ - reg_t off = P.VU.vstart % elt_per_reg; \ + if (P.VU.vstart->read() < size) { \ + reg_t i = P.VU.vstart->read() / elt_per_reg; \ + reg_t off = P.VU.vstart->read() % elt_per_reg; \ if (off) { \ for (reg_t pos = off; pos < elt_per_reg; ++pos) { \ auto val = MMU.load_## elt_width(baseAddr + \ - P.VU.vstart * sizeof(elt_width ## _t)); \ + P.VU.vstart->read() * sizeof(elt_width ## _t)); \ P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \ - P.VU.vstart++; \ + P.VU.vstart->write(P.VU.vstart->read() + 1); \ } \ ++i; \ } \ for (; i < len; ++i) { \ for (reg_t pos = 0; pos < elt_per_reg; ++pos) { \ auto val = MMU.load_## elt_width(baseAddr + \ - P.VU.vstart * sizeof(elt_width ## _t)); \ + P.VU.vstart->read() * sizeof(elt_width ## _t)); \ P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \ - P.VU.vstart++; \ + P.VU.vstart->write(P.VU.vstart->read() + 1); \ } \ } \ } \ @@ -1719,26 +1719,26 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ require_align(vs3, len); \ const reg_t size = len * P.VU.vlenb; \ \ - if (P.VU.vstart < size) { \ - reg_t i = P.VU.vstart / P.VU.vlenb; \ - reg_t off = P.VU.vstart % P.VU.vlenb; \ + if (P.VU.vstart->read() < size) { \ + reg_t i = P.VU.vstart->read() / P.VU.vlenb; \ + reg_t off = P.VU.vstart->read() % P.VU.vlenb; \ if (off) { \ for (reg_t pos = off; pos < P.VU.vlenb; ++pos) { \ auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \ - MMU.store_uint8(baseAddr + P.VU.vstart, val); \ - P.VU.vstart++; \ + MMU.store_uint8(baseAddr + P.VU.vstart->read(), val); \ + P.VU.vstart->write(P.VU.vstart->read() + 1); \ } \ i++; \ } \ for (; i < len; ++i) { \ for (reg_t pos = 0; pos < P.VU.vlenb; ++pos) { \ auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \ - MMU.store_uint8(baseAddr + P.VU.vstart, val); \ - P.VU.vstart++; \ + MMU.store_uint8(baseAddr + P.VU.vstart->read(), val); \ + P.VU.vstart->write(P.VU.vstart->read() + 1); \ } \ } \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); // // vector: amo @@ -1768,10 +1768,10 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vd = insn.rd(); \ - for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_ELEMENT_SKIP(i); \ VI_STRIP(i); \ - P.VU.vstart = i; \ + P.VU.vstart->write(i); \ switch (P.VU.vsew) { \ case e32: {\ auto vs3 = P.VU.elt< type ## 32_t>(vd, vreg_inx); \ @@ -1792,7 +1792,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ break; \ } \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); // vector: sign/unsiged extension #define VI_VV_EXT(div, type) \ @@ -1856,12 +1856,12 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ #define VI_VFP_LOOP_BASE \ VI_VFP_COMMON \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); #define VI_VFP_LOOP_CMP_BASE \ VI_VFP_COMMON \ - for (reg_t i = P.VU.vstart; i < vl; ++i) { \ + for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ uint64_t mmask = UINT64_C(1) << mpos; \ uint64_t &vdi = P.VU.elt<uint64_t>(rd_num, midx, true); \ @@ -1872,7 +1872,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ float##width##_t vs1_0 = P.VU.elt<float##width##_t>(rs1_num, 0); \ vd_0 = vs1_0; \ bool is_active = false; \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); \ float##width##_t vs2 = P.VU.elt<float##width##_t>(rs2_num, i); \ is_active = true; \ @@ -1880,16 +1880,16 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ #define VI_VFP_LOOP_WIDE_REDUCTION_BASE \ VI_VFP_COMMON \ float64_t vd_0 = f64(P.VU.elt<float64_t>(rs1_num, 0).v); \ - for (reg_t i=P.VU.vstart; i<vl; ++i) { \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); #define VI_VFP_LOOP_END \ } \ - P.VU.vstart = 0; \ + P.VU.vstart->write(0); \ #define VI_VFP_LOOP_REDUCTION_END(x) \ } \ - P.VU.vstart = 0; \ + P.VU.vstart->write(0); \ if (vl > 0) { \ if (is_propagate && !is_active) { \ switch (x) { \ @@ -1951,7 +1951,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ break; \ }; \ } \ - P.VU.vstart = 0; + P.VU.vstart->write(0); #define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(true); \ @@ -2056,7 +2056,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ switch(P.VU.vsew) { \ case e16: {\ float32_t vd_0 = P.VU.elt<float32_t>(rs1_num, 0); \ - for (reg_t i=P.VU.vstart; i<vl; ++i) { \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ is_active = true; \ float32_t vs2 = f16_to_f32(P.VU.elt<float16_t>(rs2_num, i)); \ @@ -2067,7 +2067,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ }\ case e32: {\ float64_t vd_0 = P.VU.elt<float64_t>(rs1_num, 0); \ - for (reg_t i=P.VU.vstart; i<vl; ++i) { \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ is_active = true; \ float64_t vs2 = f32_to_f64(P.VU.elt<float32_t>(rs2_num, i)); \ @@ -2269,7 +2269,7 @@ for (reg_t i = 0; i < P.VU.vlmax && P.VU.vl != 0; ++i) { \ reg_t rs1_num = insn.rs1(); \ reg_t rs2_num = insn.rs2(); \ softfloat_roundingMode = STATE.frm->read(); \ - for (reg_t i=P.VU.vstart; i<vl; ++i){ \ + for (reg_t i=P.VU.vstart->read(); i<vl; ++i){ \ VI_LOOP_ELEMENT_SKIP(); #define VI_VFP_CVT_SCALE(BODY8, BODY16, BODY32, \ diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h index 325e40a..7195345 100644 --- a/riscv/insns/vcompress_vm.h +++ b/riscv/insns/vcompress_vm.h @@ -1,5 +1,5 @@ // vcompress vd, vs2, vs1 -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); require_align(insn.rd(), P.VU.vflmul); require_align(insn.rs2(), P.VU.vflmul); require(insn.rd() != insn.rs2()); diff --git a/riscv/insns/vcpop_m.h b/riscv/insns/vcpop_m.h index c204b2c..fb3e620 100644 --- a/riscv/insns/vcpop_m.h +++ b/riscv/insns/vcpop_m.h @@ -5,9 +5,9 @@ reg_t vl = P.VU.vl; reg_t sew = P.VU.vsew; reg_t rd_num = insn.rd(); reg_t rs2_num = insn.rs2(); -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); reg_t popcount = 0; -for (reg_t i=P.VU.vstart; i<vl; ++i) { +for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { const int midx = i / 32; const int mpos = i % 32; @@ -19,5 +19,5 @@ for (reg_t i=P.VU.vstart; i<vl; ++i) { popcount += (vs2_lsb && do_mask); } } -P.VU.vstart = 0; +P.VU.vstart->write(0); WRITE_RD(popcount); diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h index 3095723..71e8379 100644 --- a/riscv/insns/vfirst_m.h +++ b/riscv/insns/vfirst_m.h @@ -5,9 +5,9 @@ reg_t vl = P.VU.vl; reg_t sew = P.VU.vsew; reg_t rd_num = insn.rd(); reg_t rs2_num = insn.rs2(); -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); reg_t pos = -1; -for (reg_t i=P.VU.vstart; i < vl; ++i) { +for (reg_t i=P.VU.vstart->read(); i < vl; ++i) { VI_LOOP_ELEMENT_SKIP() bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; @@ -16,5 +16,5 @@ for (reg_t i=P.VU.vstart; i < vl; ++i) { break; } } -P.VU.vstart = 0; +P.VU.vstart->write(0); WRITE_RD(pos); diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h index c9b39fe..a38cd45 100644 --- a/riscv/insns/vfmerge_vfm.h +++ b/riscv/insns/vfmerge_vfm.h @@ -4,7 +4,7 @@ VI_VFP_COMMON; switch(P.VU.vsew) { case e16: - for (reg_t i=P.VU.vstart; i<vl; ++i) { + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { auto &vd = P.VU.elt<float16_t>(rd_num, i, true); auto rs1 = f16(READ_FREG(rs1_num)); auto vs2 = P.VU.elt<float16_t>(rs2_num, i); @@ -17,7 +17,7 @@ switch(P.VU.vsew) { } break; case e32: - for (reg_t i=P.VU.vstart; i<vl; ++i) { + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { auto &vd = P.VU.elt<float32_t>(rd_num, i, true); auto rs1 = f32(READ_FREG(rs1_num)); auto vs2 = P.VU.elt<float32_t>(rs2_num, i); @@ -30,7 +30,7 @@ switch(P.VU.vsew) { } break; case e64: - for (reg_t i=P.VU.vstart; i<vl; ++i) { + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { auto &vd = P.VU.elt<float64_t>(rd_num, i, true); auto rs1 = f64(READ_FREG(rs1_num)); auto vs2 = P.VU.elt<float64_t>(rs2_num, i); @@ -47,4 +47,4 @@ switch(P.VU.vsew) { break; } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h index 06d93b2..81605ea 100644 --- a/riscv/insns/vfmv_f_s.h +++ b/riscv/insns/vfmv_f_s.h @@ -35,4 +35,4 @@ if (FLEN == 64) { WRITE_FRD(f32(vs2_0)); } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h index 4e7f82e..116ed45 100644 --- a/riscv/insns/vfmv_s_f.h +++ b/riscv/insns/vfmv_s_f.h @@ -8,7 +8,7 @@ require(STATE.frm->read() < 0x5); reg_t vl = P.VU.vl; -if (vl > 0 && P.VU.vstart < vl) { +if (vl > 0 && P.VU.vstart->read() < vl) { reg_t rd_num = insn.rd(); switch(P.VU.vsew) { @@ -26,4 +26,4 @@ if (vl > 0 && P.VU.vstart < vl) { break; } } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h index fb9c788..9f66004 100644 --- a/riscv/insns/vfmv_v_f.h +++ b/riscv/insns/vfmv_v_f.h @@ -3,7 +3,7 @@ require_align(insn.rd(), P.VU.vflmul); VI_VFP_COMMON switch(P.VU.vsew) { case e16: - for (reg_t i=P.VU.vstart; i<vl; ++i) { + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { auto &vd = P.VU.elt<float16_t>(rd_num, i, true); auto rs1 = f16(READ_FREG(rs1_num)); @@ -11,7 +11,7 @@ switch(P.VU.vsew) { } break; case e32: - for (reg_t i=P.VU.vstart; i<vl; ++i) { + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { auto &vd = P.VU.elt<float32_t>(rd_num, i, true); auto rs1 = f32(READ_FREG(rs1_num)); @@ -19,7 +19,7 @@ switch(P.VU.vsew) { } break; case e64: - for (reg_t i=P.VU.vstart; i<vl; ++i) { + for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { auto &vd = P.VU.elt<float64_t>(rd_num, i, true); auto rs1 = f64(READ_FREG(rs1_num)); @@ -28,4 +28,4 @@ switch(P.VU.vsew) { break; } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h index 012d124..0b5c89c 100644 --- a/riscv/insns/vid_v.h +++ b/riscv/insns/vid_v.h @@ -9,7 +9,7 @@ reg_t rs2_num = insn.rs2(); require_align(rd_num, P.VU.vflmul); require_vm; -for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { +for (reg_t i = P.VU.vstart->read() ; i < P.VU.vl; ++i) { VI_LOOP_ELEMENT_SKIP(); switch (sew) { @@ -28,4 +28,4 @@ for (reg_t i = P.VU.vstart ; i < P.VU.vl; ++i) { } } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h index a436825..68926e4 100644 --- a/riscv/insns/viota_m.h +++ b/riscv/insns/viota_m.h @@ -6,7 +6,7 @@ reg_t sew = P.VU.vsew; reg_t rd_num = insn.rd(); reg_t rs1_num = insn.rs1(); reg_t rs2_num = insn.rs2(); -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); require_vm; require_align(rd_num, P.VU.vflmul); require_noover(rd_num, P.VU.vflmul, rs2_num, 1); diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h index a4195cf..9e32531 100644 --- a/riscv/insns/vmsbf_m.h +++ b/riscv/insns/vmsbf_m.h @@ -1,7 +1,7 @@ // vmsbf.m vd, vs2, vm require(P.VU.vsew >= e8 && P.VU.vsew <= e64); require_vector(true); -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); require_vm; require(insn.rd() != insn.rs2()); @@ -10,7 +10,7 @@ reg_t rd_num = insn.rd(); reg_t rs2_num = insn.rs2(); bool has_one = false; -for (reg_t i = P.VU.vstart; i < vl; ++i) { +for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { const int midx = i / 64; const int mpos = i % 64; const uint64_t mmask = UINT64_C(1) << mpos; \ diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h index a16ef68..8867646 100644 --- a/riscv/insns/vmsif_m.h +++ b/riscv/insns/vmsif_m.h @@ -1,7 +1,7 @@ // vmsif.m rd, vs2, vm require(P.VU.vsew >= e8 && P.VU.vsew <= e64); require_vector(true); -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); require_vm; require(insn.rd() != insn.rs2()); @@ -10,7 +10,7 @@ reg_t rd_num = insn.rd(); reg_t rs2_num = insn.rs2(); bool has_one = false; -for (reg_t i = P.VU.vstart ; i < vl; ++i) { +for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { const int midx = i / 64; const int mpos = i % 64; const uint64_t mmask = UINT64_C(1) << mpos; \ diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h index 5ef0bfd..a2f247f 100644 --- a/riscv/insns/vmsof_m.h +++ b/riscv/insns/vmsof_m.h @@ -1,7 +1,7 @@ // vmsof.m rd, vs2, vm require(P.VU.vsew >= e8 && P.VU.vsew <= e64); require_vector(true); -require(P.VU.vstart == 0); +require(P.VU.vstart->read() == 0); require_vm; require(insn.rd() != insn.rs2()); @@ -10,7 +10,7 @@ reg_t rd_num = insn.rd(); reg_t rs2_num = insn.rs2(); bool has_one = false; -for (reg_t i = P.VU.vstart ; i < vl; ++i) { +for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) { const int midx = i / 64; const int mpos = i % 64; const uint64_t mmask = UINT64_C(1) << mpos; \ diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h index 0e6a13e..cc2d6f0 100644 --- a/riscv/insns/vmv_s_x.h +++ b/riscv/insns/vmv_s_x.h @@ -4,7 +4,7 @@ require(insn.v_vm() == 1); require(P.VU.vsew >= e8 && P.VU.vsew <= e64); reg_t vl = P.VU.vl; -if (vl > 0 && P.VU.vstart < vl) { +if (vl > 0 && P.VU.vstart->read() < vl) { reg_t rd_num = insn.rd(); reg_t sew = P.VU.vsew; @@ -26,4 +26,4 @@ if (vl > 0 && P.VU.vstart < vl) { vl = 0; } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h index 2c03e43..39752f9 100644 --- a/riscv/insns/vmv_x_s.h +++ b/riscv/insns/vmv_x_s.h @@ -28,4 +28,4 @@ if (!(rs1 >= 0 && rs1 < (P.VU.get_vlen() / sew))) { } } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h index 96f0074..3604935 100644 --- a/riscv/insns/vmvnfr_v.h +++ b/riscv/insns/vmvnfr_v.h @@ -9,9 +9,9 @@ require_align(vs2, len); const reg_t size = len * P.VU.vlenb; //register needs one-by-one copy to keep commitlog correct -if (vd != vs2 && P.VU.vstart < size) { - reg_t i = P.VU.vstart / P.VU.vlenb; - reg_t off = P.VU.vstart % P.VU.vlenb; +if (vd != vs2 && P.VU.vstart->read() < size) { + reg_t i = P.VU.vstart->read() / P.VU.vlenb; + reg_t off = P.VU.vstart->read() % P.VU.vlenb; if (off) { memcpy(&P.VU.elt<uint8_t>(vd + i, off, true), &P.VU.elt<uint8_t>(vs2 + i, off), P.VU.vlenb - off); @@ -24,4 +24,4 @@ if (vd != vs2 && P.VU.vstart < size) { } } -P.VU.vstart = 0; +P.VU.vstart->write(0); diff --git a/riscv/insns/vrgather_vi.h b/riscv/insns/vrgather_vi.h index 385e9be..56e11e1 100644 --- a/riscv/insns/vrgather_vi.h +++ b/riscv/insns/vrgather_vi.h @@ -8,7 +8,7 @@ reg_t zimm5 = insn.v_zimm5(); VI_LOOP_BASE -for (reg_t i = P.VU.vstart; i < vl; ++i) { +for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { VI_LOOP_ELEMENT_SKIP(); switch (sew) { diff --git a/riscv/insns/vslideup_vi.h b/riscv/insns/vslideup_vi.h index 4265789..3d53794 100644 --- a/riscv/insns/vslideup_vi.h +++ b/riscv/insns/vslideup_vi.h @@ -3,7 +3,7 @@ VI_CHECK_SLIDE(true); const reg_t offset = insn.v_zimm5(); VI_LOOP_BASE -if (P.VU.vstart < offset && i < offset) +if (P.VU.vstart->read() < offset && i < offset) continue; switch (sew) { diff --git a/riscv/insns/vslideup_vx.h b/riscv/insns/vslideup_vx.h index 720d2ab..43d41fb 100644 --- a/riscv/insns/vslideup_vx.h +++ b/riscv/insns/vslideup_vx.h @@ -3,7 +3,7 @@ VI_CHECK_SLIDE(true); const reg_t offset = RS1; VI_LOOP_BASE -if (P.VU.vstart < offset && i < offset) +if (P.VU.vstart->read() < offset && i < offset) continue; switch (sew) { diff --git a/riscv/processor.cc b/riscv/processor.cc index 39f04ff..7c12253 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -554,6 +554,7 @@ void processor_t::vectorUnit_t::reset(){ auto& csrmap = p->get_state()->csrmap; csrmap[CSR_VXSAT] = vxsat = std::make_shared<vector_csr_t>(p, CSR_VXSAT, /*mask*/ 0x1ul); + csrmap[CSR_VSTART] = vstart = std::make_shared<vector_csr_t>(p, CSR_VSTART, /*mask*/ VLEN - 1); vtype = 0; set_vl(0, 0, 0, -1); // default to illegal configuration @@ -591,7 +592,7 @@ reg_t processor_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newT vl = reqVL > vlmax ? vlmax : reqVL; } - vstart = 0; + vstart->write_raw(0); setvl_count++; return vl; } @@ -992,10 +993,6 @@ void processor_t::set_csr(int which, reg_t val) VU.vxsat->write((val & VCSR_VXSAT) >> VCSR_VXSAT_SHIFT); VU.vxrm = (val & VCSR_VXRM) >> VCSR_VXRM_SHIFT; break; - case CSR_VSTART: - dirty_vs_state; - VU.vstart = val & (VU.get_vlen() - 1); - break; case CSR_VXRM: dirty_vs_state; VU.vxrm = val & 0x3ul; @@ -1008,10 +1005,6 @@ void processor_t::set_csr(int which, reg_t val) case CSR_VCSR: LOG_CSR(CSR_VXRM); break; - - case CSR_VSTART: - LOG_CSR(CSR_VSTART); - break; case CSR_VXRM: LOG_CSR(CSR_VXRM); break; @@ -1044,11 +1037,6 @@ reg_t processor_t::get_csr(int which, insn_t insn, bool write, bool peek) if (!extension_enabled('V')) break; ret((VU.vxsat->read() << VCSR_VXSAT_SHIFT) | (VU.vxrm << VCSR_VXRM_SHIFT)); - case CSR_VSTART: - require_vector_vs; - if (!extension_enabled('V')) - break; - ret(VU.vstart); case CSR_VXRM: require_vector_vs; if (!extension_enabled('V')) diff --git a/riscv/processor.h b/riscv/processor.h index dc8c987..f2b942f 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -511,8 +511,8 @@ public: char reg_referenced[NVPR]; int setvl_count; reg_t vlmax; - reg_t vstart, vxrm, vl, vtype, vlenb; - csr_t_p vxsat; + reg_t vxrm, vl, vtype, vlenb; + vector_csr_t_p vstart, vxsat; reg_t vma, vta; reg_t vsew; float vflmul; @@ -553,11 +553,11 @@ public: reg_referenced{0}, setvl_count(0), vlmax(0), - vstart(0), vxrm(0), vl(0), vtype(0), vlenb(0), + vstart(0), vxsat(0), vma(0), vta(0), |