diff options
32 files changed, 83 insertions, 76 deletions
diff --git a/riscv/csrs.cc b/riscv/csrs.cc index cc12e1d..27c26d3 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -1598,7 +1598,7 @@ void vector_csr_t::write_raw(const reg_t val) noexcept { bool vector_csr_t::unlogged_write(const reg_t val) noexcept { if (mask == 0) return false; - dirty_vs_state; + STATE.sstatus->dirty(SSTATUS_VS); return basic_csr_t::unlogged_write(val & mask); } @@ -1612,7 +1612,7 @@ void vxsat_csr_t::verify_permissions(insn_t insn, bool write) const { } bool vxsat_csr_t::unlogged_write(const reg_t val) noexcept { - dirty_vs_state; + STATE.sstatus->dirty(SSTATUS_VS); return masked_csr_t::unlogged_write(val); } diff --git a/riscv/decode_macros.h b/riscv/decode_macros.h index 6f24799..b778668 100644 --- a/riscv/decode_macros.h +++ b/riscv/decode_macros.h @@ -110,7 +110,6 @@ #define FRS3_D READ_FREG_D(insn.rs3()) #define dirty_fp_state STATE.sstatus->dirty(SSTATUS_FS) #define dirty_ext_state STATE.sstatus->dirty(SSTATUS_XS) -#define dirty_vs_state STATE.sstatus->dirty(SSTATUS_VS) #define DO_WRITE_FREG(reg, value) (STATE.FPR.write(reg, value), dirty_fp_state) #define WRITE_FRD(value) WRITE_FREG(insn.rd(), value) #define WRITE_FRD_H(value) \ @@ -173,14 +172,12 @@ static inline bool is_aligned(const unsigned val, const unsigned pos) if (alu && !P.VU.vstart_alu) \ require(P.VU.vstart->read() == 0); \ WRITE_VSTATUS; \ - dirty_vs_state; \ } while (0); #define require_vector_novtype(is_log) \ do { \ require_vector_vs; \ if (is_log) \ WRITE_VSTATUS; \ - dirty_vs_state; \ } while (0); #define require_align(val, pos) require(is_aligned(val, pos)) #define require_noover(astart, asize, bstart, bsize) \ diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h index 6624d8b..8754eb0 100644 --- a/riscv/insns/vcompress_vm.h +++ b/riscv/insns/vcompress_vm.h @@ -27,4 +27,5 @@ VI_GENERAL_LOOP_BASE ++pos; } -VI_LOOP_END_BASE; + +VI_LOOP_END; diff --git a/riscv/insns/vcpop_m.h b/riscv/insns/vcpop_m.h index 26a1276..260f45d 100644 --- a/riscv/insns/vcpop_m.h +++ b/riscv/insns/vcpop_m.h @@ -10,3 +10,5 @@ for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { popcount += vs2_bit && (insn.v_vm() || P.VU.mask_elt(0, i)); } WRITE_RD(popcount); + +VECTOR_END; diff --git a/riscv/insns/vfbdot_vv.h b/riscv/insns/vfbdot_vv.h index 8d4c792..b70e309 100644 --- a/riscv/insns/vfbdot_vv.h +++ b/riscv/insns/vfbdot_vv.h @@ -14,3 +14,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h index e3f5263..261ea5a 100644 --- a/riscv/insns/vfirst_m.h +++ b/riscv/insns/vfirst_m.h @@ -14,3 +14,5 @@ for (reg_t i=P.VU.vstart->read(); i < vl; ++i) { } } WRITE_RD(pos); + +VECTOR_END; diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h index 65a3cff..f4e984e 100644 --- a/riscv/insns/vfmv_f_s.h +++ b/riscv/insns/vfmv_f_s.h @@ -29,4 +29,4 @@ if (FLEN == 64) { WRITE_FRD(f32(vs2_0)); } -P.VU.vstart->write(0); +VECTOR_END; diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h index 917948d..11975f2 100644 --- a/riscv/insns/vfmv_s_f.h +++ b/riscv/insns/vfmv_s_f.h @@ -21,4 +21,4 @@ if (vl > 0 && P.VU.vstart->read() < vl) { break; } } -P.VU.vstart->write(0); +VECTOR_END; diff --git a/riscv/insns/vfqbdot_alt_vv.h b/riscv/insns/vfqbdot_alt_vv.h index f1df781..dcdcb11 100644 --- a/riscv/insns/vfqbdot_alt_vv.h +++ b/riscv/insns/vfqbdot_alt_vv.h @@ -15,3 +15,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vfqbdot_vv.h b/riscv/insns/vfqbdot_vv.h index fe3e652..b4fdffe 100644 --- a/riscv/insns/vfqbdot_vv.h +++ b/riscv/insns/vfqbdot_vv.h @@ -15,3 +15,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vfqldot_alt_vv.h b/riscv/insns/vfqldot_alt_vv.h index ea18828..a2b7fd2 100644 --- a/riscv/insns/vfqldot_alt_vv.h +++ b/riscv/insns/vfqldot_alt_vv.h @@ -15,3 +15,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vfqldot_vv.h b/riscv/insns/vfqldot_vv.h index b03ec29..ca46d26 100644 --- a/riscv/insns/vfqldot_vv.h +++ b/riscv/insns/vfqldot_vv.h @@ -15,3 +15,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vfwbdot_vv.h b/riscv/insns/vfwbdot_vv.h index b8d35a7..47f7c9b 100644 --- a/riscv/insns/vfwbdot_vv.h +++ b/riscv/insns/vfwbdot_vv.h @@ -13,3 +13,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vfwldot_vv.h b/riscv/insns/vfwldot_vv.h index 63a4e47..fe1cf09 100644 --- a/riscv/insns/vfwldot_vv.h +++ b/riscv/insns/vfwldot_vv.h @@ -13,3 +13,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vid_v.h b/riscv/insns/vid_v.h index 510132d..ad7da21 100644 --- a/riscv/insns/vid_v.h +++ b/riscv/insns/vid_v.h @@ -25,4 +25,4 @@ for (reg_t i = P.VU.vstart->read() ; i < P.VU.vl->read(); ++i) { } } -P.VU.vstart->write(0); +VECTOR_END; diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h index 00155db..4a67769 100644 --- a/riscv/insns/viota_m.h +++ b/riscv/insns/viota_m.h @@ -45,3 +45,4 @@ for (reg_t i = 0; i < vl; ++i) { } } +VECTOR_END; diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h index 3f736e0..7c907a6 100644 --- a/riscv/insns/vmsbf_m.h +++ b/riscv/insns/vmsbf_m.h @@ -25,3 +25,5 @@ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { P.VU.set_mask_elt(rd_num, i, res); } } + +VECTOR_END; diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h index b029327..7d98655 100644 --- a/riscv/insns/vmsif_m.h +++ b/riscv/insns/vmsif_m.h @@ -26,3 +26,5 @@ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { P.VU.set_mask_elt(rd_num, i, res); } } + +VECTOR_END; diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h index 5753dbf..caaff04 100644 --- a/riscv/insns/vmsof_m.h +++ b/riscv/insns/vmsof_m.h @@ -24,3 +24,5 @@ for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) { P.VU.set_mask_elt(rd_num, i, res); } } + +VECTOR_END; diff --git a/riscv/insns/vmv_s_x.h b/riscv/insns/vmv_s_x.h index 23a6b56..bd848a8 100644 --- a/riscv/insns/vmv_s_x.h +++ b/riscv/insns/vmv_s_x.h @@ -26,4 +26,4 @@ if (vl > 0 && P.VU.vstart->read() < vl) { vl = 0; } -P.VU.vstart->write(0); +VECTOR_END; diff --git a/riscv/insns/vmv_x_s.h b/riscv/insns/vmv_x_s.h index 57a9e1a..c28dbf4 100644 --- a/riscv/insns/vmv_x_s.h +++ b/riscv/insns/vmv_x_s.h @@ -24,4 +24,4 @@ default: WRITE_RD(sext_xlen(res)); -P.VU.vstart->write(0); +VECTOR_END; diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h index 9c52810..fa47bb6 100644 --- a/riscv/insns/vmvnfr_v.h +++ b/riscv/insns/vmvnfr_v.h @@ -24,4 +24,4 @@ if (vd != vs2 && start < size) { } } -P.VU.vstart->write(0); +VECTOR_END; diff --git a/riscv/insns/vqbdots_vv.h b/riscv/insns/vqbdots_vv.h index 55c3dd2..de34835 100644 --- a/riscv/insns/vqbdots_vv.h +++ b/riscv/insns/vqbdots_vv.h @@ -21,3 +21,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vqbdotu_vv.h b/riscv/insns/vqbdotu_vv.h index a73d568..8eee3a9 100644 --- a/riscv/insns/vqbdotu_vv.h +++ b/riscv/insns/vqbdotu_vv.h @@ -21,3 +21,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vqldots_vv.h b/riscv/insns/vqldots_vv.h index ce6376a..bde2296 100644 --- a/riscv/insns/vqldots_vv.h +++ b/riscv/insns/vqldots_vv.h @@ -21,3 +21,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vqldotu_vv.h b/riscv/insns/vqldotu_vv.h index 2b674b1..cd3691a 100644 --- a/riscv/insns/vqldotu_vv.h +++ b/riscv/insns/vqldotu_vv.h @@ -21,3 +21,5 @@ switch (P.VU.vsew) { } default: require(false); } + +VECTOR_END; diff --git a/riscv/insns/vsetivli.h b/riscv/insns/vsetivli.h index f880e96..f30564f 100644 --- a/riscv/insns/vsetivli.h +++ b/riscv/insns/vsetivli.h @@ -1,2 +1,3 @@ require_vector_novtype(false); WRITE_RD(P.VU.set_vl(insn.rd(), -1, insn.rs1(), insn.v_zimm10())); +VECTOR_END; diff --git a/riscv/insns/vsetvl.h b/riscv/insns/vsetvl.h index 4d03542..f18d5be 100644 --- a/riscv/insns/vsetvl.h +++ b/riscv/insns/vsetvl.h @@ -1,2 +1,3 @@ require_vector_novtype(false); WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, RS2)); +VECTOR_END; diff --git a/riscv/insns/vsetvli.h b/riscv/insns/vsetvli.h index d1f43b5..140cc30 100644 --- a/riscv/insns/vsetvli.h +++ b/riscv/insns/vsetvli.h @@ -1,2 +1,3 @@ require_vector_novtype(false); WRITE_RD(P.VU.set_vl(insn.rd(), insn.rs1(), RS1, insn.v_zimm11())); +VECTOR_END; diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 7f5256c..e96fc12 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -233,16 +233,19 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_LOOP_END_BASE \ } +#define VECTOR_END \ + P.VU.vstart->write(0) + #define VI_LOOP_END \ VI_LOOP_END_BASE \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LOOP_REDUCTION_END(x) \ } \ if (vl > 0) { \ vd_0_des = vd_0_res; \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LOOP_CARRY_BASE \ VI_GENERAL_LOOP_BASE \ @@ -253,7 +256,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_LOOP_CARRY_END \ P.VU.set_mask_elt(insn.rd(), i, res); \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LOOP_WITH_CARRY_BASE \ VI_GENERAL_LOOP_BASE \ const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \ @@ -274,7 +277,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_LOOP_CMP_END \ P.VU.set_mask_elt(insn.rd(), i, res); \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LOOP_MASK(op) \ require(P.VU.vsew <= e64); \ @@ -285,7 +288,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, bool vs1 = P.VU.mask_elt(insn.rs1(), i); \ P.VU.set_mask_elt(insn.rd(), i, (op)); \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LOOP_NSHIFT_BASE \ VI_GENERAL_LOOP_BASE; \ @@ -1199,7 +1202,7 @@ VI_VX_ULOOP({ \ P.VU.elt<elt_width##_t>(vd + fn * emul, vreg_inx, true) = val; \ } \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LDST_GET_INDEX(elt_width) \ reg_t index; \ @@ -1252,7 +1255,7 @@ VI_VX_ULOOP({ \ } \ } \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_ST(stride, offset, elt_width, is_mask_ldst) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1270,7 +1273,7 @@ VI_VX_ULOOP({ \ baseAddr + (stride) + (offset) * sizeof(elt_width##_t), val); \ } \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_ST_INDEX(elt_width, is_seg) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1306,7 +1309,7 @@ VI_VX_ULOOP({ \ } \ } \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_LDST_FF(elt_width) \ const reg_t nf = insn.v_nf() + 1; \ @@ -1325,8 +1328,10 @@ VI_VX_ULOOP({ \ val = MMU.load<elt_width##_t>( \ baseAddr + (i * nf + fn) * sizeof(elt_width##_t)); \ } catch (trap_t& t) { \ - if (i == 0) \ + if (i == 0) { \ + P.VU.vstart->write(0); /* dirty VS */ \ throw; /* Only take exception on zeroth element */ \ + } \ /* Reduce VL if an exception occurs on a later element */ \ early_stop = true; \ P.VU.vl->write_raw(i); \ @@ -1339,7 +1344,7 @@ VI_VX_ULOOP({ \ break; \ } \ } \ - p->VU.vstart->write(0); + VECTOR_END; #define VI_LD_WHOLE(elt_width) \ require_vector_novtype(true); \ @@ -1350,28 +1355,12 @@ VI_VX_ULOOP({ \ require_align(vd, len); \ const reg_t elt_per_reg = P.VU.vlenb / sizeof(elt_width ## _t); \ const reg_t size = len * elt_per_reg; \ - if (P.VU.vstart->read() < size) { \ - reg_t i = P.VU.vstart->read() / elt_per_reg; \ - reg_t off = P.VU.vstart->read() % elt_per_reg; \ - if (off) { \ - for (reg_t pos = off; pos < elt_per_reg; ++pos) { \ - auto val = MMU.load<elt_width##_t>(baseAddr + \ - P.VU.vstart->read() * sizeof(elt_width ## _t)); \ - P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \ - P.VU.vstart->write(P.VU.vstart->read() + 1); \ - } \ - ++i; \ - } \ - for (; i < len; ++i) { \ - for (reg_t pos = 0; pos < elt_per_reg; ++pos) { \ - auto val = MMU.load<elt_width##_t>(baseAddr + \ - P.VU.vstart->read() * sizeof(elt_width ## _t)); \ - P.VU.elt<elt_width ## _t>(vd + i, pos, true) = val; \ - P.VU.vstart->write(P.VU.vstart->read() + 1); \ - } \ - } \ + for (reg_t i = P.VU.vstart->read(); i < size; i++) { \ + P.VU.vstart->write(i); \ + auto val = MMU.load<elt_width##_t>(baseAddr + i * sizeof(elt_width ## _t)); \ + P.VU.elt<elt_width ## _t>(vd, i, true) = val; \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_ST_WHOLE \ require_vector_novtype(true); \ @@ -1380,27 +1369,12 @@ VI_VX_ULOOP({ \ const reg_t len = insn.v_nf() + 1; \ require_align(vs3, len); \ const reg_t size = len * P.VU.vlenb; \ - \ - if (P.VU.vstart->read() < size) { \ - reg_t i = P.VU.vstart->read() / P.VU.vlenb; \ - reg_t off = P.VU.vstart->read() % P.VU.vlenb; \ - if (off) { \ - for (reg_t pos = off; pos < P.VU.vlenb; ++pos) { \ - auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \ - MMU.store<uint8_t>(baseAddr + P.VU.vstart->read(), val); \ - P.VU.vstart->write(P.VU.vstart->read() + 1); \ - } \ - i++; \ - } \ - for (; i < len; ++i) { \ - for (reg_t pos = 0; pos < P.VU.vlenb; ++pos) { \ - auto val = P.VU.elt<uint8_t>(vs3 + i, pos); \ - MMU.store<uint8_t>(baseAddr + P.VU.vstart->read(), val); \ - P.VU.vstart->write(P.VU.vstart->read() + 1); \ - } \ - } \ + for (reg_t i = P.VU.vstart->read(); i < size; i++) { \ + P.VU.vstart->write(i); \ + auto val = P.VU.elt<uint8_t>(vs3, i); \ + MMU.store<uint8_t>(baseAddr + i, val); \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_EXT_CHECK(div) \ require(insn.rd() != insn.rs2()); \ @@ -1509,11 +1483,11 @@ VI_VX_ULOOP({ \ #define VI_VFP_LOOP_END \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ #define VI_VFP_LOOP_REDUCTION_END(x) \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ if (vl > 0) { \ if (is_propagate && !is_active) { \ switch (x) { \ @@ -1575,7 +1549,7 @@ VI_VX_ULOOP({ \ break; \ }; \ } \ - P.VU.vstart->write(0); + VECTOR_END; #define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(true); \ diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index a7ba018..6eadf59 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -89,7 +89,6 @@ reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t new vl->write_raw(std::min(reqVL, vlmax)); } - vstart->write_raw(0); return vl->read(); } diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index e96e0a8..1ab4331 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -322,7 +322,7 @@ VV_VD_VS1_VS2_EGU32x4_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \ EG_BODY \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 32b*8 element groups available in the vector register @@ -377,7 +377,7 @@ VV_VD_VS1_VS2_EGU32x8_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \ EG_BODY \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 32b*4 element groups available in the vector register @@ -445,7 +445,7 @@ EG_BODY \ } \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 32b*4 element groups available in the vector register @@ -513,7 +513,7 @@ EG_BODY \ } \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 32b*4 element groups available in the vector registers @@ -560,7 +560,7 @@ VV_VD_VS2_EGU32x4_PARAMS(vd_num, vs2_num, idx_eg); \ EG_BODY \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 32b*4 element groups available in the vector registers @@ -616,7 +616,7 @@ EG_BODY \ } \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 32b*8 element groups available in the vector registers @@ -672,7 +672,7 @@ EG_BODY \ } \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) // Processes all 64b*4 element groups available in the vector registers @@ -726,7 +726,7 @@ VV_VD_VS1_VS2_EGU64x4_PARAMS(vd_num, vs1_num, vs2_num, idx_eg); \ EG_BODY \ } \ - P.VU.vstart->write(0); \ + VECTOR_END; \ } while (0) |
