diff options
-rw-r--r-- | riscv/insns/vcompress_vm.h | 6 | ||||
-rw-r--r-- | riscv/insns/vcpop_m.h | 12 | ||||
-rw-r--r-- | riscv/insns/vfirst_m.h | 3 | ||||
-rw-r--r-- | riscv/insns/viota_m.h | 8 | ||||
-rw-r--r-- | riscv/insns/vmandn_mm.h | 2 | ||||
-rw-r--r-- | riscv/insns/vmnand_mm.h | 2 | ||||
-rw-r--r-- | riscv/insns/vmnor_mm.h | 2 | ||||
-rw-r--r-- | riscv/insns/vmorn_mm.h | 2 | ||||
-rw-r--r-- | riscv/insns/vmsbf_m.h | 17 | ||||
-rw-r--r-- | riscv/insns/vmsif_m.h | 18 | ||||
-rw-r--r-- | riscv/insns/vmsof_m.h | 16 | ||||
-rw-r--r-- | riscv/insns/vmxnor_mm.h | 2 | ||||
-rw-r--r-- | riscv/mmu.h | 24 | ||||
-rw-r--r-- | riscv/v_ext_macros.h | 47 | ||||
-rw-r--r-- | riscv/vector_unit.h | 11 |
15 files changed, 76 insertions, 96 deletions
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h index a1969de..6624d8b 100644 --- a/riscv/insns/vcompress_vm.h +++ b/riscv/insns/vcompress_vm.h @@ -9,11 +9,7 @@ require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), 1); reg_t pos = 0; VI_GENERAL_LOOP_BASE - const int midx = i / 64; - const int mpos = i % 64; - - bool do_mask = (P.VU.elt<uint64_t>(rs1_num, midx) >> mpos) & 0x1; - if (do_mask) { + if (P.VU.mask_elt(rs1_num, i)) { switch (sew) { case e8: P.VU.elt<uint8_t>(rd_num, pos, true) = P.VU.elt<uint8_t>(rs2_num, i); diff --git a/riscv/insns/vcpop_m.h b/riscv/insns/vcpop_m.h index f909311..26a1276 100644 --- a/riscv/insns/vcpop_m.h +++ b/riscv/insns/vcpop_m.h @@ -6,15 +6,7 @@ reg_t rs2_num = insn.rs2(); require(P.VU.vstart->read() == 0); reg_t popcount = 0; for (reg_t i=P.VU.vstart->read(); i<vl; ++i) { - const int midx = i / 32; - const int mpos = i % 32; - - bool vs2_lsb = ((P.VU.elt<uint32_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - if (insn.v_vm() == 1) { - popcount += vs2_lsb; - } else { - bool do_mask = (P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1; - popcount += (vs2_lsb && do_mask); - } + bool vs2_bit = P.VU.mask_elt(rs2_num, i); + popcount += vs2_bit && (insn.v_vm() || P.VU.mask_elt(0, i)); } WRITE_RD(popcount); diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h index a130e5d..e3f5263 100644 --- a/riscv/insns/vfirst_m.h +++ b/riscv/insns/vfirst_m.h @@ -8,8 +8,7 @@ reg_t pos = -1; for (reg_t i=P.VU.vstart->read(); i < vl; ++i) { VI_LOOP_ELEMENT_SKIP() - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - if (vs2_lsb) { + if (P.VU.mask_elt(rs2_num, i)) { pos = i; break; } diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h index 1ee9229..49c804c 100644 --- a/riscv/insns/viota_m.h +++ b/riscv/insns/viota_m.h @@ -12,15 +12,11 @@ require_noover(rd_num, P.VU.vflmul, rs2_num, 1); int cnt = 0; for (reg_t i = 0; i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool do_mask = P.VU.mask_elt(0, i); bool has_one = false; if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - if (vs2_lsb) { + if (P.VU.mask_elt(rs2_num, i)) { has_one = true; } } diff --git a/riscv/insns/vmandn_mm.h b/riscv/insns/vmandn_mm.h index e9a87cf..49129f7 100644 --- a/riscv/insns/vmandn_mm.h +++ b/riscv/insns/vmandn_mm.h @@ -1,2 +1,2 @@ // vmandn.mm vd, vs2, vs1 -VI_LOOP_MASK(vs2 & ~vs1); +VI_LOOP_MASK(vs2 & !vs1); diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h index 5a3ab09..4659e2f 100644 --- a/riscv/insns/vmnand_mm.h +++ b/riscv/insns/vmnand_mm.h @@ -1,2 +1,2 @@ // vmnand.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 & vs1)); +VI_LOOP_MASK(!(vs2 & vs1)); diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h index ab93378..37327c0 100644 --- a/riscv/insns/vmnor_mm.h +++ b/riscv/insns/vmnor_mm.h @@ -1,2 +1,2 @@ // vmnor.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 | vs1)); +VI_LOOP_MASK(!(vs2 | vs1)); diff --git a/riscv/insns/vmorn_mm.h b/riscv/insns/vmorn_mm.h index 23026f5..71acc05 100644 --- a/riscv/insns/vmorn_mm.h +++ b/riscv/insns/vmorn_mm.h @@ -1,2 +1,2 @@ // vmorn.mm vd, vs2, vs1 -VI_LOOP_MASK(vs2 | ~vs1); +VI_LOOP_MASK(vs2 | !vs1); diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h index 1275872..3f736e0 100644 --- a/riscv/insns/vmsbf_m.h +++ b/riscv/insns/vmsbf_m.h @@ -11,22 +11,17 @@ reg_t rs2_num = insn.rs2(); bool has_one = false; for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - const uint64_t mmask = UINT64_C(1) << mpos; \ - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; - + bool vs2_lsb = P.VU.mask_elt(rs2_num, i); + bool do_mask = P.VU.mask_elt(0, i); if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true); - uint64_t res = 0; + bool res = false; if (!has_one && !vs2_lsb) { - res = 1; + res = true; } else if (!has_one && vs2_lsb) { has_one = true; } - vd = (vd & ~mmask) | ((res << mpos) & mmask); + + P.VU.set_mask_elt(rd_num, i, res); } } diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h index cbcbc2a..b029327 100644 --- a/riscv/insns/vmsif_m.h +++ b/riscv/insns/vmsif_m.h @@ -11,22 +11,18 @@ reg_t rs2_num = insn.rs2(); bool has_one = false; for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - const uint64_t mmask = UINT64_C(1) << mpos; \ - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool vs2_lsb = P.VU.mask_elt(rs2_num, i); + bool do_mask = P.VU.mask_elt(0, i); if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true); - uint64_t res = 0; + bool res = false; if (!has_one && !vs2_lsb) { - res = 1; + res = true; } else if (!has_one && vs2_lsb) { has_one = true; - res = 1; + res = true; } - vd = (vd & ~mmask) | ((res << mpos) & mmask); + + P.VU.set_mask_elt(rd_num, i, res); } } diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h index 9bd4f0c..5753dbf 100644 --- a/riscv/insns/vmsof_m.h +++ b/riscv/insns/vmsof_m.h @@ -11,20 +11,16 @@ reg_t rs2_num = insn.rs2(); bool has_one = false; for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) { - const int midx = i / 64; - const int mpos = i % 64; - const uint64_t mmask = UINT64_C(1) << mpos; \ - - bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1; - bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool vs2_lsb = P.VU.mask_elt(rs2_num, i); + bool do_mask = P.VU.mask_elt(0, i); if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) { - uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); - uint64_t res = 0; + bool res = false; if (!has_one && vs2_lsb) { has_one = true; - res = 1; + res = true; } - vd = (vd & ~mmask) | ((res << mpos) & mmask); + + P.VU.set_mask_elt(rd_num, i, res); } } diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h index 0736d5b..8db61c2 100644 --- a/riscv/insns/vmxnor_mm.h +++ b/riscv/insns/vmxnor_mm.h @@ -1,2 +1,2 @@ // vmnxor.mm vd, vs2, vs1 -VI_LOOP_MASK(~(vs2 ^ vs1)); +VI_LOOP_MASK(!(vs2 ^ vs1)); diff --git a/riscv/mmu.h b/riscv/mmu.h index 9e3aca3..86f06ab 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -20,6 +20,21 @@ const reg_t PGSIZE = 1 << PGSHIFT; #define MAX_PADDR_BITS 64 +// observability hooks for load, store and fetch +// intentionally empty not to cause runtime overhead +// can be redefined if needed +#ifndef MMU_OBSERVE_FETCH +#define MMU_OBSERVE_FETCH(addr, insn, length) +#endif + +#ifndef MMU_OBSERVE_LOAD +#define MMU_OBSERVE_LOAD(addr, data, length) +#endif + +#ifndef MMU_OBSERVE_STORE +#define MMU_OBSERVE_STORE(addr, data, length) +#endif + struct insn_fetch_t { insn_func_t func; @@ -89,6 +104,8 @@ public: load_slow_path(addr, sizeof(T), (uint8_t*)&res, xlate_flags); } + MMU_OBSERVE_LOAD(addr,from_target(res),sizeof(T)); + return from_target(res); } @@ -117,6 +134,7 @@ public: template<typename T> void ALWAYS_INLINE store(reg_t addr, T val, xlate_flags_t xlate_flags = {}) { + MMU_OBSERVE_STORE(addr, val, sizeof(T)); bool aligned = (addr & (sizeof(T) - 1)) == 0; auto [tlb_hit, host_addr, _] = access_tlb(tlb_store, addr); @@ -323,15 +341,17 @@ public: tracer.trace(paddr, paddr + length, FETCH); } } - + MMU_OBSERVE_FETCH(addr, insn, length); return entry; } inline icache_entry_t* access_icache(reg_t addr) { icache_entry_t* entry = &icache[icache_index(addr)]; - if (likely(entry->tag == addr)) + if (likely(entry->tag == addr)){ + MMU_OBSERVE_FETCH(addr, entry->data.insn, insn_length(entry->data.insn.bits())); return entry; + } return refill_icache(addr, entry); } diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index 5b4d9bd..b6a4b92 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -8,16 +8,10 @@ // // vector: masking skip helper // -#define VI_MASK_VARS \ - const int midx = i / 64; \ - const int mpos = i % 64; - #define VI_LOOP_ELEMENT_SKIP(BODY) \ - VI_MASK_VARS \ if (insn.v_vm() == 0) { \ BODY; \ - bool skip = ((P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1) == 0; \ - if (skip) { \ + if (!P.VU.mask_elt(0, i)) { \ continue; \ } \ } @@ -231,24 +225,18 @@ static inline bool is_overlapped_widen(const int astart, int asize, #define VI_LOOP_CARRY_BASE \ VI_GENERAL_LOOP_BASE \ - VI_MASK_VARS \ - auto v0 = P.VU.elt<uint64_t>(0, midx); \ - const uint64_t mmask = UINT64_C(1) << mpos; \ const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \ - uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; \ - uint128_t res = 0; \ - auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true); + uint64_t carry = insn.v_vm() == 0 ? P.VU.mask_elt(0, i) : 0; \ + bool res = false; #define VI_LOOP_CARRY_END \ - vd = (vd & ~mmask) | (((res) << mpos) & mmask); \ + P.VU.set_mask_elt(insn.rd(), i, res); \ } \ P.VU.vstart->write(0); #define VI_LOOP_WITH_CARRY_BASE \ VI_GENERAL_LOOP_BASE \ - VI_MASK_VARS \ - auto &v0 = P.VU.elt<uint64_t>(0, midx); \ const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \ - uint64_t carry = (v0 >> mpos) & 0x1; + uint64_t carry = P.VU.mask_elt(0, i); #define VI_LOOP_CMP_BASE \ require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ @@ -260,12 +248,10 @@ static inline bool is_overlapped_widen(const int astart, int asize, reg_t rs2_num = insn.rs2(); \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ - uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx, true); \ - uint64_t res = 0; + bool res = false; #define VI_LOOP_CMP_END \ - vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ + P.VU.set_mask_elt(insn.rd(), i, res); \ } \ P.VU.vstart->write(0); @@ -274,13 +260,9 @@ static inline bool is_overlapped_widen(const int astart, int asize, require_vector(true); \ reg_t vl = P.VU.vl->read(); \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ - int midx = i / 64; \ - int mpos = i % 64; \ - uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t vs2 = P.VU.elt<uint64_t>(insn.rs2(), midx); \ - uint64_t vs1 = P.VU.elt<uint64_t>(insn.rs1(), midx); \ - uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx, true); \ - res = (res & ~mmask) | ((op) & (1ULL << mpos)); \ + bool vs2 = P.VU.mask_elt(insn.rs2(), i); \ + bool vs1 = P.VU.mask_elt(insn.rs1(), i); \ + P.VU.set_mask_elt(insn.rd(), i, (op)); \ } \ P.VU.vstart->write(0); @@ -523,8 +505,7 @@ static inline bool is_overlapped_widen(const int astart, int asize, // merge and copy loop #define VI_MERGE_VARS \ - VI_MASK_VARS \ - bool UNUSED use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1; + bool UNUSED use_first = P.VU.mask_elt(0, i); #define VI_MERGE_LOOP_BASE \ VI_GENERAL_LOOP_BASE \ @@ -1482,9 +1463,7 @@ VI_VX_ULOOP({ \ VI_VFP_COMMON \ for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ VI_LOOP_ELEMENT_SKIP(); \ - uint64_t mmask = UINT64_C(1) << mpos; \ - uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \ - uint64_t res = 0; + bool res = false; #define VI_VFP_LOOP_REDUCTION_BASE(width) \ float##width##_t vd_0 = P.VU.elt<float##width##_t>(rd_num, 0); \ @@ -1562,7 +1541,7 @@ VI_VX_ULOOP({ \ case e16: \ case e32: \ case e64: { \ - vd = (vd & ~mmask) | (((res) << mpos) & mmask); \ + P.VU.set_mask_elt(insn.rd(), i, res); \ break; \ } \ default: \ diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h index a057c62..0e80618 100644 --- a/riscv/vector_unit.h +++ b/riscv/vector_unit.h @@ -108,6 +108,17 @@ public: template<typename EG> EG& elt_group(reg_t vReg, reg_t n, bool is_write = false); + bool mask_elt(reg_t vReg, reg_t n) + { + return (elt<uint8_t>(vReg, n / 8) >> (n % 8)) & 1; + } + + void set_mask_elt(reg_t vReg, reg_t n, bool value) + { + auto& e = elt<uint8_t>(vReg, n / 8, true); + e = (e & ~(1U << (n % 8))) | (value << (n % 8)); + } + public: void reset(); |