aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--riscv/insns/vcompress_vm.h6
-rw-r--r--riscv/insns/vcpop_m.h12
-rw-r--r--riscv/insns/vfirst_m.h3
-rw-r--r--riscv/insns/viota_m.h8
-rw-r--r--riscv/insns/vmandn_mm.h2
-rw-r--r--riscv/insns/vmnand_mm.h2
-rw-r--r--riscv/insns/vmnor_mm.h2
-rw-r--r--riscv/insns/vmorn_mm.h2
-rw-r--r--riscv/insns/vmsbf_m.h17
-rw-r--r--riscv/insns/vmsif_m.h18
-rw-r--r--riscv/insns/vmsof_m.h16
-rw-r--r--riscv/insns/vmxnor_mm.h2
-rw-r--r--riscv/mmu.h24
-rw-r--r--riscv/v_ext_macros.h47
-rw-r--r--riscv/vector_unit.h11
15 files changed, 76 insertions, 96 deletions
diff --git a/riscv/insns/vcompress_vm.h b/riscv/insns/vcompress_vm.h
index a1969de..6624d8b 100644
--- a/riscv/insns/vcompress_vm.h
+++ b/riscv/insns/vcompress_vm.h
@@ -9,11 +9,7 @@ require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), 1);
reg_t pos = 0;
VI_GENERAL_LOOP_BASE
- const int midx = i / 64;
- const int mpos = i % 64;
-
- bool do_mask = (P.VU.elt<uint64_t>(rs1_num, midx) >> mpos) & 0x1;
- if (do_mask) {
+ if (P.VU.mask_elt(rs1_num, i)) {
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num, pos, true) = P.VU.elt<uint8_t>(rs2_num, i);
diff --git a/riscv/insns/vcpop_m.h b/riscv/insns/vcpop_m.h
index f909311..26a1276 100644
--- a/riscv/insns/vcpop_m.h
+++ b/riscv/insns/vcpop_m.h
@@ -6,15 +6,7 @@ reg_t rs2_num = insn.rs2();
require(P.VU.vstart->read() == 0);
reg_t popcount = 0;
for (reg_t i=P.VU.vstart->read(); i<vl; ++i) {
- const int midx = i / 32;
- const int mpos = i % 32;
-
- bool vs2_lsb = ((P.VU.elt<uint32_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
- if (insn.v_vm() == 1) {
- popcount += vs2_lsb;
- } else {
- bool do_mask = (P.VU.elt<uint32_t>(0, midx) >> mpos) & 0x1;
- popcount += (vs2_lsb && do_mask);
- }
+ bool vs2_bit = P.VU.mask_elt(rs2_num, i);
+ popcount += vs2_bit && (insn.v_vm() || P.VU.mask_elt(0, i));
}
WRITE_RD(popcount);
diff --git a/riscv/insns/vfirst_m.h b/riscv/insns/vfirst_m.h
index a130e5d..e3f5263 100644
--- a/riscv/insns/vfirst_m.h
+++ b/riscv/insns/vfirst_m.h
@@ -8,8 +8,7 @@ reg_t pos = -1;
for (reg_t i=P.VU.vstart->read(); i < vl; ++i) {
VI_LOOP_ELEMENT_SKIP()
- bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
- if (vs2_lsb) {
+ if (P.VU.mask_elt(rs2_num, i)) {
pos = i;
break;
}
diff --git a/riscv/insns/viota_m.h b/riscv/insns/viota_m.h
index 1ee9229..49c804c 100644
--- a/riscv/insns/viota_m.h
+++ b/riscv/insns/viota_m.h
@@ -12,15 +12,11 @@ require_noover(rd_num, P.VU.vflmul, rs2_num, 1);
int cnt = 0;
for (reg_t i = 0; i < vl; ++i) {
- const int midx = i / 64;
- const int mpos = i % 64;
-
- bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
- bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ bool do_mask = P.VU.mask_elt(0, i);
bool has_one = false;
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
- if (vs2_lsb) {
+ if (P.VU.mask_elt(rs2_num, i)) {
has_one = true;
}
}
diff --git a/riscv/insns/vmandn_mm.h b/riscv/insns/vmandn_mm.h
index e9a87cf..49129f7 100644
--- a/riscv/insns/vmandn_mm.h
+++ b/riscv/insns/vmandn_mm.h
@@ -1,2 +1,2 @@
// vmandn.mm vd, vs2, vs1
-VI_LOOP_MASK(vs2 & ~vs1);
+VI_LOOP_MASK(vs2 & !vs1);
diff --git a/riscv/insns/vmnand_mm.h b/riscv/insns/vmnand_mm.h
index 5a3ab09..4659e2f 100644
--- a/riscv/insns/vmnand_mm.h
+++ b/riscv/insns/vmnand_mm.h
@@ -1,2 +1,2 @@
// vmnand.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 & vs1));
+VI_LOOP_MASK(!(vs2 & vs1));
diff --git a/riscv/insns/vmnor_mm.h b/riscv/insns/vmnor_mm.h
index ab93378..37327c0 100644
--- a/riscv/insns/vmnor_mm.h
+++ b/riscv/insns/vmnor_mm.h
@@ -1,2 +1,2 @@
// vmnor.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 | vs1));
+VI_LOOP_MASK(!(vs2 | vs1));
diff --git a/riscv/insns/vmorn_mm.h b/riscv/insns/vmorn_mm.h
index 23026f5..71acc05 100644
--- a/riscv/insns/vmorn_mm.h
+++ b/riscv/insns/vmorn_mm.h
@@ -1,2 +1,2 @@
// vmorn.mm vd, vs2, vs1
-VI_LOOP_MASK(vs2 | ~vs1);
+VI_LOOP_MASK(vs2 | !vs1);
diff --git a/riscv/insns/vmsbf_m.h b/riscv/insns/vmsbf_m.h
index 1275872..3f736e0 100644
--- a/riscv/insns/vmsbf_m.h
+++ b/riscv/insns/vmsbf_m.h
@@ -11,22 +11,17 @@ reg_t rs2_num = insn.rs2();
bool has_one = false;
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
- const int midx = i / 64;
- const int mpos = i % 64;
- const uint64_t mmask = UINT64_C(1) << mpos; \
-
- bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx) >> mpos) & 0x1) == 1;
- bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
-
+ bool vs2_lsb = P.VU.mask_elt(rs2_num, i);
+ bool do_mask = P.VU.mask_elt(0, i);
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
- auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
- uint64_t res = 0;
+ bool res = false;
if (!has_one && !vs2_lsb) {
- res = 1;
+ res = true;
} else if (!has_one && vs2_lsb) {
has_one = true;
}
- vd = (vd & ~mmask) | ((res << mpos) & mmask);
+
+ P.VU.set_mask_elt(rd_num, i, res);
}
}
diff --git a/riscv/insns/vmsif_m.h b/riscv/insns/vmsif_m.h
index cbcbc2a..b029327 100644
--- a/riscv/insns/vmsif_m.h
+++ b/riscv/insns/vmsif_m.h
@@ -11,22 +11,18 @@ reg_t rs2_num = insn.rs2();
bool has_one = false;
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) {
- const int midx = i / 64;
- const int mpos = i % 64;
- const uint64_t mmask = UINT64_C(1) << mpos; \
-
- bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
- bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ bool vs2_lsb = P.VU.mask_elt(rs2_num, i);
+ bool do_mask = P.VU.mask_elt(0, i);
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
- auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
- uint64_t res = 0;
+ bool res = false;
if (!has_one && !vs2_lsb) {
- res = 1;
+ res = true;
} else if (!has_one && vs2_lsb) {
has_one = true;
- res = 1;
+ res = true;
}
- vd = (vd & ~mmask) | ((res << mpos) & mmask);
+
+ P.VU.set_mask_elt(rd_num, i, res);
}
}
diff --git a/riscv/insns/vmsof_m.h b/riscv/insns/vmsof_m.h
index 9bd4f0c..5753dbf 100644
--- a/riscv/insns/vmsof_m.h
+++ b/riscv/insns/vmsof_m.h
@@ -11,20 +11,16 @@ reg_t rs2_num = insn.rs2();
bool has_one = false;
for (reg_t i = P.VU.vstart->read() ; i < vl; ++i) {
- const int midx = i / 64;
- const int mpos = i % 64;
- const uint64_t mmask = UINT64_C(1) << mpos; \
-
- bool vs2_lsb = ((P.VU.elt<uint64_t>(rs2_num, midx ) >> mpos) & 0x1) == 1;
- bool do_mask = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ bool vs2_lsb = P.VU.mask_elt(rs2_num, i);
+ bool do_mask = P.VU.mask_elt(0, i);
if (insn.v_vm() == 1 || (insn.v_vm() == 0 && do_mask)) {
- uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
- uint64_t res = 0;
+ bool res = false;
if (!has_one && vs2_lsb) {
has_one = true;
- res = 1;
+ res = true;
}
- vd = (vd & ~mmask) | ((res << mpos) & mmask);
+
+ P.VU.set_mask_elt(rd_num, i, res);
}
}
diff --git a/riscv/insns/vmxnor_mm.h b/riscv/insns/vmxnor_mm.h
index 0736d5b..8db61c2 100644
--- a/riscv/insns/vmxnor_mm.h
+++ b/riscv/insns/vmxnor_mm.h
@@ -1,2 +1,2 @@
// vmnxor.mm vd, vs2, vs1
-VI_LOOP_MASK(~(vs2 ^ vs1));
+VI_LOOP_MASK(!(vs2 ^ vs1));
diff --git a/riscv/mmu.h b/riscv/mmu.h
index 9e3aca3..86f06ab 100644
--- a/riscv/mmu.h
+++ b/riscv/mmu.h
@@ -20,6 +20,21 @@
const reg_t PGSIZE = 1 << PGSHIFT;
#define MAX_PADDR_BITS 64
+// Observability hooks for load, store, and fetch.
+// Intentionally empty by default so they add no runtime overhead;
+// they can be redefined (before including this header) if needed.
+#ifndef MMU_OBSERVE_FETCH
+#define MMU_OBSERVE_FETCH(addr, insn, length)
+#endif
+
+#ifndef MMU_OBSERVE_LOAD
+#define MMU_OBSERVE_LOAD(addr, data, length)
+#endif
+
+#ifndef MMU_OBSERVE_STORE
+#define MMU_OBSERVE_STORE(addr, data, length)
+#endif
+
struct insn_fetch_t
{
insn_func_t func;
@@ -89,6 +104,8 @@ public:
load_slow_path(addr, sizeof(T), (uint8_t*)&res, xlate_flags);
}
+ MMU_OBSERVE_LOAD(addr,from_target(res),sizeof(T));
+
return from_target(res);
}
@@ -117,6 +134,7 @@ public:
template<typename T>
void ALWAYS_INLINE store(reg_t addr, T val, xlate_flags_t xlate_flags = {}) {
+ MMU_OBSERVE_STORE(addr, val, sizeof(T));
bool aligned = (addr & (sizeof(T) - 1)) == 0;
auto [tlb_hit, host_addr, _] = access_tlb(tlb_store, addr);
@@ -323,15 +341,17 @@ public:
tracer.trace(paddr, paddr + length, FETCH);
}
}
-
+ MMU_OBSERVE_FETCH(addr, insn, length);
return entry;
}
inline icache_entry_t* access_icache(reg_t addr)
{
icache_entry_t* entry = &icache[icache_index(addr)];
- if (likely(entry->tag == addr))
+ if (likely(entry->tag == addr)){
+ MMU_OBSERVE_FETCH(addr, entry->data.insn, insn_length(entry->data.insn.bits()));
return entry;
+ }
return refill_icache(addr, entry);
}
diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h
index 5b4d9bd..b6a4b92 100644
--- a/riscv/v_ext_macros.h
+++ b/riscv/v_ext_macros.h
@@ -8,16 +8,10 @@
//
// vector: masking skip helper
//
-#define VI_MASK_VARS \
- const int midx = i / 64; \
- const int mpos = i % 64;
-
#define VI_LOOP_ELEMENT_SKIP(BODY) \
- VI_MASK_VARS \
if (insn.v_vm() == 0) { \
BODY; \
- bool skip = ((P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1) == 0; \
- if (skip) { \
+ if (!P.VU.mask_elt(0, i)) { \
continue; \
} \
}
@@ -231,24 +225,18 @@ static inline bool is_overlapped_widen(const int astart, int asize,
#define VI_LOOP_CARRY_BASE \
VI_GENERAL_LOOP_BASE \
- VI_MASK_VARS \
- auto v0 = P.VU.elt<uint64_t>(0, midx); \
- const uint64_t mmask = UINT64_C(1) << mpos; \
const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \
- uint64_t carry = insn.v_vm() == 0 ? (v0 >> mpos) & 0x1 : 0; \
- uint128_t res = 0; \
- auto &vd = P.VU.elt<uint64_t>(rd_num, midx, true);
+ uint64_t carry = insn.v_vm() == 0 ? P.VU.mask_elt(0, i) : 0; \
+ bool res = false;
#define VI_LOOP_CARRY_END \
- vd = (vd & ~mmask) | (((res) << mpos) & mmask); \
+ P.VU.set_mask_elt(insn.rd(), i, res); \
} \
P.VU.vstart->write(0);
#define VI_LOOP_WITH_CARRY_BASE \
VI_GENERAL_LOOP_BASE \
- VI_MASK_VARS \
- auto &v0 = P.VU.elt<uint64_t>(0, midx); \
const uint128_t op_mask = (UINT64_MAX >> (64 - sew)); \
- uint64_t carry = (v0 >> mpos) & 0x1;
+ uint64_t carry = P.VU.mask_elt(0, i);
#define VI_LOOP_CMP_BASE \
require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \
@@ -260,12 +248,10 @@ static inline bool is_overlapped_widen(const int astart, int asize,
reg_t rs2_num = insn.rs2(); \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
- uint64_t mmask = UINT64_C(1) << mpos; \
- uint64_t &vdi = P.VU.elt<uint64_t>(insn.rd(), midx, true); \
- uint64_t res = 0;
+ bool res = false;
#define VI_LOOP_CMP_END \
- vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \
+ P.VU.set_mask_elt(insn.rd(), i, res); \
} \
P.VU.vstart->write(0);
@@ -274,13 +260,9 @@ static inline bool is_overlapped_widen(const int astart, int asize,
require_vector(true); \
reg_t vl = P.VU.vl->read(); \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
- int midx = i / 64; \
- int mpos = i % 64; \
- uint64_t mmask = UINT64_C(1) << mpos; \
- uint64_t vs2 = P.VU.elt<uint64_t>(insn.rs2(), midx); \
- uint64_t vs1 = P.VU.elt<uint64_t>(insn.rs1(), midx); \
- uint64_t &res = P.VU.elt<uint64_t>(insn.rd(), midx, true); \
- res = (res & ~mmask) | ((op) & (1ULL << mpos)); \
+ bool vs2 = P.VU.mask_elt(insn.rs2(), i); \
+ bool vs1 = P.VU.mask_elt(insn.rs1(), i); \
+ P.VU.set_mask_elt(insn.rd(), i, (op)); \
} \
P.VU.vstart->write(0);
@@ -523,8 +505,7 @@ static inline bool is_overlapped_widen(const int astart, int asize,
// merge and copy loop
#define VI_MERGE_VARS \
- VI_MASK_VARS \
- bool UNUSED use_first = (P.VU.elt<uint64_t>(0, midx) >> mpos) & 0x1;
+ bool UNUSED use_first = P.VU.mask_elt(0, i);
#define VI_MERGE_LOOP_BASE \
VI_GENERAL_LOOP_BASE \
@@ -1482,9 +1463,7 @@ VI_VX_ULOOP({ \
VI_VFP_COMMON \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP(); \
- uint64_t mmask = UINT64_C(1) << mpos; \
- uint64_t &vd = P.VU.elt<uint64_t>(rd_num, midx, true); \
- uint64_t res = 0;
+ bool res = false;
#define VI_VFP_LOOP_REDUCTION_BASE(width) \
float##width##_t vd_0 = P.VU.elt<float##width##_t>(rd_num, 0); \
@@ -1562,7 +1541,7 @@ VI_VX_ULOOP({ \
case e16: \
case e32: \
case e64: { \
- vd = (vd & ~mmask) | (((res) << mpos) & mmask); \
+ P.VU.set_mask_elt(insn.rd(), i, res); \
break; \
} \
default: \
diff --git a/riscv/vector_unit.h b/riscv/vector_unit.h
index a057c62..0e80618 100644
--- a/riscv/vector_unit.h
+++ b/riscv/vector_unit.h
@@ -108,6 +108,17 @@ public:
template<typename EG> EG&
elt_group(reg_t vReg, reg_t n, bool is_write = false);
+ bool mask_elt(reg_t vReg, reg_t n)
+ {
+ return (elt<uint8_t>(vReg, n / 8) >> (n % 8)) & 1;
+ }
+
+ void set_mask_elt(reg_t vReg, reg_t n, bool value)
+ {
+ auto& e = elt<uint8_t>(vReg, n / 8, true);
+ e = (e & ~(1U << (n % 8))) | (value << (n % 8));
+ }
+
public:
void reset();