diff options
Diffstat (limited to 'riscv/vector_unit.cc')
-rw-r--r-- | riscv/vector_unit.cc | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/riscv/vector_unit.cc b/riscv/vector_unit.cc index 9128df6..08adc61 100644 --- a/riscv/vector_unit.cc +++ b/riscv/vector_unit.cc @@ -86,6 +86,56 @@ template<class T> T& vectorUnit_t::elt(reg_t vReg, reg_t n, bool UNUSED is_write return regStart[n]; } +// The logic differences between 'elt()' and 'elt_group()' come from +// the fact that, while 'elt()' requires that the element is fully +// contained in a single vector register, the element group may span +// multiple registers in a single register group (LMUL>1). +// +// Notes: +// - We do NOT check that a single element - i.e., the T in the element +// group type std::array<T, N> - fits within a single register, or that +// T is smaller or equal to VSEW. Implementations of the instructions +// sometimes use a different T than what the specification suggests. +// Instructon implementations should 'require()' what the specification +// dictates. +// - We do NOT check that 'vReg' is a valid register group, or that +// 'n+1' element groups fit in the register group 'vReg'. It is +// the responsibility of the caller to validate those preconditions. +template<typename EG> EG& +vectorUnit_t::elt_group(reg_t vReg, reg_t n, bool UNUSED is_write) { +#ifdef WORDS_BIGENDIAN + fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n", + stderr); + abort(); +#endif + using T = typename EG::value_type; + constexpr std::size_t N = std::tuple_size<EG>::value; + assert(N > 0); + + assert(vsew != 0); + constexpr reg_t elt_group_size = N * sizeof(T); + const reg_t reg_group_size = (VLEN >> 3) * vflmul; + assert(((n + 1) * elt_group_size) <= reg_group_size); + + const reg_t start_byte = n * elt_group_size; + const reg_t bytes_per_reg = VLEN >> 3; + + // Inclusive first/last register indices. + const reg_t reg_first = vReg + start_byte / bytes_per_reg; + const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg; + + // Element groups per register groups + for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) { + reg_referenced[vidx] = 1; + + if (unlikely(p->get_log_commits_enabled() && is_write)) { + p->get_state()->log_reg_write[(vidx << 4) | 2] = {0, 0}; + } + } + + return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte); +} + template signed char& vectorUnit_t::elt<signed char>(reg_t, reg_t, bool); template short& vectorUnit_t::elt<short>(reg_t, reg_t, bool); template int& vectorUnit_t::elt<int>(reg_t, reg_t, bool); @@ -98,3 +148,8 @@ template uint64_t& vectorUnit_t::elt<uint64_t>(reg_t, reg_t, bool); template float16_t& vectorUnit_t::elt<float16_t>(reg_t, reg_t, bool); template float32_t& vectorUnit_t::elt<float32_t>(reg_t, reg_t, bool); template float64_t& vectorUnit_t::elt<float64_t>(reg_t, reg_t, bool); + +template EGU32x4_t& vectorUnit_t::elt_group<EGU32x4_t>(reg_t, reg_t, bool); +template EGU32x8_t& vectorUnit_t::elt_group<EGU32x8_t>(reg_t, reg_t, bool); +template EGU64x4_t& vectorUnit_t::elt_group<EGU64x4_t>(reg_t, reg_t, bool); +template EGU8x16_t& vectorUnit_t::elt_group<EGU8x16_t>(reg_t, reg_t, bool); |