riscv/vector_unit.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

// See LICENSE for license details

#include "config.h"
#include "vector_unit.h"
#include "processor.h"
#include "arith.h"

void vectorUnit_t::vectorUnit_t::reset()
{
  free(reg_file);
  VLEN = get_vlen();
  ELEN = get_elen();
  reg_file = malloc(NVPR * vlenb);
  memset(reg_file, 0, NVPR * vlenb);

  auto& csrmap = p->get_state()->csrmap;
  csrmap[CSR_VXSAT] = vxsat = std::make_shared<vxsat_csr_t>(p, CSR_VXSAT);
  csrmap[CSR_VSTART] = vstart = std::make_shared<vector_csr_t>(p, CSR_VSTART, /*mask*/ VLEN - 1);
  csrmap[CSR_VXRM] = vxrm = std::make_shared<vector_csr_t>(p, CSR_VXRM, /*mask*/ 0x3ul);
  csrmap[CSR_VL] = vl = std::make_shared<vector_csr_t>(p, CSR_VL, /*mask*/ 0);
  csrmap[CSR_VTYPE] = vtype = std::make_shared<vector_csr_t>(p, CSR_VTYPE, /*mask*/ 0);
  csrmap[CSR_VLENB] = std::make_shared<vector_csr_t>(p, CSR_VLENB, /*mask*/ 0, /*init*/ vlenb);
  assert(VCSR_VXSAT_SHIFT == 0);  // composite_csr_t assumes vxsat begins at bit 0
  csrmap[CSR_VCSR] = std::make_shared<composite_csr_t>(p, CSR_VCSR, vxrm, vxsat, VCSR_VXRM_SHIFT);

  vtype->write_raw(0);
  set_vl(0, 0, 0, -1); // default to illegal configuration
}

reg_t vectorUnit_t::vectorUnit_t::set_vl(int rd, int rs1, reg_t reqVL, reg_t newType)
{
  int new_vlmul = 0;
  if (vtype->read() != newType) {
    vsew = 1 << (extract64(newType, 3, 3) + 3);
    new_vlmul = int8_t(extract64(newType, 0, 3) << 5) >> 5;
    vflmul = new_vlmul >= 0 ? 1 << new_vlmul : 1.0 / (1 << -new_vlmul);
    vlmax = (VLEN/vsew) * vflmul;
    vta = extract64(newType, 6, 1);
    vma = extract64(newType, 7, 1);

    vill = !(vflmul >= 0.125 && vflmul <= 8)
           || vsew > std::min(vflmul, 1.0f) * ELEN
           || (newType >> 8) != 0;

    if (vill) {
      vlmax = 0;
      vtype->write_raw(UINT64_MAX << (p->get_xlen() - 1));
    } else {
      vtype->write_raw(newType);
    }
  }

  // set vl
  if (vlmax == 0) {
    vl->write_raw(0);
  } else if (rd == 0 && rs1 == 0) {
    vl->write_raw(std::min(vl->read(), vlmax));
  } else if (rd != 0 && rs1 == 0) {
    vl->write_raw(vlmax);
  } else if (rs1 != 0) {
    vl->write_raw(std::min(reqVL, vlmax));
  }

  vstart->write_raw(0);
  setvl_count++;
  return vl->read();
}

template<class T> T& vectorUnit_t::elt(reg_t vReg, reg_t n, bool UNUSED is_write) {
  assert(vsew != 0);
  assert((VLEN >> 3)/sizeof(T) > 0);
  reg_t elts_per_reg = (VLEN >> 3) / (sizeof(T));
  vReg += n / elts_per_reg;
  n = n % elts_per_reg;
#ifdef WORDS_BIGENDIAN
  // "V" spec 0.7.1 requires lower indices to map to lower significant
  // bits when changing SEW, thus we need to index from the end on BE.
  n ^= elts_per_reg - 1;
#endif
  reg_referenced[vReg] = 1;

  if (unlikely(p->get_log_commits_enabled() && is_write))
    p->get_state()->log_reg_write[((vReg) << 4) | 2] = {0, 0};

  T *regStart = (T*)((char*)reg_file + vReg * (VLEN >> 3));
  return regStart[n];
}

// The logic differences between 'elt()' and 'elt_group()' come from
// the fact that, while 'elt()' requires that the element is fully
// contained in a single vector register, the element group may span
// multiple registers in a single register group (LMUL>1).
//
// Notes:
// - We do NOT check that a single element - i.e., the T in the element
//   group type std::array<T, N> - fits within a single register, or that
//   T is smaller or equal to VSEW. Implementations of the instructions
//   sometimes use a different T than what the specification suggests.
//   Instructon implementations should 'require()' what the specification
//   dictates.
// - We do NOT check that 'vReg' is a valid register group, or that
//   'n+1' element groups fit in the register group 'vReg'. It is
//   the responsibility of the caller to validate those preconditions.
template<typename EG> EG&
vectorUnit_t::elt_group(reg_t vReg, reg_t n, bool UNUSED is_write) {
#ifdef WORDS_BIGENDIAN
  fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n",
          stderr);
  abort();
#endif
  using T = typename EG::value_type;
  constexpr std::size_t N = std::tuple_size<EG>::value;
  assert(N > 0);

  assert(vsew != 0);
  constexpr reg_t elt_group_size = N * sizeof(T);
  const reg_t reg_group_size = (VLEN >> 3) * vflmul;
  assert(((n + 1) * elt_group_size) <= reg_group_size);

  const reg_t start_byte = n * elt_group_size;
  const reg_t bytes_per_reg = VLEN >> 3;

  // Inclusive first/last register indices.
  const reg_t reg_first = vReg + start_byte / bytes_per_reg;
  const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg;

  // Element groups per register groups
  for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) {
      reg_referenced[vidx] = 1;

      if (unlikely(p->get_log_commits_enabled() && is_write)) {
          p->get_state()->log_reg_write[(vidx << 4) | 2] = {0, 0};
      }
  }

  return *(EG*)((char*)reg_file + vReg * (VLEN >> 3) + start_byte);
}

template signed char& vectorUnit_t::elt<signed char>(reg_t, reg_t, bool);
template short& vectorUnit_t::elt<short>(reg_t, reg_t, bool);
template int& vectorUnit_t::elt<int>(reg_t, reg_t, bool);
template long& vectorUnit_t::elt<long>(reg_t, reg_t, bool);
template long long& vectorUnit_t::elt<long long>(reg_t, reg_t, bool);
template uint8_t& vectorUnit_t::elt<uint8_t>(reg_t, reg_t, bool);
template uint16_t& vectorUnit_t::elt<uint16_t>(reg_t, reg_t, bool);
template uint32_t& vectorUnit_t::elt<uint32_t>(reg_t, reg_t, bool);
template uint64_t& vectorUnit_t::elt<uint64_t>(reg_t, reg_t, bool);
template float16_t& vectorUnit_t::elt<float16_t>(reg_t, reg_t, bool);
template float32_t& vectorUnit_t::elt<float32_t>(reg_t, reg_t, bool);
template float64_t& vectorUnit_t::elt<float64_t>(reg_t, reg_t, bool);

template EGU32x4_t& vectorUnit_t::elt_group<EGU32x4_t>(reg_t, reg_t, bool);
template EGU32x8_t& vectorUnit_t::elt_group<EGU32x8_t>(reg_t, reg_t, bool);
template EGU64x4_t& vectorUnit_t::elt_group<EGU64x4_t>(reg_t, reg_t, bool);
template EGU8x16_t& vectorUnit_t::elt_group<EGU8x16_t>(reg_t, reg_t, bool);