blob: e858de9b726b0e2f565e44711ef527d5c01f3df0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
// Instruction body: unit-stride segment load into vector registers that stops
// early when a loaded element is zero. For each element index i and each field
// fn, one SEW-wide value is read from memory at baseAddr + (i * nf + fn) * bytes
// and written to vector register (rd_num + fn * vlmul). If the value written is
// zero, P.VU.vl is set to i and the operation ends.
// NOTE(review): `require`, VI_CHECK_SXX, VI_STRIP and VI_ELEMENT_SKIP are
// defined outside this fragment; their exact behaviour should be confirmed there.

// Only element widths from e8 through e64 are accepted.
require(P.VU.vsew >= e8 && P.VU.vsew <= e64);
// Number of fields handled per element index (encoded value plus one).
const reg_t nf = insn.v_nf() + 1;
// The register span nf * vlmul must not exceed a quarter of NVPR.
require((nf * P.VU.vlmul) <= (NVPR / 4));
VI_CHECK_SXX;
// Snapshot of the state used throughout the loop. `vl` is the value at entry;
// later writes to P.VU.vl do not change this local copy.
const reg_t sew = P.VU.vsew;
const reg_t vl = P.VU.vl;
const reg_t baseAddr = RS1;
const reg_t rd_num = insn.rd();
// Set once a zero element has been seen, so the outer loop can exit as well.
bool early_stop = false;
const reg_t vlmul = P.VU.vlmul;
// `vl` is const, so `vl != 0` is loop-invariant: the loop body never runs when
// the entry vl is zero.
for (reg_t i = 0; i < P.VU.vlmax && vl != 0; ++i) {
bool is_zero = false;
// NOTE(review): `vreg_inx`, used below, is not declared in this fragment;
// presumably VI_STRIP introduces it, and VI_ELEMENT_SKIP may skip this
// iteration (e.g. via `continue`) — confirm against the macro definitions.
VI_STRIP(i);
VI_ELEMENT_SKIP(i);
for (reg_t fn = 0; fn < nf; ++fn) {
// NOTE(review): the result of this byte load is discarded. It always uses a
// 1-byte stride regardless of sew, so for e8 it repeats the load done in the
// e8 case below, and for wider sew it generally touches a different address
// than the element load. Possibly an access probe or a leftover — confirm
// before removing, since dropping it could change trap/side-effect behaviour.
MMU.load_uint8(baseAddr + (i * nf + fn) * 1);
// Load one element of the selected width, store it into the destination
// register for this field, then test the stored value for zero.
switch (sew) {
case e8:
P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) =
MMU.load_uint8(baseAddr + (i * nf + fn) * 1);
is_zero = P.VU.elt<uint8_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e16:
P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) =
MMU.load_uint16(baseAddr + (i * nf + fn) * 2);
is_zero = P.VU.elt<uint16_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e32:
P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) =
MMU.load_uint32(baseAddr + (i * nf + fn) * 4);
is_zero = P.VU.elt<uint32_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
case e64:
P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) =
MMU.load_uint64(baseAddr + (i * nf + fn) * 8);
is_zero = P.VU.elt<uint64_t>(rd_num + fn * vlmul, vreg_inx) == 0;
break;
}
// A zero element truncates the vector length to the current element index
// and abandons the remaining fields of this element.
if (is_zero) {
P.VU.vl = i;
early_stop = true;
break;
}
}
// Propagate the inner-loop exit to the element loop.
if (early_stop) {
break;
}
}
// vstart is reset to zero once the instruction body has finished.
P.VU.vstart = 0;
|