blob: 33057ca04e585e25403d73f12867e34eacb19de8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
require_rv64;
require_extension(EXT_XZBM);
// transpose of rs2
int64_t rs2t = RS2;
for (int i = 0; i < 3; i++) {
rs2t = (rs2t & 0xFFFF00000000FFFFLL) | ((rs2t & 0x0000FFFF00000000LL) >> 16) | ((rs2t & 0x00000000FFFF0000LL) << 16);
rs2t = (rs2t & 0xFF0000FFFF0000FFLL) | ((rs2t & 0x00FF000000FF0000LL) >> 8) | ((rs2t & 0x0000FF000000FF00LL) << 8);
rs2t = (rs2t & 0xF00FF00FF00FF00FLL) | ((rs2t & 0x0F000F000F000F00LL) >> 4) | ((rs2t & 0x00F000F000F000F0LL) << 4);
rs2t = (rs2t & 0xC3C3C3C3C3C3C3C3LL) | ((rs2t & 0x3030303030303030LL) >> 2) | ((rs2t & 0x0C0C0C0C0C0C0C0CLL) << 2);
rs2t = (rs2t & 0x9999999999999999LL) | ((rs2t & 0x4444444444444444LL) >> 1) | ((rs2t & 0x2222222222222222LL) << 1);
}
int64_t rs1 = RS1;
uint8_t u[8]; // rows of rs1
uint8_t v[8]; // cols of rs2
for (int i = 0; i < 8; i++) {
u[i] = rs1 >> (i*8);
v[i] = rs2t >> (i*8);
}
uint64_t x = 0;
for (int i = 0; i < 64; i++) {
if ((u[i / 8] & v[i % 8]) != 0)
x |= 1LL << i;
}
WRITE_RD(sext_xlen(x));
|