1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
// vsm4r.vs vd, vs2
#include "zvksed_ext_macros.h"
require_vsm4_constraints;
// No overlap of vd and vs2.
require(insn.rd() != insn.rs2());
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", vs2's first element, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU32x4_t scalar_key = P.VU.elt_group<EGU32x4_t>(vs2_num, 0);
const uint32_t rk0 = scalar_key[0];
const uint32_t rk1 = scalar_key[1];
const uint32_t rk2 = scalar_key[2];
const uint32_t rk3 = scalar_key[3];,
{
EGU32x4_t &state = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true);
// {x0, x1,x2, x3} <- vd
EXTRACT_EGU32x4_WORDS_LE(state, x0, x1, x2, x3);
uint32_t B;
uint32_t S;
B = x1 ^ x2 ^ x3 ^ rk0;
S = ZVKSED_SUB_BYTES(B);
const uint32_t x4 = ZVKSED_ROUND(x0, S);
B = x2 ^ x3 ^ x4 ^ rk1;
S = ZVKSED_SUB_BYTES(B);
const uint32_t x5 = ZVKSED_ROUND(x1, S);
B = x3 ^ x4 ^ x5 ^ rk2;
S = ZVKSED_SUB_BYTES(B);
const uint32_t x6 = ZVKSED_ROUND(x2, S);
B = x4 ^ x5 ^ x6 ^ rk3;
S = ZVKSED_SUB_BYTES(B);
const uint32_t x7 = ZVKSED_ROUND(x3, S);
// Update the destination register.
SET_EGU32x4_LE(state, x4, x5, x6, x7);
}
);
|