1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
// vsm4k.vi vd, vs2, round#
#include "zvksed_ext_macros.h"
// SM4 Constant Key (CK) - section 7.3.2. of the IETF draft.
//
// 32 constant words, laid out four per row. The key-expansion body in this
// file reads them as zvksed_ck[4 * round + i] for i in 0..3, with 'round'
// masked to [0, 7], so each row below is the group of constants consumed
// for one value of 'round' and the largest index used is 31.
static constexpr uint32_t zvksed_ck[32] = {
0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269,  // round 0
0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9,  // round 1
0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249,  // round 2
0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9,  // round 3
0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229,  // round 4
0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299,  // round 5
0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209,  // round 6
0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279   // round 7
};
// Instruction preconditions. The exact checks are defined by the
// 'require_vsm4_constraints' macro, which is not visible in this file.
require_vsm4_constraints;
// The loop macro takes three arguments, separated by top-level commas:
//   1. a block that is left empty here (its role is defined by the macro,
//      which is not visible in this file),
//   2. statements run once before the loop,
//   3. the body run for each element group.
// NOTE(review): because these are macro arguments, any comma added outside
// parentheses in the code below would split an argument.
VI_ZVK_VD_VS2_ZIMM5_EGU32x4_NOVM_LOOP(
{},
// The following statements will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the 'round' variable
// declared and defined here to be visible in the loop block.
// Only consider the bottom 3 bits of the immediate, ensuring that
// 'round' is in the valid range [0, 7].
const reg_t round = zimm5 & 0x7;,
// Per Element Group body.
//
// Derives four new key words rk4..rk7 from the four input words rk0..rk3.
// Every step has the same shape:
//   B     = XOR of the three most recent key words and the next CK word
//   S     = ZVKSED_SUB_BYTES(B)
//   rk[n] = ZVKSED_ROUND_KEY(rk[n-4], S)
// so each new word feeds into the steps that follow it; the statement
// order is significant. CK words 4*round .. 4*round+3 are used in order.
{
// {rk0, rk1, rk2, rk3} <- vs2
// NOTE(review): rk0..rk3 are not declared anywhere in this file, so the
// macro presumably declares them - confirm in zvksed_ext_macros.h.
EXTRACT_EGU32x4_WORDS_LE(vs2, rk0, rk1, rk2, rk3);
// Step 1: rk4 from rk0 and (rk1, rk2, rk3, CK[4*round]).
uint32_t B = rk1 ^ rk2 ^ rk3 ^ zvksed_ck[4 * round];
uint32_t S = ZVKSED_SUB_BYTES(B);
uint32_t rk4 = ZVKSED_ROUND_KEY(rk0, S);
// Step 2: rk5 from rk1 and (rk2, rk3, rk4, CK[4*round + 1]).
B = rk2 ^ rk3 ^ rk4 ^ zvksed_ck[4 * round + 1];
S = ZVKSED_SUB_BYTES(B);
uint32_t rk5 = ZVKSED_ROUND_KEY(rk1, S);
// Step 3: rk6 from rk2 and (rk3, rk4, rk5, CK[4*round + 2]).
B = rk3 ^ rk4 ^ rk5 ^ zvksed_ck[4 * round + 2];
S = ZVKSED_SUB_BYTES(B);
uint32_t rk6 = ZVKSED_ROUND_KEY(rk2, S);
// Step 4: rk7 from rk3 and (rk4, rk5, rk6, CK[4*round + 3]).
B = rk4 ^ rk5 ^ rk6 ^ zvksed_ck[4 * round + 3];
S = ZVKSED_SUB_BYTES(B);
uint32_t rk7 = ZVKSED_ROUND_KEY(rk3, S);
// Update the destination register.
// Only the four newly derived words are written to vd; rk0..rk3 are not.
SET_EGU32x4_LE(vd, rk4, rk5, rk6, rk7);
}
);
|