1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
// vaesdm.vs vd, vs2
//
// Zvkned AES decryption, middle round, vector-scalar form: applies one
// InvShiftRows / InvSubBytes / AddRoundKey / InvMixColumns round to every
// element group of vd, using the single round key held in element group 0
// of vs2 (the ".vs" scalar). The presence of InvMixColumns distinguishes
// this middle-round instruction from the final-round variant.
#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"
// Enforces the Zvkned `.vs`-form legality constraints before executing
// (presumably vector-config checks and the vd/vs2 no-overlap rule the
// comment below relies on — exact checks live in the macro definition).
require_vaes_vs_constraints;
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
  // No extra per-loop variable declarations needed by this instruction.
  {},
  // PRELOOP argument. This statement will be executed before the first
  // execution of the loop, and only if the loop is going to be entered.
  // We cannot use a block ( { ... } ) since we want the variables declared
  // here to be visible in the loop block.
  // We capture the "scalar", vs2's first element group, by copy, even
  // though the "no overlap" constraint means that vs2 should remain
  // constant during the loop.
  // NOTE: the trailing `;,` is intentional — the `;` terminates the
  // declaration, the `,` separates macro arguments.
  const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
  // Loop body, executed once per element group (index `idx_eg`).
  {
    // For AES128, AES192, or AES256, state and key are 128b/16B values:
    // - vd in contains the input state,
    // - vs2 contains the input round key,
    // - vd out receives the output state.
    //
    // While the spec calls for handling the vector as made of EGU32x4
    // element groups (i.e., 4 uint32_t), it is convenient to treat
    // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
    // we extract the operands here instead of using the existing LOOP
    // macro that defines/extracts the operand variables as EGU32x4.
    EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
    // InvShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions.
    VAES_INV_SHIFT_ROWS(aes_state);
    // InvSubBytes - Apply S-box to every byte in the state
    VAES_INV_SUB_BYTES(aes_state);
    // AddRoundKey (which is also InvAddRoundKey as it's xor)
    EGU8x16_XOREQ(aes_state, scalar_key);
    // InvMixColumns
    VAES_INV_MIX_COLUMNS(aes_state);
    // Update the destination register: fetch a writable reference to the
    // element group (the `true` flag marks it for write-back) and copy
    // the transformed state into it.
    EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
    EGU8x16_COPY(vd, aes_state);
  }
);
|