1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
|
// vaesdm.vs vd, vs2
//
// Zvkned AES decryption, middle round, vector-scalar form: applies one
// InvShiftRows / InvSubBytes / AddRoundKey / InvMixColumns round to every
// element group of vd, using the single round key held in element group 0
// of vs2 (the ".vs" scalar). The presence of InvMixColumns distinguishes
// this middle-round instruction from the final-round variant.
#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"
// Enforces the Zvkned `.vs`-form legality constraints before executing
// (presumably vector-config checks and the vd/vs2 no-overlap rule the
// comment below relies on — exact checks live in the macro definition).
require_vaes_vs_constraints;
VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
  // No extra per-loop variable declarations needed by this instruction.
  {},
  // PRELOOP argument. This statement will be executed before the first
  // execution of the loop, and only if the loop is going to be entered.
  // We cannot use a block ( { ... } ) since we want the variables declared
  // here to be visible in the loop block.
  // We capture the "scalar", vs2's first element group, by copy, even
  // though the "no overlap" constraint means that vs2 should remain
  // constant during the loop.
  // NOTE: the trailing `;,` is intentional — the `;` terminates the
  // declaration, the `,` separates macro arguments.
  const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
  // Loop body, executed once per element group (index `idx_eg`).
  {
    // For AES128, AES192, or AES256, state and key are 128b/16B values:
    // - vd in contains the input state,
    // - vs2 contains the input round key,
    // - vd out receives the output state.
    //
    // While the spec calls for handling the vector as made of EGU32x4
    // element groups (i.e., 4 uint32_t), it is convenient to treat
    // AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
    // we extract the operands here instead of using the existing LOOP
    // macro that defines/extracts the operand variables as EGU32x4.
    EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
    // InvShiftRows - Rotate each row bytes by 0, 1, 2, 3 positions.
    VAES_INV_SHIFT_ROWS(aes_state);
    // InvSubBytes - Apply S-box to every byte in the state
    VAES_INV_SUB_BYTES(aes_state);
    // AddRoundKey (which is also InvAddRoundKey as it's xor)
    EGU8x16_XOREQ(aes_state, scalar_key);
    // InvMixColumns
    VAES_INV_MIX_COLUMNS(aes_state);
    // Update the destination register: fetch a writable reference to the
    // element group (the `true` flag marks it for write-back) and copy
    // the transformed state into it.
    EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
    EGU8x16_COPY(vd, aes_state);
  }
);
|