aboutsummaryrefslogtreecommitdiff
path: root/riscv/insns/vsha2ms_vv.h
blob: 8f1ca085ae67ba1d5cd86b39f55bd52bd9683343 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// vsha2ms.vv vd, vs2, vs1

#include "zvknh_ext_macros.h"

// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
require_vsha2_common_constraints;

// Dispatch on the selected element width. Both arms have the same shape:
//   1. read twelve previously computed message-schedule words from
//      vd (w0..w3), vs2 (w4, w9..w11) and vs1 (w12..w15),
//   2. derive the next four words w16..w19,
//   3. write {w19, w18, w17, w16} back to vd.
// The e32 arm uses the SHA-256 schedule macro on 4x32-bit element groups;
// the e64 arm uses the SHA-512 schedule macro on 4x64-bit element groups.
// NOTE(review): the loop, extract, schedule and set macros are defined
// outside this file (see zvknh_ext_macros.h); their exact expansion is not
// visible here.
switch (P.VU.vsew) {
  case e32: {
    // Additional checks specific to the 32-bit variant.
    require_vsha2_vsew32_constraints;

    VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
      {},  // No extra per-instruction setup before the loop body.
      {
        // {w3, w2, w1, w0} <- vd
        EXTRACT_EGU32x4_WORDS_BE(vd, w3, w2, w1, w0);
        // {w11, w10, w9, w4} <- vs2
        EXTRACT_EGU32x4_WORDS_BE(vs2, w11, w10, w9, w4);
        // {w15, w14, w13, w12} <- vs1
        // w13 occupies a slot in the group but is not an input to any of
        // the four words computed below, hence the UNUSED placeholder.
        EXTRACT_EGU32x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12);

        // Each new word w[t] is built from w[t-2], w[t-7], w[t-15] and
        // w[t-16] (in that argument order). w18 and w19 consume the freshly
        // computed w16 and w17, so these four statements must stay in order.
        const uint32_t w16 = ZVK_SHA256_SCHEDULE(w14,  w9, w1, w0);
        const uint32_t w17 = ZVK_SHA256_SCHEDULE(w15, w10, w2, w1);
        const uint32_t w18 = ZVK_SHA256_SCHEDULE(w16, w11, w3, w2);
        const uint32_t w19 = ZVK_SHA256_SCHEDULE(w17, w12, w4, w3);

        // Update the destination register.
        SET_EGU32x4_BE(vd, w19, w18, w17, w16);
      }
    );
    break;
  }

  case e64: {
    // Additional checks specific to the 64-bit variant.
    require_vsha2_vsew64_constraints;

    VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP(
      {},  // No extra per-instruction setup before the loop body.
      {
        // {w3, w2, w1, w0} <- vd
        EXTRACT_EGU64x4_WORDS_BE(vd, w3, w2, w1, w0);
        // {w11, w10, w9, w4} <- vs2
        EXTRACT_EGU64x4_WORDS_BE(vs2, w11, w10, w9, w4);
        // {w15, w14, w13, w12} <- vs1
        // w13 occupies a slot in the group but is not an input to any of
        // the four words computed below, hence the UNUSED placeholder.
        EXTRACT_EGU64x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12);

        // Same dependency chain as the e32 arm: w18 and w19 consume the
        // freshly computed w16 and w17, so statement order matters.
        const uint64_t w16 = ZVK_SHA512_SCHEDULE(w14,  w9, w1, w0);
        const uint64_t w17 = ZVK_SHA512_SCHEDULE(w15, w10, w2, w1);
        const uint64_t w18 = ZVK_SHA512_SCHEDULE(w16, w11, w3, w2);
        const uint64_t w19 = ZVK_SHA512_SCHEDULE(w17, w12, w4, w3);

        // Update the destination register.
        SET_EGU64x4_BE(vd, w19, w18, w17, w16);
      }
    );
    break;
  }

  // 'require_vsha2_common_constraints' ensures that
  // VSEW is either 32 or 64, so this arm is a defensive fallback that
  // rejects the instruction if that invariant is ever broken.
  default:
    require(false);
}