From 00873aa61acae4a17c1d269cddf1885e83b50102 Mon Sep 17 00:00:00 2001
From: Eric Gouriou
Date: Thu, 1 Jun 2023 18:07:32 -0700
Subject: Zvk: Implement Zvknh[ab], NIST Suite: Vector SHA-2

Implement the instructions part of the Zvknha and Zvknhb sub-extensions:
- vsha2ms.vv, message schedule
- vsha2ch.vv / vsha2cl.vv, compression rounds

A header file for common macros is added.

Signed-off-by: Eric Gouriou
---
 riscv/insns/vsha2ch_vv.h |  61 +++++++++++++++++++
 riscv/insns/vsha2cl_vv.h |  62 +++++++++++++++++++
 riscv/insns/vsha2ms_vv.h |  63 +++++++++++++++++++
 riscv/riscv.mk.in        |   7 +++
 riscv/zvknh_ext_macros.h | 155 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 348 insertions(+)
 create mode 100644 riscv/insns/vsha2ch_vv.h
 create mode 100644 riscv/insns/vsha2cl_vv.h
 create mode 100644 riscv/insns/vsha2ms_vv.h
 create mode 100644 riscv/zvknh_ext_macros.h

diff --git a/riscv/insns/vsha2ch_vv.h b/riscv/insns/vsha2ch_vv.h
new file mode 100644
index 0000000..34c6e05
--- /dev/null
+++ b/riscv/insns/vsha2ch_vv.h
@@ -0,0 +1,61 @@
+// vsha2ch.vv vd, vs2, vs1
+
+#include "zvknh_ext_macros.h"
+
+// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
+require_vsha2_common_constraints;
+
+switch (P.VU.vsew) {
+  case e32: {
+    require_vsha2_vsew32_constraints;
+
+    VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
+      {},
+      {
+        // {c, d, g, h} <- vd
+        EXTRACT_EGU32x4_WORDS_BE(vd, c, d, g, h);
+        // {a, b, e, f} <- vs2
+        EXTRACT_EGU32x4_WORDS_BE(vs2, a, b, e, f);
+        // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W
+        EXTRACT_EGU32x4_WORDS_BE(vs1, kw3, kw2,
+                                 UNUSED _unused_kw1, UNUSED _unused_kw0);
+
+        ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw2);
+        ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw3);
+
+        // Update the destination register, vd <- {a, b, e, f}.
+        SET_EGU32x4_BE(vd, a, b, e, f);
+      }
+    );
+    break;
+  }
+
+  case e64: {
+    require_vsha2_vsew64_constraints;
+
+    VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP(
+      {},
+      {
+        // {c, d, g, h} <- vd
+        EXTRACT_EGU64x4_WORDS_BE(vd, c, d, g, h);
+        // {a, b, e, f} <- vs2
+        EXTRACT_EGU64x4_WORDS_BE(vs2, a, b, e, f);
+        // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W
+        EXTRACT_EGU64x4_WORDS_BE(vs1, kw3, kw2,
+                                 UNUSED _unused_kw1, UNUSED _unused_kw0);
+
+        ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw2);
+        ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw3);
+
+        // Update the destination register, vd <- {a, b, e, f}.
+        SET_EGU64x4_BE(vd, a, b, e, f);
+      }
+    );
+    break;
+  }
+
+  // 'require_vsha2_common_constraints' ensures that
+  // VSEW is either 32 or 64.
+  default:
+    require(false);
+}
diff --git a/riscv/insns/vsha2cl_vv.h b/riscv/insns/vsha2cl_vv.h
new file mode 100644
index 0000000..4a1df09
--- /dev/null
+++ b/riscv/insns/vsha2cl_vv.h
@@ -0,0 +1,62 @@
+// vsha2cl.vv vd, vs2, vs1
+
+#include "zvknh_ext_macros.h"
+
+// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
+require_vsha2_common_constraints;
+
+switch (P.VU.vsew) {
+  case e32: {
+    require_vsha2_vsew32_constraints;
+
+    VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
+      {},
+      {
+        // {c, d, g, h} <- vd
+        EXTRACT_EGU32x4_WORDS_BE(vd, c, d, g, h);
+        // {a, b, e, f} <- vs2
+        EXTRACT_EGU32x4_WORDS_BE(vs2, a, b, e, f);
+        // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W
+        EXTRACT_EGU32x4_WORDS_BE(vs1, UNUSED _unused_kw3, UNUSED _unused_kw2,
+                                 kw1, kw0);
+
+        ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw0);
+        ZVK_SHA256_COMPRESS(a, b, c, d, e, f, g, h, kw1);
+
+        // Update the destination register, vd <- {a, b, e, f}.
+        SET_EGU32x4_BE(vd, a, b, e, f);
+      }
+    );
+    break;
+  }
+
+  case e64: {
+    require_vsha2_vsew64_constraints;
+
+    VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP(
+      {},
+      {
+        // {c, d, g, h} <- vd
+        EXTRACT_EGU64x4_WORDS_BE(vd, c, d, g, h);
+        // {a, b, e, f} <- vs2
+        EXTRACT_EGU64x4_WORDS_BE(vs2, a, b, e, f);
+        // {kw3, kw2, kw1, kw0} <- vs1. "kw" stands for K+W
+        EXTRACT_EGU64x4_WORDS_BE(vs1, UNUSED _unused_kw3, UNUSED _unused_kw2,
+                                 kw1, kw0);
+
+        ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw0);
+        ZVK_SHA512_COMPRESS(a, b, c, d, e, f, g, h, kw1);
+
+        // Update the destination register, vd <- {a, b, e, f}.
+        SET_EGU64x4_BE(vd, a, b, e, f);
+      }
+    );
+    break;
+  }
+
+  // 'require_vsha2_common_constraints' ensures that
+  // VSEW is either 32 or 64.
+  default:
+    require(false);
+}
+
diff --git a/riscv/insns/vsha2ms_vv.h b/riscv/insns/vsha2ms_vv.h
new file mode 100644
index 0000000..8f1ca08
--- /dev/null
+++ b/riscv/insns/vsha2ms_vv.h
@@ -0,0 +1,63 @@
+// vsha2ms.vv vd, vs2, vs1
+
+#include "zvknh_ext_macros.h"
+
+// Ensures VSEW is 32 or 64, and vd doesn't overlap with either vs1 or vs2.
+require_vsha2_common_constraints;
+
+switch (P.VU.vsew) {
+  case e32: {
+    require_vsha2_vsew32_constraints;
+
+    VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
+      {},
+      {
+        // {w3, w2, w1, w0} <- vd
+        EXTRACT_EGU32x4_WORDS_BE(vd, w3, w2, w1, w0);
+        // {w11, w10, w9, w4} <- vs2
+        EXTRACT_EGU32x4_WORDS_BE(vs2, w11, w10, w9, w4);
+        // {w15, w14, w13, w12} <- vs1
+        EXTRACT_EGU32x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12);
+
+        const uint32_t w16 = ZVK_SHA256_SCHEDULE(w14, w9, w1, w0);
+        const uint32_t w17 = ZVK_SHA256_SCHEDULE(w15, w10, w2, w1);
+        const uint32_t w18 = ZVK_SHA256_SCHEDULE(w16, w11, w3, w2);
+        const uint32_t w19 = ZVK_SHA256_SCHEDULE(w17, w12, w4, w3);
+
+        // Update the destination register.
+        SET_EGU32x4_BE(vd, w19, w18, w17, w16);
+      }
+    );
+    break;
+  }
+
+  case e64: {
+    require_vsha2_vsew64_constraints;
+
+    VI_ZVK_VD_VS1_VS2_EGU64x4_NOVM_LOOP(
+      {},
+      {
+        // {w3, w2, w1, w0} <- vd
+        EXTRACT_EGU64x4_WORDS_BE(vd, w3, w2, w1, w0);
+        // {w11, w10, w9, w4} <- vs2
+        EXTRACT_EGU64x4_WORDS_BE(vs2, w11, w10, w9, w4);
+        // {w15, w14, w13, w12} <- vs1
+        EXTRACT_EGU64x4_WORDS_BE(vs1, w15, w14, UNUSED _unused_w13, w12);
+
+        const uint64_t w16 = ZVK_SHA512_SCHEDULE(w14, w9, w1, w0);
+        const uint64_t w17 = ZVK_SHA512_SCHEDULE(w15, w10, w2, w1);
+        const uint64_t w18 = ZVK_SHA512_SCHEDULE(w16, w11, w3, w2);
+        const uint64_t w19 = ZVK_SHA512_SCHEDULE(w17, w12, w4, w3);
+
+        // Update the destination register.
+        SET_EGU64x4_BE(vd, w19, w18, w17, w16);
+      }
+    );
+    break;
+  }
+
+  // 'require_vsha2_common_constraints' ensures that
+  // VSEW is either 32 or 64.
+  default:
+    require(false);
+}
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index 5562c09..4ce088f 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -1368,10 +1368,17 @@ riscv_insn_ext_zvkg= \
   vghsh_vv \
   vgmul_vv \
 
+# Covers both Zvknha and Zvknhb.
+riscv_insn_ext_zvknh = \
+  vsha2cl_vv \
+  vsha2ch_vv \
+  vsha2ms_vv \
+
 riscv_insn_ext_zvk = \
   $(riscv_insn_ext_zvbb) \
   $(riscv_insn_ext_zvbc) \
   $(riscv_insn_ext_zvkg) \
+  $(riscv_insn_ext_zvknh) \
 
 riscv_insn_list = \
   $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \
diff --git a/riscv/zvknh_ext_macros.h b/riscv/zvknh_ext_macros.h
new file mode 100644
index 0000000..b50818b
--- /dev/null
+++ b/riscv/zvknh_ext_macros.h
@@ -0,0 +1,155 @@
+// Helper macros to implement instructions defined as part of
+// the RISC-V Zvknh[ab] extensions (vector SHA-256/SHA-512 cryptography).
+
+#include "zvk_ext_macros.h"
+
+#ifndef RISCV_ZVKNH_EXT_MACROS_H_
+#define RISCV_ZVKNH_EXT_MACROS_H_
+
+// Constraints common to all vsha* instructions, across all VSEW:
+//  - VSEW is 32 (SHA-256) or 64 (SHA-512)
+//  - No overlap of vd with vs1 or vs2.
+//
+// The constraint that vstart and vl are both EGS (4) aligned
+// is checked in the VI_..._EGU32x4_..._LOOP and VI_..._EGU64x4_..._LOOP
+// macros.
+#define require_vsha2_common_constraints \
+  do { \
+    require(P.VU.vsew == 32 || P.VU.vsew == 64); \
+    require(insn.rd() != insn.rs1()); \
+    require(insn.rd() != insn.rs2()); \
+  } while (false)
+
+// Constraints on vsha2 instructions that must be verified when VSEW==32.
+// Those are *IN ADDITION* to the constraints checked by
+// 'require_vsha2_common_constraints', which is meant to be run earlier.
+//
+// The constraint that vstart and vl are both EGS (4) aligned
+// is checked in the VI_ZVK_..._EGU32x4_..._LOOP macros.
+#define require_vsha2_vsew32_constraints \
+  do { \
+    require_zvknh_256; \
+    require_egw_fits(128); \
+  } while (false)
+
+// Constraints on vsha2 instructions that must be verified when VSEW==64.
+// Those are *IN ADDITION* to the constraints checked by
+// 'require_vsha2_common_constraints', which is meant to be run earlier.
+//
+// The constraint that vstart and vl are both EGS (4) aligned
+// is checked in the VI_ZVK_..._EGU64x4_..._LOOP macros.
+#define require_vsha2_vsew64_constraints \
+  do { \
+    require_zvknh_512; \
+    require_egw_fits(256); \
+  } while (false)
+
+//
+// SHA-256 and SHA-512 common logic
+//
+
+// Ch(x, y, z) = (xy) ⊕ (~xz) = xy | ~xz
+#define ZVK_SHA_CH(X, Y, Z) (((X) & (Y)) ^ ((~(X)) & (Z)))
+
+// Maj(x, y, z) = (xy) ⊕ (xz) ⊕ (yz) = xy | xz | yz
+#define ZVK_SHA_MAJ(X, Y, Z) (((X) & (Y)) ^ ((X) & (Z)) ^ ((Y) & (Z)))
+
+//
+// SHA-256
+//
+
+// sum0(x) = ROTR2(x) ⊕ ROTR13(x) ⊕ ROTR22(x)
+#define ZVK_SHA256_SUM0(X) \
+  (ZVK_ROR32(X, 2) ^ ZVK_ROR32(X, 13) ^ ZVK_ROR32(X, 22))
+
+// sum1(x) = ROTR6(x) ⊕ ROTR11(x) ⊕ ROTR25(x)
+#define ZVK_SHA256_SUM1(X) \
+  (ZVK_ROR32(X, 6) ^ ZVK_ROR32(X, 11) ^ ZVK_ROR32(X, 25))
+
+// sig0(x) = ROTR7(x) ⊕ ROTR18(x) ⊕ SHR3(x)
+#define ZVK_SHA256_SIG0(X) \
+  (ZVK_ROR32(X, 7) ^ ZVK_ROR32(X, 18) ^ ((X) >> 3))
+
+// sig1(x) = ROTR17(x) ⊕ ROTR19(x) ⊕ SHR10(x)
+#define ZVK_SHA256_SIG1(X) \
+  (ZVK_ROR32(X, 17) ^ ZVK_ROR32(X, 19) ^ ((X) >> 10))
+
+// Given the schedule words W[t+0], W[t+1], W[t+9], W[t+14], computes
+// W[t+16].
+#define ZVK_SHA256_SCHEDULE(W14, W9, W1, W0) \
+  (ZVK_SHA256_SIG1(W14) + (W9) + ZVK_SHA256_SIG0(W1) + (W0))
+
+// Performs one round of compression (out of the 64 rounds), given the state
+// temporaries A,B,C,...,H, and KW, the sum Kt+Wt.
+// Updates A,B,C,...,H to their new values. KW is not modified.
+//
+// Note that some of the logic could be omitted in vsha2c[hl] since
+// some of the variables are dropped in each of those. However, removing
+// those unnecessary updates reduces the opportunities to share this single
+// per-round logic and forces us to move further away from how the logic
+// is expressed in FIPS PUB 180-4.
+#define ZVK_SHA256_COMPRESS(A, B, C, D, E, F, G, H, KW) \
+  { \
+    const uint32_t t1 = (H) + ZVK_SHA256_SUM1(E) + \
+                        ZVK_SHA_CH((E), (F), (G)) + (KW); \
+    const uint32_t t2 = ZVK_SHA256_SUM0(A) + ZVK_SHA_MAJ((A), (B), (C)); \
+    (H) = (G); \
+    (G) = (F); \
+    (F) = (E); \
+    (E) = (D) + t1; \
+    (D) = (C); \
+    (C) = (B); \
+    (B) = (A); \
+    (A) = t1 + t2; \
+  }
+
+//
+// SHA-512
+//
+
+// sum0(x) = ROTR28(x) ⊕ ROTR34(x) ⊕ ROTR39(x)
+#define ZVK_SHA512_SUM0(X) \
+  (ZVK_ROR64(X, 28) ^ ZVK_ROR64(X, 34) ^ ZVK_ROR64(X, 39))
+
+// sum1(x) = ROTR14(x) ⊕ ROTR18(x) ⊕ ROTR41(x)
+#define ZVK_SHA512_SUM1(X) \
+  (ZVK_ROR64(X, 14) ^ ZVK_ROR64(X, 18) ^ ZVK_ROR64(X, 41))
+
+// sig0(x) = ROTR1(x) ⊕ ROTR8(x) ⊕ SHR7(x)
+#define ZVK_SHA512_SIG0(X) \
+  (ZVK_ROR64(X, 1) ^ ZVK_ROR64(X, 8) ^ ((X) >> 7))
+
+// sig1(x) = ROTR19(x) ⊕ ROTR61(x) ⊕ SHR6(x)
+#define ZVK_SHA512_SIG1(X) \
+  (ZVK_ROR64(X, 19) ^ ZVK_ROR64(X, 61) ^ ((X) >> 6))
+
+// Given the schedule words W[t+0], W[t+1], W[t+9], W[t+14], computes
+// W[t+16].
+#define ZVK_SHA512_SCHEDULE(W14, W9, W1, W0) \
+  (ZVK_SHA512_SIG1(W14) + (W9) + ZVK_SHA512_SIG0(W1) + (W0))
+
+// Performs one round of compression (out of the 80 rounds), given the state
+// temporaries A,B,C,...,H, and KW, the sum Kt+Wt.
+// Updates A,B,C,...,H to their new values. KW is not modified.
+//
+// Note that some of the logic could be omitted in vsha2c[hl] since
+// some of the variables are dropped in each of those. However, removing
+// those unnecessary updates reduces the opportunities to share this single
+// per-round logic and forces us to move further away from how the logic
+// is expressed in FIPS PUB 180-4.
+#define ZVK_SHA512_COMPRESS(A, B, C, D, E, F, G, H, KW) \
+  { \
+    const uint64_t t1 = (H) + ZVK_SHA512_SUM1(E) + \
+                        ZVK_SHA_CH((E), (F), (G)) + (KW); \
+    const uint64_t t2 = ZVK_SHA512_SUM0(A) + ZVK_SHA_MAJ((A), (B), (C)); \
+    (H) = (G); \
+    (G) = (F); \
+    (F) = (E); \
+    (E) = (D) + t1; \
+    (D) = (C); \
+    (C) = (B); \
+    (B) = (A); \
+    (A) = t1 + t2; \
+  }
+
+#endif  // RISCV_ZVKNH_EXT_MACROS_H_
-- 
cgit v1.1
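A note for readers, outside the patch proper: the scalar form of the recurrence implemented by ZVK_SHA256_SCHEDULE may make the vsha2ms.vv register layout easier to follow. The sketch below is illustrative only and is not part of the commit; the helper names ror32, sig0, sig1, and sha256_schedule_group are hypothetical, not Spike identifiers.

#include <stdint.h>

/* 32-bit rotate right, mirroring ZVK_ROR32 (assumed 0 < n < 32 here). */
static inline uint32_t ror32(uint32_t x, unsigned n) {
  return (x >> n) | (x << (32 - n));
}

/* sig0/sig1 from FIPS PUB 180-4, matching ZVK_SHA256_SIG0/SIG1 above. */
static inline uint32_t sig0(uint32_t x) {
  return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
}
static inline uint32_t sig1(uint32_t x) {
  return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
}

/*
 * Schedule recurrence: W[t] = sig1(W[t-2]) + W[t-7] + sig0(W[t-15]) + W[t-16].
 * One vsha2ms.vv element group consumes
 *   vd  = {W3, W2, W1, W0},
 *   vs2 = {W11, W10, W9, W4},
 *   vs1 = {W15, W14, W13, W12}   (W13 is unused),
 * and produces vd = {W19, W18, W17, W16}.
 */
static void sha256_schedule_group(const uint32_t w[16], uint32_t out[4]) {
  out[0] = sig1(w[14]) + w[9]  + sig0(w[1]) + w[0];   /* W16 */
  out[1] = sig1(w[15]) + w[10] + sig0(w[2]) + w[1];   /* W17 */
  out[2] = sig1(out[0]) + w[11] + sig0(w[3]) + w[2];  /* W18 */
  out[3] = sig1(out[1]) + w[12] + sig0(w[4]) + w[3];  /* W19 */
}

W18 and W19 chain on the freshly computed W16 and W17, which is why the loop body in vsha2ms_vv.h computes the four schedule words in order rather than independently.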