diff options
author | Eric Gouriou <ego@rivosinc.com> | 2023-06-01 18:07:22 -0700 |
---|---|---|
committer | Eric Gouriou <ego@rivosinc.com> | 2023-06-19 14:30:34 -0700 |
commit | fbd4ca2eef884b6835e848d761b3e375a66fc47a (patch) | |
tree | 6fff44dce7b7a5adc93d10886cd52929f1a2c32e /riscv | |
parent | d633af2b180391b6f73f84f56d8b305a3af7c152 (diff) | |
download | spike-fbd4ca2eef884b6835e848d761b3e375a66fc47a.zip spike-fbd4ca2eef884b6835e848d761b3e375a66fc47a.tar.gz spike-fbd4ca2eef884b6835e848d761b3e375a66fc47a.tar.bz2 |
Zvk: Implement Zvkg, Vector GCM/GMAC instruction
Implement the proposed instruction in Zvkg, vghmac.vv,
Vector Carryless Multiply Accumulate over GHASH Galois-Field.
The instruction performs one step of the GHASH routine as described
in "NIST Special Publication 800-38D", a.k.a. the AES-GCM specification.
The logic was written to closely track the pseudo-code
in the Zvk specification.
Signed-off-by: Eric Gouriou <ego@rivosinc.com>
Co-authored-by: Kornel Duleba <mindal@semihalf.com>
Signed-off-by: Eric Gouriou <ego@rivosinc.com>
Diffstat (limited to 'riscv')
-rw-r--r-- | riscv/insns/vghsh_vv.h | 38 | ||||
-rw-r--r-- | riscv/insns/vgmul_vv.h | 32 | ||||
-rw-r--r-- | riscv/riscv.mk.in | 5 | ||||
-rw-r--r-- | riscv/zvk_ext_macros.h | 16 |
4 files changed, 89 insertions, 2 deletions
diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h new file mode 100644 index 0000000..bcbfe74 --- /dev/null +++ b/riscv/insns/vghsh_vv.h @@ -0,0 +1,38 @@ +// vghsh.vv vd, vs2, vs1 + +#include "zvk_ext_macros.h" + +require_zvkg; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP( + {}, + { + EGU32x4_t Y = vd; // Current partial hash + EGU32x4_t X = vs1; // Block cipher output + EGU32x4_t H = vs2; // Hash subkey + + EGU32x4_BREV8(H); + EGU32x4_t Z = {}; + + // S = brev8(Y ^ X) + EGU32x4_t S; + EGU32x4_XOR(S, Y, X); + EGU32x4_BREV8(S); + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(S, bit)) { + EGU32x4_XOREQ(Z, H); + } + + const bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Left shift by 1. + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + vd = Z; + } +); diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h new file mode 100644 index 0000000..820b396 --- /dev/null +++ b/riscv/insns/vgmul_vv.h @@ -0,0 +1,32 @@ +// vgmul.vv vd, vs2 + +#include "zvk_ext_macros.h" + +require_zvkg; +require(P.VU.vsew == 32); +require_egw_fits(128); + +VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP( + {}, + { + EGU32x4_t Y = vd; // Multiplier + EGU32x4_BREV8(Y); + EGU32x4_t H = vs2; // Multiplicand + EGU32x4_BREV8(H); + EGU32x4_t Z = {}; + + for (int bit = 0; bit < 128; bit++) { + if (EGU32x4_ISSET(Y, bit)) { + EGU32x4_XOREQ(Z, H); + } + + bool reduce = EGU32x4_ISSET(H, 127); + EGU32x4_LSHIFT(H); // Lef shift by 1 + if (reduce) { + H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial + } + } + EGU32x4_BREV8(Z); + vd = Z; + } +); diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index dcf2640..5562c09 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1364,9 +1364,14 @@ riscv_insn_ext_zvbc = \ vclmulh_vv \ vclmulh_vx \ +riscv_insn_ext_zvkg= \ + vghsh_vv \ + vgmul_vv \ + riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvbb) \ $(riscv_insn_ext_zvbc) \ + 
$(riscv_insn_ext_zvkg) \ riscv_insn_list = \ $(if $(HAVE_INT128),$(riscv_insn_ext_v),) \ diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h index 7efbac8..bf893f9 100644 --- a/riscv/zvk_ext_macros.h +++ b/riscv/zvk_ext_macros.h @@ -942,8 +942,8 @@ // Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes // in A (mutated) with the bytes in B (unchanged). #define EGU32x4_XOREQ(MUT_A, CONST_B) \ - for (std::size_t bidx = 0; bidx < 4; ++bidx) { \ - (MUT_A)[bidx] ^= (CONST_B)[bidx]; \ + for (std::size_t idx = 0; idx < 4; ++idx) { \ + (MUT_A)[idx] ^= (CONST_B)[idx]; \ } // Performs "DST = A ^ B;", i.e., DST (overwritten) receives @@ -953,6 +953,18 @@ (DST)[bidx] = (A)[bidx] ^ (B)[bidx]; \ } +// Performs "DST = A ^ B;", i.e., DST (overwritten) receives +// the xor of the bytes in A and B (both unchanged). +#define EGU32x4_XOR(DST, A, B) \ + do { \ + static_assert(std::is_same<EGU32x4_t, decltype(A)>::value); \ + static_assert(std::is_same<EGU32x4_t, decltype(B)>::value); \ + static_assert(std::is_same<EGU32x4_t, decltype(DST)>::value); \ + for (std::size_t idx = 0; idx < 4; ++idx) { \ + (DST)[idx] = (A)[idx] ^ (B)[idx]; \ + } \ + } while (0) + // // Common bit manipulations logic. // |