aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Gouriou <ego@rivosinc.com>2023-06-01 18:07:22 -0700
committerEric Gouriou <ego@rivosinc.com>2023-06-19 14:30:34 -0700
commitfbd4ca2eef884b6835e848d761b3e375a66fc47a (patch)
tree6fff44dce7b7a5adc93d10886cd52929f1a2c32e
parentd633af2b180391b6f73f84f56d8b305a3af7c152 (diff)
downloadriscv-isa-sim-fbd4ca2eef884b6835e848d761b3e375a66fc47a.zip
riscv-isa-sim-fbd4ca2eef884b6835e848d761b3e375a66fc47a.tar.gz
riscv-isa-sim-fbd4ca2eef884b6835e848d761b3e375a66fc47a.tar.bz2
Zvk: Implement Zvkg, Vector GCM/GMAC instruction
Implement the proposed instruction in Zvkg, vghmac.vv, Vector Carryless Multiply Accumulate over the GHASH Galois field. The instruction performs one step of the GHASH routine as described in "NIST Special Publication 800-38D", a.k.a. the AES-GCM specification. The logic was written to closely track the pseudo-code in the Zvk specification. Signed-off-by: Eric Gouriou <ego@rivosinc.com> Co-authored-by: Kornel Duleba <mindal@semihalf.com> Signed-off-by: Eric Gouriou <ego@rivosinc.com>
-rw-r--r--riscv/insns/vghsh_vv.h38
-rw-r--r--riscv/insns/vgmul_vv.h32
-rw-r--r--riscv/riscv.mk.in5
-rw-r--r--riscv/zvk_ext_macros.h16
4 files changed, 89 insertions, 2 deletions
diff --git a/riscv/insns/vghsh_vv.h b/riscv/insns/vghsh_vv.h
new file mode 100644
index 0000000..bcbfe74
--- /dev/null
+++ b/riscv/insns/vghsh_vv.h
@@ -0,0 +1,38 @@
+// vghsh.vv vd, vs2, vs1
+// One GHASH step per element group: vd = brev8(brev8(vd ^ vs1) * brev8(vs2)), using a shift-and-xor multiply reduced by 0x87.
+#include "zvk_ext_macros.h"
+
+require_zvkg;
+require(P.VU.vsew == 32); // Only SEW == 32 is accepted.
+require_egw_fits(128); // NOTE(review): presumably checks that a 128-bit element group fits -- confirm in zvk_ext_macros.h
+
+VI_ZVK_VD_VS1_VS2_EGU32x4_NOVM_LOOP(
+ {},
+ {
+ EGU32x4_t Y = vd; // Current partial hash
+ EGU32x4_t X = vs1; // Block cipher output
+ EGU32x4_t H = vs2; // Hash subkey
+
+ EGU32x4_BREV8(H); // H = brev8(H); H is a local copy, vs2 itself is not written here
+ EGU32x4_t Z = {}; // Product accumulator, starts at zero
+
+ // S = brev8(Y ^ X)
+ EGU32x4_t S;
+ EGU32x4_XOR(S, Y, X);
+ EGU32x4_BREV8(S);
+
+ for (int bit = 0; bit < 128; bit++) { // Z = S * H, one bit of S per iteration
+ if (EGU32x4_ISSET(S, bit)) {
+ EGU32x4_XOREQ(Z, H); // Accumulate the current (shifted and reduced) H
+ }
+
+ const bool reduce = EGU32x4_ISSET(H, 127); // Sample bit 127 before the shift below
+ EGU32x4_LSHIFT(H); // Left shift by 1.
+ if (reduce) {
+ H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial
+ }
+ }
+ EGU32x4_BREV8(Z); // Apply brev8 to the product before writing it back
+ vd = Z;
+ }
+);
diff --git a/riscv/insns/vgmul_vv.h b/riscv/insns/vgmul_vv.h
new file mode 100644
index 0000000..820b396
--- /dev/null
+++ b/riscv/insns/vgmul_vv.h
@@ -0,0 +1,32 @@
+// vgmul.vv vd, vs2
+// Per element group: vd = brev8(brev8(vd) * brev8(vs2)), using a shift-and-xor multiply reduced by 0x87.
+#include "zvk_ext_macros.h"
+
+require_zvkg;
+require(P.VU.vsew == 32); // Only SEW == 32 is accepted.
+require_egw_fits(128); // NOTE(review): presumably checks that a 128-bit element group fits -- confirm in zvk_ext_macros.h
+
+VI_ZVK_VD_VS2_EGU32x4_NOVM_LOOP(
+ {},
+ {
+ EGU32x4_t Y = vd; // Multiplier
+ EGU32x4_BREV8(Y);
+ EGU32x4_t H = vs2; // Multiplicand
+ EGU32x4_BREV8(H);
+ EGU32x4_t Z = {}; // Product accumulator, starts at zero
+
+ for (int bit = 0; bit < 128; bit++) { // Z = Y * H, one bit of Y per iteration
+ if (EGU32x4_ISSET(Y, bit)) {
+ EGU32x4_XOREQ(Z, H); // Accumulate the current (shifted and reduced) H
+ }
+
+ bool reduce = EGU32x4_ISSET(H, 127); // Sample bit 127 before the shift below
+ EGU32x4_LSHIFT(H); // Left shift by 1
+ if (reduce) {
+ H[0] ^= 0x87; // Reduce using x^7 + x^2 + x^1 + 1 polynomial
+ }
+ }
+ EGU32x4_BREV8(Z); // Apply brev8 to the product before writing it back
+ vd = Z;
+ }
+);
diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in
index dcf2640..5562c09 100644
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@@ -1364,9 +1364,14 @@ riscv_insn_ext_zvbc = \
vclmulh_vv \
vclmulh_vx \
+riscv_insn_ext_zvkg = \
+ vghsh_vv \
+ vgmul_vv \
+
riscv_insn_ext_zvk = \
$(riscv_insn_ext_zvbb) \
$(riscv_insn_ext_zvbc) \
+ $(riscv_insn_ext_zvkg) \
riscv_insn_list = \
$(if $(HAVE_INT128),$(riscv_insn_ext_v),) \
diff --git a/riscv/zvk_ext_macros.h b/riscv/zvk_ext_macros.h
index 7efbac8..bf893f9 100644
--- a/riscv/zvk_ext_macros.h
+++ b/riscv/zvk_ext_macros.h
@@ -942,8 +942,8 @@
// Performs "MUT_A ^= CONST_B;", i.e., xor of the bytes
// in A (mutated) with the bytes in B (unchanged).
#define EGU32x4_XOREQ(MUT_A, CONST_B) \
- for (std::size_t bidx = 0; bidx < 4; ++bidx) { \
- (MUT_A)[bidx] ^= (CONST_B)[bidx]; \
+ for (std::size_t idx = 0; idx < 4; ++idx) { \
+ (MUT_A)[idx] ^= (CONST_B)[idx]; \
}
// Performs "DST = A ^ B;", i.e., DST (overwritten) receives
@@ -953,6 +953,18 @@
(DST)[bidx] = (A)[bidx] ^ (B)[bidx]; \
}
+// Performs "DST = A ^ B;", i.e., DST (overwritten) receives the element-wise xor
+// of the 4 elements of A and B (both unchanged). DST, A and B must all have type EGU32x4_t.
+#define EGU32x4_XOR(DST, A, B) \
+ do { \
+ static_assert(std::is_same<EGU32x4_t, decltype(A)>::value); \
+ static_assert(std::is_same<EGU32x4_t, decltype(B)>::value); \
+ static_assert(std::is_same<EGU32x4_t, decltype(DST)>::value); \
+ for (std::size_t idx = 0; idx < 4; ++idx) { \
+ (DST)[idx] = (A)[idx] ^ (B)[idx]; \
+ } \
+ } while (0)
+
//
// Common bit manipulations logic.
//