aboutsummaryrefslogtreecommitdiff
path: root/riscv/insns/vsm4r_vs.h
diff options
context:
space:
mode:
Diffstat (limited to 'riscv/insns/vsm4r_vs.h')
-rw-r--r--riscv/insns/vsm4r_vs.h51
1 files changed, 51 insertions, 0 deletions
diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h
new file mode 100644
index 0000000..44011eb
--- /dev/null
+++ b/riscv/insns/vsm4r_vs.h
@@ -0,0 +1,51 @@
+// vsm4r.vs vd, vs2
+
+#include "zvksed_ext_macros.h"
+
+// Instruction-level checks, stated ahead of the element-group loop below.
+// NOTE(review): require_vsm4_constraints is defined outside this file
+// (presumably in zvksed_ext_macros.h) -- confirm which conditions it enforces.
+require_vsm4_constraints;
+// No overlap of vd and vs2: the destination and source register numbers
+// must differ.
+// NOTE(review): this compares the two register numbers only. Confirm whether
+// a vd register group spanning several registers (LMUL > 1) could still
+// cover vs2 and, if so, whether that case is rejected elsewhere.
+require(insn.rd() != insn.rs2());
+
+VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
+  {},
+  // Pre-loop argument: plain statements rather than a { ... } block, so
+  // that the declarations below remain visible inside the loop body.
+  // They run before the first loop iteration, and only when the loop is
+  // going to be entered.
+  // Element group 0 of vs2 (the "scalar" operand) supplies rk0..rk3. It is
+  // copied here, although the "no overlap" requirement means vs2 should
+  // stay constant while the loop runs.
+  // The comma right after the last statement ends this macro argument.
+  const EGU32x4_t round_keys = P.VU.elt_group<EGU32x4_t>(vs2_num, 0);
+  const uint32_t rk0 = round_keys[0];
+  const uint32_t rk1 = round_keys[1];
+  const uint32_t rk2 = round_keys[2];
+  const uint32_t rk3 = round_keys[3];,
+  {
+    // Element group of vd handled by this iteration: read as the input
+    // words and overwritten with the result at the end.
+    EGU32x4_t &vd_group = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true);
+
+    // {x0, x1, x2, x3} <- current vd element group.
+    EXTRACT_EGU32x4_WORDS_LE(vd_group, x0, x1, x2, x3);
+
+    // Four rounds. Each round XORs the three most recent words with one
+    // round key, passes the result through ZVKSED_SUB_BYTES, and combines
+    // it with the oldest word via ZVKSED_ROUND to produce the next word.
+
+    // Round 0 -> x4
+    const uint32_t mix0 = x1 ^ x2 ^ x3 ^ rk0;
+    const uint32_t sub0 = ZVKSED_SUB_BYTES(mix0);
+    const uint32_t x4 = ZVKSED_ROUND(x0, sub0);
+
+    // Round 1 -> x5
+    const uint32_t mix1 = x2 ^ x3 ^ x4 ^ rk1;
+    const uint32_t sub1 = ZVKSED_SUB_BYTES(mix1);
+    const uint32_t x5 = ZVKSED_ROUND(x1, sub1);
+
+    // Round 2 -> x6
+    const uint32_t mix2 = x3 ^ x4 ^ x5 ^ rk2;
+    const uint32_t sub2 = ZVKSED_SUB_BYTES(mix2);
+    const uint32_t x6 = ZVKSED_ROUND(x2, sub2);
+
+    // Round 3 -> x7
+    const uint32_t mix3 = x4 ^ x5 ^ x6 ^ rk3;
+    const uint32_t sub3 = ZVKSED_SUB_BYTES(mix3);
+    const uint32_t x7 = ZVKSED_ROUND(x3, sub3);
+
+    // {x4, x5, x6, x7} -> destination element group.
+    SET_EGU32x4_LE(vd_group, x4, x5, x6, x7);
+  }
+);