diff options
Diffstat (limited to 'riscv/insns/vsm4r_vs.h')
-rw-r--r-- | riscv/insns/vsm4r_vs.h | 51 |
1 files changed, 51 insertions, 0 deletions
diff --git a/riscv/insns/vsm4r_vs.h b/riscv/insns/vsm4r_vs.h new file mode 100644 index 0000000..44011eb --- /dev/null +++ b/riscv/insns/vsm4r_vs.h @@ -0,0 +1,51 @@ +// vsm4r.vs vd, vs2 + +#include "zvksed_ext_macros.h" + +require_vsm4_constraints; +// No overlap of vd and vs2. +require(insn.rd() != insn.rs2()); + +VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( + {}, + // This statement will be executed before the first execution + // of the loop, and only if the loop is going to be entered. + // We cannot use a block ( { ... } ) since we want the variables declared + // here to be visible in the loop block. + // We capture the "scalar", vs2's first element, by copy, even though + // the "no overlap" constraint means that vs2 should remain constant + // during the loop. + const EGU32x4_t scalar_key = P.VU.elt_group<EGU32x4_t>(vs2_num, 0); + const uint32_t rk0 = scalar_key[0]; + const uint32_t rk1 = scalar_key[1]; + const uint32_t rk2 = scalar_key[2]; + const uint32_t rk3 = scalar_key[3];, + { + EGU32x4_t &state = P.VU.elt_group<EGU32x4_t>(vd_num, idx_eg, true); + + // {x0, x1,x2, x3} <- vd + EXTRACT_EGU32x4_WORDS_LE(state, x0, x1, x2, x3); + + uint32_t B; + uint32_t S; + + B = x1 ^ x2 ^ x3 ^ rk0; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x4 = ZVKSED_ROUND(x0, S); + + B = x2 ^ x3 ^ x4 ^ rk1; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x5 = ZVKSED_ROUND(x1, S); + + B = x3 ^ x4 ^ x5 ^ rk2; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x6 = ZVKSED_ROUND(x2, S); + + B = x4 ^ x5 ^ x6 ^ rk3; + S = ZVKSED_SUB_BYTES(B); + const uint32_t x7 = ZVKSED_ROUND(x3, S); + + // Update the destination register. + SET_EGU32x4_LE(state, x4, x5, x6, x7); + } +); |