// vsm4r.vs vd, vs2 #include "zvksed_ext_macros.h" require_vsm4_constraints; // No overlap of vd and vs2. require(insn.rd() != insn.rs2()); VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP( {}, // This statement will be executed before the first execution // of the loop, and only if the loop is going to be entered. // We cannot use a block ( { ... } ) since we want the variables declared // here to be visible in the loop block. // We capture the "scalar", vs2's first element, by copy, even though // the "no overlap" constraint means that vs2 should remain constant // during the loop. const EGU32x4_t scalar_key = P.VU.elt_group(vs2_num, 0); const uint32_t rk0 = scalar_key[0]; const uint32_t rk1 = scalar_key[1]; const uint32_t rk2 = scalar_key[2]; const uint32_t rk3 = scalar_key[3];, { EGU32x4_t &state = P.VU.elt_group(vd_num, idx_eg, true); // {x0, x1,x2, x3} <- vd EXTRACT_EGU32x4_WORDS_LE(state, x0, x1, x2, x3); uint32_t B; uint32_t S; B = x1 ^ x2 ^ x3 ^ rk0; S = ZVKSED_SUB_BYTES(B); const uint32_t x4 = ZVKSED_ROUND(x0, S); B = x2 ^ x3 ^ x4 ^ rk1; S = ZVKSED_SUB_BYTES(B); const uint32_t x5 = ZVKSED_ROUND(x1, S); B = x3 ^ x4 ^ x5 ^ rk2; S = ZVKSED_SUB_BYTES(B); const uint32_t x6 = ZVKSED_ROUND(x2, S); B = x4 ^ x5 ^ x6 ^ rk3; S = ZVKSED_SUB_BYTES(B); const uint32_t x7 = ZVKSED_ROUND(x3, S); // Update the destination register. SET_EGU32x4_LE(state, x4, x5, x6, x7); } );