diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2021-12-17 22:02:57 -0800 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2022-03-04 08:50:41 -1000 |
commit | 4e73f842e8b4d90776fb25a5b699b807902f1881 (patch) | |
tree | 9b0b35c0f0d92e05bc1b7a6c7b15ba9f6b9fdbb5 /tcg | |
parent | 264e4182303488fdb2917cd908f9243b317bb499 (diff) | |
download | qemu-4e73f842e8b4d90776fb25a5b699b807902f1881.zip qemu-4e73f842e8b4d90776fb25a5b699b807902f1881.tar.gz qemu-4e73f842e8b4d90776fb25a5b699b807902f1881.tar.bz2 |
tcg/i386: Implement avx512 immediate rotate
AVX512VL has VPROLD and VPROLQ, layered onto the same
opcode as PSHIFTD, but requires EVEX encoding and W1.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 15 | ||||
-rw-r--r-- | tcg/i386/tcg-target.h | 2 |
2 files changed, 14 insertions, 3 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index de01fbf..3a9f6a3 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -362,7 +362,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) #define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2) #define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3) #define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */ -#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */ +#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */ #define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */ #define OPC_PSLLW (0xf1 | P_EXT | P_DATA16) #define OPC_PSLLD (0xf2 | P_EXT | P_DATA16) @@ -3000,6 +3000,14 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, insn = shift_imm_insn[vece]; } sub = 4; + goto gen_shift; + case INDEX_op_rotli_vec: + insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */ + if (vece == MO_64) { + insn |= P_VEXW; + } + sub = 1; + goto gen_shift; gen_shift: tcg_debug_assert(vece != MO_8); if (type == TCG_TYPE_V256) { @@ -3289,6 +3297,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_shli_vec: case INDEX_op_shri_vec: case INDEX_op_sari_vec: + case INDEX_op_rotli_vec: case INDEX_op_x86_psrldq_vec: return C_O1_I1(x, x); @@ -3310,11 +3319,13 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_xor_vec: case INDEX_op_andc_vec: return 1; - case INDEX_op_rotli_vec: case INDEX_op_cmp_vec: case INDEX_op_cmpsel_vec: return -1; + case INDEX_op_rotli_vec: + return have_avx512vl && vece >= MO_32 ? 1 : -1; + case INDEX_op_shli_vec: case INDEX_op_shri_vec: /* We must expand the operation for MO_8. */ diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 79af353..23a8b2a 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -195,7 +195,7 @@ extern bool have_movbe; #define TCG_TARGET_HAS_not_vec 0 #define TCG_TARGET_HAS_neg_vec 0 #define TCG_TARGET_HAS_abs_vec 1 -#define TCG_TARGET_HAS_roti_vec 0 +#define TCG_TARGET_HAS_roti_vec have_avx512vl #define TCG_TARGET_HAS_rots_vec 0 #define TCG_TARGET_HAS_rotv_vec 0 #define TCG_TARGET_HAS_shi_vec 1 |