diff options
Diffstat (limited to 'tcg/i386/tcg-target.c.inc')
-rw-r--r-- | tcg/i386/tcg-target.c.inc | 64 |
1 files changed, 63 insertions, 1 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 2103899..b1d642f 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -413,6 +413,14 @@ static bool tcg_target_const_match(int64_t val, int ct, #define OPC_UD2 (0x0b | P_EXT) #define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16) #define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16) +#define OPC_VPCMPB (0x3f | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPUB (0x3e | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPW (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPUW (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPD (0x1f | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPUD (0x1e | P_EXT3A | P_DATA16 | P_EVEX) +#define OPC_VPCMPQ (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) +#define OPC_VPCMPUQ (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX) #define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16) #define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16) #define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16) @@ -421,6 +429,10 @@ static bool tcg_target_const_match(int64_t val, int ct, #define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16) #define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16) +#define OPC_VPMOVM2B (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX) +#define OPC_VPMOVM2W (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) +#define OPC_VPMOVM2D (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX) +#define OPC_VPMOVM2Q (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX) #define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW) #define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL) #define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX) @@ -3110,9 +3122,59 @@ static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece, return fixup & NEED_INV; } +static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece, + TCGReg v1, TCGReg v2, TCGCond cond) +{ + static const int cmpm_insn[2][4] = { + { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ }, + { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ } + }; + static const int cond_ext[16] = { + [TCG_COND_EQ] = 0, + [TCG_COND_NE] = 4, + [TCG_COND_LT] = 1, + [TCG_COND_LTU] = 1, + [TCG_COND_LE] = 2, + [TCG_COND_LEU] = 2, + [TCG_COND_NEVER] = 3, + [TCG_COND_GE] = 5, + [TCG_COND_GEU] = 5, + [TCG_COND_GT] = 6, + [TCG_COND_GTU] = 6, + [TCG_COND_ALWAYS] = 7, + }; + + tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece], + /* k1 */ 1, v1, v2, type); + tcg_out8(s, cond_ext[cond]); +} + +static void tcg_out_k1_to_vec(TCGContext *s, TCGType type, + unsigned vece, TCGReg dest) +{ + static const int movm_insn[] = { + OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q + }; + tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type); +} + static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece, TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond) { + /* + * With avx512, we have a complete set of comparisons into mask. + * Unless there's a single insn expansion for the comparision, + * expand via a mask in k1. + */ + if ((vece <= MO_16 ? have_avx512bw : have_avx512dq) + && cond != TCG_COND_EQ + && cond != TCG_COND_LT + && cond != TCG_COND_GT) { + tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond); + tcg_out_k1_to_vec(s, type, vece, v0); + return; + } + if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) { tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1); tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type); @@ -4078,7 +4140,7 @@ static TCGCond expand_vec_cond(TCGType type, unsigned vece, * We must bias the inputs so that they become signed. * All other swapping and inversion are handled during code generation. */ - if (vece == MO_64 && is_unsigned_cond(cond)) { + if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) { TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1)); TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2)); TCGv_vec t1 = tcg_temp_new_vec(type); |