aboutsummaryrefslogtreecommitdiff
path: root/tcg/i386/tcg-target.c.inc
diff options
context:
space:
mode:
Diffstat (limited to 'tcg/i386/tcg-target.c.inc')
-rw-r--r--tcg/i386/tcg-target.c.inc64
1 files changed, 63 insertions, 1 deletions
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 2103899..b1d642f 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -413,6 +413,14 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_UD2 (0x0b | P_EXT)
#define OPC_VPBLENDD (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB (0x4c | P_EXT3A | P_DATA16)
+#define OPC_VPCMPB (0x3f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUB (0x3e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPW (0x3f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUW (0x3e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPD (0x1f | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPUD (0x1e | P_EXT3A | P_DATA16 | P_EVEX)
+#define OPC_VPCMPQ (0x1f | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
+#define OPC_VPCMPUQ (0x1e | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
#define OPC_VPINSRB (0x20 | P_EXT3A | P_DATA16)
#define OPC_VPINSRW (0xc4 | P_EXT | P_DATA16)
#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
@@ -421,6 +429,10 @@ static bool tcg_target_const_match(int64_t val, int ct,
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
+#define OPC_VPMOVM2B (0x28 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2W (0x28 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
+#define OPC_VPMOVM2D (0x38 | P_EXT38 | P_SIMDF3 | P_EVEX)
+#define OPC_VPMOVM2Q (0x38 | P_EXT38 | P_SIMDF3 | P_VEXW | P_EVEX)
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
@@ -3110,9 +3122,59 @@ static bool tcg_out_cmp_vec_noinv(TCGContext *s, TCGType type, unsigned vece,
return fixup & NEED_INV;
}
+static void tcg_out_cmp_vec_k1(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg v1, TCGReg v2, TCGCond cond)
+{
+ static const int cmpm_insn[2][4] = {
+ { OPC_VPCMPB, OPC_VPCMPW, OPC_VPCMPD, OPC_VPCMPQ },
+ { OPC_VPCMPUB, OPC_VPCMPUW, OPC_VPCMPUD, OPC_VPCMPUQ }
+ };
+ static const int cond_ext[16] = {
+ [TCG_COND_EQ] = 0,
+ [TCG_COND_NE] = 4,
+ [TCG_COND_LT] = 1,
+ [TCG_COND_LTU] = 1,
+ [TCG_COND_LE] = 2,
+ [TCG_COND_LEU] = 2,
+ [TCG_COND_NEVER] = 3,
+ [TCG_COND_GE] = 5,
+ [TCG_COND_GEU] = 5,
+ [TCG_COND_GT] = 6,
+ [TCG_COND_GTU] = 6,
+ [TCG_COND_ALWAYS] = 7,
+ };
+
+ tcg_out_vex_modrm_type(s, cmpm_insn[is_unsigned_cond(cond)][vece],
+ /* k1 */ 1, v1, v2, type);
+ tcg_out8(s, cond_ext[cond]);
+}
+
+static void tcg_out_k1_to_vec(TCGContext *s, TCGType type,
+ unsigned vece, TCGReg dest)
+{
+ static const int movm_insn[] = {
+ OPC_VPMOVM2B, OPC_VPMOVM2W, OPC_VPMOVM2D, OPC_VPMOVM2Q
+ };
+ tcg_out_vex_modrm_type(s, movm_insn[vece], dest, 0, /* k1 */ 1, type);
+}
+
static void tcg_out_cmp_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg v0, TCGReg v1, TCGReg v2, TCGCond cond)
{
+ /*
+ * With avx512, we have a complete set of comparisons into mask.
+ * Unless there's a single insn expansion for the comparison,
+ * expand via a mask in k1.
+ */
+ if ((vece <= MO_16 ? have_avx512bw : have_avx512dq)
+ && cond != TCG_COND_EQ
+ && cond != TCG_COND_LT
+ && cond != TCG_COND_GT) {
+ tcg_out_cmp_vec_k1(s, type, vece, v1, v2, cond);
+ tcg_out_k1_to_vec(s, type, vece, v0);
+ return;
+ }
+
if (tcg_out_cmp_vec_noinv(s, type, vece, v0, v1, v2, cond)) {
tcg_out_dupi_vec(s, type, vece, TCG_TMP_VEC, -1);
tcg_out_vex_modrm_type(s, OPC_PXOR, v0, v0, TCG_TMP_VEC, type);
@@ -4078,7 +4140,7 @@ static TCGCond expand_vec_cond(TCGType type, unsigned vece,
* We must bias the inputs so that they become signed.
* All other swapping and inversion are handled during code generation.
*/
- if (vece == MO_64 && is_unsigned_cond(cond)) {
+ if (vece == MO_64 && !have_avx512dq && is_unsigned_cond(cond)) {
TCGv_vec v1 = temp_tcgv_vec(arg_temp(*a1));
TCGv_vec v2 = temp_tcgv_vec(arg_temp(*a2));
TCGv_vec t1 = tcg_temp_new_vec(type);