tcg/i386: Support vector comparison select value

We already had backend support for this feature. Expand the new cmpsel opcode using vpblendb. The combination allows us to avoid an extra NOT for some comparison codes. Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
author: Richard Henderson <richard.henderson@linaro.org> 2019-04-19 10:13:33 -1000
committer: Richard Henderson <richard.henderson@linaro.org> 2019-05-22 15:09:43 -0400
commit: 904c5e19672778cc3349f4975437cfdf3371abb6 (patch)
tree: 53f00e0e9c424a9aced2c136997809f595b5d17c
parent: 25c012b4009256505be3430480954a0233de343e (diff)
download: qemu-904c5e19672778cc3349f4975437cfdf3371abb6.zip
qemu-904c5e19672778cc3349f4975437cfdf3371abb6.tar.gz
qemu-904c5e19672778cc3349f4975437cfdf3371abb6.tar.bz2
2 files changed, 36 insertions, 5 deletions
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 16a83a7..928e8b8 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -191,7 +191,7 @@ extern bool have_avx2;
 #define TCG_TARGET_HAS_sat_vec          1
 #define TCG_TARGET_HAS_minmax_vec       1
 #define TCG_TARGET_HAS_bitsel_vec       0
-#define TCG_TARGET_HAS_cmpsel_vec       0
+#define TCG_TARGET_HAS_cmpsel_vec       -1
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
     (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index b360144..ffcafb1 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -3246,6 +3246,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_andc_vec:
         return 1;
     case INDEX_op_cmp_vec:
+    case INDEX_op_cmpsel_vec:
         return -1;
 
     case INDEX_op_shli_vec:
@@ -3464,8 +3465,8 @@ static void expand_vec_mul(TCGType type, unsigned vece,
     }
 }
 
-static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
-                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
+                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
 {
     enum {
         NEED_SWAP = 1,
@@ -3522,11 +3523,34 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
             tcg_temp_free_vec(t2);
         }
     }
-    if (fixup & NEED_INV) {
+    return fixup & NEED_INV;
+}
+
+static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
+                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
+{
+    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
         tcg_gen_not_vec(vece, v0, v0);
     }
 }
 
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
+                              TCGv_vec c1, TCGv_vec c2,
+                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+{
+    TCGv_vec t = tcg_temp_new_vec(type);
+
+    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
+        /* Invert the sense of the compare by swapping arguments.  */
+        TCGv_vec x;
+        x = v3, v3 = v4, v4 = x;
+    }
+    vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece,
+              tcgv_vec_arg(v0), tcgv_vec_arg(v4),
+              tcgv_vec_arg(v3), tcgv_vec_arg(t));
+    tcg_temp_free_vec(t);
+}
+
 static void expand_vec_minmax(TCGType type, unsigned vece,
                               TCGCond cond, bool min,
                               TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
@@ -3551,7 +3575,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
 {
     va_list va;
     TCGArg a2;
-    TCGv_vec v0, v1, v2;
+    TCGv_vec v0, v1, v2, v3, v4;
 
     va_start(va, a0);
     v0 = temp_tcgv_vec(arg_temp(a0));
@@ -3578,6 +3602,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
         expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
         break;
 
+    case INDEX_op_cmpsel_vec:
+        v2 = temp_tcgv_vec(arg_temp(a2));
+        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+        break;
+
     case INDEX_op_smin_vec:
         v2 = temp_tcgv_vec(arg_temp(a2));
         expand_vec_minmax(type, vece, TCG_COND_GT, true, v0, v1, v2);
author	Richard Henderson <richard.henderson@linaro.org>	2019-04-19 10:13:33 -1000
committer	Richard Henderson <richard.henderson@linaro.org>	2019-05-22 15:09:43 -0400
commit	904c5e19672778cc3349f4975437cfdf3371abb6 (patch)
tree	53f00e0e9c424a9aced2c136997809f595b5d17c
parent	25c012b4009256505be3430480954a0233de343e (diff)
download	qemu-904c5e19672778cc3349f4975437cfdf3371abb6.zip qemu-904c5e19672778cc3349f4975437cfdf3371abb6.tar.gz qemu-904c5e19672778cc3349f4975437cfdf3371abb6.tar.bz2