diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2019-04-19 10:13:33 -1000 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2019-05-22 15:09:43 -0400 |
commit | 904c5e19672778cc3349f4975437cfdf3371abb6 (patch) | |
tree | 53f00e0e9c424a9aced2c136997809f595b5d17c | |
parent | 25c012b4009256505be3430480954a0233de343e (diff) | |
download | qemu-904c5e19672778cc3349f4975437cfdf3371abb6.zip qemu-904c5e19672778cc3349f4975437cfdf3371abb6.tar.gz qemu-904c5e19672778cc3349f4975437cfdf3371abb6.tar.bz2 |
tcg/i386: Support vector comparison select value
We already had backend support for this feature. Expand the new
cmpsel opcode using vpblendvb. The combination allows us to avoid
an extra NOT for some comparison codes.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | tcg/i386/tcg-target.h | 2 | ||||
-rw-r--r-- | tcg/i386/tcg-target.inc.c | 39 |
2 files changed, 36 insertions, 5 deletions
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 16a83a7..928e8b8 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -191,7 +191,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_sat_vec 1 #define TCG_TARGET_HAS_minmax_vec 1 #define TCG_TARGET_HAS_bitsel_vec 0 -#define TCG_TARGET_HAS_cmpsel_vec 0 +#define TCG_TARGET_HAS_cmpsel_vec -1 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c index b360144..ffcafb1 100644 --- a/tcg/i386/tcg-target.inc.c +++ b/tcg/i386/tcg-target.inc.c @@ -3246,6 +3246,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_andc_vec: return 1; case INDEX_op_cmp_vec: + case INDEX_op_cmpsel_vec: return -1; case INDEX_op_shli_vec: @@ -3464,8 +3465,8 @@ static void expand_vec_mul(TCGType type, unsigned vece, } } -static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, - TCGv_vec v1, TCGv_vec v2, TCGCond cond) +static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec v2, TCGCond cond) { enum { NEED_SWAP = 1, @@ -3522,11 +3523,34 @@ static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, tcg_temp_free_vec(t2); } } - if (fixup & NEED_INV) { + return fixup & NEED_INV; +} + +static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec v1, TCGv_vec v2, TCGCond cond) +{ + if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) { tcg_gen_not_vec(vece, v0, v0); } } +static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0, + TCGv_vec c1, TCGv_vec c2, + TCGv_vec v3, TCGv_vec v4, TCGCond cond) +{ + TCGv_vec t = tcg_temp_new_vec(type); + + if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) { + /* Invert the sense of the compare by swapping arguments. 
*/ + TCGv_vec x; + x = v3, v3 = v4, v4 = x; + } + vec_gen_4(INDEX_op_x86_vpblendvb_vec, type, vece, + tcgv_vec_arg(v0), tcgv_vec_arg(v4), + tcgv_vec_arg(v3), tcgv_vec_arg(t)); + tcg_temp_free_vec(t); +} + static void expand_vec_minmax(TCGType type, unsigned vece, TCGCond cond, bool min, TCGv_vec v0, TCGv_vec v1, TCGv_vec v2) @@ -3551,7 +3575,7 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, { va_list va; TCGArg a2; - TCGv_vec v0, v1, v2; + TCGv_vec v0, v1, v2, v3, v4; va_start(va, a0); v0 = temp_tcgv_vec(arg_temp(a0)); @@ -3578,6 +3602,13 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg)); break; + case INDEX_op_cmpsel_vec: + v2 = temp_tcgv_vec(arg_temp(a2)); + v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg)); + break; + case INDEX_op_smin_vec: v2 = temp_tcgv_vec(arg_temp(a2)); expand_vec_minmax(type, vece, TCG_COND_GT, true, v0, v1, v2); |