diff options
author | Haochen Jiang <haochen.jiang@intel.com> | 2021-12-02 15:30:17 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2021-12-08 14:12:07 +0800 |
commit | 691f05c2197a7b79cb2d7fdbabe3182e22da320a (patch) | |
tree | 49e20b2fd48793f10020881605236998c0cb2c6c /gcc | |
parent | 0485ce9128f272fe7ccb48672f69d7be6bb26a9c (diff) | |
download | gcc-691f05c2197a7b79cb2d7fdbabe3182e22da320a.zip gcc-691f05c2197a7b79cb2d7fdbabe3182e22da320a.tar.gz gcc-691f05c2197a7b79cb2d7fdbabe3182e22da320a.tar.bz2 |
Add combine splitter to transform vpcmpeqd/vpxor/vblendvps to vblendvps for ~op0
gcc/ChangeLog:
PR target/100738
* config/i386/sse.md
(*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint):
Add new define_insn_and_split.
gcc/testsuite/ChangeLog:
PR target/100738
* g++.target/i386/pr100738-1.C: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 27 | ||||
-rwxr-xr-x | gcc/testsuite/g++.target/i386/pr100738-1.C | 19 |
2 files changed, 46 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f8b34a1..5421fb5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20767,6 +20767,33 @@
    (set_attr "btver2_decode" "vector,vector,vector")
    (set_attr "mode" "<ssefltvecmode>")])
 
+;; PR target/100738: Transform vpcmpeqd + vpxor + vblendvps to vblendvps for inverted mask;
+(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_not_ltint"
+  [(set (match_operand:<ssebytemode> 0 "register_operand")
+        (unspec:<ssebytemode>
+          [(match_operand:<ssebytemode> 1 "register_operand")
+           (match_operand:<ssebytemode> 2 "vector_operand")
+           (subreg:<ssebytemode>
+             (lt:VI48_AVX
+               (subreg:VI48_AVX
+               (not:<ssebytemode>
+                 (match_operand:<ssebytemode> 3 "register_operand")) 0)
+               (match_operand:VI48_AVX 4 "const0_operand")) 0)]
+          UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (unspec:<ssefltvecmode>
+          [(match_dup 2) (match_dup 1) (match_dup 3)] UNSPEC_BLENDV))]
+{
+  operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
+  operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
+  operands[2] = force_reg (<ssefltvecmode>mode,
+                           gen_lowpart (<ssefltvecmode>mode, operands[2]));
+  operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
+})
+
 (define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
   [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
         (unspec:VF_128_256
diff --git a/gcc/testsuite/g++.target/i386/pr100738-1.C b/gcc/testsuite/g++.target/i386/pr100738-1.C
new file mode 100755
index 0000000..ace1866
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100738-1.C
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mavx2" } */
+/* { dg-final {scan-assembler-times "vblendvps\[ \\t\]" 2 } } */
+/* { dg-final {scan-assembler-not "vpcmpeqd\[ \\t\]" } } */
+/* { dg-final {scan-assembler-not "vpxor\[ \\t\]" } } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef char v16qi __attribute__((vector_size(16)));
+v4si
+foo_1 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) < 0 ? c : d;
+}
+
+v4si
+foo_2 (v16qi a, v4si b, v4si c, v4si d)
+{
+  return ((v4si)~a) >= 0 ? c : d;
+}