diff options
author | Jan Beulich <jbeulich@suse.com> | 2023-07-05 09:41:09 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2023-07-05 09:41:09 +0200 |
commit | 2d11c99dfca3cc603dbbfafb3afc41689a68e40f (patch) | |
tree | 5fa200a9a6a7f3934d6451e7272ba0b03e79d999 /gcc | |
parent | 607613e516670dd817e7467e774ed2e3440bdb21 (diff) | |
download | gcc-2d11c99dfca3cc603dbbfafb3afc41689a68e40f.zip gcc-2d11c99dfca3cc603dbbfafb3afc41689a68e40f.tar.gz gcc-2d11c99dfca3cc603dbbfafb3afc41689a68e40f.tar.bz2 |
x86: use VPTERNLOG also for certain andnot forms
When it's the memory operand which is to be inverted, using VPANDN*
requires a further load instruction. The same can be achieved by a
single VPTERNLOG*. Add two new alternatives (for plain memory and
embedded broadcast), adjusting the predicate for the first operand
accordingly.
Two pre-existing testcases actually end up being affected (improved) by
the change, which is reflected in updated expectations there.
gcc/
PR target/93768
* config/i386/sse.md (*andnot<mode>3): Add new alternatives
for memory form operand 1.
gcc/testsuite/
PR target/93768
* gcc.target/i386/avx512f-andn-di-zmm-2.c: New test.
* gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations
towards generated code.
* gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit
code.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr100711-3.c | 4 |
4 files changed, 47 insertions, 11 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 73a8738..2e6994e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17210,11 +17210,13 @@ "TARGET_AVX512F") (define_insn "*andnot<mode>3" - [(set (match_operand:VI 0 "register_operand" "=x,x,v") + [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v") (and:VI - (not:VI (match_operand:VI 1 "vector_operand" "0,x,v")) - (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))] - "TARGET_SSE" + (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br")) + (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))] + "TARGET_SSE + && (register_operand (operands[1], <MODE>mode) + || register_operand (operands[2], <MODE>mode))" { char buf[64]; const char *ops; @@ -17281,6 +17283,15 @@ case 2: ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; + case 3: + case 4: + tmp = "pternlog"; + ssesuffix = "<ternlogsuffix>"; + if (which_alternative != 4 || TARGET_AVX512VL) + ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}"; + else + ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}"; + break; default: gcc_unreachable (); } @@ -17289,7 +17300,7 @@ output_asm_insn (buf, operands); return ""; } - [(set_attr "isa" "noavx,avx,avx") + [(set_attr "isa" "noavx,avx,avx,*,*") (set_attr "type" "sselog") (set (attr "prefix_data16") (if_then_else @@ -17297,9 +17308,12 @@ (eq_attr "mode" "TI")) (const_string "1") (const_string "*"))) - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex,evex,evex,evex") (set (attr "mode") - (cond [(match_test "TARGET_AVX2") + (cond [(and (eq_attr "alternative" "3,4") + (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL")) + (const_string "XI") + (match_test "TARGET_AVX2") (const_string "<sseinsnmode>") (match_test "TARGET_AVX") (if_then_else @@ -17310,7 +17324,15 @@ (match_test "optimize_function_for_size_p (cfun)")) (const_string "V4SF") ] - (const_string "<sseinsnmode>")))]) + (const_string "<sseinsnmode>"))) + (set (attr "enabled") + (cond [(eq_attr "alternative" "3") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (eq_attr "alternative" "4") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)") + ] + (const_string "*")))]) ;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn (define_split diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c new file mode 100644 index 0000000..4ebb30f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ + +#define type __m512i +#define vec 512 +#define op andnot +#define suffix epi64 +#define SCALAR long long + +#include "avx512-binop-2.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c index a9608ca..86e7ebe 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ -/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vpbroadcast" } } */ #define type __m512i #define vec 512 diff --git a/gcc/testsuite/gcc.target/i386/pr100711-3.c b/gcc/testsuite/gcc.target/i386/pr100711-3.c index e90f2a4..98cc1c3 100644 --- a/gcc/testsuite/gcc.target/i386/pr100711-3.c +++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c @@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b) return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b; } -/* { dg-final { scan-assembler-times "vpandn" 4 } } */ +/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */ |