aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJan Beulich <jbeulich@suse.com>2023-07-05 09:41:09 +0200
committerJan Beulich <jbeulich@suse.com>2023-07-05 09:41:09 +0200
commit2d11c99dfca3cc603dbbfafb3afc41689a68e40f (patch)
tree5fa200a9a6a7f3934d6451e7272ba0b03e79d999
parent607613e516670dd817e7467e774ed2e3440bdb21 (diff)
downloadgcc-2d11c99dfca3cc603dbbfafb3afc41689a68e40f.zip
gcc-2d11c99dfca3cc603dbbfafb3afc41689a68e40f.tar.gz
gcc-2d11c99dfca3cc603dbbfafb3afc41689a68e40f.tar.bz2
x86: use VPTERNLOG also for certain andnot forms
When it's the memory operand which is to be inverted, using VPANDN* requires a further load instruction. The same can be achieved by a single VPTERNLOG*. Add two new alternatives (for plain memory and embedded broadcast), adjusting the predicate for the first operand accordingly. Two pre-existing testcases actually end up being affected (improved) by the change, which is reflected in updated expectations there. gcc/ PR target/93768 * config/i386/sse.md (*andnot<mode>3): Add new alternatives for memory form operand 1. gcc/testsuite/ PR target/93768 * gcc.target/i386/avx512f-andn-di-zmm-2.c: New test. * gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations towards generated code. * gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit code.
-rw-r--r--gcc/config/i386/sse.md38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100711-3.c4
4 files changed, 47 insertions, 11 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 73a8738..2e6994e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17210,11 +17210,13 @@
"TARGET_AVX512F")
(define_insn "*andnot<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=x,x,v")
+ [(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v")
(and:VI
- (not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
- (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
- "TARGET_SSE"
+ (not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br"))
+ (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))]
+ "TARGET_SSE
+ && (register_operand (operands[1], <MODE>mode)
+ || register_operand (operands[2], <MODE>mode))"
{
char buf[64];
const char *ops;
@@ -17281,6 +17283,15 @@
case 2:
ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
+ case 3:
+ case 4:
+ tmp = "pternlog";
+ ssesuffix = "<ternlogsuffix>";
+ if (which_alternative != 4 || TARGET_AVX512VL)
+ ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}";
+ else
+ ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}";
+ break;
default:
gcc_unreachable ();
}
@@ -17289,7 +17300,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx")
+ [(set_attr "isa" "noavx,avx,avx,*,*")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@@ -17297,9 +17308,12 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex,evex")
+ (set_attr "prefix" "orig,vex,evex,evex,evex")
(set (attr "mode")
- (cond [(match_test "TARGET_AVX2")
+ (cond [(and (eq_attr "alternative" "3,4")
+ (match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL"))
+ (const_string "XI")
+ (match_test "TARGET_AVX2")
(const_string "<sseinsnmode>")
(match_test "TARGET_AVX")
(if_then_else
@@ -17310,7 +17324,15 @@
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
- (const_string "<sseinsnmode>")))])
+ (const_string "<sseinsnmode>")))
+ (set (attr "enabled")
+ (cond [(eq_attr "alternative" "3")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+ (eq_attr "alternative" "4")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
+ ]
+ (const_string "*")))])
;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
(define_split
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
new file mode 100644
index 0000000..4ebb30f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-di-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
+
+#define type __m512i
+#define vec 512
+#define op andnot
+#define suffix epi64
+#define SCALAR long long
+
+#include "avx512-binop-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
index a9608ca..86e7ebe 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-andn-si-zmm-2.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
-/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vpbroadcast" } } */
#define type __m512i
#define vec 512
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-3.c b/gcc/testsuite/gcc.target/i386/pr100711-3.c
index e90f2a4..98cc1c3 100644
--- a/gcc/testsuite/gcc.target/i386/pr100711-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr100711-3.c
@@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b)
return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b;
}
-/* { dg-final { scan-assembler-times "vpandn" 4 } } */
+/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */