aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author liuhongt <hongtao.liu@intel.com> 2022-03-03 13:31:39 +0800
committer liuhongt <hongtao.liu@intel.com> 2022-03-08 09:02:05 +0800
commit b1a741a03041782b34effcb820fbe19ba41a3b8f (patch)
tree 6a29c55f51928c7394f18f1edb88a9c67c2e3b54
parent e6533e2ebec964e77d3a2462abbabd214d677552 (diff)
download gcc-b1a741a03041782b34effcb820fbe19ba41a3b8f.zip
gcc-b1a741a03041782b34effcb820fbe19ba41a3b8f.tar.gz
gcc-b1a741a03041782b34effcb820fbe19ba41a3b8f.tar.bz2
Optimize v4si broadcast for noavx512vl.
This will enable the code-generation change below:

-	vbroadcastss	.LC1(%rip), %xmm0
+	movl	$-45, %edx
+	vmovd	%edx, %xmm0
+	vpshufd	$0, %xmm0, %xmm0

According to a microbenchmark, it's faster than broadcasting from memory
for TARGET_INTER_UNIT_MOVES_TO_VEC.

gcc/ChangeLog:

	* config/i386/sse.md (*vec_dupv4si): Disable memory operand for
	TARGET_INTER_UNIT_MOVES_TO_VEC when preferred_for_speed.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr100865-8a.c: Adjust testcase.
	* gcc.target/i386/pr100865-8c.c: Ditto.
	* gcc.target/i386/pr100865-9c.c: Ditto.
-rw-r--r-- gcc/config/i386/sse.md 7
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100865-8a.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100865-8c.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100865-9c.c 2
4 files changed, 9 insertions, 4 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0076475..e9292e6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25153,7 +25153,12 @@
(set_attr "length_immediate" "1,0,1")
(set_attr "prefix_extra" "0,1,*")
(set_attr "prefix" "maybe_vex,maybe_evex,orig")
- (set_attr "mode" "TI,V4SF,V4SF")])
+ (set_attr "mode" "TI,V4SF,V4SF")
+ (set (attr "preferred_for_speed")
+ (cond [(eq_attr "alternative" "1")
+ (symbol_ref "!TARGET_INTER_UNIT_MOVES_TO_VEC")
+ ]
+ (symbol_ref "true")))])
(define_insn "*vec_dupv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x,v,v,x")
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8a.c b/gcc/testsuite/gcc.target/i386/pr100865-8a.c
index 911b14d..544a14d 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-8a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-8a.c
@@ -20,5 +20,5 @@ foo (void)
array[i] = MK_CONST128_BROADCAST_SIGNED (-45);
}
-/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "(?:vpbroadcastd|vpshufd)\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-8c.c b/gcc/testsuite/gcc.target/i386/pr100865-8c.c
index 00682ed..efee048 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-8c.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-8c.c
@@ -3,5 +3,5 @@
#include "pr100865-8a.c"
-/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-9c.c b/gcc/testsuite/gcc.target/i386/pr100865-9c.c
index 8ffcdc1..e6f2590 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-9c.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-9c.c
@@ -3,5 +3,5 @@
#include "pr100865-9a.c"
-/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vpshufd\[\\t \]+\[^\n\]*, %xmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqa\[\\t \]%xmm\[0-9\]+, " 16 } } */