aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2021-08-23 14:47:03 -0700
committerH.J. Lu <hjl.tools@gmail.com>2021-08-24 05:46:17 -0700
commit6e5401e87d02919b0594e04f828892deef956407 (patch)
tree61bb08cb5cb7cf673b06d88197955b4899c6e608
parent9216ee6d1195d48388f825cf1b072e570129cbbe (diff)
downloadgcc-6e5401e87d02919b0594e04f828892deef956407.zip
gcc-6e5401e87d02919b0594e04f828892deef956407.tar.gz
gcc-6e5401e87d02919b0594e04f828892deef956407.tar.bz2
x86: Broadcast from integer to a pseudo vector register
Broadcast from integer to a pseudo vector register instead of a hard vector register to allow LRA to remove redundant move instruction after broadcast. gcc/ PR target/102021 * config/i386/i386-expand.c (ix86_expand_vector_move): Broadcast from integer to a pseudo vector register. gcc/testsuite/ PR target/102021 * gcc.target/i386/pr100865-10b.c: Expect vzeroupper. * gcc.target/i386/pr100865-4b.c: Likewise. * gcc.target/i386/pr100865-6b.c: Expect vmovdqu and vzeroupper. * gcc.target/i386/pr100865-7b.c: Likewise. * gcc.target/i386/pr102021.c: New test.
-rw-r--r--gcc/config/i386/i386-expand.c13
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100865-10b.c1
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100865-4b.c3
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100865-6b.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100865-7b.c6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr102021.c15
6 files changed, 22 insertions, 22 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 9bf13db..2500dbf 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -579,19 +579,10 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
{
/* Broadcast to XMM/YMM/ZMM register from an integer
constant or scalar mem. */
- /* Hard registers are used for 2 purposes:
- 1. Prevent stack realignment when the original code
- doesn't use vector registers, which is the same for
- memcpy and memset.
- 2. Prevent combine to convert constant broadcast to
- load from constant pool. */
- op1 = ix86_gen_scratch_sse_rtx (mode);
+ op1 = gen_reg_rtx (mode);
if (FLOAT_MODE_P (mode)
|| (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode))
- {
- first = force_const_mem (GET_MODE_INNER (mode), first);
- op1 = gen_reg_rtx (mode);
- }
+ first = force_const_mem (GET_MODE_INNER (mode), first);
bool ok = ix86_expand_vector_init_duplicate (false, mode,
op1, first);
gcc_assert (ok);
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
index 77ace86..e5616d8 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
@@ -5,4 +5,3 @@
/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */
-/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
index 80e9fdb..6d9cb91 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
@@ -5,7 +5,6 @@
/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-6b.c b/gcc/testsuite/gcc.target/i386/pr100865-6b.c
index 35f2e96..9588249 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-6b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-6b.c
@@ -4,9 +4,7 @@
#include "pr100865-6a.c"
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu32\[\\t \]%ymm\[0-9\]+, " 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
/* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7b.c b/gcc/testsuite/gcc.target/i386/pr100865-7b.c
index ad267c4..3b20c68 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7b.c
@@ -5,8 +5,6 @@
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu64\[\\t \]%ymm\[0-9\]+, " 16 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr102021.c b/gcc/testsuite/gcc.target/i386/pr102021.c
new file mode 100644
index 0000000..6db3f57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102021.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake-avx512" } */
+
+#include<immintrin.h>
+
+__m256i
+foo ()
+{
+ return _mm256_set1_epi16 (12);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovdqa" } } */
+/* { dg-final { scan-assembler-not "vzeroupper" } } */