aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2023-12-13 11:20:46 +0800
committerliuhongt <hongtao.liu@intel.com>2023-12-14 16:40:06 +0800
commitbe0ff0866a6f072ccfbbb3a3c2079adf1db51aa1 (patch)
tree9eefb522c1a9e6f347c49fc03916a3d989717a1b
parente9f0af150358d93b4c4c5f65d662748ae87bd3d0 (diff)
downloadgcc-be0ff0866a6f072ccfbbb3a3c2079adf1db51aa1.zip
gcc-be0ff0866a6f072ccfbbb3a3c2079adf1db51aa1.tar.gz
gcc-be0ff0866a6f072ccfbbb3a3c2079adf1db51aa1.tar.bz2
Force broadcast constant to mem for vec_dup{v4di,v8si,v4df,v8df} when TARGET_AVX2 is not available.
vpbroadcastd/vpbroadcastq is available under TARGET_AVX2, but the vec_dup{v4di,v8si} pattern is available under AVX with a memory operand. And it will cause LRA/Reload to generate a spill and reload if we put the constant in a register. gcc/ChangeLog: PR target/112992 * config/i386/i386-expand.cc (ix86_convert_const_wide_int_to_broadcast): Don't convert to broadcast for vec_dup{v4di,v8si} when TARGET_AVX2 is not available. (ix86_broadcast_from_constant): Allow broadcast for V4DI/V8SI when !TARGET_AVX2 since it will be forced to memory later. (ix86_expand_vector_move): Force constant to mem for vec_dup{v8si,v4di} when TARGET_AVX2 is not available. gcc/testsuite/ChangeLog: * gcc.target/i386/pr100865-7a.c: Adjust testcase. * gcc.target/i386/pr100865-7c.c: Ditto. * gcc.target/i386/pr112992.c: New test.
-rw-r--r--gcc/config/i386/i386-expand.cc48
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100865-7a.c3
-rw-r--r--gcc/testsuite/gcc.target/i386/pr100865-7c.c3
-rw-r--r--gcc/testsuite/gcc.target/i386/pr112992.c30
4 files changed, 62 insertions, 22 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a53d69d..fad4f34 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -297,6 +297,12 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
return nullptr;
+ unsigned int msize = GET_MODE_SIZE (mode);
+
+ /* Only optimized for vpbroadcast[bwsd]/vbroadcastss with xmm/ymm/zmm. */
+ if (msize != 16 && msize != 32 && msize != 64)
+ return nullptr;
+
/* Convert CONST_WIDE_INT to a non-standard SSE constant integer
broadcast only if vector broadcast is available. */
if (!TARGET_AVX
@@ -309,18 +315,23 @@ ix86_convert_const_wide_int_to_broadcast (machine_mode mode, rtx op)
HOST_WIDE_INT val = CONST_WIDE_INT_ELT (op, 0);
HOST_WIDE_INT val_broadcast;
scalar_int_mode broadcast_mode;
- if (TARGET_AVX2
+ /* vpbroadcastb zmm requires TARGET_AVX512BW. */
+ if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)
&& ix86_broadcast (val, GET_MODE_BITSIZE (QImode),
val_broadcast))
broadcast_mode = QImode;
- else if (TARGET_AVX2
+ else if ((msize == 64 ? TARGET_AVX512BW : TARGET_AVX2)
&& ix86_broadcast (val, GET_MODE_BITSIZE (HImode),
val_broadcast))
broadcast_mode = HImode;
- else if (ix86_broadcast (val, GET_MODE_BITSIZE (SImode),
+ /* vbroadcasts[sd] only supports a memory operand w/o AVX2.
+ When msize == 16, pshufd is used for vec_duplicate.
+ When msize == 64, vpbroadcastd is used, and TARGET_AVX512F must be enabled. */
+ else if ((msize != 32 || TARGET_AVX2)
+ && ix86_broadcast (val, GET_MODE_BITSIZE (SImode),
val_broadcast))
broadcast_mode = SImode;
- else if (TARGET_64BIT
+ else if (TARGET_64BIT && (msize != 32 || TARGET_AVX2)
&& ix86_broadcast (val, GET_MODE_BITSIZE (DImode),
val_broadcast))
broadcast_mode = DImode;
@@ -596,23 +607,17 @@ ix86_broadcast_from_constant (machine_mode mode, rtx op)
&& INTEGRAL_MODE_P (mode))
return nullptr;
+ unsigned int msize = GET_MODE_SIZE (mode);
+ unsigned int inner_size = GET_MODE_SIZE (GET_MODE_INNER ((mode)));
+
/* Convert CONST_VECTOR to a non-standard SSE constant integer
broadcast only if vector broadcast is available. */
- if (!(TARGET_AVX2
- || (TARGET_AVX
- && (GET_MODE_INNER (mode) == SImode
- || GET_MODE_INNER (mode) == DImode))
- || FLOAT_MODE_P (mode))
- || standard_sse_constant_p (op, mode))
+ if (standard_sse_constant_p (op, mode))
return nullptr;
- /* Don't broadcast from a 64-bit integer constant in 32-bit mode.
- We can still put 64-bit integer constant in memory when
- avx512 embed broadcast is available. */
- if (GET_MODE_INNER (mode) == DImode && !TARGET_64BIT
- && (!TARGET_AVX512F
- || (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
- || (GET_MODE_SIZE (mode) < 64 && !TARGET_AVX512VL)))
+ /* vpbroadcast[b,w] is available under TARGET_AVX2,
+ or under TARGET_AVX512BW for zmm. */
+ if (inner_size < 4 && !(msize == 64 ? TARGET_AVX512BW : TARGET_AVX2))
return nullptr;
if (GET_MODE_INNER (mode) == TImode)
@@ -710,7 +715,14 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
constant or scalar mem. */
op1 = gen_reg_rtx (mode);
if (FLOAT_MODE_P (mode)
- || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode))
+ || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode)
+ /* vbroadcastss/vbroadcastsd only support a memory operand
+ w/o AVX2; force the constant into memory to avoid a spill
+ and reload. */
+ || (GET_MODE_SIZE (mode) == 32
+ && (GET_MODE_INNER (mode) == DImode
+ || GET_MODE_INNER (mode) == SImode)
+ && !TARGET_AVX2))
first = force_const_mem (GET_MODE_INNER (mode), first);
bool ok = ix86_expand_vector_init_duplicate (false, mode,
op1, first);
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7a.c b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
index f6f2be9..7de7d4a 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7a.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7a.c
@@ -11,7 +11,6 @@ foo (void)
array[i] = -45;
}
-/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
-/* { dg-final { scan-assembler-not "vpbroadcastq" { target ia32 } } } */
/* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7c.c b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
index 4d50bb7..edbfd5b 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7c.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7c.c
@@ -11,7 +11,6 @@ foo (void)
array[i] = -45;
}
-/* { dg-final { scan-assembler-times "vbroadcastsd" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vbroadcastsd" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
-/* { dg-final { scan-assembler-not "vbroadcastsd" { target ia32 } } } */
/* { dg-final { scan-assembler-not "vmovdqa" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr112992.c b/gcc/testsuite/gcc.target/i386/pr112992.c
new file mode 100644
index 0000000..743e64d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112992.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx -mno-avx2 -O2 " } */
+/* { dg-final { scan-assembler-not {(?n)(%rsp)} } } */
+
+typedef unsigned long long v4di __attribute((vector_size(32)));
+typedef unsigned int v8si __attribute((vector_size(32)));
+typedef unsigned short v16hi __attribute((vector_size(32)));
+typedef unsigned char v32qi __attribute((vector_size(32)));
+
+#define MASK 0x01010101
+#define MASKL 0x0101010101010101ULL
+#define MASKS 0x0101
+
+v4di fooq() {
+ return (v4di){MASKL,MASKL,MASKL,MASKL};
+}
+
+v8si food() {
+ return (v8si){MASK,MASK,MASK,MASK,MASK,MASK,MASK,MASK};
+}
+
+v16hi foow() {
+ return (v16hi){MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,
+ MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS,MASKS};
+}
+
+v32qi foob() {
+ return (v32qi){1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
+}