aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2024-08-29 11:39:20 +0800
committerliuhongt <hongtao.liu@intel.com>2024-08-30 13:51:54 +0800
commitab214ef734bfc3dcffcf79ff9e1dd651c2b40566 (patch)
tree2e9efcc00dc6024d94dff83349b0183830424874
parent350d627d5ee6abf23ede3f8a3b23348caab81fe3 (diff)
downloadgcc-ab214ef734bfc3dcffcf79ff9e1dd651c2b40566.zip
gcc-ab214ef734bfc3dcffcf79ff9e1dd651c2b40566.tar.gz
gcc-ab214ef734bfc3dcffcf79ff9e1dd651c2b40566.tar.bz2
Check avx upper register for parallel.
For function arguments/return, when it's BLK mode, it's put in a parallel with an expr_list, and the expr_list contains the real mode and registers. Current ix86_check_avx_upper_register only checked for SSE_REG_P, and failed to handle that. The patch extend the handle to each subrtx. gcc/ChangeLog: PR target/116512 * config/i386/i386.cc (ix86_check_avx_upper_register): Iterate subrtx to scan for avx upper register. (ix86_check_avx_upper_stores): Inline old ix86_check_avx_upper_register. (ix86_avx_u128_mode_needed): Ditto, and replace FOR_EACH_SUBRTX with call to new ix86_check_avx_upper_register. gcc/testsuite/ChangeLog: * gcc.target/i386/pr116512.c: New test.
-rw-r--r--gcc/config/i386/i386.cc36
-rw-r--r--gcc/testsuite/gcc.target/i386/pr116512.c26
2 files changed, 49 insertions, 13 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index a1f65d4..546c964 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14882,9 +14882,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn)
static bool
ix86_check_avx_upper_register (const_rtx exp)
{
- return (SSE_REG_P (exp)
- && !EXT_REX_SSE_REG_P (exp)
- && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
+ /* construct_container may return a parallel with expr_list
+ which contains the real reg and mode */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
+ {
+ const_rtx x = *iter;
+ if (SSE_REG_P (x)
+ && !EXT_REX_SSE_REG_P (x)
+ && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
+ return true;
+ }
+
+ return false;
}
/* Check if a 256bit or 512bit AVX register is referenced in stores. */
@@ -14892,7 +14902,9 @@ ix86_check_avx_upper_register (const_rtx exp)
static void
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
{
- if (ix86_check_avx_upper_register (dest))
+ if (SSE_REG_P (dest)
+ && !EXT_REX_SSE_REG_P (dest)
+ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
{
bool *used = (bool *) data;
*used = true;
@@ -14951,14 +14963,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
return AVX_U128_CLEAN;
}
- subrtx_iterator::array_type array;
-
rtx set = single_set (insn);
if (set)
{
rtx dest = SET_DEST (set);
rtx src = SET_SRC (set);
- if (ix86_check_avx_upper_register (dest))
+ if (SSE_REG_P (dest)
+ && !EXT_REX_SSE_REG_P (dest)
+ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
{
/* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
source isn't zero. */
@@ -14969,9 +14981,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
}
else
{
- FOR_EACH_SUBRTX (iter, array, src, NONCONST)
- if (ix86_check_avx_upper_register (*iter))
- return AVX_U128_DIRTY;
+ if (ix86_check_avx_upper_register (src))
+ return AVX_U128_DIRTY;
}
/* This isn't YMM/ZMM load/store. */
@@ -14982,9 +14993,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
Hardware changes state only when a 256bit register is written to,
but we need to prevent the compiler from moving optimal insertion
point above eventual read from 256bit or 512 bit register. */
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
- if (ix86_check_avx_upper_register (*iter))
- return AVX_U128_DIRTY;
+ if (ix86_check_avx_upper_register (PATTERN (insn)))
+ return AVX_U128_DIRTY;
return AVX_U128_ANY;
}
diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c
new file mode 100644
index 0000000..c2bc6c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116512.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v4 -O2" } */
+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+struct B {
+ union {
+ __m512 f;
+ __m512i s;
+ };
+};
+
+struct B foo(int n) {
+ struct B res;
+ res.s = _mm512_set1_epi32(n);
+
+ return res;
+}
+
+__m512i bar(int n) {
+ struct B res;
+ res.s = _mm512_set1_epi32(n);
+
+ return res.s;
+}