aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2023-12-07 09:17:27 +0800
committerliuhongt <hongtao.liu@intel.com>2023-12-12 13:24:25 +0800
commitfc62716fe8d1d60a9f1c6906e5a4845b3331b828 (patch)
tree40a63a5d8863efc5178deee73f5d17160b42f26b
parentd96533559e26dd0c86f0708fa46eef65c35f7b90 (diff)
downloadgcc-fc62716fe8d1d60a9f1c6906e5a4845b3331b828.zip
gcc-fc62716fe8d1d60a9f1c6906e5a4845b3331b828.tar.gz
gcc-fc62716fe8d1d60a9f1c6906e5a4845b3331b828.tar.bz2
Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) deosn't contains all SSE_REGS.
If the function desn't clobber any sse registers or only clobber 128-bit part, then vzeroupper isn't issued before the function exit. the status not CLEAN but ANY after the function. Also for sibling_call, it's safe to issue an vzeroupper. Also there could be missing vzeroupper since there's no mode_exit for sibling_call_p. gcc/ChangeLog: PR target/112891 * config/i386/i386.cc (ix86_avx_u128_mode_after): Return AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to align with ix86_avx_u128_mode_needed. (ix86_avx_u128_mode_needed): Return AVX_U128_ClEAN for sibling_call. gcc/testsuite/ChangeLog: * gcc.target/i386/pr112891.c: New test. * gcc.target/i386/pr112891-2.c: New test.
-rw-r--r--gcc/config/i386/i386.cc22
-rw-r--r--gcc/testsuite/gcc.target/i386/pr112891-2.c30
-rw-r--r--gcc/testsuite/gcc.target/i386/pr112891.c29
3 files changed, 78 insertions, 3 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4b6bad3..4706f0d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
vzeroupper if all SSE registers are clobbered. */
const function_abi &abi = insn_callee_abi (insn);
if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
- || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
- abi.mode_clobbers (V4DImode)))
+ /* Should be safe to issue an vzeroupper before sibling_call_p.
+ Also there not mode_exit for sibling_call, so there could be
+ missing vzeroupper for that. */
+ || !(SIBLING_CALL_P (insn)
+ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+ abi.mode_clobbers (V4DImode))))
return AVX_U128_ANY;
return AVX_U128_CLEAN;
@@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
bool avx_upper_reg_found = false;
note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
+ if (avx_upper_reg_found)
+ return AVX_U128_DIRTY;
+
+ /* If the function desn't clobber any sse registers or only clobber
+ 128-bit part, Then vzeroupper isn't issued before the function exit.
+ the status not CLEAN but ANY after the function. */
+ const function_abi &abi = insn_callee_abi (insn);
+ if (!(SIBLING_CALL_P (insn)
+ || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
+ abi.mode_clobbers (V4DImode))))
+ return AVX_U128_ANY;
+
+ return AVX_U128_CLEAN;
}
/* Otherwise, return current mode. Remember that if insn
diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c
new file mode 100644
index 0000000..164c398
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+ a[0] = 1.0;
+ a[1] = 2.0;
+}
+
+double
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+ a[0] += b[0];
+ a[1] += b[1];
+ a[2] += b[2];
+ a[3] += b[3];
+ bar (b);
+ return a[5] + b[5];
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+ double c = foo (a, b);
+ return __builtin_exp (c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c
new file mode 100644
index 0000000..dbf6c67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112891.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
+
+void
+__attribute__((noinline))
+bar (double* a)
+{
+ a[0] = 1.0;
+ a[1] = 2.0;
+}
+
+void
+__attribute__((noinline))
+foo (double* __restrict a, double* b)
+{
+ a[0] += b[0];
+ a[1] += b[1];
+ a[2] += b[2];
+ a[3] += b[3];
+ bar (b);
+}
+
+double
+foo1 (double* __restrict a, double* b)
+{
+ foo (a, b);
+ return __builtin_exp (b[1]);
+}