diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-12-07 09:17:27 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2023-12-12 13:24:25 +0800 |
commit | fc62716fe8d1d60a9f1c6906e5a4845b3331b828 (patch) | |
tree | 40a63a5d8863efc5178deee73f5d17160b42f26b | |
parent | d96533559e26dd0c86f0708fa46eef65c35f7b90 (diff) | |
download | gcc-fc62716fe8d1d60a9f1c6906e5a4845b3331b828.zip gcc-fc62716fe8d1d60a9f1c6906e5a4845b3331b828.tar.gz gcc-fc62716fe8d1d60a9f1c6906e5a4845b3331b828.tar.bz2 |
Don't assume it's AVX_U128_CLEAN after call_insn whose abi.mode_clobber(V4DImode) deosn't contains all SSE_REGS.
If the function desn't clobber any sse registers or only clobber
128-bit part, then vzeroupper isn't issued before the function exit.
the status not CLEAN but ANY after the function.
Also for sibling_call, it's safe to issue an vzeroupper. Also there
could be missing vzeroupper since there's no mode_exit for
sibling_call_p.
gcc/ChangeLog:
PR target/112891
* config/i386/i386.cc (ix86_avx_u128_mode_after): Return
AVX_U128_ANY if callee_abi doesn't clobber all_sse_regs to
align with ix86_avx_u128_mode_needed.
(ix86_avx_u128_mode_needed): Return AVX_U128_ClEAN for
sibling_call.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr112891.c: New test.
* gcc.target/i386/pr112891-2.c: New test.
-rw-r--r-- | gcc/config/i386/i386.cc | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr112891-2.c | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr112891.c | 29 |
3 files changed, 78 insertions, 3 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4b6bad3..4706f0d 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -15038,8 +15038,12 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) vzeroupper if all SSE registers are clobbered. */ const function_abi &abi = insn_callee_abi (insn); if (vzeroupper_pattern (PATTERN (insn), VOIDmode) - || !hard_reg_set_subset_p (reg_class_contents[SSE_REGS], - abi.mode_clobbers (V4DImode))) + /* Should be safe to issue an vzeroupper before sibling_call_p. + Also there not mode_exit for sibling_call, so there could be + missing vzeroupper for that. */ + || !(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) return AVX_U128_ANY; return AVX_U128_CLEAN; @@ -15177,7 +15181,19 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) bool avx_upper_reg_found = false; note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + if (avx_upper_reg_found) + return AVX_U128_DIRTY; + + /* If the function desn't clobber any sse registers or only clobber + 128-bit part, Then vzeroupper isn't issued before the function exit. + the status not CLEAN but ANY after the function. */ + const function_abi &abi = insn_callee_abi (insn); + if (!(SIBLING_CALL_P (insn) + || hard_reg_set_subset_p (reg_class_contents[SSE_REGS], + abi.mode_clobbers (V4DImode)))) + return AVX_U128_ANY; + + return AVX_U128_CLEAN; } /* Otherwise, return current mode. Remember that if insn diff --git a/gcc/testsuite/gcc.target/i386/pr112891-2.c b/gcc/testsuite/gcc.target/i386/pr112891-2.c new file mode 100644 index 0000000..164c398 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +double +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); + return a[5] + b[5]; +} + +double +foo1 (double* __restrict a, double* b) +{ + double c = foo (a, b); + return __builtin_exp (c); +} diff --git a/gcc/testsuite/gcc.target/i386/pr112891.c b/gcc/testsuite/gcc.target/i386/pr112891.c new file mode 100644 index 0000000..dbf6c67 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr112891.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O3" } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ + +void +__attribute__((noinline)) +bar (double* a) +{ + a[0] = 1.0; + a[1] = 2.0; +} + +void +__attribute__((noinline)) +foo (double* __restrict a, double* b) +{ + a[0] += b[0]; + a[1] += b[1]; + a[2] += b[2]; + a[3] += b[3]; + bar (b); +} + +double +foo1 (double* __restrict a, double* b) +{ + foo (a, b); + return __builtin_exp (b[1]); +} |