diff options
author | liuhongt <hongtao.liu@intel.com> | 2024-05-31 14:38:07 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2024-06-03 15:18:10 +0800 |
commit | ac306de7d5100d3682eae2270995a9abbe19db38 (patch) | |
tree | 9d27ba0cc38a838211d05c0f6057c397cc6e8547 | |
parent | 1f2ca510065a2033bac408eb5a960ef0126f25cc (diff) | |
download | gcc-ac306de7d5100d3682eae2270995a9abbe19db38.zip gcc-ac306de7d5100d3682eae2270995a9abbe19db38.tar.gz gcc-ac306de7d5100d3682eae2270995a9abbe19db38.tar.bz2 |
Add some preference for floating point rtl ifcvt when sse4.1 is not available
Without TARGET_SSE4_1, movdfcc/movsfcc take 3 instructions (pand, pandn
and por), so the if-converted sequence can fail the cost comparison.
Increasing the branch cost could hurt performance for other modes, so
add a preference specifically for floating-point ifcvt instead.
gcc/ChangeLog:
PR target/115299
* config/i386/i386.cc (ix86_noce_conversion_profitable_p): Add
some preference for floating point ifcvt when SSE4.1 is not
available.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr115299.c: New test.
* gcc.target/i386/pr86722.c: Adjust testcase.
-rw-r--r-- | gcc/config/i386/i386.cc | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr115299.c | 10 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr86722.c | 2 |
3 files changed, 28 insertions, 1 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 1a0206a..271da12 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24879,6 +24879,23 @@ ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
 	    return false;
 	}
     }
+
+  /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
+     for movdfcc/movsfcc, and could possibly fail cost comparison.
+     Increase branch cost will hurt performance for other modes, so
+     specially add some preference for floating point ifcvt.  */
+  if (!TARGET_SSE4_1 && if_info->x
+      && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
+      && if_info->speed_p)
+    {
+      unsigned cost = seq_cost (seq, true);
+
+      if (cost <= if_info->original_cost)
+	return true;
+
+      return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
+    }
+
   return default_noce_conversion_profitable_p (seq, if_info);
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/pr115299.c b/gcc/testsuite/gcc.target/i386/pr115299.c
new file mode 100644
index 0000000..53c5899
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115299.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-sse4.1 -msse2" } */
+
+void f(double*d,double*e){
+  for(;d<e;++d)
+    *d=(*d<.5)?.7:0;
+}
+
+/* { dg-final { scan-assembler {(?n)(?:cmpnltsd|cmpltsd)} } } */
+/* { dg-final { scan-assembler {(?n)(?:andnpd|andpd)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr86722.c b/gcc/testsuite/gcc.target/i386/pr86722.c
index 4de2ca1..e266a1e 100644
--- a/gcc/testsuite/gcc.target/i386/pr86722.c
+++ b/gcc/testsuite/gcc.target/i386/pr86722.c
@@ -6,5 +6,5 @@ void f(double*d,double*e){
     *d=(*d<.5)?.7:0;
 }
 
-/* { dg-final { scan-assembler-not "andnpd" } } */
+/* { dg-final { scan-assembler-times {(?n)(?:andnpd|andpd)} 1 } } */
 /* { dg-final { scan-assembler-not "orpd" } } */