aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: liuhongt <hongtao.liu@intel.com> 2024-05-31 14:38:07 +0800
committer: liuhongt <hongtao.liu@intel.com> 2024-06-03 15:18:10 +0800
commit: ac306de7d5100d3682eae2270995a9abbe19db38 (patch)
tree: 9d27ba0cc38a838211d05c0f6057c397cc6e8547
parent: 1f2ca510065a2033bac408eb5a960ef0126f25cc (diff)
download: gcc-ac306de7d5100d3682eae2270995a9abbe19db38.zip
gcc-ac306de7d5100d3682eae2270995a9abbe19db38.tar.gz
gcc-ac306de7d5100d3682eae2270995a9abbe19db38.tar.bz2
Add some preference for floating point rtl ifcvt when sse4.1 is not available
Without TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por) for movdfcc/movsfcc, so the converted sequence could fail the ifcvt cost comparison. Increasing the branch cost could hurt performance for other modes, so specifically add some preference for floating point ifcvt.

gcc/ChangeLog:

	PR target/115299
	* config/i386/i386.cc (ix86_noce_conversion_profitable_p): Add some preference for floating point ifcvt when SSE4.1 is not available.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr115299.c: New test.
	* gcc.target/i386/pr86722.c: Adjust testcase.
-rw-r--r-- gcc/config/i386/i386.cc 17
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr115299.c 10
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr86722.c 2
3 files changed, 28 insertions, 1 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 1a0206a..271da12 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24879,6 +24879,23 @@ ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
return false;
}
}
+
+ /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
+ for movdfcc/movsfcc, and could possibly fail cost comparison.
+ Increase branch cost will hurt performance for other modes, so
+ specially add some preference for floating point ifcvt. */
+ if (!TARGET_SSE4_1 && if_info->x
+ && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
+ && if_info->speed_p)
+ {
+ unsigned cost = seq_cost (seq, true);
+
+ if (cost <= if_info->original_cost)
+ return true;
+
+ return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
+ }
+
return default_noce_conversion_profitable_p (seq, if_info);
}
diff --git a/gcc/testsuite/gcc.target/i386/pr115299.c b/gcc/testsuite/gcc.target/i386/pr115299.c
new file mode 100644
index 0000000..53c5899
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115299.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-sse4.1 -msse2" } */
+
+void f(double*d,double*e){
+ for(;d<e;++d)
+ *d=(*d<.5)?.7:0;
+}
+
+/* { dg-final { scan-assembler {(?n)(?:cmpnltsd|cmpltsd)} } } */
+/* { dg-final { scan-assembler {(?n)(?:andnpd|andpd)} } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr86722.c b/gcc/testsuite/gcc.target/i386/pr86722.c
index 4de2ca1..e266a1e 100644
--- a/gcc/testsuite/gcc.target/i386/pr86722.c
+++ b/gcc/testsuite/gcc.target/i386/pr86722.c
@@ -6,5 +6,5 @@ void f(double*d,double*e){
*d=(*d<.5)?.7:0;
}
-/* { dg-final { scan-assembler-not "andnpd" } } */
+/* { dg-final { scan-assembler-times {(?n)(?:andnpd|andpd)} 1 } } */
/* { dg-final { scan-assembler-not "orpd" } } */