aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHongyu Wang <hongyu.wang@intel.com>2024-05-08 11:08:42 +0800
committerHongyu Wang <hongyu.wang@intel.com>2024-06-06 15:29:48 +0800
commit0b6cea8783b9e1b86c5c7c277c301cb5931bc5e0 (patch)
tree11a2b3a6d34b66882875db2b16e85f1f284d9293
parent23db87301b623ecf162c9df718ce82ed9aa354a8 (diff)
downloadgcc-0b6cea8783b9e1b86c5c7c277c301cb5931bc5e0.zip
gcc-0b6cea8783b9e1b86c5c7c277c301cb5931bc5e0.tar.gz
gcc-0b6cea8783b9e1b86c5c7c277c301cb5931bc5e0.tar.bz2
[APX CCMP] Support ccmp for float compare
The ccmp insn itself doesn't support fp compare, but x86 has fp comi insns that set EFLAGS, which can be used as the scc input to ccmp. Allow scalar fp compare in ix86_gen_ccmp_first, except for ORDERED/UNORDERED compares, which cannot be identified by ccmp. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_gen_ccmp_first): Add fp compare and check the allowed fp compare type. (ix86_gen_ccmp_next): Adjust compare_code input to ccmp for fp compare. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-ccmp-1.c: Add test for fp compare. * gcc.target/i386/apx-ccmp-2.c: Likewise.
-rw-r--r--gcc/config/i386/i386-expand.cc53
-rw-r--r--gcc/testsuite/gcc.target/i386/apx-ccmp-1.c45
-rw-r--r--gcc/testsuite/gcc.target/i386/apx-ccmp-2.c47
3 files changed, 138 insertions, 7 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 5353d76..d1d396a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -25369,18 +25369,58 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
if (op_mode == VOIDmode)
op_mode = GET_MODE (op1);
+ /* We only support the following scalar comparisons that use just 1
+ instruction: DI/SI/QI/HI/DF/SF/HF.
+ Unordered/Ordered compares cannot be correctly identified by
+ ccmp, so they are not supported. */
if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode
- || op_mode == QImode))
+ || op_mode == QImode || op_mode == DFmode || op_mode == SFmode
+ || op_mode == HFmode)
+ || code == ORDERED
+ || code == UNORDERED)
{
end_sequence ();
return NULL_RTX;
}
/* Canonicalize the operands according to mode. */
- if (!nonimmediate_operand (op0, op_mode))
- op0 = force_reg (op_mode, op0);
- if (!x86_64_general_operand (op1, op_mode))
- op1 = force_reg (op_mode, op1);
+ if (SCALAR_INT_MODE_P (op_mode))
+ {
+ if (!nonimmediate_operand (op0, op_mode))
+ op0 = force_reg (op_mode, op0);
+ if (!x86_64_general_operand (op1, op_mode))
+ op1 = force_reg (op_mode, op1);
+ }
+ else
+ {
+ /* op0/op1 can be canonicalized from expand_fp_compare, so
+ just adjust the code to make it generate supported fp
+ condition. */
+ if (ix86_fp_compare_code_to_integer (code) == UNKNOWN)
+ {
+ /* First try to split condition if we don't need to honor
+ NaNs, as the ORDERED/UNORDERED check always falls
+ through. */
+ if (!HONOR_NANS (op_mode))
+ {
+ rtx_code first_code;
+ split_comparison (code, op_mode, &first_code, &code);
+ }
+ /* Otherwise try to swap the operand order and check if
+ the comparison is supported. */
+ else
+ {
+ code = swap_condition (code);
+ std::swap (op0, op1);
+ }
+
+ if (ix86_fp_compare_code_to_integer (code) == UNKNOWN)
+ {
+ end_sequence ();
+ return NULL_RTX;
+ }
+ }
+ }
*prep_seq = get_insns ();
end_sequence ();
@@ -25445,6 +25485,9 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
prev_code = GET_CODE (prev);
+ /* Fixup FP compare code here. */
+ if (GET_MODE (XEXP (prev, 0)) == CCFPmode)
+ prev_code = ix86_fp_compare_code_to_integer (prev_code);
if (bit_code != AND)
prev_code = reverse_condition (prev_code);
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
index 5a2dad8..e4e112f 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mapx-features=ccmp" } */
+/* { dg-options "-O2 -ffast-math -mapx-features=ccmp" } */
int
f1 (int a)
@@ -56,8 +56,49 @@ f9 (int a, int b)
return a == 3 || a == 0;
}
+int
+f10 (float a, int b, float c)
+{
+ return a > c || b < 19;
+}
+
+int
+f11 (float a, int b)
+{
+ return a == 0.0 && b > 21;
+}
+
+int
+f12 (double a, int b)
+{
+ return a < 3.0 && b != 23;
+}
+
+int
+f13 (double a, double b, int c, int d)
+{
+ a += b;
+ c += d;
+ return a != b || c == d;
+}
+
+int
+f14 (double a, int b)
+{
+ return b != 0 && a < 1.5;
+}
+
+int
+f15 (double a, double b, int c, int d)
+{
+ return c != d || a <= b;
+}
+
/* { dg-final { scan-assembler-times "ccmpg" 2 } } */
/* { dg-final { scan-assembler-times "ccmple" 2 } } */
/* { dg-final { scan-assembler-times "ccmpne" 4 } } */
-/* { dg-final { scan-assembler-times "ccmpe" 1 } } */
+/* { dg-final { scan-assembler-times "ccmpe" 3 } } */
+/* { dg-final { scan-assembler-times "ccmpbe" 1 } } */
+/* { dg-final { scan-assembler-times "ccmpa" 1 } } */
+/* { dg-final { scan-assembler-times "ccmpbl" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
index 30a1c21..0123a68 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ccmp-2.c
@@ -42,6 +42,47 @@ int foo_noapx(int a, int b, int c, int d)
return sum;
}
+__attribute__((noinline, noclone,
+ optimize(("finite-math-only")), target("apxf")))
+double foo_fp_apx(int a, double b, int c, double d)
+{
+ int sum = a;
+ double sumd = b;
+
+ if (a != c)
+ {
+ sum += a;
+ if (a < c || sumd != d || sum > c)
+ {
+ c += a;
+ sum += a + c;
+ }
+ }
+
+ return sum + sumd;
+}
+
+__attribute__((noinline, noclone,
+ optimize(("finite-math-only")), target("no-apxf")))
+double foo_fp_noapx(int a, double b, int c, double d)
+{
+ int sum = a;
+ double sumd = b;
+
+ if (a != c)
+ {
+ sum += a;
+ if (a < c || sumd != d || sum > c)
+ {
+ c += a;
+ sum += a + c;
+ }
+ }
+
+ return sum + sumd;
+}
+
+
int main (void)
{
if (!__builtin_cpu_supports ("apxf"))
@@ -53,5 +94,11 @@ int main (void)
if (val1 != val2)
__builtin_abort ();
+ double val3 = foo_fp_noapx (24, 7.5, 32, 2.0);
+ double val4 = foo_fp_apx (24, 7.5, 32, 2.0);
+
+ if (val3 != val4)
+ __builtin_abort ();
+
return 0;
}