aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Hongyu Wang <hongyu.wang@intel.com> 2024-11-12 13:04:46 +0800
committer Hongyu Wang <hongyu.wang@intel.com> 2024-11-14 11:30:42 +0800
commit 19b24f4aae6c59d70cc882623b1e9b279b3872f3 (patch)
tree 62d094b8151f7cbc0b21d0352a21d2562e60e1f1 /gcc
parent 8564d0948c72df0a66d7eb47e15c6ab43e9b25ce (diff)
downloadgcc-19b24f4aae6c59d70cc882623b1e9b279b3872f3.zip
gcc-19b24f4aae6c59d70cc882623b1e9b279b3872f3.tar.gz
gcc-19b24f4aae6c59d70cc882623b1e9b279b3872f3.tar.bz2
i386: Fix cstorebf4 fp comparison operand [PR117495]
The cstorebf4 expander uses comparison_operator for the BFmode compare, which is incorrect when it calls ix86_expand_setcc directly, because ix86_expand_setcc does not canonicalize the input comparison by swapping the operands to correct the compare code. The original code without AVX10.2 calls emit_store_flag_force, which in turn calls emit_store_flag_1 and recursively invokes this expander again with swapped operands and flag. Therefore, we can avoid the redundant recursive call by changing comparison_operator to ix86_fp_comparison_operator and calling ix86_expand_setcc directly. gcc/ChangeLog: PR target/117495 * config/i386/i386.md (cstorebf4): Use ix86_fp_comparison_operator and call ix86_expand_setcc directly. gcc/testsuite/ChangeLog: PR target/117495 * gcc.target/i386/pr117495.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.md 18
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr117495.c 26
2 files changed, 33 insertions, 11 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f4aae80..03b0f54 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1853,23 +1853,19 @@
(compare:CC (match_operand:BF 2 "cmp_fp_expander_operand")
(match_operand:BF 3 "cmp_fp_expander_operand")))
(set (match_operand:QI 0 "register_operand")
- (match_operator 1 "comparison_operator"
+ (match_operator 1 "ix86_fp_comparison_operator"
[(reg:CC FLAGS_REG)
(const_int 0)]))]
"TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
{
- if (TARGET_AVX10_2_256 && !flag_trapping_math)
- ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- else
+ rtx op2 = operands[2], op3 = operands[3];
+ if (!TARGET_AVX10_2_256 || flag_trapping_math)
{
- rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
- rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
- rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
- op1, op2, SFmode, 0, 1);
- if (!rtx_equal_p (res, operands[0]))
- emit_move_insn (operands[0], res);
+ op2 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
+ op3 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
}
+ ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
+ op2, op3);
DONE;
})
diff --git a/gcc/testsuite/gcc.target/i386/pr117495.c b/gcc/testsuite/gcc.target/i386/pr117495.c
new file mode 100644
index 0000000..274b6ce
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117495.c
@@ -0,0 +1,26 @@
+/* PR target/117495 */
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64-v3 -fno-trapping-math" } */
+/* { dg-final { scan-assembler-times "vcomsbf16" 2 } } */
+
+__attribute__((target("avx10.2")))
+int foo (int b, int x)
+{
+ return (__bf16) b < x;
+}
+
+int foo2 (int b, int x)
+{
+ return (__bf16) b < x;
+}
+
+__attribute__((target("avx10.2")))
+int foo3 (__bf16 b, __bf16 x)
+{
+ return (__bf16) b < x;
+}
+
+int foo4 (__bf16 b, __bf16 x)
+{
+ return (__bf16) b < x;
+}