aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZhang, Jun <jun.zhang@intel.com>2024-08-26 10:53:54 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2024-08-26 13:22:04 +0800
commit576bd309ded9dfe258023f26924c064a7bf12875 (patch)
tree650603cfdc7c4f3eb6b659b0b40d9c21a9cb2942
parentf6fe2962daf7b8d8532c768c3b9eab00f99cce5b (diff)
downloadgcc-576bd309ded9dfe258023f26924c064a7bf12875.zip
gcc-576bd309ded9dfe258023f26924c064a7bf12875.tar.gz
gcc-576bd309ded9dfe258023f26924c064a7bf12875.tar.bz2
AVX10.2: Support compare instructions
gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_ssecom_setcc): Mention behavior change on flags. (ix86_expand_sse_comi): Handle AVX10.2 behavior. (ix86_expand_sse_comi_round): Ditto. (ix86_expand_round_builtin): Ditto. (ix86_expand_builtin): Change function call. * config/i386/i386.md (UNSPEC_COMX): New unspec. * config/i386/sse.md (avx10_2_v<unord>comx<ssemodesuffix><round_saeonly_name>): New. (<sse>_<unord>comi<round_saeonly_name>): Add HFmode. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-compare-1.c: New test. Co-authored-by: Haochen Jiang <haochen.jiang@intel.com> Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
-rw-r--r--gcc/config/i386/i386-expand.cc170
-rw-r--r--gcc/config/i386/i386.md1
-rw-r--r--gcc/config/i386/sse.md18
-rw-r--r--gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c21
4 files changed, 183 insertions, 27 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 56fc433..d692008 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -10601,7 +10601,9 @@ ix86_ssecom_setcc (const enum rtx_code comparison,
rtx_code_label *label = NULL;
/* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
- with NAN operands. */
+ with NAN operands.
+ Under TARGET_AVX10_2_256, VCOMX/VUCOMX are generated instead of
+ COMI/UCOMI. VCOMX/VUCOMX will not set ZF for NAN operands. */
if (check_unordered)
{
gcc_assert (comparison == EQ || comparison == NE);
@@ -10640,7 +10642,7 @@ ix86_ssecom_setcc (const enum rtx_code comparison,
static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
- rtx target)
+ rtx target, bool comx_ok)
{
rtx pat, set_dst;
tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -10673,11 +10675,13 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
case GE:
break;
case EQ:
- check_unordered = true;
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
break;
case NE:
- check_unordered = true;
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
const_val = const1_rtx;
break;
@@ -10696,6 +10700,28 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
|| !insn_p->operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
+ if ((comparison == EQ || comparison == NE)
+ && TARGET_AVX10_2_256 && comx_ok)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_sse_comi:
+ icode = CODE_FOR_avx10_2_comxsf;
+ break;
+ case CODE_FOR_sse_ucomi:
+ icode = CODE_FOR_avx10_2_ucomxsf;
+ break;
+ case CODE_FOR_sse2_comi:
+ icode = CODE_FOR_avx10_2_comxdf;
+ break;
+ case CODE_FOR_sse2_ucomi:
+ icode = CODE_FOR_avx10_2_ucomxdf;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
pat = GEN_FCN (icode) (op0, op1);
if (! pat)
return 0;
@@ -12190,7 +12216,7 @@ ix86_erase_embedded_rounding (rtx pat)
with rounding. */
static rtx
ix86_expand_sse_comi_round (const struct builtin_description *d,
- tree exp, rtx target)
+ tree exp, rtx target, bool comx_ok)
{
rtx pat, set_dst;
tree arg0 = CALL_EXPR_ARG (exp, 0);
@@ -12252,6 +12278,7 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
op1 = safe_vector_operand (op1, mode1);
enum rtx_code comparison = comparisons[INTVAL (op2)];
+ enum rtx_code orig_comp = comparison;
bool ordered = ordereds[INTVAL (op2)];
bool non_signaling = non_signalings[INTVAL (op2)];
rtx const_val = const0_rtx;
@@ -12263,10 +12290,21 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
case ORDERED:
if (!ordered)
{
- /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
- if (!non_signaling)
- ordered = true;
- mode = CCSmode;
+ if (TARGET_AVX10_2_256 && comx_ok)
+ {
+ /* Unlike VCOMI{SH,SS,SD}, VCOMX{SH,SS,SD} will set SF
+ differently. So directly return true here. */
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const1_rtx);
+ return target;
+ }
+ else
+ {
+ /* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
+ if (!non_signaling)
+ ordered = true;
+ mode = CCSmode;
+ }
}
else
{
@@ -12280,10 +12318,21 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
case UNORDERED:
if (ordered)
{
- /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
- if (non_signaling)
- ordered = false;
- mode = CCSmode;
+ if (TARGET_AVX10_2_256 && comx_ok)
+ {
+ /* Unlike VCOMI{SH,SS,SD}, VCOMX{SH,SS,SD} will set SF
+ differently. So directly return false here. */
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ return target;
+ }
+ else
+ {
+ /* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
+ if (non_signaling)
+ ordered = false;
+ mode = CCSmode;
+ }
}
else
{
@@ -12314,17 +12363,23 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
if (ordered == non_signaling)
ordered = !ordered;
break;
- case EQ:
/* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
- _CMP_EQ_OQ/_CMP_EQ_OS. */
- check_unordered = true;
+ _CMP_EQ_OQ/_CMP_EQ_OS.
+ Under TARGET_AVX10_2_256, VCOMX/VUCOMX are always generated instead
+ of COMI/UCOMI, VCOMX/VUCOMX will not set ZF with NAN. */
+ case EQ:
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
break;
case NE:
/* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
- _CMP_NEQ_UQ/_CMP_NEQ_US. */
+ _CMP_NEQ_UQ/_CMP_NEQ_US.
+ Under TARGET_AVX10_2_256, VCOMX/VUCOMX are always generated instead
+ of COMI/UCOMI, VCOMX/VUCOMX will not set ZF with NAN. */
gcc_assert (!ordered);
- check_unordered = true;
+ if (!TARGET_AVX10_2_256 || !comx_ok)
+ check_unordered = true;
mode = CCZmode;
const_val = const1_rtx;
break;
@@ -12343,14 +12398,77 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
|| !insn_p->operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
+ /* Generate comx instead of comi when EQ/NE to avoid NAN checks.
+ Use orig_comp to exclude ORDERED/UNORDERED cases. */
+ if ((orig_comp == EQ || orig_comp == NE)
+ && TARGET_AVX10_2_256 && comx_ok)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_avx512fp16_comi_round:
+ icode = CODE_FOR_avx10_2_comxhf_round;
+ break;
+ case CODE_FOR_sse_comi_round:
+ icode = CODE_FOR_avx10_2_comxsf_round;
+ break;
+ case CODE_FOR_sse2_comi_round:
+ icode = CODE_FOR_avx10_2_comxdf_round;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Generate comi instead of comx when UNEQ/LTGT to avoid NAN checks. */
+ if ((comparison == UNEQ || comparison == LTGT)
+ && TARGET_AVX10_2_256 && comx_ok)
+ {
+ switch (icode)
+ {
+ case CODE_FOR_avx10_2_comxhf_round:
+ icode = CODE_FOR_avx512fp16_comi_round;
+ break;
+ case CODE_FOR_avx10_2_comxsf_round:
+ icode = CODE_FOR_sse_comi_round;
+ break;
+ case CODE_FOR_avx10_2_comxdf_round:
+ icode = CODE_FOR_sse2_comi_round;
+ break;
+
+ default:
+ break;
+ }
+ }
+
/*
- 1. COMI: ordered and signaling.
- 2. UCOMI: unordered and non-signaling.
+ 1. COMI/VCOMX: ordered and signaling.
+ 2. UCOMI/VUCOMX: unordered and non-signaling.
*/
if (non_signaling)
- icode = (icode == CODE_FOR_sse_comi_round
- ? CODE_FOR_sse_ucomi_round
- : CODE_FOR_sse2_ucomi_round);
+ switch (icode)
+ {
+ case CODE_FOR_sse_comi_round:
+ icode = CODE_FOR_sse_ucomi_round;
+ break;
+ case CODE_FOR_sse2_comi_round:
+ icode = CODE_FOR_sse2_ucomi_round;
+ break;
+ case CODE_FOR_avx512fp16_comi_round:
+ icode = CODE_FOR_avx512fp16_ucomi_round;
+ break;
+ case CODE_FOR_avx10_2_comxsf_round:
+ icode = CODE_FOR_avx10_2_ucomxsf_round;
+ break;
+ case CODE_FOR_avx10_2_comxhf_round:
+ icode = CODE_FOR_avx10_2_ucomxhf_round;
+ break;
+ case CODE_FOR_avx10_2_comxdf_round:
+ icode = CODE_FOR_avx10_2_ucomxdf_round;
+ break;
+ default:
+ gcc_unreachable ();
+ }
pat = GEN_FCN (icode) (op0, op1, op3);
if (! pat)
@@ -12487,7 +12605,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
break;
case INT_FTYPE_V4SF_V4SF_INT_INT:
case INT_FTYPE_V2DF_V2DF_INT_INT:
- return ix86_expand_sse_comi_round (d, exp, target);
+ return ix86_expand_sse_comi_round (d, exp, target, true);
case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI_INT:
case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
@@ -15628,7 +15746,7 @@ rdseed_step:
case IX86_BUILTIN_VCOMSBF16GE:
case IX86_BUILTIN_VCOMSBF16LT:
case IX86_BUILTIN_VCOMSBF16LE:
- return ix86_expand_sse_comi (bdesc_args + i, exp, target);
+ return ix86_expand_sse_comi (bdesc_args + i, exp, target, false);
case IX86_BUILTIN_FABSQ:
case IX86_BUILTIN_COPYSIGNQ:
if (!TARGET_SSE)
@@ -15644,7 +15762,7 @@ rdseed_step:
&& fcode <= IX86_BUILTIN__BDESC_COMI_LAST)
{
i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST;
- return ix86_expand_sse_comi (bdesc_comi + i, exp, target);
+ return ix86_expand_sse_comi (bdesc_comi + i, exp, target, true);
}
if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 34f9214..b56a51b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -139,6 +139,7 @@
UNSPEC_SCALEF
UNSPEC_PCMP
UNSPEC_CVTBFSF
+ UNSPEC_COMX
;; Generic math support
UNSPEC_IEEE_MIN ; not commutative
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a6d844d..da91d39 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4692,6 +4692,22 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "avx10_2_<unord>comx<mode><round_saeonly_name>"
+ [(set (reg:CCFP FLAGS_REG)
+ (unspec:CCFP
+ [(vec_select:MODEFH
+ (match_operand:<ssevecmode> 0 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (vec_select:MODEFH
+ (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
+ (parallel [(const_int 0)]))]
+ UNSPEC_COMX))]
+ "TARGET_AVX10_2_256"
+ "v<unord>comx<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<sse>_<unord>comi<round_saeonly_name>"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
@@ -4701,7 +4717,7 @@
(vec_select:MODEFH
(match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
- "SSE_FLOAT_MODE_P (<MODE>mode)"
+ "SSE_FLOAT_MODE_P (<MODE>mode) || <MODE>mode == E_HFmode"
"%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
[(set_attr "type" "ssecomi")
(set_attr "prefix" "maybe_vex")
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c
new file mode 100644
index 0000000..99d3218
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-compare-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vcomxsd\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcomxss\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vucomxsd\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vucomxss\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128 x3;
+volatile __m128d x4;
+volatile int a;
+
+void extern
+avx10_2_test (void)
+{
+ a = _mm_comi_round_sd (x4, x4, _CMP_EQ_OS, _MM_FROUND_NO_EXC);
+ a = _mm_comi_round_ss (x3, x3, _CMP_NEQ_US, _MM_FROUND_NO_EXC);
+ a = _mm_comi_round_sd (x4, x4, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+ a = _mm_comi_round_ss (x3, x3, _CMP_NEQ_UQ, _MM_FROUND_NO_EXC);
+}