diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-07-03 18:19:19 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2023-07-10 09:06:24 +0800 |
commit | d41a57c46df6f8f7dae0c0a8b349e734806a837b (patch) | |
tree | ed9ee22f29d6bef05e99bc4e64a63a820132d982 /gcc | |
parent | 361a6fc4bc5d3073e8e19fba0af51380720e677a (diff) | |
download | gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.zip gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.tar.gz gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.tar.bz2 |
Add pre_reload splitter to detect fp min/max pattern.
We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
so ix86_expand_sse_fp_minmax fails to match.
This patch adds pre_reload splitter to detect the min/max pattern.
Operand order in MINSS matters for signed zeros and NaNs, since the
instruction always returns the second operand when either operand is a
NaN or both operands are zero.
gcc/ChangeLog:
PR target/110170
* config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
splitter to detect fp max pattern.
(*ieee_min<mode>3_1): Ditto, but for fp min pattern.
gcc/testsuite/ChangeLog:
* g++.target/i386/pr110170.C: New test.
* gcc.target/i386/pr110170.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.md | 43 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/i386/pr110170.C | 90 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr110170.c | 21 |
3 files changed, 154 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e47ced1..621cdd9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23163,6 +23163,49 @@
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operand order in min/max instructions matters for signed zeros and NaNs.
+(define_insn_and_split "*ieee_max<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (unspec:MODEF
+          [(match_operand:MODEF 1 "register_operand")
+           (match_operand:MODEF 2 "register_operand")
+           (lt:MODEF
+             (match_operand:MODEF 3 "register_operand")
+             (match_operand:MODEF 4 "register_operand"))]
+          UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && (rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (unspec:MODEF
+          [(match_dup 2)
+           (match_dup 1)]
+          UNSPEC_IEEE_MAX))])
+
+(define_insn_and_split "*ieee_min<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (unspec:MODEF
+          [(match_operand:MODEF 1 "register_operand")
+           (match_operand:MODEF 2 "register_operand")
+           (lt:MODEF
+             (match_operand:MODEF 3 "register_operand")
+             (match_operand:MODEF 4 "register_operand"))]
+          UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && (rtx_equal_p (operands[1], operands[4])
+       && rtx_equal_p (operands[2], operands[3]))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (unspec:MODEF
+          [(match_dup 2)
+           (match_dup 1)]
+          UNSPEC_IEEE_MIN))])
+
 ;; Make two stack loads independent:
 ;; fld aa fld aa
 ;; fld %st(0) -> fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 0000000..e638b12
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,90 @@
+/* { dg-do run { target sse4 } } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+auto test1() {
+  double nan = -0.0;
+  double x = 0.0;
+  __cond_swap(&nan, &x);
+  return x == 0.0 && signbit(x) == 1 && nan == 0.0 && signbit(nan) == 0;
+}
+
+auto test1r() {
+  double nan = NAN;
+  double x = 1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+  double nan = NAN;
+  double x = -1.0;
+  __cond_swap(&nan, &x);
+  return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+  double nan = NAN;
+  double x = -1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+  double nan = -NAN;
+  double x = 1.0;
+  __cond_swap(&nan, &x);
+  return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+  double nan = -NAN;
+  double x = 1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+  double nan = -NAN;
+  double x = -1.0;
+  __cond_swap(&nan, &x);
+  return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+  double nan = -NAN;
+  double x = -1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+/* Entry point named by the TEST macro; aborts if any case reports a mismatch.  */
+static void
+TEST()
+{
+  if (
+      !test1() || !test1r()
+      || !test2() || !test2r()
+      || !test3() || !test3r()
+      || !test4() || !test4r()
+      ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 0000000..c72f733
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
+/* Ideally cond_swap_df is also optimized to minsd/maxsd. */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}