diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2016-08-15 20:46:53 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2016-08-15 20:46:53 +0200 |
commit | 432baa029c933e11934dddfd7ec355dcbdb4ea67 (patch) | |
tree | 3d80b7ede24baff6d342431517b6fd266918e311 /gcc | |
parent | 87155d4f6c847cd072321424831fd0ffb405265d (diff) | |
download | gcc-432baa029c933e11934dddfd7ec355dcbdb4ea67.zip gcc-432baa029c933e11934dddfd7ec355dcbdb4ea67.tar.gz gcc-432baa029c933e11934dddfd7ec355dcbdb4ea67.tar.bz2 |
re PR target/72867 (SSE/AVX/AVX512: incorrect optimization of VMINPS/VMAXPS at compile time)
PR target/72867
* config/i386/sse.md (<code><mode>3<mask_name><round_saeonly_name>):
Emit ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>
for !flag_finite_math_only or flag_signed_zeros.
(*<code><mode>3<mask_name><round_saeonly_name>): Rename from
*<code><mode>3_finite<mask_name><round_saeonly_name>. Do not
depend on flag_finite_math_only.
(ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>):
New insn pattern.
(*<code><mode>3<mask_name><round_saeonly_name>): Remove.
(*ieee_smin<mode>3): Ditto.
(*ieee_smax<mode>3): Ditto.
* config/i386/mmx.md (mmx_<code>v2sf3): Emit
mmx_ieee_<ieee_maxmin>v2sf3 for !flag_finite_math_only or
flag_signed_zeros.
(*mmx_<code>v2sf3): Rename from *mmx_<code>v2sf3_finite. Do not
depend on flag_finite_math_only.
(mmx_ieee_<ieee_maxmin>v2sf3): New insn pattern.
(*mmx_<code>v2sf3): Remove.
* config/i386/subst.md (round_saeonly_mask_arg3): New subst attribute.
* config/i386/i386.c (ix86_expand_sse_fp_mimnax): Check
flag_signed_zeros instead of !flag_unsafe_math_optimizations.
testsuite/ChangeLog:
PR target/72867
* gcc.target/i386/pr72867.c: New test.
From-SVN: r239487
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 25 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 16 | ||||
-rw-r--r-- | gcc/config/i386/mmx.md | 43 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 85 | ||||
-rw-r--r-- | gcc/config/i386/subst.md | 1 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr72867.c | 23 |
8 files changed, 123 insertions, 77 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fa52700..c380045 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2016-08-15 Uros Bizjak <ubizjak@gmail.com> + + PR target/72867 + * config/i386/sse.md (<code><mode>3<mask_name><round_saeonly_name>): + Emit ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name> + for !flag_finite_math_only or flag_signed_zeros. + (*<code><mode>3<mask_name><round_saeonly_name>): Rename from + *<code><mode>3_finite<mask_name><round_saeonly_name>. Do not + depend on flag_finite_math_only. + (ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>): + New insn pattern. + (*<code><mode>3<mask_name><round_saeonly_name>): Remove. + (*ieee_smin<mode>3): Ditto. + (*ieee_smax<mode>3): Ditto. + * config/i386/mmx.md (mmx_<code>v2sf3): Emit + mmx_ieee_<ieee_maxmin>v2sf3 for !flag_finite_math_only or + flag_signed_zeros. + (*mmx_<code>v2sf3): Rename from *mmx_<code>v2sf3_finite. Do not + depend on flag_finite_math_only. + (mmx_ieee_<ieee_maxmin>v2sf3): New insn pattern. + (*mmx_<code>v2sf3): Remove. + * config/i386/subst.md (round_saeonly_mask_arg3): New subst attribute. + * config/i386/i386.c (ix86_expand_sse_fp_mimnax): Check + flag_signed_zeros instead of !flag_unsafe_math_optimizations. + 2016-08-15 Segher Boessenkool <segher@kernel.crashing.org> PR rtl-optimization/73650 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7170194..02bfcbf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -23404,7 +23404,7 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, but MODE may be a vector mode and thus not appropriate. */ - if (!flag_finite_math_only || !flag_unsafe_math_optimizations) + if (!flag_finite_math_only || flag_signed_zeros) { int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; rtvec v; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 25ba1eb..e5d2fd0 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -885,6 +885,14 @@ (umax "maxu") (umin "minu")]) (define_code_attr maxmin_float [(smax "max") (smin "min")]) +(define_int_iterator IEEE_MAXMIN + [UNSPEC_IEEE_MAX + UNSPEC_IEEE_MIN]) + +(define_int_attr ieee_maxmin + [(UNSPEC_IEEE_MAX "max") + (UNSPEC_IEEE_MIN "min")]) + ;; Mapping of logic operators (define_code_iterator any_logic [and ior xor]) (define_code_iterator any_or [ior xor]) @@ -17401,14 +17409,6 @@ ;; Their operands are not commutative, and thus they may be used in the ;; presence of -0.0 and NaN. -(define_int_iterator IEEE_MAXMIN - [UNSPEC_IEEE_MAX - UNSPEC_IEEE_MIN]) - -(define_int_attr ieee_maxmin - [(UNSPEC_IEEE_MAX "max") - (UNSPEC_IEEE_MIN "min")]) - (define_insn "*ieee_s<ieee_maxmin><mode>3" [(set (match_operand:MODEF 0 "register_operand" "=x,v") (unspec:MODEF diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 65e8b46..9992233 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -296,10 +296,6 @@ (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) -;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX -;; isn't really correct, as those rtl operators aren't defined when -;; applied to NaNs. Hopefully the optimizers won't get too smart on us. - (define_expand "mmx_<code>v2sf3" [(set (match_operand:V2SF 0 "register_operand") (smaxmin:V2SF @@ -307,30 +303,47 @@ (match_operand:V2SF 2 "nonimmediate_operand")))] "TARGET_3DNOW" { - if (!flag_finite_math_only) - operands[1] = force_reg (V2SFmode, operands[1]); - ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands); + if (!flag_finite_math_only || flag_signed_zeros) + { + operands[1] = force_reg (V2SFmode, operands[1]); + emit_insn (gen_mmx_ieee_<maxmin_float>v2sf3 + (operands[0], operands[1], operands[2])); + DONE; + } + else + ix86_fixup_binary_operands_no_copy (<CODE>, V2SFmode, operands); }) -(define_insn "*mmx_<code>v2sf3_finite" +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). +;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "*mmx_<code>v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") (smaxmin:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0") (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] - "TARGET_3DNOW && flag_finite_math_only - && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)" + "TARGET_3DNOW && ix86_binary_operator_ok (<CODE>, V2SFmode, operands)" "pf<maxmin_float>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) -(define_insn "*mmx_<code>v2sf3" +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. + +(define_insn "mmx_ieee_<ieee_maxmin>v2sf3" [(set (match_operand:V2SF 0 "register_operand" "=y") - (smaxmin:V2SF - (match_operand:V2SF 1 "register_operand" "0") - (match_operand:V2SF 2 "nonimmediate_operand" "ym")))] + (unspec:V2SF + [(match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2SF 2 "nonimmediate_operand" "ym")] + IEEE_MAXMIN))] "TARGET_3DNOW" - "pf<maxmin_float>\t{%2, %0|%0, %2}" + "pf<ieee_maxmin>\t{%2, %0|%0, %2}" [(set_attr "type" "mmxadd") (set_attr "prefix_extra" "1") (set_attr "mode" "V2SF")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 9f60ad0..9a39c74 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1633,10 +1633,6 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "SF")]) -;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX -;; isn't really correct, as those rtl operators aren't defined when -;; applied to NaNs. Hopefully the optimizers won't get too smart on us. - (define_expand "<code><mode>3<mask_name><round_saeonly_name>" [(set (match_operand:VF 0 "register_operand") (smaxmin:VF @@ -1644,18 +1640,30 @@ (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))] "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" { - if (!flag_finite_math_only) - operands[1] = force_reg (<MODE>mode, operands[1]); - ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); + if (!flag_finite_math_only || flag_signed_zeros) + { + operands[1] = force_reg (<MODE>mode, operands[1]); + emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name> + (operands[0], operands[1], operands[2] + <mask_operand_arg34> + <round_saeonly_mask_arg3>)); + DONE; + } + else + ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); }) -(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>" +;; These versions of the min/max patterns are intentionally ignorant of +;; their behavior wrt -0.0 and NaN (via the commutative operand mark). +;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator +;; are undefined in this condition, we're certain this is correct. + +(define_insn "*<code><mode>3<mask_name><round_saeonly_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (smaxmin:VF (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v") (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))] - "TARGET_SSE && flag_finite_math_only - && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) + "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" "@ <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} @@ -1666,16 +1674,23 @@ (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<MODE>")]) -(define_insn "*<code><mode>3<mask_name><round_saeonly_name>" +;; These versions of the min/max patterns implement exactly the operations +;; min = (op1 < op2 ? op1 : op2) +;; max = (!(op1 < op2) ? op1 : op2) +;; Their operands are not commutative, and thus they may be used in the +;; presence of -0.0 and NaN. + +(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") - (smaxmin:VF - (match_operand:VF 1 "register_operand" "0,v") - (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))] - "TARGET_SSE && !flag_finite_math_only + (unspec:VF + [(match_operand:VF 1 "register_operand" "0,v") + (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")] + IEEE_MAXMIN))] + "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>" "@ - <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} - v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}" + <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2} + v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "btver2_sse_attr" "maxmin") @@ -1700,42 +1715,6 @@ (set_attr "prefix" "<round_saeonly_prefix>") (set_attr "mode" "<ssescalarmode>")]) -;; These versions of the min/max patterns implement exactly the operations -;; min = (op1 < op2 ? op1 : op2) -;; max = (!(op1 < op2) ? op1 : op2) -;; Their operands are not commutative, and thus they may be used in the -;; presence of -0.0 and NaN. - -(define_insn "*ieee_smin<mode>3" - [(set (match_operand:VF 0 "register_operand" "=x,v") - (unspec:VF - [(match_operand:VF 1 "register_operand" "0,v") - (match_operand:VF 2 "vector_operand" "xBm,vm")] - UNSPEC_IEEE_MIN))] - "TARGET_SSE" - "@ - min<ssemodesuffix>\t{%2, %0|%0, %2} - vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseadd") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "<MODE>")]) - -(define_insn "*ieee_smax<mode>3" - [(set (match_operand:VF 0 "register_operand" "=x,v") - (unspec:VF - [(match_operand:VF 1 "register_operand" "0,v") - (match_operand:VF 2 "vector_operand" "xBm,vm")] - UNSPEC_IEEE_MAX))] - "TARGET_SSE" - "@ - max<ssemodesuffix>\t{%2, %0|%0, %2} - vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseadd") - (set_attr "prefix" "orig,vex") - (set_attr "mode" "<MODE>")]) - (define_insn "avx_addsubv4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_merge:V4DF diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index e2f67c4..f0b8cf4 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -161,6 +161,7 @@ (define_subst_attr "round_saeonly_mask_op4" "round_saeonly" "" "<round_saeonly_mask_operand4>") (define_subst_attr "round_saeonly_mask_scalar_merge_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_merge_operand4>") (define_subst_attr "round_saeonly_sd_mask_op5" "round_saeonly" "" "<round_saeonly_sd_mask_operand5>") +(define_subst_attr "round_saeonly_mask_arg3" "round_saeonly" "" ", operands[<mask_expand_op3>]") (define_subst_attr "round_saeonly_constraint" "round_saeonly" "vm" "v") (define_subst_attr "round_saeonly_constraint2" "round_saeonly" "m" "v") (define_subst_attr "round_saeonly_nimm_predicate" "round_saeonly" "vector_operand" "register_operand") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index eeaa5d5..4b4c9ca 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2016-08-15 Uros Bizjak <ubizjak@gmail.com> + + PR target/72867 + * gcc.target/i386/pr72867.c: New test. + 2016-08-16 Eric Botcazou <ebotcazou@adacore.com> * c-c++-common/dump-ada-spec-5.c: New test. diff --git a/gcc/testsuite/gcc.target/i386/pr72867.c b/gcc/testsuite/gcc.target/i386/pr72867.c new file mode 100644 index 0000000..b8614be --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr72867.c @@ -0,0 +1,23 @@ +/* PR target/72867 */ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target sse } */ + +#include "sse-check.h" +#include <xmmintrin.h> + +static void +sse_test (void) +{ + float nan = __builtin_nanf (""); + + __m128 x = _mm_min_ps(_mm_set1_ps(nan), _mm_set1_ps(1.0f)); + + if (x[0] != 1.0f) + abort (); + + x = _mm_min_ps(_mm_set1_ps(1.f), _mm_set1_ps(nan)); + + if (!__builtin_isnan (x[0])) + abort (); +} |