diff options
author | Richard Biener <rguenther@suse.de> | 2018-01-26 10:30:36 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2018-01-26 10:30:36 +0000 |
commit | 5b55e6e333c97aaefc1db6c9d8411de578d05614 (patch) | |
tree | 21db3fe6e8247b8085351073439c65bef3bda73c | |
parent | af2e3244f56152d94a18c1339a18a4c0b0e33060 (diff) | |
download | gcc-5b55e6e333c97aaefc1db6c9d8411de578d05614.zip gcc-5b55e6e333c97aaefc1db6c9d8411de578d05614.tar.gz gcc-5b55e6e333c97aaefc1db6c9d8411de578d05614.tar.bz2 |
re PR tree-optimization/81082 (Failure to vectorise after reassociating index computation)
2018-01-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/81082
* fold-const.c (fold_plusminus_mult_expr): Do not perform the
association if it requires casting to unsigned.
* match.pd ((A * C) +- (B * C) -> (A+-B) * C): New patterns derived
from fold_plusminus_mult_expr to catch important cases late when
range info is available.
* gcc.dg/vect/pr81082.c: New testcase.
* gcc.dg/tree-ssa/loop-15.c: XFAIL the (int)((unsigned)n + -1U) * n + n
simplification to n * n.
From-SVN: r257077
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/fold-const.c | 8 | ||||
-rw-r--r-- | gcc/match.pd | 31 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/loop-15.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr81082.c | 15 |
6 files changed, 67 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 66ae9d1..79538c4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2018-01-26 Richard Biener <rguenther@suse.de> + + PR tree-optimization/81082 + * fold-const.c (fold_plusminus_mult_expr): Do not perform the + association if it requires casting to unsigned. + * match.pd ((A * C) +- (B * C) -> (A+-B)): New patterns derived + from fold_plusminus_mult_expr to catch important cases late when + range info is available. + 2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> * config/i386/sol2.h (USE_HIDDEN_LINKONCE): Remove. diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 744c355..c86c3f9 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -7097,7 +7097,7 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type, /* Same may be zero and thus the operation 'code' may overflow. Likewise same may be minus one and thus the multiplication may overflow. Perform - the operations in an unsigned type. */ + the sum operation in an unsigned type. */ tree utype = unsigned_type_for (type); tree tem = fold_build2_loc (loc, code, utype, fold_convert_loc (loc, utype, alt0), @@ -7110,9 +7110,9 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type, return fold_build2_loc (loc, MULT_EXPR, type, fold_convert (type, tem), same); - return fold_convert_loc (loc, type, - fold_build2_loc (loc, MULT_EXPR, utype, tem, - fold_convert_loc (loc, utype, same))); + /* Do not resort to unsigned multiplication because + we lose the no-overflow property of the expression. */ + return NULL_TREE; } /* Subroutine of native_encode_expr. Encode the INTEGER_CST diff --git a/gcc/match.pd b/gcc/match.pd index b288a36..8631153 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -1939,6 +1939,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (minus (convert (view_convert:stype @1)) (convert (view_convert:stype @2))))))) +/* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1). 
+ Modeled after fold_plusminus_mult_expr. */ +(if (!TYPE_SATURATING (type) + && (!FLOAT_TYPE_P (type) || flag_associative_math)) + (for plusminus (plus minus) + (simplify + (plusminus (mult:cs @0 @1) (mult:cs @0 @2)) + (if (!ANY_INTEGRAL_TYPE_P (type) + || TYPE_OVERFLOW_WRAPS (type) + || (INTEGRAL_TYPE_P (type) + && tree_expr_nonzero_p (@0) + && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type))))) + (mult (plusminus @1 @2) @0))) + /* We cannot generate constant 1 for fract. */ + (if (!ALL_FRACT_MODE_P (TYPE_MODE (type))) + (simplify + (plusminus @0 (mult:cs @0 @2)) + (if (!ANY_INTEGRAL_TYPE_P (type) + || TYPE_OVERFLOW_WRAPS (type) + || (INTEGRAL_TYPE_P (type) + && tree_expr_nonzero_p (@0) + && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type))))) + (mult (plusminus { build_one_cst (type); } @2) @0))) + (simplify + (plusminus (mult:cs @0 @2) @0) + (if (!ANY_INTEGRAL_TYPE_P (type) + || TYPE_OVERFLOW_WRAPS (type) + || (INTEGRAL_TYPE_P (type) + && tree_expr_nonzero_p (@0) + && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type))))) + (mult (plusminus @2 { build_one_cst (type); }) @0)))))) /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 01904c8..5df5351 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2018-01-26 Richard Biener <rguenther@suse.de> + + PR tree-optimization/81082 + * gcc.dg/vect/pr81082.c: New testcase. + * gcc.dg/tree-ssa/loop-15.c: XFAIL the (int)((unsigned)n + -1U) * n + n + simplification to n * n. 
+ 2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> * gcc.target/i386/mcount_pic.c: Only xfail get_pc_thunk scan on diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c index dce6ad5..b437518 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c @@ -19,7 +19,7 @@ int bla(void) } /* Since the loop is removed, there should be no addition. */ -/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */ +/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */ /* The if from the loop header copying remains in the code. */ diff --git a/gcc/testsuite/gcc.dg/vect/pr81082.c b/gcc/testsuite/gcc.dg/vect/pr81082.c new file mode 100644 index 0000000..1195fc7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr81082.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +int +f (int *x, int b1, int b2, int b3) +{ + int foo = 0; + for (int i1 = 0; i1 < b1; ++i1) + for (int i2 = 0; i2 < b2; ++i2) + for (int i3 = 0; i3 < b3; ++i3) + foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)]; + return foo; +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */ |