aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2018-01-26 10:30:36 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2018-01-26 10:30:36 +0000
commit 5b55e6e333c97aaefc1db6c9d8411de578d05614 (patch)
tree 21db3fe6e8247b8085351073439c65bef3bda73c
parent af2e3244f56152d94a18c1339a18a4c0b0e33060 (diff)
download gcc-5b55e6e333c97aaefc1db6c9d8411de578d05614.zip
gcc-5b55e6e333c97aaefc1db6c9d8411de578d05614.tar.gz
gcc-5b55e6e333c97aaefc1db6c9d8411de578d05614.tar.bz2
re PR tree-optimization/81082 (Failure to vectorise after reassociating index computation)
2018-01-26  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81082
	* fold-const.c (fold_plusminus_mult_expr): Do not perform the
	association if it requires casting to unsigned.
	* match.pd ((A * C) +- (B * C) -> (A+-B)): New patterns derived
	from fold_plusminus_mult_expr to catch important cases late when
	range info is available.

	* gcc.dg/vect/pr81082.c: New testcase.
	* gcc.dg/tree-ssa/loop-15.c: XFAIL the (int)((unsigned)n + -1U) * n + n
	simplification to n * n.

From-SVN: r257077
-rw-r--r-- gcc/ChangeLog 9
-rw-r--r-- gcc/fold-const.c 8
-rw-r--r-- gcc/match.pd 31
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr81082.c 15
6 files changed, 67 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 66ae9d1..79538c4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-01-26 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/81082
+ * fold-const.c (fold_plusminus_mult_expr): Do not perform the
+ association if it requires casting to unsigned.
+ * match.pd ((A * C) +- (B * C) -> (A+-B)): New patterns derived
+ from fold_plusminus_mult_expr to catch important cases late when
+ range info is available.
+
2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* config/i386/sol2.h (USE_HIDDEN_LINKONCE): Remove.
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 744c355..c86c3f9 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -7097,7 +7097,7 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type,
/* Same may be zero and thus the operation 'code' may overflow. Likewise
same may be minus one and thus the multiplication may overflow. Perform
- the operations in an unsigned type. */
+ the sum operation in an unsigned type. */
tree utype = unsigned_type_for (type);
tree tem = fold_build2_loc (loc, code, utype,
fold_convert_loc (loc, utype, alt0),
@@ -7110,9 +7110,9 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, MULT_EXPR, type,
fold_convert (type, tem), same);
- return fold_convert_loc (loc, type,
- fold_build2_loc (loc, MULT_EXPR, utype, tem,
- fold_convert_loc (loc, utype, same)));
+ /* Do not resort to unsigned multiplication because
+ we lose the no-overflow property of the expression. */
+ return NULL_TREE;
}
/* Subroutine of native_encode_expr. Encode the INTEGER_CST
diff --git a/gcc/match.pd b/gcc/match.pd
index b288a36..8631153 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1939,6 +1939,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(minus (convert (view_convert:stype @1))
(convert (view_convert:stype @2)))))))
+/* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
+ Modeled after fold_plusminus_mult_expr. */
+(if (!TYPE_SATURATING (type)
+ && (!FLOAT_TYPE_P (type) || flag_associative_math))
+ (for plusminus (plus minus)
+ (simplify
+ (plusminus (mult:cs @0 @1) (mult:cs @0 @2))
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type)
+ || (INTEGRAL_TYPE_P (type)
+ && tree_expr_nonzero_p (@0)
+ && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
+ (mult (plusminus @1 @2) @0)))
+ /* We cannot generate constant 1 for fract. */
+ (if (!ALL_FRACT_MODE_P (TYPE_MODE (type)))
+ (simplify
+ (plusminus @0 (mult:cs @0 @2))
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type)
+ || (INTEGRAL_TYPE_P (type)
+ && tree_expr_nonzero_p (@0)
+ && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
+ (mult (plusminus { build_one_cst (type); } @2) @0)))
+ (simplify
+ (plusminus (mult:cs @0 @2) @0)
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type)
+ || (INTEGRAL_TYPE_P (type)
+ && tree_expr_nonzero_p (@0)
+ && expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
+ (mult (plusminus @2 { build_one_cst (type); }) @0))))))
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 01904c8..5df5351 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-01-26 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/81082
+ * gcc.dg/vect/pr81082.c: New testcase.
+ * gcc.dg/tree-ssa/loop-15.c: XFAIL the (int)((unsigned)n + -1U) * n + n
+ simplification to n * n.
+
2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.target/i386/mcount_pic.c: Only xfail get_pc_thunk scan on
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
index dce6ad5..b437518 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
@@ -19,7 +19,7 @@ int bla(void)
}
/* Since the loop is removed, there should be no addition. */
-/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
/* The if from the loop header copying remains in the code. */
diff --git a/gcc/testsuite/gcc.dg/vect/pr81082.c b/gcc/testsuite/gcc.dg/vect/pr81082.c
new file mode 100644
index 0000000..1195fc7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr81082.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+int
+f (int *x, int b1, int b2, int b3)
+{
+ int foo = 0;
+ for (int i1 = 0; i1 < b1; ++i1)
+ for (int i2 = 0; i2 < b2; ++i2)
+ for (int i3 = 0; i3 < b3; ++i3)
+ foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
+ return foo;
+}
+
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */