aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Richard Biener <rguenther@suse.de> 2017-12-01 13:33:40 +0000
committer Richard Biener <rguenth@gcc.gnu.org> 2017-12-01 13:33:40 +0000
commit fb607032b8009d141409b8dc3c5e4df42c93a231 (patch)
tree 57a7118f898507f4361145e0aa9875bb0e7c7f81 /gcc
parent d5f9df6a08d8f41fb7f77d588a63d6fecf2429d7 (diff)
download gcc-fb607032b8009d141409b8dc3c5e4df42c93a231.zip
gcc-fb607032b8009d141409b8dc3c5e4df42c93a231.tar.gz
gcc-fb607032b8009d141409b8dc3c5e4df42c93a231.tar.bz2
re PR tree-optimization/83232 (fma3d spec2000 regression on zen with -Ofast (generic tuning) after r255268 by missed SLP opportunity)
2017-12-01 Richard Biener <rguenther@suse.de> PR tree-optimization/83232 * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix detection of same access. Instead of breaking the group here do not consider the duplicate. Add comment explaining real fix. * gfortran.dg/vect/pr83232.f90: New testcase. From-SVN: r255307
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 7
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gfortran.dg/vect/pr83232.f90 33
-rw-r--r-- gcc/tree-vect-data-refs.c 29
4 files changed, 65 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d8b2aeb..0213e8b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2017-12-01 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/83232
+ * tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
+ detection of same access. Instead of breaking the group here
+ do not consider the duplicate. Add comment explaining real fix.
+
2017-12-01 Jonathan Wakely <jwakely@redhat.com>
* doc/md.texi (Insn Splitting): Fix "central flowgraph" typo.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b51bd2a..245ab25 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-12-01 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/83232
+ * gfortran.dg/vect/pr83232.f90: New testcase.
+
2017-12-01 Sudakshina Das <sudi.das@arm.com>
* gcc.target/arm/armv8_2-fp16-move-2.c: New test.
diff --git a/gcc/testsuite/gfortran.dg/vect/pr83232.f90 b/gcc/testsuite/gfortran.dg/vect/pr83232.f90
new file mode 100644
index 0000000..8fd9f03
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr83232.f90
@@ -0,0 +1,33 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+! { dg-additional-options "-funroll-loops --param vect-max-peeling-for-alignment=0 -fdump-tree-slp-details" }
+
+ SUBROUTINE MATERIAL_41_INTEGRATION ( STRESS,YLDC,EFPS, &
+ & DTnext,Dxx,Dyy,Dzz,Dxy,Dxz,Dyz,MatID,P1,P3 )
+ REAL(KIND(0D0)), INTENT(INOUT) :: STRESS(6)
+ REAL(KIND(0D0)), INTENT(IN) :: DTnext
+ REAL(KIND(0D0)), INTENT(IN) :: Dxx,Dyy,Dzz,Dxy,Dxz,Dyz
+ REAL(KIND(0D0)) :: Einc(6)
+ REAL(KIND(0D0)) :: P1,P3
+
+ Einc(1) = DTnext * Dxx ! (1)
+ Einc(2) = DTnext * Dyy
+ Einc(3) = DTnext * Dzz
+ Einc(4) = DTnext * Dxy
+ Einc(5) = DTnext * Dxz
+ Einc(6) = DTnext * Dyz
+ DO i = 1,6
+ STRESS(i) = STRESS(i) + P3*Einc(i)
+ ENDDO
+ STRESS(1) = STRESS(1) + (DTnext * P1 * (Dxx+Dyy+Dzz)) ! (2)
+ STRESS(2) = STRESS(2) + (DTnext * P1 * (Dxx+Dyy+Dzz))
+ STRESS(3) = 0.0
+ Einc(5) = 0.0 ! (3)
+ Einc(6) = 0.0
+ call foo (Einc)
+ END SUBROUTINE
+
+! We should vectorize (1) and (2)
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } }
+! We fail to vectorize at (3), this can be fixed in the future
+! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } }
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index ca86498..996d156 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2841,10 +2841,6 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
break;
- /* Do not place the same access in the interleaving chain twice. */
- if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
- break;
-
/* Check the types are compatible.
??? We don't distinguish this during sorting. */
if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
@@ -2854,7 +2850,25 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
- gcc_assert (init_a <= init_b);
+ HOST_WIDE_INT init_prev
+ = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
+ gcc_assert (init_a <= init_b
+ && init_a <= init_prev
+ && init_prev <= init_b);
+
+ /* Do not place the same access in the interleaving chain twice. */
+ if (init_b == init_prev)
+ {
+ gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
+ < gimple_uid (DR_STMT (drb)));
+ /* ??? For now we simply "drop" the later reference which is
+ otherwise the same rather than finishing off this group.
+ In the end we'd want to re-process duplicates forming
+ multiple groups from the refs, likely by just collecting
+ all candidates (including duplicates and split points
+ below) in a vector and then process them together. */
+ continue;
+ }
/* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */
@@ -2866,10 +2880,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* If we have a store, the accesses are adjacent. This splits
groups into chunks we support (we don't support vectorization
of stores with gaps). */
- if (!DR_IS_READ (dra)
- && (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
- (DR_INIT (datarefs_copy[i-1]))
- != type_size_a))
+ if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
break;
/* If the step (if not zero or non-constant) is greater than the