about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2017-03-08 08:50:01 +0000
committer: Richard Biener <rguenth@gcc.gnu.org> 2017-03-08 08:50:01 +0000
commit: 61fdfd8c51a99f07b59706037cb2946bc793480c (patch)
tree: 4a053adac88e0139104fc2de519527bc7be891bd
parent: 4b48e88382ab6c37aa981881adac0390a74eaeaa (diff)
download: gcc-61fdfd8c51a99f07b59706037cb2946bc793480c.zip
gcc-61fdfd8c51a99f07b59706037cb2946bc793480c.tar.gz
gcc-61fdfd8c51a99f07b59706037cb2946bc793480c.tar.bz2
re PR tree-optimization/79920 (Incorrect floating point results when compiling with -O3)
2017-03-08 Richard Biener <rguenther@suse.de>

 PR tree-optimization/79920
 * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
 with ncopies == 1 to ...
 (vect_transform_slp_perm_load): ... here. Properly compute
 all element loads by iterating VF times over the group. Do
 not handle ncopies (computed in a broken way) in
 vect_create_mask_and_perm.

 * gcc.dg/vect/pr79920.c: New testcase.

From-SVN: r245968
-rw-r--r-- gcc/ChangeLog 10
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.dg/vect/pr79920.c 44
-rw-r--r-- gcc/tree-vect-slp.c 102
4 files changed, 91 insertions, 70 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fcca6aa..05f6017 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2017-03-08 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/79920
+ * tree-vect-slp.c (vect_create_mask_and_perm): Remove and inline
+ with ncopies == 1 to ...
+ (vect_transform_slp_perm_load): ... here. Properly compute
+ all element loads by iterating VF times over the group. Do
+ not handle ncopies (computed in a broken way) in
+ vect_create_mask_and_perm.
+
2017-03-08 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/79904
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f2fd40e87..eef8826 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-03-08 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/79920
+ * gcc.dg/vect/pr79920.c: New testcase.
+
2017-03-08 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/79904
diff --git a/gcc/testsuite/gcc.dg/vect/pr79920.c b/gcc/testsuite/gcc.dg/vect/pr79920.c
new file mode 100644
index 0000000..c066b91
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr79920.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3" } */
+
+#include "tree-vect.h"
+
+double __attribute__((noinline,noclone))
+compute_integral (double w_1[18])
+{
+ double A = 0;
+ double t33[2][6] = {{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+ {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}};
+ double t43[2] = {0.0, 0.0};
+ double t31[2][2] = {{1.0, 1.0}, {1.0, 1.0}};
+ double t32[2][3] = {{0.0, 0.0, 1.0}, {0.0, 0.0, 1.0}};
+
+ for (int ip_1 = 0; ip_1 < 2; ++ip_1)
+ {
+ for (int i_0 = 0; i_0 < 6; ++i_0)
+ t33[ip_1][i_0] = ((w_1[i_0*3] * t32[ip_1][0])
+ + (w_1[i_0*3+2] * t32[ip_1][2]));
+ t43[ip_1] = 2.0;
+ }
+ for (int i_0 = 0; i_0 < 6; ++i_0)
+ A += t43[1]*t33[1][i_0];
+ return A;
+}
+
+int main()
+{
+ check_vect ();
+
+ double w_1[18] = {0., 1.0, 1.0,
+ 0., 1.0, 1.0,
+ 0., 1.0, 1.0,
+ 0., 1.0, 1.0,
+ 0., 1.0, 1.0,
+ 0., 1.0, 1.0};
+ double A = compute_integral(w_1);
+ if (A != 12.0)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_double && { vect_perm && vect_hw_misalign } } } } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 46d1ad6..1300c6a 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3379,66 +3379,6 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
}
}
-
-/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
- building a vector of type MASK_TYPE from it) and two input vectors placed in
- DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
- shifting by STRIDE elements of DR_CHAIN for every copy.
- (STRIDE is the number of vectorized stmts for NODE divided by the number of
- copies).
- VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
- the created stmts must be inserted. */
-
-static inline void
-vect_create_mask_and_perm (gimple *stmt,
- tree mask, int first_vec_indx, int second_vec_indx,
- gimple_stmt_iterator *gsi, slp_tree node,
- tree vectype, vec<tree> dr_chain,
- int ncopies, int vect_stmts_counter)
-{
- tree perm_dest;
- gimple *perm_stmt = NULL;
- int i, stride_in, stride_out;
- tree first_vec, second_vec, data_ref;
-
- stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
- stride_in = dr_chain.length () / ncopies;
-
- /* Initialize the vect stmts of NODE to properly insert the generated
- stmts later. */
- for (i = SLP_TREE_VEC_STMTS (node).length ();
- i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
- SLP_TREE_VEC_STMTS (node).quick_push (NULL);
-
- perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
- for (i = 0; i < ncopies; i++)
- {
- first_vec = dr_chain[first_vec_indx];
- second_vec = dr_chain[second_vec_indx];
-
- /* Generate the permute statement if necessary. */
- if (mask)
- {
- perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
- first_vec, second_vec, mask);
- data_ref = make_ssa_name (perm_dest, perm_stmt);
- gimple_set_lhs (perm_stmt, data_ref);
- vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- }
- else
- /* If mask was NULL_TREE generate the requested identity transform. */
- perm_stmt = SSA_NAME_DEF_STMT (first_vec);
-
- /* Store the vector statement in NODE. */
- SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
- = perm_stmt;
-
- first_vec_indx += stride_in;
- second_vec_indx += stride_in;
- }
-}
-
-
/* Generate vector permute statements from a list of loads in DR_CHAIN.
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
permute statements for the SLP node NODE of the SLP instance
@@ -3456,7 +3396,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
int nunits, vec_index = 0;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
- int unroll_factor, mask_element, ncopies;
+ int mask_element;
unsigned char *mask;
machine_mode mode;
@@ -3474,11 +3414,13 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
mask_type = get_vectype_for_scalar_type (mask_element_type);
nunits = TYPE_VECTOR_SUBPARTS (vectype);
mask = XALLOCAVEC (unsigned char, nunits);
- unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
- /* Number of copies is determined by the final vectorization factor
- relatively to SLP_NODE_INSTANCE unrolling factor. */
- ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
+ /* Initialize the vect stmts of NODE to properly insert the generated
+ stmts later. */
+ if (! analyze_only)
+ for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
+ i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
+ SLP_TREE_VEC_STMTS (node).quick_push (NULL);
/* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE.
@@ -3505,7 +3447,7 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
bool noop_p = true;
*n_perms = 0;
- for (int j = 0; j < unroll_factor; j++)
+ for (int j = 0; j < vf; j++)
{
for (int k = 0; k < group_size; k++)
{
@@ -3578,10 +3520,30 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
if (second_vec_index == -1)
second_vec_index = first_vec_index;
- vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
- second_vec_index,
- gsi, node, vectype, dr_chain,
- ncopies, vect_stmts_counter++);
+
+ /* Generate the permute statement if necessary. */
+ tree first_vec = dr_chain[first_vec_index];
+ tree second_vec = dr_chain[second_vec_index];
+ gimple *perm_stmt;
+ if (! noop_p)
+ {
+ tree perm_dest
+ = vect_create_destination_var (gimple_assign_lhs (stmt),
+ vectype);
+ perm_dest = make_ssa_name (perm_dest);
+ perm_stmt = gimple_build_assign (perm_dest,
+ VEC_PERM_EXPR,
+ first_vec, second_vec,
+ mask_vec);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ }
+ else
+ /* If mask was NULL_TREE generate the requested
+ identity transform. */
+ perm_stmt = SSA_NAME_DEF_STMT (first_vec);
+
+ /* Store the vector statement in NODE. */
+ SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
}
index = 0;