aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Matz <matz@suse.de>2012-04-17 13:54:26 +0000
committerMichael Matz <matz@gcc.gnu.org>2012-04-17 13:54:26 +0000
commit7d75abc8ea408019c1c004150bfbce42bfbffa71 (patch)
tree86bb9df647b69f4246a856e415af27fe41e37b32
parentefa26eaae377bf2a5ce9b02b84f3963a0166e280 (diff)
downloadgcc-7d75abc8ea408019c1c004150bfbce42bfbffa71.zip
gcc-7d75abc8ea408019c1c004150bfbce42bfbffa71.tar.gz
gcc-7d75abc8ea408019c1c004150bfbce42bfbffa71.tar.bz2
re PR tree-optimization/18437 (vectorizer failed for matrix multiplication)
PR tree-optimization/18437 * tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member. (STMT_VINFO_STRIDE_LOAD_P): New accessor. (vect_check_strided_load): Declare. * tree-vect-data-refs.c (vect_check_strided_load): New function. (vect_analyze_data_refs): Use it to accept strided loads. * tree-vect-stmts.c (vectorizable_load): Ditto and handle them. testsuite/ * gfortran.dg/vect/rnflow-trs2a2.f90: New test. From-SVN: r186530
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f9033
-rw-r--r--gcc/tree-vect-data-refs.c65
-rw-r--r--gcc/tree-vect-stmts.c103
-rw-r--r--gcc/tree-vectorizer.h3
6 files changed, 214 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e09d672..d43e344 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2012-04-17 Michael Matz <matz@suse.de>
+
+ PR tree-optimization/18437
+ * tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member.
+ (STMT_VINFO_STRIDE_LOAD_P): New accessor.
+ (vect_check_strided_load): Declare.
+ * tree-vect-data-refs.c (vect_check_strided_load): New function.
+ (vect_analyze_data_refs): Use it to accept strided loads.
+ * tree-vect-stmts.c (vectorizable_load): Ditto and handle them.
+
2012-04-17 Richard Guenther <rguenther@suse.de>
PR middle-end/53011
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e1862c6..301f561 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2012-04-17 Michael Matz <matz@suse.de>
+
+ PR tree-optimization/18437
+ * gfortran.dg/vect/rnflow-trs2a2.f90: New test.
+
2012-04-17 Richard Guenther <rguenther@suse.de>
PR middle-end/53011
diff --git a/gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90
new file mode 100644
index 0000000..1d13cea
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90
@@ -0,0 +1,33 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+
+ function trs2a2 (j, k, u, d, m)
+! intermediate transition matrix, starting from k without going
+! below j. R = IjU(I-Ik)DIj, with Ii = deltajj, j >= i.
+! alternative: trs2a2 = 0
+! trs2a2 (j:k-1, j:k-1) = matmul (utrsft (j:k-1,j:k-1),
+! dtrsft (j:k-1,j:k-1))
+!
+ real, dimension (1:m,1:m) :: trs2a2 ! result
+ real, dimension (1:m,1:m) :: u, d ! matrices utrsft, dtrsft
+ integer, intent (in) :: j, k, m ! valley and peak levels
+!
+!##### following line replaced by Prentice to make less system dependent
+! real (kind = kind (1.0d0)) :: dtmp
+ real (kind = selected_real_kind (10,50)) :: dtmp
+!
+ trs2a2 = 0.0
+ do iclw1 = j, k - 1
+ do iclw2 = j, k - 1
+ dtmp = 0.0d0
+ do iclww = j, k - 1
+ dtmp = dtmp + u (iclw1, iclww) * d (iclww, iclw2)
+ enddo
+ trs2a2 (iclw1, iclw2) = dtmp
+ enddo
+ enddo
+ return
+ end function trs2a2
+
+! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
+! { dg-final { cleanup-tree-dump "vect" } }
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 4d550a4..37df7ab 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -2690,6 +2690,53 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
return decl;
}
+/* Check whether a non-affine load in STMT (being in the loop referred to
+ in LOOP_VINFO) is suitable for handling as strided load. That is the case
+ if its address is a simple induction variable. If so return true and store
+ the base of that induction variable in *BASEP and its (loop-invariant) step
+ in *STEPP, each only when the respective pointer is non-NULL.
+
+ This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
+ base pointer) only; false is returned for everything else. */
+
+bool
+vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
+ tree *stepp)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ tree base, off;
+ affine_iv iv;
+
+ base = DR_REF (dr);
+
+ if (TREE_CODE (base) == ARRAY_REF)
+ {
+ off = TREE_OPERAND (base, 1); /* The (possibly variant) index. */
+ base = TREE_OPERAND (base, 0); /* The array being indexed. */
+ }
+ else if (TREE_CODE (base) == MEM_REF)
+ {
+ off = TREE_OPERAND (base, 0); /* The (possibly variant) base pointer. */
+ base = TREE_OPERAND (base, 1); /* The remaining MEM_REF operand. */
+ }
+ else
+ return false; /* Other kinds of references are not handled. */
+
+ if (TREE_CODE (off) != SSA_NAME)
+ return false; /* The variant part must be an SSA name. */
+
+ if (!expr_invariant_in_loop_p (loop, base)
+ || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
+ return false; /* Need an invariant BASE and OFF being a simple IV. */
+
+ if (basep) /* BASEP and STEPP are optional outputs. */
+ *basep = iv.base;
+ if (stepp)
+ *stepp = iv.step;
+ return true;
+}
/* Function vect_analyze_data_refs.
@@ -3090,16 +3137,21 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
struct data_dependence_relation *ddr, *newddr;
bool bad = false;
+ bool strided_load = false;
tree off;
VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
- if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
- || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
+ strided_load = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
+ gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
+ if (gather
+ && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
+ gather = false;
+ if (!gather && !strided_load)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
{
fprintf (vect_dump,
- "not vectorized: not suitable for gather ");
+ "not vectorized: not suitable for gather/strided load ");
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
return false;
@@ -3152,13 +3204,16 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
{
fprintf (vect_dump,
"not vectorized: data dependence conflict"
- " prevents gather");
+ " prevents gather/strided load");
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}
return false;
}
- STMT_VINFO_GATHER_P (stmt_info) = true;
+ if (gather)
+ STMT_VINFO_GATHER_P (stmt_info) = true;
+ else if (strided_load)
+ STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
}
}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 968e4ed..5e6f71a 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4224,6 +4224,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
tree aggr_type;
tree gather_base = NULL_TREE, gather_off = NULL_TREE;
tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
+ tree stride_base, stride_step;
int gather_scale = 1;
enum vect_def_type gather_dt = vect_unknown_def_type;
@@ -4357,6 +4358,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
return false;
}
}
+ else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ {
+ vect_check_strided_load (stmt, loop_vinfo, &stride_base, &stride_step);
+ }
if (!vec_stmt) /* transformation not required. */
{
@@ -4524,6 +4529,104 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
return true;
}
+ else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+ {
+ gimple_stmt_iterator incr_gsi;
+ bool insert_after;
+ gimple incr;
+ tree offvar;
+ tree ref = DR_REF (dr);
+ tree ivstep;
+ tree running_off;
+ VEC(constructor_elt, gc) *v = NULL;
+ gimple_seq stmts = NULL;
+
+ gcc_assert (stride_base && stride_step);
+
+ /* For a load with loop-invariant (but other than power-of-2)
+ stride (i.e. not a grouped access) like so:
+
+ for (i = 0; i < n; i += stride)
+ ... = array[i];
+
+ we generate a new induction variable and new accesses to
+ form a new vector (or vectors, depending on ncopies):
+
+ for (j = 0; ; j += VF*stride)
+ tmp1 = array[j];
+ tmp2 = array[j + stride];
+ ...
+ vectemp = {tmp1, tmp2, ...}
+ */
+
+ ivstep = stride_step;
+ ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+ build_int_cst (TREE_TYPE (ivstep), vf));
+
+ standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+
+ create_iv (stride_base, ivstep, NULL,
+ loop, &incr_gsi, insert_after,
+ &offvar, NULL);
+ incr = gsi_stmt (incr_gsi);
+ set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+
+ stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+ if (stmts)
+ gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+ prev_stmt_info = NULL;
+ running_off = offvar;
+ for (j = 0; j < ncopies; j++)
+ {
+ tree vec_inv;
+
+ v = VEC_alloc (constructor_elt, gc, nunits);
+ for (i = 0; i < nunits; i++)
+ {
+ tree newref, newoff;
+ gimple incr;
+ if (TREE_CODE (ref) == ARRAY_REF)
+ newref = build4 (ARRAY_REF, TREE_TYPE (ref),
+ unshare_expr (TREE_OPERAND (ref, 0)),
+ running_off,
+ NULL_TREE, NULL_TREE);
+ else
+ newref = build2 (MEM_REF, TREE_TYPE (ref),
+ running_off,
+ TREE_OPERAND (ref, 1));
+
+ newref = force_gimple_operand_gsi (gsi, newref, true,
+ NULL_TREE, true,
+ GSI_SAME_STMT);
+ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
+ newoff = SSA_NAME_VAR (running_off);
+ if (POINTER_TYPE_P (TREE_TYPE (newoff)))
+ incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
+ running_off, stride_step);
+ else
+ incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
+ running_off, stride_step);
+ newoff = make_ssa_name (newoff, incr);
+ gimple_assign_set_lhs (incr, newoff);
+ vect_finish_stmt_generation (stmt, incr, gsi);
+
+ running_off = newoff;
+ }
+
+ vec_inv = build_constructor (vectype, v);
+ new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
+ mark_symbols_for_renaming (new_stmt);
+
+ if (j == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
+ return true;
+ }
if (grouped_load)
{
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6804fdc..5d99609 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -545,6 +545,7 @@ typedef struct _stmt_vec_info {
/* For loads only, true if this is a gather load. */
bool gather_p;
+ bool stride_load_p;
} *stmt_vec_info;
/* Access Functions. */
@@ -559,6 +560,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
#define STMT_VINFO_DATA_REF(S) (S)->data_ref_info
#define STMT_VINFO_GATHER_P(S) (S)->gather_p
+#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p
#define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address
#define STMT_VINFO_DR_INIT(S) (S)->dr_init
@@ -875,6 +877,7 @@ extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
int *);
+extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *);
extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
tree *, gimple_stmt_iterator *,