diff options
author | Michael Matz <matz@suse.de> | 2012-04-17 13:54:26 +0000 |
---|---|---|
committer | Michael Matz <matz@gcc.gnu.org> | 2012-04-17 13:54:26 +0000 |
commit | 7d75abc8ea408019c1c004150bfbce42bfbffa71 (patch) | |
tree | 86bb9df647b69f4246a856e415af27fe41e37b32 | |
parent | efa26eaae377bf2a5ce9b02b84f3963a0166e280 (diff) | |
download | gcc-7d75abc8ea408019c1c004150bfbce42bfbffa71.zip gcc-7d75abc8ea408019c1c004150bfbce42bfbffa71.tar.gz gcc-7d75abc8ea408019c1c004150bfbce42bfbffa71.tar.bz2 |
re PR tree-optimization/18437 (vectorizer failed for matrix multiplication)
PR tree-optimization/18437
* tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member.
(STMT_VINFO_STRIDE_LOAD_P): New accessor.
(vect_check_strided_load): Declare.
* tree-vect-data-refs.c (vect_check_strided_load): New function.
(vect_analyze_data_refs): Use it to accept strided loads.
* tree-vect-stmts.c (vectorizable_load): Ditto and handle them.
testsuite/
* gfortran.dg/vect/rnflow-trs2a2.f90: New test.
From-SVN: r186530
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 | 33 | ||||
-rw-r--r-- | gcc/tree-vect-data-refs.c | 65 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 103 | ||||
-rw-r--r-- | gcc/tree-vectorizer.h | 3 |
6 files changed, 214 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e09d672..d43e344 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2012-04-17 Michael Matz <matz@suse.de> + + PR tree-optimization/18437 + * tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member. + (STMT_VINFO_STRIDE_LOAD_P): New accessor. + (vect_check_strided_load): Declare. + * tree-vect-data-refs.c (vect_check_strided_load): New function. + (vect_analyze_data_refs): Use it to accept strided loads. + * tree-vect-stmts.c (vectorizable_load): Ditto and handle them. + 2012-04-17 Richard Guenther <rguenther@suse.de> PR middle-end/53011 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e1862c6..301f561 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-04-17 Michael Matz <matz@suse.de> + + PR tree-optimization/18437 + * gfortran.dg/vect/rnflow-trs2a2.f90: New test. + 2012-04-17 Richard Guenther <rguenther@suse.de> PR middle-end/53011 diff --git a/gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 b/gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 new file mode 100644 index 0000000..1d13cea --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 @@ -0,0 +1,33 @@ +! { dg-do compile } +! { dg-require-effective-target vect_double } + + function trs2a2 (j, k, u, d, m) +! matrice de transition intermediaire, partant de k sans descendre +! sous j. R = IjU(I-Ik)DIj, avec Ii = deltajj, j >= i. +! alternative: trs2a2 = 0 +! trs2a2 (j:k-1, j:k-1) = matmul (utrsft (j:k-1,j:k-1), +! dtrsft (j:k-1,j:k-1)) +! + real, dimension (1:m,1:m) :: trs2a2 ! resultat + real, dimension (1:m,1:m) :: u, d ! matrices utrsft, dtrsft + integer, intent (in) :: j, k, m ! niveaux vallee pic +! +!##### following line replaced by Prentice to make less system dependent +! real (kind = kind (1.0d0)) :: dtmp + real (kind = selected_real_kind (10,50)) :: dtmp +! 
+ trs2a2 = 0.0 + do iclw1 = j, k - 1 + do iclw2 = j, k - 1 + dtmp = 0.0d0 + do iclww = j, k - 1 + dtmp = dtmp + u (iclw1, iclww) * d (iclww, iclw2) + enddo + trs2a2 (iclw1, iclw2) = dtmp + enddo + enddo + return + end function trs2a2 + +! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } +! { dg-final { cleanup-tree-dump "vect" } } diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 4d550a4..37df7ab 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -2690,6 +2690,53 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep, return decl; } +/* Check whether a non-affine load in STMT (being in the loop referred to + in LOOP_VINFO) is suitable for handling as strided load. That is the case + if its address is a simple induction variable. If so return the base + of that induction variable in *BASEP and the (loop-invariant) step + in *STEPP, both only when that pointer is non-zero. + + This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant + base pointer) only. */ + +bool +vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep, + tree *stepp) +{ + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + tree base, off; + affine_iv iv; + + base = DR_REF (dr); + + if (TREE_CODE (base) == ARRAY_REF) + { + off = TREE_OPERAND (base, 1); + base = TREE_OPERAND (base, 0); + } + else if (TREE_CODE (base) == MEM_REF) + { + off = TREE_OPERAND (base, 0); + base = TREE_OPERAND (base, 1); + } + else + return false; + + if (TREE_CODE (off) != SSA_NAME) + return false; + + if (!expr_invariant_in_loop_p (loop, base) + || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true)) + return false; + + if (basep) + *basep = iv.base; + if (stepp) + *stepp = iv.step; + return true; +} /* Function vect_analyze_data_refs. 
@@ -3090,16 +3137,21 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo); struct data_dependence_relation *ddr, *newddr; bool bad = false; + bool strided_load = false; tree off; VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo); - if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL) - || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE) + strided_load = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL); + gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL); + if (gather + && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE) + gather = false; + if (!gather && !strided_load) { if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) { fprintf (vect_dump, - "not vectorized: not suitable for gather "); + "not vectorized: not suitable for gather/strided load "); print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); } return false; @@ -3152,13 +3204,16 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo, { fprintf (vect_dump, "not vectorized: data dependence conflict" - " prevents gather"); + " prevents gather/strided load"); print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); } return false; } - STMT_VINFO_GATHER_P (stmt_info) = true; + if (gather) + STMT_VINFO_GATHER_P (stmt_info) = true; + else if (strided_load) + STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true; } } diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 968e4ed..5e6f71a 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4224,6 +4224,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, tree aggr_type; tree gather_base = NULL_TREE, gather_off = NULL_TREE; tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE; + tree stride_base, stride_step; int gather_scale = 1; enum vect_def_type gather_dt = vect_unknown_def_type; @@ -4357,6 +4358,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, return false; } } + 
else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + { + vect_check_strided_load (stmt, loop_vinfo, &stride_base, &stride_step); + } if (!vec_stmt) /* transformation not required. */ { @@ -4524,6 +4529,104 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, } return true; } + else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info)) + { + gimple_stmt_iterator incr_gsi; + bool insert_after; + gimple incr; + tree offvar; + tree ref = DR_REF (dr); + tree ivstep; + tree running_off; + VEC(constructor_elt, gc) *v = NULL; + gimple_seq stmts = NULL; + + gcc_assert (stride_base && stride_step); + + /* For a load with loop-invariant (but other than power-of-2) + stride (i.e. not a grouped access) like so: + + for (i = 0; i < n; i += stride) + ... = array[i]; + + we generate a new induction variable and new accesses to + form a new vector (or vectors, depending on ncopies): + + for (j = 0; ; j += VF*stride) + tmp1 = array[j]; + tmp2 = array[j + stride]; + ... + vectemp = {tmp1, tmp2, ...} + */ + + ivstep = stride_step; + ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep, + build_int_cst (TREE_TYPE (ivstep), vf)); + + standard_iv_increment_position (loop, &incr_gsi, &insert_after); + + create_iv (stride_base, ivstep, NULL, + loop, &incr_gsi, insert_after, + &offvar, NULL); + incr = gsi_stmt (incr_gsi); + set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); + + stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE); + if (stmts) + gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); + + prev_stmt_info = NULL; + running_off = offvar; + for (j = 0; j < ncopies; j++) + { + tree vec_inv; + + v = VEC_alloc (constructor_elt, gc, nunits); + for (i = 0; i < nunits; i++) + { + tree newref, newoff; + gimple incr; + if (TREE_CODE (ref) == ARRAY_REF) + newref = build4 (ARRAY_REF, TREE_TYPE (ref), + unshare_expr (TREE_OPERAND (ref, 0)), + running_off, + NULL_TREE, NULL_TREE); + else + newref = build2 (MEM_REF, 
TREE_TYPE (ref), + running_off, + TREE_OPERAND (ref, 1)); + + newref = force_gimple_operand_gsi (gsi, newref, true, + NULL_TREE, true, + GSI_SAME_STMT); + CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref); + newoff = SSA_NAME_VAR (running_off); + if (POINTER_TYPE_P (TREE_TYPE (newoff))) + incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff, + running_off, stride_step); + else + incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff, + running_off, stride_step); + newoff = make_ssa_name (newoff, incr); + gimple_assign_set_lhs (incr, newoff); + vect_finish_stmt_generation (stmt, incr, gsi); + + running_off = newoff; + } + + vec_inv = build_constructor (vectype, v); + new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi); + new_stmt = SSA_NAME_DEF_STMT (new_temp); + mark_symbols_for_renaming (new_stmt); + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + return true; + } if (grouped_load) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 6804fdc..5d99609 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -545,6 +545,7 @@ typedef struct _stmt_vec_info { /* For loads only, true if this is a gather load. */ bool gather_p; + bool stride_load_p; } *stmt_vec_info; /* Access Functions. 
*/ @@ -559,6 +560,7 @@ typedef struct _stmt_vec_info { #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info #define STMT_VINFO_GATHER_P(S) (S)->gather_p +#define STMT_VINFO_STRIDE_LOAD_P(S) (S)->stride_load_p #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_base_address #define STMT_VINFO_DR_INIT(S) (S)->dr_init @@ -875,6 +877,7 @@ extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info); extern bool vect_prune_runtime_alias_test_list (loop_vec_info); extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *, int *); +extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *); extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *); extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree, tree *, gimple_stmt_iterator *, |