-rw-r--r--  gcc/ChangeLog                                  |  71
-rw-r--r--  gcc/config/i386/i386.c                         |   5
-rw-r--r--  gcc/config/rs6000/rs6000.c                     | 116
-rw-r--r--  gcc/config/spu/spu.c                           |   6
-rw-r--r--  gcc/doc/tm.texi                                |   4
-rw-r--r--  gcc/doc/tm.texi.in                             |   2
-rw-r--r--  gcc/target.def                                 |   6
-rw-r--r--  gcc/target.h                                   |   1
-rw-r--r--  gcc/targhooks.c                                |   5
-rw-r--r--  gcc/targhooks.h                                |   2
-rw-r--r--  gcc/testsuite/ChangeLog                        |  17
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-109.c           |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-42.c            |   5
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-56.c            |   6
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-60.c            |   6
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-93.c            |   2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-96.c            |   2
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c  |  21
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c  |  21
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-peel-1.c        |  51
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-peel-2.c        |  52
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-peel-3.c        |  55
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-peel-4.c        |  47
-rw-r--r--  gcc/tree-vect-data-refs.c                      | 462
-rw-r--r--  gcc/tree-vect-loop-manip.c                     |  19
-rw-r--r--  gcc/tree-vect-loop.c                           | 171
-rw-r--r--  gcc/tree-vect-slp.c                            |   4
-rw-r--r--  gcc/tree-vect-stmts.c                          | 179
-rw-r--r--  gcc/tree-vectorizer.h                          |  30
29 files changed, 1176 insertions(+), 196 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a5e5c16..c69bc6c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,74 @@
+2010-07-04 Ira Rosen <irar@il.ibm.com>
+ Revital Eres <eres@il.ibm.com>
+
+ * doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST):
+ Document new arguments.
+ * doc/tm.texi: Regenerate.
+ * targhooks.c (default_builtin_vectorization_cost): Add new arguments.
+ Handle unaligned store.
+ * targhooks.h (default_builtin_vectorization_cost): Add new arguments.
+ * target.def (builtin_vectorization_cost): Add new arguments.
+ * target.h (enum vect_cost_for_stmt): Add unaligned_store.
+ * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Take number
+ of iterations of prolog loop directly from LOOP_PEELING_FOR_ALIGNMENT.
+ (vect_vfa_segment_size): Fix indentation.
+ * tree-vectorizer.h (struct _vect_peel_info): New.
+ (struct _vect_peel_extended_info): New.
+ (struct _loop_vec_info): Add new field for peeling hash table and a
+ macro for its access.
+ (VECT_MAX_COST): Define.
+ (vect_get_load_cost): Declare.
+ (vect_get_store_cost, vect_get_known_peeling_cost,
+ vect_get_single_scalar_iteration_cost): Likewise.
+ (vect_supportable_dr_alignment): Add new argument.
+ * tree-vect-loop.c (new_loop_vec_info): Initialize peeling hash table
+ field.
+ (destroy_loop_vec_info): Free peeling hash table.
+ (vect_analyze_loop_form): Update call to builtin_vectorization_cost.
+ (vect_analyze_loop): Move vect_enhance_data_refs_alignment before
+ vect_analyze_slp. Fix indentation.
+ (vect_get_single_scalar_iteration_cost): New function.
+ (vect_get_known_peeling_cost): Likewise.
+ (vect_estimate_min_profitable_iters): Rename byte_misalign to npeel.
+ Call vect_get_single_scalar_iteration_cost instead of cost_for_stmt per
+ statement. Move the outside cost calculation into the unknown peeling
+ case. Call vect_get_known_peeling_cost for a known amount of peeling.
+ * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Add data
+ reference to the print message of forced alignment.
+ (vect_verify_datarefs_alignment): Update call to
+ vect_supportable_dr_alignment.
+ (vect_get_data_access_cost): New function.
+ (vect_peeling_hash, vect_peeling_hash_eq, vect_peeling_hash_insert,
+ vect_peeling_hash_get_most_frequent, vect_peeling_hash_get_lowest_cost,
+ vect_peeling_hash_choose_best_peeling): Likewise.
+ (vect_enhance_data_refs_alignment): Fix documentation. Use a hash table
+ to store all the accesses in the loop and find the best possible access
+ to align using peeling in the known alignment case. For unknown
+ alignment, check if stores are preferred or if peeling is worthwhile.
+ (vect_find_same_alignment_drs): Analyze pairs of loads too.
+ (vect_supportable_dr_alignment): Add new argument and check aligned
+ accesses according to it.
+ * tree-vect-stmts.c (vect_get_stmt_cost): New function.
+ (cost_for_stmt): Call vect_get_stmt_cost.
+ (vect_model_simple_cost): Likewise.
+ (vect_model_store_cost): Call vect_get_stmt_cost. Call
+ vect_get_store_cost to calculate the cost of the statement.
+ (vect_get_store_cost): New function.
+ (vect_model_load_cost): Call vect_get_stmt_cost. Call
+ vect_get_load_cost to calculate the cost of the statement.
+ (vect_get_load_cost): New function.
+ (vectorizable_store): Update call to vect_supportable_dr_alignment.
+ (vectorizable_load): Likewise.
+ * config/spu/spu.c (spu_builtin_vectorization_cost): Add new
+ arguments.
+ * config/i386/i386.c (ix86_builtin_vectorization_cost): Add new
+ arguments. Handle unaligned store.
+ * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): New.
+ (rs6000_builtin_support_vector_misalignment): Return true for word and
+ double word alignments for VSX.
+ * tree-vect-slp.c (vect_build_slp_tree): Update calls to
+ vect_supportable_dr_alignment and builtin_vectorization_cost.
+
2010-07-03 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
PR target/44597
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ec2cdd3..711fc16 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -29376,7 +29376,9 @@ static const struct attribute_spec ix86_attribute_table[] =
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
@@ -29405,6 +29407,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
return ix86_cost->scalar_to_vec_cost;
case unaligned_load:
+ case unaligned_store:
return ix86_cost->vec_unalign_load_cost;
case cond_branch_taken:
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index a98b4dd..45bc230 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1075,6 +1075,8 @@ static bool rs6000_builtin_support_vector_misalignment (enum
machine_mode,
const_tree,
int, bool);
+static int rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt,
+ tree, int);
static void def_builtin (int, const char *, tree, int);
static bool rs6000_vector_alignment_reachable (const_tree, bool);
@@ -1467,6 +1469,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+ rs6000_builtin_vectorization_cost
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
@@ -3333,12 +3338,19 @@ rs6000_builtin_support_vector_misalignment (enum machine_mode mode,
if (misalignment == -1)
{
- /* misalignment factor is unknown at compile time but we know
+ /* Misalignment factor is unknown at compile time but we know
it's word aligned. */
if (rs6000_vector_alignment_reachable (type, is_packed))
- return true;
+ {
+ int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
+
+ if (element_size == 64 || element_size == 32)
+ return true;
+ }
+
return false;
}
+
/* VSX supports word-aligned vector. */
if (misalignment % 4 == 0)
return true;
@@ -3404,6 +3416,106 @@ rs6000_builtin_vec_perm (tree type, tree *mask_element_type)
return d;
}
+
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int misalign)
+{
+ unsigned elements;
+
+ switch (type_of_cost)
+ {
+ case scalar_stmt:
+ case scalar_load:
+ case scalar_store:
+ case vector_stmt:
+ case vector_load:
+ case vector_store:
+ case vec_to_scalar:
+ case scalar_to_vec:
+ case cond_branch_not_taken:
+ case vec_perm:
+ return 1;
+
+ case cond_branch_taken:
+ return 3;
+
+ case unaligned_load:
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 2;
+
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 2;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 22;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ if (TARGET_ALTIVEC)
+ /* Misaligned loads are not supported. */
+ gcc_unreachable ();
+
+ return 2;
+
+ case unaligned_store:
+ if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
+ {
+ elements = TYPE_VECTOR_SUBPARTS (vectype);
+ if (elements == 2)
+ /* Double word aligned. */
+ return 2;
+
+ if (elements == 4)
+ {
+ switch (misalign)
+ {
+ case 8:
+ /* Double word aligned. */
+ return 2;
+
+ case -1:
+ /* Unknown misalignment. */
+ case 4:
+ case 12:
+ /* Word aligned. */
+ return 23;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+
+ if (TARGET_ALTIVEC)
+ /* Misaligned stores are not supported. */
+ gcc_unreachable ();
+
+ return 2;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Handle generic options of the form -mfoo=yes/no.
NAME is the option name.
VALUE is the option value.
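
The VSX numbers above encode a simple rule: two-element (double-word) vectors
and double-word-aligned four-element accesses stay cheap, while word-aligned or
unknown-misalignment four-element accesses pay a large penalty (22 for loads,
23 for stores). A standalone model of that switch may help to check the
numbers; this is an illustrative sketch, not GCC code, and the harness below is
an assumption:

    #include <stdio.h>

    /* Model of the rs6000 unaligned-access costs on the VSX path above:
       2-element (double-word) vectors are always cheap, and 4-element
       vectors are cheap only when double-word aligned (misalign == 8).  */
    static int
    model_unaligned_cost (int elements, int misalign, int is_store)
    {
      if (elements == 2 || misalign == 8)
        return 2;                       /* Double word aligned.  */
      /* Word aligned (4, 12) or unknown misalignment (-1).  */
      return is_store ? 23 : 22;
    }

    int
    main (void)
    {
      printf ("V4SI load,  misalign 8: %d\n", model_unaligned_cost (4, 8, 0));
      printf ("V4SI load,  misalign 4: %d\n", model_unaligned_cost (4, 4, 0));
      printf ("V4SI store, unknown:    %d\n", model_unaligned_cost (4, -1, 1));
      return 0;
    }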
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 4b7f916..3d4f587 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -209,7 +209,7 @@ static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
-static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt);
+static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
@@ -6694,7 +6694,9 @@ spu_builtin_mask_for_load (void)
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
-spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index d6bc604..17b582f 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5706,8 +5706,10 @@ preserved (e.g.@: used only by a reduction computation). Otherwise, the
@code{widen_mult_hi/lo} idioms will be used.
@end deftypefn
-@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (enum vect_cost_for_stmt @var{type_of_cost})
+@deftypefn {Target Hook} int TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST (enum vect_cost_for_stmt @var{type_of_cost}, tree @var{vectype}, int @var{misalign})
Returns cost of different scalar or vector statements for vectorization cost model.
+For vector memory operations, the cost may depend on the type (@var{vectype})
+and the misalignment value (@var{misalign}).
@end deftypefn
@deftypefn {Target Hook} bool TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE (const_tree @var{type}, bool @var{is_packed})
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 083d56f..e79341d 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -5708,6 +5708,8 @@ preserved (e.g.@: used only by a reduction computation). Otherwise, the
@hook TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
Returns cost of different scalar or vector statements for vectorization cost model.
+For vector memory operations, the cost may depend on the type (@var{vectype})
+and the misalignment value (@var{misalign}).
@end deftypefn
@hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
diff --git a/gcc/target.def b/gcc/target.def
index 8bcf877..6270925 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -789,11 +789,13 @@ DEFHOOK
"",
tree, (tree x), NULL)
-/* Cost of different vector/scalar statements in vectorization cost model. */
+/* Cost of different vector/scalar statements in vectorization cost
+ model. In the case of misaligned vector loads and stores, the cost
+ depends on the data type and the misalignment value. */
DEFHOOK
(builtin_vectorization_cost,
"",
- int, (enum vect_cost_for_stmt type_of_cost),
+ int, (enum vect_cost_for_stmt type_of_cost, tree vectype, int misalign),
default_builtin_vectorization_cost)
/* Return true if vector alignment is reachable (by peeling N
diff --git a/gcc/target.h b/gcc/target.h
index 18d160d..99dd1ee 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -119,6 +119,7 @@ enum vect_cost_for_stmt
vector_stmt,
vector_load,
unaligned_load,
+ unaligned_store,
vector_store,
vec_to_scalar,
scalar_to_vec,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 9271db8..1a49f0c 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -479,7 +479,9 @@ default_builtin_vectorized_conversion (unsigned int code ATTRIBUTE_UNUSED,
/* Default vectorizer cost model values. */
int
-default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
+default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype ATTRIBUTE_UNUSED,
+ int misalign ATTRIBUTE_UNUSED)
{
switch (type_of_cost)
{
@@ -496,6 +498,7 @@ default_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost)
return 1;
case unaligned_load:
+ case unaligned_store:
return 2;
case cond_branch_taken:
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index f491dbd..eb4b547 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -77,7 +77,7 @@ extern tree default_builtin_vectorized_function (tree, tree, tree);
extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree);
-extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt);
+extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
extern tree default_builtin_reciprocal (unsigned int, bool, bool);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a815dd1..5ae6c6e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,20 @@
+2010-07-04 Ira Rosen <irar@il.ibm.com>
+ Revital Eres <eres@il.ibm.com>
+
+ * gcc.dg/vect/vect-42.c: Don't expect peeling on targets that support
+ misaligned stores.
+ * gcc.dg/vect/vect-60.c, gcc.dg/vect/vect-56.c, gcc.dg/vect/vect-93.c,
+ gcc.dg/vect/vect-96.c: Likewise.
+ * gcc.dg/vect/vect-109.c: Expect vectorization only on targets that
+ support misaligned stores. Change the number of expected misaligned
+ accesses.
+ * gcc.dg/vect/vect-peel-1.c: New test.
+ * gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/vect-peel-3.c,
+ gcc.dg/vect/vect-peel-4.c: Likewise.
+ * gcc.dg/vect/vect-multitypes-1.c: Change the test to make it
+ vectorizable on all targets that support realignment.
+ * gcc.dg/vect/vect-multitypes-4.c: Likewise.
+
2010-07-03 H.J. Lu <hongjiu.lu@intel.com>
PR c/44806
diff --git a/gcc/testsuite/gcc.dg/vect/vect-109.c b/gcc/testsuite/gcc.dg/vect/vect-109.c
index 3939093..ddba263 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-109.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-109.c
@@ -72,8 +72,8 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_hw_misalign } } } */
/* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 2 "vect" { xfail vect_hw_misalign } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 10 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_hw_misalign } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c
index 3ba1c6f..fa83200 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-42.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-42.c
@@ -65,6 +65,7 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 3 "vect" { target vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { { ! vector_alignment_reachable } && { ! vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || { ! vector_alignment_reachable } } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || { { ! vector_alignment_reachable } || vect_hw_misalign } } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || { { ! vector_alignment_reachable } || vect_hw_misalign } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c
index 7b7da12..1555d41 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-56.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-56.c
@@ -68,6 +68,8 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c
index cbdf63d..ba8ffe6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-60.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-60.c
@@ -69,6 +69,8 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { xfail { vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-93.c b/gcc/testsuite/gcc.dg/vect/vect-93.c
index 85666d9..dfb98cf 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-93.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-93.c
@@ -72,7 +72,7 @@ int main (void)
/* main && main1 together: */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 2 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { target { vect_no_align && {! vector_alignment_reachable} } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail { { vect_no_align } || {! vector_alignment_reachable} } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 3 "vect" { xfail { { vect_no_align } || { { ! vector_alignment_reachable} || vect_hw_misalign } } } } } */
/* in main1: */
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target !powerpc*-*-* !i?86-*-* !x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-96.c b/gcc/testsuite/gcc.dg/vect/vect-96.c
index f392169..c7dea61 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-96.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-96.c
@@ -44,6 +44,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { {! vect_no_align} && vector_alignment_reachable } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align } || {! vector_alignment_reachable} } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { { vect_no_align } || { { ! vector_alignment_reachable} || vect_hw_misalign } } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning." 1 "vect" { target { vect_no_align || { {! vector_alignment_reachable} && {! vect_hw_misalign} } } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
index e8fe027..7981c4a 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
@@ -27,13 +27,13 @@ __attribute__ ((noinline)) int main1 (int n)
for (i = 0; i < n; i++)
{
sa[i+7] = sb[i];
- ia[i+3] = ib[i];
+ ia[i+3] = ib[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
- if (sa[i+7] != sb[i] || ia[i+3] != ib[i])
+ if (sa[i+7] != sb[i] || ia[i+3] != ib[i+1])
abort ();
}
@@ -44,7 +44,9 @@ __attribute__ ((noinline)) int main1 (int n)
access for peeling, and therefore will examine the option of
using a peeling factor = (V-3)%V = 1 for V=2,4.
This will not align the access 'sa[i+3]' (for which we need to
- peel 5 iterations), so the loop can not be vectorized. */
+ peel 5 iterations). However, 'ia[i+3]' also gets aligned if we peel 5
+ iterations, so the loop is vectorizable on all targets that support
+ unaligned loads. */
__attribute__ ((noinline)) int main2 (int n)
{
@@ -55,13 +57,13 @@ __attribute__ ((noinline)) int main2 (int n)
for (i = 0; i < n; i++)
{
ia[i+3] = ib[i];
- sa[i+3] = sb[i];
+ sa[i+3] = sb[i+1];
}
/* check results: */
for (i = 0; i < n; i++)
{
- if (sa[i+3] != sb[i] || ia[i+3] != ib[i])
+ if (sa[i+3] != sb[i+1] || ia[i+3] != ib[i])
abort ();
}
@@ -78,11 +80,8 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
index 274fb02..3a83491 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
@@ -20,7 +20,9 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
which will also align the access to 'ia[i+3]', and the loop could be
- vectorized on all targets that support unaligned loads. */
+ vectorized on all targets that support unaligned loads.
+ With the cost model disabled, no peeling is applied on targets that
+ support misaligned stores, since we want to keep the four loads aligned. */
__attribute__ ((noinline))
int main1 (int n)
@@ -50,7 +52,11 @@ int main1 (int n)
using a peeling factor = VF-3%VF. This will result in a peeling factor
1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we
need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not
- be vectorized. */
+ be vectorized. However, 'ia[i+3]' also gets aligned if we peel 5
+ iterations, so the loop is vectorizable on all targets that support
+ unaligned loads.
+ With the cost model disabled, no peeling is applied on targets that
+ support misaligned stores, since we want to keep the four loads aligned. */
__attribute__ ((noinline))
int main2 (int n)
@@ -85,11 +91,10 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail {! vect_hw_misalign} } } } */
-/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_align } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" { target { vect_hw_misalign} } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { target { vect_hw_misalign } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
new file mode 100644
index 0000000..ae77463
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+int ib[N+5];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+ /* All the accesses are misaligned. With the cost model disabled, we count
+ the number of aligned accesses for each peeling option, and in this case
+ we align the two loads if possible (i.e., if misaligned stores are
+ supported). */
+ for (i = 1; i <= N; i++)
+ {
+ ia[i] = ib[i+2] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i+2] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+5; i++)
+ ib[i] = i;
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
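
The expected counts in vect-peel-1.c follow from the byte offsets of the three
accesses; a quick standalone check, assuming 4-byte ints, 16-byte vectors, and
16-byte-aligned array bases (the common configuration, though not guaranteed on
every target):

    #include <stdio.h>

    int
    main (void)
    {
      /* First elements touched in the loop (i starts at 1):
         ia[1] (store), ib[3] and ib[7] (loads).  */
      int elt = 4, vecbytes = 16;

      printf ("store misalign: %d bytes\n", 1 * elt % vecbytes);  /* 4  */
      printf ("load1 misalign: %d bytes\n", 3 * elt % vecbytes);  /* 12 */
      printf ("load2 misalign: %d bytes\n", 7 * elt % vecbytes);  /* 12 */
      /* The two loads share a misalignment, so a single peeling amount
         aligns both and leaves only the store unaligned -- hence the test
         expects that option exactly when misaligned stores are supported.  */
      return 0;
    }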
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
new file mode 100644
index 0000000..ee7b8db
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+/* unaligned store. */
+
+int ib[N+5];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+ /* The store is aligned and the loads are misaligned with the same
+ misalignment. Cost model is disabled. If misaligned stores are supported,
+ we peel according to the loads to align them. */
+ for (i = 0; i <= N; i++)
+ {
+ ia[i] = ib[i+2] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i+2] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+5; i++)
+ ib[i] = i;
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail { vect_no_align || vect_hw_misalign } } } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target vect_hw_misalign } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
new file mode 100644
index 0000000..80f03c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
@@ -0,0 +1,55 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 128
+#define RES 21888
+
+/* unaligned store. */
+
+int ib[N+10];
+int ia[N+10];
+int ic[N+10];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i, suma = 0, sumb = 0, sumc = 0;
+
+ /* ib and ic have the same misalignment, so we peel to align them. */
+ for (i = 1; i <= N; i++)
+ {
+ suma += ia[i];
+ sumb += ib[i+6];
+ sumc += ic[i+2];
+ }
+
+ /* check results: */
+ if (suma + sumb + sumc != RES)
+ abort ();
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+10; i++)
+ {
+ ib[i] = i;
+ ic[i] = i+2;
+ ia[i] = i/2;
+ }
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
new file mode 100644
index 0000000..971d023
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+int ib[N+5];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+ /* Don't peel; that keeps one load and the store aligned. */
+ for (i = 0; i <= N; i++)
+ {
+ ia[i] = ib[i] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < N+5; i++)
+ ib[i] = i;
+
+ return main1 ();
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 0 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index cbefc1f..cf9fab2 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -810,7 +810,11 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
NOTE: This is the only change to the code we make during
the analysis phase, before deciding to vectorize the loop. */
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "force alignment");
+ {
+ fprintf (vect_dump, "force alignment of ");
+ print_generic_expr (vect_dump, ref, TDF_SLIM);
+ }
+
DECL_ALIGN (base) = TYPE_ALIGN (vectype);
DECL_USER_ALIGN (base) = 1;
}
@@ -967,7 +971,7 @@ vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|| !STMT_VINFO_VECTORIZABLE (stmt_info))
continue;
- supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
if (!supportable_dr_alignment)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
@@ -1061,6 +1065,189 @@ vector_alignment_reachable_p (struct data_reference *dr)
return true;
}
+
+/* Calculate the cost of the memory access represented by DR. */
+
+static void
+vect_get_data_access_cost (struct data_reference *dr,
+ unsigned int *inside_cost,
+ unsigned int *outside_cost)
+{
+ gimple stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ int ncopies = vf / nunits;
+ bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
+
+ if (!supportable_dr_alignment)
+ *inside_cost = VECT_MAX_COST;
+ else
+ {
+ if (DR_IS_READ (dr))
+ vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
+ else
+ vect_get_store_cost (dr, ncopies, inside_cost);
+ }
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
+ "outside_cost = %d.", *inside_cost, *outside_cost);
+}
+
+
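+/* Hash an element of the peeling hash table; the peeling amount is the key. */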
+static hashval_t
+vect_peeling_hash (const void *elem)
+{
+ const struct _vect_peel_info *peel_info;
+
+ peel_info = (const struct _vect_peel_info *) elem;
+ return (hashval_t) peel_info->npeel;
+}
+
+
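+/* Equality callback for the peeling hash table: two entries are equal if
+ they peel the same number of iterations. */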
+static int
+vect_peeling_hash_eq (const void *elem1, const void *elem2)
+{
+ const struct _vect_peel_info *a, *b;
+
+ a = (const struct _vect_peel_info *) elem1;
+ b = (const struct _vect_peel_info *) elem2;
+ return (a->npeel == b->npeel);
+}
+
+
+/* Insert DR into peeling hash table with NPEEL as key. */
+
+static void
+vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
+ int npeel)
+{
+ struct _vect_peel_info elem, *slot;
+ void **new_slot;
+ bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
+
+ elem.npeel = npeel;
+ slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
+ &elem);
+ if (slot)
+ slot->count++;
+ else
+ {
+ slot = XNEW (struct _vect_peel_info);
+ slot->npeel = npeel;
+ slot->dr = dr;
+ slot->count = 1;
+ new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
+ INSERT);
+ *new_slot = slot;
+ }
+
+ if (!supportable_dr_alignment && !flag_vect_cost_model)
+ slot->count += VECT_MAX_COST;
+}
+
+
+/* Traverse the peeling hash table to find the peeling option that aligns the
+ maximum number of data accesses. */
+
+static int
+vect_peeling_hash_get_most_frequent (void **slot, void *data)
+{
+ vect_peel_info elem = (vect_peel_info) *slot;
+ vect_peel_extended_info max = (vect_peel_extended_info) data;
+
+ if (elem->count > max->peel_info.count)
+ {
+ max->peel_info.npeel = elem->npeel;
+ max->peel_info.count = elem->count;
+ max->peel_info.dr = elem->dr;
+ }
+
+ return 1;
+}
+
+
+/* Traverse the peeling hash table and calculate the cost for each peeling
+ option. Find the one with the lowest cost. */
+
+static int
+vect_peeling_hash_get_lowest_cost (void **slot, void *data)
+{
+ vect_peel_info elem = (vect_peel_info) *slot;
+ vect_peel_extended_info min = (vect_peel_extended_info) data;
+ int save_misalignment, dummy;
+ unsigned int inside_cost = 0, outside_cost = 0, i;
+ gimple stmt = DR_STMT (elem->dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
+ struct data_reference *dr;
+
+ for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
+ {
+ stmt = DR_STMT (dr);
+ stmt_info = vinfo_for_stmt (stmt);
+ /* For interleaving, only the alignment of the first access
+ matters. */
+ if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
+ && DR_GROUP_FIRST_DR (stmt_info) != stmt)
+ continue;
+
+ save_misalignment = DR_MISALIGNMENT (dr);
+ vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
+ vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
+ SET_DR_MISALIGNMENT (dr, save_misalignment);
+ }
+
+ outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
+ vect_get_single_scalar_iteration_cost (loop_vinfo));
+
+ if (inside_cost < min->inside_cost
+ || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
+ {
+ min->inside_cost = inside_cost;
+ min->outside_cost = outside_cost;
+ min->peel_info.dr = elem->dr;
+ min->peel_info.npeel = elem->npeel;
+ }
+
+ return 1;
+}
+
+
+/* Choose the best peeling option by traversing the peeling hash table and
+ either choosing the option with the lowest cost (if the cost model is
+ enabled) or the option that aligns as many accesses as possible. */
+
+static struct data_reference *
+vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
+ unsigned int *npeel)
+{
+ struct _vect_peel_extended_info res;
+
+ res.peel_info.dr = NULL;
+
+ if (flag_vect_cost_model)
+ {
+ res.inside_cost = INT_MAX;
+ res.outside_cost = INT_MAX;
+ htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
+ vect_peeling_hash_get_lowest_cost, &res);
+ }
+ else
+ {
+ res.peel_info.count = 0;
+ htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
+ vect_peeling_hash_get_most_frequent, &res);
+ }
+
+ *npeel = res.peel_info.npeel;
+ return res.peel_info.dr;
+}
+
+
/* Function vect_enhance_data_refs_alignment
This pass will use loop versioning and loop peeling in order to enhance
@@ -1158,15 +1345,21 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
enum dr_alignment_support supportable_dr_alignment;
- struct data_reference *dr0 = NULL;
+ struct data_reference *dr0 = NULL, *first_store = NULL;
struct data_reference *dr;
- unsigned int i;
+ unsigned int i, j;
bool do_peeling = false;
bool do_versioning = false;
bool stat;
gimple stmt;
stmt_vec_info stmt_info;
int vect_versioning_for_alias_required;
+ unsigned int npeel = 0;
+ bool all_misalignments_unknown = true;
+ unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ unsigned possible_npeel_number = 1;
+ tree vectype;
+ unsigned int nelements, mis, same_align_drs_max = 0;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
@@ -1201,12 +1394,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
- How many accesses will become unaligned due to the peeling,
and the cost of misaligned accesses.
- The cost of peeling (the extra runtime checks, the increase
- in code size).
-
- The scheme we use FORNOW: peel to force the alignment of the first
- unsupported misaligned access in the loop.
-
- TODO: Use a cost model. */
+ in code size). */
for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
{
@@ -1219,15 +1407,108 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
&& DR_GROUP_FIRST_DR (stmt_info) != stmt)
continue;
- if (!DR_IS_READ (dr) && !aligned_access_p (dr))
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
+ do_peeling = vector_alignment_reachable_p (dr);
+ if (do_peeling)
{
- do_peeling = vector_alignment_reachable_p (dr);
- if (do_peeling)
- dr0 = dr;
- if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "vector alignment may not be reachable");
- break;
- }
+ if (known_alignment_for_access_p (dr))
+ {
+ unsigned int npeel_tmp;
+
+ /* Save info about DR in the hash table. */
+ if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
+ LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
+ htab_create (1, vect_peeling_hash,
+ vect_peeling_hash_eq, free);
+
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
+ TREE_TYPE (DR_REF (dr))));
+ npeel_tmp = (nelements - mis) % vf;
+
+ /* For multiple types, it is possible that the bigger type access
+ will have more than one peeling option. E.g., a loop with two
+ types: one of size (vector size / 4), and the other one of
+ size (vector size / 8). The vectorization factor will be 8. If both
+ accesses are misaligned by 3, the first one needs one scalar
+ iteration to be aligned, and the second one needs 5. But the
+ first one will also be aligned by peeling 5 scalar
+ iterations, and in that case both accesses will be aligned.
+ Hence, except for the immediate peeling amount, we also want
+ to try to add a full vector size, while we don't exceed the
+ vectorization factor.
+ We do this automatically for the cost model, since we calculate
+ the cost for every peeling option. */
+ if (!flag_vect_cost_model)
+ possible_npeel_number = vf / nelements;
+
+ /* Handle the aligned case. We may decide to align some other
+ access, making DR unaligned. */
+ if (DR_MISALIGNMENT (dr) == 0)
+ {
+ npeel_tmp = 0;
+ if (!flag_vect_cost_model)
+ possible_npeel_number++;
+ }
+
+ for (j = 0; j < possible_npeel_number; j++)
+ {
+ gcc_assert (npeel_tmp <= vf);
+ vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
+ npeel_tmp += nelements;
+ }
+
+ all_misalignments_unknown = false;
+ /* The data-ref that was chosen for the case in which all the
+ misalignments are unknown is not relevant anymore, since we
+ have a data-ref with known alignment. */
+ dr0 = NULL;
+ }
+ else
+ {
+ /* If we don't know all the misalignment values, we prefer
+ peeling for the data-ref that has the maximum number of data-refs
+ with the same alignment, unless the target prefers to align
+ stores over loads. */
+ if (all_misalignments_unknown)
+ {
+ if (same_align_drs_max < VEC_length (dr_p,
+ STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
+ || !dr0)
+ {
+ same_align_drs_max = VEC_length (dr_p,
+ STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
+ dr0 = dr;
+ }
+
+ if (!first_store && !DR_IS_READ (dr))
+ first_store = dr;
+ }
+
+ /* If there are both known and unknown misaligned accesses in the
+ loop, we choose the peeling amount according to the known
+ accesses. */
+
+ if (!supportable_dr_alignment)
+ {
+ dr0 = dr;
+ if (!first_store && !DR_IS_READ (dr))
+ first_store = dr;
+ }
+ }
+ }
+ else
+ {
+ if (!aligned_access_p (dr))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vector alignment may not be reachable");
+
+ break;
+ }
+ }
}
vect_versioning_for_alias_required
@@ -1242,24 +1523,112 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
do_peeling = false;
+ if (do_peeling && all_misalignments_unknown
+ && vect_supportable_dr_alignment (dr0, false))
+ {
+
+ /* Check if the target prefers to align stores over loads, i.e., if
+ misaligned stores are more expensive than misaligned loads (taking
+ drs with the same alignment into account). */
+ if (first_store && DR_IS_READ (dr0))
+ {
+ unsigned int load_inside_cost = 0, load_outside_cost = 0;
+ unsigned int store_inside_cost = 0, store_outside_cost = 0;
+ unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
+ unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
+
+ vect_get_data_access_cost (dr0, &load_inside_cost,
+ &load_outside_cost);
+ vect_get_data_access_cost (first_store, &store_inside_cost,
+ &store_outside_cost);
+
+ /* Calculate the penalty for leaving FIRST_STORE unaligned (by
+ aligning the load DR0). */
+ load_inside_penalty = store_inside_cost;
+ load_outside_penalty = store_outside_cost;
+ for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (first_store))),
+ i, dr);
+ i++)
+ if (DR_IS_READ (dr))
+ {
+ load_inside_penalty += load_inside_cost;
+ load_outside_penalty += load_outside_cost;
+ }
+ else
+ {
+ load_inside_penalty += store_inside_cost;
+ load_outside_penalty += store_outside_cost;
+ }
+
+ /* Calculate the penalty for leaving DR0 unaligned (by
+ aligning the FIRST_STORE). */
+ store_inside_penalty = load_inside_cost;
+ store_outside_penalty = load_outside_cost;
+ for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (dr0))),
+ i, dr);
+ i++)
+ if (DR_IS_READ (dr))
+ {
+ store_inside_penalty += load_inside_cost;
+ store_outside_penalty += load_outside_cost;
+ }
+ else
+ {
+ store_inside_penalty += store_inside_cost;
+ store_outside_penalty += store_outside_cost;
+ }
+
+ if (load_inside_penalty > store_inside_penalty
+ || (load_inside_penalty == store_inside_penalty
+ && load_outside_penalty > store_outside_penalty))
+ dr0 = first_store;
+ }
+
+ /* In case there are only loads with different unknown misalignments, use
+ peeling only if it may help to align other accesses in the loop. */
+ if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
+ (vinfo_for_stmt (DR_STMT (dr0))))
+ && vect_supportable_dr_alignment (dr0, false)
+ != dr_unaligned_supported)
+ do_peeling = false;
+ }
+
+ if (do_peeling && !dr0)
+ {
+ /* Peeling is possible, but there is no data access that requires
+ alignment in order to be supported. So we try to choose the best
+ possible peeling. */
+
+ /* We should get here only if there are drs with known misalignment. */
+ gcc_assert (!all_misalignments_unknown);
+
+ /* Choose the best peeling from the hash table. */
+ dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
+ if (!dr0 || !npeel)
+ do_peeling = false;
+ }
+
if (do_peeling)
{
- int mis;
- int npeel = 0;
- gimple stmt = DR_STMT (dr0);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
+ stmt = DR_STMT (dr0);
+ stmt_info = vinfo_for_stmt (stmt);
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (known_alignment_for_access_p (dr0))
{
- /* Since it's known at compile time, compute the number of iterations
- in the peeled loop (the peeling factor) for use in updating
- DR_MISALIGNMENT values. The peeling factor is the vectorization
- factor minus the misalignment as an element count. */
- mis = DR_MISALIGNMENT (dr0);
- mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
- npeel = nelements - mis;
+ if (!npeel)
+ {
+ /* Since it's known at compile time, compute the number of
+ iterations in the peeled loop (the peeling factor) for use in
+ updating DR_MISALIGNMENT values. The peeling factor is the
+ vectorization factor minus the misalignment as an element
+ count. */
+ mis = DR_MISALIGNMENT (dr0);
+ mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
+ npeel = nelements - mis;
+ }
/* For interleaved data access every iteration accesses all the
members of the group, therefore we divide the number of iterations
@@ -1290,7 +1659,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
save_misalignment = DR_MISALIGNMENT (dr);
vect_update_misalignment_for_peel (dr, dr0, npeel);
- supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
SET_DR_MISALIGNMENT (dr, save_misalignment);
if (!supportable_dr_alignment)
@@ -1300,6 +1669,15 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
}
}
+ if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
+ {
+ stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
+ if (!stat)
+ do_peeling = false;
+ else
+ return stat;
+ }
+
if (do_peeling)
{
/* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
@@ -1314,7 +1692,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
vect_update_misalignment_for_peel (dr, dr0, npeel);
LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
- LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
+ if (npeel)
+ LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
+ else
+ LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
SET_DR_MISALIGNMENT (dr0, 0);
if (vect_print_dump_info (REPORT_ALIGNMENT))
fprintf (vect_dump, "Alignment of access forced using peeling.");
@@ -1358,7 +1739,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
&& DR_GROUP_FIRST_DR (stmt_info) != stmt))
continue;
- supportable_dr_alignment = vect_supportable_dr_alignment (dr);
+ supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
if (!supportable_dr_alignment)
{
@@ -1467,7 +1848,7 @@ vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
return;
- if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
+ if (dra == drb)
return;
if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
@@ -3558,13 +3939,16 @@ vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
return (alignment <= MAX_STACK_ALIGNMENT);
}
-/* Function vect_supportable_dr_alignment
- Return whether the data reference DR is supported with respect to its
+/* Return whether the data reference DR is supported with respect to its
+ alignment.
+ If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
+ if it is aligned, i.e., check if it is possible to vectorize it with different
alignment. */
enum dr_alignment_support
-vect_supportable_dr_alignment (struct data_reference *dr)
+vect_supportable_dr_alignment (struct data_reference *dr,
+ bool check_aligned_accesses)
{
gimple stmt = DR_STMT (dr);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -3574,7 +3958,7 @@ vect_supportable_dr_alignment (struct data_reference *dr)
struct loop *vect_loop = NULL;
bool nested_in_vect_loop = false;
- if (aligned_access_p (dr))
+ if (aligned_access_p (dr) && !check_aligned_accesses)
return dr_aligned;
if (!loop_vinfo)
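
The hash-table bookkeeping added above reduces to modular arithmetic over
peeling amounts. The following standalone sketch mirrors the logic of
vect_peeling_hash_insert and vect_peeling_hash_get_most_frequent for the
no-cost-model path; the fixed-size array standing in for the hash table and all
the names below are illustrative assumptions, not GCC code:

    #include <stdio.h>

    #define MAX_PEEL 64

    /* count[npeel] = number of accesses that become aligned when the
       prologue peels npeel scalar iterations.  */
    static unsigned count[MAX_PEEL];

    /* Record every peeling amount that aligns an access with misalignment
       MIS (in elements), stepping by NELEMENTS up to VF, as described in
       the comment about multiple vector types in the patch.  */
    static void
    record_candidates (unsigned mis, unsigned nelements, unsigned vf)
    {
      unsigned npeel = (nelements - mis) % vf;
      for (; npeel <= vf; npeel += nelements)
        count[npeel]++;
    }

    int
    main (void)
    {
      unsigned i, best = 0;

      /* Two types in one loop, VF = 8: a 4-element access misaligned by 3
         is aligned by peeling 1 or 5; an 8-element access misaligned by 3
         only by peeling 5.  Peeling 5 aligns both accesses.  */
      record_candidates (3, 4, 8);
      record_candidates (3, 8, 8);

      for (i = 0; i < MAX_PEEL; i++)
        if (count[i] > count[best])
          best = i;
      printf ("best npeel = %u (aligns %u accesses)\n", best, count[best]);
      return 0;
    }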
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index f8922a2..38546cf 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1976,25 +1976,18 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
tree niters_type = TREE_TYPE (loop_niters);
- int step = 1;
- int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
- if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
- step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
-
pe = loop_preheader_edge (loop);
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
{
- int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
- int elem_misalign = byte_misalign / element_size;
+ int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "known alignment = %d.", byte_misalign);
+ fprintf (vect_dump, "known peeling = %d.", npeel);
- iters = build_int_cst (niters_type,
- (((nelements - elem_misalign) & (nelements - 1)) / step));
+ iters = build_int_cst (niters_type, npeel);
}
else
{
@@ -2017,7 +2010,8 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign =
- fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
+ fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
+ vectype_size_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =
@@ -2323,7 +2317,8 @@ vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
DR_STEP (dr), vect_factor);
- if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
+ if (vect_supportable_dr_alignment (dr, false)
+ == dr_explicit_realign_optimized)
{
tree vector_size = TYPE_SIZE_UNIT
(STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
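
The byte-misalignment arithmetic deleted from vect_gen_niters_for_prolog_loop
and the npeel value now stored directly in LOOP_PEELING_FOR_ALIGNMENT agree for
the simple contiguous case; a quick check of the old formula with assumed
values (4-byte elements, 4-element vectors, unit step):

    #include <stdio.h>

    int
    main (void)
    {
      int byte_misalign = 12, element_size = 4, nelements = 4, step = 1;
      int elem_misalign = byte_misalign / element_size;

      /* Old scheme: derive the prologue iteration count from the byte
         misalignment: (4 - 3) & 3 = 1.  */
      int iters = ((nelements - elem_misalign) & (nelements - 1)) / step;
      printf ("prologue iters = %d\n", iters);   /* 1 */

      /* New scheme: the analysis phase already computed npeel = 1 and
         stored it, so the prologue code just reads it back.  */
      return 0;
    }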
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 3b38716..ef48173 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -755,6 +755,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
+ LOOP_VINFO_PEELING_HTAB (res) = NULL;
return res;
}
@@ -845,6 +846,9 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
+ if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
+ htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
+
free (loop_vinfo);
loop->aux = NULL;
}
@@ -1122,7 +1126,11 @@ vect_analyze_loop_form (struct loop *loop)
static inline
int vect_get_cost (enum vect_cost_for_stmt type_of_cost)
{
- return targetm.vectorize.builtin_vectorization_cost (type_of_cost);
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
+ dummy_type, dummy);
}
@@ -1498,17 +1506,6 @@ vect_analyze_loop (struct loop *loop)
return NULL;
}
- /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
- ok = vect_analyze_slp (loop_vinfo, NULL);
- if (ok)
- {
- /* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
-
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
- }
-
/* This pass will decide on using loop versioning and/or loop peeling in
order to enhance the alignment of data references in the loop. */
@@ -1516,11 +1513,22 @@ vect_analyze_loop (struct loop *loop)
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "bad data alignment.");
+ fprintf (vect_dump, "bad data alignment.");
destroy_loop_vec_info (loop_vinfo, true);
return NULL;
}
+ /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
+ ok = vect_analyze_slp (loop_vinfo, NULL);
+ if (ok)
+ {
+ /* Decide which possible SLP instances to SLP. */
+ vect_make_slp_decision (loop_vinfo);
+
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+ }
+
/* Scan all the operations in the loop and make sure they are
vectorizable. */
@@ -2004,6 +2012,94 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
double_reduc, true);
}
+/* Calculate the cost of one scalar iteration of the loop. */
+int
+vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
+ int innerloop_iters, i, stmt_cost;
+
+ /* Count the statements in the scalar loop, and use this as the scalar
+ cost of a single iteration for now.
+
+ TODO: Add outer loop support.
+
+ TODO: Consider assigning different costs to different scalar
+ statements. */
+
+ /* FORNOW. */
+ if (loop->inner)
+ innerloop_iters = 50; /* FIXME */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ gimple_stmt_iterator si;
+ basic_block bb = bbs[i];
+
+ if (bb->loop_father == loop->inner)
+ factor = innerloop_iters;
+ else
+ factor = 1;
+
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+
+ if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
+ continue;
+
+ if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+ {
+ if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
+ stmt_cost = vect_get_cost (scalar_load);
+ else
+ stmt_cost = vect_get_cost (scalar_store);
+ }
+ else
+ stmt_cost = vect_get_cost (scalar_stmt);
+
+ scalar_single_iter_cost += stmt_cost * factor;
+ }
+ }
+ return scalar_single_iter_cost;
+}
+
+/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
+int
+vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
+ int *peel_iters_epilogue,
+ int scalar_single_iter_cost)
+{
+ int peel_guard_costs = 0;
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ *peel_iters_epilogue = vf/2;
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set to vf/2 because "
+ "loop iterations are unknown .");
+
+ /* If the number of peeled iterations is known but the number of scalar
+ loop iterations is unknown, count a taken branch per peeled loop. */
+ peel_guard_costs = 2 * vect_get_cost (cond_branch_taken);
+ }
+ else
+ {
+ int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ peel_iters_prologue = niters < peel_iters_prologue ?
+ niters : peel_iters_prologue;
+ *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
+ }
+
+ return (peel_iters_prologue * scalar_single_iter_cost)
+ + (*peel_iters_epilogue * scalar_single_iter_cost)
+ + peel_guard_costs;
+}
+
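A worked example with made-up numbers (not from the patch) shows the known-iteration-count branch in action: with vf = 4, niters = 23, peel_iters_prologue = 2 and scalar_single_iter_cost = 5,

    peel_iters_prologue  = MIN (23, 2)   = 2
    *peel_iters_epilogue = (23 - 2) % 4  = 1
    return value         = 2*5 + 1*5 + 0 = 15

and peel_guard_costs stays 0, because no guard branch is needed once both counts are known at compile time.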
/* Function vect_estimate_min_profitable_iters
Return the number of iterations required for the vector version of the
@@ -2028,7 +2124,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
int peel_guard_costs = 0;
int innerloop_iters = 0, factor;
VEC (slp_instance, heap) *slp_instances;
@@ -2099,7 +2195,6 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
&& (!STMT_VINFO_LIVE_P (stmt_info)
|| STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
continue;
- scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
/* FIXME: for stmts in the inner-loop in outer-loop vectorization,
some of the "outside" costs are generated inside the outer-loop. */
@@ -2107,6 +2202,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
}
}
+ scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
+
/* Add additional cost for the peeled instructions in prologue and epilogue
loop.
@@ -2116,7 +2213,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */
- if (byte_misalign < 0)
+ if (npeel < 0)
{
peel_iters_prologue = vf/2;
if (vect_print_dump_info (REPORT_COST))
@@ -2137,46 +2234,18 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
not known. Hence guards remain the same. */
peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken)
+ vect_get_cost (cond_branch_not_taken));
+ vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ + (peel_iters_epilogue * scalar_single_iter_cost)
+ + peel_guard_costs;
}
else
{
- if (byte_misalign)
- {
- struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
- int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
- tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
- int nelements = TYPE_VECTOR_SUBPARTS (vectype);
-
- peel_iters_prologue = nelements - (byte_misalign / element_size);
- }
- else
- peel_iters_prologue = 0;
-
- if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
- {
- peel_iters_epilogue = vf/2;
- if (vect_print_dump_info (REPORT_COST))
- fprintf (vect_dump, "cost model: "
- "epilogue peel iters set to vf/2 because "
- "loop iterations are unknown .");
-
- /* If peeled iterations are known but number of scalar loop
- iterations are unknown, count a taken branch per peeled loop. */
- peel_guard_costs += 2 * vect_get_cost (cond_branch_taken);
- }
- else
- {
- int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
- peel_iters_prologue = niters < peel_iters_prologue ?
- niters : peel_iters_prologue;
- peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
- }
+ peel_iters_prologue = npeel;
+ vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
+ peel_iters_prologue, &peel_iters_epilogue,
+ scalar_single_iter_cost);
}
- vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
- + (peel_iters_epilogue * scalar_single_iter_cost)
- + peel_guard_costs;
-
/* FORNOW: The scalar outside cost is incremented in one of the
following ways:
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 5f753a2..1ae3a65 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -560,7 +560,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
if (first_load == stmt)
{
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (vect_supportable_dr_alignment (first_dr)
+ if (vect_supportable_dr_alignment (first_dr, false)
== dr_unaligned_unsupported)
{
if (vect_print_dump_info (REPORT_SLP))
@@ -646,7 +646,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
{
VEC_safe_push (slp_tree, heap, *loads, *node);
*inside_cost
- += targetm.vectorize.builtin_vectorization_cost (vec_perm)
+ += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
* group_size;
}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index c95fe7d..89e7c4b 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -545,6 +545,18 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
}
+/* Get the statement cost by calling the target hook
+ builtin_vectorization_cost. */
+
+static inline
+int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost)
+{
+ tree dummy_type = NULL;
+ int dummy = 0;
+
+ return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
+ dummy_type, dummy);
+}
+
int
cost_for_stmt (gimple stmt)
{
@@ -553,9 +565,9 @@ cost_for_stmt (gimple stmt)
switch (STMT_VINFO_TYPE (stmt_info))
{
case load_vec_info_type:
- return targetm.vectorize.builtin_vectorization_cost (scalar_load);
+ return vect_get_stmt_cost (scalar_load);
case store_vec_info_type:
- return targetm.vectorize.builtin_vectorization_cost (scalar_store);
+ return vect_get_stmt_cost (scalar_store);
case op_vec_info_type:
case condition_vec_info_type:
case assignment_vec_info_type:
@@ -565,7 +577,7 @@ cost_for_stmt (gimple stmt)
case type_demotion_vec_info_type:
case type_conversion_vec_info_type:
case call_vec_info_type:
- return targetm.vectorize.builtin_vectorization_cost (scalar_stmt);
+ return vect_get_stmt_cost (scalar_stmt);
case undef_vec_info_type:
default:
gcc_unreachable ();
@@ -589,15 +601,13 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
if (PURE_SLP_STMT (stmt_info))
return;
- inside_cost = ncopies
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ inside_cost = ncopies * vect_get_stmt_cost (vector_stmt);
/* FORNOW: Assuming maximum 2 args per stmts. */
for (i = 0; i < 2; i++)
{
if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
- outside_cost
- += targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ outside_cost += vect_get_stmt_cost (vector_stmt);
}
if (vect_print_dump_info (REPORT_COST))
@@ -638,22 +648,39 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
enum vect_def_type dt, slp_tree slp_node)
{
int group_size;
- int inside_cost = 0, outside_cost = 0;
+ unsigned int inside_cost = 0, outside_cost = 0;
+ struct data_reference *first_dr;
+ gimple first_stmt;
/* The SLP costs were already calculated during SLP tree build. */
if (PURE_SLP_STMT (stmt_info))
return;
if (dt == vect_constant_def || dt == vect_external_def)
- outside_cost
- = targetm.vectorize.builtin_vectorization_cost (scalar_to_vec);
+ outside_cost = vect_get_stmt_cost (scalar_to_vec);
/* Strided access? */
- if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
- group_size = vect_cost_strided_group_size (stmt_info);
+ if (DR_GROUP_FIRST_DR (stmt_info))
+ {
+ if (slp_node)
+ {
+ first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
+ group_size = 1;
+ }
+ else
+ {
+ first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+ group_size = vect_cost_strided_group_size (stmt_info);
+ }
+
+ first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ }
/* Not a strided access. */
else
- group_size = 1;
+ {
+ group_size = 1;
+ first_dr = STMT_VINFO_DATA_REF (stmt_info);
+ }
/* Is this an access in a group of stores, which provide strided access?
If so, add in the cost of the permutes. */
@@ -661,7 +688,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
{
/* Uses a high and low interleave operation for each needed permute. */
inside_cost = ncopies * exact_log2(group_size) * group_size
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ * vect_get_stmt_cost (vector_stmt);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
@@ -670,8 +697,7 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
}
/* Costs of the stores. */
- inside_cost += ncopies
- * targetm.vectorize.builtin_vectorization_cost (vector_store);
+ vect_get_store_cost (first_dr, ncopies, &inside_cost);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
@@ -683,6 +709,49 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
}
+/* Calculate the cost of the memory access of a vector store of DR. */
+void
+vect_get_store_cost (struct data_reference *dr, int ncopies,
+ unsigned int *inside_cost)
+{
+ int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+
+ switch (alignment_support_scheme)
+ {
+ case dr_aligned:
+ {
+ *inside_cost += ncopies * vect_get_stmt_cost (vector_store);
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_store_cost: aligned.");
+
+ break;
+ }
+
+ case dr_unaligned_supported:
+ {
+ gimple stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+
+ /* Here, we assign an additional cost for the unaligned store. */
+ *inside_cost += ncopies
+ * targetm.vectorize.builtin_vectorization_cost (unaligned_store,
+ vectype, DR_MISALIGNMENT (dr));
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_store_cost: unaligned supported by "
+ "hardware.");
+
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
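The unaligned_store case above is where the hook's two new arguments pay off: a target can now price an unaligned access according to the vector type and to whether the misalignment is known at compile time. The sketch below is a hypothetical hook, not the i386/rs6000 implementation from this patch, and its cost constants are illustrative only:

    /* Hypothetical target cost hook; values are illustrative only.  */
    static int
    example_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                        tree vectype ATTRIBUTE_UNUSED,
                                        int misalign)
    {
      switch (type_of_cost)
        {
        case unaligned_load:
        case unaligned_store:
          /* DR_MISALIGNMENT passes -1 when the misalignment is unknown
             at compile time; charge more in that case.  */
          return misalign < 0 ? 4 : 2;

        default:
          /* Keep the default unit cost for everything else.  */
          return 1;
        }
    }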
/* Function vect_model_load_cost
Models cost for loads. In the case of strided accesses, the last access
@@ -695,10 +764,9 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
{
int group_size;
- int alignment_support_cheme;
gimple first_stmt;
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
- int inside_cost = 0, outside_cost = 0;
+ unsigned int inside_cost = 0, outside_cost = 0;
/* The SLP costs were already calculated during SLP tree build. */
if (PURE_SLP_STMT (stmt_info))
@@ -718,29 +786,47 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
first_dr = dr;
}
- alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
-
/* Is this an access in a group of loads providing strided access?
If so, add in the cost of the permutes. */
if (group_size > 1)
{
/* Uses an even and odd extract operations for each needed permute. */
inside_cost = ncopies * exact_log2(group_size) * group_size
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ * vect_get_stmt_cost (vector_stmt);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
group_size);
-
}
/* The loads themselves. */
- switch (alignment_support_cheme)
+ vect_get_load_cost (first_dr, ncopies,
+ ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node),
+ &inside_cost, &outside_cost);
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
+ "outside_cost = %d .", inside_cost, outside_cost);
+
+ /* Set the costs either in STMT_INFO or in SLP_NODE (if it exists). */
+ stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
+ stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
+}
+
+
+/* Calculate the cost of the memory access of a vector load of DR. */
+void
+vect_get_load_cost (struct data_reference *dr, int ncopies,
+ bool add_realign_cost, unsigned int *inside_cost,
+ unsigned int *outside_cost)
+{
+ int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+
+ switch (alignment_support_scheme)
{
case dr_aligned:
{
- inside_cost += ncopies
- * targetm.vectorize.builtin_vectorization_cost (vector_load);
+ inside_cost += ncopies * vect_get_stmt_cost (vector_load);
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_load_cost: aligned.");
@@ -749,10 +835,14 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
}
case dr_unaligned_supported:
{
- /* Here, we assign an additional cost for the unaligned load. */
- inside_cost += ncopies
- * targetm.vectorize.builtin_vectorization_cost (unaligned_load);
+ gimple stmt = DR_STMT (dr);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ /* Here, we assign an additional cost for the unaligned load. */
+ *inside_cost += ncopies
+ * targetm.vectorize.builtin_vectorization_cost (unaligned_load,
+ vectype, DR_MISALIGNMENT (dr));
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
"hardware.");
@@ -761,16 +851,14 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
}
case dr_explicit_realign:
{
- inside_cost += ncopies * (2
- * targetm.vectorize.builtin_vectorization_cost (vector_load)
- + targetm.vectorize.builtin_vectorization_cost (vector_stmt));
+ *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
+ + vect_get_stmt_cost (vector_stmt));
/* FIXME: If the misalignment remains fixed across the iterations of
the containing loop, the following cost should be added to the
outside costs. */
if (targetm.vectorize.builtin_mask_for_load)
- inside_cost
- += targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ *inside_cost += vect_get_stmt_cost (vector_stmt);
break;
}
@@ -787,32 +875,21 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
access in the group. Inside the loop, there is a load op
and a realignment op. */
- if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
+ if (add_realign_cost)
{
- outside_cost = 2
- * targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ *outside_cost = 2 * vect_get_stmt_cost (vector_stmt);
if (targetm.vectorize.builtin_mask_for_load)
- outside_cost
- += targetm.vectorize.builtin_vectorization_cost (vector_stmt);
+ *outside_cost += vect_get_stmt_cost (vector_stmt);
}
- inside_cost += ncopies
- * (targetm.vectorize.builtin_vectorization_cost (vector_load)
- + targetm.vectorize.builtin_vectorization_cost (vector_stmt));
+ *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
+ + vect_get_stmt_cost (vector_stmt));
break;
}
default:
gcc_unreachable ();
}
-
- if (vect_print_dump_info (REPORT_COST))
- fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
- "outside_cost = %d .", inside_cost, outside_cost);
-
- /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
- stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
- stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
}
@@ -3142,7 +3219,7 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
dr_chain = VEC_alloc (tree, heap, group_size);
oprnds = VEC_alloc (tree, heap, group_size);
- alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
+ alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
gcc_assert (alignment_support_scheme);
/* In case the vectorization factor (VF) is bigger than the number
@@ -3507,7 +3584,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
group_size = vec_num = 1;
}
- alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
+ alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
gcc_assert (alignment_support_scheme);
/* In case the vectorization factor (VF) is bigger than the number
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index bf6769c..ed8ff58 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -170,6 +170,21 @@ DEF_VEC_ALLOC_P(slp_instance, heap);
#define SLP_TREE_OUTSIDE_OF_LOOP_COST(S) (S)->cost.outside_of_loop
#define SLP_TREE_INSIDE_OF_LOOP_COST(S) (S)->cost.inside_of_loop
+
+typedef struct _vect_peel_info
+{
+ int npeel;
+ struct data_reference *dr;
+ unsigned int count;
+} *vect_peel_info;
+
+typedef struct _vect_peel_extended_info
+{
+ struct _vect_peel_info peel_info;
+ unsigned int inside_cost;
+ unsigned int outside_cost;
+} *vect_peel_extended_info;
+
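These records back the new peeling hash table: one slot per candidate peeling amount NPEEL, with COUNT recording how many data references become aligned when NPEEL iterations are peeled, and the extended variant carrying the inside/outside costs of that choice. A condensed, hypothetical sketch of the lookup-and-count pattern follows (the variables npeel, dr and loop_vinfo are assumed to be in scope; the committed logic lives in tree-vect-data-refs.c):

    struct _vect_peel_info elem, *slot;
    void **loc;

    elem.npeel = npeel;
    loc = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), &elem, INSERT);
    slot = (vect_peel_info) *loc;
    if (slot)
      slot->count++;                /* Another DR aligned by this NPEEL.  */
    else
      {
        /* First DR for this peeling amount: record it.  */
        slot = XNEW (struct _vect_peel_info);
        slot->npeel = npeel;
        slot->dr = dr;
        slot->count = 1;
        *loc = slot;
      }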
/*-----------------------------------------------------------------*/
/* Info on vectorized loops. */
/*-----------------------------------------------------------------*/
@@ -245,6 +260,10 @@ typedef struct _loop_vec_info {
/* Reduction cycles detected in the loop. Used in loop-aware SLP. */
VEC (gimple, heap) *reductions;
+
+ /* Hash table used to choose the best peeling option. */
+ htab_t peeling_htab;
+
} *loop_vec_info;
/* Access Functions. */
@@ -270,6 +289,7 @@ typedef struct _loop_vec_info {
#define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
#define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
#define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
+#define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
VEC_length (gimple, (L)->may_misalign_stmts) > 0
@@ -543,6 +563,8 @@ typedef struct _stmt_vec_info {
#define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
#define STMT_SLP_TYPE(S) (S)->slp_type
+#define VECT_MAX_COST 1000
+
/* The maximum number of intermediate steps required in multi-step type
conversion. */
#define MAX_INTERM_CVT_STEPS 3
@@ -743,11 +765,14 @@ extern void vect_remove_stores (gimple);
extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
tree, int);
+extern void vect_get_load_cost (struct data_reference *, int, bool,
+ unsigned int *, unsigned int *);
+extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
extern enum dr_alignment_support vect_supportable_dr_alignment
- (struct data_reference *);
+ (struct data_reference *, bool);
extern tree vect_get_smallest_scalar_type (gimple, HOST_WIDE_INT *,
HOST_WIDE_INT *);
extern bool vect_analyze_data_ref_dependences (loop_vec_info, bb_vec_info,
@@ -795,7 +820,8 @@ extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *);
extern int vect_estimate_min_profitable_iters (loop_vec_info);
extern tree get_initial_def_for_reduction (gimple, tree, tree *);
extern int vect_min_worthwhile_factor (enum tree_code);
-
+extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int);
+extern int vect_get_single_scalar_iteraion_cost (loop_vec_info);
/* In tree-vect-slp.c. */
extern void vect_free_slp_instance (slp_instance);