author     Richard Sandiford <richard.sandiford@arm.com>   2016-07-06 08:16:53 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>        2016-07-06 08:16:53 +0000
commit     62da9e14ce5b670bf40a91f1dc9473579a2d5549 (patch)
tree       f64f6affdc9e9aa406115967f1fe3db21b1b124c
parent     2de001eed013bb666f832694bc75b8f055ffdc76 (diff)
[7/7] Add negative and zero strides to vect_memory_access_type
This patch uses the vect_memory_access_type from patch 6 to represent
the effect of a negative contiguous stride or a zero stride.  The
latter is valid only for loads.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.

gcc/
	* tree-vectorizer.h (vect_memory_access_type): Add VMAT_INVARIANT,
	VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE.
	* tree-vect-stmts.c (compare_step_with_zero): New function.
	(perm_mask_for_reverse): Move further up file.
	(get_group_load_store_type): Stick to VMAT_ELEMENTWISE if the
	step is negative.
	(get_negative_load_store_type): New function.
	(get_load_store_type): Call it.  Add an ncopies argument.
	(vectorizable_mask_load_store): Update call accordingly and
	remove tests for negative steps.
	(vectorizable_store, vectorizable_load): Likewise.  Handle new
	memory_access_types.

From-SVN: r238039
-rw-r--r--   gcc/ChangeLog            15
-rw-r--r--   gcc/tree-vect-stmts.c   234
-rw-r--r--   gcc/tree-vectorizer.h    12
3 files changed, 140 insertions, 121 deletions
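For context (not part of the patch), here is a minimal sketch of the kinds of
scalar loops the new memory access types are meant to classify.  The loop
bodies and the VMAT_* mapping noted in the comments are illustrative
assumptions drawn from the commit message and the enum comments in
tree-vectorizer.h below, not code taken from the patch itself.

/* Illustrative loops only; the VMAT_* names in the comments are the
   classifications this patch is expected to assign, stated as an
   assumption rather than as output of the vectorizer.  */

void
example_loops (int *restrict a, int *restrict b, int n, int c)
{
  /* Positive unit step: VMAT_CONTIGUOUS.  */
  for (int i = 0; i < n; i++)
    a[i] = b[i] + 1;

  /* Load with a negative unit step: VMAT_CONTIGUOUS_REVERSE if the
     target supports a reversing permute, otherwise VMAT_ELEMENTWISE.  */
  for (int i = 0; i < n; i++)
    a[i] = b[n - 1 - i];

  /* Store of an invariant value with a negative step:
     VMAT_CONTIGUOUS_DOWN, since no permute is needed.  */
  for (int i = n - 1; i >= 0; i--)
    a[i] = c;

  /* Zero step (invariant address): VMAT_INVARIANT, valid for loads only.  */
  for (int i = 0; i < n; i++)
    a[i] = b[0];
}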
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2e30934..c853885 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,20 @@
2016-07-06 Richard Sandiford <richard.sandiford@arm.com>
+ * tree-vectorizer.h (vect_memory_access_type): Add
+ VMAT_INVARIANT, VMAT_CONTIGUOUS_DOWN and VMAT_CONTIGUOUS_REVERSE.
+ * tree-vect-stmts.c (compare_step_with_zero): New function.
+ (perm_mask_for_reverse): Move further up file.
+ (get_group_load_store_type): Stick to VMAT_ELEMENTWISE if the
+ step is negative.
+ (get_negative_load_store_type): New function.
+ (get_load_store_type): Call it. Add an ncopies argument.
+ (vectorizable_mask_load_store): Update call accordingly and
+ remove tests for negative steps.
+ (vectorizable_store, vectorizable_load): Likewise. Handle new
+ memory_access_types.
+
+2016-07-06 Richard Sandiford <richard.sandiford@arm.com>
+
* tree-vectorizer.h (vect_memory_access_type): New enum.
(_stmt_vec_info): Add a memory_access_type field.
(STMT_VINFO_MEMORY_ACCESS_TYPE): New macro.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 16bec2b..ffa5e98 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1672,6 +1672,42 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
static tree permute_vec_elements (tree, tree, tree, gimple *,
gimple_stmt_iterator *);
+/* STMT is a non-strided load or store, meaning that it accesses
+ elements with a known constant step. Return -1 if that step
+ is negative, 0 if it is zero, and 1 if it is greater than zero. */
+
+static int
+compare_step_with_zero (gimple *stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ tree step;
+ if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
+ step = STMT_VINFO_DR_STEP (stmt_info);
+ else
+ step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
+ return tree_int_cst_compare (step, size_zero_node);
+}
+
+/* If the target supports a permute mask that reverses the elements in
+ a vector of type VECTYPE, return that mask, otherwise return null. */
+
+static tree
+perm_mask_for_reverse (tree vectype)
+{
+ int i, nunits;
+ unsigned char *sel;
+
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ sel = XALLOCAVEC (unsigned char, nunits);
+
+ for (i = 0; i < nunits; ++i)
+ sel[i] = nunits - 1 - i;
+
+ if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
+ return NULL_TREE;
+ return vect_gen_perm_mask_checked (vectype, sel);
+}
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT is part of a grouped load
@@ -1755,7 +1791,8 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
would access excess elements in the last iteration. */
bool would_overrun_p = (gap != 0);
if (!STMT_VINFO_STRIDED_P (stmt_info)
- && (can_overrun_p || !would_overrun_p))
+ && (can_overrun_p || !would_overrun_p)
+ && compare_step_with_zero (stmt) > 0)
{
/* First try using LOAD/STORE_LANES. */
if (vls_type == VLS_LOAD
@@ -1814,17 +1851,69 @@ get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
return true;
}
+/* A subroutine of get_load_store_type, with a subset of the same
+ arguments. Handle the case where STMT is a load or store that
+ accesses consecutive elements with a negative step. */
+
+static vect_memory_access_type
+get_negative_load_store_type (gimple *stmt, tree vectype,
+ vec_load_store_type vls_type,
+ unsigned int ncopies)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+ dr_alignment_support alignment_support_scheme;
+
+ if (ncopies > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "multiple types with negative step.\n");
+ return VMAT_ELEMENTWISE;
+ }
+
+ alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+ if (alignment_support_scheme != dr_aligned
+ && alignment_support_scheme != dr_unaligned_supported)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "negative step but alignment required.\n");
+ return VMAT_ELEMENTWISE;
+ }
+
+ if (vls_type == VLS_STORE_INVARIANT)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "negative step with invariant source;"
+ " no permute needed.\n");
+ return VMAT_CONTIGUOUS_DOWN;
+ }
+
+ if (!perm_mask_for_reverse (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "negative step and reversing not supported.\n");
+ return VMAT_ELEMENTWISE;
+ }
+
+ return VMAT_CONTIGUOUS_REVERSE;
+}
+
/* Analyze load or store statement STMT of type VLS_TYPE. Return true
if there is a memory access type that the vectorized form can use,
storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
or scatters, fill in GS_INFO accordingly.
SLP says whether we're performing SLP rather than loop vectorization.
- VECTYPE is the vector type that the vectorized statements will use. */
+ VECTYPE is the vector type that the vectorized statements will use.
+ NCOPIES is the number of vector statements that will be needed. */
static bool
get_load_store_type (gimple *stmt, tree vectype, bool slp,
- vec_load_store_type vls_type,
+ vec_load_store_type vls_type, unsigned int ncopies,
vect_memory_access_type *memory_access_type,
gather_scatter_info *gs_info)
{
@@ -1860,7 +1949,19 @@ get_load_store_type (gimple *stmt, tree vectype, bool slp,
*memory_access_type = VMAT_ELEMENTWISE;
}
else
- *memory_access_type = VMAT_CONTIGUOUS;
+ {
+ int cmp = compare_step_with_zero (stmt);
+ if (cmp < 0)
+ *memory_access_type = get_negative_load_store_type
+ (stmt, vectype, vls_type, ncopies);
+ else if (cmp == 0)
+ {
+ gcc_assert (vls_type == VLS_LOAD);
+ *memory_access_type = VMAT_INVARIANT;
+ }
+ else
+ *memory_access_type = VMAT_CONTIGUOUS;
+ }
/* FIXME: At the moment the cost model seems to underestimate the
cost of using elementwise accesses. This check preserves the
@@ -1971,7 +2072,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
vls_type = VLS_LOAD;
vect_memory_access_type memory_access_type;
- if (!get_load_store_type (stmt, vectype, false, vls_type,
+ if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
&memory_access_type, &gs_info))
return false;
@@ -1996,10 +2097,6 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
vls_type == VLS_LOAD ? "load" : "store");
return false;
}
- else if (tree_int_cst_compare (nested_in_vect_loop
- ? STMT_VINFO_DR_STEP (stmt_info)
- : DR_STEP (dr), size_zero_node) <= 0)
- return false;
else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|| !can_vec_mask_load_store_p (TYPE_MODE (vectype),
TYPE_MODE (mask_vectype),
@@ -5340,27 +5437,6 @@ ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
}
-/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
- reversal of the vector elements. If that is impossible to do,
- returns NULL. */
-
-static tree
-perm_mask_for_reverse (tree vectype)
-{
- int i, nunits;
- unsigned char *sel;
-
- nunits = TYPE_VECTOR_SUBPARTS (vectype);
- sel = XALLOCAVEC (unsigned char, nunits);
-
- for (i = 0; i < nunits; ++i)
- sel[i] = nunits - 1 - i;
-
- if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
- return NULL_TREE;
- return vect_gen_perm_mask_checked (vectype, sel);
-}
-
/* Function vectorizable_store.
Check if STMT defines a non scalar data-ref (array/pointer/structure) that
@@ -5400,7 +5476,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
vec<tree> oprnds = vNULL;
vec<tree> result_chain = vNULL;
bool inv_p;
- bool negative = false;
tree offset = NULL_TREE;
vec<tree> vec_oprnds = vNULL;
bool slp = (slp_node != NULL);
@@ -5504,44 +5579,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- if (!STMT_VINFO_STRIDED_P (stmt_info))
- {
- negative =
- tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
- ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
- size_zero_node) < 0;
- if (negative && ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types with negative step.\n");
- return false;
- }
- if (negative)
- {
- alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
- if (alignment_support_scheme != dr_aligned
- && alignment_support_scheme != dr_unaligned_supported)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step but alignment required.\n");
- return false;
- }
- if (dt != vect_constant_def
- && dt != vect_external_def
- && !perm_mask_for_reverse (vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step and reversing not supported.\n");
- return false;
- }
- }
- }
-
vect_memory_access_type memory_access_type;
- if (!get_load_store_type (stmt, vectype, slp, vls_type,
+ if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
&memory_access_type, &gs_info))
return false;
@@ -5947,7 +5986,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
- if (negative)
+ if (memory_access_type == VMAT_CONTIGUOUS_DOWN
+ || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
if (memory_access_type == VMAT_LOAD_STORE_LANES)
@@ -6169,9 +6209,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
misalign);
- if (negative
- && dt != vect_constant_def
- && dt != vect_external_def)
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
tree perm_mask = perm_mask_for_reverse (vectype);
tree perm_dest
@@ -6375,7 +6413,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
gimple *first_stmt;
gimple *first_stmt_for_drptr = NULL;
bool inv_p;
- bool negative = false;
bool compute_in_loop = false;
struct loop *at_loop;
int vec_num;
@@ -6531,55 +6568,10 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
vect_memory_access_type memory_access_type;
- if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD,
+ if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
&memory_access_type, &gs_info))
return false;
- if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && !STMT_VINFO_STRIDED_P (stmt_info))
- {
- negative = tree_int_cst_compare (nested_in_vect_loop
- ? STMT_VINFO_DR_STEP (stmt_info)
- : DR_STEP (dr),
- size_zero_node) < 0;
- if (negative && ncopies > 1)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "multiple types with negative step.\n");
- return false;
- }
-
- if (negative)
- {
- if (grouped_load)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step for group load not supported"
- "\n");
- return false;
- }
- alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
- if (alignment_support_scheme != dr_aligned
- && alignment_support_scheme != dr_unaligned_supported)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step but alignment required.\n");
- return false;
- }
- if (!perm_mask_for_reverse (vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "negative step and reversing not supported."
- "\n");
- return false;
- }
- }
- }
-
if (!vec_stmt) /* transformation not required. */
{
if (!slp)
@@ -7120,7 +7112,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
else
at_loop = loop;
- if (negative)
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
if (memory_access_type == VMAT_LOAD_STORE_LANES)
@@ -7409,7 +7401,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
}
- if (negative)
+ if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
tree perm_mask = perm_mask_for_reverse (vectype);
new_temp = permute_vec_elements (new_temp, new_temp,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index ef69b7e..2cfb72a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -484,14 +484,26 @@ enum slp_vect_type {
/* Describes how we're going to vectorize an individual load or store,
or a group of loads or stores. */
enum vect_memory_access_type {
+ /* An access to an invariant address. This is used only for loads. */
+ VMAT_INVARIANT,
+
/* A simple contiguous access. */
VMAT_CONTIGUOUS,
+ /* A contiguous access that goes down in memory rather than up,
+ with no additional permutation. This is used only for stores
+ of invariants. */
+ VMAT_CONTIGUOUS_DOWN,
+
/* A simple contiguous access in which the elements need to be permuted
after loading or before storing. Only used for loop vectorization;
SLP uses separate permutes. */
VMAT_CONTIGUOUS_PERMUTE,
+ /* A simple contiguous access in which the elements need to be reversed
+ after loading or before storing. */
+ VMAT_CONTIGUOUS_REVERSE,
+
/* An access that uses IFN_LOAD_LANES or IFN_STORE_LANES. */
VMAT_LOAD_STORE_LANES,
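
As a side note (again not part of the patch), the reversal that
VMAT_CONTIGUOUS_REVERSE relies on is the selector built by
perm_mask_for_reverse above: sel[i] = nunits - 1 - i.  A standalone sketch of
that permutation on a 4-element "vector", using plain arrays instead of GCC
internals:

#include <stdio.h>

#define NUNITS 4

int
main (void)
{
  int vec[NUNITS] = { 10, 20, 30, 40 };
  unsigned char sel[NUNITS];
  int out[NUNITS];

  /* The selector perm_mask_for_reverse encodes: nunits-1, ..., 1, 0.  */
  for (int i = 0; i < NUNITS; i++)
    sel[i] = NUNITS - 1 - i;

  /* Applying the permutation reverses the element order.  */
  for (int i = 0; i < NUNITS; i++)
    out[i] = vec[sel[i]];

  for (int i = 0; i < NUNITS; i++)
    printf ("%d ", out[i]);   /* prints: 40 30 20 10 */
  printf ("\n");
  return 0;
}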