about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2015-11-17 18:55:55 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2015-11-17 18:55:55 +0000
commit 70439f0d61b811fa5b9a77fcdf40c6353daa8f75 (patch)
tree 9b4ddd183b2f4dd515f06826aaa396589cf86c98 /gcc
parent 10766209ec09ef42deb8cb877f1893a8a03f2a97 (diff)
download gcc-70439f0d61b811fa5b9a77fcdf40c6353daa8f75.zip
gcc-70439f0d61b811fa5b9a77fcdf40c6353daa8f75.tar.gz
gcc-70439f0d61b811fa5b9a77fcdf40c6353daa8f75.tar.bz2
Vectorize internal functions
This patch tries to vectorize built-in and internal functions as
internal functions first, falling back on the current built-in
target hooks otherwise.

This means that we'll automatically pick up vector versions of optabs
without the target having to implement any special hooks. E.g. we'll
use V4SF sqrt if the target defines a "sqrtv4sf2" optab. As well as
being simpler, it means that the target-independent code has more
idea what the vectorized function does.

Tested on x86_64-linux-gnu, aarch64-linux-gnu, arm-linux-gnu and
powerpc64-linux-gnu.

gcc/
* internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
* internal-fn.c (direct_internal_fn_array): Update accordingly.
* tree-vectorizer.h (vectorizable_function): Delete.
* tree-vect-stmts.c: Include internal-fn.h.
(vectorizable_internal_function): New function.
(vectorizable_function): Inline into...
(vectorizable_call): ...here. Explicitly reject calls that read
from or write to memory. Try using an internal function before
falling back on the old vectorizable_function behavior.

From-SVN: r230492
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 12
-rw-r--r-- gcc/internal-fn.c 14
-rw-r--r-- gcc/internal-fn.h 8
-rw-r--r-- gcc/tree-vect-stmts.c 102
-rw-r--r-- gcc/tree-vectorizer.h 1
5 files changed, 100 insertions, 37 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5c6a83d..cdf4e6e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,17 @@
2015-11-17 Richard Sandiford <richard.sandiford@arm.com>
+ * internal-fn.h (direct_internal_fn_info): Add vectorizable flag.
+ * internal-fn.c (direct_internal_fn_array): Update accordingly.
+ * tree-vectorizer.h (vectorizable_function): Delete.
+ * tree-vect-stmts.c: Include internal-fn.h.
+ (vectorizable_internal_function): New function.
+ (vectorizable_function): Inline into...
+ (vectorizable_call): ...here. Explicitly reject calls that read
+ from or write to memory. Try using an internal function before
+ falling back on the old vectorizable_function behavior.
+
+2015-11-17 Richard Sandiford <richard.sandiford@arm.com>
+
* target.def (builtin_vectorized_function): Take a combined_fn (in
the form of an unsigned int) rather than a function decl.
(builtin_md_vectorized_function): New.
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 06c5d9e..df3b7dc 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -69,13 +69,13 @@ init_internal_fns ()
/* Create static initializers for the information returned by
direct_internal_fn. */
-#define not_direct { -2, -2 }
-#define mask_load_direct { -1, 2 }
-#define load_lanes_direct { -1, -1 }
-#define mask_store_direct { 3, 2 }
-#define store_lanes_direct { 0, 0 }
-#define unary_direct { 0, 0 }
-#define binary_direct { 0, 0 }
+#define not_direct { -2, -2, false }
+#define mask_load_direct { -1, 2, false }
+#define load_lanes_direct { -1, -1, false }
+#define mask_store_direct { 3, 2, false }
+#define store_lanes_direct { 0, 0, false }
+#define unary_direct { 0, 0, true }
+#define binary_direct { 0, 0, true }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 6cb123f..aea6abd 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -134,6 +134,14 @@ struct direct_internal_fn_info
function isn't directly mapped to an optab. */
signed int type0 : 8;
signed int type1 : 8;
+ /* True if the function is pointwise, so that it can be vectorized by
+ converting the return type and all argument types to vectors of the
+ same number of elements. E.g. we can vectorize an IFN_SQRT on
+ floats as an IFN_SQRT on vectors of N floats.
+
+ This only needs 1 bit, but occupies the full 16 to ensure a nice
+ layout. */
+ unsigned int vectorizable : 16;
};
extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1];
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 0a3cac5..4bb58b9 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
+#include "internal-fn.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@@ -1641,27 +1642,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
add_stmt_to_eh_lp (vec_stmt, lp_nr);
}
-/* Checks if CALL can be vectorized in type VECTYPE. Returns
- a function declaration if the target has a vectorized version
- of the function, or NULL_TREE if the function cannot be vectorized. */
+/* We want to vectorize a call to combined function CFN with function
+ decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
+ as the types of all inputs. Check whether this is possible using
+ an internal function, returning its code if so or IFN_LAST if not. */
-tree
-vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
+static internal_fn
+vectorizable_internal_function (combined_fn cfn, tree fndecl,
+ tree vectype_out, tree vectype_in)
{
- /* We only handle functions that do not read or clobber memory. */
- if (gimple_vuse (call))
- return NULL_TREE;
-
- combined_fn fn = gimple_call_combined_fn (call);
- if (fn != CFN_LAST)
- return targetm.vectorize.builtin_vectorized_function
- (fn, vectype_out, vectype_in);
-
- if (gimple_call_builtin_p (call, BUILT_IN_MD))
- return targetm.vectorize.builtin_md_vectorized_function
- (gimple_call_fndecl (call), vectype_out, vectype_in);
-
- return NULL_TREE;
+ internal_fn ifn;
+ if (internal_fn_p (cfn))
+ ifn = as_internal_fn (cfn);
+ else
+ ifn = associated_internal_fn (fndecl);
+ if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
+ {
+ const direct_internal_fn_info &info = direct_internal_fn (ifn);
+ if (info.vectorizable)
+ {
+ tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
+ tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
+ if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1)))
+ return ifn;
+ }
+ }
+ return IFN_LAST;
}
@@ -2263,15 +2269,43 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
else
return false;
+ /* We only handle functions that do not read or clobber memory. */
+ if (gimple_vuse (stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "function reads from or writes to memory.\n");
+ return false;
+ }
+
/* For now, we only vectorize functions if a target specific builtin
is available. TODO -- in some cases, it might be profitable to
insert the calls for pieces of the vector, in order to be able
to vectorize other operations in the loop. */
- fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
- if (fndecl == NULL_TREE)
+ fndecl = NULL_TREE;
+ internal_fn ifn = IFN_LAST;
+ combined_fn cfn = gimple_call_combined_fn (stmt);
+ tree callee = gimple_call_fndecl (stmt);
+
+ /* First try using an internal function. */
+ if (cfn != CFN_LAST)
+ ifn = vectorizable_internal_function (cfn, callee, vectype_out,
+ vectype_in);
+
+ /* If that fails, try asking for a target-specific built-in function. */
+ if (ifn == IFN_LAST)
+ {
+ if (cfn != CFN_LAST)
+ fndecl = targetm.vectorize.builtin_vectorized_function
+ (cfn, vectype_out, vectype_in);
+ else
+ fndecl = targetm.vectorize.builtin_md_vectorized_function
+ (callee, vectype_out, vectype_in);
+ }
+
+ if (ifn == IFN_LAST && !fndecl)
{
- if (gimple_call_internal_p (stmt)
- && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
+ if (cfn == CFN_GOMP_SIMD_LANE
&& !slp_node
&& loop_vinfo
&& LOOP_VINFO_LOOP (loop_vinfo)->simduid
@@ -2292,8 +2326,6 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
}
- gcc_assert (!gimple_vuse (stmt));
-
if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else if (modifier == NARROW)
@@ -2355,7 +2387,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
vec<tree> vec_oprndsk = vec_defs[k];
vargs[k] = vec_oprndsk[i];
}
- new_stmt = gimple_build_call_vec (fndecl, vargs);
+ if (ifn != IFN_LAST)
+ new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+ else
+ new_stmt = gimple_build_call_vec (fndecl, vargs);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2403,7 +2438,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
else
{
- new_stmt = gimple_build_call_vec (fndecl, vargs);
+ if (ifn != IFN_LAST)
+ new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+ else
+ new_stmt = gimple_build_call_vec (fndecl, vargs);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
}
@@ -2449,7 +2487,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
vargs.quick_push (vec_oprndsk[i]);
vargs.quick_push (vec_oprndsk[i + 1]);
}
- new_stmt = gimple_build_call_vec (fndecl, vargs);
+ if (ifn != IFN_LAST)
+ new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+ else
+ new_stmt = gimple_build_call_vec (fndecl, vargs);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -2487,7 +2528,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
vargs.quick_push (vec_oprnd1);
}
- new_stmt = gimple_build_call_vec (fndecl, vargs);
+ if (ifn != IFN_LAST)
+ new_stmt = gimple_build_call_internal_vec (ifn, vargs);
+ else
+ new_stmt = gimple_build_call_vec (fndecl, vargs);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1b5c95c..7867c26 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -962,7 +962,6 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
int *, vec<tree> *);
extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
extern void free_stmt_vec_info (gimple *stmt);
-extern tree vectorizable_function (gcall *, tree, tree);
extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
stmt_vector_for_cost *,
stmt_vector_for_cost *);