diff options
| -rw-r--r-- | gcc/ChangeLog | 12 |
| -rw-r--r-- | gcc/internal-fn.c | 14 |
| -rw-r--r-- | gcc/internal-fn.h | 8 |
| -rw-r--r-- | gcc/tree-vect-stmts.c | 102 |
| -rw-r--r-- | gcc/tree-vectorizer.h | 1 |
5 files changed, 100 insertions, 37 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c6a83d..cdf4e6e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,17 @@ 2015-11-17 Richard Sandiford <richard.sandiford@arm.com> + * internal-fn.h (direct_internal_fn_info): Add vectorizable flag. + * internal-fn.c (direct_internal_fn_array): Update accordingly. + * tree-vectorizer.h (vectorizable_function): Delete. + * tree-vect-stmts.c: Include internal-fn.h. + (vectorizable_internal_function): New function. + (vectorizable_function): Inline into... + (vectorizable_call): ...here. Explicitly reject calls that read + from or write to memory. Try using an internal function before + falling back on the old vectorizable_function behavior. + +2015-11-17 Richard Sandiford <richard.sandiford@arm.com> + * target.def (builtin_vectorized_function): Take a combined_fn (in the form of an unsigned int) rather than a function decl. (builtin_md_vectorized_function): New. diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 06c5d9e..df3b7dc 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -69,13 +69,13 @@ init_internal_fns () /* Create static initializers for the information returned by direct_internal_fn. 
*/ -#define not_direct { -2, -2 } -#define mask_load_direct { -1, 2 } -#define load_lanes_direct { -1, -1 } -#define mask_store_direct { 3, 2 } -#define store_lanes_direct { 0, 0 } -#define unary_direct { 0, 0 } -#define binary_direct { 0, 0 } +#define not_direct { -2, -2, false } +#define mask_load_direct { -1, 2, false } +#define load_lanes_direct { -1, -1, false } +#define mask_store_direct { 3, 2, false } +#define store_lanes_direct { 0, 0, false } +#define unary_direct { 0, 0, true } +#define binary_direct { 0, 0, true } const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 6cb123f..aea6abd 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -134,6 +134,14 @@ struct direct_internal_fn_info function isn't directly mapped to an optab. */ signed int type0 : 8; signed int type1 : 8; + /* True if the function is pointwise, so that it can be vectorized by + converting the return type and all argument types to vectors of the + same number of elements. E.g. we can vectorize an IFN_SQRT on + floats as an IFN_SQRT on vectors of N floats. + + This only needs 1 bit, but occupies the full 16 to ensure a nice + layout. */ + unsigned int vectorizable : 16; }; extern const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1]; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 0a3cac5..4bb58b9 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "tree-vectorizer.h" #include "builtins.h" +#include "internal-fn.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -1641,27 +1642,32 @@ vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt, add_stmt_to_eh_lp (vec_stmt, lp_nr); } -/* Checks if CALL can be vectorized in type VECTYPE. 
Returns - a function declaration if the target has a vectorized version - of the function, or NULL_TREE if the function cannot be vectorized. */ +/* We want to vectorize a call to combined function CFN with function + decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN + as the types of all inputs. Check whether this is possible using + an internal function, returning its code if so or IFN_LAST if not. */ -tree -vectorizable_function (gcall *call, tree vectype_out, tree vectype_in) +static internal_fn +vectorizable_internal_function (combined_fn cfn, tree fndecl, + tree vectype_out, tree vectype_in) { - /* We only handle functions that do not read or clobber memory. */ - if (gimple_vuse (call)) - return NULL_TREE; - - combined_fn fn = gimple_call_combined_fn (call); - if (fn != CFN_LAST) - return targetm.vectorize.builtin_vectorized_function - (fn, vectype_out, vectype_in); - - if (gimple_call_builtin_p (call, BUILT_IN_MD)) - return targetm.vectorize.builtin_md_vectorized_function - (gimple_call_fndecl (call), vectype_out, vectype_in); - - return NULL_TREE; + internal_fn ifn; + if (internal_fn_p (cfn)) + ifn = as_internal_fn (cfn); + else + ifn = associated_internal_fn (fndecl); + if (ifn != IFN_LAST && direct_internal_fn_p (ifn)) + { + const direct_internal_fn_info &info = direct_internal_fn (ifn); + if (info.vectorizable) + { + tree type0 = (info.type0 < 0 ? vectype_out : vectype_in); + tree type1 = (info.type1 < 0 ? vectype_out : vectype_in); + if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1))) + return ifn; + } + } + return IFN_LAST; } @@ -2263,15 +2269,43 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, else return false; + /* We only handle functions that do not read or clobber memory. 
*/ + if (gimple_vuse (stmt)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "function reads from or writes to memory.\n"); + return false; + } + /* For now, we only vectorize functions if a target specific builtin is available. TODO -- in some cases, it might be profitable to insert the calls for pieces of the vector, in order to be able to vectorize other operations in the loop. */ - fndecl = vectorizable_function (stmt, vectype_out, vectype_in); - if (fndecl == NULL_TREE) + fndecl = NULL_TREE; + internal_fn ifn = IFN_LAST; + combined_fn cfn = gimple_call_combined_fn (stmt); + tree callee = gimple_call_fndecl (stmt); + + /* First try using an internal function. */ + if (cfn != CFN_LAST) + ifn = vectorizable_internal_function (cfn, callee, vectype_out, + vectype_in); + + /* If that fails, try asking for a target-specific built-in function. */ + if (ifn == IFN_LAST) + { + if (cfn != CFN_LAST) + fndecl = targetm.vectorize.builtin_vectorized_function + (cfn, vectype_out, vectype_in); + else + fndecl = targetm.vectorize.builtin_md_vectorized_function + (callee, vectype_out, vectype_in); + } + + if (ifn == IFN_LAST && !fndecl) { - if (gimple_call_internal_p (stmt) - && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE + if (cfn == CFN_GOMP_SIMD_LANE && !slp_node && loop_vinfo && LOOP_VINFO_LOOP (loop_vinfo)->simduid @@ -2292,8 +2326,6 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, } } - gcc_assert (!gimple_vuse (stmt)); - if (slp_node || PURE_SLP_STMT (stmt_info)) ncopies = 1; else if (modifier == NARROW) @@ -2355,7 +2387,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, vec<tree> vec_oprndsk = vec_defs[k]; vargs[k] = vec_oprndsk[i]; } - new_stmt = gimple_build_call_vec (fndecl, vargs); + if (ifn != IFN_LAST) + new_stmt = gimple_build_call_internal_vec (ifn, vargs); + else + new_stmt = gimple_build_call_vec (fndecl, vargs); new_temp = make_ssa_name (vec_dest, 
new_stmt); gimple_call_set_lhs (new_stmt, new_temp); vect_finish_stmt_generation (stmt, new_stmt, gsi); @@ -2403,7 +2438,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, } else { - new_stmt = gimple_build_call_vec (fndecl, vargs); + if (ifn != IFN_LAST) + new_stmt = gimple_build_call_internal_vec (ifn, vargs); + else + new_stmt = gimple_build_call_vec (fndecl, vargs); new_temp = make_ssa_name (vec_dest, new_stmt); gimple_call_set_lhs (new_stmt, new_temp); } @@ -2449,7 +2487,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, vargs.quick_push (vec_oprndsk[i]); vargs.quick_push (vec_oprndsk[i + 1]); } - new_stmt = gimple_build_call_vec (fndecl, vargs); + if (ifn != IFN_LAST) + new_stmt = gimple_build_call_internal_vec (ifn, vargs); + else + new_stmt = gimple_build_call_vec (fndecl, vargs); new_temp = make_ssa_name (vec_dest, new_stmt); gimple_call_set_lhs (new_stmt, new_temp); vect_finish_stmt_generation (stmt, new_stmt, gsi); @@ -2487,7 +2528,10 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt, vargs.quick_push (vec_oprnd1); } - new_stmt = gimple_build_call_vec (fndecl, vargs); + if (ifn != IFN_LAST) + new_stmt = gimple_build_call_internal_vec (ifn, vargs); + else + new_stmt = gimple_build_call_vec (fndecl, vargs); new_temp = make_ssa_name (vec_dest, new_stmt); gimple_call_set_lhs (new_stmt, new_temp); vect_finish_stmt_generation (stmt, new_stmt, gsi); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 1b5c95c..7867c26 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -962,7 +962,6 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree, int *, vec<tree> *); extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *); extern void free_stmt_vec_info (gimple *stmt); -extern tree vectorizable_function (gcall *, tree, tree); extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, stmt_vector_for_cost 
*, stmt_vector_for_cost *); |
