about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog  20
-rw-r--r--  gcc/config/i386/i386.c  30
-rw-r--r--  gcc/doc/tm.texi  5
-rw-r--r--  gcc/target.h  2
-rw-r--r--  gcc/targhooks.c  3
-rw-r--r--  gcc/targhooks.h  2
-rw-r--r--  gcc/testsuite/ChangeLog  4
-rw-r--r--  gcc/testsuite/gcc.target/i386/vectorize3.c  30
-rw-r--r--  gcc/tree-vect-patterns.c  2
-rw-r--r--  gcc/tree-vect-transform.c  171
-rw-r--r--  gcc/tree-vectorizer.h  2
11 files changed, 189 insertions, 82 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e2b0f58..00483ff 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2007-02-05 Richard Guenther <rguenther@suse.de>
+
+ * tree-vectorizer.h (vectorizable_function): Add argument type
+ argument, change return type.
+ * tree-vect-patterns.c (vect_recog_pow_pattern): Adjust caller.
+ * tree-vect-transform.c (vectorizable_function): Handle extra
+ argument, return vectorized function decl.
+ (build_vectorized_function_call): Remove.
+ (vectorizable_call): Handle calls with result and argument types
+ differing. Handle loop vectorization factor correctly.
+ * targhooks.c (default_builtin_vectorized_function): Adjust for
+ extra argument.
+ * targhooks.h (default_builtin_vectorized_function): Likewise.
+ * target.h (builtin_vectorized_function): Add argument type
+ argument.
+ * config/i386/i386.c (ix86_builtin_vectorized_function): Handle
+ extra argument, allow vectorizing of lrintf.
+ * doc/tm.texi (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Adjust
+ documentation of target hook.
+
2007-02-05 Hans-Peter Nilsson <hp@axis.com>
PR target/30665
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e652d7e..ea12849 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1355,7 +1355,7 @@ static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
-static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
+static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
@@ -17661,29 +17661,41 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if it is not available. */
static tree
-ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
+ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
+ tree type_in)
{
- enum machine_mode el_mode;
- int n;
+ enum machine_mode in_mode, out_mode;
+ int in_n, out_n;
- if (TREE_CODE (type) != VECTOR_TYPE)
+ if (TREE_CODE (type_out) != VECTOR_TYPE
+ || TREE_CODE (type_in) != VECTOR_TYPE)
return NULL_TREE;
- el_mode = TYPE_MODE (TREE_TYPE (type));
- n = TYPE_VECTOR_SUBPARTS (type);
+ out_mode = TYPE_MODE (TREE_TYPE (type_out));
+ out_n = TYPE_VECTOR_SUBPARTS (type_out);
+ in_mode = TYPE_MODE (TREE_TYPE (type_in));
+ in_n = TYPE_VECTOR_SUBPARTS (type_in);
switch (fn)
{
case BUILT_IN_SQRT:
- if (el_mode == DFmode && n == 2)
+ if (out_mode == DFmode && out_n == 2
+ && in_mode == DFmode && in_n == 2)
return ix86_builtins[IX86_BUILTIN_SQRTPD];
return NULL_TREE;
case BUILT_IN_SQRTF:
- if (el_mode == SFmode && n == 4)
+ if (out_mode == SFmode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
return ix86_builtins[IX86_BUILTIN_SQRTPS];
return NULL_TREE;
+ case BUILT_IN_LRINTF:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == SFmode && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+ return NULL_TREE;
+
default:
;
}
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c9f6cb8..ebb3f2c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5384,11 +5384,12 @@ preserved (e.g. used only by a reduction computation). Otherwise, the
@code{widen_mult_hi/lo} idioms will be used.
@end deftypefn
-@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type})
+@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (enum built_in_function @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in})
This hook should return the decl of a function that implements the vectorized
variant of the builtin function with builtin function code @var{code} or
@code{NULL_TREE} if such a function is not available. The return type of
-the vectorized function shall be of vector type @var{vec_type}.
+the vectorized function shall be of vector type @var{vec_type_out} and the
+argument types should be @var{vec_type_in}.
@end deftypefn
@node Anchored Addresses
diff --git a/gcc/target.h b/gcc/target.h
index f4678e4..2d8d69b 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -395,7 +395,7 @@ struct gcc_target
/* Returns a code for builtin that realizes vectorized version of
function, or NULL_TREE if not available. */
- tree (* builtin_vectorized_function) (unsigned, tree);
+ tree (* builtin_vectorized_function) (unsigned, tree, tree);
/* Target builtin that implements vector widening multiplication.
builtin_mul_widen_eve computes the element-by-element products
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index e7bdf0b..ed4d890 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -323,7 +323,8 @@ default_invalid_within_doloop (rtx insn)
tree
default_builtin_vectorized_function (enum built_in_function fn ATTRIBUTE_UNUSED,
- tree type ATTRIBUTE_UNUSED)
+ tree type_out ATTRIBUTE_UNUSED,
+ tree type_in ATTRIBUTE_UNUSED)
{
return NULL_TREE;
}
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 5f63dd7..0b4ded9 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -57,7 +57,7 @@ extern const char * default_invalid_within_doloop (rtx);
extern bool default_narrow_bitfield (void);
-extern tree default_builtin_vectorized_function (enum built_in_function, tree);
+extern tree default_builtin_vectorized_function (enum built_in_function, tree, tree);
/* These are here, and not in hooks.[ch], because not all users of
hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d6f14ca..2b88029 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2007-02-05 Richard Guenther <rguenther@suse.de>
+
+ * gcc.target/i386/vectorize3.c: New testcase.
+
2007-02-05 Hans-Peter Nilsson <hp@axis.com>
PR target/30665
diff --git a/gcc/testsuite/gcc.target/i386/vectorize3.c b/gcc/testsuite/gcc.target/i386/vectorize3.c
new file mode 100644
index 0000000..2947acb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vectorize3.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse" } */
+
+float a[256];
+int b[256];
+unsigned short c[256];
+
+extern long lrintf (float);
+
+void foo(void)
+{
+ int i;
+
+ for (i=0; i<256; ++i)
+ b[i] = lrintf (a[i]);
+}
+
+void bar(void)
+{
+ int i;
+
+ for (i=0; i<256; ++i)
+ {
+ b[i] = lrintf (a[i]);
+ c[i] += c[i];
+ }
+}
+
+/* { dg-final { scan-assembler "cvtps2dq" } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f9d3de6..c9e34e3 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -488,7 +488,7 @@ vect_recog_pow_pattern (tree last_stmt, tree *type_in, tree *type_out)
if (*type_in)
{
newfn = build_function_call_expr (newfn, newarglist);
- if (vectorizable_function (newfn, *type_in))
+ if (vectorizable_function (newfn, *type_in, *type_in) != NULL_TREE)
return newfn;
}
}
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 846d52b..fc95e60 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -1579,47 +1579,28 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
}
/* Checks if CALL can be vectorized in type VECTYPE. Returns
- true if the target has a vectorized version of the function,
- or false if the function cannot be vectorized. */
+ a function declaration if the target has a vectorized version
+ of the function, or NULL_TREE if the function cannot be vectorized. */
-bool
-vectorizable_function (tree call, tree vectype)
+tree
+vectorizable_function (tree call, tree vectype_out, tree vectype_in)
{
tree fndecl = get_callee_fndecl (call);
+ enum built_in_function code;
/* We only handle functions that do not read or clobber memory -- i.e.
const or novops ones. */
if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
- return false;
+ return NULL_TREE;
if (!fndecl
|| TREE_CODE (fndecl) != FUNCTION_DECL
|| !DECL_BUILT_IN (fndecl))
- return false;
+ return NULL_TREE;
- if (targetm.vectorize.builtin_vectorized_function (DECL_FUNCTION_CODE (fndecl), vectype))
- return true;
-
- return false;
-}
-
-/* Returns an expression that performs a call to vectorized version
- of FNDECL in type VECTYPE, with the arguments given by ARGS.
- If extra statements need to be generated, they are inserted
- before BSI. */
-
-static tree
-build_vectorized_function_call (tree fndecl,
- tree vectype, tree args)
-{
- tree vfndecl;
- enum built_in_function code = DECL_FUNCTION_CODE (fndecl);
-
- /* The target specific builtin should be available. */
- vfndecl = targetm.vectorize.builtin_vectorized_function (code, vectype);
- gcc_assert (vfndecl != NULL_TREE);
-
- return build_function_call_expr (vfndecl, args);
+ code = DECL_FUNCTION_CODE (fndecl);
+ return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
+ vectype_in);
}
/* Function vectorizable_call.
@@ -1635,13 +1616,13 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree vec_dest;
tree scalar_dest;
tree operation;
- tree op, args, type;
- tree vec_oprnd, vargs, *pvargs_end;
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree args, type;
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
+ tree vectype_out, vectype_in;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- tree fndecl, rhs, new_temp, def, def_stmt;
- enum vect_def_type dt;
+ tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
+ enum vect_def_type dt[2];
+ int ncopies, j, nargs;
/* Is STMT a vectorizable call? */
if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
@@ -1653,31 +1634,68 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
operation = GIMPLE_STMT_OPERAND (stmt, 1);
if (TREE_CODE (operation) != CALL_EXPR)
return false;
-
+
+ /* Process function arguments. */
+ rhs_type = NULL_TREE;
+ for (args = TREE_OPERAND (operation, 1), nargs = 0;
+ args; args = TREE_CHAIN (args), ++nargs)
+ {
+ tree op = TREE_VALUE (args);
+
+ /* Bail out if the function has more than two arguments, we
+ do not have interesting builtin functions to vectorize with
+ more than two arguments. */
+ if (nargs >= 2)
+ return false;
+
+ /* We can only handle calls with arguments of the same type. */
+ if (rhs_type
+ && rhs_type != TREE_TYPE (op))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "argument types differ.");
+ return false;
+ }
+ rhs_type = TREE_TYPE (op);
+
+ if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
+
+ /* No arguments is also not good. */
+ if (nargs == 0)
+ return false;
+
+ vectype_in = get_vectype_for_scalar_type (rhs_type);
+
+ lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
+ vectype_out = get_vectype_for_scalar_type (lhs_type);
+
+ /* Only handle the case of vectors with the same number of elements.
+ FIXME: We need a way to handle for example the SSE2 cvtpd2dq
+ instruction which converts V2DFmode to V4SImode but only
+ using the lower half of the V4SImode result. */
+ if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
+ return false;
+
/* For now, we only vectorize functions if a target specific builtin
is available. TODO -- in some cases, it might be profitable to
insert the calls for pieces of the vector, in order to be able
to vectorize other operations in the loop. */
- if (!vectorizable_function (operation, vectype))
+ fndecl = vectorizable_function (operation, vectype_out, vectype_in);
+ if (fndecl == NULL_TREE)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "function is not vectorizable.");
return false;
}
- gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
- for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
- {
- op = TREE_VALUE (args);
-
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "use not simple.");
- return false;
- }
- }
+ gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
if (!vec_stmt) /* transformation not required. */
{
@@ -1690,29 +1708,50 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
+ ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ / TYPE_VECTOR_SUBPARTS (vectype_out));
+ gcc_assert (ncopies >= 1);
+
/* Handle def. */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
- vec_dest = vect_create_destination_var (scalar_dest, vectype);
+ vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
- /* Handle uses. */
- vargs = NULL_TREE;
- pvargs_end = &vargs;
- for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
+ prev_stmt_info = NULL;
+ for (j = 0; j < ncopies; ++j)
{
- op = TREE_VALUE (args);
- vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
-
- *pvargs_end = tree_cons (NULL_TREE, vec_oprnd, NULL_TREE);
- pvargs_end = &TREE_CHAIN (*pvargs_end);
- }
+ tree new_stmt, vargs;
+ tree vec_oprnd[2];
+ int n;
+
+ /* Build argument list for the vectorized call. */
+ vargs = NULL_TREE;
+ for (args = TREE_OPERAND (operation, 1), n = 0;
+ args; args = TREE_CHAIN (args), ++n)
+ {
+ tree op = TREE_VALUE (args);
- fndecl = get_callee_fndecl (operation);
- rhs = build_vectorized_function_call (fndecl, vectype, vargs);
- *vec_stmt = build2 (GIMPLE_MODIFY_STMT, vectype, vec_dest, rhs);
- new_temp = make_ssa_name (vec_dest, *vec_stmt);
- GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
+ if (j == 0)
+ vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
- vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
+ vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
+ }
+ vargs = nreverse (vargs);
+
+ rhs = build_function_call_expr (fndecl, vargs);
+ new_stmt = build2 (GIMPLE_MODIFY_STMT, NULL_TREE, vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
+
+ if (j == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
/* The call in STMT might prevent it from being removed in dce. We however
cannot remove it here, due to the way the ssa name it defines is mapped
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index a13ee1e..b523d88 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -412,7 +412,7 @@ extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
-extern bool vectorizable_function (tree, tree);
+extern tree vectorizable_function (tree, tree, tree);
extern bool vectorizable_call (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);