aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog19
-rw-r--r--gcc/config/i386/i386.c23
-rw-r--r--gcc/config/i386/sse.md20
-rw-r--r--gcc/testsuite/ChangeLog7
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-lrint-vec.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/sse2-lrintf-vec.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/vectorize2.c30
-rw-r--r--gcc/tree-vect-transform.c163
8 files changed, 307 insertions, 51 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2629c90..082dfa8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2007-06-29 Uros Bizjak <ubizjak@gmail.com>
+
+ PR tree-optimization/24659
+ * tree-vect-transform.c (vectorizable_call): Handle
+ (nunits_in == nunits_out / 2) and (nunits_out == nunits_in / 2) cases.
+
+ * config/i386/sse.md (vec_pack_sfix_v2df): New expander.
+ * config/i386/i386.c (enum ix86_builtins)
+ [IX86_BUILTIN_VEC_PACK_SFIX]: New constant.
+ (struct bdesc_2arg) [__builtin_ia32_vec_pack_sfix]: New builtin
+ description.
+ (ix86_init_mmx_sse_builtins): Define all builtins with 2 arguments as
+ const using def_builtin_const.
+ (ix86_expand_binop_builtin): Remove bogus assert() that insn wants
+ input operands in the same modes as the result.
+ (ix86_builtin_vectorized_function): Handle BUILT_IN_LRINT.
+
2007-06-29 Richard Sandiford <rsandifo@nildram.co.uk>
* df-problems.c (df_set_unused_notes_for_mw): Fix formatting.
@@ -264,7 +281,7 @@
* gimplify.c (mark_addressable): New function.
(gimplify_modify_expr_rhs, gimplify_addr_expr, gimplify_expr): Use it.
-2007-06-19 Uros Bizjak <ubizjak@gmail.com>
+2007-06-22 Uros Bizjak <ubizjak@gmail.com>
PR middle-end/32374
* expr.c (store_constructor): Do not clobber non-zeroed memory.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5c5cb52..96c948f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16820,6 +16820,8 @@ enum ix86_builtins
IX86_BUILTIN_VEC_SET_V4HI,
IX86_BUILTIN_VEC_SET_V16QI,
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
IX86_BUILTIN_CRC32HI,
@@ -17167,6 +17169,8 @@ static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
@@ -17563,6 +17567,9 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
tree v4si_ftype_v2df
= build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4si_ftype_v2df_v2df
+ = build_function_type_list (V4SI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
tree v2si_ftype_v2df
= build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
tree v4sf_ftype_v2df
@@ -17906,7 +17913,10 @@ ix86_init_mmx_sse_builtins (void)
|| d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
type = v2di_ftype_v2df_v2df;
- def_builtin (d->mask, d->name, type, d->code);
+ if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
+ type = v4si_ftype_v2df_v2df;
+
+ def_builtin_const (d->mask, d->name, type, d->code);
}
/* Add all builtins that are more or less simple operations on 1 operand. */
@@ -18457,11 +18467,6 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
op1 = gen_lowpart (TImode, x);
}
- /* The insn must want input operands in the same modes as the
- result. */
- gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
-
if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
@@ -19863,6 +19868,12 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_builtins[IX86_BUILTIN_SQRTPS];
return NULL_TREE;
+ case BUILT_IN_LRINT:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ return NULL_TREE;
+
case BUILT_IN_LRINTF:
if (out_mode == SImode && out_n == 4
&& in_mode == SFmode && in_n == 4)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 65abbcf..12b8cc8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2421,6 +2421,26 @@
DONE;
})
+(define_expand "vec_pack_sfix_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d023d47..2ac00bf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2007-06-29 Uros Bizjak <ubizjak@gmail.com>
+
+ PR tree-optimization/24659
+ * gcc.target/i386/vectorize2.c: New test.
+ * gcc.target/i386/sse2-lrint-vec.c: New runtime test.
+ * gcc.target/i386/sse2-lrintf-vec.c: Ditto.
+
2007-06-29 Eric Botcazou <ebotcazou@adacore.com>
* gcc.dg/pointer-arith-9.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/sse2-lrint-vec.c b/gcc/testsuite/gcc.target/i386/sse2-lrint-vec.c
new file mode 100644
index 0000000..35a7ac8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-lrint-vec.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2" } */
+
+#include "../../gcc.dg/i386-cpuid.h"
+
+extern long lrint (double);
+extern void abort (void);
+
+#define N 32
+
+int __attribute__((noinline))
+main1 ()
+{
+ double a[N] = {0.4,3.5,6.6,9.4,12.5,15.6,18.4,21.5,24.6,27.4,30.5,33.6,36.4,39.5,42.6,45.4,0.5,3.6,6.4,9.5,12.6,15.4,18.5,21.6,24.4,27.5,30.6,33.4,36.5,39.6,42.4,45.5};
+ long r[N];
+
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ r[i] = lrint (a[i]);
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (r[i] != lrint (a[i]))
+ abort();
+ }
+
+ return 0;
+}
+
+int
+main ()
+{
+ unsigned long cpu_facilities;
+
+ cpu_facilities = i386_cpuid ();
+
+ if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ /* If host has no vector support, pass. */
+ return 0;
+
+ main1 ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse2-lrintf-vec.c b/gcc/testsuite/gcc.target/i386/sse2-lrintf-vec.c
new file mode 100644
index 0000000..dc4ae93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-lrintf-vec.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2" } */
+
+#include "../../gcc.dg/i386-cpuid.h"
+
+extern long lrintf (float);
+extern void abort (void);
+
+#define N 32
+
+int __attribute__((noinline))
+main1 ()
+{
+ float a[N] = {0.4,3.5,6.6,9.4,12.5,15.6,18.4,21.5,24.6,27.4,30.5,33.6,36.4,39.5,42.6,45.4,0.5,3.6,6.4,9.5,12.6,15.4,18.5,21.6,24.4,27.5,30.6,33.4,36.5,39.6,42.4,45.5};
+ long r[N];
+
+ int i;
+
+ for (i = 0; i < N; i++)
+ {
+ r[i] = lrintf (a[i]);
+ }
+
+ /* check results: */
+ for (i = 0; i < N; i++)
+ {
+ if (r[i] != lrintf (a[i]))
+ abort();
+ }
+
+ return 0;
+}
+
+int
+main ()
+{
+ unsigned long cpu_facilities;
+
+ cpu_facilities = i386_cpuid ();
+
+ if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
+ /* If host has no vector support, pass. */
+ return 0;
+
+ main1 ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vectorize2.c b/gcc/testsuite/gcc.target/i386/vectorize2.c
new file mode 100644
index 0000000..4196487
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vectorize2.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse2 -mfpmath=sse" } */
+
+double a[256];
+int b[256];
+unsigned short c[256];
+
+extern long lrint (double);
+
+void foo(void)
+{
+ int i;
+
+ for (i=0; i<256; ++i)
+ b[i] = lrint (a[i]);
+}
+
+void bar(void)
+{
+ int i;
+
+ for (i=0; i<256; ++i)
+ {
+ b[i] = lrint (a[i]);
+ c[i] += c[i];
+ }
+}
+
+/* { dg-final { scan-assembler "cvtpd2dq" } } */
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 66228f0..5fdbbe1 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -2253,13 +2253,19 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
tree scalar_dest;
tree operation;
tree op, type;
+ tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
tree vectype_out, vectype_in;
+ int nunits_in;
+ int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
enum vect_def_type dt[2];
+ tree new_stmt;
int ncopies, j, nargs;
call_expr_arg_iterator iter;
+ tree vargs;
+ enum { NARROW, NONE, WIDEN } modifier;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
@@ -2291,12 +2297,10 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
nargs = 0;
FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
{
- ++nargs;
-
/* Bail out if the function has more than two arguments, we
do not have interesting builtin functions to vectorize with
more than two arguments. */
- if (nargs > 2)
+ if (nargs >= 2)
return false;
/* We can only handle calls with arguments of the same type. */
@@ -2309,12 +2313,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
}
rhs_type = TREE_TYPE (op);
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs-1]))
+ if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
return false;
}
+
+ ++nargs;
}
/* No arguments is also not good. */
@@ -2322,15 +2328,20 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
return false;
vectype_in = get_vectype_for_scalar_type (rhs_type);
+ nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
vectype_out = get_vectype_for_scalar_type (lhs_type);
+ nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- /* Only handle the case of vectors with the same number of elements.
- FIXME: We need a way to handle for example the SSE2 cvtpd2dq
- instruction which converts V2DFmode to V4SImode but only
- using the lower half of the V4SImode result. */
- if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
+ /* FORNOW */
+ if (nunits_in == nunits_out / 2)
+ modifier = NARROW;
+ else if (nunits_out == nunits_in)
+ modifier = NONE;
+ else if (nunits_out == nunits_in / 2)
+ modifier = WIDEN;
+ else
return false;
/* For now, we only vectorize functions if a target specific builtin
@@ -2348,8 +2359,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
- ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- / TYPE_VECTOR_SUBPARTS (vectype_out));
+ if (modifier == NARROW)
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+
+ /* Sanity check: make sure that at least one copy of the vectorized stmt
+ needs to be generated. */
+ gcc_assert (ncopies >= 1);
if (!vec_stmt) /* transformation not required. */
{
@@ -2365,55 +2382,113 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
- gcc_assert (ncopies >= 1);
-
/* Handle def. */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
prev_stmt_info = NULL;
- for (j = 0; j < ncopies; ++j)
- {
- tree new_stmt, vargs;
- tree vec_oprnd[2];
- int n;
-
- /* Build argument list for the vectorized call. */
- /* FIXME: Rewrite this so that it doesn't construct a temporary
- list. */
- vargs = NULL_TREE;
- n = -1;
- FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
+ switch (modifier)
+ {
+ case NONE:
+ for (j = 0; j < ncopies; ++j)
+ {
+ /* Build argument list for the vectorized call. */
+ /* FIXME: Rewrite this so that it doesn't
+ construct a temporary list. */
+ vargs = NULL_TREE;
+ nargs = 0;
+ FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
+ {
+ if (j == 0)
+ vec_oprnd0
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
+
+ vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
+
+ ++nargs;
+ }
+ vargs = nreverse (vargs);
+
+ rhs = build_function_call_expr (fndecl, vargs);
+ new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
+
+ if (j == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
+
+ break;
+
+ case NARROW:
+ for (j = 0; j < ncopies; ++j)
{
- ++n;
+ /* Build argument list for the vectorized call. */
+ /* FIXME: Rewrite this so that it doesn't
+ construct a temporary list. */
+ vargs = NULL_TREE;
+ nargs = 0;
+ FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
+ {
+ if (j == 0)
+ {
+ vec_oprnd0
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ vec_oprnd1
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
+ }
+ else
+ {
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
+ vec_oprnd1
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
+ }
+
+ vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
+ vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
+
+ ++nargs;
+ }
+ vargs = nreverse (vargs);
+
+ rhs = build_function_call_expr (fndecl, vargs);
+ new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
if (j == 0)
- vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
else
- vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
}
- vargs = nreverse (vargs);
- rhs = build_function_call_expr (fndecl, vargs);
- new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+ *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
- vect_finish_stmt_generation (stmt, new_stmt, bsi);
+ break;
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
+ case WIDEN:
+ /* No current target implements this case. */
+ return false;
}
- /* The call in STMT might prevent it from being removed in dce. We however
- cannot remove it here, due to the way the ssa name it defines is mapped
- to the new definition. So just replace rhs of the statement with something
- harmless. */
+ /* The call in STMT might prevent it from being removed in dce.
+ We however cannot remove it here, due to the way the ssa name
+ it defines is mapped to the new definition. So just replace
+ rhs of the statement with something harmless. */
type = TREE_TYPE (scalar_dest);
GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
update_stmt (stmt);