aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2007-06-29 12:30:06 +0200
committer: Uros Bizjak <uros@gcc.gnu.org> 2007-06-29 12:30:06 +0200
commit: b40c4f68160a4b556135185e8b5e013ab77ba2ec (patch)
tree: 332c2a8a980e60c5f65e31276d3aa5472059f7e9 /gcc/config
parent: 690f48b1b48b5626cb9b78944f1da201d11e00ec (diff)
download: gcc-b40c4f68160a4b556135185e8b5e013ab77ba2ec.zip
gcc-b40c4f68160a4b556135185e8b5e013ab77ba2ec.tar.gz
gcc-b40c4f68160a4b556135185e8b5e013ab77ba2ec.tar.bz2
re PR tree-optimization/24659 (Conversions are not vectorized)
PR tree-optimization/24659
* tree-vect-transform.c (vectorizable_call): Handle (nunits_in == nunits_out / 2) and (nunits_out == nunits_in / 2) cases.
* config/i386/sse.md (vec_pack_sfix_v2df): New expander.
* config/i386/i386.c (enum ix86_builtins) [IX86_BUILTIN_VEC_PACK_SFIX]: New constant.
(struct bdesc_2arg) [__builtin_ia32_vec_pack_sfix]: New builtin description.
(ix86_init_mmx_sse_builtins): Define all builtins with 2 arguments as const using def_builtin_const.
(ix86_expand_binop_builtin): Remove bogus assert() that insn wants input operands in the same modes as the result.
(ix86_builtin_vectorized_function): Handle BUILT_IN_LRINT.

testsuite/ChangeLog:

PR tree-optimization/24659
* gcc.target/i386/vectorize2.c: New test.
* gcc.target/i386/sse2-lrint-vec.c: New runtime test.
* gcc.target/i386/sse2-lrintf-vec.c: Ditto.

From-SVN: r126111
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/i386.c | 23
-rw-r--r-- gcc/config/i386/sse.md | 20
2 files changed, 37 insertions, 6 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5c5cb52..96c948f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16820,6 +16820,8 @@ enum ix86_builtins
IX86_BUILTIN_VEC_SET_V4HI,
IX86_BUILTIN_VEC_SET_V16QI,
+ IX86_BUILTIN_VEC_PACK_SFIX,
+
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
IX86_BUILTIN_CRC32HI,
@@ -17167,6 +17169,8 @@ static const struct builtin_description bdesc_2arg[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
+
/* SSE2 MMX */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
@@ -17563,6 +17567,9 @@ ix86_init_mmx_sse_builtins (void)
= build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
tree v4si_ftype_v2df
= build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
+ tree v4si_ftype_v2df_v2df
+ = build_function_type_list (V4SI_type_node,
+ V2DF_type_node, V2DF_type_node, NULL_TREE);
tree v2si_ftype_v2df
= build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
tree v4sf_ftype_v2df
@@ -17906,7 +17913,10 @@ ix86_init_mmx_sse_builtins (void)
|| d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
type = v2di_ftype_v2df_v2df;
- def_builtin (d->mask, d->name, type, d->code);
+ if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
+ type = v4si_ftype_v2df_v2df;
+
+ def_builtin_const (d->mask, d->name, type, d->code);
}
/* Add all builtins that are more or less simple operations on 1 operand. */
@@ -18457,11 +18467,6 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
op1 = gen_lowpart (TImode, x);
}
- /* The insn must want input operands in the same modes as the
- result. */
- gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
- && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
-
if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
@@ -19863,6 +19868,12 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
return ix86_builtins[IX86_BUILTIN_SQRTPS];
return NULL_TREE;
+ case BUILT_IN_LRINT:
+ if (out_mode == SImode && out_n == 4
+ && in_mode == DFmode && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ return NULL_TREE;
+
case BUILT_IN_LRINTF:
if (out_mode == SImode && out_n == 4
&& in_mode == SFmode && in_n == 4)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 65abbcf..12b8cc8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2421,6 +2421,26 @@
DONE;
})
+(define_expand "vec_pack_sfix_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
+
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling