diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2007-06-29 12:30:06 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2007-06-29 12:30:06 +0200 |
commit | b40c4f68160a4b556135185e8b5e013ab77ba2ec (patch) | |
tree | 332c2a8a980e60c5f65e31276d3aa5472059f7e9 /gcc/config | |
parent | 690f48b1b48b5626cb9b78944f1da201d11e00ec (diff) | |
download | gcc-b40c4f68160a4b556135185e8b5e013ab77ba2ec.zip gcc-b40c4f68160a4b556135185e8b5e013ab77ba2ec.tar.gz gcc-b40c4f68160a4b556135185e8b5e013ab77ba2ec.tar.bz2 |
re PR tree-optimization/24659 (Conversions are not vectorized)
PR tree-optimization/24659
* tree-vect-transform.c (vectorizable_call): Handle
(nunits_in == nunits_out / 2) and (nunits_out == nunits_in / 2) cases.
* config/i386/sse.md (vec_pack_sfix_v2df): New expander.
* config/i386/i386.c (enum ix86_builtins)
[IX86_BUILTIN_VEC_PACK_SFIX]: New constant.
(struct bdesc_2arg) [__builtin_ia32_vec_pack_sfix]: New builtin
description.
(ix86_init_mmx_sse_builtins): Define all builtins with 2 arguments as
const using def_builtin_const.
(ix86_expand_binop_builtin): Remove bogus assert() that insn wants
input operands in the same modes as the result.
(ix86_builtin_vectorized_function): Handle BUILT_IN_LRINT.
testsuite/ChangeLog:
PR tree-optimization/24659
* gcc.target/i386/vectorize2.c: New test.
* gcc.target/i386/sse2-lrint-vec.c: New runtime test.
* gcc.target/i386/sse2-lrintf-vec.c: Ditto.
From-SVN: r126111
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386.c | 23 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 20 |
2 files changed, 37 insertions, 6 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 5c5cb52..96c948f 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -16820,6 +16820,8 @@ enum ix86_builtins IX86_BUILTIN_VEC_SET_V4HI, IX86_BUILTIN_VEC_SET_V16QI, + IX86_BUILTIN_VEC_PACK_SFIX, + /* SSE4.2. */ IX86_BUILTIN_CRC32QI, IX86_BUILTIN_CRC32HI, @@ -17167,6 +17169,8 @@ static const struct builtin_description bdesc_2arg[] = { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 }, + /* SSE2 MMX */ { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 }, @@ -17563,6 +17567,9 @@ ix86_init_mmx_sse_builtins (void) = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); tree v4si_ftype_v2df = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); + tree v4si_ftype_v2df_v2df + = build_function_type_list (V4SI_type_node, + V2DF_type_node, V2DF_type_node, NULL_TREE); tree v2si_ftype_v2df = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); tree v4sf_ftype_v2df @@ -17906,7 +17913,10 @@ ix86_init_mmx_sse_builtins (void) || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3) type = v2di_ftype_v2df_v2df; - def_builtin (d->mask, d->name, type, d->code); + if (d->icode == CODE_FOR_vec_pack_sfix_v2df) + type = v4si_ftype_v2df_v2df; + + def_builtin_const (d->mask, d->name, type, d->code); } /* Add all builtins that are more or less simple operations on 1 operand. */ @@ -18457,11 +18467,6 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) op1 = gen_lowpart (TImode, x); } - /* The insn must want input operands in the same modes as the - result. */ - gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) - && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); - if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) op0 = copy_to_mode_reg (mode0, op0); if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) @@ -19863,6 +19868,12 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return ix86_builtins[IX86_BUILTIN_SQRTPS]; return NULL_TREE; + case BUILT_IN_LRINT: + if (out_mode == SImode && out_n == 4 + && in_mode == DFmode && in_n == 2) + return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX]; + return NULL_TREE; + case BUILT_IN_LRINTF: if (out_mode == SImode && out_n == 4 && in_mode == SFmode && in_n == 4) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 65abbcf..12b8cc8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2421,6 +2421,26 @@ DONE; }) +(define_expand "vec_pack_sfix_v2df" + [(match_operand:V4SI 0 "register_operand" "") + (match_operand:V2DF 1 "nonimmediate_operand" "") + (match_operand:V2DF 2 "nonimmediate_operand" "")] + "TARGET_SSE2" +{ + rtx r1, r2; + + r1 = gen_reg_rtx (V4SImode); + r2 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); + emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); + emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, r1), + gen_lowpart (V2DImode, r2))); + DONE; +}) + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel double-precision floating point element swizzling |