diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2007-05-17 08:31:05 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2007-05-17 08:31:05 +0200 |
commit | d9987fb40780ff8900d0dca8b40ebd501c963bd5 (patch) | |
tree | 89eb8371ed2645bc5faf2ffeb7e859c669317654 /gcc/tree-vect-transform.c | |
parent | f59d2a7c86c5d8834ca0558682cac83e31035e68 (diff) | |
download | gcc-d9987fb40780ff8900d0dca8b40ebd501c963bd5.zip gcc-d9987fb40780ff8900d0dca8b40ebd501c963bd5.tar.gz gcc-d9987fb40780ff8900d0dca8b40ebd501c963bd5.tar.bz2 |
re PR tree-optimization/24659 (Conversions are not vectorized)
PR tree-optimization/24659
* optabs.h (enum optab_index): Add OTI_vec_unpacks_float_hi,
OTI_vec_unpacks_float_lo, OTI_vec_unpacku_float_hi,
OTI_vec_unpacku_float_lo, OTI_vec_pack_sfix_trunc and
OTI_vec_pack_ufix_trunc.
(vec_unpacks_float_hi_optab): Define new macro.
(vec_unpacks_float_lo_optab): Ditto.
(vec_unpacku_float_hi_optab): Ditto.
(vec_unpacku_float_lo_optab): Ditto.
(vec_pack_sfix_trunc_optab): Ditto.
(vec_pack_ufix_trunc_optab): Ditto.
* genopinit.c (optabs): Implement vec_unpack[s|u]_float_[hi|lo]_optab
and vec_pack_[s|u]fix_trunc_optab using
vec_unpack[s|u]_float_[hi|lo]_* and vec_pack_[u|s]fix_trunc_* patterns.
* tree-vectorizer.c (supportable_widening_operation): Handle
FLOAT_EXPR and CONVERT_EXPR. Update comment.
(supportable_narrowing_operation): New function.
* tree-vectorizer.h (supportable_narrowing_operation): Prototype.
* tree-vect-transform.c (vectorizable_conversion): Handle
(nunits_in == nunits_out / 2) and (nunits_out == nunits_in / 2) cases.
(vect_gen_widened_results_half): Move before vectorizable_conversion.
(vectorizable_type_demotion): Call supportable_narrowing_operation()
to check for target support.
* optabs.c (optab_for_tree_code): Return vec_unpack[s|u]_float_hi_optab
for VEC_UNPACK_FLOAT_HI_EXPR, vec_unpack[s|u]_float_lo_optab
for VEC_UNPACK_FLOAT_LO_EXPR and vec_pack_[u|s]fix_trunc_optab
for VEC_PACK_FIX_TRUNC_EXPR.
(expand_binop): Special case mode of the result for
vec_pack_[u|s]fix_trunc_optab.
(init_optabs): Initialize vec_unpack[s|u]_float_[hi|lo]_optab and
vec_pack_[u|s]fix_trunc_optab.
* tree.def (VEC_UNPACK_FLOAT_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR,
VEC_PACK_FIX_TRUNC_EXPR): New tree codes.
* tree-pretty-print.c (dump_generic_node): Handle
VEC_UNPACK_FLOAT_HI_EXPR, VEC_UNPACK_FLOAT_LO_EXPR and
VEC_PACK_FIX_TRUNC_EXPR.
(op_prio): Ditto.
* expr.c (expand_expr_real_1): Ditto.
* tree-inline.c (estimate_num_insns_1): Ditto.
* tree-vect-generic.c (expand_vector_operations_1): Ditto.
* config/i386/sse.md (vec_unpacks_float_hi_v8hi): New expander.
(vec_unpacks_float_lo_v8hi): Ditto.
(vec_unpacku_float_hi_v8hi): Ditto.
(vec_unpacku_float_lo_v8hi): Ditto.
(vec_unpacks_float_hi_v4si): Ditto.
(vec_unpacks_float_lo_v4si): Ditto.
(vec_pack_sfix_trunc_v2df): Ditto.
* doc/c-tree.texi (Expression trees) [VEC_UNPACK_FLOAT_HI_EXPR]:
Document.
[VEC_UNPACK_FLOAT_LO_EXPR]: Ditto.
[VEC_PACK_FIX_TRUNC_EXPR]: Ditto.
* doc/md.texi (Standard Names) [vec_pack_sfix_trunc]: Document.
[vec_pack_ufix_trunc]: Ditto.
[vec_unpacks_float_hi]: Ditto.
[vec_unpacks_float_lo]: Ditto.
[vec_unpacku_float_hi]: Ditto.
[vec_unpacku_float_lo]: Ditto.
testsuite/ChangeLog:
PR tree-optimization/24659
* gcc.dg/vect/vect-floatint-conversion-2.c: New test.
* gcc.dg/vect/vect-intfloat-conversion-1.c: Require vect_float,
not vect_int target.
* gcc.dg/vect/vect-intfloat-conversion-2.c: Require vect_float,
not vect_int target. Loop is vectorized for vect_intfloat_cvt
targets.
* gcc.dg/vect/vect-intfloat-conversion-3.c: New test.
* gcc.dg/vect/vect-intfloat-conversion-4a.c: New test.
* gcc.dg/vect/vect-intfloat-conversion-4b.c: New test.
From-SVN: r124784
Diffstat (limited to 'gcc/tree-vect-transform.c')
-rw-r--r-- | gcc/tree-vect-transform.c | 313 |
1 files changed, 204 insertions, 109 deletions
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c index 875731d..30e7211 100644 --- a/gcc/tree-vect-transform.c +++ b/gcc/tree-vect-transform.c @@ -210,7 +210,7 @@ vect_create_addr_base_for_vector_ref (tree stmt, accessed in the loop by STMT, along with the def-use update chain to appropriately advance the pointer through the loop iterations. Also set aliasing information for the pointer. This vector pointer is used by the - callers to this function to create a memory reference expression for vector + callers to this function to create a memory reference expression for vector load/store access. Input: @@ -1931,6 +1931,64 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) } +/* Function vect_gen_widened_results_half + + Create a vector stmt whose code, type, number of arguments, and result + variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are + VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. + In the case that CODE is a CALL_EXPR, this means that a call to DECL + needs to be created (DECL is a function-decl of a target-builtin). + STMT is the original scalar stmt that we are vectorizing. 
*/ + +static tree +vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl, + tree vec_oprnd0, tree vec_oprnd1, int op_type, + tree vec_dest, block_stmt_iterator *bsi, + tree stmt) +{ + tree expr; + tree new_stmt; + tree new_temp; + tree sym; + ssa_op_iter iter; + + /* Generate half of the widened result: */ + if (code == CALL_EXPR) + { + /* Target specific support */ + if (op_type == binary_op) + expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1); + else + expr = build_call_expr (decl, 1, vec_oprnd0); + } + else + { + /* Generic support */ + gcc_assert (op_type == TREE_CODE_LENGTH (code)); + if (op_type == binary_op) + expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1); + else + expr = build1 (code, vectype, vec_oprnd0); + } + new_stmt = build_gimple_modify_stmt (vec_dest, expr); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + + if (code == CALL_EXPR) + { + FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS) + { + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + mark_sym_for_renaming (sym); + } + } + + return new_stmt; +} + + /* Function vectorizable_conversion. Check if STMT performs a conversion operation, that can be vectorized. 
@@ -1946,21 +2004,24 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, tree scalar_dest; tree operation; tree op0; - tree vec_oprnd0 = NULL_TREE; + tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - enum tree_code code; + enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing; + tree decl1 = NULL_TREE, decl2 = NULL_TREE; tree new_temp; tree def, def_stmt; enum vect_def_type dt0; tree new_stmt; + stmt_vec_info prev_stmt_info; int nunits_in; int nunits_out; - int ncopies, j; tree vectype_out, vectype_in; + int ncopies, j; + tree expr; tree rhs_type, lhs_type; tree builtin_decl; - stmt_vec_info prev_stmt_info; + enum { NARROW, NONE, WIDEN } modifier; /* Is STMT a vectorizable conversion? */ @@ -1998,23 +2059,36 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0); lhs_type = TREE_TYPE (scalar_dest); vectype_out = get_vectype_for_scalar_type (lhs_type); - gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out); nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); - /* FORNOW: need to extend to support short<->float conversions as well. 
*/ - if (nunits_out != nunits_in) + /* FORNOW */ + if (nunits_in == nunits_out / 2) + modifier = NARROW; + else if (nunits_out == nunits_in) + modifier = NONE; + else if (nunits_out == nunits_in / 2) + modifier = WIDEN; + else return false; + if (modifier == NONE) + gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out); + /* Bail out if the types are both integral or non-integral */ if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type)) || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type))) return false; + if (modifier == NARROW) + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out; + else + ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; + /* Sanity check: make sure that at least one copy of the vectorized stmt needs to be generated. */ - ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in; gcc_assert (ncopies >= 1); + /* Check the operands of the operation. */ if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0)) { if (vect_print_dump_info (REPORT_DETAILS)) @@ -2023,21 +2097,31 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, } /* Supportable by target? */ - if (!targetm.vectorize.builtin_conversion (code, vectype_in)) + if ((modifier == NONE + && !targetm.vectorize.builtin_conversion (code, vectype_in)) + || (modifier == WIDEN + && !supportable_widening_operation (code, stmt, vectype_in, + &decl1, &decl2, + &code1, &code2)) + || (modifier == NARROW + && !supportable_narrowing_operation (code, stmt, vectype_in, + &code1))) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "op not supported by target."); return false; } + if (modifier != NONE) + STMT_VINFO_VECTYPE (stmt_info) = vectype_in; + if (!vec_stmt) /* transformation not required. */ { STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type; return true; } - /** Transform. **/ - + /** Transform. 
**/ if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "transform conversion."); @@ -2045,37 +2129,113 @@ vectorizable_conversion (tree stmt, block_stmt_iterator * bsi, vec_dest = vect_create_destination_var (scalar_dest, vectype_out); prev_stmt_info = NULL; - for (j = 0; j < ncopies; j++) + switch (modifier) { - tree sym; - ssa_op_iter iter; + case NONE: + for (j = 0; j < ncopies; j++) + { + tree sym; + ssa_op_iter iter; - if (j == 0) - vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); - else - vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + if (j == 0) + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + else + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); - builtin_decl = - targetm.vectorize.builtin_conversion (code, vectype_in); - new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0); + builtin_decl = + targetm.vectorize.builtin_conversion (code, vectype_in); + new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0); - /* Arguments are ready. create the new vector stmt. */ - new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt); - new_temp = make_ssa_name (vec_dest, new_stmt); - GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, new_stmt, bsi); - FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS) - { - if (TREE_CODE (sym) == SSA_NAME) - sym = SSA_NAME_VAR (sym); - mark_sym_for_renaming (sym); - } + /* Arguments are ready. create the new vector stmt. 
*/ + new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS) + { + if (TREE_CODE (sym) == SSA_NAME) + sym = SSA_NAME_VAR (sym); + mark_sym_for_renaming (sym); + } - if (j == 0) - STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; - else - STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; - prev_stmt_info = vinfo_for_stmt (new_stmt); + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + break; + + case WIDEN: + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to + generate more than one vector stmt - i.e - we need to "unroll" + the vector stmt by a factor VF/nunits. 
*/ + for (j = 0; j < ncopies; j++) + { + if (j == 0) + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + else + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + + STMT_VINFO_VECTYPE (stmt_info) = vectype_in; + + /* Generate first half of the widened result: */ + new_stmt + = vect_gen_widened_results_half (code1, vectype_out, decl1, + vec_oprnd0, vec_oprnd1, + unary_op, vec_dest, bsi, stmt); + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + + /* Generate second half of the widened result: */ + new_stmt + = vect_gen_widened_results_half (code2, vectype_out, decl2, + vec_oprnd0, vec_oprnd1, + unary_op, vec_dest, bsi, stmt); + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + break; + + case NARROW: + /* In case the vectorization factor (VF) is bigger than the number + of elements that we can fit in a vectype (nunits), we have to + generate more than one vector stmt - i.e - we need to "unroll" + the vector stmt by a factor VF/nunits. */ + for (j = 0; j < ncopies; j++) + { + /* Handle uses. */ + if (j == 0) + { + vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + } + else + { + vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1); + vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0); + } + + /* Arguments are ready. Create the new vector stmt. 
*/ + expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1); + new_stmt = build_gimple_modify_stmt (vec_dest, expr); + new_temp = make_ssa_name (vec_dest, new_stmt); + GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; + vect_finish_stmt_generation (stmt, new_stmt, bsi); + + if (j == 0) + STMT_VINFO_VEC_STMT (stmt_info) = new_stmt; + else + STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; + + prev_stmt_info = vinfo_for_stmt (new_stmt); + } + + *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); } return true; } @@ -2525,7 +2685,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt) bool vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, - tree *vec_stmt) + tree *vec_stmt) { tree vec_dest; tree scalar_dest; @@ -2534,7 +2694,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, tree vec_oprnd0=NULL, vec_oprnd1=NULL; stmt_vec_info stmt_info = vinfo_for_stmt (stmt); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - enum tree_code code; + enum tree_code code, code1 = CODE_FOR_nothing; tree new_temp; tree def, def_stmt; enum vect_def_type dt0; @@ -2548,8 +2708,6 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, tree expr; tree vectype_in; tree scalar_type; - optab optab; - enum machine_mode vec_mode; if (!STMT_VINFO_RELEVANT_P (stmt_info)) return false; @@ -2607,13 +2765,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, } /* Supportable by target? */ - code = VEC_PACK_TRUNC_EXPR; - optab = optab_for_tree_code (code, vectype_in); - if (!optab) - return false; - - vec_mode = TYPE_MODE (vectype_in); - if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing) + if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1)) return false; STMT_VINFO_VECTYPE (stmt_info) = vectype_in; @@ -2652,7 +2804,7 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, } /* Arguments are ready. Create the new vector stmt. 
*/ - expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1); + expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1); new_stmt = build_gimple_modify_stmt (vec_dest, expr); new_temp = make_ssa_name (vec_dest, new_stmt); GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; @@ -2671,64 +2823,6 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi, } -/* Function vect_gen_widened_results_half - - Create a vector stmt whose code, type, number of arguments, and result - variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are - VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI. - In the case that CODE is a CALL_EXPR, this means that a call to DECL - needs to be created (DECL is a function-decl of a target-builtin). - STMT is the original scalar stmt that we are vectorizing. */ - -static tree -vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl, - tree vec_oprnd0, tree vec_oprnd1, int op_type, - tree vec_dest, block_stmt_iterator *bsi, - tree stmt) -{ - tree expr; - tree new_stmt; - tree new_temp; - tree sym; - ssa_op_iter iter; - - /* Generate half of the widened result: */ - if (code == CALL_EXPR) - { - /* Target specific support */ - if (op_type == binary_op) - expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1); - else - expr = build_call_expr (decl, 1, vec_oprnd0); - } - else - { - /* Generic support */ - gcc_assert (op_type == TREE_CODE_LENGTH (code)); - if (op_type == binary_op) - expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1); - else - expr = build1 (code, vectype, vec_oprnd0); - } - new_stmt = build_gimple_modify_stmt (vec_dest, expr); - new_temp = make_ssa_name (vec_dest, new_stmt); - GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp; - vect_finish_stmt_generation (stmt, new_stmt, bsi); - - if (code == CALL_EXPR) - { - FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS) - { - if (TREE_CODE (sym) == SSA_NAME) - sym = SSA_NAME_VAR (sym); - mark_sym_for_renaming 
(sym); - } - } - - return new_stmt; -} - - /* Function vectorizable_type_promotion Check if STMT performs a binary or unary operation that involves @@ -2785,7 +2879,8 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi, operation = GIMPLE_STMT_OPERAND (stmt, 1); code = TREE_CODE (operation); - if (code != NOP_EXPR && code != WIDEN_MULT_EXPR) + if (code != NOP_EXPR && code != CONVERT_EXPR + && code != WIDEN_MULT_EXPR) return false; op0 = TREE_OPERAND (operation, 0); |