author	Jakub Jelinek <jakub@redhat.com>	2011-11-07 16:59:07 +0100
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2011-11-07 16:59:07 +0100
commit	aec7ae7deaef9d52541da07c387066ad6ceb3d87 (patch)
tree	f56ca4ced37752007fdc3ce936daa06795357ad2 /gcc/tree-vect-stmts.c
parent	571b34b25eec4909165272e2d5a7084866ddaf96 (diff)
re PR tree-optimization/50789 (Gather vectorization)
PR tree-optimization/50789
* tree-vect-stmts.c (process_use): Add force argument, avoid
exist_non_indexing_operands_for_use_p check if true.
(vect_mark_stmts_to_be_vectorized): Adjust callers. Handle
STMT_VINFO_GATHER_P.
(gen_perm_mask): New function.
(perm_mask_for_reverse): Use it.
(reverse_vec_element): Rename to...
(permute_vec_elements): ... this. Add Y and MASK_VEC arguments,
generalize for any permutations.
(vectorizable_load): Adjust caller. Handle STMT_VINFO_GATHER_P.
* target.def (TARGET_VECTORIZE_BUILTIN_GATHER): New hook.
* doc/tm.texi.in (TARGET_VECTORIZE_BUILTIN_GATHER): Document it.
* doc/tm.texi: Regenerate.
* tree-data-ref.c (initialize_data_dependence_relation,
compute_self_dependence): No longer static.
* tree-data-ref.h (initialize_data_dependence_relation,
compute_self_dependence): New prototypes.
* tree-vect-data-refs.c (vect_check_gather): New function.
(vect_analyze_data_refs): Detect possible gather load data
refs.
* tree-vectorizer.h (struct _stmt_vec_info): Add gather_p field.
(STMT_VINFO_GATHER_P): Define.
(vect_check_gather): New prototype.
* config/i386/i386-builtin-types.def: Add types for alternate
gather builtins.
* config/i386/sse.md (AVXMODE48P_DI): Remove.
(VEC_GATHER_MODE): Rename mode_attr to...
(VEC_GATHER_IDXSI): ... this.
(VEC_GATHER_IDXDI, VEC_GATHER_SRCDI): New mode_attrs.
(avx2_gathersi<mode>, *avx2_gathersi<mode>): Use <VEC_GATHER_IDXSI>
instead of <VEC_GATHER_MODE>.
(avx2_gatherdi<mode>): Use <VEC_GATHER_IDXDI> instead of
<AVXMODE48P_DI> and <VEC_GATHER_SRCDI> instead of VEC_GATHER_MODE
on src and mask operands.
(*avx2_gatherdi<mode>): Likewise. Use VEC_GATHER_MODE iterator
instead of AVXMODE48P_DI.
(avx2_gatherdi<mode>256, *avx2_gatherdi<mode>256): Removed.
* config/i386/i386.c (enum ix86_builtins): Add
IX86_BUILTIN_GATHERALTSIV4DF, IX86_BUILTIN_GATHERALTDIV8SF,
IX86_BUILTIN_GATHERALTSIV4DI and IX86_BUILTIN_GATHERALTDIV8SI.
(ix86_init_mmx_sse_builtins): Create those builtins.
(ix86_expand_builtin): Handle those builtins and adjust expansions
of other gather builtins.
(ix86_vectorize_builtin_gather): New function.
(TARGET_VECTORIZE_BUILTIN_GATHER): Define.
* gcc.target/i386/avx2-gather-1.c: New test.
* gcc.target/i386/avx2-gather-2.c: New test.
* gcc.target/i386/avx2-gather-3.c: New test.
* gcc.target/i386/avx2-gather-4.c: New test.
From-SVN: r181089
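
The new avx2-gather-*.c tests exercise loops with indexed loads. As a
minimal sketch of the kind of loop the gather support vectorizes (a
hypothetical example in the spirit of those tests; the actual test
sources are not shown on this page):

/* With -O3 -mavx2, the indexed load in[idx[i]] can now be vectorized
   using an AVX2 gather instruction via the new target hook.  */
void
foo (float *__restrict out, const float *__restrict in,
     const int *__restrict idx, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = in[idx[i]];
}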
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r-- | gcc/tree-vect-stmts.c | 275
1 file changed, 241 insertions(+), 34 deletions(-)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index dd65636..8b9a2cf 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -332,6 +332,8 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
    - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
      that defined USE.  This is done by calling mark_relevant and passing it
      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
+   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
+     be performed.
 
    Outputs:
    Generally, LIVE_P and RELEVANT are used to define the liveness and
@@ -351,7 +353,8 @@ exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
 
 static bool
 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
-             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
+             enum vect_relevant relevant, VEC(gimple,heap) **worklist,
+             bool force)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
@@ -363,7 +366,7 @@ process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
 
   /* case 1: we are only interested in uses that need to be vectorized.  Uses
      that are used for address computation are not considered relevant.  */
-  if (!exist_non_indexing_operands_for_use_p (use, stmt))
+  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
     return true;
 
   if (!vect_is_simple_use (use, loop_vinfo, NULL, &def_stmt, &def, &dt))
@@ -646,7 +649,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
             break;
         }
 
-      if (is_pattern_stmt_p (vinfo_for_stmt (stmt)))
+      if (is_pattern_stmt_p (stmt_vinfo))
         {
           /* Pattern statements are not inserted into the code, so
              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
@@ -660,9 +663,9 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
           if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
             {
               if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
-                                live_p, relevant, &worklist)
+                                live_p, relevant, &worklist, false)
                   || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
-                                   live_p, relevant, &worklist))
+                                   live_p, relevant, &worklist, false))
                 {
                   VEC_free (gimple, heap, worklist);
                   return false;
@@ -673,7 +676,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
             {
               op = gimple_op (stmt, i);
               if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
-                                &worklist))
+                                &worklist, false))
                 {
                   VEC_free (gimple, heap, worklist);
                   return false;
@@ -686,7 +689,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
             {
               tree arg = gimple_call_arg (stmt, i);
               if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
-                                &worklist))
+                                &worklist, false))
                 {
                   VEC_free (gimple, heap, worklist);
                   return false;
@@ -699,12 +702,25 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
         {
           tree op = USE_FROM_PTR (use_p);
           if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
-                            &worklist))
+                            &worklist, false))
             {
              VEC_free (gimple, heap, worklist);
              return false;
            }
         }
+
+      if (STMT_VINFO_GATHER_P (stmt_vinfo))
+        {
+          tree off;
+          tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
+          gcc_assert (decl);
+          if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
+                            &worklist, true))
+            {
+              VEC_free (gimple, heap, worklist);
+              return false;
+            }
+        }
     } /* while worklist */
 
   VEC_free (gimple, heap, worklist);
@@ -3914,23 +3930,17 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   return true;
 }
 
-/* Given a vector type VECTYPE returns a builtin DECL to be used
-   for vector permutation and returns the mask that implements
-   reversal of the vector elements.  If that is impossible to do,
-   returns NULL.  */
+/* Given a vector type VECTYPE and permutation SEL returns
+   the VECTOR_CST mask that implements the permutation of the
+   vector elements.  If that is impossible to do, returns NULL.  */
 
 static tree
-perm_mask_for_reverse (tree vectype)
+gen_perm_mask (tree vectype, unsigned char *sel)
 {
   tree mask_elt_type, mask_type, mask_vec;
   int i, nunits;
-  unsigned char *sel;
 
   nunits = TYPE_VECTOR_SUBPARTS (vectype);
-  sel = XALLOCAVEC (unsigned char, nunits);
-
-  for (i = 0; i < nunits; ++i)
-    sel[i] = nunits - 1 - i;
 
   if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
     return NULL;
@@ -3941,33 +3951,52 @@ perm_mask_for_reverse (tree vectype)
   mask_type = get_vectype_for_scalar_type (mask_elt_type);
 
   mask_vec = NULL;
-  for (i = 0; i < nunits; i++)
-    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, i), mask_vec);
+  for (i = nunits - 1; i >= 0; i--)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_elt_type, sel[i]),
+                          mask_vec);
   mask_vec = build_vector (mask_type, mask_vec);
 
   return mask_vec;
 }
 
-/* Given a vector variable X, that was generated for the scalar LHS of
-   STMT, generate instructions to reverse the vector elements of X,
-   insert them a *GSI and return the permuted vector variable.  */
+/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
+   reversal of the vector elements.  If that is impossible to do,
+   returns NULL.  */
 
 static tree
-reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+perm_mask_for_reverse (tree vectype)
+{
+  int i, nunits;
+  unsigned char *sel;
+
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  sel = XALLOCAVEC (unsigned char, nunits);
+
+  for (i = 0; i < nunits; ++i)
+    sel[i] = nunits - 1 - i;
+
+  return gen_perm_mask (vectype, sel);
+}
+
+/* Given a vector variable X and Y, that was generated for the scalar
+   STMT, generate instructions to permute the vector elements of X and Y
+   using permutation mask MASK_VEC, insert them at *GSI and return the
+   permuted vector variable.  */
+
+static tree
+permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
+                      gimple_stmt_iterator *gsi)
 {
   tree vectype = TREE_TYPE (x);
-  tree mask_vec, perm_dest, data_ref;
+  tree perm_dest, data_ref;
   gimple perm_stmt;
 
-  mask_vec = perm_mask_for_reverse (vectype);
-
   perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+  data_ref = make_ssa_name (perm_dest, NULL);
 
   /* Generate the permute statement.  */
-  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
-                                             x, x, mask_vec);
-  data_ref = make_ssa_name (perm_dest, perm_stmt);
-  gimple_set_lhs (perm_stmt, data_ref);
+  perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, data_ref,
+                                             x, y, mask_vec);
  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
 
   return data_ref;
@@ -4026,6 +4055,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
   tree aggr_type;
+  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
+  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
+  int gather_scale = 1;
+  enum vect_def_type gather_dt = vect_unknown_def_type;
 
   if (loop_vinfo)
     {
@@ -4106,7 +4139,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
     {
       strided_load = true;
       /* FORNOW */
-      gcc_assert (! nested_in_vect_loop);
+      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
 
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
       if (!slp && !PURE_SLP_STMT (stmt_info))
@@ -4121,7 +4154,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   if (negative)
     {
-      gcc_assert (!strided_load);
+      gcc_assert (!strided_load && !STMT_VINFO_GATHER_P (stmt_info));
       alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
       if (alignment_support_scheme != dr_aligned
           && alignment_support_scheme != dr_unaligned_supported)
@@ -4138,6 +4171,23 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         }
     }
 
+  if (STMT_VINFO_GATHER_P (stmt_info))
+    {
+      gimple def_stmt;
+      tree def;
+      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
+                                       &gather_off, &gather_scale);
+      gcc_assert (gather_decl);
+      if (!vect_is_simple_use_1 (gather_off, loop_vinfo, bb_vinfo,
+                                 &def_stmt, &def, &gather_dt,
+                                 &gather_off_vectype))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "gather index use not simple.");
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -4150,6 +4200,161 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
   /** Transform.  **/
 
+  if (STMT_VINFO_GATHER_P (stmt_info))
+    {
+      tree vec_oprnd0 = NULL_TREE, op;
+      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
+      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
+      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
+      edge pe = loop_preheader_edge (loop);
+      gimple_seq seq;
+      basic_block new_bb;
+      enum { NARROW, NONE, WIDEN } modifier;
+      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
+
+      if (nunits == gather_off_nunits)
+        modifier = NONE;
+      else if (nunits == gather_off_nunits / 2)
+        {
+          unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
+          modifier = WIDEN;
+
+          for (i = 0; i < gather_off_nunits; ++i)
+            sel[i] = i | nunits;
+
+          perm_mask = gen_perm_mask (gather_off_vectype, sel);
+          gcc_assert (perm_mask != NULL_TREE);
+        }
+      else if (nunits == gather_off_nunits * 2)
+        {
+          unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
+          modifier = NARROW;
+
+          for (i = 0; i < nunits; ++i)
+            sel[i] = i < gather_off_nunits
+                     ? i : i + nunits - gather_off_nunits;
+
+          perm_mask = gen_perm_mask (vectype, sel);
+          gcc_assert (perm_mask != NULL_TREE);
+          ncopies *= 2;
+        }
+      else
+        gcc_unreachable ();
+
+      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
+      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
+      scaletype = TREE_VALUE (arglist);
+      gcc_checking_assert (types_compatible_p (srctype, rettype)
+                           && types_compatible_p (srctype, masktype));
+
+      vec_dest = vect_create_destination_var (scalar_dest, vectype);
+
+      ptr = fold_convert (ptrtype, gather_base);
+      if (!is_gimple_min_invariant (ptr))
+        {
+          ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
+          new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+          gcc_assert (!new_bb);
+        }
+
+      /* Currently we support only unconditional gather loads,
+         so mask should be all ones.  */
+      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
+        mask = build_int_cst (TREE_TYPE (masktype), -1);
+      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
+        {
+          REAL_VALUE_TYPE r;
+          long tmp[6];
+          for (j = 0; j < 6; ++j)
+            tmp[j] = -1;
+          real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
+          mask = build_real (TREE_TYPE (masktype), r);
+        }
+      else
+        gcc_unreachable ();
+      mask = build_vector_from_val (masktype, mask);
+      mask = vect_init_vector (stmt, mask, masktype, NULL);
+
+      scale = build_int_cst (scaletype, gather_scale);
+
+      prev_stmt_info = NULL;
+      for (j = 0; j < ncopies; ++j)
+        {
+          if (modifier == WIDEN && (j & 1))
+            op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
+                                       perm_mask, stmt, gsi);
+          else if (j == 0)
+            op = vec_oprnd0
+              = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
+          else
+            op = vec_oprnd0
+              = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
+
+          if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
+            {
+              gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
+                          == TYPE_VECTOR_SUBPARTS (idxtype));
+              var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
+              add_referenced_var (var);
+              var = make_ssa_name (var, NULL);
+              op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
+              new_stmt
+                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
+                                                op, NULL_TREE);
+              vect_finish_stmt_generation (stmt, new_stmt, gsi);
+              op = var;
+            }
+
+          new_stmt
+            = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
+
+          if (!useless_type_conversion_p (vectype, rettype))
+            {
+              gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
+                          == TYPE_VECTOR_SUBPARTS (rettype));
+              var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
+              add_referenced_var (var);
+              op = make_ssa_name (var, new_stmt);
+              gimple_call_set_lhs (new_stmt, op);
+              vect_finish_stmt_generation (stmt, new_stmt, gsi);
+              var = make_ssa_name (vec_dest, NULL);
+              op = build1 (VIEW_CONVERT_EXPR, vectype, op);
+              new_stmt
+                = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
+                                                NULL_TREE);
+            }
+          else
+            {
+              var = make_ssa_name (vec_dest, new_stmt);
+              gimple_call_set_lhs (new_stmt, var);
+            }
+
+          vect_finish_stmt_generation (stmt, new_stmt, gsi);
+
+          if (modifier == NARROW)
+            {
+              if ((j & 1) == 0)
+                {
+                  prev_res = var;
+                  continue;
+                }
+              var = permute_vec_elements (prev_res, var,
+                                          perm_mask, stmt, gsi);
+              new_stmt = SSA_NAME_DEF_STMT (var);
+            }
+
+          if (prev_stmt_info == NULL)
+            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+          else
+            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+          prev_stmt_info = vinfo_for_stmt (new_stmt);
+        }
+      return true;
+    }
+
   if (strided_load)
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
@@ -4541,7 +4746,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
           if (negative)
             {
-              new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+              tree perm_mask = perm_mask_for_reverse (vectype);
+              new_temp = permute_vec_elements (new_temp, new_temp,
+                                               perm_mask, stmt, gsi);
               new_stmt = SSA_NAME_DEF_STMT (new_temp);
             }
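
For reference, the WIDEN/NARROW selector vectors built in the gather block
of vectorizable_load above can be illustrated standalone (a hypothetical
demo program; only the sel[] formulas are taken from the patch):

#include <stdio.h>

/* WIDEN: the data vector has half as many elements as the index vector
   (e.g. V4DF data, V8SI indexes); odd copies permute the high half of
   the index vector down.  sel[i] = i | nunits picks elements 4..7 of
   an 8-element vector when nunits == 4.  */
static void
widen_sel (unsigned char *sel, int nunits, int gather_off_nunits)
{
  int i;
  for (i = 0; i < gather_off_nunits; ++i)
    sel[i] = i | nunits;
}

/* NARROW: the data vector has twice as many elements as the index
   vector (e.g. V8SF data, V4DI indexes); two gather results X and Y are
   concatenated by taking the low half of each, i.e. elements 0..3 of X
   and 8..11 (Y's low half) in the combined two-operand element space.  */
static void
narrow_sel (unsigned char *sel, int nunits, int gather_off_nunits)
{
  int i;
  for (i = 0; i < nunits; ++i)
    sel[i] = i < gather_off_nunits ? i : i + nunits - gather_off_nunits;
}

int
main (void)
{
  unsigned char sel[8];
  int i;

  widen_sel (sel, 4, 8);        /* prints 4 5 6 7 4 5 6 7 */
  for (i = 0; i < 8; i++)
    printf ("%d ", sel[i]);
  printf ("\n");

  narrow_sel (sel, 8, 4);       /* prints 0 1 2 3 8 9 10 11 */
  for (i = 0; i < 8; i++)
    printf ("%d ", sel[i]);
  printf ("\n");
  return 0;
}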