diff options
Diffstat (limited to 'gcc/tree-vect-stmts.cc')
-rw-r--r-- | gcc/tree-vect-stmts.cc | 603 |
1 files changed, 330 insertions, 273 deletions
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index dbeb8bd..15e0d06 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -417,7 +417,9 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, /* Check if it's a not live PHI and multiple exits. In this case there will be a usage later on after peeling which is needed for the - alternate exit. */ + alternate exit. + ??? Unless the PHI was marked live because of early + break, which also needs the latch def live and vectorized. */ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) && is_a <gphi *> (stmt) && gimple_bb (stmt) == LOOP_VINFO_LOOP (loop_vinfo)->header @@ -655,14 +657,15 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, } /* We are also not interested in uses on loop PHI backedges that are inductions. Otherwise we'll needlessly vectorize the IV increment - and cause hybrid SLP for SLP inductions. Unless the PHI is live - of course. */ + and cause hybrid SLP for SLP inductions. */ else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def - && ! STMT_VINFO_LIVE_P (stmt_vinfo) && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, loop_latch_edge (bb->loop_father)) - == use)) + == use) + && (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + || (gimple_bb (stmt_vinfo->stmt) + != LOOP_VINFO_LOOP (loop_vinfo)->header))) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -670,7 +673,6 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, return opt_result::success (); } - vect_mark_relevant (worklist, dstmt_vinfo, relevant, false); return opt_result::success (); } @@ -722,16 +724,28 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) phi_info->stmt); if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p)) - vect_mark_relevant (&worklist, phi_info, relevant, live_p); + { + if (STMT_VINFO_DEF_TYPE (phi_info) == vect_unknown_def_type) + return opt_result::failure_at + (*si, "not vectorized: unhandled relevant PHI: %G", *si); + vect_mark_relevant (&worklist, phi_info, relevant, live_p); + } } - for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) + for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si)) { - if (is_gimple_debug (gsi_stmt (si))) + gimple *stmt = gsi_stmt (si); + if (is_gimple_debug (stmt)) continue; - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); + stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, - "init: stmt relevant? %G", stmt_info->stmt); + "init: stmt relevant? %G", stmt); + + if (gimple_get_lhs (stmt) == NULL_TREE + && !is_a <gcond *> (stmt) + && !is_a <gcall *> (stmt)) + return opt_result::failure_at + (stmt, "not vectorized: irregular stmt: %G", stmt); if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p)) vect_mark_relevant (&worklist, stmt_info, relevant, live_p); @@ -929,8 +943,7 @@ vect_model_simple_cost (vec_info *, int n, slp_tree node, is true the stmt is doing widening arithmetic. */ static void -vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, - enum vect_def_type *dt, +vect_model_promotion_demotion_cost (slp_tree slp_node, unsigned int ncopies, int pwr, stmt_vector_for_cost *cost_vec, bool widen_arith) @@ -943,16 +956,10 @@ vect_model_promotion_demotion_cost (stmt_vec_info stmt_info, inside_cost += record_stmt_cost (cost_vec, ncopies, widen_arith ? vector_stmt : vec_promote_demote, - stmt_info, 0, vect_body); + slp_node, 0, vect_body); ncopies *= 2; } - /* FORNOW: Assuming maximum 2 args per stmts. */ - for (i = 0; i < 2; i++) - if (dt[i] == vect_constant_def || dt[i] == vect_external_def) - prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, - stmt_info, 0, vect_prologue); - if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_promotion_demotion_cost: inside_cost = %d, " @@ -1423,12 +1430,12 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, slp_tree slp_node, vec_load_store_type vls_type, int group_size, - vect_memory_access_type - memory_access_type, - const gather_scatter_info *gs_info, + vect_load_store_data *ls, slp_tree mask_node, vec<int> *elsvals = nullptr) { + vect_memory_access_type memory_access_type = ls->memory_access_type; + /* Invariant loads need no special support. */ if (memory_access_type == VMAT_INVARIANT) return; @@ -1479,7 +1486,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, return; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) { internal_fn ifn = (is_load ? IFN_MASK_GATHER_LOAD @@ -1487,18 +1494,22 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, internal_fn len_ifn = (is_load ? IFN_MASK_LEN_GATHER_LOAD : IFN_MASK_LEN_SCATTER_STORE); + stmt_vec_info repr = SLP_TREE_REPRESENTATIVE (slp_node); + tree off_vectype = (STMT_VINFO_GATHER_SCATTER_P (repr) + ? SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]) + : ls->strided_offset_vectype); + tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr)); + int scale = SLP_TREE_GS_SCALE (slp_node); if (internal_gather_scatter_fn_supported_p (len_ifn, vectype, - gs_info->memory_type, - gs_info->offset_vectype, - gs_info->scale, + memory_type, + off_vectype, scale, elsvals)) vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1); else if (internal_gather_scatter_fn_supported_p (ifn, vectype, - gs_info->memory_type, - gs_info->offset_vectype, - gs_info->scale, + memory_type, + off_vectype, scale, elsvals) - || gs_info->decl != NULL_TREE) + || memory_access_type == VMAT_GATHER_SCATTER_LEGACY) vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); else @@ -1955,14 +1966,15 @@ static bool get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, slp_tree slp_node, bool masked_p, vec_load_store_type vls_type, - vect_memory_access_type *memory_access_type, - poly_int64 *poffset, - dr_alignment_support *alignment_support_scheme, - int *misalignment, - gather_scatter_info *gs_info, - internal_fn *lanes_ifn, - vec<int> *elsvals = nullptr) + vect_load_store_data *ls) { + vect_memory_access_type *memory_access_type = &ls->memory_access_type; + poly_int64 *poffset = &ls->poffset; + dr_alignment_support *alignment_support_scheme + = &ls->alignment_support_scheme; + int *misalignment = &ls->misalignment; + internal_fn *lanes_ifn = &ls->lanes_ifn; + vec<int> *elsvals = &ls->elsvals; loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; @@ -2018,38 +2030,32 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *memory_access_type = VMAT_STRIDED_SLP; else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { - *memory_access_type = VMAT_GATHER_SCATTER; slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; tree offset_vectype = SLP_TREE_VECTYPE (offset_node); - memset (gs_info, 0, sizeof (gather_scatter_info)); - gs_info->offset_vectype = offset_vectype; - gs_info->scale = SLP_TREE_GS_SCALE (slp_node); - gs_info->base = SLP_TREE_GS_BASE (slp_node); - gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr)); - gs_info->decl = NULL_TREE; - gs_info->ifn = IFN_LAST; + int scale = SLP_TREE_GS_SCALE (slp_node); + tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr)); tree tem; if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype, - gs_info->memory_type, - offset_vectype, gs_info->scale, - &gs_info->ifn, &tem, + memory_type, + offset_vectype, scale, + &ls->gs.ifn, &tem, elsvals)) - /* GATHER_SCATTER_IFN_P. */; + *memory_access_type = VMAT_GATHER_SCATTER_IFN; else if (vls_type == VLS_LOAD ? (targetm.vectorize.builtin_gather - && (gs_info->decl + && (ls->gs.decl = targetm.vectorize.builtin_gather (vectype, TREE_TYPE (offset_vectype), - gs_info->scale))) + scale))) : (targetm.vectorize.builtin_scatter - && (gs_info->decl + && (ls->gs.decl = targetm.vectorize.builtin_scatter (vectype, TREE_TYPE (offset_vectype), - gs_info->scale)))) - /* GATHER_SCATTER_LEGACY_P. */; + scale)))) + *memory_access_type = VMAT_GATHER_SCATTER_LEGACY; else { /* GATHER_SCATTER_EMULATED_P. */ @@ -2065,6 +2071,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, "gather.\n"); return false; } + *memory_access_type = VMAT_GATHER_SCATTER_EMULATED; } } else @@ -2312,19 +2319,27 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, || *memory_access_type == VMAT_STRIDED_SLP) && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) && SLP_TREE_LANES (slp_node) == 1 - && loop_vinfo - && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals, - group_size, single_element_p)) - *memory_access_type = VMAT_GATHER_SCATTER; + && loop_vinfo) + { + gather_scatter_info gs_info; + if (vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo, + masked_p, &gs_info, elsvals, + group_size, single_element_p)) + { + SLP_TREE_GS_SCALE (slp_node) = gs_info.scale; + SLP_TREE_GS_BASE (slp_node) = error_mark_node; + ls->gs.ifn = gs_info.ifn; + ls->strided_offset_vectype = gs_info.offset_vectype; + *memory_access_type = VMAT_GATHER_SCATTER_IFN; + } + } if (*memory_access_type == VMAT_CONTIGUOUS_DOWN || *memory_access_type == VMAT_CONTIGUOUS_REVERSE) *poffset = neg_ldst_offset; if (*memory_access_type == VMAT_ELEMENTWISE - || (*memory_access_type == VMAT_GATHER_SCATTER - && GATHER_SCATTER_LEGACY_P (*gs_info)) + || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY || *memory_access_type == VMAT_STRIDED_SLP || *memory_access_type == VMAT_INVARIANT) { @@ -2333,7 +2348,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, } else { - if (*memory_access_type == VMAT_GATHER_SCATTER + if (mat_gather_scatter_p (*memory_access_type) && !first_dr_info) *misalignment = DR_MISALIGNMENT_UNKNOWN; else @@ -2341,7 +2356,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *alignment_support_scheme = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, *misalignment, - *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr); + mat_gather_scatter_p (*memory_access_type)); } if (overrun_p) @@ -2375,7 +2390,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, if (loop_vinfo && dr_safe_speculative_read_required (stmt_info) && LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && (*memory_access_type == VMAT_GATHER_SCATTER + && (mat_gather_scatter_p (*memory_access_type) || *memory_access_type == VMAT_STRIDED_SLP)) { if (dump_enabled_p ()) @@ -2395,7 +2410,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, vector iteration or force masking. */ if (dr_safe_speculative_read_required (stmt_info) && (*alignment_support_scheme == dr_aligned - && *memory_access_type != VMAT_GATHER_SCATTER)) + && !mat_gather_scatter_p (*memory_access_type))) { /* We can only peel for loops, of course. */ gcc_checking_assert (loop_vinfo); @@ -2711,13 +2726,12 @@ vect_get_mask_load_else (int elsval, tree type) static gimple * vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, - tree vectype, - gimple_stmt_iterator *gsi, - const gather_scatter_info *gs_info, + slp_tree slp_node, tree vectype, + gimple_stmt_iterator *gsi, tree decl, tree ptr, tree offset, tree mask) { - tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); - tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); + tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree rettype = TREE_TYPE (TREE_TYPE (decl)); tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); /* ptrtype */ arglist = TREE_CHAIN (arglist); tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); @@ -2783,8 +2797,8 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype); } - tree scale = build_int_cst (scaletype, gs_info->scale); - gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, + tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node)); + gimple *new_stmt = gimple_build_call (decl, 5, src_op, ptr, op, mask_op, scale); if (!useless_type_conversion_p (vectype, rettype)) @@ -2810,12 +2824,13 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, static gimple * vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, + slp_tree slp_node, gimple_stmt_iterator *gsi, - const gather_scatter_info *gs_info, + tree decl, tree ptr, tree offset, tree oprnd, tree mask) { - tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); - tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); + tree rettype = TREE_TYPE (TREE_TYPE (decl)); + tree arglist = TYPE_ARG_TYPES (TREE_TYPE (decl)); /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist); tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); @@ -2879,9 +2894,9 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, op = var; } - tree scale = build_int_cst (scaletype, gs_info->scale); + tree scale = build_int_cst (scaletype, SLP_TREE_GS_SCALE (slp_node)); gcall *new_stmt - = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale); + = gimple_build_call (decl, 5, ptr, mask_arg, op, src, scale); return new_stmt; } @@ -2893,11 +2908,11 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, static void vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node, - const gather_scatter_info *gs_info, tree *dataref_ptr, vec<tree> *vec_offset) { gimple_seq stmts = NULL; - *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); + *dataref_ptr = force_gimple_operand (SLP_TREE_GS_BASE (slp_node), + &stmts, true, NULL_TREE); if (stmts != NULL) { basic_block new_bb; @@ -2918,10 +2933,10 @@ vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node, I * DR_STEP / SCALE. */ static void -vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype, +vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node, + tree vectype, tree offset_vectype, loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, - const gather_scatter_info *gs_info, tree *dataref_bump, tree *vec_offset, vec_loop_lens *loop_lens) { @@ -2962,15 +2977,15 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype, /* The offset given in GS_INFO can have pointer type, so use the element type of the vector instead. */ - tree offset_type = TREE_TYPE (gs_info->offset_vectype); + tree offset_type = TREE_TYPE (offset_vectype); /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */ tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)), - ssize_int (gs_info->scale)); + ssize_int (SLP_TREE_GS_SCALE (node))); step = fold_convert (offset_type, step); /* Create {0, X, X*2, X*3, ...}. */ - tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype, + tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype, build_zero_cst (offset_type), step); *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset); } @@ -2993,7 +3008,7 @@ vect_get_loop_variant_data_ptr_increment ( tree step = vect_dr_behavior (vinfo, dr_info)->step; /* gather/scatter never reach here. */ - gcc_assert (memory_access_type != VMAT_GATHER_SCATTER); + gcc_assert (!mat_gather_scatter_p (memory_access_type)); /* When we support SELECT_VL pattern, we dynamic adjust the memory address by .SELECT_VL result. @@ -3103,10 +3118,10 @@ vectorizable_bswap (vec_info *vinfo, SLP_TREE_TYPE (slp_node) = call_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_bswap"); record_stmt_cost (cost_vec, - 1, vector_stmt, stmt_info, 0, vect_prologue); + 1, vector_stmt, slp_node, 0, vect_prologue); record_stmt_cost (cost_vec, SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vec_perm, stmt_info, 0, vect_body); + vec_perm, slp_node, 0, vect_body); return true; } @@ -3417,7 +3432,7 @@ vectorizable_call (vec_info *vinfo, } } - int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); + int reduc_idx = SLP_TREE_REDUC_IDX (slp_node); internal_fn cond_fn = get_conditional_internal_fn (ifn); internal_fn cond_len_fn = get_len_internal_fn (ifn); int len_opno = internal_fn_len_index (cond_len_fn); @@ -5386,7 +5401,7 @@ vectorizable_conversion (vec_info *vinfo, SLP_TREE_TYPE (slp_node) = type_demotion_vec_info_type; /* The final packing step produces one vector result per copy. */ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, + vect_model_promotion_demotion_cost (slp_node, nvectors, multi_step_cvt, cost_vec, widen_arith); } @@ -5398,7 +5413,7 @@ vectorizable_conversion (vec_info *vinfo, so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */ unsigned int nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt; - vect_model_promotion_demotion_cost (stmt_info, dt, nvectors, + vect_model_promotion_demotion_cost (slp_node, nvectors, multi_step_cvt, cost_vec, widen_arith); } @@ -6437,7 +6452,7 @@ vectorizable_operation (vec_info *vinfo, using_emulated_vectors_p = true; } - int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); + int reduc_idx = SLP_TREE_REDUC_IDX (slp_node); vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL); internal_fn cond_fn = get_conditional_internal_fn (code); @@ -6549,6 +6564,20 @@ vectorizable_operation (vec_info *vinfo, vec_dest = vect_create_destination_var (scalar_dest, vectype); vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); } + /* For reduction operations with undefined overflow behavior make sure to + pun them to unsigned since we change the order of evaluation. + ??? Avoid for in-order reductions? */ + else if (arith_code_with_undefined_signed_overflow (orig_code) + && ANY_INTEGRAL_TYPE_P (vectype) + && TYPE_OVERFLOW_UNDEFINED (vectype) + && SLP_TREE_REDUC_IDX (slp_node) != -1) + { + gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR + || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR); + vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out); + vectype = unsigned_type_for (vectype); + vec_dest = vect_create_destination_var (scalar_dest, vectype); + } /* Handle def. */ else vec_dest = vect_create_destination_var (scalar_dest, vectype_out); @@ -6562,6 +6591,46 @@ vectorizable_operation (vec_info *vinfo, vop1 = ((op_type == binary_op || op_type == ternary_op) ? vec_oprnds1[i] : NULL_TREE); vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); + + if (vec_cvt_dest + && !useless_type_conversion_p (vectype, TREE_TYPE (vop0))) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop0); + new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vop0 = new_temp; + } + if (vop1 + && vec_cvt_dest + && !useless_type_conversion_p (vectype, TREE_TYPE (vop1))) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop1); + new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vop1 = new_temp; + } + if (vop2 + && vec_cvt_dest + && !useless_type_conversion_p (vectype, TREE_TYPE (vop2))) + { + new_temp = build1 (VIEW_CONVERT_EXPR, vectype, vop2); + new_stmt = gimple_build_assign (vec_dest, VIEW_CONVERT_EXPR, + new_temp); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vop2 = new_temp; + } + if (using_emulated_vectors_p) { /* Lower the operation. This follows vector lowering. */ @@ -7725,7 +7794,6 @@ vectorizable_store (vec_info *vinfo, unsigned int vec_num; bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); tree aggr_type; - gather_scatter_info gs_info; poly_uint64 vf; vec_load_store_type vls_type; tree ref_type; @@ -7777,7 +7845,7 @@ vectorizable_store (vec_info *vinfo, return false; } - tree vectype = SLP_TREE_VECTYPE (stmt_info), rhs_vectype = NULL_TREE; + tree vectype = SLP_TREE_VECTYPE (slp_node), rhs_vectype = NULL_TREE; poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); if (loop_vinfo) @@ -7810,16 +7878,19 @@ vectorizable_store (vec_info *vinfo, if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - vect_memory_access_type memory_access_type; - enum dr_alignment_support alignment_support_scheme; - int misalignment; - poly_int64 poffset; - internal_fn lanes_ifn; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, - vls_type, &memory_access_type, &poffset, - &alignment_support_scheme, &misalignment, &gs_info, - &lanes_ifn)) + vect_load_store_data _ls_data; + vect_load_store_data &ls = slp_node->get_data (_ls_data); + if (cost_vec + && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, + vls_type, &_ls_data)) return false; + /* Temporary aliases to analysis data, should not be modified through + these. */ + const vect_memory_access_type memory_access_type = ls.memory_access_type; + const dr_alignment_support alignment_support_scheme + = ls.alignment_support_scheme; + const int misalignment = ls.misalignment; + const poly_int64 poffset = ls.poffset; if (slp_node->ldst_lanes && memory_access_type != VMAT_LOAD_STORE_LANES) @@ -7840,8 +7911,8 @@ vectorizable_store (vec_info *vinfo, return false; } else if (memory_access_type != VMAT_LOAD_STORE_LANES - && (memory_access_type != VMAT_GATHER_SCATTER - || (GATHER_SCATTER_LEGACY_P (gs_info) + && (!mat_gather_scatter_p (memory_access_type) + || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY && !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))) { if (dump_enabled_p ()) @@ -7849,8 +7920,7 @@ vectorizable_store (vec_info *vinfo, "unsupported access type for masked store.\n"); return false; } - else if (memory_access_type == VMAT_GATHER_SCATTER - && GATHER_SCATTER_EMULATED_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7868,7 +7938,7 @@ vectorizable_store (vec_info *vinfo, dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL; grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info) - && memory_access_type != VMAT_GATHER_SCATTER); + && !mat_gather_scatter_p (memory_access_type)); if (grouped_store) { first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); @@ -7892,13 +7962,10 @@ vectorizable_store (vec_info *vinfo, bool costing_p = cost_vec; if (costing_p) /* transformation not required. */ { - SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; - if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, - vls_type, group_size, - memory_access_type, &gs_info, + vls_type, group_size, &ls, mask_node); if (!vect_maybe_update_slp_op_vectype (op_node, vectype) @@ -7921,8 +7988,8 @@ vectorizable_store (vec_info *vinfo, "Vectorizing an unaligned access.\n"); SLP_TREE_TYPE (slp_node) = store_vec_info_type; + slp_node->data = new vect_load_store_data (std::move (ls)); } - gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); /* Transform. */ @@ -7937,7 +8004,7 @@ vectorizable_store (vec_info *vinfo, unsigned int inside_cost = 0, prologue_cost = 0; if (vls_type == VLS_STORE_INVARIANT) prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, - stmt_info, 0, vect_prologue); + slp_node, 0, vect_prologue); vect_get_store_cost (vinfo, stmt_info, slp_node, 1, alignment_support_scheme, misalignment, &inside_cost, cost_vec); @@ -8017,6 +8084,14 @@ vectorizable_store (vec_info *vinfo, ... */ + /* ??? Modify local copies of alignment_support_scheme and + misalignment, but this part of analysis should be done + earlier and remembered, likewise the chosen load mode. */ + const dr_alignment_support tem = alignment_support_scheme; + dr_alignment_support alignment_support_scheme = tem; + const int tem2 = misalignment; + int misalignment = tem2; + unsigned nstores = const_nunits; unsigned lnel = 1; tree ltype = elem_type; @@ -8282,12 +8357,13 @@ vectorizable_store (vec_info *vinfo, aggr_type = NULL_TREE; bump = NULL_TREE; } - else if (memory_access_type == VMAT_GATHER_SCATTER) + else if (mat_gather_scatter_p (memory_access_type)) { aggr_type = elem_type; if (!costing_p) - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, + vect_get_strided_load_store_ops (stmt_info, slp_node, vectype, + ls.strided_offset_vectype, + loop_vinfo, gsi, &bump, &vec_offset, loop_lens); } else @@ -8318,6 +8394,8 @@ vectorizable_store (vec_info *vinfo, if (memory_access_type == VMAT_LOAD_STORE_LANES) { + const internal_fn lanes_ifn = ls.lanes_ifn; + if (costing_p) /* Update all incoming store operand nodes, the general handling above only handles the mask and the first store operand node. */ @@ -8470,7 +8548,7 @@ vectorizable_store (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) { gcc_assert (!grouped_store); auto_vec<tree> vec_offsets; @@ -8494,7 +8572,7 @@ vectorizable_store (vec_info *vinfo, vect_get_slp_defs (mask_node, &vec_masks); if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - vect_get_gather_scatter_ops (loop, slp_node, &gs_info, + vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr, &vec_offsets); else dataref_ptr @@ -8538,7 +8616,7 @@ vectorizable_store (vec_info *vinfo, unsigned align = get_object_alignment (DR_REF (first_dr_info->dr)); tree alias_align_ptr = build_int_cst (ref_type, align); - if (GATHER_SCATTER_IFN_P (gs_info)) + if (memory_access_type == VMAT_GATHER_SCATTER_IFN) { if (costing_p) { @@ -8552,9 +8630,9 @@ vectorizable_store (vec_info *vinfo, if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) vec_offset = vec_offsets[j]; - tree scale = size_int (gs_info.scale); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); - if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE) + if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE) { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, @@ -8608,7 +8686,7 @@ vectorizable_store (vec_info *vinfo, vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); new_stmt = call; } - else if (GATHER_SCATTER_LEGACY_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY) { /* The builtin decls path for scatter is legacy, x86 only. */ gcc_assert (nunits.is_constant () @@ -8624,13 +8702,14 @@ vectorizable_store (vec_info *vinfo, continue; } + tree offset_vectype = TREE_TYPE (vec_offsets[0]); poly_uint64 offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); + = TYPE_VECTOR_SUBPARTS (offset_vectype); if (known_eq (nunits, offset_nunits)) { new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, - dataref_ptr, vec_offsets[j], + (vinfo, stmt_info, slp_node, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[j], vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); @@ -8641,7 +8720,7 @@ vectorizable_store (vec_info *vinfo, lanes but the builtins will store full vectype data from the lower lanes. */ new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, + (vinfo, stmt_info, slp_node, gsi, ls.gs.decl, dataref_ptr, vec_offsets[2 * j], vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, @@ -8667,14 +8746,14 @@ vectorizable_store (vec_info *vinfo, VEC_UNPACK_HI_EXPR, final_mask); final_mask = make_ssa_name - (truth_type_for (gs_info.offset_vectype)); + (truth_type_for (offset_vectype)); gimple_set_lhs (new_stmt, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); } new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, + (vinfo, stmt_info, slp_node, gsi, ls.gs.decl, dataref_ptr, vec_offsets[2 * j + 1], vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, @@ -8707,8 +8786,8 @@ vectorizable_store (vec_info *vinfo, } new_stmt = vect_build_one_scatter_store_call - (vinfo, stmt_info, gsi, &gs_info, - dataref_ptr, vec_offset, + (vinfo, stmt_info, slp_node, gsi, + ls.gs.decl, dataref_ptr, vec_offset, vec_oprnd, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); @@ -8739,9 +8818,10 @@ vectorizable_store (vec_info *vinfo, continue; } + tree offset_vectype = TREE_TYPE (vec_offsets[0]); unsigned HOST_WIDE_INT const_nunits = nunits.to_constant (); unsigned HOST_WIDE_INT const_offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant (); + = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant (); vec<constructor_elt, va_gc> *ctor_elts; vec_alloc (ctor_elts, const_nunits); gimple_seq stmts = NULL; @@ -8756,7 +8836,7 @@ vectorizable_store (vec_info *vinfo, unsigned elt_offset = (j % factor) * const_nunits; tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); - tree scale = size_int (gs_info.scale); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); tree ltype = build_aligned_type (TREE_TYPE (vectype), align); for (unsigned k = 0; k < const_nunits; ++k) { @@ -9235,7 +9315,6 @@ vectorizable_load (vec_info *vinfo, bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); poly_uint64 vf; tree aggr_type; - gather_scatter_info gs_info; tree ref_type; enum vect_def_type mask_dt = vect_unknown_def_type; enum vect_def_type els_dt = vect_unknown_def_type; @@ -9369,20 +9448,23 @@ vectorizable_load (vec_info *vinfo, else group_size = 1; - vect_memory_access_type memory_access_type; - enum dr_alignment_support alignment_support_scheme; - int misalignment; - poly_int64 poffset; - internal_fn lanes_ifn; - auto_vec<int> elsvals; - int maskload_elsval = 0; - bool need_zeroing = false; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, - VLS_LOAD, &memory_access_type, &poffset, - &alignment_support_scheme, &misalignment, &gs_info, - &lanes_ifn, &elsvals)) + vect_load_store_data _ls_data; + vect_load_store_data &ls = slp_node->get_data (_ls_data); + if (cost_vec + && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, + VLS_LOAD, &ls)) return false; + /* Temporary aliases to analysis data, should not be modified through + these. */ + const vect_memory_access_type memory_access_type = ls.memory_access_type; + const dr_alignment_support alignment_support_scheme + = ls.alignment_support_scheme; + const int misalignment = ls.misalignment; + const poly_int64 poffset = ls.poffset; + const vec<int> &elsvals = ls.elsvals; + int maskload_elsval = 0; + bool need_zeroing = false; /* We might need to explicitly zero inactive elements if there are padding bits in the type that might leak otherwise. @@ -9395,7 +9477,7 @@ vectorizable_load (vec_info *vinfo, get_load_store_type. */ if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists () && !((memory_access_type == VMAT_ELEMENTWISE - || memory_access_type == VMAT_GATHER_SCATTER) + || mat_gather_scatter_p (memory_access_type)) && SLP_TREE_LANES (slp_node) == 1)) { slp_perm = true; @@ -9453,19 +9535,18 @@ vectorizable_load (vec_info *vinfo, if (!VECTOR_MODE_P (vec_mode) || !can_vec_mask_load_store_p (vec_mode, TYPE_MODE (mask_vectype), - true, NULL, &elsvals)) + true, NULL, &ls.elsvals)) return false; } else if (memory_access_type != VMAT_LOAD_STORE_LANES - && memory_access_type != VMAT_GATHER_SCATTER) + && !mat_gather_scatter_p (memory_access_type)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "unsupported access type for masked load.\n"); return false; } - else if (memory_access_type == VMAT_GATHER_SCATTER - && GATHER_SCATTER_EMULATED_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9496,18 +9577,15 @@ vectorizable_load (vec_info *vinfo, return false; } - SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; - if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, - VLS_LOAD, group_size, - memory_access_type, &gs_info, - mask_node, &elsvals); + VLS_LOAD, group_size, &ls, + mask_node, &ls.elsvals); if (dump_enabled_p () && memory_access_type != VMAT_ELEMENTWISE - && memory_access_type != VMAT_GATHER_SCATTER + && !mat_gather_scatter_p (memory_access_type) && memory_access_type != VMAT_STRIDED_SLP && memory_access_type != VMAT_INVARIANT && alignment_support_scheme != dr_aligned) @@ -9518,16 +9596,7 @@ vectorizable_load (vec_info *vinfo, vinfo->any_known_not_updated_vssa = true; SLP_TREE_TYPE (slp_node) = load_vec_info_type; - } - else - { - /* Here just get the else values. */ - if (loop_vinfo - && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) - check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, - VLS_LOAD, group_size, - memory_access_type, &gs_info, - mask_node, &elsvals); + slp_node->data = new vect_load_store_data (std::move (ls)); } /* If the type needs padding we must zero inactive elements. @@ -9550,8 +9619,6 @@ vectorizable_load (vec_info *vinfo, if (elsvals.length ()) maskload_elsval = *elsvals.begin (); - gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); - if (dump_enabled_p () && !costing_p) dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n"); @@ -9720,6 +9787,13 @@ vectorizable_load (vec_info *vinfo, tree ltype = TREE_TYPE (vectype); tree lvectype = vectype; auto_vec<tree> dr_chain; + /* ??? Modify local copies of alignment_support_scheme and + misalignment, but this part of analysis should be done + earlier and remembered, likewise the chosen load mode. */ + const dr_alignment_support tem = alignment_support_scheme; + dr_alignment_support alignment_support_scheme = tem; + const int tem2 = misalignment; + int misalignment = tem2; if (memory_access_type == VMAT_STRIDED_SLP) { HOST_WIDE_INT n = gcd (group_size, const_nunits); @@ -9943,7 +10017,7 @@ vectorizable_load (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) grouped_load = false; if (grouped_load @@ -10039,7 +10113,7 @@ vectorizable_load (vec_info *vinfo, gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask_node && !loop_masks) - || memory_access_type == VMAT_GATHER_SCATTER + || mat_gather_scatter_p (memory_access_type) || alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); @@ -10167,6 +10241,8 @@ vectorizable_load (vec_info *vinfo, tree vec_els = NULL_TREE; if (memory_access_type == VMAT_LOAD_STORE_LANES) { + const internal_fn lanes_ifn = ls.lanes_ifn; + gcc_assert (alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); @@ -10332,7 +10408,7 @@ vectorizable_load (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER) + if (mat_gather_scatter_p (memory_access_type)) { gcc_assert (!grouped_load && !slp_perm); @@ -10342,7 +10418,7 @@ vectorizable_load (vec_info *vinfo, aggr_type = NULL_TREE; bump = NULL_TREE; if (!costing_p) - vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr, + vect_get_gather_scatter_ops (loop, slp_node, &dataref_ptr, &vec_offsets); } else @@ -10350,8 +10426,9 @@ vectorizable_load (vec_info *vinfo, aggr_type = elem_type; if (!costing_p) { - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, + vect_get_strided_load_store_ops (stmt_info, slp_node, vectype, + ls.strided_offset_vectype, + loop_vinfo, gsi, &bump, &vec_offset, loop_lens); dataref_ptr = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, @@ -10387,7 +10464,7 @@ vectorizable_load (vec_info *vinfo, /* 2. Create the vector-load in the loop. */ unsigned align = get_object_alignment (DR_REF (first_dr_info->dr)); tree alias_align_ptr = build_int_cst (ref_type, align); - if (GATHER_SCATTER_IFN_P (gs_info)) + if (memory_access_type == VMAT_GATHER_SCATTER_IFN) { if (costing_p) { @@ -10400,9 +10477,9 @@ vectorizable_load (vec_info *vinfo, if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) vec_offset = vec_offsets[i]; tree zero = build_zero_cst (vectype); - tree scale = size_int (gs_info.scale); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); - if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD) + if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD) { if (loop_lens) final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens, @@ -10462,7 +10539,7 @@ vectorizable_load (vec_info *vinfo, new_stmt = call; data_ref = NULL_TREE; } - else if (GATHER_SCATTER_LEGACY_P (gs_info)) + else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY) { /* The builtin decls path for gather is legacy, x86 only. */ gcc_assert (!final_len && nunits.is_constant ()); @@ -10474,13 +10551,14 @@ vectorizable_load (vec_info *vinfo, slp_node, 0, vect_body); continue; } - poly_uint64 offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); + tree offset_vectype = TREE_TYPE (vec_offsets[0]); + poly_uint64 offset_nunits = TYPE_VECTOR_SUBPARTS (offset_vectype); if (known_eq (nunits, offset_nunits)) { new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, - dataref_ptr, vec_offsets[i], final_mask); + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[i], + final_mask); data_ref = NULL_TREE; } else if (known_eq (nunits, offset_nunits * 2)) @@ -10489,8 +10567,9 @@ vectorizable_load (vec_info *vinfo, lanes but the builtins will produce full vectype data with just the lower lanes filled. */ new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, - dataref_ptr, vec_offsets[2 * i], final_mask); + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[2 * i], + final_mask); tree low = make_ssa_name (vectype); gimple_set_lhs (new_stmt, low); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); @@ -10521,15 +10600,15 @@ vectorizable_load (vec_info *vinfo, VEC_UNPACK_HI_EXPR, final_mask); final_mask = make_ssa_name - (truth_type_for (gs_info.offset_vectype)); + (truth_type_for (offset_vectype)); gimple_set_lhs (new_stmt, final_mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); } new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, - dataref_ptr, + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offsets[2 * i + 1], final_mask); tree high = make_ssa_name (vectype); gimple_set_lhs (new_stmt, high); @@ -10572,7 +10651,8 @@ vectorizable_load (vec_info *vinfo, new_stmt, gsi); } new_stmt = vect_build_one_gather_load_call - (vinfo, stmt_info, vectype, gsi, &gs_info, + (vinfo, stmt_info, slp_node, vectype, gsi, + ls.gs.decl, dataref_ptr, vec_offset, final_mask); data_ref = NULL_TREE; } @@ -10601,8 +10681,9 @@ vectorizable_load (vec_info *vinfo, slp_node, 0, vect_body); continue; } + tree offset_vectype = TREE_TYPE (vec_offsets[0]); unsigned HOST_WIDE_INT const_offset_nunits - = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype) .to_constant (); + = TYPE_VECTOR_SUBPARTS (offset_vectype).to_constant (); vec<constructor_elt, va_gc> *ctor_elts; vec_alloc (ctor_elts, const_nunits); gimple_seq stmts = NULL; @@ -10613,7 +10694,7 @@ vectorizable_load (vec_info *vinfo, vec_offset = vec_offsets[i / factor]; unsigned elt_offset = (i % factor) * const_nunits; tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset)); - tree scale = size_int (gs_info.scale); + tree scale = size_int (SLP_TREE_GS_SCALE (slp_node)); tree ltype = build_aligned_type (TREE_TYPE (vectype), align); for (unsigned k = 0; k < const_nunits; ++k) { @@ -11479,20 +11560,24 @@ vectorizable_condition (vec_info *vinfo, if (code != COND_EXPR) return false; - stmt_vec_info reduc_info = NULL; - int reduc_index = -1; + int reduc_index = SLP_TREE_REDUC_IDX (slp_node); vect_reduction_type reduction_type = TREE_CODE_REDUCTION; - bool for_reduction - = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; + bool nested_cycle_p = false; + bool for_reduction = vect_is_reduction (stmt_info); if (for_reduction) { if (SLP_TREE_LANES (slp_node) > 1) return false; - reduc_info = info_for_reduction (vinfo, stmt_info); - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); - reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); - gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION - || reduc_index != -1); + /* ??? With a reduction path we do not get at the reduction info from + every stmt, use the conservative default setting then. */ + if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))) + { + vect_reduc_info reduc_info + = info_for_reduction (loop_vinfo, slp_node); + reduction_type = VECT_REDUC_INFO_TYPE (reduc_info); + nested_cycle_p = nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), + stmt_info); + } } else { @@ -11682,7 +11767,7 @@ vectorizable_condition (vec_info *vinfo, vec_num, vectype, NULL); } /* Extra inactive lanes should be safe for vect_nested_cycle. */ - else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle) + else if (!nested_cycle_p) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -12208,13 +12293,11 @@ vectorizable_comparison (vec_info *vinfo, vectorization. */ bool -vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, +vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, slp_tree slp_node, stmt_vector_for_cost *cost_vec) { - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); - if (!loop_vinfo - || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) + if (!is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) return false; if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def) @@ -12279,7 +12362,7 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, return false; } - if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, + if (!vectorizable_comparison_1 (loop_vinfo, vectype, stmt_info, code, gsi, slp_node, cost_vec)) return false; @@ -12515,20 +12598,22 @@ vect_analyze_stmt (vec_info *vinfo, gcc_unreachable (); } - tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); - STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node); + if (! STMT_VINFO_DATA_REF (stmt_info)) + STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE; + else + STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node); if (STMT_VINFO_RELEVANT_P (stmt_info)) { gcall *call = dyn_cast <gcall *> (stmt_info->stmt); - gcc_assert (STMT_VINFO_VECTYPE (stmt_info) + gcc_assert (SLP_TREE_VECTYPE (node) || gimple_code (stmt_info->stmt) == GIMPLE_COND || (call && gimple_call_lhs (call) == NULL_TREE)); } ok = true; - if (!bb_vinfo - && (STMT_VINFO_RELEVANT_P (stmt_info) + if (bb_vinfo + || (STMT_VINFO_RELEVANT_P (stmt_info) || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)) /* Prefer vectorizable_call over vectorizable_simd_clone_call so -mveclibabi= takes preference over library functions with @@ -12536,60 +12621,31 @@ vect_analyze_stmt (vec_info *vinfo, ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec) || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_conversion (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_operation (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_assignment (vinfo, stmt_info, - NULL, node, cost_vec) + || vectorizable_conversion (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_operation (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_assignment (vinfo, stmt_info, NULL, node, cost_vec) || vectorizable_load (vinfo, stmt_info, NULL, node, cost_vec) || vectorizable_store (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo), - stmt_info, node, cost_vec) - || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info, - node, node_instance, cost_vec) - || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info, - node, cost_vec) || vectorizable_shift (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_condition (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_comparison (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), - stmt_info, node) - || vectorizable_recurr (as_a <loop_vec_info> (vinfo), - stmt_info, node, cost_vec) - || vectorizable_early_exit (vinfo, stmt_info, NULL, node, - cost_vec)); - else - { - if (bb_vinfo) - ok = (vectorizable_call (vinfo, stmt_info, NULL, node, cost_vec) - || vectorizable_simd_clone_call (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_conversion (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_shift (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_operation (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_assignment (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_load (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_store (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_condition (vinfo, stmt_info, - NULL, node, cost_vec) - || vectorizable_comparison (vinfo, stmt_info, NULL, node, - cost_vec) - || vectorizable_phi (vinfo, stmt_info, node, cost_vec) - || vectorizable_early_exit (vinfo, stmt_info, NULL, node, - cost_vec)); - - } - - STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; + || vectorizable_condition (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_comparison (vinfo, stmt_info, NULL, node, cost_vec) + || (bb_vinfo + && vectorizable_phi (bb_vinfo, stmt_info, node, cost_vec)) + || (is_a <loop_vec_info> (vinfo) + && (vectorizable_lane_reducing (as_a <loop_vec_info> (vinfo), + stmt_info, node, cost_vec) + || vectorizable_reduction (as_a <loop_vec_info> (vinfo), + stmt_info, + node, node_instance, cost_vec) + || vectorizable_induction (as_a <loop_vec_info> (vinfo), + stmt_info, node, cost_vec) + || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), + stmt_info, node) + || vectorizable_recurr (as_a <loop_vec_info> (vinfo), + stmt_info, node, cost_vec) + || vectorizable_early_exit (as_a <loop_vec_info> (vinfo), + stmt_info, NULL, node, + cost_vec)))); if (!ok) return opt_result::failure_at (stmt_info->stmt, @@ -12602,8 +12658,8 @@ vect_analyze_stmt (vec_info *vinfo, if (!bb_vinfo && SLP_TREE_TYPE (node) != reduc_vec_info_type && (SLP_TREE_TYPE (node) != lc_phi_info_type - || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) - && (!node->ldst_lanes || SLP_TREE_CODE (node) == VEC_PERM_EXPR) + || SLP_TREE_DEF_TYPE (node) == vect_internal_def) + && (!node->ldst_lanes || SLP_TREE_PERMUTE_P (node)) && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo), node, node_instance, false, cost_vec)) @@ -12634,8 +12690,10 @@ vect_transform_stmt (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "------>vectorizing statement: %G", stmt_info->stmt); - tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info); - STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node); + if (! STMT_VINFO_DATA_REF (stmt_info)) + STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE; + else + STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node); switch (SLP_TREE_TYPE (slp_node)) { @@ -12734,12 +12792,14 @@ vect_transform_stmt (vec_info *vinfo, break; case phi_info_type: - done = vectorizable_phi (vinfo, stmt_info, slp_node, NULL); + done = vectorizable_phi (as_a <bb_vec_info> (vinfo), + stmt_info, slp_node, NULL); gcc_assert (done); break; case loop_exit_ctrl_vec_info_type: - done = vectorizable_early_exit (vinfo, stmt_info, gsi, slp_node, NULL); + done = vectorizable_early_exit (as_a <loop_vec_info> (vinfo), + stmt_info, gsi, slp_node, NULL); gcc_assert (done); break; @@ -12755,8 +12815,7 @@ vect_transform_stmt (vec_info *vinfo, } if (SLP_TREE_TYPE (slp_node) != store_vec_info_type - && (!slp_node->ldst_lanes - || SLP_TREE_CODE (slp_node) == VEC_PERM_EXPR)) + && (!slp_node->ldst_lanes || SLP_TREE_PERMUTE_P (slp_node))) { /* Handle stmts whose DEF is used outside the loop-nest that is being vectorized. */ @@ -12765,8 +12824,6 @@ vect_transform_stmt (vec_info *vinfo, gcc_assert (done); } - STMT_VINFO_VECTYPE (stmt_info) = saved_vectype; - return is_store; } @@ -13228,7 +13285,7 @@ vect_is_simple_use (vec_info *vinfo, slp_tree slp_node, } else { - gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR); + gcc_assert (SLP_TREE_PERMUTE_P (child)); *op = error_mark_node; *dt = vect_internal_def; if (def_stmt_info_out) |