author     Ian Lance Taylor <iant@golang.org>  2021-09-13 10:37:49 -0700
committer  Ian Lance Taylor <iant@golang.org>  2021-09-13 10:37:49 -0700
commit     e252b51ccde010cbd2a146485d8045103cd99533
tree       e060f101cdc32bf5e520de8e5275db9d4236b74c  /gcc/tree-vect-stmts.c
parent     f10c7c4596dda99d2ee872c995ae4aeda65adbdf
parent     104c05c5284b7822d770ee51a7d91946c7e56d50
Merge from trunk revision 104c05c5284b7822d770ee51a7d91946c7e56d50.
Diffstat (limited to 'gcc/tree-vect-stmts.c')
-rw-r--r--  gcc/tree-vect-stmts.c  405
1 file changed, 264 insertions(+), 141 deletions(-)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index d791d3a..4e0b2ad 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -836,20 +836,24 @@ vect_model_simple_cost (vec_info *,
one if two-step promotion/demotion is required, and so on. NCOPIES
is the number of vector results (and thus number of instructions)
for the narrowest end of the operation chain. Each additional
- step doubles the number of instructions required. */
+ step doubles the number of instructions required. If WIDEN_ARITH
+ is true the stmt is doing widening arithmetic. */
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
enum vect_def_type *dt,
unsigned int ncopies, int pwr,
- stmt_vector_for_cost *cost_vec)
+ stmt_vector_for_cost *cost_vec,
+ bool widen_arith)
{
int i;
int inside_cost = 0, prologue_cost = 0;
for (i = 0; i < pwr + 1; i++)
{
- inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
+ inside_cost += record_stmt_cost (cost_vec, ncopies,
+ widen_arith
+ ? vector_stmt : vec_promote_demote,
stmt_info, 0, vect_body);
ncopies *= 2;
}
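/* A standalone sketch of the accounting above, not part of the patch:
   each of the PWR + 1 steps records NCOPIES stmt costs and then doubles
   NCOPIES, so e.g. ncopies = 2 with pwr = 2 yields 2 + 4 + 8 = 14.  */
static int
promotion_demotion_cost_sketch (int ncopies, int pwr)
{
  int inside_cost = 0;
  for (int i = 0; i < pwr + 1; i++)
    {
      /* One vector_stmt (widening arithmetic) or vec_promote_demote
	 (conversion) cost per copy at this width.  */
      inside_cost += ncopies;
      ncopies *= 2;
    }
  return inside_cost;
}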
@@ -1080,6 +1084,7 @@ static void
vect_model_load_cost (vec_info *vinfo,
stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
vect_memory_access_type memory_access_type,
+ gather_scatter_info *gs_info,
slp_tree slp_node,
stmt_vector_for_cost *cost_vec)
{
@@ -1168,9 +1173,17 @@ vect_model_load_cost (vec_info *vinfo,
if (memory_access_type == VMAT_ELEMENTWISE
|| memory_access_type == VMAT_GATHER_SCATTER)
{
- /* N scalar loads plus gathering them into a vector. */
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+ if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info->ifn == IFN_LAST && !gs_info->decl)
+ /* For emulated gathers, N offset vector element extracts
(we assume the scalar scaling and the ptr + offset add are consumed by
+ the load). */
+ inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
+ vec_to_scalar, stmt_info, 0,
+ vect_body);
+ /* N scalar loads plus gathering them into a vector. */
inside_cost += record_stmt_cost (cost_vec,
ncopies * assumed_nunits,
scalar_load, stmt_info, 0, vect_body);
@@ -1180,7 +1193,9 @@ vect_model_load_cost (vec_info *vinfo,
&inside_cost, &prologue_cost,
cost_vec, cost_vec, true);
if (memory_access_type == VMAT_ELEMENTWISE
- || memory_access_type == VMAT_STRIDED_SLP)
+ || memory_access_type == VMAT_STRIDED_SLP
+ || (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info->ifn == IFN_LAST && !gs_info->decl))
inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
stmt_info, 0, vect_body);
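/* A standalone sketch of the emulated-gather load costing above, not
   part of the patch: per copy, one vec_to_scalar extract and one
   scalar_load per element, plus one vec_construct for the result, so
   e.g. ncopies = 2 with nunits = 4 records 8 + 8 + 2 cost entries.  */
static int
emulated_gather_cost_sketch (int ncopies, int assumed_nunits)
{
  int inside_cost = 0;
  inside_cost += ncopies * assumed_nunits;	/* offset extracts */
  inside_cost += ncopies * assumed_nunits;	/* scalar loads */
  inside_cost += ncopies;			/* vector construction */
  return inside_cost;
}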
@@ -1862,7 +1877,8 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
tree memory_type = TREE_TYPE (DR_REF (dr));
if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
vectype, memory_type, offset_type, scale,
- &gs_info->ifn, &gs_info->offset_vectype))
+ &gs_info->ifn, &gs_info->offset_vectype)
+ || gs_info->ifn == IFN_LAST)
continue;
gs_info->decl = NULL_TREE;
@@ -1897,7 +1913,7 @@ vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
gather_scatter_info *gs_info)
{
if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
- || gs_info->decl)
+ || gs_info->ifn == IFN_LAST)
return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
masked_p, gs_info);
@@ -2351,6 +2367,29 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
vls_type == VLS_LOAD ? "gather" : "scatter");
return false;
}
+ else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
+ {
+ if (vls_type != VLS_LOAD)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported emulated scatter.\n");
+ return false;
+ }
+ else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
+ || !TYPE_VECTOR_SUBPARTS
+ (gs_info->offset_vectype).is_constant ()
+ || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
+ (gs_info->offset_vectype),
+ TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported vector types for emulated "
+ "gather.\n");
+ return false;
+ }
+ }
/* Gather-scatter accesses perform only component accesses, alignment
is irrelevant for them. */
*alignment_support_scheme = dr_unaligned_supported;
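/* The shape of the constraint above, as a standalone sketch (not part
   of the patch): scatters are rejected outright, and an emulated gather
   needs compile-time constant lane counts with the offset vector
   holding a whole multiple of the data vector's lanes, e.g. V4SI data
   with V4SI or V8SI offsets; variable-length vectors fail the
   is_constant tests.  */
static bool
emulated_gather_lanes_ok_sketch (unsigned data_nunits,
				 unsigned offset_nunits)
{
  return offset_nunits % data_nunits == 0;
}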
@@ -2439,39 +2478,47 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
return true;
}
-/* Return true if boolean argument MASK is suitable for vectorizing
- conditional operation STMT_INFO. When returning true, store the type
- of the definition in *MASK_DT_OUT and the type of the vectorized mask
- in *MASK_VECTYPE_OUT. */
+/* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
+ conditional operation STMT_INFO. When returning true, store the mask
+ in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
+ vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
+ to the mask in *MASK_NODE if MASK_NODE is not NULL. */
static bool
-vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
- vect_def_type *mask_dt_out,
- tree *mask_vectype_out)
+vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
+ slp_tree slp_node, unsigned mask_index,
+ tree *mask, slp_tree *mask_node,
+ vect_def_type *mask_dt_out, tree *mask_vectype_out)
{
- if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
+ enum vect_def_type mask_dt;
+ tree mask_vectype;
+ slp_tree mask_node_1;
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
+ mask, &mask_node_1, &mask_dt, &mask_vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "mask argument is not a boolean.\n");
+ "mask use not simple.\n");
return false;
}
- if (TREE_CODE (mask) != SSA_NAME)
+ if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "mask argument is not an SSA name.\n");
+ "mask argument is not a boolean.\n");
return false;
}
- enum vect_def_type mask_dt;
- tree mask_vectype;
- if (!vect_is_simple_use (mask, vinfo, &mask_dt, &mask_vectype))
+ /* If the caller is not prepared to adjust an external/constant
SLP mask vector type, fail. */
+ if (slp_node
+ && !mask_node
+ && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "mask use not simple.\n");
+ "SLP mask argument is not vectorized.\n");
return false;
}
@@ -2501,6 +2548,8 @@ vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
*mask_dt_out = mask_dt;
*mask_vectype_out = mask_vectype;
+ if (mask_node)
+ *mask_node = mask_node_1;
return true;
}
@@ -2525,10 +2574,18 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
}
+ unsigned op_no = 0;
+ if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+ {
+ if (gimple_call_internal_p (call)
+ && internal_store_fn_p (gimple_call_internal_fn (call)))
+ op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
+ }
+
enum vect_def_type rhs_dt;
tree rhs_vectype;
slp_tree slp_op;
- if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+ if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
&rhs, &slp_op, &rhs_dt, &rhs_vectype))
{
if (dump_enabled_p ())
@@ -2865,11 +2922,10 @@ vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
containing loop. */
static void
-vect_get_gather_scatter_ops (vec_info *vinfo,
+vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
class loop *loop, stmt_vec_info stmt_info,
gather_scatter_info *gs_info,
- tree *dataref_ptr, vec<tree> *vec_offset,
- unsigned ncopies)
+ tree *dataref_ptr, vec<tree> *vec_offset)
{
gimple_seq stmts = NULL;
*dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
@@ -2880,8 +2936,10 @@ vect_get_gather_scatter_ops (vec_info *vinfo,
new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
gcc_assert (!new_bb);
}
- vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, gs_info->offset,
- vec_offset, gs_info->offset_vectype);
+ unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
+ vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
+ gs_info->offset, vec_offset,
+ gs_info->offset_vectype);
}
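/* Worked example for the NCOPIES change above (types hypothetical):
   with VF = 8, V4DF data needs 8 / 4 = 2 vector defs per scalar stmt,
   but V8SI offsets need only 8 / 8 = 1, so the copy count for the
   offset operand must come from the offset vector type itself:

     vect_get_num_copies (loop_vinfo, gs_info->offset_vectype)

   rather than from the caller's data-vector NCOPIES.  */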
/* Prepare to implement a grouped or strided load or store using
@@ -3163,9 +3221,8 @@ vectorizable_call (vec_info *vinfo,
{
if ((int) i == mask_opno)
{
- op = gimple_call_arg (stmt, i);
- if (!vect_check_scalar_mask (vinfo,
- stmt_info, op, &dt[i], &vectypes[i]))
+ if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
+ &op, &slp_op[i], &dt[i], &vectypes[i]))
return false;
continue;
}
@@ -4437,7 +4494,7 @@ static void
vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
int multi_step_cvt,
stmt_vec_info stmt_info,
- vec<tree> vec_dsts,
+ vec<tree> &vec_dsts,
gimple_stmt_iterator *gsi,
slp_tree slp_node, enum tree_code code)
{
@@ -4665,6 +4722,10 @@ vectorizable_conversion (vec_info *vinfo,
&& code != WIDEN_LSHIFT_EXPR)
return false;
+ bool widen_arith = (code == WIDEN_PLUS_EXPR
+ || code == WIDEN_MINUS_EXPR
+ || code == WIDEN_MULT_EXPR
+ || code == WIDEN_LSHIFT_EXPR);
op_type = TREE_CODE_LENGTH (code);
/* Check types of lhs and rhs. */
@@ -4754,10 +4815,7 @@ vectorizable_conversion (vec_info *vinfo,
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (known_eq (nunits_out, nunits_in))
- if (code == WIDEN_MINUS_EXPR
- || code == WIDEN_PLUS_EXPR
- || code == WIDEN_LSHIFT_EXPR
- || code == WIDEN_MULT_EXPR)
+ if (widen_arith)
modifier = WIDEN;
else
modifier = NONE;
@@ -4934,7 +4992,8 @@ vectorizable_conversion (vec_info *vinfo,
unsigned int nvectors
= (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
- multi_step_cvt, cost_vec);
+ multi_step_cvt, cost_vec,
+ widen_arith);
}
else
{
@@ -4947,7 +5006,8 @@ vectorizable_conversion (vec_info *vinfo,
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
: ncopies * 2);
vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
- multi_step_cvt, cost_vec);
+ multi_step_cvt, cost_vec,
+ widen_arith);
}
interm_types.release ();
return true;
@@ -5617,26 +5677,11 @@ vectorizable_shift (vec_info *vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"op not supported by target.\n");
- /* Check only during analysis. */
- if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
- || (!vec_stmt
- && !vect_worthwhile_without_simd_p (vinfo, code)))
- return false;
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "proceeding using word mode.\n");
- }
-
- /* Worthwhile without SIMD support? Check only during analysis. */
- if (!vec_stmt
- && !VECTOR_MODE_P (TYPE_MODE (vectype))
- && !vect_worthwhile_without_simd_p (vinfo, code))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not worthwhile without SIMD support.\n");
return false;
}
+ /* Vector lowering cannot optimize vector shifts using word arithmetic. */
+ if (vect_emulated_vector_p (vectype))
+ return false;
if (!vec_stmt) /* transformation not required. */
{
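/* Why shifts get no word-mode fallback, as a worked example (not from
   the patch): a single scalar shift of the containing word lets bits
   cross lane boundaries, e.g. for V4QI lanes in a 32-bit word

     0x80808080 << 1 == 0x01010100   (word shift leaks lane sign bits)
     per-lane << 1   == 0x00000000   (each 0x80 << 1 truncates to 0)

   so vector lowering must scalarize such shifts and vectorization is
   rejected up front for emulated vectors.  */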
@@ -6022,6 +6067,7 @@ vectorizable_operation (vec_info *vinfo,
!= CODE_FOR_nothing);
}
+ bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
if (!target_support_p)
{
if (dump_enabled_p ())
@@ -6029,21 +6075,19 @@ vectorizable_operation (vec_info *vinfo,
"op not supported by target.\n");
/* Check only during analysis. */
if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
- || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
+ || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
return false;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"proceeding using word mode.\n");
+ using_emulated_vectors_p = true;
}
- /* Worthwhile without SIMD support? Check only during analysis. */
- if (!VECTOR_MODE_P (vec_mode)
- && !vec_stmt
- && !vect_worthwhile_without_simd_p (vinfo, code))
+ if (using_emulated_vectors_p
+ && !vect_can_vectorize_without_simd_p (code))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not worthwhile without SIMD support.\n");
+ dump_printf (MSG_NOTE, "using word mode not possible.\n");
return false;
}
@@ -6091,6 +6135,29 @@ vectorizable_operation (vec_info *vinfo,
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (vinfo, stmt_info,
ncopies, dt, ndts, slp_node, cost_vec);
+ if (using_emulated_vectors_p)
+ {
+ /* The above vect_model_simple_cost call handles constants
+ in the prologue and (mis-)costs one of the stmts as
+ vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
+ for the actual lowering that will be applied. */
+ unsigned n
+ = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
+ switch (code)
+ {
+ case PLUS_EXPR:
+ n *= 5;
+ break;
+ case MINUS_EXPR:
+ n *= 6;
+ break;
+ case NEGATE_EXPR:
+ n *= 4;
+ break;
+ default:;
+ }
+ record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
+ }
return true;
}
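/* Where the multipliers come from, as a standalone sketch modelled on
   tree-vect-generic.c's word-mode lowering: SWAR addition of 8-bit
   lanes in a 32-bit word takes six scalar stmts (one of which was
   already costed as the vector stmt, leaving the five extras recorded
   above); subtraction needs one more stmt to complement the signs,
   negation slightly fewer.  The constants below assume QImode lanes.  */
static unsigned int
word_mode_plus_sketch (unsigned int a, unsigned int b)
{
  const unsigned int low_bits = 0x7f7f7f7fu;	/* all but lane sign bits */
  const unsigned int high_bits = 0x80808080u;	/* lane sign bits */
  unsigned int signs = a ^ b;			/* 1: sign-bit sums */
  unsigned int b_low = b & low_bits;		/* 2 */
  unsigned int a_low = a & low_bits;		/* 3 */
  signs &= high_bits;				/* 4 */
  /* Lane sums of the low bits cannot carry across lane borders.  */
  return (a_low + b_low) ^ signs;		/* 5: plus, 6: xor */
}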
@@ -7213,13 +7280,10 @@ vectorizable_store (vec_info *vinfo,
}
int mask_index = internal_fn_mask_index (ifn);
- if (mask_index >= 0)
- {
- mask = gimple_call_arg (call, mask_index);
- if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
- &mask_vectype))
- return false;
- }
+ if (mask_index >= 0
+ && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
+ &mask, NULL, &mask_dt, &mask_vectype))
+ return false;
}
op = vect_get_store_rhs (stmt_info);
@@ -8003,8 +8067,9 @@ vectorizable_store (vec_info *vinfo,
}
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
- vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
- &dataref_ptr, &vec_offsets, ncopies);
+ vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+ &gs_info, &dataref_ptr,
+ &vec_offsets);
vec_offset = vec_offsets[0];
}
else
@@ -8166,6 +8231,7 @@ vectorizable_store (vec_info *vinfo,
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
misalign);
+ align = least_bit_hwi (misalign | align);
if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
{
@@ -8187,7 +8253,6 @@ vectorizable_store (vec_info *vinfo,
/* Arguments are ready. Create the new vector stmt. */
if (final_mask)
{
- align = least_bit_hwi (misalign | align);
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
gcall *call
= gimple_build_call_internal (IFN_MASK_STORE, 4,
@@ -8202,7 +8267,6 @@ vectorizable_store (vec_info *vinfo,
tree final_len
= vect_get_loop_len (loop_vinfo, loop_lens,
vec_num * ncopies, vec_num * j + i);
- align = least_bit_hwi (misalign | align);
tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
machine_mode vmode = TYPE_MODE (vectype);
opt_machine_mode new_ovmode
@@ -8241,14 +8305,10 @@ vectorizable_store (vec_info *vinfo,
: build_int_cst (ref_type, 0));
if (aligned_access_p (first_dr_info))
;
- else if (DR_MISALIGNMENT (first_dr_info) == -1)
- TREE_TYPE (data_ref)
- = build_aligned_type (TREE_TYPE (data_ref),
- align * BITS_PER_UNIT);
else
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
- TYPE_ALIGN (elem_type));
+ align * BITS_PER_UNIT);
vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
new_stmt = gimple_build_assign (data_ref, vec_oprnd);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
@@ -8499,13 +8559,13 @@ vectorizable_load (vec_info *vinfo,
return false;
int mask_index = internal_fn_mask_index (ifn);
- if (mask_index >= 0)
- {
- mask = gimple_call_arg (call, mask_index);
- if (!vect_check_scalar_mask (vinfo, stmt_info, mask, &mask_dt,
- &mask_vectype))
- return false;
- }
+ if (mask_index >= 0
+ && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
+ /* ??? For SLP we only have operands for
+ the mask operand. */
+ slp_node ? 0 : mask_index,
+ &mask, NULL, &mask_dt, &mask_vectype))
+ return false;
}
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
@@ -8668,6 +8728,15 @@ vectorizable_load (vec_info *vinfo,
"unsupported access type for masked load.\n");
return false;
}
+ else if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info.ifn == IFN_LAST
+ && !gs_info.decl)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported masked emulated gather.\n");
+ return false;
+ }
}
if (!vec_stmt) /* transformation not required. */
@@ -8701,7 +8770,7 @@ vectorizable_load (vec_info *vinfo,
STMT_VINFO_TYPE (orig_stmt_info) = load_vec_info_type;
vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
- slp_node, cost_vec);
+ &gs_info, slp_node, cost_vec);
return true;
}
@@ -9303,9 +9372,9 @@ vectorizable_load (vec_info *vinfo,
}
else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
{
- vect_get_gather_scatter_ops (vinfo, loop, stmt_info, &gs_info,
- &dataref_ptr, &vec_offsets, ncopies);
- vec_offset = vec_offsets[0];
+ vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
+ &gs_info, &dataref_ptr,
+ &vec_offsets);
}
else
dataref_ptr
@@ -9322,9 +9391,7 @@ vectorizable_load (vec_info *vinfo,
if (dataref_offset)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
bump);
- else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
- vec_offset = vec_offsets[j];
- else
+ else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
if (mask)
@@ -9414,8 +9481,11 @@ vectorizable_load (vec_info *vinfo,
unsigned int misalign;
unsigned HOST_WIDE_INT align;
- if (memory_access_type == VMAT_GATHER_SCATTER)
+ if (memory_access_type == VMAT_GATHER_SCATTER
+ && gs_info.ifn != IFN_LAST)
{
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ vec_offset = vec_offsets[j];
tree zero = build_zero_cst (vectype);
tree scale = size_int (gs_info.scale);
gcall *call;
@@ -9432,6 +9502,61 @@ vectorizable_load (vec_info *vinfo,
data_ref = NULL_TREE;
break;
}
+ else if (memory_access_type == VMAT_GATHER_SCATTER)
+ {
+ /* Emulated gather-scatter. */
+ gcc_assert (!final_mask);
+ unsigned HOST_WIDE_INT const_nunits
+ = nunits.to_constant ();
+ unsigned HOST_WIDE_INT const_offset_nunits
+ = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
+ .to_constant ();
+ vec<constructor_elt, va_gc> *ctor_elts;
+ vec_alloc (ctor_elts, const_nunits);
+ gimple_seq stmts = NULL;
+ /* We support offset vectors with more elements
+ than the data vector for now. */
+ unsigned HOST_WIDE_INT factor
+ = const_offset_nunits / const_nunits;
+ vec_offset = vec_offsets[j / factor];
+ unsigned elt_offset = (j % factor) * const_nunits;
+ tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
+ tree scale = size_int (gs_info.scale);
+ align
+ = get_object_alignment (DR_REF (first_dr_info->dr));
+ tree ltype = build_aligned_type (TREE_TYPE (vectype),
+ align);
+ for (unsigned k = 0; k < const_nunits; ++k)
+ {
+ tree boff = size_binop (MULT_EXPR,
+ TYPE_SIZE (idx_type),
+ bitsize_int
+ (k + elt_offset));
+ tree idx = gimple_build (&stmts, BIT_FIELD_REF,
+ idx_type, vec_offset,
+ TYPE_SIZE (idx_type),
+ boff);
+ idx = gimple_convert (&stmts, sizetype, idx);
+ idx = gimple_build (&stmts, MULT_EXPR,
+ sizetype, idx, scale);
+ tree ptr = gimple_build (&stmts, PLUS_EXPR,
+ TREE_TYPE (dataref_ptr),
+ dataref_ptr, idx);
+ ptr = gimple_convert (&stmts, ptr_type_node, ptr);
+ tree elt = make_ssa_name (TREE_TYPE (vectype));
+ tree ref = build2 (MEM_REF, ltype, ptr,
+ build_int_cst (ref_type, 0));
+ new_stmt = gimple_build_assign (elt, ref);
+ gimple_seq_add_stmt (&stmts, new_stmt);
+ CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
+ }
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ new_stmt = gimple_build_assign (NULL_TREE,
+ build_constructor
+ (vectype, ctor_elts));
+ data_ref = NULL_TREE;
+ break;
+ }
align =
known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
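/* The GIMPLE built by the emulated-gather branch above, sketched for a
   V4SI load with V4SI offsets (SSA names hypothetical):

     off0 = BIT_FIELD_REF <vec_offset, 32, 0>;
     idx0 = (sizetype) off0 * scale;
     ptr0 = dataref_ptr + idx0;
     elt0 = MEM[(int *) ptr0];
     ...				(lanes 1 to 3 likewise)
     vect = {elt0, elt1, elt2, elt3};	(the costed vec_construct)  */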
@@ -9452,10 +9577,10 @@ vectorizable_load (vec_info *vinfo,
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr),
align, misalign);
+ align = least_bit_hwi (misalign | align);
if (final_mask)
{
- align = least_bit_hwi (misalign | align);
tree ptr = build_int_cst (ref_type,
align * BITS_PER_UNIT);
gcall *call
@@ -9472,7 +9597,6 @@ vectorizable_load (vec_info *vinfo,
= vect_get_loop_len (loop_vinfo, loop_lens,
vec_num * ncopies,
vec_num * j + i);
- align = least_bit_hwi (misalign | align);
tree ptr = build_int_cst (ref_type,
align * BITS_PER_UNIT);
gcall *call
@@ -9548,14 +9672,10 @@ vectorizable_load (vec_info *vinfo,
= fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
if (alignment_support_scheme == dr_aligned)
;
- else if (DR_MISALIGNMENT (first_dr_info) == -1)
- TREE_TYPE (data_ref)
- = build_aligned_type (TREE_TYPE (data_ref),
- align * BITS_PER_UNIT);
else
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
- TYPE_ALIGN (elem_type));
+ align * BITS_PER_UNIT);
if (ltype != vectype)
{
vect_copy_ref_info (data_ref,
@@ -9747,6 +9867,9 @@ vectorizable_load (vec_info *vinfo,
poly_wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
* group_gap_adj);
+ if (tree_int_cst_sgn
+ (vect_dr_behavior (vinfo, dr_info)->step) == -1)
+ bump_val = -bump_val;
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
gsi, stmt_info, bump);
@@ -9760,6 +9883,9 @@ vectorizable_load (vec_info *vinfo,
poly_wide_int bump_val
= (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
* group_gap_adj);
+ if (tree_int_cst_sgn
+ (vect_dr_behavior (vinfo, dr_info)->step) == -1)
+ bump_val = -bump_val;
tree bump = wide_int_to_tree (sizetype, bump_val);
dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
stmt_info, bump);
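/* A worked example of the sign fix above (numbers hypothetical): a
   4-byte elem_type with group_gap_adj = 2 gives bump_val = 8, but when
   the data-ref's step is negative the access walks downwards in
   memory, so the gap must be skipped with bump_val = -8 instead.  */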
@@ -9772,8 +9898,13 @@ vectorizable_load (vec_info *vinfo,
if (slp_perm)
{
unsigned n_perms;
+ /* For SLP we know we've seen all possible uses of dr_chain so
+ direct vect_transform_slp_perm_load to DCE the unused parts.
+ ??? This is a hack to prevent compile-time issues as seen
+ in PR101120 and friends. */
bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
- gsi, vf, false, &n_perms);
+ gsi, vf, false, &n_perms,
+ nullptr, true);
gcc_assert (ok);
}
else
@@ -10779,8 +10910,6 @@ vect_analyze_stmt (vec_info *vinfo,
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
- tree type = gimple_expr_type (stmt_info->stmt);
- gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
|| (call && gimple_call_lhs (call) == NULL_TREE));
@@ -11336,17 +11465,7 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
{
stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
def_stmt = stmt_vinfo->stmt;
- switch (gimple_code (def_stmt))
- {
- case GIMPLE_PHI:
- case GIMPLE_ASSIGN:
- case GIMPLE_CALL:
- *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
- break;
- default:
- *dt = vect_unknown_def_type;
- break;
- }
+ *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
if (def_stmt_info_out)
*def_stmt_info_out = stmt_vinfo;
}
@@ -11499,13 +11618,7 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
*op = gimple_op (ass, operand + 1);
}
else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
- {
- if (gimple_call_internal_p (call)
- && internal_store_fn_p (gimple_call_internal_fn (call)))
- operand = internal_fn_stored_value_index (gimple_call_internal_fn
- (call));
- *op = gimple_call_arg (call, operand);
- }
+ *op = gimple_call_arg (call, operand);
else
gcc_unreachable ();
return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
@@ -11977,22 +12090,29 @@ supportable_narrowing_operation (enum tree_code code,
return false;
}
-/* Generate and return a statement that sets vector mask MASK such that
- MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
+/* Generate and return a vector mask of MASK_TYPE such that
+ mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
+ Add the statements to SEQ. */
-gcall *
-vect_gen_while (tree mask, tree start_index, tree end_index)
+tree
+vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
+ tree end_index, const char *name)
{
tree cmp_type = TREE_TYPE (start_index);
- tree mask_type = TREE_TYPE (mask);
gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
cmp_type, mask_type,
OPTIMIZE_FOR_SPEED));
gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
start_index, end_index,
build_zero_cst (mask_type));
- gimple_call_set_lhs (call, mask);
- return call;
+ tree tmp;
+ if (name)
+ tmp = make_temp_ssa_name (mask_type, NULL, name);
+ else
+ tmp = make_ssa_name (mask_type);
+ gimple_call_set_lhs (call, tmp);
+ gimple_seq_add_stmt (seq, call);
+ return tmp;
}
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
@@ -12002,9 +12122,7 @@ tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
tree end_index)
{
- tree tmp = make_ssa_name (mask_type);
- gcall *call = vect_gen_while (tmp, start_index, end_index);
- gimple_seq_add_stmt (seq, call);
+ tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
@@ -12064,11 +12182,6 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
"not vectorized: irregular stmt.%G", stmt);
}
- if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
- return opt_result::failure_at (stmt,
- "not vectorized: vector stmt in loop:%G",
- stmt);
-
tree vectype;
tree scalar_type = NULL_TREE;
if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
@@ -12118,6 +12231,12 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
}
+
+ if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
+ return opt_result::failure_at (stmt,
+ "not vectorized: vector stmt in loop:%G",
+ stmt);
+
*stmt_vectype_out = vectype;
/* Don't try to compute scalar types if the stmt produces a boolean
@@ -12128,8 +12247,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
/* The number of units is set according to the smallest scalar
type (or the largest vector size, but we only support one
vector size per vectorization). */
- HOST_WIDE_INT dummy;
- scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
+ scalar_type = vect_get_smallest_scalar_type (stmt_info,
+ TREE_TYPE (vectype));
if (scalar_type != TREE_TYPE (vectype))
{
if (dump_enabled_p ())
@@ -12148,8 +12267,12 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
}
}
- gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
- TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
+ if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
+ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
+ return opt_result::failure_at (stmt,
+ "Not vectorized: Incompatible number "
+ "of vector subparts between %T and %T\n",
+ nunits_vectype, *stmt_vectype_out);
if (dump_enabled_p ())
{