Diffstat (limited to 'gcc/tree-vect-data-refs.cc')
-rw-r--r-- | gcc/tree-vect-data-refs.cc | 122 |
1 file changed, 55 insertions, 67 deletions
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index dc82bf6..a3d3b3e 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1448,17 +1448,20 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,

   if (loop_vinfo && dr_safe_speculative_read_required (stmt_info))
     {
-      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-      auto vectype_size
+      /* The required target alignment must be a power-of-2 value and is
+	 computed as the product of vector element size, VF and group size.
+	 We compute the constant part first as VF may be a variable.  For
+	 variable VF, the power-of-2 check of VF is deferred to runtime.  */
+      auto align_factor_c
 	= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
-      poly_uint64 new_alignment = vf * vectype_size;
-      /* If we have a grouped access we require that the alignment be N * elem.  */
       if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-	new_alignment *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+	align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+
+      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      poly_uint64 new_alignment = vf * align_factor_c;

-      unsigned HOST_WIDE_INT target_alignment;
-      if (new_alignment.is_constant (&target_alignment)
-	  && pow2p_hwi (target_alignment))
+      if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
+	  || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
 	{
 	  if (dump_enabled_p ())
 	    {
@@ -1467,7 +1470,7 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
 	      dump_dec (MSG_NOTE, new_alignment);
 	      dump_printf (MSG_NOTE, " bytes.\n");
 	    }
-	  vector_alignment = target_alignment;
+	  vector_alignment = new_alignment;
 	}
     }

@@ -2438,6 +2441,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
      - The cost of peeling (the extra runtime checks, the increase
        in code size).  */

+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
@@ -2446,9 +2450,18 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)

       stmt_vec_info stmt_info = dr_info->stmt;
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-      do_peeling
-	= vector_alignment_reachable_p (dr_info,
-					LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+
+      /* With variable VF, unsafe speculative read can be avoided for known
+	 inbounds DRs as long as partial vectors are used.  */
+      if (!vf.is_constant ()
+	  && dr_safe_speculative_read_required (stmt_info)
+	  && DR_SCALAR_KNOWN_BOUNDS (dr_info))
+	{
+	  dr_set_safe_speculative_read_required (stmt_info, false);
+	  LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+	}
+
+      do_peeling = vector_alignment_reachable_p (dr_info, vf);
       if (do_peeling)
 	{
 	  if (known_alignment_for_access_p (dr_info, vectype))
@@ -2488,7 +2501,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 	      poly_uint64 nscalars = npeel_tmp;
 	      if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
 		{
-		  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
 		  unsigned group_size = 1;
 		  if (STMT_SLP_TYPE (stmt_info)
 		      && STMT_VINFO_GROUPED_ACCESS (stmt_info))
@@ -2911,14 +2923,12 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
      2) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
      3) all misaligned data refs with a known misalignment are supported, and
-     4) the number of runtime alignment checks is within reason.
-     5) the vectorization factor is a constant.  */
+     4) the number of runtime alignment checks is within reason.  */

   do_versioning
     = (optimize_loop_nest_for_speed_p (loop)
        && !loop->inner /* FORNOW */
-       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP)
-      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ();
+       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);

   if (do_versioning)
     {
@@ -2965,25 +2975,22 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
 	     ?? We could actually unroll the loop to achieve the required
 	     overall step alignment, and forcing the alignment could be
 	     done by doing some iterations of the non-vectorized loop.  */
-	  if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-			   * DR_STEP_ALIGNMENT (dr),
+	  if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
 			   DR_TARGET_ALIGNMENT (dr_info)))
 	    {
 	      do_versioning = false;
 	      break;
 	    }

-	  /* The rightmost bits of an aligned address must be zeros.
-	     Construct the mask needed for this test.  For example,
-	     GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
-	     mask must be 15 = 0xf.  */
-	  gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ());
-	  int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1;
+	  /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test rightmost address
+	     bits for runtime alignment check.  For example, for 16 bytes
+	     target alignment the mask is 15 = 0xf.  */
+	  poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;

 	  /* FORNOW: use the same mask to test all potentially unaligned
 	     references in the loop.  */
-	  if (LOOP_VINFO_PTR_MASK (loop_vinfo)
-	      && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
+	  if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
+	      && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
 	    {
 	      do_versioning = false;
 	      break;
@@ -4423,8 +4430,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
    the type of the memory elements being loaded or stored.  OFFSET_TYPE
    is the type of the offset that is being applied to the invariant
-   base address.  SCALE is the amount by which the offset should
-   be multiplied *after* it has been converted to address width.
+   base address.  If OFFSET_TYPE is scalar the function chooses an
+   appropriate vector type for it.  SCALE is the amount by which the
+   offset should be multiplied *after* it has been converted to address width.

    Return true if the function is supported, storing the function id in
    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
@@ -4467,9 +4475,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,

   for (;;)
     {
-      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
-      if (!offset_vectype)
-	return false;
+      tree offset_vectype;
+      if (VECTOR_TYPE_P (offset_type))
+	offset_vectype = offset_type;
+      else
+	{
+	  offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+	  if (!offset_vectype)
+	    return false;
+	}

       /* Test whether the target supports this combination.  */
       if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
@@ -4500,10 +4514,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
 	  return true;
 	}

+      /* For fixed offset vector type we're done.  */
+      if (VECTOR_TYPE_P (offset_type))
+	return false;
+
       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
 	  && TYPE_PRECISION (offset_type) >= element_bits)
 	return false;

+      /* Try a larger offset vector type.  */
       offset_type = build_nonstandard_integer_type
 	(TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
     }
@@ -4512,7 +4531,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
 /* STMT_INFO is a call to an internal gather load or scatter store function.
    Describe the operation in INFO.  */

-static void
+void
 vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
 				   gather_scatter_info *info)
 {
@@ -6524,7 +6543,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
 			       tree vectype, int misalignment,
 			       gather_scatter_info *gs_info)
 {
-  data_reference *dr = dr_info ? dr_info->dr : nullptr;
+  data_reference *dr = dr_info->dr;
   stmt_vec_info stmt_info = dr_info->stmt;
   machine_mode mode = TYPE_MODE (vectype);
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
@@ -6605,7 +6624,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
 	}
       }
   */
-  if (dr && DR_IS_READ (dr))
+  if (DR_IS_READ (dr))
     {
       if (can_implement_p (vec_realign_load_optab, mode)
 	  && (!targetm.vectorize.builtin_mask_for_load
@@ -6635,38 +6654,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
   tree type = TREE_TYPE (DR_REF (dr));
   bool is_gather_scatter = gs_info != nullptr;
   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
-    {
-      if (!is_gather_scatter || dr != nullptr)
-	is_packed = not_size_aligned (DR_REF (dr));
-      else
-	{
-	  /* Gather-scatter accesses normally perform only component accesses
-	     so alignment is irrelevant for them.  Targets like riscv do care
-	     about scalar alignment in vector accesses, though, so check scalar
-	     alignment here.  We determined the alias pointer as well as the
-	     base alignment during pattern recognition and can re-use it here.
-
-	     As we do not have an analyzed dataref we only know the alignment
-	     of the reference itself and nothing about init, steps, etc.
-	     For now don't try harder to determine misalignment and
-	     just assume it is unknown.  We consider the type packed if its
-	     scalar alignment is lower than the natural alignment of a vector
-	     element's type.  */
-
-	  gcc_assert (!GATHER_SCATTER_LEGACY_P (*gs_info));
-	  gcc_assert (dr == nullptr);
-
-	  tree inner_vectype = TREE_TYPE (vectype);
-
-	  unsigned HOST_WIDE_INT scalar_align
-	    = tree_to_uhwi (gs_info->alias_ptr);
-	  unsigned HOST_WIDE_INT inner_vectype_sz
-	    = tree_to_uhwi (TYPE_SIZE (inner_vectype));
-
-	  bool is_misaligned = scalar_align < inner_vectype_sz;
-	  is_packed = scalar_align > 1 && is_misaligned;
-	}
-    }
+    is_packed = not_size_aligned (DR_REF (dr));

   if (targetm.vectorize.support_vector_misalignment (mode, type, misalignment,
						      is_packed, is_gather_scatter))
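Note on the first two hunks: the patch splits new_alignment into a compile-time constant factor (element size times group size) and the possibly variable VF, so the power-of-2 requirement can still be enforced when VF is not known at compile time. The standalone sketch below is illustrative only, not GCC code; the is_pow2 helper and alignment_usable_p function are made up for the example and merely mirror the split between the constant factor checked here and the VF check deferred to runtime.

/* Illustrative sketch, not GCC code: mirrors the idea of
   new_alignment = vf * align_factor_c from the first hunk.  */
#include <cstdint>
#include <cstdio>

static bool
is_pow2 (std::uint64_t x)
{
  return x != 0 && (x & (x - 1)) == 0;
}

/* If VF is a compile-time constant, check the whole product now;
   otherwise only the constant factor is checked and the power-of-2
   test of VF itself is left to runtime.  */
static bool
alignment_usable_p (std::uint64_t align_factor_c, bool vf_is_constant,
		    std::uint64_t vf)
{
  if (vf_is_constant)
    return is_pow2 (vf * align_factor_c);
  return is_pow2 (align_factor_c);
}

int
main ()
{
  /* 4-byte elements, group size 2, constant VF 4: alignment 32.  */
  std::printf ("%d\n", alignment_usable_p (4 * 2, true, 4));   /* 1 */
  /* Variable VF: only the constant factor 8 is checked here.  */
  std::printf ("%d\n", alignment_usable_p (4 * 2, false, 0));  /* 1 */
  return 0;
}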
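The versioning hunk replaces the constant int mask with a poly_uint64 derived from DR_TARGET_ALIGNMENT and compares it with maybe_ne. The check it feeds is the usual address-and-mask test; below is a minimal self-contained illustration with an assumed constant alignment, not vectorizer code.

/* Sketch of the "mask = alignment - 1" runtime alignment test used by
   loop versioning: an address is aligned to a power-of-two ALIGN iff
   the low bits selected by the mask are all zero.  */
#include <cstdint>
#include <cstdio>

static bool
aligned_p (std::uintptr_t addr, std::uintptr_t align /* power of two */)
{
  std::uintptr_t mask = align - 1;   /* 16-byte alignment -> mask 0xf */
  return (addr & mask) == 0;
}

int
main ()
{
  alignas (16) static int buf[8];
  std::printf ("%d\n", aligned_p ((std::uintptr_t) buf, 16));      /* 1 */
  std::printf ("%d\n", aligned_p ((std::uintptr_t) buf + 4, 16));  /* 0 */
  return 0;
}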
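The vect_gather_scatter_fn_p changes let a caller pass an already vectorized offset type, which is used as-is, while the scalar case keeps doubling the offset precision until the target accepts it or the pointer width is reached. The control flow can be sketched independently of the tree machinery; target_supports_offset_bits and the 64-bit pointer width below are made-up stand-ins, not GCC interfaces.

/* Sketch of the offset-type search loop in vect_gather_scatter_fn_p,
   with made-up stand-ins for the tree types and target hooks.  */
#include <cstdio>

static const unsigned pointer_bits = 64;

/* Hypothetical stand-in for internal_gather_scatter_fn_supported_p.  */
static bool
target_supports_offset_bits (unsigned bits)
{
  return bits >= 32;   /* pretend the target wants >= 32-bit offsets */
}

static bool
find_offset_bits (unsigned element_bits, bool offset_is_vector,
		  unsigned offset_bits, unsigned *out_bits)
{
  for (;;)
    {
      if (target_supports_offset_bits (offset_bits))
	{
	  *out_bits = offset_bits;
	  return true;
	}
      /* A pre-built vector offset type is taken or rejected as-is.  */
      if (offset_is_vector)
	return false;
      if (offset_bits >= pointer_bits && offset_bits >= element_bits)
	return false;
      offset_bits *= 2;   /* try a larger offset type */
    }
}

int
main ()
{
  unsigned bits;
  if (find_offset_bits (32, /*offset_is_vector=*/false, 16, &bits))
    std::printf ("usable offset precision: %u\n", bits);   /* prints 32 */
  return 0;
}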