Diffstat (limited to 'gcc')
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c     |  20
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c |  27
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c     |  21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c |  29
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c     |  24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c |  15
-rw-r--r--  gcc/tree-vect-data-refs.cc                             |  61
-rw-r--r--  gcc/tree-vect-loop-manip.cc                            |  71
-rw-r--r--  gcc/tree-vect-loop.cc                                  |   8
-rw-r--r--  gcc/tree-vect-stmts.cc                                 | 103
-rw-r--r--  gcc/tree-vectorizer.h                                  |   9
11 files changed, 291 insertions(+), 97 deletions(-)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
new file mode 100644
index 0000000..feb7ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
@@ -0,0 +1,20 @@
+/* Peeling for alignment with masking in VLA modes.  */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+
+#define START 3
+#define END 510
+
+int __attribute__((noipa))
+foo (int *a) {
+  for (signed int i = START; i < END; ++i) {
+    if (a[i] != 0)
+      return i;
+  }
+  return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
+/* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c
new file mode 100644
index 0000000..b4c267f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c
@@ -0,0 +1,27 @@
+/* Peeling for alignment with masking in VLA modes.  */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only" } */
+
+#include "peel_ind_11.c"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 512
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  for (int k = 5; k < 30; k++) {
+    int *a = (int *) malloc (sizeof(int) * N);
+
+    /* Set only one non-zero element for the test.  */
+    for (int i = 5; i < 30; i++)
+      a[i] = (i == k ? 1 : 0);
+
+    int res = foo (a);
+    asm volatile ("");
+    if (res != k) {
+      __builtin_abort ();
+    }
+  }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
new file mode 100644
index 0000000..260482a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
@@ -0,0 +1,21 @@
+/* Peeling for alignment with masking together with versioning in VLA modes.  */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+
+#define START 5
+#define END 509
+
+int __attribute__((noipa))
+foo (int *restrict a, int * restrict b) {
+  for (signed int i = START; i < END; ++i) {
+    if (a[i] != b[i])
+      return i;
+  }
+  return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump "Both peeling and versioning will be applied" "vect" } } */
+/* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
+/* { dg-final { scan-assembler {\teor\t.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c
new file mode 100644
index 0000000..ba978fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c
@@ -0,0 +1,29 @@
+/* Peeling for alignment with masking together with versioning in VLA modes.  */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only" } */
+
+#include "peel_ind_12.c"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 512
+
+int __attribute__ ((optimize (1)))
+main (void) {
+  for (int k = 5; k < 50; k++) {
+    int *a = (int *) malloc (sizeof(int) * N);
+    int *b = (int *) malloc (sizeof(int) * N);
+
+    /* Set only one position where the values differ for the test.  */
+    for (int i = 5; i < 50; i++) {
+      a[i] = (i == k ? 1 : 0);
+      b[i] = 0;
+    }
+
+    int res = foo (a, b);
+    asm volatile ("");
+    if (res != k) {
+      __builtin_abort ();
+    }
+  }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c
new file mode 100644
index 0000000..730e33e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c
@@ -0,0 +1,24 @@
+/* Known inbounds DR in VLA modes.  */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+
+#define N 512
+#define START 5
+#define END 509
+
+int x[N] __attribute__((aligned(32)));
+
+int __attribute__((noipa))
+foo (void)
+{
+  for (signed int i = START; i < END; ++i)
+    {
+      if (x[i] == 0)
+        return i;
+    }
+  return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c
new file mode 100644
index 0000000..83352a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c
@@ -0,0 +1,15 @@
+/* Known inbounds DR in VLA modes.  */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only" } */
+
+#include "peel_ind_13.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  int res = foo ();
+  asm volatile ("");
+  if (res != START)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a9d4aae..a3d3b3e 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1448,17 +1448,20 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
   if (loop_vinfo
       && dr_safe_speculative_read_required (stmt_info))
     {
-      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-      auto vectype_size
+      /* The required target alignment must be a power-of-2 value and is
+         computed as the product of vector element size, VF and group size.
+         We compute the constant part first as VF may be variable.  For
+         variable VF, the power-of-2 check of VF is deferred to runtime.  */
+      auto align_factor_c
         = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
-      poly_uint64 new_alignment = vf * vectype_size;
-      /* If we have a grouped access we require that the alignment be N * elem.  */
       if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-        new_alignment *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+        align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+
+      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      poly_uint64 new_alignment = vf * align_factor_c;
 
-      unsigned HOST_WIDE_INT target_alignment;
-      if (new_alignment.is_constant (&target_alignment)
-          && pow2p_hwi (target_alignment))
+      if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
+          || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
         {
@@ -1467,7 +1470,7 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
               dump_dec (MSG_NOTE, new_alignment);
               dump_printf (MSG_NOTE, " bytes.\n");
             }
-          vector_alignment = target_alignment;
+          vector_alignment = new_alignment;
         }
     }
 
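The hunk above splits the required alignment into a compile-time constant factor (element size times group size) and the VF, so that with a variable VF only the constant factor is checked for being a power of two at compile time. A minimal standalone sketch of that decision, in plain C++ with illustrative names (not GCC code):

#include <cstdint>

/* True iff x is a non-zero power of two.  */
static bool pow2p (uint64_t x) { return x != 0 && (x & (x - 1)) == 0; }

/* With a constant VF the whole product elt_size * group_size * vf must be
   a power of two; with a variable VF only the constant factor is checked
   here, and the power-of-2-ness of the full product is left to a runtime
   versioning check.  */
static bool
alignment_expressible_p (uint64_t elt_size, uint64_t group_size,
                         bool vf_is_constant, uint64_t vf)
{
  uint64_t factor = elt_size * group_size;  /* constant part */
  return vf_is_constant ? pow2p (factor * vf) : pow2p (factor);
}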
@@ -2438,6 +2441,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
      - The cost of peeling (the extra runtime checks, the increase in code
        size).  */
 
+  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   FOR_EACH_VEC_ELT (datarefs, i, dr)
     {
       dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
@@ -2446,9 +2450,18 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
       stmt_vec_info stmt_info = dr_info->stmt;
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-      do_peeling
-        = vector_alignment_reachable_p (dr_info,
-                                        LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+
+      /* With variable VF, unsafe speculative reads can be avoided for known
+         inbounds DRs as long as partial vectors are used.  */
+      if (!vf.is_constant ()
+          && dr_safe_speculative_read_required (stmt_info)
+          && DR_SCALAR_KNOWN_BOUNDS (dr_info))
+        {
+          dr_set_safe_speculative_read_required (stmt_info, false);
+          LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+        }
+
+      do_peeling = vector_alignment_reachable_p (dr_info, vf);
       if (do_peeling)
         {
           if (known_alignment_for_access_p (dr_info, vectype))
@@ -2488,7 +2501,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
               poly_uint64 nscalars = npeel_tmp;
               if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
                 {
-                  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
                   unsigned group_size = 1;
                   if (STMT_SLP_TYPE (stmt_info)
                       && STMT_VINFO_GROUPED_ACCESS (stmt_info))
@@ -2911,14 +2923,12 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
      2) there is at least one unsupported misaligned data ref with an
         unknown misalignment, and
      3) all misaligned data refs with a known misalignment are supported, and
-     4) the number of runtime alignment checks is within reason.
-     5) the vectorization factor is a constant.  */
+     4) the number of runtime alignment checks is within reason.  */
 
   do_versioning
     = (optimize_loop_nest_for_speed_p (loop)
       && !loop->inner /* FORNOW */
-       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP)
-      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ();
+       && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
 
   if (do_versioning)
     {
@@ -2965,25 +2975,22 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
             ?? We could actually unroll the loop to achieve the required
             overall step alignment, and forcing the alignment could be
             done by doing some iterations of the non-vectorized loop.  */
-          if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-                           * DR_STEP_ALIGNMENT (dr),
+          if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
                            DR_TARGET_ALIGNMENT (dr_info)))
            {
              do_versioning = false;
              break;
            }
 
-          /* The rightmost bits of an aligned address must be zeros.
-             Construct the mask needed for this test.  For example,
-             GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
-             mask must be 15 = 0xf.  */
-          gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ());
-          int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1;
+          /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test the rightmost
+             address bits for the runtime alignment check.  For example, for
+             a 16-byte target alignment the mask is 15 = 0xf.  */
+          poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;
 
          /* FORNOW: use the same mask to test all potentially unaligned
             references in the loop.  */
-          if (LOOP_VINFO_PTR_MASK (loop_vinfo)
-              && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
+          if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
+              && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
            {
              do_versioning = false;
              break;
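With DR_TARGET_ALIGNMENT now allowed to be non-constant, the versioning mask becomes poly_uint64, but the address test it feeds is unchanged in spirit: the low bits selected by "alignment - 1" must be zero. A sketch of that check with illustrative names (not GCC code), assuming the alignment value gets materialized at runtime in the VLA case:

#include <cstdint>

/* An address is aligned to target_alignment (a power of two) iff its low
   bits, selected by mask = target_alignment - 1, are all zero.  For a
   16-byte alignment the mask is 0xf; for a VL-scaled alignment the mask
   value is only known once the vector length is known.  */
static bool
aligned_p (const void *addr, uintptr_t target_alignment)
{
  uintptr_t mask = target_alignment - 1;
  return (reinterpret_cast<uintptr_t> (addr) & mask) == 0;
}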
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 6c1b26a..566308f 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2454,10 +2454,7 @@ get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
   else
     {
       tree vla = build_int_cst (type, target_align);
-      tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
-                                    fold_build2 (MINUS_EXPR, type,
-                                                 build_int_cst (type, 0), vla));
-      target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
+      target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
                                           build_int_cst (type, 1));
     }
 
@@ -3840,7 +3837,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
   const vec<stmt_vec_info> &may_misalign_stmts
     = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
   stmt_vec_info stmt_info;
-  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
+  poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
   tree mask_cst;
   unsigned int i;
   tree int_ptrsize_type;
@@ -3852,9 +3849,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
   tree ptrsize_zero;
   tree part_cond_expr;
 
-  /* Check that mask is one less than a power of 2, i.e., mask is
-     all zeros followed by all ones.  */
-  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
+  gcc_assert (known_ne (mask, 0U));
 
   int_ptrsize_type = signed_type_for (ptr_type_node);
@@ -3962,6 +3957,62 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
       chain_cond_expr (cond_expr, part_cond_expr);
     }
 }
 
+/* Function vect_create_cond_for_vla_spec_read.
+
+   Create a conditional expression that represents the run-time checks on
+   the maximum speculative read amount in VLA modes.  We check two things:
+     1) that the max speculative read amount does not exceed the min page size
+     2) that the VF is a power of 2 - done by checking the max read amount instead
+
+   Input:
+   COND_EXPR  - input conditional expression.  New conditions will be chained
+                with logical AND operation.
+   LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max
+                possible speculative read amount in VLA modes.
+
+   Output:
+   COND_EXPR - conditional expression.
+
+   The returned COND_EXPR is the conditional expression to be used in the
+   if statement that controls which version of the loop gets executed at
+   runtime.  */
+
+static void
+vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr)
+{
+  poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo);
+  tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly);
+
+  /* Both the read amount and the VF must be variable, and the read amount
+     must be a constant power-of-2 multiple of the VF.  */
+  unsigned HOST_WIDE_INT multiple;
+  gcc_assert (!read_amount_poly.is_constant ()
+              && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
+              && constant_multiple_p (read_amount_poly,
+                                      LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+                                      &multiple)
+              && pow2p_hwi (multiple));
+
+  tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U);
+  tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U);
+  tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node,
+                                         (unsigned long) param_min_pagesize);
+
+  /* Create an expression of "amount & (amount - 1) == 0".  */
+  tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node,
+                                amount, cst_ul_one);
+  tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node,
+                                      amount, amount_m1);
+  tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
+                                       amount_and_expr, cst_ul_zero);
+  chain_cond_expr (cond_expr, powof2_cond_expr);
+
+  /* Create an expression of "amount <= cst_ul_pagesize".  */
+  tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node,
+                                         amount, cst_ul_pagesize);
+  chain_cond_expr (cond_expr, pagesize_cond_expr);
+}
+
 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
    create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
    Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
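At runtime the condition built by vect_create_cond_for_vla_spec_read is equivalent to the scalar test below, evaluated once when choosing the loop version; here "amount" stands in for LOOP_VINFO_MAX_SPEC_READ_AMOUNT and "min_pagesize" for param_min_pagesize (a sketch, not GCC code):

#include <cstdint>

/* Take the vector loop only if the maximum speculative read amount is a
   power of two (which, given the assertion above, also proves the VF is)
   and a whole vector iteration cannot read past one minimum-size page.  */
static bool
use_vector_loop_p (uint64_t amount, uint64_t min_pagesize)
{
  bool pow2 = (amount & (amount - 1)) == 0;  /* amount & (amount - 1) == 0 */
  return pow2 && amount <= min_pagesize;     /* amount <= cst_ul_pagesize */
}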
@@ -4087,6 +4138,7 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
   gimple_seq gimplify_stmt_list = NULL;
   tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
+  bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo);
   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
   poly_uint64 versioning_threshold
@@ -4145,6 +4197,9 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
     vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
                                        &cond_expr_stmt_list);
 
+  if (version_spec_read)
+    vect_create_cond_for_vla_spec_read (loop_vinfo, &cond_expr);
+
   if (version_alias)
     {
       vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 85f3e90..55a8495 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1009,6 +1009,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     unaligned_dr (NULL),
     peeling_for_alignment (0),
     ptr_mask (0),
+    max_spec_read_amount (0),
     nonlinear_iv (false),
     ivexpr_map (NULL),
     scan_map (NULL),
@@ -10141,7 +10142,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
       if (peel_mul)
        {
          if (!step_mul)
-           step_mul = peel_mul;
+           {
+             gcc_assert (!nunits.is_constant ());
+             step_mul = gimple_build (&init_stmts,
+                                      MINUS_EXPR, step_vectype,
+                                      build_zero_cst (step_vectype), peel_mul);
+           }
          else
            step_mul = gimple_build (&init_stmts,
                                     MINUS_EXPR, step_vectype,
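One plausible reading of the vectorizable_induction change: when only a peeling adjustment exists, the multiplier must enter negated, exactly as in the else branch, so each lane of the first vector IV starts at base + (lane - peel) * step with the first "peel" lanes masked off. A hypothetical worked example (not GCC code; the lane layout is an assumption for illustration):

#include <cstdio>

int main ()
{
  /* Loop starts at i = 3 with step 1; 3 iterations are "peeled" via
     masking, so the vector body begins at the aligned position i = 0
     and the leading lanes are inactive.  */
  const long base = 3, step = 1, peel = 3, lanes = 8;
  for (long i = 0; i < lanes; ++i)
    printf ("lane %ld: iv = %ld%s\n", i, base + (i - peel) * step,
            i < peel ? "  (masked off)" : "");
  return 0;
}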
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a6f4db4..dbeb8bd 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2400,70 +2400,26 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
       /* We can only peel for loops, of course.  */
       gcc_checking_assert (loop_vinfo);
 
+      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      poly_uint64 read_amount
+        = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+        read_amount *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+
       auto target_alignment
        = DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info));
-      unsigned HOST_WIDE_INT target_align;
-
-      bool group_aligned = false;
-      if (target_alignment.is_constant (&target_align)
-          && nunits.is_constant ())
-        {
-          poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-          auto vectype_size
-            = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
-          poly_uint64 required_alignment = vf * vectype_size;
-          /* If we have a grouped access we require that the alignment be N * elem.  */
-          if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-            required_alignment *=
-              DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
-          if (!multiple_p (target_alignment, required_alignment))
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "desired alignment %wu not met. Instead got %wu "
-                                 "for DR alignment at %G",
-                                 required_alignment.to_constant (),
-                                 target_align, STMT_VINFO_STMT (stmt_info));
-              return false;
-            }
-
-          if (!pow2p_hwi (target_align))
-            {
-              if (dump_enabled_p ())
-                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "non-power-of-two vector alignment %wd "
-                                 "for DR alignment at %G",
-                                 target_align, STMT_VINFO_STMT (stmt_info));
-              return false;
-            }
-
-          /* For VLA we have to insert a runtime check that the vector loads
-             per iterations don't exceed a page size.  For now we can use
-             POLY_VALUE_MAX as a proxy as we can't peel for VLA.  */
-          if (known_gt (required_alignment, (unsigned)param_min_pagesize))
+      if (!multiple_p (target_alignment, read_amount))
+        {
+          if (dump_enabled_p ())
            {
-              if (dump_enabled_p ())
-                {
-                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                   "alignment required for correctness (");
-                  dump_dec (MSG_MISSED_OPTIMIZATION, required_alignment);
-                  dump_printf (MSG_NOTE, ") may exceed page size\n");
-                }
-              return false;
+              dump_printf_loc (MSG_NOTE, vect_location,
+                               "desired alignment not met, target was ");
+              dump_dec (MSG_NOTE, target_alignment);
+              dump_printf (MSG_NOTE, " previously, but read amount is ");
+              dump_dec (MSG_NOTE, read_amount);
+              dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info));
            }
-
-          group_aligned = true;
-        }
-
-      /* There are multiple loads that have a misalignment that we couldn't
-         align.  We would need LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P to
-         vectorize. */
-      if (!group_aligned)
-        {
-          if (inbounds)
-            LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
-          else
-            return false;
+          return false;
        }
 
       /* When using a group access the first element may be aligned but the
@@ -2485,6 +2441,33 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                               STMT_VINFO_STMT (stmt_info));
              return false;
            }
+
+          /* Reject vectorization if we know the read amount per vector
+             iteration exceeds the min page size.  */
+          if (known_gt (read_amount, (unsigned) param_min_pagesize))
+            {
+              if (dump_enabled_p ())
+                {
+                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                   "alignment required for correctness (");
+                  dump_dec (MSG_MISSED_OPTIMIZATION, read_amount);
+                  dump_printf (MSG_NOTE, ") may exceed page size.\n");
+                }
+              return false;
+            }
+
+          if (!vf.is_constant ())
+            {
+              /* For VLA modes, we need a runtime check to ensure any speculative
+                 read amount does not exceed the page size.  Here we record the
+                 max possible read amount for the check.  */
+              if (maybe_gt (read_amount,
+                            LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo)))
+                LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount;
+
+              /* For VLA modes, we must use partial vectors.  */
+              LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+            }
        }
 
   if (*alignment_support_scheme == dr_unaligned_unsupported)
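Condensed, the reworked get_load_store_type gate for speculative loads reduces to the rules below, where read_amount = VF * element size * group size. A sketch with illustrative names (not GCC code), treating read_amount as its eventual runtime value:

#include <cstdint>

struct decision { bool vectorize; bool needs_runtime_check; };

/* - the target alignment must be a multiple of the bytes read per vector
     iteration ("desired alignment not met" otherwise);
   - a read amount known to exceed the minimum page size is rejected;
   - with a variable VF, a runtime versioning check is recorded and the
     loop is forced to use partial vectors.  */
static decision
classify (uint64_t read_amount, uint64_t target_alignment,
          bool vf_is_constant, uint64_t min_pagesize)
{
  if (target_alignment % read_amount != 0)
    return {false, false};
  if (read_amount > min_pagesize)
    return {false, false};
  return {true, !vf_is_constant};
}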
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9653496..041cff8 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -919,7 +919,10 @@ public:
   int peeling_for_alignment;
 
   /* The mask used to check the alignment of pointers or arrays.  */
-  int ptr_mask;
+  poly_uint64 ptr_mask;
+
+  /* The maximum speculative read amount in VLA modes for the runtime check.  */
+  poly_uint64 max_spec_read_amount;
 
   /* Indicates whether the loop has any non-linear IV.  */
   bool nonlinear_iv;
@@ -1155,6 +1158,7 @@ public:
 #define LOOP_VINFO_RGROUP_IV_TYPE(L)       (L)->rgroup_iv_type
 #define LOOP_VINFO_PARTIAL_VECTORS_STYLE(L) (L)->partial_vector_style
 #define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
+#define LOOP_VINFO_MAX_SPEC_READ_AMOUNT(L) (L)->max_spec_read_amount
 #define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
 #define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
 #define LOOP_VINFO_DDRS(L)                 (L)->shared->ddrs
@@ -1209,6 +1213,8 @@ public:
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)	\
   ((L)->may_misalign_stmts.length () > 0)
+#define LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ(L)	\
+  (maybe_gt ((L)->max_spec_read_amount, 0U))
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)		\
   ((L)->comp_alias_ddrs.length () > 0			\
    || (L)->check_unequal_addrs.length () > 0		\
@@ -1219,6 +1225,7 @@ public:
   (LOOP_VINFO_SIMD_IF_COND (L))
 #define LOOP_REQUIRES_VERSIONING(L)			\
   (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L)		\
+   || LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (L)	\
    || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L)		\
    || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)		\
    || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))