Diffstat (limited to 'gcc')
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c | 20
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c | 27
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c | 21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c | 29
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c | 24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c | 15
-rw-r--r--  gcc/tree-vect-data-refs.cc | 61
-rw-r--r--  gcc/tree-vect-loop-manip.cc | 71
-rw-r--r--  gcc/tree-vect-loop.cc | 8
-rw-r--r--  gcc/tree-vect-stmts.cc | 103
-rw-r--r--  gcc/tree-vectorizer.h | 9
11 files changed, 291 insertions(+), 97 deletions(-)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
new file mode 100644
index 0000000..feb7ee7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11.c
@@ -0,0 +1,20 @@
+/* Peeling for alignment with masking in VLA modes. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+
+#define START 3
+#define END 510
+
+int __attribute__((noipa))
+foo (int *a) {
+ for (signed int i = START; i < END; ++i) {
+ if (a[i] != 0)
+ return i;
+ }
+ return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */
+/* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c
new file mode 100644
index 0000000..b4c267f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_11_run.c
@@ -0,0 +1,27 @@
+/* Peeling for alignment with masking in VLA modes. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only" } */
+
+#include "peel_ind_11.c"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 512
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ for (int k = 5; k < 30; k++) {
+ int *a = (int *) malloc (sizeof(int) * N);
+
+ /* Set only one non-zero element for the test. */
+ for (int i = 5; i < 30; i++)
+ a[i] = (i == k ? 1 : 0);
+
+ int res = foo (a);
+ asm volatile ("");
+ if (res != k) {
+ __builtin_abort ();
+ }
+ }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
new file mode 100644
index 0000000..260482a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12.c
@@ -0,0 +1,21 @@
+/* Peeling for alignment with masking together with versioning in VLA modes. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+
+#define START 5
+#define END 509
+
+int __attribute__((noipa))
+foo (int *restrict a, int * restrict b) {
+ for (signed int i = START; i < END; ++i) {
+ if (a[i] != b[i])
+ return i;
+ }
+ return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump "Both peeling and versioning will be applied" "vect" } } */
+/* { dg-final { scan-assembler {\tnot\tp[0-7]\.b, p[0-7]/z, p.*\n} } } */
+/* { dg-final { scan-assembler {\teor\t.*\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c
new file mode 100644
index 0000000..ba978fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_12_run.c
@@ -0,0 +1,29 @@
+/* Peeling for alignment with masking together with versioning in VLA modes. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only" } */
+
+#include "peel_ind_12.c"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 512
+
+int __attribute__ ((optimize (1)))
+main (void) {
+ for (int k = 5; k < 50; k++) {
+ int *a = (int *) malloc (sizeof(int) * N);
+ int *b = (int *) malloc (sizeof(int) * N);
+
+ /* Set only one position with differing values for the test. */
+ for (int i = 5; i < 50; i++) {
+ a[i] = (i == k ? 1 : 0);
+ b[i] = 0;
+ }
+
+ int res = foo (a, b);
+ asm volatile ("");
+ if (res != k) {
+ __builtin_abort ();
+ }
+ }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c
new file mode 100644
index 0000000..730e33e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13.c
@@ -0,0 +1,24 @@
+/* Known inbounds DR in VLA modes. */
+/* { dg-do compile } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only -fdump-tree-vect-details" } */
+
+#define N 512
+#define START 5
+#define END 509
+
+int x[N] __attribute__((aligned(32)));
+
+int __attribute__((noipa))
+foo (void)
+{
+ for (signed int i = START; i < END; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ }
+ return -1;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "pfa_iv_offset" "vect" } } */
+/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c
new file mode 100644
index 0000000..83352a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_13_run.c
@@ -0,0 +1,15 @@
+/* Known inbounds DR in VLA modes. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast -msve-vector-bits=scalable --param aarch64-autovec-preference=sve-only" } */
+
+#include "peel_ind_13.c"
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+ int res = foo ();
+ asm volatile ("");
+ if (res != START)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a9d4aae..a3d3b3e 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -1448,17 +1448,20 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
if (loop_vinfo
&& dr_safe_speculative_read_required (stmt_info))
{
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- auto vectype_size
+ /* The required target alignment must be a power-of-2 value and is
+ computed as the product of the vector element size, the VF and the group
+ size.  We compute the constant part first, since the VF may be non-constant.
+ For a non-constant VF, the power-of-2 check is deferred to runtime. */
+ auto align_factor_c
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
- poly_uint64 new_alignment = vf * vectype_size;
- /* If we have a grouped access we require that the alignment be N * elem. */
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- new_alignment *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+ align_factor_c *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 new_alignment = vf * align_factor_c;
- unsigned HOST_WIDE_INT target_alignment;
- if (new_alignment.is_constant (&target_alignment)
- && pow2p_hwi (target_alignment))
+ if ((vf.is_constant () && pow2p_hwi (new_alignment.to_constant ()))
+ || (!vf.is_constant () && pow2p_hwi (align_factor_c)))
{
if (dump_enabled_p ())
{
@@ -1467,7 +1470,7 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info,
dump_dec (MSG_NOTE, new_alignment);
dump_printf (MSG_NOTE, " bytes.\n");
}
- vector_alignment = target_alignment;
+ vector_alignment = new_alignment;
}
}
@@ -2438,6 +2441,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
- The cost of peeling (the extra runtime checks, the increase
in code size). */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
FOR_EACH_VEC_ELT (datarefs, i, dr)
{
dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
@@ -2446,9 +2450,18 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
stmt_vec_info stmt_info = dr_info->stmt;
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
- do_peeling
- = vector_alignment_reachable_p (dr_info,
- LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+
+ /* With a variable VF, unsafe speculative reads can be avoided for
+ known-inbounds DRs as long as partial vectors are used. */
+ if (!vf.is_constant ()
+ && dr_safe_speculative_read_required (stmt_info)
+ && DR_SCALAR_KNOWN_BOUNDS (dr_info))
+ {
+ dr_set_safe_speculative_read_required (stmt_info, false);
+ LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ }
+
+ do_peeling = vector_alignment_reachable_p (dr_info, vf);
if (do_peeling)
{
if (known_alignment_for_access_p (dr_info, vectype))
@@ -2488,7 +2501,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
poly_uint64 nscalars = npeel_tmp;
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
{
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
unsigned group_size = 1;
if (STMT_SLP_TYPE (stmt_info)
&& STMT_VINFO_GROUPED_ACCESS (stmt_info))
@@ -2911,14 +2923,12 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
2) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
3) all misaligned data refs with a known misalignment are supported, and
- 4) the number of runtime alignment checks is within reason.
- 5) the vectorization factor is a constant. */
+ 4) the number of runtime alignment checks is within reason. */
do_versioning
= (optimize_loop_nest_for_speed_p (loop)
&& !loop->inner /* FORNOW */
- && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP)
- && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ();
+ && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP);
if (do_versioning)
{
@@ -2965,25 +2975,22 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
?? We could actually unroll the loop to achieve the required
overall step alignment, and forcing the alignment could be
done by doing some iterations of the non-vectorized loop. */
- if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- * DR_STEP_ALIGNMENT (dr),
+ if (!multiple_p (vf * DR_STEP_ALIGNMENT (dr),
DR_TARGET_ALIGNMENT (dr_info)))
{
do_versioning = false;
break;
}
- /* The rightmost bits of an aligned address must be zeros.
- Construct the mask needed for this test. For example,
- GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
- mask must be 15 = 0xf. */
- gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ());
- int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1;
+ /* Use "mask = DR_TARGET_ALIGNMENT - 1" to test rightmost address
+ bits for runtime alignment check. For example, for 16 bytes
+ target alignment the mask is 15 = 0xf. */
+ poly_uint64 mask = DR_TARGET_ALIGNMENT (dr_info) - 1;
/* FORNOW: use the same mask to test all potentially unaligned
references in the loop. */
- if (LOOP_VINFO_PTR_MASK (loop_vinfo)
- && LOOP_VINFO_PTR_MASK (loop_vinfo) != mask)
+ if (maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), 0U)
+ && maybe_ne (LOOP_VINFO_PTR_MASK (loop_vinfo), mask))
{
do_versioning = false;
break;
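
Editorial sketch (not part of the patch): the vect_compute_data_ref_alignment hunk above computes the required alignment as element size x group size x VF and, when the VF is not a compile-time constant, only checks the constant factor for being a power of 2, deferring the check on the full product to the runtime versioning condition added later in this patch. A minimal standalone illustration of that split follows; the names and sizes are made up, not GCC internals.

/* Sketch only: mirrors the constant-part / runtime-part split used for the
   target alignment computation.  All values below are illustrative.  */
#include <stdbool.h>
#include <stdio.h>

static bool
pow2p (unsigned long x)
{
  return x != 0 && (x & (x - 1)) == 0;
}

int
main (void)
{
  unsigned long elem_size = 4;       /* e.g. 32-bit elements */
  unsigned long group_size = 2;      /* grouped access of two elements */
  unsigned long align_factor_c = elem_size * group_size;

  /* Constant VF: the full product can be checked at compile time.  */
  unsigned long const_vf = 4;
  printf ("constant VF: alignment=%lu pow2=%d\n",
          align_factor_c * const_vf, pow2p (align_factor_c * const_vf));

  /* Variable VF: only the constant factor is checkable here; whether the
     full product (and hence the VF) is a power of 2 is left to the runtime
     versioning check.  */
  printf ("variable VF: constant factor=%lu pow2=%d\n",
          align_factor_c, pow2p (align_factor_c));
  return 0;
}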
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 6c1b26a..566308f 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2454,10 +2454,7 @@ get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
else
{
tree vla = build_int_cst (type, target_align);
- tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
- fold_build2 (MINUS_EXPR, type,
- build_int_cst (type, 0), vla));
- target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
+ target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla,
build_int_cst (type, 1));
}
@@ -3840,7 +3837,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
const vec<stmt_vec_info> &may_misalign_stmts
= LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
stmt_vec_info stmt_info;
- int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
+ poly_uint64 mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
tree mask_cst;
unsigned int i;
tree int_ptrsize_type;
@@ -3852,9 +3849,7 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
tree ptrsize_zero;
tree part_cond_expr;
- /* Check that mask is one less than a power of 2, i.e., mask is
- all zeros followed by all ones. */
- gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
+ gcc_assert (known_ne (mask, 0U));
int_ptrsize_type = signed_type_for (ptr_type_node);
@@ -3962,6 +3957,62 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
chain_cond_expr (cond_expr, part_cond_expr);
}
+/* Function vect_create_cond_for_vla_spec_read.
+
+ Create a conditional expression that represents the run-time checks on the
+ max speculative read amount in VLA modes. We check two things:
+ 1) that the max speculative read amount does not exceed the min page size
+ 2) that the VF is a power of 2, checked indirectly via the max read amount
+
+ Input:
+ COND_EXPR - input conditional expression. New conditions will be chained
+ with logical AND operation.
+ LOOP_VINFO - field LOOP_VINFO_MAX_SPEC_READ_AMOUNT contains the max
+ possible speculative read amount in VLA modes.
+
+ Output:
+ COND_EXPR - conditional expression.
+
+ The returned COND_EXPR is the conditional expression to be used in the
+ if statement that controls which version of the loop gets executed at
+ runtime. */
+
+static void
+vect_create_cond_for_vla_spec_read (loop_vec_info loop_vinfo, tree *cond_expr)
+{
+ poly_uint64 read_amount_poly = LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo);
+ tree amount = build_int_cst (long_unsigned_type_node, read_amount_poly);
+
+ /* Both the read amount and the VF must be non-constant, and the read
+ amount must be a constant power-of-2 multiple of the VF. */
+ unsigned HOST_WIDE_INT multiple;
+ gcc_assert (!read_amount_poly.is_constant ()
+ && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()
+ && constant_multiple_p (read_amount_poly,
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ &multiple)
+ && pow2p_hwi (multiple));
+
+ tree cst_ul_zero = build_int_cstu (long_unsigned_type_node, 0U);
+ tree cst_ul_one = build_int_cstu (long_unsigned_type_node, 1U);
+ tree cst_ul_pagesize = build_int_cstu (long_unsigned_type_node,
+ (unsigned long) param_min_pagesize);
+
+ /* Create an expression of "amount & (amount - 1) == 0". */
+ tree amount_m1 = fold_build2 (MINUS_EXPR, long_unsigned_type_node,
+ amount, cst_ul_one);
+ tree amount_and_expr = fold_build2 (BIT_AND_EXPR, long_unsigned_type_node,
+ amount, amount_m1);
+ tree powof2_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
+ amount_and_expr, cst_ul_zero);
+ chain_cond_expr (cond_expr, powof2_cond_expr);
+
+ /* Create an expression of "amount <= cst_ul_pagesize". */
+ tree pagesize_cond_expr = fold_build2 (LE_EXPR, boolean_type_node,
+ amount, cst_ul_pagesize);
+ chain_cond_expr (cond_expr, pagesize_cond_expr);
+}
+
/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
@@ -4087,6 +4138,7 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
gimple_seq gimplify_stmt_list = NULL;
tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
+ bool version_spec_read = LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (loop_vinfo);
bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
poly_uint64 versioning_threshold
@@ -4145,6 +4197,9 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
&cond_expr_stmt_list);
+ if (version_spec_read)
+ vect_create_cond_for_vla_spec_read (loop_vinfo, &cond_expr);
+
if (version_alias)
{
vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
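
Editorial sketch (not part of the patch): the new vect_create_cond_for_vla_spec_read above chains two tests into the loop-versioning condition: the maximum speculative read amount must be a power of 2 (which, being a constant power-of-2 multiple of the VF, implies the VF is too) and must not exceed the minimum page size. The C below is a rough model of the predicate the generated code evaluates at run time; the function name and values are illustrative, not the generated GIMPLE.

/* Sketch only: the shape of the runtime predicate chained into the loop
   versioning condition for VLA speculative reads.  */
#include <stdbool.h>
#include <stdio.h>

static bool
vla_spec_read_ok (unsigned long amount, unsigned long min_pagesize)
{
  /* "amount & (amount - 1) == 0": amount is a power of 2 (amount is known
     to be non-zero at this point).  */
  bool pow2 = (amount & (amount - 1)) == 0;
  /* "amount <= param_min_pagesize": the speculative read stays within one
     minimum-sized page.  */
  bool fits = amount <= min_pagesize;
  return pow2 && fits;
}

int
main (void)
{
  printf ("%d\n", vla_spec_read_ok (64, 4096));   /* 1: power of 2, fits */
  printf ("%d\n", vla_spec_read_ok (48, 4096));   /* 0: not a power of 2 */
  printf ("%d\n", vla_spec_read_ok (8192, 4096)); /* 0: exceeds page size */
  return 0;
}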
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 85f3e90..55a8495 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1009,6 +1009,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
unaligned_dr (NULL),
peeling_for_alignment (0),
ptr_mask (0),
+ max_spec_read_amount (0),
nonlinear_iv (false),
ivexpr_map (NULL),
scan_map (NULL),
@@ -10141,7 +10142,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
if (peel_mul)
{
if (!step_mul)
- step_mul = peel_mul;
+ {
+ gcc_assert (!nunits.is_constant ());
+ step_mul = gimple_build (&init_stmts,
+ MINUS_EXPR, step_vectype,
+ build_zero_cst (step_vectype), peel_mul);
+ }
else
step_mul = gimple_build (&init_stmts,
MINUS_EXPR, step_vectype,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a6f4db4..dbeb8bd 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2400,70 +2400,26 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
/* We can only peel for loops, of course. */
gcc_checking_assert (loop_vinfo);
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 read_amount
+ = vf * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ read_amount *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
+
auto target_alignment
= DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info));
- unsigned HOST_WIDE_INT target_align;
-
- bool group_aligned = false;
- if (target_alignment.is_constant (&target_align)
- && nunits.is_constant ())
- {
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- auto vectype_size
- = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
- poly_uint64 required_alignment = vf * vectype_size;
- /* If we have a grouped access we require that the alignment be N * elem. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
- required_alignment *=
- DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
- if (!multiple_p (target_alignment, required_alignment))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "desired alignment %wu not met. Instead got %wu "
- "for DR alignment at %G",
- required_alignment.to_constant (),
- target_align, STMT_VINFO_STMT (stmt_info));
- return false;
- }
-
- if (!pow2p_hwi (target_align))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "non-power-of-two vector alignment %wd "
- "for DR alignment at %G",
- target_align, STMT_VINFO_STMT (stmt_info));
- return false;
- }
-
- /* For VLA we have to insert a runtime check that the vector loads
- per iterations don't exceed a page size. For now we can use
- POLY_VALUE_MAX as a proxy as we can't peel for VLA. */
- if (known_gt (required_alignment, (unsigned)param_min_pagesize))
+ if (!multiple_p (target_alignment, read_amount))
+ {
+ if (dump_enabled_p ())
{
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "alignment required for correctness (");
- dump_dec (MSG_MISSED_OPTIMIZATION, required_alignment);
- dump_printf (MSG_NOTE, ") may exceed page size\n");
- }
- return false;
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "desired alignment not met, target was ");
+ dump_dec (MSG_NOTE, target_alignment);
+ dump_printf (MSG_NOTE, " previously, but read amount is ");
+ dump_dec (MSG_NOTE, read_amount);
+ dump_printf (MSG_NOTE, " at %G.\n", STMT_VINFO_STMT (stmt_info));
}
-
- group_aligned = true;
- }
-
- /* There are multiple loads that have a misalignment that we couldn't
- align. We would need LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P to
- vectorize. */
- if (!group_aligned)
- {
- if (inbounds)
- LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
- else
- return false;
+ return false;
}
/* When using a group access the first element may be aligned but the
@@ -2485,6 +2441,33 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
STMT_VINFO_STMT (stmt_info));
return false;
}
+
+ /* Reject vectorization if we know the read amount per vector iteration
+ exceeds the min page size. */
+ if (known_gt (read_amount, (unsigned) param_min_pagesize))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "alignment required for correctness (");
+ dump_dec (MSG_MISSED_OPTIMIZATION, read_amount);
+ dump_printf (MSG_NOTE, ") may exceed page size.\n");
+ }
+ return false;
+ }
+
+ if (!vf.is_constant ())
+ {
+ /* For VLA modes, we need a runtime check to ensure any speculative
+ read amount does not exceed the page size. Here we record the max
+ possible read amount for the check. */
+ if (maybe_gt (read_amount,
+ LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo)))
+ LOOP_VINFO_MAX_SPEC_READ_AMOUNT (loop_vinfo) = read_amount;
+
+ /* For VLA modes, we must use partial vectors. */
+ LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
+ }
}
if (*alignment_support_scheme == dr_unaligned_unsupported)
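
Editorial sketch (not part of the patch): in the get_load_store_type changes above, the per-vector-iteration read amount is element size x group size x VF; an access whose amount provably exceeds the minimum page size is rejected outright, while for a variable VF the largest amount seen is recorded in LOOP_VINFO_MAX_SPEC_READ_AMOUNT and partial vectors are forced, so the page-size and power-of-2 tests can be left to the versioning check. The small C program below models that bookkeeping over a few hypothetical data references; the struct and values are invented for illustration.

/* Sketch only: how the maximum speculative read amount is accumulated for
   the later runtime check.  The data-reference descriptions are made up.  */
#include <stdio.h>

struct dr_desc { unsigned elem_size, group_size; };

int
main (void)
{
  /* Treat the (unknown) VF as a symbolic multiplier of 1 in this sketch;
     the real code tracks poly_uint64 values.  */
  struct dr_desc drs[] = { { 4, 1 }, { 4, 2 }, { 8, 1 } };
  unsigned long min_pagesize = 4096;
  unsigned long max_spec_read_amount = 0;

  for (unsigned i = 0; i < sizeof drs / sizeof drs[0]; i++)
    {
      unsigned long read_amount = drs[i].elem_size * drs[i].group_size;
      if (read_amount > min_pagesize)
        {
          printf ("DR %u rejected: read amount %lu exceeds page size\n",
                  i, read_amount);
          continue;
        }
      if (read_amount > max_spec_read_amount)
        max_spec_read_amount = read_amount;
    }
  printf ("max speculative read amount for runtime check: %lu\n",
          max_spec_read_amount);
  return 0;
}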
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9653496..041cff8 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -919,7 +919,10 @@ public:
int peeling_for_alignment;
/* The mask used to check the alignment of pointers or arrays. */
- int ptr_mask;
+ poly_uint64 ptr_mask;
+
+ /* The maximum speculative read amount in VLA modes, for the runtime check. */
+ poly_uint64 max_spec_read_amount;
/* Indicates whether the loop has any non-linear IV. */
bool nonlinear_iv;
@@ -1155,6 +1158,7 @@ public:
#define LOOP_VINFO_RGROUP_IV_TYPE(L) (L)->rgroup_iv_type
#define LOOP_VINFO_PARTIAL_VECTORS_STYLE(L) (L)->partial_vector_style
#define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
+#define LOOP_VINFO_MAX_SPEC_READ_AMOUNT(L) (L)->max_spec_read_amount
#define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
#define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
#define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
@@ -1209,6 +1213,8 @@ public:
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
((L)->may_misalign_stmts.length () > 0)
+#define LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ(L) \
+ (maybe_gt ((L)->max_spec_read_amount, 0U))
#define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
((L)->comp_alias_ddrs.length () > 0 \
|| (L)->check_unequal_addrs.length () > 0 \
@@ -1219,6 +1225,7 @@ public:
(LOOP_VINFO_SIMD_IF_COND (L))
#define LOOP_REQUIRES_VERSIONING(L) \
(LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \
+ || LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \
|| LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))