 gcc/ChangeLog                                        |   9
 gcc/testsuite/ChangeLog                              |  12
 gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c      |   2
 gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c      |   2
 gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c      |   2
 gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c      |   2
 gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c       |   2
 gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c  |  16
 gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c  |   2
 gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c  |  17
 gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c  |  17
 gcc/tree-data-ref.c                                  | 218
 12 files changed, 260 insertions(+), 41 deletions(-)
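
For context, the kind of loop these runtime alias checks guard looks roughly like the one below.  This is a hypothetical illustration, not a test from the patch: x[] is only written, y[] is only read, and both accesses advance by the same step, so a full "no overlap at all" test is stronger than necessary and the patch below lets the vectoriser emit a cheaper one-sided WAR/WAW test instead.

/* Hypothetical example of a loop needing a runtime alias check.  If x and y
   may point into the same array, the vector loop can only be used when the
   stores to x[] do not overwrite y[] elements that later scalar iterations
   would still have read (a write-after-read hazard).  */
void
f (int *x, int *y, int n)
{
  for (int i = 0; i < n; ++i)
    x[i] = y[i] + 1;
}
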
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eac0410..6ca0c57 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
 2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* tree-data-ref.c (create_intersect_range_checks_index): If the
+	alias pair describes simple WAW and WAR dependencies, just check
+	whether the first B access overlaps later A accesses.
+	(create_waw_or_war_checks): New function that performs the same
+	optimization on addresses.
+	(create_intersect_range_checks): Call it.
+
+2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* lra-constraints.c (valid_address_p): Take the operand and a
 	constraint as argument.  If the operand is a MEM and the constraint
 	is a memory constraint, check whether the eliminated form of the
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 163ea21..12a0d08 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,17 @@
 2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
 
+	* gcc.dg/vect/vect-alias-check-8.c: Expect WAR/WAW checks to be used.
+	* gcc.dg/vect/vect-alias-check-14.c: Likewise.
+	* gcc.dg/vect/vect-alias-check-15.c: Likewise.
+	* gcc.dg/vect/vect-alias-check-18.c: Likewise.
+	* gcc.dg/vect/vect-alias-check-19.c: Likewise.
+	* gcc.target/aarch64/sve/var_stride_1.c: Update expected sequence.
+	* gcc.target/aarch64/sve/var_stride_2.c: Likewise.
+	* gcc.target/aarch64/sve/var_stride_3.c: Likewise.
+	* gcc.target/aarch64/sve/var_stride_5.c: Likewise.
+
+2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
+
 	* gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c: Remove XFAIL.
 	* gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c: Likewise.
 	* gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
index 87045c9..1d148a0 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
@@ -60,5 +60,5 @@ main (void)
 
 /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
 /* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */
-/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
 /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
index ad8d69e..fbe3f84 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
@@ -57,5 +57,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
 /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
index e9fd31a..9d07391 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
@@ -60,5 +60,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
 /* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
index 583e296..7c0ff36 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
@@ -58,5 +58,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
 /* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
index 6cd0219..7e5df13 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
@@ -60,5 +60,5 @@ main (void)
 }
 
 /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
 /* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
index 68baba9..40ff2d5 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
@@ -15,13 +15,9 @@ f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
 /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
 /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
 /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (VF-1)*4 rather than (257-1)*4.  */
-/* { dg-final { scan-assembler-not {, 1024} } } */
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
-/* Two range checks and a check for n being zero.  */
-/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
+   an overlap check that multiplies by (257-1)*4.  */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* One range check and a check for n being zero.  */
+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
index 30f6d26..b8afea7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
@@ -15,7 +15,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
 /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
 /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
 /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (257-1)*4 rather than (VF-1)*4.  */
+/* Should multiply by (257-1)*4 rather than (VF-1)*4 or (VF-2)*4.  */
 /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x2, 10, 16\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x3, 10, 16\n} 1 } } */
 /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
index 70792ff..5ab6859 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, int n, long m __attribute__((unused)))
 /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
 /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
 /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
-/* Should multiply by (VF-1)*4 rather than (257-1)*4.  */
-/* { dg-final { scan-assembler-not {, 1024} } } */
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
-/* Two range checks and a check for n being zero.  */
-/* { dg-final { scan-assembler {\tcmp\t} } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
+   an overlap check that multiplies by (257-1)*4.  */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
+/* One range check and a check for n being zero.  */
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
index 688f3be6..93c1141 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, long n, long m __attribute__((unused)))
 /* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
 /* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */
 /* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */
-/* Should multiply by (VF-1)*8 rather than (257-1)*8.  */
-/* { dg-final { scan-assembler-not {, 2048} } } */
-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */
-/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
-/* Two range checks and a check for n being zero.  */
-/* { dg-final { scan-assembler {\tcmp\t} } } */
-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
+/* Should use a WAR check that multiplies by (VF-2)*8 rather than
+   an overlap check that multiplies by (257-1)*4.  */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #16\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
+/* One range check and a check for n being zero.  */
+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 4dfa334..bad80e1 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -1805,6 +1805,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
 				abs_step, &niter_access2))
     return false;
 
+  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
+
   unsigned int i;
   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
     {
@@ -1906,16 +1908,57 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
 
      Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
-     pointer-based checks.  */
-  poly_offset_int limit = (idx_len1 + idx_access1 - 1
-			   + idx_len2 + idx_access2 - 1);
+     pointer-based checks.
+
+     We can do better if DR_B is a write and if DR_A and DR_B are
+     well-ordered in both the original and the new code (see the
+     comment above the DR_ALIAS_* flags for details).  In this case
+     we know that for each i in [0, n-1], the write performed by
+     access i of DR_B occurs after access numbers j<=i of DR_A in
+     both the original and the new code.  Any write or anti
+     dependencies wrt those DR_A accesses are therefore maintained.
+
+     We just need to make sure that each individual write in DR_B does not
+     overlap any higher-indexed access in DR_A; such DR_A accesses happen
+     after the DR_B access in the original code but happen before it in
+     the new code.
+
+     We know the steps for both accesses are equal, so by induction, we
+     just need to test whether the first write of DR_B overlaps a later
+     access of DR_A.  In other words, we need to move min1 along by
+     one iteration:
+
+       min1' = min1 + idx_step
+
+     and use the ranges:
+
+       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
+
+     and:
+
+       [min2, min2 + idx_access2 - 1]
+
+     where:
+
+       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
+       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
+  if (waw_or_war_p)
+    idx_len1 -= abs_idx_step;
+
+  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
+  if (!waw_or_war_p)
+    limit += idx_len2;
+
   tree utype = unsigned_type_for (TREE_TYPE (min1));
   if (!wi::fits_to_tree_p (limit, utype))
     return false;
 
   poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
-  poly_offset_int high_offset2 = neg_step ? 0 : idx_len2;
+  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
   poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
+  /* Equivalent to adding IDX_STEP to MIN1.  */
+  if (waw_or_war_p)
+    bias -= wi::to_offset (idx_step);
 
   tree subject = fold_build2 (MINUS_EXPR, utype,
 			      fold_convert (utype, min2),
@@ -1931,7 +1974,169 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
       *cond_expr = part_cond_expr;
     }
   if (dump_enabled_p ())
-    dump_printf (MSG_NOTE, "using an index-based overlap test\n");
+    {
+      if (waw_or_war_p)
+	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
+      else
+	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
+    }
+  return true;
+}
+
+/* A subroutine of create_intersect_range_checks, with a subset of the
+   same arguments.  Try to optimize cases in which the second access
+   is a write and in which some overlap is valid.  */
+
+static bool
+create_waw_or_war_checks (tree *cond_expr,
+			  const dr_with_seg_len_pair_t &alias_pair)
+{
+  const dr_with_seg_len& dr_a = alias_pair.first;
+  const dr_with_seg_len& dr_b = alias_pair.second;
+
+  /* Check for cases in which:
+
+       (a) DR_B is always a write;
+       (b) the accesses are well-ordered in both the original and new code
+	   (see the comment above the DR_ALIAS_* flags for details); and
+       (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
+  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
+    return false;
+
+  /* Check for equal (but possibly variable) steps.  */
+  tree step = DR_STEP (dr_a.dr);
+  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
+    return false;
+
+  /* Make sure that we can operate on sizetype without loss of precision.  */
+  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
+  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
+    return false;
+
+  /* All addresses involved are known to have a common alignment ALIGN.
+     We can therefore subtract ALIGN from an exclusive endpoint to get
+     an inclusive endpoint.  In the best (and common) case, ALIGN is the
+     same as the access sizes of both DRs, and so subtracting ALIGN
+     cancels out the addition of an access size.  */
+  unsigned int align = MIN (dr_a.align, dr_b.align);
+  poly_uint64 last_chunk_a = dr_a.access_size - align;
+  poly_uint64 last_chunk_b = dr_b.access_size - align;
+
+  /* Get a boolean expression that is true when the step is negative.  */
+  tree indicator = dr_direction_indicator (dr_a.dr);
+  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
+			       fold_convert (ssizetype, indicator),
+			       ssize_int (0));
+
+  /* Get lengths in sizetype.  */
+  tree seg_len_a
+    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
+  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
+
+  /* Each access has the following pattern:
+
+	  <- |seg_len| ->
+	  <--- A: -ve step --->
+	  +-----+-------+-----+-------+-----+
+	  | n-1 | ..... |  0  | ..... | n-1 |
+	  +-----+-------+-----+-------+-----+
+			<--- B: +ve step --->
+			<- |seg_len| ->
+			|
+		   base address
+
+     where "n" is the number of scalar iterations covered by the segment.
+
+     A is the range of bytes accessed when the step is negative,
+     B is the range when the step is positive.
+
+     We know that DR_B is a write.  We also know (from checking that
+     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
+     the write performed by access i of DR_B occurs after access numbers
+     j<=i of DR_A in both the original and the new code.  Any write or
+     anti dependencies wrt those DR_A accesses are therefore maintained.
+
+     We just need to make sure that each individual write in DR_B does not
+     overlap any higher-indexed access in DR_A; such DR_A accesses happen
+     after the DR_B access in the original code but happen before it in
+     the new code.
+
+     We know the steps for both accesses are equal, so by induction, we
+     just need to test whether the first write of DR_B overlaps a later
+     access of DR_A.  In other words, we need to move addr_a along by
+     one iteration:
+
+       addr_a' = addr_a + step
+
+     and check whether:
+
+       [addr_b, addr_b + last_chunk_b]
+
+     overlaps:
+
+       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
+
+     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
+
+       low_offset_a = +ve step ? 0 : seg_len_a - step
+       high_offset_a = +ve step ? seg_len_a - step : 0
+
+     This is equivalent to testing whether:
+
+       addr_a' + low_offset_a <= addr_b + last_chunk_b
+       && addr_b <= addr_a' + high_offset_a + last_chunk_a
+
+     Converting this into a single test, there is an overlap if:
+
+       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
+
+     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
+
+     If DR_A is performed, limit + |step| - last_chunk_b is known to be
+     less than the size of the object underlying DR_A.  We also know
+     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
+     guaranteed at compile time.  There can therefore be no overflow if
+     "limit" is calculated in an unsigned type with pointer precision.  */
+  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
+					 DR_OFFSET (dr_a.dr));
+  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
+
+  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
+					 DR_OFFSET (dr_b.dr));
+  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
+
+  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
+  addr_a = fold_build_pointer_plus (addr_a, step);
+  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
+					   seg_len_a, step);
+  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
+    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
+
+  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
+				   seg_len_a_minus_step, size_zero_node);
+  if (!CONSTANT_CLASS_P (low_offset_a))
+    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
+
+  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
+     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
+  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
+				    low_offset_a);
+
+  /* The amount added to addr_b - addr_a'.  */
+  tree bias = fold_build2 (MINUS_EXPR, sizetype,
+			   size_int (last_chunk_b), low_offset_a);
+
+  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
+  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
+		       size_int (last_chunk_a + last_chunk_b));
+
+  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
+  subject = fold_build2 (PLUS_EXPR, sizetype,
+			 fold_convert (sizetype, subject), bias);
+
+  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
+  if (dump_enabled_p ())
+    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
   return true;
 }
 
@@ -2035,6 +2240,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr,
   if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
     return;
 
+  if (create_waw_or_war_checks (cond_expr, alias_pair))
+    return;
+
   unsigned HOST_WIDE_INT min_align;
   tree_code cmp_code;
   /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
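
For illustration, the runtime condition built by the new create_waw_or_war_checks can be transcribed from the comment above into plain C along the following lines.  The function and its parameter names are invented for this sketch, sizetype/ssizetype are approximated by size_t/ptrdiff_t, and the pointer subtraction stands in for POINTER_DIFF_EXPR; the real code builds the equivalent GIMPLE trees instead of evaluating anything directly.

#include <stddef.h>

/* Sketch of the address-based WAR/WAW test: addr_a and addr_b are the first
   addresses accessed by DR_A and DR_B, step is their common (possibly
   negative) byte step, seg_len_a is DR_A's segment length in bytes and
   last_chunk_a/last_chunk_b are access_size - align.  Returns nonzero when
   the vector loop is safe to use.  */
static int
waw_or_war_check_ok (const char *addr_a, const char *addr_b, ptrdiff_t step,
                     size_t seg_len_a, size_t last_chunk_a,
                     size_t last_chunk_b)
{
  /* Move DR_A's first access along by one iteration (addr_a' above).  */
  const char *addr_a1 = addr_a + step;

  /* Offsets spanned by DR_A accesses 1..n-1, relative to addr_a'.  */
  size_t len = seg_len_a - (size_t) step;
  size_t low_offset_a = step < 0 ? len : 0;
  size_t high_offset_a = len - low_offset_a;

  /* There is a problematic overlap iff
     0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
     in unsigned arithmetic, so "subject > limit" means the loop is safe.  */
  size_t subject = (size_t) (addr_b - addr_a1) + last_chunk_b - low_offset_a;
  size_t limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b;
  return subject > limit;
}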