aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog27
-rw-r--r--gcc/config/aarch64/aarch64-sve2.md58
-rw-r--r--gcc/config/aarch64/aarch64.md2
-rw-r--r--gcc/config/aarch64/iterators.md14
-rw-r--r--gcc/config/aarch64/predicates.md5
-rw-r--r--gcc/doc/md.texi31
-rw-r--r--gcc/doc/sourcebuild.texi4
-rw-r--r--gcc/internal-fn.c23
-rw-r--r--gcc/internal-fn.def5
-rw-r--r--gcc/internal-fn.h2
-rw-r--r--gcc/optabs.def3
-rw-r--r--gcc/testsuite/ChangeLog12
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c30
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c29
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c37
-rw-r--r--gcc/testsuite/lib/target-supports.exp7
-rw-r--r--gcc/tree-data-ref.c78
20 files changed, 372 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index be2fac7..c57e8c4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,32 @@
2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
+ * doc/sourcebuild.texi (vect_check_ptrs): Document.
+ * optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs.
+ * doc/md.texi: Document them.
+ * internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New
+ internal functions.
+ * internal-fn.h (internal_check_ptrs_fn_supported_p): Declare.
+ * internal-fn.c (check_ptrs_direct): New macro.
+ (expand_check_ptrs_optab_fn): Likewise.
+ (direct_check_ptrs_optab_supported_p): Likewise.
+ (internal_check_ptrs_fn_supported_p): New function.
+ * tree-data-ref.c: Include internal-fn.h.
+ (create_ifn_alias_checks): New function.
+ (create_intersect_range_checks): Use it.
+ * config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator.
+ (optab, cmp_op): Handle it.
+ (raw_war, unspec): New int attributes.
+ * config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILEWR): New
+ constants.
+ * config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand):
+ New predicate.
+ * config/aarch64/aarch64-sve2.md (check_<raw_war>_ptrs<mode>): New
+ expander.
+ (@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest): New
+ pattern.
+
+2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
+
* tree.c (build_vector_from_ctor): Directly return a zero vector for
empty constructors.
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 15142d1..106a9a0 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -331,3 +331,61 @@
}
[(set_attr "movprfx" "*,yes")]
)
+
+;; Use WHILERW and WHILEWR to accelerate alias checks. This is only
+;; possible if the accesses we're checking are exactly the same size
+;; as an SVE vector.
+(define_expand "check_<raw_war>_ptrs<mode>"
+ [(match_operand:GPI 0 "register_operand")
+ (unspec:VNx16BI
+ [(match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
+ (match_operand:GPI 4 "const_int_operand")]
+ SVE2_WHILE_PTR)]
+ "TARGET_SVE2"
+{
+ /* Use the widest predicate mode we can. */
+ unsigned int align = INTVAL (operands[4]);
+ if (align > 8)
+ align = 8;
+ machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();
+
+ /* Emit a WHILERW or WHILEWR, setting the condition codes based on
+ the result. */
+ emit_insn (gen_aarch64_sve2_while_ptest
+ (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
+ gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
+ CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));
+
+ /* Set operand 0 to true if the last bit of the predicate result is set,
+ i.e. if all elements are free of dependencies. */
+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
+ emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
+ DONE;
+})
+
+;; A WHILERW or WHILEWR in which only the flags result is interesting.
+(define_insn_and_rewrite "@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand 3)
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (unspec:PRED_ALL
+ [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "register_operand" "r")]
+ SVE2_WHILE_PTR)]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
+ "TARGET_SVE2"
+ "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %x1, %x2"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
+ {
+ operands[3] = CONSTM1_RTX (VNx16BImode);
+ operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
+ }
+)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f19e227..87e9b936 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -245,6 +245,8 @@
UNSPEC_WHILE_LO
UNSPEC_WHILE_LS
UNSPEC_WHILE_LT
+ UNSPEC_WHILERW
+ UNSPEC_WHILEWR
UNSPEC_LDN
UNSPEC_STN
UNSPEC_INSR
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bfeebe9..83a0d15 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -2077,6 +2077,8 @@
(define_int_iterator SVE_WHILE [UNSPEC_WHILE_LE UNSPEC_WHILE_LO
UNSPEC_WHILE_LS UNSPEC_WHILE_LT])
+(define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR])
+
(define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE
UNSPEC_ASHIFTRT_WIDE
UNSPEC_LSHIFTRT_WIDE])
@@ -2157,6 +2159,8 @@
(UNSPEC_FEXPA "fexpa")
(UNSPEC_FTSMUL "ftsmul")
(UNSPEC_FTSSEL "ftssel")
+ (UNSPEC_WHILERW "vec_check_raw_alias")
+ (UNSPEC_WHILEWR "vec_check_war_alias")
(UNSPEC_COND_FABS "abs")
(UNSPEC_COND_FADD "add")
(UNSPEC_COND_FCADD90 "cadd90")
@@ -2480,13 +2484,18 @@
(UNSPEC_WHILE_LE "le")
(UNSPEC_WHILE_LO "lo")
(UNSPEC_WHILE_LS "ls")
- (UNSPEC_WHILE_LT "lt")])
+ (UNSPEC_WHILE_LT "lt")
+ (UNSPEC_WHILERW "rw")
+ (UNSPEC_WHILEWR "wr")])
(define_int_attr while_optab_cmp [(UNSPEC_WHILE_LE "le")
(UNSPEC_WHILE_LO "ult")
(UNSPEC_WHILE_LS "ule")
(UNSPEC_WHILE_LT "lt")])
+(define_int_attr raw_war [(UNSPEC_WHILERW "raw")
+ (UNSPEC_WHILEWR "war")])
+
(define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b")
(UNSPEC_BRKN "n")
(UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")])
@@ -2630,3 +2639,6 @@
(UNSPEC_REVB "16")
(UNSPEC_REVH "32")
(UNSPEC_REVW "64")])
+
+(define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW")
+ (UNSPEC_WHILEWR "UNSPEC_WHILEWR")])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 2c5c53c..2323612 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -869,3 +869,8 @@
(define_predicate "aarch64_sve_any_binary_operator"
(match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))
+
+(define_predicate "aarch64_bytes_per_sve_vector_operand"
+ (and (match_code "const_int,const_poly_int")
+ (match_test "known_eq (wi::to_poly_wide (op, mode),
+ BYTES_PER_SVE_VECTOR)")))
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 87bbeb4..0ad4a00 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5076,6 +5076,37 @@ for (i = 1; i < GET_MODE_NUNITS (@var{n}); i++)
operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
@end smallexample
+@cindex @code{check_raw_ptrs@var{m}} instruction pattern
+@item @samp{check_raw_ptrs@var{m}}
+Check whether, given two pointers @var{a} and @var{b} and a length @var{len},
+a write of @var{len} bytes at @var{a} followed by a read of @var{len} bytes
+at @var{b} can be split into interleaved byte accesses
+@samp{@var{a}[0], @var{b}[0], @var{a}[1], @var{b}[1], @dots{}}
+without affecting the dependencies between the bytes. Set operand 0
+to true if the split is possible and false otherwise.
+
+Operands 1, 2 and 3 provide the values of @var{a}, @var{b} and @var{len}
+respectively. Operand 4 is a constant integer that provides the known
+common alignment of @var{a} and @var{b}. All inputs have mode @var{m}.
+
+This split is possible if:
+
+@smallexample
+@var{a} == @var{b} || @var{a} + @var{len} <= @var{b} || @var{b} + @var{len} <= @var{a}
+@end smallexample
+
+You should only define this pattern if the target has a way of accelerating
+the test without having to do the individual comparisons.
+
+@cindex @code{check_war_ptrs@var{m}} instruction pattern
+@item @samp{check_war_ptrs@var{m}}
+Like @samp{check_raw_ptrs@var{m}}, but with the read and write swapped round.
+The split is possible in this case if:
+
+@smallexample
+@var{b} <= @var{a} || @var{a} + @var{len} <= @var{b}
+@end smallexample
+
@cindex @code{vec_cmp@var{m}@var{n}} instruction pattern
@item @samp{vec_cmp@var{m}@var{n}}
Output a vector comparison. Operand 0 of mode @var{n} is the destination for
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index f3bf66c..a3432bc 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1487,6 +1487,10 @@ Target supports hardware vectors of @code{long}.
@item vect_long_long
Target supports hardware vectors of @code{long long}.
+@item vect_check_ptrs
+Target supports the @code{check_raw_ptrs} and @code{check_war_ptrs}
+optabs on vectors.
+
@item vect_fully_masked
Target supports fully-masked (also known as fully-predicated) loops,
so that vector loops can handle partial as well as full vectors.
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 6a878bd..88d52d2 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -118,6 +118,7 @@ init_internal_fns ()
#define fold_extract_direct { 2, 2, false }
#define fold_left_direct { 1, 1, false }
#define mask_fold_left_direct { 1, 1, false }
+#define check_ptrs_direct { 0, 0, false }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -3006,6 +3007,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
#define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \
expand_direct_optab_fn (FN, STMT, OPTAB, 3)
+#define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
+ expand_direct_optab_fn (FN, STMT, OPTAB, 4)
+
/* RETURN_TYPE and ARGS are a return type and argument list that are
in principle compatible with FN (which satisfies direct_internal_fn_p).
Return the types that should be used to determine whether the
@@ -3095,6 +3099,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
#define direct_fold_extract_optab_supported_p direct_optab_supported_p
#define direct_fold_left_optab_supported_p direct_optab_supported_p
#define direct_mask_fold_left_optab_supported_p direct_optab_supported_p
+#define direct_check_ptrs_optab_supported_p direct_optab_supported_p
/* Return the optab used by internal function FN. */
@@ -3572,6 +3577,24 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
&& insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
}
+/* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
+ for pointers of type TYPE when the accesses have LENGTH bytes and their
+ common byte alignment is ALIGN. */
+
+bool
+internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type,
+ poly_uint64 length, unsigned int align)
+{
+ machine_mode mode = TYPE_MODE (type);
+ optab optab = direct_internal_fn_optab (ifn);
+ insn_code icode = direct_optab_handler (optab, mode);
+ if (icode == CODE_FOR_nothing)
+ return false;
+ rtx length_rtx = immed_wide_int_const (length, mode);
+ return (insn_operand_matches (icode, 3, length_rtx)
+ && insn_operand_matches (icode, 4, GEN_INT (align)));
+}
+
/* Expand STMT as though it were a call to internal function FN. */
void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index a945944..85f45d6 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
- cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode>
- fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
+ - check_ptrs: used for check_{raw,war}_ptrs
DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that
maps to one of two optabs, depending on the signedness of an input.
@@ -136,6 +137,10 @@ DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
vec_mask_store_lanes, mask_store_lanes)
DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
+DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
+ check_raw_ptrs, check_ptrs)
+DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW,
+ check_war_ptrs, check_ptrs)
DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW,
vec_shl_insert, binary)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 389241a..a1bc081 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -221,6 +221,8 @@ extern int internal_fn_mask_index (internal_fn);
extern int internal_fn_stored_value_index (internal_fn);
extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
tree, tree, int);
+extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
+ poly_uint64, unsigned int);
extern void expand_internal_call (gcall *);
extern void expand_internal_call (internal_fn, gcall *);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 90e177a..24d8275 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -429,6 +429,9 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
+OPTAB_D (check_raw_ptrs_optab, "check_raw_ptrs$a")
+OPTAB_D (check_war_ptrs_optab, "check_war_ptrs$a")
+
OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3a37d94..01e8e2b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,17 @@
2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
+ * lib/target-supports.exp (check_effective_target_vect_check_ptrs):
+ New procedure.
+ * gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR_PTRS to be
+ used, if available.
+ * gcc.dg/vect/vect-alias-check-15.c: Likewise.
+ * gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW_PTRS.
+ * gcc.target/aarch64/sve2/whilerw_1.c: New test.
+ * gcc.target/aarch64/sve2/whilewr_1.c: Likewise.
+ * gcc.target/aarch64/sve2/whilewr_2.c: Likewise.
+
+2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/acle/asm/ptest_pmore.c: New test.
2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
index 1d148a0..29bc571 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
@@ -60,5 +60,6 @@ main (void)
/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
/* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */
-/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */
/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
index fbe3f84..ad74496 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
@@ -57,5 +57,6 @@ main (void)
}
/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */
/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
index 81c252d..8a9a6ff 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
@@ -62,5 +62,6 @@ main (void)
}
/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
-/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" { target { ! vect_check_ptrs } } } } */
+/* { dg-final { scan-tree-dump "using an IFN_CHECK_RAW_PTRS test" "vect" { target vect_check_ptrs } } } */
/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c
new file mode 100644
index 0000000..63a6d2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE) \
+ TYPE \
+ test_##TYPE (TYPE *dst, TYPE *src, int n) \
+ { \
+ TYPE res = 0; \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst[i] += 1; \
+ res += src[i]; \
+ } \
+ return res; \
+ }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilerw\t} 4 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.b, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.h, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.s, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.d, x[0-9]+, x1\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilewr\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c
new file mode 100644
index 0000000..e204b37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE) \
+ void \
+ test_##TYPE (TYPE *dst, TYPE *src1, TYPE *src2, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ dst[i] = src1[i] + src2[i]; \
+ }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilewr\t} 8 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x0\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x2, x0\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilerw\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c
new file mode 100644
index 0000000..0b86991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */
+/* { dg-require-effective-target lp64 } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(TYPE) \
+ void \
+ test_##TYPE (TYPE *dst1, TYPE *dst2, TYPE *dst3, int n) \
+ { \
+ for (int i = 0; i < n; ++i) \
+ { \
+ dst1[i] = 1; \
+ dst2[i] = 2; \
+ dst3[i] = 3; \
+ } \
+ }
+
+TEST_LOOP (int8_t);
+TEST_LOOP (int16_t);
+TEST_LOOP (int32_t);
+TEST_LOOP (int64_t);
+
+/* { dg-final { scan-assembler-times {\twhilewr\t} 12 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x1\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x2\n} 1 } } */
+/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x2\n} 1 } } */
+/* { dg-final { scan-assembler-not {\twhilerw\t} } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 54b2fca..08af9f8 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6459,6 +6459,13 @@ proc check_effective_target_vect_natural_alignment { } {
return $et_vect_natural_alignment
}
+# Return true if the target supports the check_raw_ptrs and check_war_ptrs
+# optabs on vectors.
+
+proc check_effective_target_vect_check_ptrs { } {
+ return [check_effective_target_aarch64_sve2]
+}
+
# Return true if fully-masked loops are supported.
proc check_effective_target_vect_fully_masked { } {
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index bad80e1..117a14b 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -96,6 +96,7 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "tree-eh.h"
#include "ssa.h"
+#include "internal-fn.h"
static struct datadep_stats
{
@@ -1719,6 +1720,80 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
}
}
+/* A subroutine of create_intersect_range_checks, with a subset of the
+ same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
+ to optimize cases in which the references form a simple RAW, WAR or
+ WAW dependence. Return true if *COND_EXPR was set to such a test. */
+
+static bool
+create_ifn_alias_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
+
+ /* Check for cases in which:
+
+ (a) we have a known RAW, WAR or WAW dependence
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
+
+ /* Make sure that both DRs access the same pattern of bytes,
+ with a constant length and step. */
+ poly_uint64 seg_len;
+ if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
+ || !poly_int_tree_p (dr_a.seg_len, &seg_len)
+ || maybe_ne (dr_a.access_size, dr_b.access_size)
+ || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
+ || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
+ return false;
+
+ unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
+ tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
+ tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
+
+ /* See whether the target supports what we want to do. WAW checks are
+ equivalent to WAR checks here. */
+ internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
+ ? IFN_CHECK_RAW_PTRS
+ : IFN_CHECK_WAR_PTRS);
+ unsigned int align = MIN (dr_a.align, dr_b.align);
+ poly_uint64 full_length = seg_len + bytes;
+ if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+ full_length, align))
+ {
+ full_length = seg_len + dr_a.access_size;
+ if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
+ full_length, align))
+ return false;
+ }
+
+ /* Commit to using this form of test. */
+ addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
+ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
+
+ addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
+ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
+
+ *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
+ ifn, boolean_type_node,
+ 4, addr_a, addr_b,
+ size_int (full_length),
+ size_int (align));
+
+ if (dump_enabled_p ())
+ {
+ if (ifn == IFN_CHECK_RAW_PTRS)
+ dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
+ else
+ dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
+ }
+ return true;
+}
+
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
free of aliases, using a condition based on index values instead
of a condition based on addresses. Return true on success,
@@ -2240,6 +2315,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr,
if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
return;
+ if (create_ifn_alias_checks (cond_expr, alias_pair))
+ return;
+
if (create_waw_or_war_checks (cond_expr, alias_pair))
return;