diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 27 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve2.md | 58 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 2 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 14 | ||||
-rw-r--r-- | gcc/config/aarch64/predicates.md | 5 | ||||
-rw-r--r-- | gcc/doc/md.texi | 31 | ||||
-rw-r--r-- | gcc/doc/sourcebuild.texi | 4 | ||||
-rw-r--r-- | gcc/internal-fn.c | 23 | ||||
-rw-r--r-- | gcc/internal-fn.def | 5 | ||||
-rw-r--r-- | gcc/internal-fn.h | 2 | ||||
-rw-r--r-- | gcc/optabs.def | 3 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c | 29 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c | 37 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 7 | ||||
-rw-r--r-- | gcc/tree-data-ref.c | 78 |
20 files changed, 372 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index be2fac7..c57e8c4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,32 @@ 2019-11-18 Richard Sandiford <richard.sandiford@arm.com> + * doc/sourcebuild.texi (vect_check_ptrs): Document. + * optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs. + * doc/md.texi: Document them. + * internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New + internal functions. + * internal-fn.h (internal_check_ptrs_fn_supported_p): Declare. + * internal-fn.c (check_ptrs_direct): New macro. + (expand_check_ptrs_optab_fn): Likewise. + (direct_check_ptrs_optab_supported_p): Likewise. + (internal_check_ptrs_fn_supported_p): New function. + * tree-data-ref.c: Include internal-fn.h. + (create_ifn_alias_checks): New function. + (create_intersect_range_checks): Use it. + * config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator. + (optab, cmp_op): Handle it. + (raw_war, unspec): New int attributes. + * config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILEWR): New + constants. + * config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand): + New predicate. + * config/aarch64/aarch64-sve2.md (check_<raw_war>_ptrs<mode>): New + expander. + (@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest): New + pattern. + +2019-11-18 Richard Sandiford <richard.sandiford@arm.com> + * tree.c (build_vector_from_ctor): Directly return a zero vector for empty constructors. diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 15142d1..106a9a0 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -331,3 +331,61 @@ } [(set_attr "movprfx" "*,yes")] ) + +;; Use WHILERW and WHILEWR to accelerate alias checks. This is only +;; possible if the accesses we're checking are exactly the same size +;; as an SVE vector. 
+(define_expand "check_<raw_war>_ptrs<mode>" + [(match_operand:GPI 0 "register_operand") + (unspec:VNx16BI + [(match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand") + (match_operand:GPI 4 "const_int_operand")] + SVE2_WHILE_PTR)] + "TARGET_SVE2" +{ + /* Use the widest predicate mode we can. */ + unsigned int align = INTVAL (operands[4]); + if (align > 8) + align = 8; + machine_mode pred_mode = aarch64_sve_pred_mode (align).require (); + + /* Emit a WHILERW or WHILEWR, setting the condition codes based on + the result. */ + emit_insn (gen_aarch64_sve2_while_ptest + (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode, + gen_rtx_SCRATCH (pred_mode), operands[1], operands[2], + CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode))); + + /* Set operand 0 to true if the last bit of the predicate result is set, + i.e. if all elements are free of dependencies. */ + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx); + emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg)); + DONE; +}) + +;; A WHILERW or WHILEWR in which only the flags result is interesting. +(define_insn_and_rewrite "@aarch64_sve2_while<cmp_op><GPI:mode><PRED_ALL:mode>_ptest" + [(set (reg:CC_NZC CC_REGNUM) + (unspec:CC_NZC + [(match_operand 3) + (match_operand 4) + (const_int SVE_KNOWN_PTRUE) + (unspec:PRED_ALL + [(match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")] + SVE2_WHILE_PTR)] + UNSPEC_PTEST)) + (clobber (match_scratch:PRED_ALL 0 "=Upa"))] + "TARGET_SVE2" + "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %x1, %x2" + ;; Force the compiler to drop the unused predicate operand, so that we + ;; don't have an unnecessary PTRUE. 
+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" + { + operands[3] = CONSTM1_RTX (VNx16BImode); + operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); + } +) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index f19e227..87e9b936 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -245,6 +245,8 @@ UNSPEC_WHILE_LO UNSPEC_WHILE_LS UNSPEC_WHILE_LT + UNSPEC_WHILERW + UNSPEC_WHILEWR UNSPEC_LDN UNSPEC_STN UNSPEC_INSR diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index bfeebe9..83a0d15 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -2077,6 +2077,8 @@ (define_int_iterator SVE_WHILE [UNSPEC_WHILE_LE UNSPEC_WHILE_LO UNSPEC_WHILE_LS UNSPEC_WHILE_LT]) +(define_int_iterator SVE2_WHILE_PTR [UNSPEC_WHILERW UNSPEC_WHILEWR]) + (define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE UNSPEC_ASHIFTRT_WIDE UNSPEC_LSHIFTRT_WIDE]) @@ -2157,6 +2159,8 @@ (UNSPEC_FEXPA "fexpa") (UNSPEC_FTSMUL "ftsmul") (UNSPEC_FTSSEL "ftssel") + (UNSPEC_WHILERW "vec_check_raw_alias") + (UNSPEC_WHILEWR "vec_check_war_alias") (UNSPEC_COND_FABS "abs") (UNSPEC_COND_FADD "add") (UNSPEC_COND_FCADD90 "cadd90") @@ -2480,13 +2484,18 @@ (UNSPEC_WHILE_LE "le") (UNSPEC_WHILE_LO "lo") (UNSPEC_WHILE_LS "ls") - (UNSPEC_WHILE_LT "lt")]) + (UNSPEC_WHILE_LT "lt") + (UNSPEC_WHILERW "rw") + (UNSPEC_WHILEWR "wr")]) (define_int_attr while_optab_cmp [(UNSPEC_WHILE_LE "le") (UNSPEC_WHILE_LO "ult") (UNSPEC_WHILE_LS "ule") (UNSPEC_WHILE_LT "lt")]) +(define_int_attr raw_war [(UNSPEC_WHILERW "raw") + (UNSPEC_WHILEWR "war")]) + (define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b") (UNSPEC_BRKN "n") (UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")]) @@ -2630,3 +2639,6 @@ (UNSPEC_REVB "16") (UNSPEC_REVH "32") (UNSPEC_REVW "64")]) + +(define_int_attr unspec [(UNSPEC_WHILERW "UNSPEC_WHILERW") + (UNSPEC_WHILEWR "UNSPEC_WHILEWR")]) diff --git a/gcc/config/aarch64/predicates.md 
b/gcc/config/aarch64/predicates.md index 2c5c53c..2323612 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -869,3 +869,8 @@ (define_predicate "aarch64_sve_any_binary_operator" (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor")) + +(define_predicate "aarch64_bytes_per_sve_vector_operand" + (and (match_code "const_int,const_poly_int") + (match_test "known_eq (wi::to_poly_wide (op, mode), + BYTES_PER_SVE_VECTOR)"))) diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 87bbeb4..0ad4a00 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5076,6 +5076,37 @@ for (i = 1; i < GET_MODE_NUNITS (@var{n}); i++) operand0[i] = operand0[i - 1] && (operand1 + i < operand2); @end smallexample +@cindex @code{check_raw_ptrs@var{m}} instruction pattern +@item @samp{check_raw_ptrs@var{m}} +Check whether, given two pointers @var{a} and @var{b} and a length @var{len}, +a write of @var{len} bytes at @var{a} followed by a read of @var{len} bytes +at @var{b} can be split into interleaved byte accesses +@samp{@var{a}[0], @var{b}[0], @var{a}[1], @var{b}[1], @dots{}} +without affecting the dependencies between the bytes. Set operand 0 +to true if the split is possible and false otherwise. + +Operands 1, 2 and 3 provide the values of @var{a}, @var{b} and @var{len} +respectively. Operand 4 is a constant integer that provides the known +common alignment of @var{a} and @var{b}. All inputs have mode @var{m}. + +This split is possible if: + +@smallexample +@var{a} == @var{b} || @var{a} + @var{len} <= @var{b} || @var{b} + @var{len} <= @var{a} +@end smallexample + +You should only define this pattern if the target has a way of accelerating +the test without having to do the individual comparisons. + +@cindex @code{check_war_ptrs@var{m}} instruction pattern +@item @samp{check_war_ptrs@var{m}} +Like @samp{check_raw_ptrs@var{m}}, but with the read and write swapped round. 
+The split is possible in this case if: + +@smallexample +@var{b} <= @var{a} || @var{a} + @var{len} <= @var{b} +@end smallexample + @cindex @code{vec_cmp@var{m}@var{n}} instruction pattern @item @samp{vec_cmp@var{m}@var{n}} Output a vector comparison. Operand 0 of mode @var{n} is the destination for diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index f3bf66c..a3432bc 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -1487,6 +1487,10 @@ Target supports hardware vectors of @code{long}. @item vect_long_long Target supports hardware vectors of @code{long long}. +@item vect_check_ptrs +Target supports the @code{check_raw_ptrs} and @code{check_war_ptrs} +optabs on vectors. + @item vect_fully_masked Target supports fully-masked (also known as fully-predicated) loops, so that vector loops can handle partial as well as full vectors. diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 6a878bd..88d52d2 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -118,6 +118,7 @@ init_internal_fns () #define fold_extract_direct { 2, 2, false } #define fold_left_direct { 1, 1, false } #define mask_fold_left_direct { 1, 1, false } +#define check_ptrs_direct { 0, 0, false } const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, @@ -3006,6 +3007,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 3) +#define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ + expand_direct_optab_fn (FN, STMT, OPTAB, 4) + /* RETURN_TYPE and ARGS are a return type and argument list that are in principle compatible with FN (which satisfies direct_internal_fn_p). 
Return the types that should be used to determine whether the @@ -3095,6 +3099,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_fold_extract_optab_supported_p direct_optab_supported_p #define direct_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p +#define direct_check_ptrs_optab_supported_p direct_optab_supported_p /* Return the optab used by internal function FN. */ @@ -3572,6 +3577,24 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type, && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale))); } +/* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN + for pointers of type TYPE when the accesses have LENGTH bytes and their + common byte alignment is ALIGN. */ + +bool +internal_check_ptrs_fn_supported_p (internal_fn ifn, tree type, + poly_uint64 length, unsigned int align) +{ + machine_mode mode = TYPE_MODE (type); + optab optab = direct_internal_fn_optab (ifn); + insn_code icode = direct_optab_handler (optab, mode); + if (icode == CODE_FOR_nothing) + return false; + rtx length_rtx = immed_wide_int_const (length, mode); + return (insn_operand_matches (icode, 3, length_rtx) + && insn_operand_matches (icode, 4, GEN_INT (align))); +} + /* Expand STMT as though it were a call to internal function FN. */ void diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index a945944..85f45d6 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode> - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode + - check_ptrs: used for check_{raw,war}_ptrs DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that maps to one of two optabs, depending on the signedness of an input. 
@@ -136,6 +137,10 @@ DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0, vec_mask_store_lanes, mask_store_lanes) DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) +DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW, + check_raw_ptrs, check_ptrs) +DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW, + check_war_ptrs, check_ptrs) DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, vec_shl_insert, binary) diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 389241a..a1bc081 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -221,6 +221,8 @@ extern int internal_fn_mask_index (internal_fn); extern int internal_fn_stored_value_index (internal_fn); extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, tree, tree, int); +extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree, + poly_uint64, unsigned int); extern void expand_internal_call (gcall *); extern void expand_internal_call (internal_fn, gcall *); diff --git a/gcc/optabs.def b/gcc/optabs.def index 90e177a..24d8275 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -429,6 +429,9 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a") OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a") OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a") +OPTAB_D (check_raw_ptrs_optab, "check_raw_ptrs$a") +OPTAB_D (check_war_ptrs_optab, "check_war_ptrs$a") + OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE) OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES) OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3a37d94..01e8e2b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,17 @@ 2019-11-18 Richard Sandiford <richard.sandiford@arm.com> + * lib/target-supports.exp (check_effective_target_vect_check_ptrs): + New procedure. + * gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR to be + used, if available. 
+ * gcc.dg/vect/vect-alias-check-15.c: Likewise. + * gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW. + * gcc.target/aarch64/sve2/whilerw_1.c: New test. + * gcc.target/aarch64/sve2/whilewr_1.c: Likewise. + * gcc.target/aarch64/sve2/whilewr_2.c: Likewise. + +2019-11-18 Richard Sandiford <richard.sandiford@arm.com> + * gcc.target/aarch64/sve/acle/asm/ptest_pmore.c: New test. 2019-11-18 Richard Sandiford <richard.sandiford@arm.com> diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c index 1d148a0..29bc571 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c @@ -60,5 +60,6 @@ main (void) /* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */ /* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */ -/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */ +/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! vect_check_ptrs } } } } */ +/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */ /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c index fbe3f84..ad74496 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c @@ -57,5 +57,6 @@ main (void) } /* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */ -/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */ +/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" { target { ! 
vect_check_ptrs } } } } */ +/* { dg-final { scan-tree-dump "using an IFN_CHECK_WAR_PTRS test" "vect" { target vect_check_ptrs } } } */ /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c index 81c252d..8a9a6ff 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c @@ -62,5 +62,6 @@ main (void) } /* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */ -/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */ +/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" { target { ! vect_check_ptrs } } } } */ +/* { dg-final { scan-tree-dump "using an IFN_CHECK_RAW_PTRS test" "vect" { target vect_check_ptrs } } } */ /* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c new file mode 100644 index 0000000..63a6d2f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilerw_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdint.h> + +#define TEST_LOOP(TYPE) \ + TYPE \ + test_##TYPE (TYPE *dst, TYPE *src, int n) \ + { \ + TYPE res = 0; \ + for (int i = 0; i < n; ++i) \ + { \ + dst[i] += 1; \ + res += src[i]; \ + } \ + return res; \ + } + +TEST_LOOP (int8_t); +TEST_LOOP (int16_t); +TEST_LOOP (int32_t); +TEST_LOOP (int64_t); + +/* { dg-final { scan-assembler-times {\twhilerw\t} 4 } } */ +/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.b, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.h, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.s, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilerw\tp[0-9]+\.d, 
x[0-9]+, x1\n} 1 } } */ +/* { dg-final { scan-assembler-not {\twhilewr\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c new file mode 100644 index 0000000..e204b37 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdint.h> + +#define TEST_LOOP(TYPE) \ + void \ + test_##TYPE (TYPE *dst, TYPE *src1, TYPE *src2, int n) \ + { \ + for (int i = 0; i < n; ++i) \ + dst[i] = src1[i] + src2[i]; \ + } + +TEST_LOOP (int8_t); +TEST_LOOP (int16_t); +TEST_LOOP (int32_t); +TEST_LOOP (int64_t); + +/* { dg-final { scan-assembler-times {\twhilewr\t} 8 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x2, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x2, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x2, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x2, x0\n} 1 } } */ +/* { dg-final { scan-assembler-not {\twhilerw\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c new file mode 100644 index 0000000..0b86991 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/whilewr_2.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns" } */ +/* { dg-require-effective-target lp64 } */ + +#include <stdint.h> + +#define TEST_LOOP(TYPE) \ + void \ + test_##TYPE (TYPE *dst1, TYPE *dst2, TYPE *dst3, 
int n) \ + { \ + for (int i = 0; i < n; ++i) \ + { \ + dst1[i] = 1; \ + dst2[i] = 2; \ + dst3[i] = 3; \ + } \ + } + +TEST_LOOP (int8_t); +TEST_LOOP (int16_t); +TEST_LOOP (int32_t); +TEST_LOOP (int64_t); + +/* { dg-final { scan-assembler-times {\twhilewr\t} 12 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x0, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.b, x1, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x0, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.h, x1, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x0, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.s, x1, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x1\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x0, x2\n} 1 } } */ +/* { dg-final { scan-assembler-times {\twhilewr\tp[0-9]+\.d, x1, x2\n} 1 } } */ +/* { dg-final { scan-assembler-not {\twhilerw\t} } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 54b2fca..08af9f8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6459,6 +6459,13 @@ proc check_effective_target_vect_natural_alignment { } { return $et_vect_natural_alignment } +# Return true if the target supports the check_raw_ptrs and check_war_ptrs +# optabs on vectors. + +proc check_effective_target_vect_check_ptrs { } { + return [check_effective_target_aarch64_sve2] +} + # Return true if fully-masked loops are supported. 
proc check_effective_target_vect_fully_masked { } { diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index bad80e1..117a14b 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -96,6 +96,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "tree-eh.h" #include "ssa.h" +#include "internal-fn.h" static struct datadep_stats { @@ -1719,6 +1720,80 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, } } +/* A subroutine of create_intersect_range_checks, with a subset of the + same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS + to optimize cases in which the references form a simple RAW, WAR or + WAW dependence. */ + +static bool +create_ifn_alias_checks (tree *cond_expr, + const dr_with_seg_len_pair_t &alias_pair) +{ + const dr_with_seg_len& dr_a = alias_pair.first; + const dr_with_seg_len& dr_b = alias_pair.second; + + /* Check for cases in which: + + (a) we have a known RAW, WAR or WAW dependence + (b) the accesses are well-ordered in both the original and new code + (see the comment above the DR_ALIAS_* flags for details); and + (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ + if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW)) + return false; + + /* Make sure that both DRs access the same pattern of bytes, + with a constant length and step. */ + poly_uint64 seg_len; + if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0) + || !poly_int_tree_p (dr_a.seg_len, &seg_len) + || maybe_ne (dr_a.access_size, dr_b.access_size) + || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0) + || !tree_fits_uhwi_p (DR_STEP (dr_a.dr))) + return false; + + unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr)); + tree addr_a = DR_BASE_ADDRESS (dr_a.dr); + tree addr_b = DR_BASE_ADDRESS (dr_b.dr); + + /* See whether the target supports what we want to do. WAW checks are + equivalent to WAR checks here. 
*/ + internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW + ? IFN_CHECK_RAW_PTRS + : IFN_CHECK_WAR_PTRS); + unsigned int align = MIN (dr_a.align, dr_b.align); + poly_uint64 full_length = seg_len + bytes; + if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), + full_length, align)) + { + full_length = seg_len + dr_a.access_size; + if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), + full_length, align)) + return false; + } + + /* Commit to using this form of test. */ + addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr)); + addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); + + addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr)); + addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); + + *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION, + ifn, boolean_type_node, + 4, addr_a, addr_b, + size_int (full_length), + size_int (align)); + + if (dump_enabled_p ()) + { + if (ifn == IFN_CHECK_RAW_PTRS) + dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n"); + else + dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n"); + } + return true; +} + /* Try to generate a runtime condition that is true if ALIAS_PAIR is free of aliases, using a condition based on index values instead of a condition based on addresses. Return true on success, @@ -2240,6 +2315,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr, if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) return; + if (create_ifn_alias_checks (cond_expr, alias_pair)) + return; + if (create_waw_or_war_checks (cond_expr, alias_pair)) return; |