diff options
57 files changed, 1322 insertions, 329 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index bf0e57d..bf7569f 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -702,6 +702,23 @@ } ) +;; Fold predicated loads/stores with a PTRUE predicate to unpredicated +;; loads/stores after RA. +(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str" + [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=Utr,w") + (unspec:SVE_FULL + [(match_operand:<VPRED> 1 "aarch64_simd_imm_one") + (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand" "w,Utr")] + UNSPEC_PRED_X))] + "TARGET_SVE && reload_completed + && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN) + && ((REG_P (operands[0]) && MEM_P (operands[2])) + || (REG_P (operands[2]) && MEM_P (operands[0])))" + "#" + "&& 1" + [(set (match_dup 0) + (match_dup 2))]) + ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors ;; or vectors for which little-endian ordering isn't acceptable. Memory ;; accesses require secondary reloads. diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc index 370bfa3..2764bb5 100644 --- a/gcc/cp/class.cc +++ b/gcc/cp/class.cc @@ -5744,6 +5744,9 @@ type_has_converting_constructor (tree t) { tree fn = *iter; tree parm = FUNCTION_FIRST_USER_PARMTYPE (fn); + if (parm == NULL_TREE) + /* Varargs. */ + return true; if (parm == void_list_node || !sufficient_parms_p (TREE_CHAIN (parm))) /* Can't accept a single argument, so won't be considered for diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index 21156f1..15db1d6 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -3160,7 +3160,9 @@ determine_visibility (tree decl) && !attr) { int depth = TMPL_ARGS_DEPTH (args); - if (DECL_VISIBILITY_SPECIFIED (decl)) + if (DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P (TI_TEMPLATE (tinfo))) + /* Class template args don't affect template friends. 
*/; + else if (DECL_VISIBILITY_SPECIFIED (decl)) { /* A class template member with explicit visibility overrides the class visibility, so we need to apply diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 0694c28..09f74a2 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -23254,7 +23254,7 @@ fn_type_unification (tree fn, conversions that we know are not going to induce template instantiation (PR99599). */ if (strict == DEDUCE_CALL - && incomplete + && incomplete && flag_concepts && check_non_deducible_conversions (parms, args, nargs, fn, strict, flags, convs, explain_p, /*noninst_only_p=*/true)) @@ -1190,7 +1190,10 @@ canon_address (rtx mem, address = strip_offset_and_add (address, offset); if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)) - && const_or_frame_p (address)) + && const_or_frame_p (address) + /* Literal addresses can alias any base, avoid creating a + group for them. */ + && ! CONST_SCALAR_INT_P (address)) { group_info *group = get_group_info (address); diff --git a/gcc/gcov.cc b/gcc/gcov.cc index e1ad801..0dfe513 100644 --- a/gcc/gcov.cc +++ b/gcc/gcov.cc @@ -3591,7 +3591,7 @@ print_source_line (FILE *f, const vector<const char *> &source_lines, Returns 1 if the path was printed, 0 otherwise. 
*/ static unsigned print_prime_path_lines (FILE *gcov_file, const function_info &fn, - const vector<unsigned> &path, size_t pathno) + const vector<unsigned> &path, unsigned pathno) { const bool is_covered = fn.paths.covered_p (pathno); if (is_covered && !flag_prime_paths_lines_covered) @@ -3600,9 +3600,9 @@ print_prime_path_lines (FILE *gcov_file, const function_info &fn, return 0; if (is_covered) - fprintf (gcov_file, "path %zu covered: lines", pathno); + fprintf (gcov_file, "path %u covered: lines", pathno); else - fprintf (gcov_file, "path %zu not covered: lines", pathno); + fprintf (gcov_file, "path %u not covered: lines", pathno); for (size_t k = 0; k != path.size (); ++k) { @@ -3658,7 +3658,7 @@ print_inlined_separator (FILE *gcov_file, unsigned current_index, const Returns 1 if the path was printed, 0 otherwise. */ static unsigned print_prime_path_source (FILE *gcov_file, const function_info &fn, - const vector<unsigned> &path, size_t pathno) + const vector<unsigned> &path, unsigned pathno) { const bool is_covered = fn.paths.covered_p (pathno); if (is_covered && !flag_prime_paths_source_covered) @@ -3667,9 +3667,9 @@ print_prime_path_source (FILE *gcov_file, const function_info &fn, return 0; if (is_covered) - fprintf (gcov_file, "path %zu covered:\n", pathno); + fprintf (gcov_file, "path %u covered:\n", pathno); else - fprintf (gcov_file, "path %zu not covered:\n", pathno); + fprintf (gcov_file, "path %u not covered:\n", pathno); unsigned current = fn.src; for (size_t k = 0; k != path.size (); ++k) { @@ -3728,19 +3728,19 @@ output_path_coverage (FILE *gcov_file, const function_info *fn) if (fn->paths.paths.empty ()) fnotice (gcov_file, "path coverage omitted\n"); else - fnotice (gcov_file, "paths covered %u of %zu\n", - fn->paths.covered_paths (), fn->paths.paths.size ()); + fnotice (gcov_file, "paths covered %u of " HOST_SIZE_T_PRINT_UNSIGNED "\n", + fn->paths.covered_paths (), (fmt_size_t)fn->paths.paths.size ()); if (flag_prime_paths_lines_uncovered || 
flag_prime_paths_lines_covered) { - size_t pathno = 0; + unsigned pathno = 0; for (const vector<unsigned> &path : fn->paths.paths) print_prime_path_lines (gcov_file, *fn, path, pathno++); } if (flag_prime_paths_source_uncovered || flag_prime_paths_source_covered) { - size_t pathno = 0; + unsigned pathno = 0; for (const vector<unsigned> &path : fn->paths.paths) print_prime_path_source (gcov_file, *fn, path, pathno++); } diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index f801e8b..e63fd6f 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -6258,10 +6258,21 @@ replace_stmt_with_simplification (gimple_stmt_iterator *gsi, } else if (code == INTEGER_CST) { + /* Make into the canonical form `1 != 0` and `0 != 0`. + If already in the canonical form return false + saying nothing has been done. */ if (integer_zerop (ops[0])) - gimple_cond_make_false (cond_stmt); + { + if (gimple_cond_false_canonical_p (cond_stmt)) + return false; + gimple_cond_make_false (cond_stmt); + } else - gimple_cond_make_true (cond_stmt); + { + if (gimple_cond_true_canonical_p (cond_stmt)) + return false; + gimple_cond_make_true (cond_stmt); + } } else if (!inplace) { diff --git a/gcc/gimple.h b/gcc/gimple.h index 032365f..977ff1c 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -3875,6 +3875,21 @@ gimple_cond_true_p (const gcond *gs) return false; } +/* Check if conditional statement GS is in the canonical form of 'if (1 != 0)'. 
*/ + +inline bool +gimple_cond_true_canonical_p (const gcond *gs) +{ + tree lhs = gimple_cond_lhs (gs); + tree rhs = gimple_cond_rhs (gs); + tree_code code = gimple_cond_code (gs); + if (code == NE_EXPR + && lhs == boolean_true_node + && rhs == boolean_false_node) + return true; + return false; +} + /* Check if conditional statement GS is of the form 'if (1 != 1)', 'if (0 != 0)', 'if (1 == 0)' or 'if (0 == 1)' */ @@ -3900,6 +3915,21 @@ gimple_cond_false_p (const gcond *gs) return false; } +/* Check if conditional statement GS is in the canonical form of 'if (0 != 0)'. */ + +inline bool +gimple_cond_false_canonical_p (const gcond *gs) +{ + tree lhs = gimple_cond_lhs (gs); + tree rhs = gimple_cond_rhs (gs); + tree_code code = gimple_cond_code (gs); + if (code == NE_EXPR + && lhs == boolean_false_node + && rhs == boolean_false_node) + return true; + return false; +} + /* Set the code, LHS and RHS of GIMPLE_COND STMT from CODE, LHS and RHS. */ inline void diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C b/gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C new file mode 100644 index 0000000..7adf6ec --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C @@ -0,0 +1,12 @@ +// PR c++/120185 + +struct A { + A(...); +}; + +template <class T> void f(A, T) { } + +int main() +{ + f(42, 24); +} diff --git a/gcc/testsuite/g++.dg/cpp2a/constinit16.C b/gcc/testsuite/g++.dg/cpp2a/constinit16.C index dda81d5..046e9aa 100644 --- a/gcc/testsuite/g++.dg/cpp2a/constinit16.C +++ b/gcc/testsuite/g++.dg/cpp2a/constinit16.C @@ -2,7 +2,7 @@ // { dg-do compile { target c++20 } } // { dg-add-options tls } // { dg-require-alias "" } -// { dg-require-effective-target tls_runtime } +// { dg-require-effective-target tls } // { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar1" } } // { dg-final { scan-assembler "_ZTH17mythreadlocalvar2" } } // { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar3" } } diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp2.C 
b/gcc/testsuite/g++.dg/cpp2a/decomp2.C index c2bfe46..d13f424 100644 --- a/gcc/testsuite/g++.dg/cpp2a/decomp2.C +++ b/gcc/testsuite/g++.dg/cpp2a/decomp2.C @@ -1,7 +1,7 @@ // P1091R3 // { dg-do run { target c++11 } } // { dg-options "" } -// { dg-require-effective-target tls } +// { dg-require-effective-target tls_runtime } // { dg-add-options tls } namespace std { diff --git a/gcc/testsuite/gcc.dg/pr119160.c b/gcc/testsuite/gcc.dg/pr119160.c index b4629a1..5743b3b 100644 --- a/gcc/testsuite/gcc.dg/pr119160.c +++ b/gcc/testsuite/gcc.dg/pr119160.c @@ -1,5 +1,6 @@ /* { dg-do run } */ -/* { dg-options "-O2 -finstrument-functions-once -favoid-store-forwarding -fnon-call-exceptions -fschedule-insns -mgeneral-regs-only -Wno-psabi" } */ +/* { dg-options "-O2 -finstrument-functions-once -favoid-store-forwarding -fnon-call-exceptions -fschedule-insns -Wno-psabi" } */ +/* { dg-additional-options "-mgeneral-regs-only" { target { x86_64-*-* i?86-*-* arm*-*-* aarch64*-*-* } } } */ typedef __attribute__((__vector_size__ (32))) int V; diff --git a/gcc/testsuite/gcc.dg/torture/pr120182.c b/gcc/testsuite/gcc.dg/torture/pr120182.c new file mode 100644 index 0000000..5e2d171 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr120182.c @@ -0,0 +1,42 @@ +/* { dg-do run { target { { *-*-linux* *-*-gnu* *-*-uclinux* } && mmap } } } */ + +#include <unistd.h> +#include <stdlib.h> +#include <sys/mman.h> + +struct S +{ + struct S *next; +}; + +static void __attribute__((noipa)) +allocate(void *addr, unsigned long long size) +{ + void *ptr = mmap((void *)addr, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, + -1, 0); + if(ptr != addr) + exit(0); +} + +int main (void) +{ + int size = 0x8000; + char *ptr = (char *)0x288000ull; + allocate((void *)ptr, size); + + struct S *s1 = (struct S *)ptr; + struct S *s2 = (struct S *)256; + for (int i = 0; i < 3; i++) + { + for(char *addr = (char *)s1; addr < (char *)s1 + sizeof(*s1); ++addr) + *addr = 0; + + if(s1->next) 
+ s1->next = s1->next->next = s2; + else + s1->next = s2; + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c new file mode 100644 index 0000000..955fc7e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ + +double foo (double *dst, double *src, int b) +{ + double y = src[1]; + if (b) + { + dst[0] = src[0]; + dst[1] = y; + } + return y; +} + +/* { dg-final { scan-tree-dump "optimized: basic block part vectorized" "slp2" { target vect_double } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c index 907637f..eeba533 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c @@ -29,7 +29,7 @@ test_add (fixed_int8_t x, fixed_int8_t y) } /* -** test_add_gnu: +** test_add_gnu: {target aarch64_big_endian } ** ( ** add (z[0-9]+\.b), (?:z0\.b, z1\.b|z1\.b, z0\.b) ** ptrue (p[0-7])\.b, vl32 @@ -41,6 +41,12 @@ test_add (fixed_int8_t x, fixed_int8_t y) ** ) ** ret */ +/* +** test_add_gnu: {target aarch64_little_endian } +** add (z[0-9]+)\.b, (?:z0\.b, z1\.b|z1\.b, z0\.b) +** str \1, \[x8\] +** ret +*/ gnu_int8_t test_add_gnu (fixed_int8_t x, fixed_int8_t y) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c index b65826b..d423dcf 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c @@ -9,5 +9,7 @@ uint64_t f2(uint64_t *ptr, int n) { return res; } -/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 
1 {target aarch64_little_endian} } } */ +/* { dg-final { scan-assembler-times {\tldr\tz[0-9]+,} 4 {target aarch64_little_endian} } } */ /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c index a7ecfe3..93af4c1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c index f3a29fc..fab49ed 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c @@ -9,5 +9,6 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */ /* { dg-final { scan-assembler-not {\tstp\tq} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c index 565e1e3..160667b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target 
aarch64_little_endian} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c index 31057c0..b71c673 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c @@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count) *dst++ = 1; } -/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */ +/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c index 50e77f9..8d480a8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int16_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int16_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... 
+** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int16_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int16_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int16_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int16_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int16_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... 
** fmov (z[0-9]+\.h), #9\.0[^\n]* ** ... @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.h, #9\.0[^\n]* +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int16_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c index e7b092a..b3c699d 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int32_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int32_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int32_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... 
@@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int32_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int32_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int32_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int32_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.s), #9\.0[^\n]* ** ... @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.s, #9\.0[^\n]* +** ... +** str \1, \[(x[0-9]+)\] +** ... 
+** str \2, \[sp\] +** ... +** ret +*/ void __attribute__((noipa)) caller_8 (int32_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c index c3389a8..7078afc 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int64_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int64_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int64_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[x2\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_1 (int64_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int64_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int64_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int64_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** fmov (z[0-9]+\.d), #9\.0[^\n]* ** ... @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** fmov (z[0-9]+)\.d, #9\.0[^\n]* +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int64_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c index 2877787..fcbac37 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c @@ -8,9 +8,9 @@ /* ** callee_0: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] +** ldr (z[0-9]+), \[x1\] ** ... -** st1b \1, \2, \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -32,9 +32,9 @@ callee_0 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[x1\] +** str \2, \[x1\] ** ... ** ret */ @@ -47,9 +47,9 @@ caller_0 (mfloat8_t *ptr, mfloat8_t in) /* ** callee_1: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] +** ldr (z[0-9]+), \[x2\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -72,9 +72,9 @@ callee_1 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[x2\] +** str \2, \[x2\] ** ... ** ret */ @@ -87,9 +87,9 @@ caller_1 (mfloat8_t *ptr, mfloat8_t in) /* ** callee_7: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] +** ldr (z[0-9]+), \[x7\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -117,9 +117,9 @@ callee_7 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... -** st1b \2, p[0-7], \[x7\] +** str \2, \[x7\] ** ... ** ret */ @@ -136,9 +136,9 @@ caller_7 (mfloat8_t *ptr, mfloat8_t in) ** ... ** ldr (x[0-9]+), \[sp, \1\] ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] +** ldr (z[0-9]+), \[\2\] ** ... -** st1b \3, \4, \[x0\] +** str \3, \[x0\] ** ... ** ret */ @@ -167,9 +167,9 @@ callee_8 (mfloat8_t *ptr, ...) ** ... ** umov (w[0-9]+), v0.b\[0\] ** ... -** mov (z[0-9]+\.b), \1 +** mov (z[0-9]+)\.b, \1 ** ... 
-** st1b \2, p[0-7], \[(x[0-9]+)\] +** str \2, \[(x[0-9]+)\] ** ... ** str \3, \[sp\] ** ... diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c index 3c644e1..e65e64f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int16_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int16_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int16_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x2\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_1 (int16_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int16_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int16_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int16_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int16_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c index 652d609..6488a5f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int32_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int32_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int32_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x2\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_1 (int32_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int32_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int32_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int32_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int32_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c index 72ea6a3..4b77b4f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int64_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int64_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int64_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x2\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_1 (int64_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int64_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int64_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int64_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int64_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c index 02f4bec..e686b3e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c @@ -8,9 +8,9 @@ /* ** callee_0: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] +** ldr (z[0-9]+), \[x1\] ** ... -** st1b \1, \2, \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -29,9 +29,9 @@ callee_0 (int8_t *ptr, ...) /* ** caller_0: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x1\] +** str \1, \[x1\] ** ... ** ret */ @@ -44,9 +44,9 @@ caller_0 (int8_t *ptr) /* ** callee_1: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] +** ldr (z[0-9]+), \[x2\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...) /* ** caller_1: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x2\] +** str \1, \[x2\] ** ... ** ret */ @@ -81,9 +81,9 @@ caller_1 (int8_t *ptr) /* ** callee_7: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] +** ldr (z[0-9]+), \[x7\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...) /* ** caller_7: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x7\] +** str \1, \[x7\] ** ... ** ret */ @@ -127,9 +127,9 @@ caller_7 (int8_t *ptr) ** ... ** ldr (x[0-9]+), \[sp, \1\] ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] +** ldr (z[0-9]+), \[\2\] ** ... -** st1b \3, \4, \[x0\] +** str \3, \[x0\] ** ... ** ret */ @@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...) /* ** caller_8: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[(x[0-9]+)\] +** str \1, \[(x[0-9]+)\] ** ... ** str \2, \[sp\] ** ... 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c index b60d448..74ef4da 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int16_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int16_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int16_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int16_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int16_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x2\] +** ... +** ret +*/ void __attribute__((noipa)) caller_1 (int16_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int16_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... 
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int16_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int16_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int16_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int16_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int16_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int16_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.h), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.h, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int16_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c index 5f01464..4f9ff78 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int32_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int32_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int32_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int32_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int32_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x2\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_1 (int32_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int32_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int32_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int32_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int32_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int32_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int32_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int32_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.s), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.s, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int32_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c index 986739f..27e437b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c @@ -6,7 +6,7 @@ #include <stdarg.h> /* -** callee_0: +** callee_0: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] ** ... @@ -14,6 +14,15 @@ ** ... ** ret */ +/* +** callee_0: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x1\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_0 (int64_t *ptr, ...) { @@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...) } /* -** caller_0: +** caller_0: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_0: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x1\] +** ... +** ret +*/ void __attribute__((noipa)) caller_0 (int64_t *ptr) { @@ -42,7 +60,7 @@ caller_0 (int64_t *ptr) } /* -** callee_1: +** callee_1: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] ** ... @@ -50,6 +68,15 @@ caller_0 (int64_t *ptr) ** ... ** ret */ +/* +** callee_1: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x2\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_1 (int64_t *ptr, ...) { @@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...) } /* -** caller_1: +** caller_1: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_1: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x2\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_1 (int64_t *ptr) { @@ -79,7 +115,7 @@ caller_1 (int64_t *ptr) } /* -** callee_7: +** callee_7: {target aarch64_big_endian} ** ... ** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] ** ... @@ -87,6 +123,15 @@ caller_1 (int64_t *ptr) ** ... ** ret */ +/* +** callee_7: {target aarch64_little_endian} +** ... +** ldr (z[0-9]+), \[x7\] +** ... +** str \1, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_7 (int64_t *ptr, ...) { @@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...) } /* -** caller_7: +** caller_7: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_7: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[x7\] +** ... +** ret +*/ void __attribute__((noipa)) caller_7 (int64_t *ptr) { @@ -122,7 +176,7 @@ caller_7 (int64_t *ptr) /* FIXME: We should be able to get rid of the va_list object. */ /* -** callee_8: +** callee_8: {target aarch64_big_endian} ** sub sp, sp, #([0-9]+) ** ... ** ldr (x[0-9]+), \[sp, \1\] @@ -133,6 +187,18 @@ caller_7 (int64_t *ptr) ** ... ** ret */ +/* +** callee_8: {target aarch64_little_endian} +** sub sp, sp, #([0-9]+) +** ... +** ldr (x[0-9]+), \[sp, \1\] +** ... +** ldr (z[0-9]+), \[\2\] +** ... +** str \3, \[x0\] +** ... +** ret +*/ void __attribute__((noipa)) callee_8 (int64_t *ptr, ...) { @@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...) } /* -** caller_8: +** caller_8: {target aarch64_big_endian} ** ... ** mov (z[0-9]+\.d), #42 ** ... @@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...) ** ... ** ret */ +/* +** caller_8: {target aarch64_little_endian} +** ... +** mov (z[0-9]+)\.d, #42 +** ... +** str \1, \[(x[0-9]+)\] +** ... +** str \2, \[sp\] +** ... 
+** ret +*/ void __attribute__((noipa)) caller_8 (int64_t *ptr) { diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c index 533cba67..d43a6da 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c @@ -8,9 +8,9 @@ /* ** callee_0: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] +** ldr (z[0-9]+), \[x1\] ** ... -** st1b \1, \2, \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -29,9 +29,9 @@ callee_0 (int8_t *ptr, ...) /* ** caller_0: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x1\] +** str \1, \[x1\] ** ... ** ret */ @@ -44,9 +44,9 @@ caller_0 (int8_t *ptr) /* ** callee_1: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] +** ldr (z[0-9]+), \[x2\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...) /* ** caller_1: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x2\] +** str \1, \[x2\] ** ... ** ret */ @@ -81,9 +81,9 @@ caller_1 (int8_t *ptr) /* ** callee_7: ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] +** ldr (z[0-9]+), \[x7\] ** ... -** st1b \1, p[0-7], \[x0\] +** str \1, \[x0\] ** ... ** ret */ @@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...) /* ** caller_7: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[x7\] +** str \1, \[x7\] ** ... ** ret */ @@ -127,9 +127,9 @@ caller_7 (int8_t *ptr) ** ... ** ldr (x[0-9]+), \[sp, \1\] ** ... -** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] +** ldr (z[0-9]+), \[\2\] ** ... -** st1b \3, \4, \[x0\] +** str \3, \[x0\] ** ... ** ret */ @@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...) /* ** caller_8: ** ... -** mov (z[0-9]+\.b), #42 +** mov (z[0-9]+)\.b, #42 ** ... -** st1b \1, p[0-7], \[(x[0-9]+)\] +** str \1, \[(x[0-9]+)\] ** ... ** str \2, \[sp\] ** ... 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c index 985cd0c..f07900b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c @@ -19,5 +19,7 @@ foo (void) /* We should operate on aligned vectors. */ /* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, (x|\.LANCHOR0)\n} } } */ /* We should unroll the loop three times. */ -/* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */ +/* { dg-final { scan-assembler-times "\tst1w\t" 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times "\tst1w\t" 2 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times "\tstr\t" 1 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, vl7\n.*\teor\tp[0-9]+\.b, (p[0-9]+)/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c new file mode 100644 index 0000000..c3bfa98 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target aarch64_little_endian } */ + +#include <arm_sve.h> + +#define TEST(TYPE, TY, B) \ + sv##TYPE ld_##TY (TYPE *x) \ + { \ + return svld1_##TY(svptrue_b##B (), x); \ + } \ + void st_##TY (TYPE *x, sv##TYPE data) \ + { \ + svst1_##TY(svptrue_b##B (), x, data); \ + } + +TEST(bfloat16_t, bf16, 16) +TEST(float16_t, f16, 16) +TEST(float32_t, f32, 32) +TEST(float64_t, f64, 64) +TEST(uint8_t, u8, 8) +TEST(uint16_t, u16, 16) +TEST(uint32_t, u32, 32) +TEST(uint64_t, u64, 64) +TEST(int8_t, s8, 8) +TEST(int16_t, s16, 16) +TEST(int32_t, s32, 32) +TEST(int64_t, s64, 64) + +/* { dg-final { scan-assembler-times {\tldr\tz0, \[x0\]} 12 } } */ +/* { dg-final { scan-assembler-times {\tstr\tz0, \[x0\]} 12 } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c index d9bb97e..be71921 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c @@ -40,12 +40,13 @@ TEST_LOOP (double, 3.0) /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 9 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c index d27eead..8692984 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c @@ -16,12 +16,13 @@ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { 
scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 9 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c index 313a72d..10799fd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c @@ -16,12 +16,13 @@ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 9 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 
{ target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c index 4f46654..53658a8 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c @@ -16,12 +16,13 @@ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 9 { target aarch64_big_endian } } } */ -/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ -/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */ -/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */ +/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */ /* { dg-final { scan-assembler-not {\twhile} } } */ /* { 
dg-final { scan-assembler-not {\tb} } } */ diff --git a/gcc/testsuite/gnat.dg/opt106.adb b/gcc/testsuite/gnat.dg/opt106.adb new file mode 100644 index 0000000..525930b --- /dev/null +++ b/gcc/testsuite/gnat.dg/opt106.adb @@ -0,0 +1,11 @@ +-- { dg-do run } +-- { dg-options "-O2" } + +with Opt106_Pkg1; use Opt106_Pkg1; + +procedure Opt106 is + Obj : T := (False, 0, 0, 0, True); + +begin + Proc (Obj, 0, False, True); +end; diff --git a/gcc/testsuite/gnat.dg/opt106_pkg1.adb b/gcc/testsuite/gnat.dg/opt106_pkg1.adb new file mode 100644 index 0000000..154b13f --- /dev/null +++ b/gcc/testsuite/gnat.dg/opt106_pkg1.adb @@ -0,0 +1,39 @@ +with Opt106_Pkg2; use Opt106_Pkg2; + +package body Opt106_Pkg1 is + + procedure Proc (Obj : in out T; + Data : Integer; + Last : Boolean; + Stretch : Boolean) is + + begin + if Stretch and then (Obj.Delayed /= 0 or else not Obj.Attach_Last) then + raise Program_Error; + end if; + + if Obj.Delayed /= 0 then + Stop (Obj.Delayed, Obj.Before, Data, False); + end if; + + if Last or (Obj.Delayed = 0 and not Stretch) then + Stop (Data, Obj.Before, 0, Last); + + if Last then + Obj.Initialized := False; + else + Obj.Next := 0; + Obj.Before := Data; + end if; + + else + if Stretch then + Obj.Next := 1; + else + Obj.Before := Obj.Delayed; + end if; + Obj.Delayed := Data; + end if; + end; + +end Opt106_Pkg1; diff --git a/gcc/testsuite/gnat.dg/opt106_pkg1.ads b/gcc/testsuite/gnat.dg/opt106_pkg1.ads new file mode 100644 index 0000000..85ac24d --- /dev/null +++ b/gcc/testsuite/gnat.dg/opt106_pkg1.ads @@ -0,0 +1,16 @@ +package Opt106_Pkg1 is + + type T is record + Initialized : Boolean; + Before : Integer; + Delayed : Integer; + Next : Integer; + Attach_Last : Boolean; + end record; + + procedure Proc (Obj : in out T; + Data : Integer; + Last : Boolean; + Stretch : Boolean); + +end Opt106_Pkg1; diff --git a/gcc/testsuite/gnat.dg/opt106_pkg2.adb b/gcc/testsuite/gnat.dg/opt106_pkg2.adb new file mode 100644 index 0000000..cf63956 --- /dev/null +++ 
b/gcc/testsuite/gnat.dg/opt106_pkg2.adb @@ -0,0 +1,11 @@ +package body Opt106_Pkg2 is + + procedure Stop (Delayed : Integer; + Before : Integer; + After : Integer; + Last : Boolean) is + begin + raise Program_Error; + end; + +end Opt106_Pkg2; diff --git a/gcc/testsuite/gnat.dg/opt106_pkg2.ads b/gcc/testsuite/gnat.dg/opt106_pkg2.ads new file mode 100644 index 0000000..77e5b40 --- /dev/null +++ b/gcc/testsuite/gnat.dg/opt106_pkg2.ads @@ -0,0 +1,8 @@ +package Opt106_Pkg2 is + + procedure Stop (Delayed : Integer; + Before : Integer; + After : Integer; + Last : Boolean); + +end Opt106_Pkg2; diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 2774fd2..857517f 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -7844,21 +7844,70 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, node, node_instance, cost_vec); } +static int +sort_ints (const void *a_, const void *b_) +{ + int a = *(const int *)a_; + int b = *(const int *)b_; + return a - b; +} + /* Verify if we can externalize a set of internal defs. */ static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts) { + /* Constant generation uses get_later_stmt which can only handle + defs from the same BB or a set of defs that can be ordered + with a dominance query. */ basic_block bb = NULL; + bool all_same = true; + auto_vec<int> bbs; + bbs.reserve_exact (stmts.length ()); for (stmt_vec_info stmt : stmts) - if (!stmt) - return false; - /* Constant generation uses get_later_stmt which can only handle - defs from the same BB. */ - else if (!bb) - bb = gimple_bb (stmt->stmt); - else if (gimple_bb (stmt->stmt) != bb) + { + if (!stmt) + return false; + else if (!bb) + bb = gimple_bb (stmt->stmt); + else if (gimple_bb (stmt->stmt) != bb) + all_same = false; + bbs.quick_push (gimple_bb (stmt->stmt)->index); + } + if (all_same) + return true; + + /* Produce a vector of unique BB indexes for the defs. 
*/ + bbs.qsort (sort_ints); + unsigned i, j; + for (i = 1, j = 1; i < bbs.length (); ++i) + if (bbs[i] != bbs[j-1]) + bbs[j++] = bbs[i]; + gcc_assert (j >= 2); + bbs.truncate (j); + + if (bbs.length () == 2) + return (dominated_by_p (CDI_DOMINATORS, + BASIC_BLOCK_FOR_FN (cfun, bbs[0]), + BASIC_BLOCK_FOR_FN (cfun, bbs[1])) + || dominated_by_p (CDI_DOMINATORS, + BASIC_BLOCK_FOR_FN (cfun, bbs[1]), + BASIC_BLOCK_FOR_FN (cfun, bbs[0]))); + + /* ??? For more than two BBs we can sort the vector and verify the + result is a total order. But we can't use vec::qsort with a + compare function using a dominance query since there's no way to + signal failure and any fallback for an unordered pair would + fail qsort_chk later. + For now simply hope that ordering after BB index provides the + best candidate total order. If required we can implement our + own mergesort or export an entry without checking. */ + for (unsigned i = 1; i < bbs.length (); ++i) + if (!dominated_by_p (CDI_DOMINATORS, + BASIC_BLOCK_FOR_FN (cfun, bbs[i]), + BASIC_BLOCK_FOR_FN (cfun, bbs[i-1]))) return false; + return true; } diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 3373d75..efe6a2c 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -6811,7 +6811,6 @@ vectorizable_operation (vec_info *vinfo, poly_uint64 nunits_in; poly_uint64 nunits_out; tree vectype_out; - unsigned int ncopies; int vec_num; int i; vec<tree> vec_oprnds0 = vNULL; @@ -6872,7 +6871,7 @@ vectorizable_operation (vec_info *vinfo, } scalar_dest = gimple_assign_lhs (stmt); - vectype_out = STMT_VINFO_VECTYPE (stmt_info); + vectype_out = SLP_TREE_VECTYPE (slp_node); /* Most operations cannot handle bit-precision types without extra truncations. */ @@ -6983,20 +6982,8 @@ vectorizable_operation (vec_info *vinfo, } /* Multiple types in SLP are handled by creating the appropriate number of - vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in - case of SLP. 
*/ - if (slp_node) - { - ncopies = 1; - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - } - else - { - ncopies = vect_get_num_copies (loop_vinfo, vectype); - vec_num = 1; - } - - gcc_assert (ncopies >= 1); + vectorized stmts for each SLP node. */ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); /* Reject attempts to combine mask types with nonmask types, e.g. if we have an AND between a (nonmask) boolean loaded from memory and @@ -7038,9 +7025,10 @@ vectorizable_operation (vec_info *vinfo, ops we have to lower the lowering code assumes we are dealing with word_mode. */ if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)) + || !GET_MODE_SIZE (vec_mode).is_constant () || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR) - || !target_support_p) - && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)) + || !target_support_p) + && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)) /* Check only during analysis. */ || (!vec_stmt && !vect_can_vectorize_without_simd_p (code))) { @@ -7080,12 +7068,12 @@ vectorizable_operation (vec_info *vinfo, if (cond_len_fn != IFN_LAST && direct_internal_fn_supported_p (cond_len_fn, vectype, OPTIMIZE_FOR_SPEED)) - vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype, + vect_record_loop_len (loop_vinfo, lens, vec_num, vectype, 1); else if (cond_fn != IFN_LAST && direct_internal_fn_supported_p (cond_fn, vectype, OPTIMIZE_FOR_SPEED)) - vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, + vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL); else { @@ -7098,10 +7086,9 @@ vectorizable_operation (vec_info *vinfo, } /* Put types on constant and invariant SLP children. 
*/ - if (slp_node - && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) - || !vect_maybe_update_slp_op_vectype (slp_op1, vectype) - || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))) + if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype) + || !vect_maybe_update_slp_op_vectype (slp_op1, vectype) + || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -7112,15 +7099,14 @@ vectorizable_operation (vec_info *vinfo, STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; DUMP_VECT_SCOPE ("vectorizable_operation"); vect_model_simple_cost (vinfo, stmt_info, - ncopies, dt, ndts, slp_node, cost_vec); + 1, dt, ndts, slp_node, cost_vec); if (using_emulated_vectors_p) { /* The above vect_model_simple_cost call handles constants in the prologue and (mis-)costs one of the stmts as vector stmt. See below for the actual lowering that will be applied. */ - unsigned n - = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies; + unsigned n = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); switch (code) { case PLUS_EXPR: @@ -7173,60 +7159,7 @@ vectorizable_operation (vec_info *vinfo, else vec_dest = vect_create_destination_var (scalar_dest, vectype_out); - /* In case the vectorization factor (VF) is bigger than the number - of elements that we can fit in a vectype (nunits), we have to generate - more than one vector stmt - i.e - we need to "unroll" the - vector stmt by a factor VF/nunits. In doing so, we record a pointer - from one copy of the vector stmt to the next, in the field - STMT_VINFO_RELATED_STMT. This is necessary in order to allow following - stages to find the correct vector defs to be used when vectorizing - stmts that use the defs of the current stmt. 
The example below - illustrates the vectorization process when VF=16 and nunits=4 (i.e., - we need to create 4 vectorized stmts): - - before vectorization: - RELATED_STMT VEC_STMT - S1: x = memref - - - S2: z = x + 1 - - - - step 1: vectorize stmt S1 (done in vectorizable_load. See more details - there): - RELATED_STMT VEC_STMT - VS1_0: vx0 = memref0 VS1_1 - - VS1_1: vx1 = memref1 VS1_2 - - VS1_2: vx2 = memref2 VS1_3 - - VS1_3: vx3 = memref3 - - - S1: x = load - VS1_0 - S2: z = x + 1 - - - - step2: vectorize stmt S2 (done here): - To vectorize stmt S2 we first need to find the relevant vector - def for the first operand 'x'. This is, as usual, obtained from - the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt - that defines 'x' (S1). This way we find the stmt VS1_0, and the - relevant vector def 'vx0'. Having found 'vx0' we can generate - the vector stmt VS2_0, and as usual, record it in the - STMT_VINFO_VEC_STMT of stmt S2. - When creating the second copy (VS2_1), we obtain the relevant vector - def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of - stmt VS1_0. This way we find the stmt VS1_1 and the relevant - vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a - pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0. - Similarly when creating stmts VS2_2 and VS2_3. This is the resulting - chain of stmts and pointers: - RELATED_STMT VEC_STMT - VS1_0: vx0 = memref0 VS1_1 - - VS1_1: vx1 = memref1 VS1_2 - - VS1_2: vx2 = memref2 VS1_3 - - VS1_3: vx3 = memref3 - - - S1: x = load - VS1_0 - VS2_0: vz0 = vx0 + v1 VS2_1 - - VS2_1: vz1 = vx1 + v1 VS2_2 - - VS2_2: vz2 = vx2 + v1 VS2_3 - - VS2_3: vz3 = vx3 + v1 - - - S2: z = x + 1 - VS2_0 */ - - vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, + vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2); /* Arguments are ready. Create the new vector stmt. 
*/ FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) @@ -7235,88 +7168,108 @@ vectorizable_operation (vec_info *vinfo, vop1 = ((op_type == binary_op || op_type == ternary_op) ? vec_oprnds1[i] : NULL_TREE); vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE); - if (using_emulated_vectors_p - && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)) + if (using_emulated_vectors_p) { /* Lower the operation. This follows vector lowering. */ - unsigned int width = vector_element_bits (vectype); - tree inner_type = TREE_TYPE (vectype); - tree word_type - = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1); - HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type)); - tree low_bits = build_replicated_int_cst (word_type, width, max >> 1); - tree high_bits - = build_replicated_int_cst (word_type, width, max & ~(max >> 1)); + tree word_type = build_nonstandard_integer_type + (GET_MODE_BITSIZE (vec_mode).to_constant (), 1); tree wvop0 = make_ssa_name (word_type); new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR, build1 (VIEW_CONVERT_EXPR, word_type, vop0)); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - tree result_low, signs; - if (code == PLUS_EXPR || code == MINUS_EXPR) + tree wvop1 = NULL_TREE; + if (vop1) { - tree wvop1 = make_ssa_name (word_type); + wvop1 = make_ssa_name (word_type); new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR, build1 (VIEW_CONVERT_EXPR, word_type, vop1)); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - signs = make_ssa_name (word_type); - new_stmt = gimple_build_assign (signs, - BIT_XOR_EXPR, wvop0, wvop1); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - tree b_low = make_ssa_name (word_type); - new_stmt = gimple_build_assign (b_low, - BIT_AND_EXPR, wvop1, low_bits); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - tree a_low = make_ssa_name (word_type); - if (code == PLUS_EXPR) - new_stmt = gimple_build_assign (a_low, - 
BIT_AND_EXPR, wvop0, low_bits); - else - new_stmt = gimple_build_assign (a_low, - BIT_IOR_EXPR, wvop0, high_bits); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - if (code == MINUS_EXPR) + } + + tree result_low; + if (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR) + { + unsigned int width = vector_element_bits (vectype); + tree inner_type = TREE_TYPE (vectype); + HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type)); + tree low_bits + = build_replicated_int_cst (word_type, width, max >> 1); + tree high_bits + = build_replicated_int_cst (word_type, + width, max & ~(max >> 1)); + tree signs; + if (code == PLUS_EXPR || code == MINUS_EXPR) + { + signs = make_ssa_name (word_type); + new_stmt = gimple_build_assign (signs, + BIT_XOR_EXPR, wvop0, wvop1); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + tree b_low = make_ssa_name (word_type); + new_stmt = gimple_build_assign (b_low, BIT_AND_EXPR, + wvop1, low_bits); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + tree a_low = make_ssa_name (word_type); + if (code == PLUS_EXPR) + new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR, + wvop0, low_bits); + else + new_stmt = gimple_build_assign (a_low, BIT_IOR_EXPR, + wvop0, high_bits); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + if (code == MINUS_EXPR) + { + new_stmt = gimple_build_assign (NULL_TREE, + BIT_NOT_EXPR, signs); + signs = make_ssa_name (word_type); + gimple_assign_set_lhs (new_stmt, signs); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + } + new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, + signs, high_bits); + signs = make_ssa_name (word_type); + gimple_assign_set_lhs (new_stmt, signs); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + result_low = make_ssa_name (word_type); + new_stmt = gimple_build_assign (result_low, code, + a_low, b_low); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + } + else /* 
if (code == NEGATE_EXPR) */ { - new_stmt = gimple_build_assign (NULL_TREE, - BIT_NOT_EXPR, signs); + tree a_low = make_ssa_name (word_type); + new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR, + wvop0, low_bits); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + signs = make_ssa_name (word_type); + new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR, + signs, high_bits); signs = make_ssa_name (word_type); gimple_assign_set_lhs (new_stmt, signs); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + result_low = make_ssa_name (word_type); + new_stmt = gimple_build_assign (result_low, + MINUS_EXPR, high_bits, a_low); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); } - new_stmt = gimple_build_assign (NULL_TREE, - BIT_AND_EXPR, signs, high_bits); - signs = make_ssa_name (word_type); - gimple_assign_set_lhs (new_stmt, signs); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, + result_low, signs); result_low = make_ssa_name (word_type); - new_stmt = gimple_build_assign (result_low, code, a_low, b_low); + gimple_assign_set_lhs (new_stmt, result_low); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); } else { - tree a_low = make_ssa_name (word_type); - new_stmt = gimple_build_assign (a_low, - BIT_AND_EXPR, wvop0, low_bits); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - signs = make_ssa_name (word_type); - new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - new_stmt = gimple_build_assign (NULL_TREE, - BIT_AND_EXPR, signs, high_bits); - signs = make_ssa_name (word_type); - gimple_assign_set_lhs (new_stmt, signs); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + new_stmt = gimple_build_assign 
(NULL_TREE, code, wvop0, wvop1); result_low = make_ssa_name (word_type); - new_stmt = gimple_build_assign (result_low, - MINUS_EXPR, high_bits, a_low); + gimple_assign_set_lhs (new_stmt, result_low); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + } - new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low, - signs); - result_low = make_ssa_name (word_type); - gimple_assign_set_lhs (new_stmt, result_low); - vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, build1 (VIEW_CONVERT_EXPR, vectype, result_low)); @@ -7329,7 +7282,7 @@ vectorizable_operation (vec_info *vinfo, tree mask; if (masked_loop_p) mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype, i); + vec_num, vectype, i); else /* Dummy mask. */ mask = build_minus_one_cst (truth_type_for (vectype)); @@ -7356,7 +7309,7 @@ vectorizable_operation (vec_info *vinfo, if (len_loop_p) { tree len = vect_get_loop_len (loop_vinfo, gsi, lens, - vec_num * ncopies, vectype, i, 1); + vec_num, vectype, i, 1); signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); tree bias = build_int_cst (intQI_type_node, biasval); @@ -7383,21 +7336,19 @@ vectorizable_operation (vec_info *vinfo, && code == BIT_AND_EXPR && VECTOR_BOOLEAN_TYPE_P (vectype)) { - if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, - ncopies})) + if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, 1 })) { mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype, i); + vec_num, vectype, i); vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, vop0, gsi); } - if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, - ncopies })) + if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, 1 })) { mask = vect_get_loop_mask (loop_vinfo, gsi, masks, - vec_num * ncopies, vectype, i); + vec_num, vectype, i); vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, vop1, gsi); @@ -7428,15 
+7379,9 @@ vectorizable_operation (vec_info *vinfo, new_stmt, gsi); } - if (slp_node) - slp_node->push_vec_def (new_stmt); - else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + slp_node->push_vec_def (new_stmt); } - if (!slp_node) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - vec_oprnds0.release (); vec_oprnds1.release (); vec_oprnds2.release (); diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc index 6603d90..4c78759 100644 --- a/gcc/vr-values.cc +++ b/gcc/vr-values.cc @@ -1996,10 +1996,13 @@ simplify_using_ranges::simplify (gimple_stmt_iterator *gsi) case BIT_AND_EXPR: case BIT_IOR_EXPR: - /* Optimize away BIT_AND_EXPR and BIT_IOR_EXPR - if all the bits being cleared are already cleared or - all the bits being set are already set. */ - if (INTEGRAL_TYPE_P (TREE_TYPE (rhs1))) + /* Optimize away BIT_AND_EXPR and BIT_IOR_EXPR if all the bits + being cleared are already cleared or all the bits being set + are already set. Beware that boolean types must be handled + logically (see range-op.cc) unless they have precision 1. */ + if (INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + && (TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE + || TYPE_PRECISION (TREE_TYPE (rhs1)) == 1)) return simplify_bit_ops_using_ranges (gsi, stmt); break; diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-full.c b/libgomp/testsuite/libgomp.c/interop-cuda-full.c index 38aa6b1..c48a934 100644 --- a/libgomp/testsuite/libgomp.c/interop-cuda-full.c +++ b/libgomp/testsuite/libgomp.c/interop-cuda-full.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! 
offload_device_nvptx } } } */ + /* { dg-require-effective-target openacc_cuda } */ /* { dg-require-effective-target openacc_cudart } */ /* { dg-additional-options "-lcuda -lcudart" } */ diff --git a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c index 17cbb15..bc257a2 100644 --- a/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c +++ b/libgomp/testsuite/libgomp.c/interop-cuda-libonly.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + /* { dg-require-effective-target openacc_libcudart } */ /* { dg-require-effective-target openacc_libcuda } */ /* { dg-additional-options "-lcuda -lcudart" } */ diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c index d7725fc..bd44f44 100644 --- a/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-full.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! offload_device_gcn } } } */ + /* { dg-require-effective-target gomp_hip_header_amd } */ /* { dg-require-effective-target gomp_libamdhip64 } */ /* { dg-additional-options "-lamdhip64" } */ diff --git a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c index 2584537..91ad987 100644 --- a/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c +++ b/libgomp/testsuite/libgomp.c/interop-hip-amd-no-hip-header.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_gcn } } } */ +/* { dg-do link { target { ! 
offload_device_gcn } } } */ + /* { dg-require-effective-target gomp_libamdhip64 } */ /* { dg-additional-options "-lamdhip64" } */ diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c index 79af47d..d5dc236 100644 --- a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-full.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + /* { dg-require-effective-target openacc_cudart } */ /* { dg-require-effective-target openacc_cuda } */ /* { dg-require-effective-target gomp_hip_header_nvidia } */ diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c index 4586398..7cff2cb 100644 --- a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-headers.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! offload_device_nvptx } } } */ + /* { dg-require-effective-target openacc_libcudart } */ /* { dg-require-effective-target openacc_libcuda } */ /* { dg-additional-options "-lcuda -lcudart" } */ diff --git a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c index 4186984..7b7dc74 100644 --- a/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c +++ b/libgomp/testsuite/libgomp.c/interop-hip-nvidia-no-hip-header.c @@ -1,3 +1,6 @@ +/* { dg-do run { target { offload_device_nvptx } } } */ +/* { dg-do link { target { ! 
offload_device_nvptx } } } */ + /* { dg-require-effective-target openacc_cudart } */ /* { dg-require-effective-target openacc_cuda } */ /* { dg-additional-options "-lcuda -lcudart" } */ diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 index bbd49dd..eb2f437 100644 --- a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-full.F90 @@ -1,3 +1,6 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + ! { dg-require-effective-target gomp_hipfort_module } ! { dg-require-effective-target gomp_libamdhip64 } ! { dg-additional-options "-lamdhip64" } diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 index 0afec83..0ebbe80 100644 --- a/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-amd-no-module.F90 @@ -1,3 +1,6 @@ +! { dg-do run { target { offload_device_gcn } } } +! { dg-do link { target { ! offload_device_gcn } } } + ! { dg-require-effective-target gomp_libamdhip64 } ! { dg-additional-options "-lamdhip64" } diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 index cef592f..d29a689 100644 --- a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-full.F90 @@ -1,3 +1,6 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + ! { dg-require-effective-target gomp_hipfort_module } ! { dg-require-effective-target openacc_cudart } ! 
{ dg-require-effective-target openacc_cuda } diff --git a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 index c1ef29d..2063610 100644 --- a/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 +++ b/libgomp/testsuite/libgomp.fortran/interop-hip-nvidia-no-module.F90 @@ -1,3 +1,6 @@ +! { dg-do run { target { offload_device_nvptx } } } +! { dg-do link { target { ! offload_device_nvptx } } } + ! { dg-require-effective-target openacc_libcudart } ! { dg-require-effective-target openacc_libcuda } ! { dg-additional-options "-lcuda -lcudart" } |