aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-sve.md17
-rw-r--r--gcc/cp/class.cc3
-rw-r--r--gcc/cp/decl2.cc4
-rw-r--r--gcc/cp/pt.cc2
-rw-r--r--gcc/dse.cc5
-rw-r--r--gcc/gcov.cc20
-rw-r--r--gcc/gimple-fold.cc15
-rw-r--r--gcc/gimple.h30
-rw-r--r--gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C12
-rw-r--r--gcc/testsuite/g++.dg/cpp2a/constinit16.C2
-rw-r--r--gcc/testsuite/g++.dg/cpp2a/decomp2.C2
-rw-r--r--gcc/testsuite/gcc.dg/pr119160.c3
-rw-r--r--gcc/testsuite/gcc.dg/torture/pr120182.c42
-rw-r--r--gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c15
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c8
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c3
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c93
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c32
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/single_1.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/single_2.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/single_3.c11
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/single_4.c11
-rw-r--r--gcc/testsuite/gnat.dg/opt106.adb11
-rw-r--r--gcc/testsuite/gnat.dg/opt106_pkg1.adb39
-rw-r--r--gcc/testsuite/gnat.dg/opt106_pkg1.ads16
-rw-r--r--gcc/testsuite/gnat.dg/opt106_pkg2.adb11
-rw-r--r--gcc/testsuite/gnat.dg/opt106_pkg2.ads8
-rw-r--r--gcc/tree-vect-slp.cc63
-rw-r--r--gcc/tree-vect-stmts.cc251
-rw-r--r--gcc/vr-values.cc11
46 files changed, 1289 insertions, 329 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bf0e57d..bf7569f 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -702,6 +702,23 @@
}
)
+;; Fold predicated loads/stores with a PTRUE predicate to unpredicated
+;; loads/stores after RA.
+(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str"
+ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=Utr,w")
+ (unspec:SVE_FULL
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_one")
+ (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand" "w,Utr")]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && reload_completed
+ && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
+ && ((REG_P (operands[0]) && MEM_P (operands[2]))
+ || (REG_P (operands[2]) && MEM_P (operands[0])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (match_dup 2))])
+
;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable. Memory
;; accesses require secondary reloads.
diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index 370bfa3..2764bb5 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -5744,6 +5744,9 @@ type_has_converting_constructor (tree t)
{
tree fn = *iter;
tree parm = FUNCTION_FIRST_USER_PARMTYPE (fn);
+ if (parm == NULL_TREE)
+ /* Varargs. */
+ return true;
if (parm == void_list_node
|| !sufficient_parms_p (TREE_CHAIN (parm)))
/* Can't accept a single argument, so won't be considered for
diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc
index 21156f1..15db1d6 100644
--- a/gcc/cp/decl2.cc
+++ b/gcc/cp/decl2.cc
@@ -3160,7 +3160,9 @@ determine_visibility (tree decl)
&& !attr)
{
int depth = TMPL_ARGS_DEPTH (args);
- if (DECL_VISIBILITY_SPECIFIED (decl))
+ if (DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P (TI_TEMPLATE (tinfo)))
+ /* Class template args don't affect template friends. */;
+ else if (DECL_VISIBILITY_SPECIFIED (decl))
{
/* A class template member with explicit visibility
overrides the class visibility, so we need to apply
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 0694c28..09f74a2 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -23254,7 +23254,7 @@ fn_type_unification (tree fn,
conversions that we know are not going to induce template instantiation
(PR99599). */
if (strict == DEDUCE_CALL
- && incomplete
+ && incomplete && flag_concepts
&& check_non_deducible_conversions (parms, args, nargs, fn, strict, flags,
convs, explain_p,
/*noninst_only_p=*/true))
diff --git a/gcc/dse.cc b/gcc/dse.cc
index ffc86ff..14f82c3 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1190,7 +1190,10 @@ canon_address (rtx mem,
address = strip_offset_and_add (address, offset);
if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
- && const_or_frame_p (address))
+ && const_or_frame_p (address)
+ /* Literal addresses can alias any base, avoid creating a
+ group for them. */
+ && ! CONST_SCALAR_INT_P (address))
{
group_info *group = get_group_info (address);
diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index e1ad801..0dfe513 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -3591,7 +3591,7 @@ print_source_line (FILE *f, const vector<const char *> &source_lines,
Returns 1 if the path was printed, 0 otherwise. */
static unsigned
print_prime_path_lines (FILE *gcov_file, const function_info &fn,
- const vector<unsigned> &path, size_t pathno)
+ const vector<unsigned> &path, unsigned pathno)
{
const bool is_covered = fn.paths.covered_p (pathno);
if (is_covered && !flag_prime_paths_lines_covered)
@@ -3600,9 +3600,9 @@ print_prime_path_lines (FILE *gcov_file, const function_info &fn,
return 0;
if (is_covered)
- fprintf (gcov_file, "path %zu covered: lines", pathno);
+ fprintf (gcov_file, "path %u covered: lines", pathno);
else
- fprintf (gcov_file, "path %zu not covered: lines", pathno);
+ fprintf (gcov_file, "path %u not covered: lines", pathno);
for (size_t k = 0; k != path.size (); ++k)
{
@@ -3658,7 +3658,7 @@ print_inlined_separator (FILE *gcov_file, unsigned current_index, const
Returns 1 if the path was printed, 0 otherwise. */
static unsigned
print_prime_path_source (FILE *gcov_file, const function_info &fn,
- const vector<unsigned> &path, size_t pathno)
+ const vector<unsigned> &path, unsigned pathno)
{
const bool is_covered = fn.paths.covered_p (pathno);
if (is_covered && !flag_prime_paths_source_covered)
@@ -3667,9 +3667,9 @@ print_prime_path_source (FILE *gcov_file, const function_info &fn,
return 0;
if (is_covered)
- fprintf (gcov_file, "path %zu covered:\n", pathno);
+ fprintf (gcov_file, "path %u covered:\n", pathno);
else
- fprintf (gcov_file, "path %zu not covered:\n", pathno);
+ fprintf (gcov_file, "path %u not covered:\n", pathno);
unsigned current = fn.src;
for (size_t k = 0; k != path.size (); ++k)
{
@@ -3728,19 +3728,19 @@ output_path_coverage (FILE *gcov_file, const function_info *fn)
if (fn->paths.paths.empty ())
fnotice (gcov_file, "path coverage omitted\n");
else
- fnotice (gcov_file, "paths covered %u of %zu\n",
- fn->paths.covered_paths (), fn->paths.paths.size ());
+ fnotice (gcov_file, "paths covered %u of " HOST_SIZE_T_PRINT_UNSIGNED "\n",
+ fn->paths.covered_paths (), (fmt_size_t)fn->paths.paths.size ());
if (flag_prime_paths_lines_uncovered || flag_prime_paths_lines_covered)
{
- size_t pathno = 0;
+ unsigned pathno = 0;
for (const vector<unsigned> &path : fn->paths.paths)
print_prime_path_lines (gcov_file, *fn, path, pathno++);
}
if (flag_prime_paths_source_uncovered || flag_prime_paths_source_covered)
{
- size_t pathno = 0;
+ unsigned pathno = 0;
for (const vector<unsigned> &path : fn->paths.paths)
print_prime_path_source (gcov_file, *fn, path, pathno++);
}
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index f801e8b..e63fd6f 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -6258,10 +6258,21 @@ replace_stmt_with_simplification (gimple_stmt_iterator *gsi,
}
else if (code == INTEGER_CST)
{
+ /* Make into the canonical form `1 != 0` and `0 != 0`.
+ If already in the canonical form return false
+ saying nothing has been done. */
if (integer_zerop (ops[0]))
- gimple_cond_make_false (cond_stmt);
+ {
+ if (gimple_cond_false_canonical_p (cond_stmt))
+ return false;
+ gimple_cond_make_false (cond_stmt);
+ }
else
- gimple_cond_make_true (cond_stmt);
+ {
+ if (gimple_cond_true_canonical_p (cond_stmt))
+ return false;
+ gimple_cond_make_true (cond_stmt);
+ }
}
else if (!inplace)
{
diff --git a/gcc/gimple.h b/gcc/gimple.h
index 032365f..977ff1c 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -3875,6 +3875,21 @@ gimple_cond_true_p (const gcond *gs)
return false;
}
+/* Check if conditional statement GS is in the caonical form of 'if (1 != 0)'. */
+
+inline bool
+gimple_cond_true_canonical_p (const gcond *gs)
+{
+ tree lhs = gimple_cond_lhs (gs);
+ tree rhs = gimple_cond_rhs (gs);
+ tree_code code = gimple_cond_code (gs);
+ if (code == NE_EXPR
+ && lhs == boolean_true_node
+ && rhs == boolean_false_node)
+ return true;
+ return false;
+}
+
/* Check if conditional statement GS is of the form 'if (1 != 1)',
'if (0 != 0)', 'if (1 == 0)' or 'if (0 == 1)' */
@@ -3900,6 +3915,21 @@ gimple_cond_false_p (const gcond *gs)
return false;
}
+/* Check if conditional statement GS is in the caonical form of 'if (0 != 0)'. */
+
+inline bool
+gimple_cond_false_canonical_p (const gcond *gs)
+{
+ tree lhs = gimple_cond_lhs (gs);
+ tree rhs = gimple_cond_rhs (gs);
+ tree_code code = gimple_cond_code (gs);
+ if (code == NE_EXPR
+ && lhs == boolean_false_node
+ && rhs == boolean_false_node)
+ return true;
+ return false;
+}
+
/* Set the code, LHS and RHS of GIMPLE_COND STMT from CODE, LHS and RHS. */
inline void
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C b/gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C
new file mode 100644
index 0000000..7adf6ec
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-nondep6.C
@@ -0,0 +1,12 @@
+// PR c++/120185
+
+struct A {
+ A(...);
+};
+
+template <class T> void f(A, T) { }
+
+int main()
+{
+ f(42, 24);
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/constinit16.C b/gcc/testsuite/g++.dg/cpp2a/constinit16.C
index dda81d5..046e9aa 100644
--- a/gcc/testsuite/g++.dg/cpp2a/constinit16.C
+++ b/gcc/testsuite/g++.dg/cpp2a/constinit16.C
@@ -2,7 +2,7 @@
// { dg-do compile { target c++20 } }
// { dg-add-options tls }
// { dg-require-alias "" }
-// { dg-require-effective-target tls_runtime }
+// { dg-require-effective-target tls }
// { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar1" } }
// { dg-final { scan-assembler "_ZTH17mythreadlocalvar2" } }
// { dg-final { scan-assembler-not "_ZTH17mythreadlocalvar3" } }
diff --git a/gcc/testsuite/g++.dg/cpp2a/decomp2.C b/gcc/testsuite/g++.dg/cpp2a/decomp2.C
index c2bfe46..d13f424 100644
--- a/gcc/testsuite/g++.dg/cpp2a/decomp2.C
+++ b/gcc/testsuite/g++.dg/cpp2a/decomp2.C
@@ -1,7 +1,7 @@
// P1091R3
// { dg-do run { target c++11 } }
// { dg-options "" }
-// { dg-require-effective-target tls }
+// { dg-require-effective-target tls_runtime }
// { dg-add-options tls }
namespace std {
diff --git a/gcc/testsuite/gcc.dg/pr119160.c b/gcc/testsuite/gcc.dg/pr119160.c
index b4629a1..5743b3b 100644
--- a/gcc/testsuite/gcc.dg/pr119160.c
+++ b/gcc/testsuite/gcc.dg/pr119160.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
-/* { dg-options "-O2 -finstrument-functions-once -favoid-store-forwarding -fnon-call-exceptions -fschedule-insns -mgeneral-regs-only -Wno-psabi" } */
+/* { dg-options "-O2 -finstrument-functions-once -favoid-store-forwarding -fnon-call-exceptions -fschedule-insns -Wno-psabi" } */
+/* { dg-additional-options "-mgeneral-regs-only" { target { x86_64-*-* i?86-*-* arm*-*-* aarch64*-*-* } } } */
typedef __attribute__((__vector_size__ (32))) int V;
diff --git a/gcc/testsuite/gcc.dg/torture/pr120182.c b/gcc/testsuite/gcc.dg/torture/pr120182.c
new file mode 100644
index 0000000..5e2d171
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr120182.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { { *-*-linux* *-*-gnu* *-*-uclinux* } && mmap } } } */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+struct S
+{
+ struct S *next;
+};
+
+static void __attribute__((noipa))
+allocate(void *addr, unsigned long long size)
+{
+ void *ptr = mmap((void *)addr, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
+ -1, 0);
+ if(ptr != addr)
+ exit(0);
+}
+
+int main (void)
+{
+ int size = 0x8000;
+ char *ptr = (char *)0x288000ull;
+ allocate((void *)ptr, size);
+
+ struct S *s1 = (struct S *)ptr;
+ struct S *s2 = (struct S *)256;
+ for (int i = 0; i < 3; i++)
+ {
+ for(char *addr = (char *)s1; addr < (char *)s1 + sizeof(*s1); ++addr)
+ *addr = 0;
+
+ if(s1->next)
+ s1->next = s1->next->next = s2;
+ else
+ s1->next = s2;
+ }
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c
new file mode 100644
index 0000000..955fc7e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119960-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+double foo (double *dst, double *src, int b)
+{
+ double y = src[1];
+ if (b)
+ {
+ dst[0] = src[0];
+ dst[1] = y;
+ }
+ return y;
+}
+
+/* { dg-final { scan-tree-dump "optimized: basic block part vectorized" "slp2" { target vect_double } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c
index 907637f..eeba533 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/attributes_6.c
@@ -29,7 +29,7 @@ test_add (fixed_int8_t x, fixed_int8_t y)
}
/*
-** test_add_gnu:
+** test_add_gnu: {target aarch64_big_endian }
** (
** add (z[0-9]+\.b), (?:z0\.b, z1\.b|z1\.b, z0\.b)
** ptrue (p[0-7])\.b, vl32
@@ -41,6 +41,12 @@ test_add (fixed_int8_t x, fixed_int8_t y)
** )
** ret
*/
+/*
+** test_add_gnu: {target aarch64_little_endian }
+** add (z[0-9]+)\.b, (?:z0\.b, z1\.b|z1\.b, z0\.b)
+** str \1, \[x8\]
+** ret
+*/
gnu_int8_t
test_add_gnu (fixed_int8_t x, fixed_int8_t y)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c
index b65826b..d423dcf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_14.c
@@ -9,5 +9,7 @@ uint64_t f2(uint64_t *ptr, int n) {
return res;
}
-/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 5 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 {target aarch64_little_endian} } } */
+/* { dg-final { scan-assembler-times {\tldr\tz[0-9]+,} 4 {target aarch64_little_endian} } } */
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d,} 8 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c
index a7ecfe3..93af4c1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_4.c
@@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count)
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c
index f3a29fc..fab49ed 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_5.c
@@ -9,5 +9,6 @@ vset (int *restrict dst, int *restrict src, int count)
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */
/* { dg-final { scan-assembler-not {\tstp\tq} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c
index 565e1e3..160667b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_6.c
@@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count)
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 1 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 1 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 1 {target aarch64_little_endian} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c
index 31057c0..b71c673 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cost_model_7.c
@@ -9,4 +9,5 @@ vset (int *restrict dst, int *restrict src, int count)
*dst++ = 1;
}
-/* { dg-final { scan-assembler-times {\tst1w\tz} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz} 2 {target aarch64_big_endian} } } */
+/* { dg-final { scan-assembler-times {\tstr\tz} 2 {target aarch64_little_endian} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c
index 50e77f9..8d480a8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int16_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
@@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int16_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int16_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int16_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
@@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int16_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int16_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int16_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
@@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int16_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int16_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int16_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.h), #9\.0[^\n]*
** ...
@@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.h, #9\.0[^\n]*
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int16_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c
index e7b092a..b3c699d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int32_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
@@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int32_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int32_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int32_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
@@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int32_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int32_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int32_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
@@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int32_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int32_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int32_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.s), #9\.0[^\n]*
** ...
@@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.s, #9\.0[^\n]*
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int32_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c
index c3389a8..7078afc 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int64_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
@@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int64_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int64_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int64_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
@@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int64_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int64_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int64_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
@@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int64_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int64_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_8:
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int64_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** fmov (z[0-9]+\.d), #9\.0[^\n]*
** ...
@@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** fmov (z[0-9]+)\.d, #9\.0[^\n]*
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int64_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c
index 2877787..fcbac37 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_mf8.c
@@ -8,9 +8,9 @@
/*
** callee_0:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ldr (z[0-9]+), \[x1\]
** ...
-** st1b \1, \2, \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -32,9 +32,9 @@ callee_0 (mfloat8_t *ptr, ...)
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[x1\]
+** str \2, \[x1\]
** ...
** ret
*/
@@ -47,9 +47,9 @@ caller_0 (mfloat8_t *ptr, mfloat8_t in)
/*
** callee_1:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ldr (z[0-9]+), \[x2\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -72,9 +72,9 @@ callee_1 (mfloat8_t *ptr, ...)
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[x2\]
+** str \2, \[x2\]
** ...
** ret
*/
@@ -87,9 +87,9 @@ caller_1 (mfloat8_t *ptr, mfloat8_t in)
/*
** callee_7:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ldr (z[0-9]+), \[x7\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -117,9 +117,9 @@ callee_7 (mfloat8_t *ptr, ...)
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[x7\]
+** str \2, \[x7\]
** ...
** ret
*/
@@ -136,9 +136,9 @@ caller_7 (mfloat8_t *ptr, mfloat8_t in)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ldr (z[0-9]+), \[\2\]
** ...
-** st1b \3, \4, \[x0\]
+** str \3, \[x0\]
** ...
** ret
*/
@@ -167,9 +167,9 @@ callee_8 (mfloat8_t *ptr, ...)
** ...
** umov (w[0-9]+), v0.b\[0\]
** ...
-** mov (z[0-9]+\.b), \1
+** mov (z[0-9]+)\.b, \1
** ...
-** st1b \2, p[0-7], \[(x[0-9]+)\]
+** str \2, \[(x[0-9]+)\]
** ...
** str \3, \[sp\]
** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c
index 3c644e1..e65e64f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int16_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int16_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int16_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int16_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int16_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int16_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int16_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int16_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int16_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int16_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int16_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c
index 652d609..6488a5f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int32_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int32_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int32_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int32_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int32_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int32_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int32_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int32_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int32_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int32_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int32_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c
index 72ea6a3..4b77b4f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int64_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int64_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int64_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int64_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int64_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int64_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int64_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int64_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int64_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int64_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int64_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c
index 02f4bec..e686b3e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c
@@ -8,9 +8,9 @@
/*
** callee_0:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ldr (z[0-9]+), \[x1\]
** ...
-** st1b \1, \2, \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -29,9 +29,9 @@ callee_0 (int8_t *ptr, ...)
/*
** caller_0:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x1\]
+** str \1, \[x1\]
** ...
** ret
*/
@@ -44,9 +44,9 @@ caller_0 (int8_t *ptr)
/*
** callee_1:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ldr (z[0-9]+), \[x2\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...)
/*
** caller_1:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x2\]
+** str \1, \[x2\]
** ...
** ret
*/
@@ -81,9 +81,9 @@ caller_1 (int8_t *ptr)
/*
** callee_7:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ldr (z[0-9]+), \[x7\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...)
/*
** caller_7:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x7\]
+** str \1, \[x7\]
** ...
** ret
*/
@@ -127,9 +127,9 @@ caller_7 (int8_t *ptr)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ldr (z[0-9]+), \[\2\]
** ...
-** st1b \3, \4, \[x0\]
+** str \3, \[x0\]
** ...
** ret
*/
@@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...)
/*
** caller_8:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[(x[0-9]+)\]
+** str \1, \[(x[0-9]+)\]
** ...
** str \2, \[sp\]
** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c
index b60d448..74ef4da 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int16_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int16_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -35,6 +44,15 @@ callee_0 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int16_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int16_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int16_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int16_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -72,6 +99,15 @@ callee_1 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int16_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int16_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int16_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int16_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -114,6 +159,15 @@ callee_7 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int16_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int16_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int16_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int16_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int16_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.h), #42
** ...
@@ -163,6 +229,17 @@ callee_8 (int16_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.h, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int16_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c
index 5f01464..4f9ff78 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int32_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int32_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -35,6 +44,15 @@ callee_0 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int32_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int32_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int32_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int32_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -72,6 +99,15 @@ callee_1 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int32_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int32_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int32_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int32_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -114,6 +159,15 @@ callee_7 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int32_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int32_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int32_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int32_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int32_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.s), #42
** ...
@@ -163,6 +229,17 @@ callee_8 (int32_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.s, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int32_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c
index 986739f..27e437b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c
@@ -6,7 +6,7 @@
#include <stdarg.h>
/*
-** callee_0:
+** callee_0: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\]
** ...
@@ -14,6 +14,15 @@
** ...
** ret
*/
+/*
+** callee_0: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x1\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_0 (int64_t *ptr, ...)
{
@@ -27,7 +36,7 @@ callee_0 (int64_t *ptr, ...)
}
/*
-** caller_0:
+** caller_0: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -35,6 +44,15 @@ callee_0 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_0: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x1\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_0 (int64_t *ptr)
{
@@ -42,7 +60,7 @@ caller_0 (int64_t *ptr)
}
/*
-** callee_1:
+** callee_1: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\]
** ...
@@ -50,6 +68,15 @@ caller_0 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_1: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x2\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_1 (int64_t *ptr, ...)
{
@@ -64,7 +91,7 @@ callee_1 (int64_t *ptr, ...)
}
/*
-** caller_1:
+** caller_1: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -72,6 +99,15 @@ callee_1 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_1: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x2\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_1 (int64_t *ptr)
{
@@ -79,7 +115,7 @@ caller_1 (int64_t *ptr)
}
/*
-** callee_7:
+** callee_7: {target aarch64_big_endian}
** ...
** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\]
** ...
@@ -87,6 +123,15 @@ caller_1 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_7: {target aarch64_little_endian}
+** ...
+** ldr (z[0-9]+), \[x7\]
+** ...
+** str \1, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_7 (int64_t *ptr, ...)
{
@@ -106,7 +151,7 @@ callee_7 (int64_t *ptr, ...)
}
/*
-** caller_7:
+** caller_7: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -114,6 +159,15 @@ callee_7 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_7: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[x7\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_7 (int64_t *ptr)
{
@@ -122,7 +176,7 @@ caller_7 (int64_t *ptr)
/* FIXME: We should be able to get rid of the va_list object. */
/*
-** callee_8:
+** callee_8: {target aarch64_big_endian}
** sub sp, sp, #([0-9]+)
** ...
** ldr (x[0-9]+), \[sp, \1\]
@@ -133,6 +187,18 @@ caller_7 (int64_t *ptr)
** ...
** ret
*/
+/*
+** callee_8: {target aarch64_little_endian}
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ldr (z[0-9]+), \[\2\]
+** ...
+** str \3, \[x0\]
+** ...
+** ret
+*/
void __attribute__((noipa))
callee_8 (int64_t *ptr, ...)
{
@@ -153,7 +219,7 @@ callee_8 (int64_t *ptr, ...)
}
/*
-** caller_8:
+** caller_8: {target aarch64_big_endian}
** ...
** mov (z[0-9]+\.d), #42
** ...
@@ -163,6 +229,17 @@ callee_8 (int64_t *ptr, ...)
** ...
** ret
*/
+/*
+** caller_8: {target aarch64_little_endian}
+** ...
+** mov (z[0-9]+)\.d, #42
+** ...
+** str \1, \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
void __attribute__((noipa))
caller_8 (int64_t *ptr)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c
index 533cba67..d43a6da 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c
@@ -8,9 +8,9 @@
/*
** callee_0:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\]
+** ldr (z[0-9]+), \[x1\]
** ...
-** st1b \1, \2, \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -29,9 +29,9 @@ callee_0 (int8_t *ptr, ...)
/*
** caller_0:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x1\]
+** str \1, \[x1\]
** ...
** ret
*/
@@ -44,9 +44,9 @@ caller_0 (int8_t *ptr)
/*
** callee_1:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\]
+** ldr (z[0-9]+), \[x2\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -66,9 +66,9 @@ callee_1 (int8_t *ptr, ...)
/*
** caller_1:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x2\]
+** str \1, \[x2\]
** ...
** ret
*/
@@ -81,9 +81,9 @@ caller_1 (int8_t *ptr)
/*
** callee_7:
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ldr (z[0-9]+), \[x7\]
** ...
-** st1b \1, p[0-7], \[x0\]
+** str \1, \[x0\]
** ...
** ret
*/
@@ -108,9 +108,9 @@ callee_7 (int8_t *ptr, ...)
/*
** caller_7:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[x7\]
+** str \1, \[x7\]
** ...
** ret
*/
@@ -127,9 +127,9 @@ caller_7 (int8_t *ptr)
** ...
** ldr (x[0-9]+), \[sp, \1\]
** ...
-** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ldr (z[0-9]+), \[\2\]
** ...
-** st1b \3, \4, \[x0\]
+** str \3, \[x0\]
** ...
** ret
*/
@@ -155,9 +155,9 @@ callee_8 (int8_t *ptr, ...)
/*
** caller_8:
** ...
-** mov (z[0-9]+\.b), #42
+** mov (z[0-9]+)\.b, #42
** ...
-** st1b \1, p[0-7], \[(x[0-9]+)\]
+** str \1, \[(x[0-9]+)\]
** ...
** str \2, \[sp\]
** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c
index 985cd0c..f07900b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c
@@ -19,5 +19,7 @@ foo (void)
/* We should operate on aligned vectors. */
/* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, (x|\.LANCHOR0)\n} } } */
/* We should unroll the loop three times. */
-/* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tst1w\t" 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times "\tst1w\t" 2 { target aarch64_little_endian } } } */
+/* { dg-final { scan-assembler-times "\tstr\t" 1 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, vl7\n.*\teor\tp[0-9]+\.b, (p[0-9]+)/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c
new file mode 100644
index 0000000..c3bfa98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/ptrue_ldr_str.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#include <arm_sve.h>
+
+#define TEST(TYPE, TY, B) \
+ sv##TYPE ld_##TY (TYPE *x) \
+ { \
+ return svld1_##TY(svptrue_b##B (), x); \
+ } \
+ void st_##TY (TYPE *x, sv##TYPE data) \
+ { \
+ svst1_##TY(svptrue_b##B (), x, data); \
+ }
+
+TEST(bfloat16_t, bf16, 16)
+TEST(float16_t, f16, 16)
+TEST(float32_t, f32, 32)
+TEST(float64_t, f64, 64)
+TEST(uint8_t, u8, 8)
+TEST(uint16_t, u16, 16)
+TEST(uint32_t, u32, 32)
+TEST(uint64_t, u64, 64)
+TEST(int8_t, s8, 8)
+TEST(int16_t, s16, 16)
+TEST(int32_t, s32, 32)
+TEST(int64_t, s64, 64)
+
+/* { dg-final { scan-assembler-times {\tldr\tz0, \[x0\]} 12 } } */
+/* { dg-final { scan-assembler-times {\tstr\tz0, \[x0\]} 12 } } */ \ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
index d9bb97e..be71921 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c
@@ -40,12 +40,13 @@ TEST_LOOP (double, 3.0)
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
index d27eead..8692984 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c
@@ -16,12 +16,13 @@
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
index 313a72d..10799fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c
@@ -16,12 +16,13 @@
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
index 4f46654..53658a8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c
@@ -16,12 +16,13 @@
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */
/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 9 { target aarch64_big_endian } } } */
-/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */
-/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
-/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 2 { target aarch64_big_endian } } } */
+/* { dg-final { scan-assembler-times {\tstr\tz[0-9]+, \[x0\]} 11 { target aarch64_little_endian } } } */
/* { dg-final { scan-assembler-not {\twhile} } } */
/* { dg-final { scan-assembler-not {\tb} } } */
diff --git a/gcc/testsuite/gnat.dg/opt106.adb b/gcc/testsuite/gnat.dg/opt106.adb
new file mode 100644
index 0000000..525930b
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt106.adb
@@ -0,0 +1,11 @@
+-- { dg-do run }
+-- { dg-options "-O2" }
+
+with Opt106_Pkg1; use Opt106_Pkg1;
+
+procedure Opt106 is
+ Obj : T := (False, 0, 0, 0, True);
+
+begin
+ Proc (Obj, 0, False, True);
+end;
diff --git a/gcc/testsuite/gnat.dg/opt106_pkg1.adb b/gcc/testsuite/gnat.dg/opt106_pkg1.adb
new file mode 100644
index 0000000..154b13f
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt106_pkg1.adb
@@ -0,0 +1,39 @@
+with Opt106_Pkg2; use Opt106_Pkg2;
+
+package body Opt106_Pkg1 is
+
+ procedure Proc (Obj : in out T;
+ Data : Integer;
+ Last : Boolean;
+ Stretch : Boolean) is
+
+ begin
+ if Stretch and then (Obj.Delayed /= 0 or else not Obj.Attach_Last) then
+ raise Program_Error;
+ end if;
+
+ if Obj.Delayed /= 0 then
+ Stop (Obj.Delayed, Obj.Before, Data, False);
+ end if;
+
+ if Last or (Obj.Delayed = 0 and not Stretch) then
+ Stop (Data, Obj.Before, 0, Last);
+
+ if Last then
+ Obj.Initialized := False;
+ else
+ Obj.Next := 0;
+ Obj.Before := Data;
+ end if;
+
+ else
+ if Stretch then
+ Obj.Next := 1;
+ else
+ Obj.Before := Obj.Delayed;
+ end if;
+ Obj.Delayed := Data;
+ end if;
+ end;
+
+end Opt106_Pkg1;
diff --git a/gcc/testsuite/gnat.dg/opt106_pkg1.ads b/gcc/testsuite/gnat.dg/opt106_pkg1.ads
new file mode 100644
index 0000000..85ac24d
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt106_pkg1.ads
@@ -0,0 +1,16 @@
+package Opt106_Pkg1 is
+
+ type T is record
+ Initialized : Boolean;
+ Before : Integer;
+ Delayed : Integer;
+ Next : Integer;
+ Attach_Last : Boolean;
+ end record;
+
+ procedure Proc (Obj : in out T;
+ Data : Integer;
+ Last : Boolean;
+ Stretch : Boolean);
+
+end Opt106_Pkg1;
diff --git a/gcc/testsuite/gnat.dg/opt106_pkg2.adb b/gcc/testsuite/gnat.dg/opt106_pkg2.adb
new file mode 100644
index 0000000..cf63956
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt106_pkg2.adb
@@ -0,0 +1,11 @@
+package body Opt106_Pkg2 is
+
+ procedure Stop (Delayed : Integer;
+ Before : Integer;
+ After : Integer;
+ Last : Boolean) is
+ begin
+ raise Program_Error;
+ end;
+
+end Opt106_Pkg2;
diff --git a/gcc/testsuite/gnat.dg/opt106_pkg2.ads b/gcc/testsuite/gnat.dg/opt106_pkg2.ads
new file mode 100644
index 0000000..77e5b40
--- /dev/null
+++ b/gcc/testsuite/gnat.dg/opt106_pkg2.ads
@@ -0,0 +1,8 @@
+package Opt106_Pkg2 is
+
+ procedure Stop (Delayed : Integer;
+ Before : Integer;
+ After : Integer;
+ Last : Boolean);
+
+end Opt106_Pkg2;
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 2774fd2..857517f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7844,21 +7844,70 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
node, node_instance, cost_vec);
}
+static int
+sort_ints (const void *a_, const void *b_)
+{
+ int a = *(const int *)a_;
+ int b = *(const int *)b_;
+ return a - b;
+}
+
/* Verify if we can externalize a set of internal defs. */
static bool
vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts)
{
+ /* Constant generation uses get_later_stmt which can only handle
+ defs from the same BB or a set of defs that can be ordered
+ with a dominance query. */
basic_block bb = NULL;
+ bool all_same = true;
+ auto_vec<int> bbs;
+ bbs.reserve_exact (stmts.length ());
for (stmt_vec_info stmt : stmts)
- if (!stmt)
- return false;
- /* Constant generation uses get_later_stmt which can only handle
- defs from the same BB. */
- else if (!bb)
- bb = gimple_bb (stmt->stmt);
- else if (gimple_bb (stmt->stmt) != bb)
+ {
+ if (!stmt)
+ return false;
+ else if (!bb)
+ bb = gimple_bb (stmt->stmt);
+ else if (gimple_bb (stmt->stmt) != bb)
+ all_same = false;
+ bbs.quick_push (gimple_bb (stmt->stmt)->index);
+ }
+ if (all_same)
+ return true;
+
+ /* Produce a vector of unique BB indexes for the defs. */
+ bbs.qsort (sort_ints);
+ unsigned i, j;
+ for (i = 1, j = 1; i < bbs.length (); ++i)
+ if (bbs[i] != bbs[j-1])
+ bbs[j++] = bbs[i];
+ gcc_assert (j >= 2);
+ bbs.truncate (j);
+
+ if (bbs.length () == 2)
+ return (dominated_by_p (CDI_DOMINATORS,
+ BASIC_BLOCK_FOR_FN (cfun, bbs[0]),
+ BASIC_BLOCK_FOR_FN (cfun, bbs[1]))
+ || dominated_by_p (CDI_DOMINATORS,
+ BASIC_BLOCK_FOR_FN (cfun, bbs[1]),
+ BASIC_BLOCK_FOR_FN (cfun, bbs[0])));
+
+ /* ??? For more than two BBs we can sort the vector and verify the
+ result is a total order. But we can't use vec::qsort with a
+ compare function using a dominance query since there's no way to
+ signal failure and any fallback for an unordered pair would
+ fail qsort_chk later.
+ For now simply hope that ordering after BB index provides the
+ best candidate total order. If required we can implement our
+ own mergesort or export an entry without checking. */
+ for (unsigned i = 1; i < bbs.length (); ++i)
+ if (!dominated_by_p (CDI_DOMINATORS,
+ BASIC_BLOCK_FOR_FN (cfun, bbs[i]),
+ BASIC_BLOCK_FOR_FN (cfun, bbs[i-1])))
return false;
+
return true;
}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3373d75..efe6a2c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6811,7 +6811,6 @@ vectorizable_operation (vec_info *vinfo,
poly_uint64 nunits_in;
poly_uint64 nunits_out;
tree vectype_out;
- unsigned int ncopies;
int vec_num;
int i;
vec<tree> vec_oprnds0 = vNULL;
@@ -6872,7 +6871,7 @@ vectorizable_operation (vec_info *vinfo,
}
scalar_dest = gimple_assign_lhs (stmt);
- vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+ vectype_out = SLP_TREE_VECTYPE (slp_node);
/* Most operations cannot handle bit-precision types without extra
truncations. */
@@ -6983,20 +6982,8 @@ vectorizable_operation (vec_info *vinfo,
}
/* Multiple types in SLP are handled by creating the appropriate number of
- vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
- case of SLP. */
- if (slp_node)
- {
- ncopies = 1;
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- }
- else
- {
- ncopies = vect_get_num_copies (loop_vinfo, vectype);
- vec_num = 1;
- }
-
- gcc_assert (ncopies >= 1);
+ vectorized stmts for each SLP node. */
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
/* Reject attempts to combine mask types with nonmask types, e.g. if
we have an AND between a (nonmask) boolean loaded from memory and
@@ -7038,9 +7025,10 @@ vectorizable_operation (vec_info *vinfo,
ops we have to lower the lowering code assumes we are
dealing with word_mode. */
if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
+ || !GET_MODE_SIZE (vec_mode).is_constant ()
|| (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
- || !target_support_p)
- && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
+ || !target_support_p)
+ && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
/* Check only during analysis. */
|| (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
{
@@ -7080,12 +7068,12 @@ vectorizable_operation (vec_info *vinfo,
if (cond_len_fn != IFN_LAST
&& direct_internal_fn_supported_p (cond_len_fn, vectype,
OPTIMIZE_FOR_SPEED))
- vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
+ vect_record_loop_len (loop_vinfo, lens, vec_num, vectype,
1);
else if (cond_fn != IFN_LAST
&& direct_internal_fn_supported_p (cond_fn, vectype,
OPTIMIZE_FOR_SPEED))
- vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
+ vect_record_loop_mask (loop_vinfo, masks, vec_num,
vectype, NULL);
else
{
@@ -7098,10 +7086,9 @@ vectorizable_operation (vec_info *vinfo,
}
/* Put types on constant and invariant SLP children. */
- if (slp_node
- && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
- || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
- || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
+ if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
+ || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
+ || !vect_maybe_update_slp_op_vectype (slp_op2, vectype))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7112,15 +7099,14 @@ vectorizable_operation (vec_info *vinfo,
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (vinfo, stmt_info,
- ncopies, dt, ndts, slp_node, cost_vec);
+ 1, dt, ndts, slp_node, cost_vec);
if (using_emulated_vectors_p)
{
/* The above vect_model_simple_cost call handles constants
in the prologue and (mis-)costs one of the stmts as
vector stmt. See below for the actual lowering that will
be applied. */
- unsigned n
- = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
+ unsigned n = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
switch (code)
{
case PLUS_EXPR:
@@ -7173,60 +7159,7 @@ vectorizable_operation (vec_info *vinfo,
else
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
- /* In case the vectorization factor (VF) is bigger than the number
- of elements that we can fit in a vectype (nunits), we have to generate
- more than one vector stmt - i.e - we need to "unroll" the
- vector stmt by a factor VF/nunits. In doing so, we record a pointer
- from one copy of the vector stmt to the next, in the field
- STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
- stages to find the correct vector defs to be used when vectorizing
- stmts that use the defs of the current stmt. The example below
- illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
- we need to create 4 vectorized stmts):
-
- before vectorization:
- RELATED_STMT VEC_STMT
- S1: x = memref - -
- S2: z = x + 1 - -
-
- step 1: vectorize stmt S1 (done in vectorizable_load. See more details
- there):
- RELATED_STMT VEC_STMT
- VS1_0: vx0 = memref0 VS1_1 -
- VS1_1: vx1 = memref1 VS1_2 -
- VS1_2: vx2 = memref2 VS1_3 -
- VS1_3: vx3 = memref3 - -
- S1: x = load - VS1_0
- S2: z = x + 1 - -
-
- step2: vectorize stmt S2 (done here):
- To vectorize stmt S2 we first need to find the relevant vector
- def for the first operand 'x'. This is, as usual, obtained from
- the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
- that defines 'x' (S1). This way we find the stmt VS1_0, and the
- relevant vector def 'vx0'. Having found 'vx0' we can generate
- the vector stmt VS2_0, and as usual, record it in the
- STMT_VINFO_VEC_STMT of stmt S2.
- When creating the second copy (VS2_1), we obtain the relevant vector
- def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
- stmt VS1_0. This way we find the stmt VS1_1 and the relevant
- vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
- pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
- Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
- chain of stmts and pointers:
- RELATED_STMT VEC_STMT
- VS1_0: vx0 = memref0 VS1_1 -
- VS1_1: vx1 = memref1 VS1_2 -
- VS1_2: vx2 = memref2 VS1_3 -
- VS1_3: vx3 = memref3 - -
- S1: x = load - VS1_0
- VS2_0: vz0 = vx0 + v1 VS2_1 -
- VS2_1: vz1 = vx1 + v1 VS2_2 -
- VS2_2: vz2 = vx2 + v1 VS2_3 -
- VS2_3: vz3 = vx3 + v1 - -
- S2: z = x + 1 - VS2_0 */
-
- vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
+ vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
/* Arguments are ready. Create the new vector stmt. */
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
@@ -7235,88 +7168,108 @@ vectorizable_operation (vec_info *vinfo,
vop1 = ((op_type == binary_op || op_type == ternary_op)
? vec_oprnds1[i] : NULL_TREE);
vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
- if (using_emulated_vectors_p
- && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
+ if (using_emulated_vectors_p)
{
/* Lower the operation. This follows vector lowering. */
- unsigned int width = vector_element_bits (vectype);
- tree inner_type = TREE_TYPE (vectype);
- tree word_type
- = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
- HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
- tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
- tree high_bits
- = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
+ tree word_type = build_nonstandard_integer_type
+ (GET_MODE_BITSIZE (vec_mode).to_constant (), 1);
tree wvop0 = make_ssa_name (word_type);
new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
build1 (VIEW_CONVERT_EXPR,
word_type, vop0));
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- tree result_low, signs;
- if (code == PLUS_EXPR || code == MINUS_EXPR)
+ tree wvop1 = NULL_TREE;
+ if (vop1)
{
- tree wvop1 = make_ssa_name (word_type);
+ wvop1 = make_ssa_name (word_type);
new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
build1 (VIEW_CONVERT_EXPR,
word_type, vop1));
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- signs = make_ssa_name (word_type);
- new_stmt = gimple_build_assign (signs,
- BIT_XOR_EXPR, wvop0, wvop1);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- tree b_low = make_ssa_name (word_type);
- new_stmt = gimple_build_assign (b_low,
- BIT_AND_EXPR, wvop1, low_bits);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- tree a_low = make_ssa_name (word_type);
- if (code == PLUS_EXPR)
- new_stmt = gimple_build_assign (a_low,
- BIT_AND_EXPR, wvop0, low_bits);
- else
- new_stmt = gimple_build_assign (a_low,
- BIT_IOR_EXPR, wvop0, high_bits);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- if (code == MINUS_EXPR)
+ }
+
+ tree result_low;
+ if (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
+ {
+ unsigned int width = vector_element_bits (vectype);
+ tree inner_type = TREE_TYPE (vectype);
+ HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
+ tree low_bits
+ = build_replicated_int_cst (word_type, width, max >> 1);
+ tree high_bits
+ = build_replicated_int_cst (word_type,
+ width, max & ~(max >> 1));
+ tree signs;
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
+ {
+ signs = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (signs,
+ BIT_XOR_EXPR, wvop0, wvop1);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tree b_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (b_low, BIT_AND_EXPR,
+ wvop1, low_bits);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ tree a_low = make_ssa_name (word_type);
+ if (code == PLUS_EXPR)
+ new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
+ wvop0, low_bits);
+ else
+ new_stmt = gimple_build_assign (a_low, BIT_IOR_EXPR,
+ wvop0, high_bits);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ if (code == MINUS_EXPR)
+ {
+ new_stmt = gimple_build_assign (NULL_TREE,
+ BIT_NOT_EXPR, signs);
+ signs = make_ssa_name (word_type);
+ gimple_assign_set_lhs (new_stmt, signs);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ }
+ new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
+ signs, high_bits);
+ signs = make_ssa_name (word_type);
+ gimple_assign_set_lhs (new_stmt, signs);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ result_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (result_low, code,
+ a_low, b_low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ }
+ else /* if (code == NEGATE_EXPR) */
{
- new_stmt = gimple_build_assign (NULL_TREE,
- BIT_NOT_EXPR, signs);
+ tree a_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (a_low, BIT_AND_EXPR,
+ wvop0, low_bits);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ signs = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (NULL_TREE, BIT_AND_EXPR,
+ signs, high_bits);
signs = make_ssa_name (word_type);
gimple_assign_set_lhs (new_stmt, signs);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ result_low = make_ssa_name (word_type);
+ new_stmt = gimple_build_assign (result_low,
+ MINUS_EXPR, high_bits, a_low);
+ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
- new_stmt = gimple_build_assign (NULL_TREE,
- BIT_AND_EXPR, signs, high_bits);
- signs = make_ssa_name (word_type);
- gimple_assign_set_lhs (new_stmt, signs);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR,
+ result_low, signs);
result_low = make_ssa_name (word_type);
- new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
+ gimple_assign_set_lhs (new_stmt, result_low);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
else
{
- tree a_low = make_ssa_name (word_type);
- new_stmt = gimple_build_assign (a_low,
- BIT_AND_EXPR, wvop0, low_bits);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- signs = make_ssa_name (word_type);
- new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
- new_stmt = gimple_build_assign (NULL_TREE,
- BIT_AND_EXPR, signs, high_bits);
- signs = make_ssa_name (word_type);
- gimple_assign_set_lhs (new_stmt, signs);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+ new_stmt = gimple_build_assign (NULL_TREE, code, wvop0, wvop1);
result_low = make_ssa_name (word_type);
- new_stmt = gimple_build_assign (result_low,
- MINUS_EXPR, high_bits, a_low);
+ gimple_assign_set_lhs (new_stmt, result_low);
vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
}
- new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
- signs);
- result_low = make_ssa_name (word_type);
- gimple_assign_set_lhs (new_stmt, result_low);
- vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
build1 (VIEW_CONVERT_EXPR,
vectype, result_low));
@@ -7329,7 +7282,7 @@ vectorizable_operation (vec_info *vinfo,
tree mask;
if (masked_loop_p)
mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num * ncopies, vectype, i);
+ vec_num, vectype, i);
else
/* Dummy mask. */
mask = build_minus_one_cst (truth_type_for (vectype));
@@ -7356,7 +7309,7 @@ vectorizable_operation (vec_info *vinfo,
if (len_loop_p)
{
tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
- vec_num * ncopies, vectype, i, 1);
+ vec_num, vectype, i, 1);
signed char biasval
= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
tree bias = build_int_cst (intQI_type_node, biasval);
@@ -7383,21 +7336,19 @@ vectorizable_operation (vec_info *vinfo,
&& code == BIT_AND_EXPR
&& VECTOR_BOOLEAN_TYPE_P (vectype))
{
- if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
- ncopies}))
+ if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, 1 }))
{
mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num * ncopies, vectype, i);
+ vec_num, vectype, i);
vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
vop0, gsi);
}
- if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
- ncopies }))
+ if (loop_vinfo->scalar_cond_masked_set.contains ({ op1, 1 }))
{
mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num * ncopies, vectype, i);
+ vec_num, vectype, i);
vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
vop1, gsi);
@@ -7428,15 +7379,9 @@ vectorizable_operation (vec_info *vinfo,
new_stmt, gsi);
}
- if (slp_node)
- slp_node->push_vec_def (new_stmt);
- else
- STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
+ slp_node->push_vec_def (new_stmt);
}
- if (!slp_node)
- *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
-
vec_oprnds0.release ();
vec_oprnds1.release ();
vec_oprnds2.release ();
diff --git a/gcc/vr-values.cc b/gcc/vr-values.cc
index 6603d90..4c78759 100644
--- a/gcc/vr-values.cc
+++ b/gcc/vr-values.cc
@@ -1996,10 +1996,13 @@ simplify_using_ranges::simplify (gimple_stmt_iterator *gsi)
case BIT_AND_EXPR:
case BIT_IOR_EXPR:
- /* Optimize away BIT_AND_EXPR and BIT_IOR_EXPR
- if all the bits being cleared are already cleared or
- all the bits being set are already set. */
- if (INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
+ /* Optimize away BIT_AND_EXPR and BIT_IOR_EXPR if all the bits
+ being cleared are already cleared or all the bits being set
+ are already set. Beware that boolean types must be handled
+ logically (see range-op.cc) unless they have precision 1. */
+ if (INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
+ && (TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE
+ || TYPE_PRECISION (TREE_TYPE (rhs1)) == 1))
return simplify_bit_ops_using_ranges (gsi, stmt);
break;