aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config/aarch64/aarch64-early-ra.cc283
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/accumulators_1.c200
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c65
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c72
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c96
5 files changed, 573 insertions, 143 deletions
diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index f05869b..484db94 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -256,6 +256,8 @@ private:
struct allocno_info
{
allocno_group_info *group ();
+ bool is_shared ();
+ bool is_equiv_to (unsigned int);
// The allocno's unique identifier.
unsigned int id;
@@ -292,6 +294,10 @@ private:
// so that it cannot be tied to the destination of the instruction.
unsigned int is_earlyclobbered : 1;
+ // True if this allocno is known to be equivalent to related_allocno
+ // for the whole of this allocno's lifetime.
+ unsigned int is_equiv : 1;
+
// The inclusive range of program points spanned by the allocno.
// START_POINT >= END_POINT.
unsigned int start_point;
@@ -302,9 +308,15 @@ private:
// See callers of record_copy for what counts as a copy.
unsigned int copy_dest;
- // If this field is not INVALID_ALLOCNO, this allocno is known to be
- // equivalent to EQUIV_ALLOCNO for the whole of this allocno's lifetime.
- unsigned int equiv_allocno;
+ // If this field is not INVALID_ALLOCNO, it indicates one of two things:
+ //
+ // - if is_equiv, this allocno is equivalent to related_allocno for
+ // the whole of this allocno's lifetime.
+ //
+ // - if !is_equiv, this allocno's live range is a subrange of
+ // related_allocno's and we have committed to making this allocno
+ // share whatever register related_allocno uses.
+ unsigned int related_allocno;
union
{
@@ -319,9 +331,18 @@ private:
unsigned int chain_next;
};
- // The previous chained allocno in program order (i.e. at higher
- // program points), or INVALID_ALLOCNO if none.
- unsigned int chain_prev;
+ union
+ {
+ // The program point before start_point at which the allocno was
+ // last used, or END_OF_REGION if none. This is only used temporarily
+ // while recording allocnos; after that, chain_prev below is used
+ // instead.
+ unsigned int last_use_point;
+
+ // The previous chained allocno in program order (i.e. at higher
+ // program points), or INVALID_ALLOCNO if none.
+ unsigned int chain_prev;
+ };
};
// Information about a full allocno group or a subgroup of it.
@@ -380,6 +401,9 @@ private:
// The clique's representative group.
allocno_group_info *group;
+ // The number of FPR preferences recorded in fpr_preferences.
+ unsigned int num_fpr_preferences;
+
// Weights in favor of choosing each FPR as the first register for GROUP.
int8_t fpr_preferences[32];
};
@@ -415,7 +439,7 @@ private:
void record_fpr_def (unsigned int);
void record_allocno_use (allocno_info *);
void record_allocno_def (allocno_info *);
- bool valid_equivalence_p (allocno_info *, allocno_info *);
+ allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
void record_copy (rtx, rtx, bool = false);
void record_constraints (rtx_insn *);
void record_artificial_refs (unsigned int);
@@ -431,6 +455,8 @@ private:
int rate_chain (allocno_info *, allocno_info *);
static int cmp_chain_candidates (const void *, const void *);
void chain_allocnos (unsigned int &, unsigned int &);
+ void merge_fpr_info (allocno_group_info *, allocno_group_info *,
+ unsigned int);
void set_single_color_rep (allocno_info *, allocno_group_info *,
unsigned int);
void set_color_rep (allocno_group_info *, allocno_group_info *,
@@ -445,7 +471,7 @@ private:
void process_copies ();
- static int cmp_decreasing_size (const void *, const void *);
+ static int cmp_allocation_order (const void *, const void *);
void allocate_colors ();
allocno_info *find_independent_subchain (allocno_info *);
color_info *find_oldest_color (unsigned int, unsigned int);
@@ -527,6 +553,9 @@ private:
// All allocnos, by increasing START_POINT.
auto_vec<allocno_info *> m_sorted_allocnos;
+ // Allocnos for which is_shared is true.
+ auto_vec<allocno_info *> m_shared_allocnos;
+
// All colors, by index.
auto_vec<color_info *> m_colors;
@@ -704,6 +733,22 @@ early_ra::allocno_info::group ()
return reinterpret_cast<allocno_group_info *> (chain_end - group_size) - 1;
}
+// Return true if this allocno's live range is a subrange of related_allocno's
+// and if we have committed to making this allocno share whatever register
+// related_allocno uses.
+inline bool
+early_ra::allocno_info::is_shared ()
+{
+ return related_allocno != INVALID_ALLOCNO && !is_equiv;
+}
+
+// Return true if this allocno is known to be equivalent to ALLOCNO.
+inline bool
+early_ra::allocno_info::is_equiv_to (unsigned int allocno)
+{
+ return is_equiv && related_allocno == allocno;
+}
+
// Return the allocnos in the subgroup.
inline array_slice<early_ra::allocno_info>
early_ra::allocno_subgroup::allocnos ()
@@ -859,8 +904,8 @@ early_ra::dump_allocnos ()
}
fprintf (dump_file, "\nAllocno chains:\n");
- fprintf (dump_file, " %5s %12s %12s %5s %5s %5s %5s\n",
- "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "FPR");
+ fprintf (dump_file, " %5s %12s %12s %6s %5s %5s %6s %5s\n",
+ "Id", "Regno", "Range ", "Src", "Dest", "Equiv", "Shared", "FPR");
for (unsigned int ai = 0; ai < m_allocnos.length (); ++ai)
{
auto *allocno = m_allocnos[ai];
@@ -877,7 +922,7 @@ early_ra::dump_allocnos ()
fprintf (dump_file, " %12s", buffer);
snprintf (buffer, sizeof (buffer), "[%d,%d]",
allocno->start_point, allocno->end_point);
- fprintf (dump_file, " %11s%s %5s", buffer,
+ fprintf (dump_file, " %11s%s %6s", buffer,
allocno->is_earlyclobbered ? "*" : " ",
allocno->is_strong_copy_dest ? "Strong"
: allocno->is_copy_dest ? "Yes" : "-");
@@ -885,10 +930,14 @@ early_ra::dump_allocnos ()
fprintf (dump_file, " %5s", "-");
else
fprintf (dump_file, " %5d", allocno->copy_dest);
- if (allocno->equiv_allocno != INVALID_ALLOCNO)
- fprintf (dump_file, " %5d", allocno->equiv_allocno);
+ if (allocno->is_equiv)
+ fprintf (dump_file, " %5d", allocno->related_allocno);
else
fprintf (dump_file, " %5s", "-");
+ if (allocno->is_shared ())
+ fprintf (dump_file, " %6d", allocno->related_allocno);
+ else
+ fprintf (dump_file, " %6s", "-");
if (allocno->hard_regno == FIRST_PSEUDO_REGISTER)
fprintf (dump_file, " %5s", "-");
else
@@ -1151,7 +1200,7 @@ early_ra::fpr_preference (unsigned int regno)
return 3;
else if (flags & NEEDS_FPR32)
return 2;
- else if (!(flags & ALLOWS_FPR32))
+ else if (!(flags & ALLOWS_FPR32) && (flags & ALLOWS_NONFPR))
return -2;
else if ((flags & HAS_FPR_COPY) && !(flags & HAS_NONFPR_COPY))
return 1;
@@ -1230,6 +1279,7 @@ early_ra::start_new_region ()
m_allocno_copies.truncate (0);
m_allocnos.truncate (0);
m_sorted_allocnos.truncate (0);
+ m_shared_allocnos.truncate (0);
m_colors.truncate (0);
m_insn_ranges.truncate (0);
for (auto &fpr_ranges : m_fpr_ranges)
@@ -1291,7 +1341,7 @@ early_ra::create_allocno_group (unsigned int regno, unsigned int size)
allocno->start_point = END_OF_REGION;
allocno->end_point = START_OF_REGION;
allocno->copy_dest = INVALID_ALLOCNO;
- allocno->equiv_allocno = INVALID_ALLOCNO;
+ allocno->related_allocno = INVALID_ALLOCNO;
allocno->chain_next = INVALID_ALLOCNO;
allocno->chain_prev = INVALID_ALLOCNO;
m_allocnos.safe_push (allocno);
@@ -1423,16 +1473,24 @@ early_ra::record_fpr_def (unsigned int regno)
void
early_ra::record_allocno_use (allocno_info *allocno)
{
+ if (allocno->start_point == m_current_point)
+ return;
+
+ gcc_checking_assert (!allocno->is_shared ());
bitmap_set_bit (m_live_allocnos, allocno->id);
if (allocno->end_point > m_current_point)
{
allocno->end_point = m_current_point;
allocno->last_def_point = START_OF_REGION;
+ allocno->last_use_point = END_OF_REGION;
}
+ else
+ allocno->last_use_point = allocno->start_point;
allocno->start_point = m_current_point;
allocno->is_copy_dest = false;
- allocno->is_strong_copy_dest = false;
- allocno->equiv_allocno = INVALID_ALLOCNO;
+ allocno->is_strong_copy_src = false;
+ allocno->related_allocno = INVALID_ALLOCNO;
+ allocno->is_equiv = false;
}
// Record a definition of the allocno with index AI at the current program
@@ -1441,37 +1499,89 @@ early_ra::record_allocno_use (allocno_info *allocno)
void
early_ra::record_allocno_def (allocno_info *allocno)
{
+ gcc_checking_assert (!allocno->is_shared ());
+ allocno->last_use_point = allocno->start_point;
allocno->last_def_point = m_current_point;
allocno->start_point = m_current_point;
allocno->num_defs = MIN (allocno->num_defs + 1, 2);
gcc_checking_assert (!allocno->is_copy_dest
- && !allocno->is_strong_copy_dest);
+ && !allocno->is_strong_copy_src);
if (!bitmap_clear_bit (m_live_allocnos, allocno->id))
gcc_unreachable ();
}
-// Return true if a move from SRC_ALLOCNO to DEST_ALLOCNO could be treated
-// as an equivalence.
-bool
-early_ra::valid_equivalence_p (allocno_info *dest_allocno,
- allocno_info *src_allocno)
+// SRC_ALLOCNO is copied or tied to DEST_ALLOCNO; IS_EQUIV is true if the
+// two allocnos are known to be equal. See whether we can mark a chain of
+// allocnos ending at DEST_ALLOCNO as related to SRC_ALLOCNO. Return the
+// start of the chain if so, otherwise return null.
+//
+// If IS_EQUIV, a chain that contains just DEST_ALLOCNO should be treated
+// as an equivalence. Otherwise the chain should be shared with SRC_ALLOCNO.
+//
+// Sharing chains are a rather hacky workaround for the fact that we
+// don't collect segmented live ranges, and that in the end we want to do
+// simple interval graph coloring.
+early_ra::allocno_info *
+early_ra::find_related_start (allocno_info *dest_allocno,
+ allocno_info *src_allocno, bool is_equiv)
{
- if (src_allocno->end_point > dest_allocno->end_point)
- // The src allocno dies first.
- return false;
-
- if (src_allocno->num_defs != 0)
+ allocno_info *res = nullptr;
+ for (;;)
{
- if (dest_allocno->end_point < m_current_bb_point)
- // We don't currently track enough information to handle multiple
- // definitions across basic block boundaries.
- return false;
+ if (src_allocno->end_point > dest_allocno->end_point)
+ // The src allocno dies first.
+ return res;
- if (src_allocno->last_def_point >= dest_allocno->end_point)
- // There is another definition during the destination's live range.
- return false;
+ if (src_allocno->num_defs != 0)
+ {
+ if (dest_allocno->end_point < m_current_bb_point)
+ // We don't currently track enough information to handle multiple
+ // definitions across basic block boundaries.
+ return res;
+
+ if (src_allocno->last_def_point >= dest_allocno->end_point)
+ // There is another definition during the destination's live range.
+ return res;
+ }
+ if (is_equiv)
+ {
+ if (dest_allocno->num_defs == 1)
+ // dest_allocno is equivalent to src_allocno for dest_allocno's
+ // entire live range. Fall back to that if we can't establish
+ // a sharing chain.
+ res = dest_allocno;
+ }
+ else
+ {
+ if (src_allocno->last_use_point >= dest_allocno->end_point)
+ // src_allocno is live during dest_allocno's live range,
+ // and the two allocnos do not necessarily have the same value.
+ return res;
+ }
+
+ if (dest_allocno->group_size != 1
+ || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
+ // Currently only single allocnos that are defined once can
+ // share registers with non-equivalent allocnos. This could be
+ // relaxed, but at the time of writing, aggregates are not valid
+ // SSA names and so generally only use a single pseudo throughout
+ // their lifetime.
+ return res;
+
+ if (dest_allocno->copy_dest == src_allocno->id)
+ // We've found a complete and valid sharing chain.
+ return dest_allocno;
+
+ if (dest_allocno->copy_dest == INVALID_ALLOCNO)
+ return res;
+
+ auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
+ if (!is_chain_candidate (dest_allocno, next_allocno))
+ return res;
+
+ dest_allocno = next_allocno;
+ is_equiv = false;
}
- return dest_allocno->num_defs == 1;
}
// Record any relevant allocno-related information for an actual or imagined
@@ -1558,9 +1668,21 @@ early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
src_allocno->hard_regno = dest_allocno->hard_regno;
dest_allocno->is_copy_dest = 1;
}
- else if (from_move_p
- && valid_equivalence_p (dest_allocno, src_allocno))
- dest_allocno->equiv_allocno = src_allocno->id;
+ else if (auto *start_allocno = find_related_start (dest_allocno,
+ src_allocno,
+ from_move_p))
+ {
+ auto *next_allocno = dest_allocno;
+ for (;;)
+ {
+ next_allocno->related_allocno = src_allocno->id;
+ next_allocno->is_equiv = (start_allocno == dest_allocno
+ && from_move_p);
+ if (next_allocno == start_allocno)
+ break;
+ next_allocno = m_allocnos[next_allocno->copy_dest];
+ }
+ }
}
}
}
@@ -1876,13 +1998,13 @@ early_ra::find_strided_accesses ()
{
// This function forms a graph of allocnos, linked by equivalences and
// natural copy chains. It temporarily uses chain_next to record the
- // reverse of equivalence edges (equiv_allocno) and chain_prev to record
+ // reverse of equivalence edges (related_allocno) and chain_prev to record
// the reverse of copy edges (copy_dest).
unsigned int allocno_info::*links[] = {
&allocno_info::chain_next,
&allocno_info::chain_prev,
&allocno_info::copy_dest,
- &allocno_info::equiv_allocno
+ &allocno_info::related_allocno
};
// Set up the temporary reverse edges. Check for strong copy chains.
@@ -1891,12 +2013,12 @@ early_ra::find_strided_accesses ()
auto *allocno1 = m_allocnos[i];
if (allocno1->copy_dest != INVALID_ALLOCNO)
m_allocnos[allocno1->copy_dest]->chain_prev = allocno1->id;
- if (allocno1->equiv_allocno != INVALID_ALLOCNO)
- m_allocnos[allocno1->equiv_allocno]->chain_next = allocno1->id;
+ if (allocno1->related_allocno != INVALID_ALLOCNO)
+ m_allocnos[allocno1->related_allocno]->chain_next = allocno1->id;
if (allocno1->is_strong_copy_src
- && (allocno1->is_copy_dest
- || !consider_strong_copy_src_chain (allocno1)))
+ && !allocno1->is_copy_dest
+ && !consider_strong_copy_src_chain (allocno1))
allocno1->is_strong_copy_src = false;
}
@@ -2062,11 +2184,14 @@ early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
bool
early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
{
- if (allocno1->equiv_allocno != INVALID_ALLOCNO)
- allocno1 = m_allocnos[allocno1->equiv_allocno];
+ if (allocno2->is_shared ())
+ return false;
+
+ if (allocno1->is_equiv)
+ allocno1 = m_allocnos[allocno1->related_allocno];
if (allocno2->start_point >= allocno1->end_point
- && allocno2->equiv_allocno != allocno1->id)
+ && !allocno2->is_equiv_to (allocno1->id))
return false;
if (allocno2->is_strong_copy_dest)
@@ -2156,12 +2281,12 @@ early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
&& head1->chain_prev == INVALID_ALLOCNO
&& head2->chain_prev == INVALID_ALLOCNO);
- if (head1->equiv_allocno != INVALID_ALLOCNO
- && m_allocnos[head1->equiv_allocno]->copy_dest == headi2)
+ if (head1->is_equiv
+ && m_allocnos[head1->related_allocno]->copy_dest == headi2)
{
head1->is_copy_dest = head2->is_copy_dest;
head1->is_strong_copy_dest = head2->is_strong_copy_dest;
- m_allocnos[head1->equiv_allocno]->copy_dest = headi1;
+ m_allocnos[head1->related_allocno]->copy_dest = headi1;
}
head1->chain_next = headi2;
head2->chain_prev = headi1;
@@ -2170,6 +2295,18 @@ early_ra::chain_allocnos (unsigned int &headi1, unsigned int &headi2)
}
}
+// Add GROUP2's FPR information to GROUP1's, given that GROUP2 starts
+// OFFSET allocnos into GROUP2.
+void
+early_ra::merge_fpr_info (allocno_group_info *group1,
+ allocno_group_info *group2,
+ unsigned int offset)
+{
+ group1->fpr_size = std::max (group1->fpr_size, group2->fpr_size);
+ group1->fpr_candidates &= (group2->fpr_candidates
+ >> (offset * group1->stride));
+}
+
// Set the color representative of ALLOCNO's group to REP, such that ALLOCNO
// ends being at allocno offset REP_OFFSET from the start of REP.
void
@@ -2185,9 +2322,7 @@ early_ra::set_single_color_rep (allocno_info *allocno, allocno_group_info *rep,
unsigned int factor = group->stride / rep->stride;
gcc_checking_assert (rep_offset >= allocno->offset * factor);
group->color_rep_offset = rep_offset - allocno->offset * factor;
- rep->fpr_size = std::max (rep->fpr_size, group->fpr_size);
- rep->fpr_candidates &= (group->fpr_candidates
- >> (group->color_rep_offset * rep->stride));
+ merge_fpr_info (rep, group, group->color_rep_offset);
}
// REP1 and REP2 are color representatives. Change REP1's color representative
@@ -2299,7 +2434,7 @@ early_ra::try_to_chain_allocnos (allocno_info *allocno1,
auto *head2 = m_allocnos[headi2];
if (head1->chain_next != INVALID_ALLOCNO)
return false;
- if (head2->equiv_allocno != head1->id
+ if (!head2->is_equiv_to (head1->id)
&& head1->end_point <= head2->start_point)
return false;
}
@@ -2429,6 +2564,18 @@ early_ra::form_chains ()
group1->fpr_candidates &= ~fprs >> allocno1->offset;
}
+ if (allocno1->is_shared ())
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " Allocno %d shares the same hard register"
+ " as allocno %d\n", allocno1->id,
+ allocno1->related_allocno);
+ auto *allocno2 = m_allocnos[allocno1->related_allocno];
+ merge_fpr_info (allocno2->group (), group1, allocno2->offset);
+ m_shared_allocnos.safe_push (allocno1);
+ continue;
+ }
+
// Find earlier allocnos (in processing order) that could be chained
// to this one.
candidates.truncate (0);
@@ -2470,6 +2617,9 @@ early_ra::form_chains ()
for (unsigned int hi = m_sorted_allocnos.length (); hi-- > 0; )
{
auto *allocno = m_sorted_allocnos[hi];
+ if (allocno->is_shared ())
+ continue;
+
auto *rep = allocno->group ()->color_rep ();
if (rep->has_color)
continue;
@@ -2582,19 +2732,27 @@ early_ra::process_copies ()
auto *color = m_colors[group->color_rep ()->color];
color->fpr_preferences[fpr] = MIN (color->fpr_preferences[fpr]
+ copy.weight, 127);
+ color->num_fpr_preferences += copy.weight;
}
}
// Compare the colors at *COLOR1_PTR and *COLOR2_PTR and return a <=>
-// result that puts colors in order of decreasing size.
+// result that puts colors in allocation order.
int
-early_ra::cmp_decreasing_size (const void *color1_ptr, const void *color2_ptr)
+early_ra::cmp_allocation_order (const void *color1_ptr, const void *color2_ptr)
{
auto *color1 = *(color_info *const *) color1_ptr;
auto *color2 = *(color_info *const *) color2_ptr;
+ // Allocate bigger groups before smaller groups.
if (color1->group->size != color2->group->size)
return color1->group->size > color2->group->size ? -1 : 1;
+
+ // Allocate groups with stronger FPR preferences before groups with weaker
+ // FPR preferences.
+ if (color1->num_fpr_preferences != color2->num_fpr_preferences)
+ return color1->num_fpr_preferences > color2->num_fpr_preferences ? -1 : 1;
+
return (color1->id < color2->id ? -1
: color1->id == color2->id ? 0 : 1);
}
@@ -2610,7 +2768,7 @@ early_ra::allocate_colors ()
auto_vec<color_info *> sorted_colors;
sorted_colors.safe_splice (m_colors);
- sorted_colors.qsort (cmp_decreasing_size);
+ sorted_colors.qsort (cmp_allocation_order);
for (unsigned int i = 0; i < 32; ++i)
if (!crtl->abi->clobbers_full_reg_p (V0_REGNUM + i))
@@ -2810,12 +2968,16 @@ early_ra::finalize_allocation ()
{
for (auto *allocno : m_allocnos)
{
+ if (allocno->is_shared ())
+ continue;
auto *group = allocno->group ();
auto *rep = group->color_rep ();
auto rep_regno = m_colors[rep->color]->hard_regno;
auto group_regno = rep_regno + group->color_rep_offset;
allocno->hard_regno = group_regno + allocno->offset * group->stride;
}
+ for (auto *allocno : m_shared_allocnos)
+ allocno->hard_regno = m_allocnos[allocno->related_allocno]->hard_regno;
}
// Replace any allocno references in REFS with the allocated register.
@@ -3094,7 +3256,10 @@ void
early_ra::process_region ()
{
for (auto *allocno : m_allocnos)
- allocno->chain_next = INVALID_ALLOCNO;
+ {
+ allocno->chain_next = INVALID_ALLOCNO;
+ allocno->chain_prev = INVALID_ALLOCNO;
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
{
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/accumulators_1.c b/gcc/testsuite/gcc.target/aarch64/sve/accumulators_1.c
new file mode 100644
index 0000000..bdb97d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/accumulators_1.c
@@ -0,0 +1,200 @@
+/* { dg-options "-O2 -fno-rename-registers" } */
+
+#include <arm_sve.h>
+
+#define ACCUMULATE(VAR, OP) \
+ do \
+ { \
+ VAR = OP (pg, VAR, svld1 (pg, ptr1), svld1 (pg, ptr2)); \
+ ptr1 += svcntw (); \
+ ptr2 += svcntw (); \
+ } \
+ while (0)
+
+svint32_t
+f1 (svint32_t x, int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ ACCUMULATE (x, svmla_x);
+ return x;
+}
+
+svint32_t
+f2 (svint32_t x, int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x, svmla_x);
+ ACCUMULATE (x, svmls_x);
+ }
+ return x;
+}
+
+svint32_t
+f3 (svint32_t x, int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x, svmla_x);
+ ACCUMULATE (x, svmls_x);
+ ACCUMULATE (x, svmad_x);
+ ACCUMULATE (x, svmsb_x);
+ }
+ return x;
+}
+
+void
+f4 (svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x3,
+ int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmla_x);
+ ACCUMULATE (x1, svmla_x);
+ ACCUMULATE (x2, svmla_x);
+ ACCUMULATE (x3, svmla_x);
+ }
+ svst1_vnum (pg, ptr1, 0, x0);
+ svst1_vnum (pg, ptr1, 1, x1);
+ svst1_vnum (pg, ptr1, 2, x2);
+ svst1_vnum (pg, ptr1, 3, x3);
+}
+
+void
+f5 (svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x3,
+ int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmla_x);
+ ACCUMULATE (x1, svmla_x);
+ ACCUMULATE (x2, svmla_x);
+ ACCUMULATE (x3, svmla_x);
+ }
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmls_x);
+ ACCUMULATE (x1, svmls_x);
+ ACCUMULATE (x2, svmls_x);
+ ACCUMULATE (x3, svmls_x);
+ }
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmad_x);
+ ACCUMULATE (x1, svmad_x);
+ ACCUMULATE (x2, svmad_x);
+ ACCUMULATE (x3, svmad_x);
+ }
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmsb_x);
+ ACCUMULATE (x1, svmsb_x);
+ ACCUMULATE (x2, svmsb_x);
+ ACCUMULATE (x3, svmsb_x);
+ }
+ svst1_vnum (pg, ptr1, 0, x0);
+ svst1_vnum (pg, ptr1, 1, x1);
+ svst1_vnum (pg, ptr1, 2, x2);
+ svst1_vnum (pg, ptr1, 3, x3);
+}
+
+void
+f6 (svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x3,
+ int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmla_x);
+ ACCUMULATE (x1, svmla_x);
+ ACCUMULATE (x2, svmla_x);
+ ACCUMULATE (x3, svmla_x);
+ ACCUMULATE (x0, svmls_x);
+ ACCUMULATE (x1, svmls_x);
+ ACCUMULATE (x2, svmls_x);
+ ACCUMULATE (x3, svmls_x);
+ }
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmad_x);
+ ACCUMULATE (x1, svmad_x);
+ ACCUMULATE (x2, svmad_x);
+ ACCUMULATE (x3, svmad_x);
+ ACCUMULATE (x0, svmsb_x);
+ ACCUMULATE (x1, svmsb_x);
+ ACCUMULATE (x2, svmsb_x);
+ ACCUMULATE (x3, svmsb_x);
+ }
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmad_x);
+ ACCUMULATE (x1, svmad_x);
+ ACCUMULATE (x2, svmad_x);
+ ACCUMULATE (x3, svmad_x);
+ ACCUMULATE (x0, svmsb_x);
+ ACCUMULATE (x1, svmsb_x);
+ ACCUMULATE (x2, svmsb_x);
+ ACCUMULATE (x3, svmsb_x);
+ }
+ svst1_vnum (pg, ptr1, 0, x0);
+ svst1_vnum (pg, ptr1, 1, x1);
+ svst1_vnum (pg, ptr1, 2, x2);
+ svst1_vnum (pg, ptr1, 3, x3);
+}
+
+void
+f7 (svint32_t x0, svint32_t x1, svint32_t x2, svint32_t x3,
+ int32_t *ptr1, int32_t *ptr2)
+{
+ svbool_t pg = svptrue_b8 ();
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmla_x);
+ ACCUMULATE (x1, svmla_x);
+ ACCUMULATE (x2, svmla_x);
+ ACCUMULATE (x3, svmla_x);
+ ACCUMULATE (x0, svmls_x);
+ ACCUMULATE (x1, svmls_x);
+ ACCUMULATE (x2, svmls_x);
+ ACCUMULATE (x3, svmls_x);
+ ACCUMULATE (x0, svmad_x);
+ ACCUMULATE (x1, svmad_x);
+ ACCUMULATE (x2, svmad_x);
+ ACCUMULATE (x3, svmad_x);
+ ACCUMULATE (x0, svmsb_x);
+ ACCUMULATE (x1, svmsb_x);
+ ACCUMULATE (x2, svmsb_x);
+ ACCUMULATE (x3, svmsb_x);
+ }
+ for (int i = 0; i < 100; ++i)
+ {
+ ACCUMULATE (x0, svmla_x);
+ ACCUMULATE (x1, svmla_x);
+ ACCUMULATE (x2, svmla_x);
+ ACCUMULATE (x3, svmla_x);
+ ACCUMULATE (x0, svmls_x);
+ ACCUMULATE (x1, svmls_x);
+ ACCUMULATE (x2, svmls_x);
+ ACCUMULATE (x3, svmls_x);
+ ACCUMULATE (x0, svmad_x);
+ ACCUMULATE (x1, svmad_x);
+ ACCUMULATE (x2, svmad_x);
+ ACCUMULATE (x3, svmad_x);
+ ACCUMULATE (x0, svmsb_x);
+ ACCUMULATE (x1, svmsb_x);
+ ACCUMULATE (x2, svmsb_x);
+ ACCUMULATE (x3, svmsb_x);
+ }
+ svst1_vnum (pg, ptr1, 0, x0);
+ svst1_vnum (pg, ptr1, 1, x1);
+ svst1_vnum (pg, ptr1, 2, x2);
+ svst1_vnum (pg, ptr1, 3, x3);
+}
+
+/* { dg-final { scan-assembler-not {\tmov\tz} } } */
+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c
index 3b9245e..7e7d890 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c
@@ -4,8 +4,13 @@
/*
** create2_s8:
+** (
** mov z0\.d, z6\.d
** mov z1\.d, z4\.d
+** |
+** mov z1\.d, z4\.d
+** mov z0\.d, z6\.d
+** )
** ret
*/
TEST_CREATE (create2_s8, svint8x2_t, svint8_t,
@@ -14,8 +19,13 @@ TEST_CREATE (create2_s8, svint8x2_t, svint8_t,
/*
** create2_u8:
+** (
** mov z0\.d, z4\.d
** mov z1\.d, z6\.d
+** |
+** mov z1\.d, z6\.d
+** mov z0\.d, z4\.d
+** )
** ret
*/
TEST_CREATE (create2_u8, svuint8x2_t, svuint8_t,
@@ -24,8 +34,13 @@ TEST_CREATE (create2_u8, svuint8x2_t, svuint8_t,
/*
** create2_s16:
+** (
** mov z0\.d, z6\.d
** mov z1\.d, z4\.d
+** |
+** mov z1\.d, z4\.d
+** mov z0\.d, z6\.d
+** )
** ret
*/
TEST_CREATE (create2_s16, svint16x2_t, svint16_t,
@@ -34,8 +49,13 @@ TEST_CREATE (create2_s16, svint16x2_t, svint16_t,
/*
** create2_u16:
+** (
** mov z0\.d, z6\.d
** mov z1\.d, z5\.d
+** |
+** mov z1\.d, z5\.d
+** mov z0\.d, z6\.d
+** )
** ret
*/
TEST_CREATE (create2_u16, svuint16x2_t, svuint16_t,
@@ -44,8 +64,13 @@ TEST_CREATE (create2_u16, svuint16x2_t, svuint16_t,
/*
** create2_bf16:
+** (
** mov z0\.d, z4\.d
** mov z1\.d, z5\.d
+** |
+** mov z1\.d, z5\.d
+** mov z0\.d, z4\.d
+** )
** ret
*/
TEST_CREATE (create2_bf16, svbfloat16x2_t, svbfloat16_t,
@@ -54,8 +79,13 @@ TEST_CREATE (create2_bf16, svbfloat16x2_t, svbfloat16_t,
/*
** create2_f16:
+** (
** mov z0\.d, z4\.d
** mov z1\.d, z5\.d
+** |
+** mov z1\.d, z5\.d
+** mov z0\.d, z4\.d
+** )
** ret
*/
TEST_CREATE (create2_f16, svfloat16x2_t, svfloat16_t,
@@ -64,8 +94,13 @@ TEST_CREATE (create2_f16, svfloat16x2_t, svfloat16_t,
/*
** create2_s32:
+** (
** mov z0\.d, z6\.d
** mov z1\.d, z7\.d
+** |
+** mov z1\.d, z7\.d
+** mov z0\.d, z6\.d
+** )
** ret
*/
TEST_CREATE (create2_s32, svint32x2_t, svint32_t,
@@ -74,8 +109,13 @@ TEST_CREATE (create2_s32, svint32x2_t, svint32_t,
/*
** create2_u32:
+** (
** mov z0\.d, z7\.d
** mov z1\.d, z5\.d
+** |
+** mov z1\.d, z5\.d
+** mov z0\.d, z7\.d
+** )
** ret
*/
TEST_CREATE (create2_u32, svuint32x2_t, svuint32_t,
@@ -84,8 +124,13 @@ TEST_CREATE (create2_u32, svuint32x2_t, svuint32_t,
/*
** create2_f32:
+** (
** mov z0\.d, z7\.d
** mov z1\.d, z4\.d
+** |
+** mov z1\.d, z4\.d
+** mov z0\.d, z7\.d
+** )
** ret
*/
TEST_CREATE (create2_f32, svfloat32x2_t, svfloat32_t,
@@ -94,8 +139,13 @@ TEST_CREATE (create2_f32, svfloat32x2_t, svfloat32_t,
/*
** create2_s64:
+** (
** mov z0\.d, z5\.d
** mov z1\.d, z7\.d
+** |
+** mov z1\.d, z7\.d
+** mov z0\.d, z5\.d
+** )
** ret
*/
TEST_CREATE (create2_s64, svint64x2_t, svint64_t,
@@ -104,8 +154,13 @@ TEST_CREATE (create2_s64, svint64x2_t, svint64_t,
/*
** create2_u64:
+** (
** mov z0\.d, z7\.d
** mov z1\.d, z6\.d
+** |
+** mov z1\.d, z6\.d
+** mov z0\.d, z7\.d
+** )
** ret
*/
TEST_CREATE (create2_u64, svuint64x2_t, svuint64_t,
@@ -114,8 +169,13 @@ TEST_CREATE (create2_u64, svuint64x2_t, svuint64_t,
/*
** create2_f64:
+** (
** mov z0\.d, z5\.d
** mov z1\.d, z4\.d
+** |
+** mov z1\.d, z4\.d
+** mov z0\.d, z5\.d
+** )
** ret
*/
TEST_CREATE (create2_f64, svfloat64x2_t, svfloat64_t,
@@ -132,8 +192,13 @@ TEST_CREATE_B (create2_b_0, svboolx2_t,
/*
** create2_b_1:
+** (
** mov p0\.b, p2\.b
** mov p1\.b, p3\.b
+** |
+** mov p1\.b, p3\.b
+** mov p0\.b, p2\.b
+** )
** ret
*/
TEST_CREATE_B (create2_b_1, svboolx2_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c
index 6f1afb7..0bea951 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c
@@ -4,9 +4,9 @@
/*
** create3_s8:
-** mov z0\.d, z6\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_s8, svint8x3_t, svint8_t,
@@ -15,9 +15,9 @@ TEST_CREATE (create3_s8, svint8x3_t, svint8_t,
/*
** create3_u8:
-** mov z0\.d, z4\.d
-** mov z1\.d, z6\.d
-** mov z2\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_u8, svuint8x3_t, svuint8_t,
@@ -26,9 +26,9 @@ TEST_CREATE (create3_u8, svuint8x3_t, svuint8_t,
/*
** create3_s16:
-** mov z0\.d, z6\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_s16, svint16x3_t, svint16_t,
@@ -37,9 +37,9 @@ TEST_CREATE (create3_s16, svint16x3_t, svint16_t,
/*
** create3_u16:
-** mov z0\.d, z6\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z4\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_u16, svuint16x3_t, svuint16_t,
@@ -48,9 +48,9 @@ TEST_CREATE (create3_u16, svuint16x3_t, svuint16_t,
/*
** create3_bf16:
-** mov z0\.d, z4\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_bf16, svbfloat16x3_t, svbfloat16_t,
@@ -59,9 +59,9 @@ TEST_CREATE (create3_bf16, svbfloat16x3_t, svbfloat16_t,
/*
** create3_f16:
-** mov z0\.d, z4\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_f16, svfloat16x3_t, svfloat16_t,
@@ -70,9 +70,9 @@ TEST_CREATE (create3_f16, svfloat16x3_t, svfloat16_t,
/*
** create3_s32:
-** mov z0\.d, z6\.d
-** mov z1\.d, z7\.d
-** mov z2\.d, z4\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_s32, svint32x3_t, svint32_t,
@@ -81,9 +81,9 @@ TEST_CREATE (create3_s32, svint32x3_t, svint32_t,
/*
** create3_u32:
-** mov z0\.d, z7\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_u32, svuint32x3_t, svuint32_t,
@@ -92,9 +92,9 @@ TEST_CREATE (create3_u32, svuint32x3_t, svuint32_t,
/*
** create3_f32:
-** mov z0\.d, z7\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_f32, svfloat32x3_t, svfloat32_t,
@@ -103,9 +103,9 @@ TEST_CREATE (create3_f32, svfloat32x3_t, svfloat32_t,
/*
** create3_s64:
-** mov z0\.d, z5\.d
-** mov z1\.d, z7\.d
-** mov z2\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_s64, svint64x3_t, svint64_t,
@@ -114,9 +114,9 @@ TEST_CREATE (create3_s64, svint64x3_t, svint64_t,
/*
** create3_u64:
-** mov z0\.d, z7\.d
-** mov z1\.d, z6\.d
-** mov z2\.d, z4\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_u64, svuint64x3_t, svuint64_t,
@@ -125,9 +125,9 @@ TEST_CREATE (create3_u64, svuint64x3_t, svuint64_t,
/*
** create3_f64:
-** mov z0\.d, z5\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create3_f64, svfloat64x3_t, svfloat64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c
index a386628..b5ffd4e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c
@@ -4,10 +4,10 @@
/*
** create4_s8:
-** mov z0\.d, z6\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z7\.d
-** mov z3\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_s8, svint8x4_t, svint8_t,
@@ -16,10 +16,10 @@ TEST_CREATE (create4_s8, svint8x4_t, svint8_t,
/*
** create4_u8:
-** mov z0\.d, z4\.d
-** mov z1\.d, z6\.d
-** mov z2\.d, z5\.d
-** mov z3\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_u8, svuint8x4_t, svuint8_t,
@@ -28,10 +28,10 @@ TEST_CREATE (create4_u8, svuint8x4_t, svuint8_t,
/*
** create4_s16:
-** mov z0\.d, z6\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z5\.d
-** mov z3\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_s16, svint16x4_t, svint16_t,
@@ -40,10 +40,10 @@ TEST_CREATE (create4_s16, svint16x4_t, svint16_t,
/*
** create4_u16:
-** mov z0\.d, z6\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z4\.d
-** mov z3\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_u16, svuint16x4_t, svuint16_t,
@@ -52,10 +52,10 @@ TEST_CREATE (create4_u16, svuint16x4_t, svuint16_t,
/*
** create4_bf16:
-** mov z0\.d, z4\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z6\.d
-** mov z3\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_bf16, svbfloat16x4_t, svbfloat16_t,
@@ -64,10 +64,10 @@ TEST_CREATE (create4_bf16, svbfloat16x4_t, svbfloat16_t,
/*
** create4_f16:
-** mov z0\.d, z4\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z6\.d
-** mov z3\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_f16, svfloat16x4_t, svfloat16_t,
@@ -76,10 +76,10 @@ TEST_CREATE (create4_f16, svfloat16x4_t, svfloat16_t,
/*
** create4_s32:
-** mov z0\.d, z6\.d
-** mov z1\.d, z7\.d
-** mov z2\.d, z4\.d
-** mov z3\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_s32, svint32x4_t, svint32_t,
@@ -88,10 +88,10 @@ TEST_CREATE (create4_s32, svint32x4_t, svint32_t,
/*
** create4_u32:
-** mov z0\.d, z7\.d
-** mov z1\.d, z5\.d
-** mov z2\.d, z6\.d
-** mov z3\.d, z7\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_u32, svuint32x4_t, svuint32_t,
@@ -100,10 +100,10 @@ TEST_CREATE (create4_u32, svuint32x4_t, svuint32_t,
/*
** create4_f32:
-** mov z0\.d, z7\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z6\.d
-** mov z3\.d, z4\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_f32, svfloat32x4_t, svfloat32_t,
@@ -112,10 +112,10 @@ TEST_CREATE (create4_f32, svfloat32x4_t, svfloat32_t,
/*
** create4_s64:
-** mov z0\.d, z5\.d
-** mov z1\.d, z7\.d
-** mov z2\.d, z6\.d
-** mov z3\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_s64, svint64x4_t, svint64_t,
@@ -124,10 +124,10 @@ TEST_CREATE (create4_s64, svint64x4_t, svint64_t,
/*
** create4_u64:
-** mov z0\.d, z7\.d
-** mov z1\.d, z6\.d
-** mov z2\.d, z4\.d
-** mov z3\.d, z5\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_u64, svuint64x4_t, svuint64_t,
@@ -136,10 +136,10 @@ TEST_CREATE (create4_u64, svuint64x4_t, svuint64_t,
/*
** create4_f64:
-** mov z0\.d, z5\.d
-** mov z1\.d, z4\.d
-** mov z2\.d, z7\.d
-** mov z3\.d, z6\.d
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
** ret
*/
TEST_CREATE (create4_f64, svfloat64x4_t, svfloat64_t,