aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2019-10-31 17:27:02 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2019-10-31 17:27:02 +0000
commit 354ee867075b474b6f561a161b4140c7971fb2c0 (patch)
tree 9a55350094a707145c26797168d3abdf5606f181
parent 37115224b2ee1f03ce8d7816182c267c9ff9d545 (diff)
download gcc-354ee867075b474b6f561a161b4140c7971fb2c0.zip
gcc-354ee867075b474b6f561a161b4140c7971fb2c0.tar.gz
gcc-354ee867075b474b6f561a161b4140c7971fb2c0.tar.bz2
[AArch64] Split gcc.target/aarch64/sve/vcond_4*
vcond_4.c combined too much stuff into one test, so that when we needed to add XFAILs for one set of routines, we lost testing of others that used the same instructions. This patch splits it into four: vcond_4.c: The main test for selects between a vector and a scalar, with all elements having the same size. After Prathamesh's fix for PR91272, we are now guaranteed to use the conditionally-loaded vector as the "then" value and the scalar as the "else" value. vcond_4_zero.c: Like vcond_4.c, but comparing with zero. vcond_4_sel.c: Tests for selects between two scalars, with all elements having the same size. The optimisers can legitimately switch the "then" and "else" order and adjust the comparison to match. vcond_4_costly.c: Like vcond_4.c, but with mixed element sizes. The sequences tested here will not be the preferred ones once we support mixtures of vector sizes, but it still tests an important code path. This means that vcond_4_run.c now only tests what's left in vcond_4.c, but that seems OK, since the main point was to ensure correct exception behaviour. Similarly it means that vcond_5.c only tests what's left in vcond_4.c, but that too is OK, since the point of the test was to compare the default handling of each comparison in vcond_4.c with the -fno-trapping-math equivalent. 2019-10-31 Richard Sandiford <richard.sandiford@arm.com> gcc/testsuite/ * gcc.target/aarch64/sve/vcond_4.c: Split parts out into... * gcc.target/aarch64/sve/vcond_4_costly.c, * gcc.target/aarch64/sve/vcond_4_sel.c, * gcc.target/aarch64/sve/vcond_4_zero.c: ...these new tests. * gcc.target/aarch64/sve/vcond_4_run.c: Adjust accordingly. * gcc.target/aarch64/sve/vcond_5.c: Likewise. From-SVN: r277682
-rw-r--r-- gcc/testsuite/ChangeLog 9
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c 91
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c 77
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c 20
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_4_sel.c 78
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_4_zero.c 91
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c 65
7 files changed, 299 insertions, 132 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4dae2ac..93e26de 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,14 @@
2019-10-31 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/vcond_4.c: Split parts out into...
+ * gcc.target/aarch64/sve/vcond_4_costly.c,
+ * gcc.target/aarch64/sve/vcond_4_sel.c,
+ * gcc.target/aarch64/sve/vcond_4_zero.c: ...these new tests.
+ * gcc.target/aarch64/sve/vcond_4_run.c: Adjust accordingly.
+ * gcc.target/aarch64/sve/vcond_5.c: Likewise.
+
+2019-10-31 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/reduc_strict_3.c: Split all but the
first function out into...
* gcc.target/aarch64/sve/reduc_strict_4.c,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
index b38f23e..3ed5b14 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
@@ -33,38 +33,12 @@
{ \
for (int i = 0; i < count; ++i) \
dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \
- } \
- \
- void __attribute__ ((noinline, noclone)) \
- test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest, \
- TYPE1 *restrict src, \
- TYPE1 fallback, \
- TYPE2 *restrict a, \
- int count) \
- { \
- for (int i = 0; i < count; ++i) \
- dest[i] = CMP (a[i], 0) ? src[i] : fallback; \
- } \
- \
- void __attribute__ ((noinline, noclone)) \
- test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest, \
- TYPE1 if_true, \
- TYPE1 if_false, \
- TYPE2 *restrict a, \
- TYPE2 b, int count) \
- { \
- for (int i = 0; i < count; ++i) \
- dest[i] = CMP (a[i], b) ? if_true : if_false; \
}
#define TEST_CMP(CMP) \
TEST_LOOP (int32_t, float, CMP) \
TEST_LOOP (uint32_t, float, CMP) \
- TEST_LOOP (int64_t, float, CMP) \
- TEST_LOOP (uint64_t, float, CMP) \
TEST_LOOP (float, float, CMP) \
- TEST_LOOP (int32_t, double, CMP) \
- TEST_LOOP (uint32_t, double, CMP) \
TEST_LOOP (int64_t, double, CMP) \
TEST_LOOP (uint64_t, double, CMP) \
TEST_LOOP (double, double, CMP)
@@ -88,54 +62,29 @@ TEST_CMP (nule)
TEST_CMP (nuge)
TEST_CMP (nugt)
-/* See PR 86753 for the reason behind the XFAILs. */
-
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 5 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 10 { xfail *-*-* } } } */
-
-/* 5 for ne, 5 for ueq and 5 for nueq. */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
-
-/* 5 for lt, 5 for ult and 5 for nult. */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* 5 for gt, 5 for ugt and 5 for nugt. */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
-
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
-/* 3 loops * 5 invocations for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
-
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
+/* 3 for ne, 3 for ueq and 3 for nueq. */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for olt, 3 for ult and 3 for nult. */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for ole, 3 for ule and 3 for nule. */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for ogt, 3 for ugt and 3 for nugt. */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
+/* 3 for oge, 3 for uge and 3 for nuge. */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
-/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
- for all 12 unordered comparisons. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */
+/* 3 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c
new file mode 100644
index 0000000..4aa567e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_costly.c
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
+ TYPE1 *restrict src, \
+ TYPE1 fallback, \
+ TYPE2 *restrict a, \
+ TYPE2 *restrict b, \
+ int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int64_t, float, CMP) \
+ TEST_LOOP (uint64_t, float, CMP) \
+ TEST_LOOP (int32_t, double, CMP) \
+ TEST_LOOP (uint32_t, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* 2 each for: eq, ne, ueq, nueq. */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 8 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 16 { xfail *-*-* } } } */
+
+/* 2 each for: olt, ult, nult, ogt, ugt, nugt. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+
+/* 2 each for: ole, ule, nule, oge, uge, nuge. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+
+/* 2 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 24 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 48 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c
index c345087..abab3ee 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_run.c
@@ -14,7 +14,7 @@
#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID) \
{ \
- TYPE1 dest1[N], dest2[N], dest3[N], src[N]; \
+ TYPE1 dest[N], src[N]; \
TYPE2 a[N], b[N]; \
for (int i = 0; i < N; ++i) \
{ \
@@ -34,31 +34,19 @@
asm volatile ("" ::: "memory"); \
} \
feclearexcept (FE_ALL_EXCEPT); \
- test_##TYPE1##_##TYPE2##_##CMP##_var (dest1, src, 11, a, b, N); \
- test_##TYPE1##_##TYPE2##_##CMP##_zero (dest2, src, 22, a, N); \
- test_##TYPE1##_##TYPE2##_##CMP##_sel (dest3, 33, 44, a, 9, N); \
+ test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N); \
if (TEST_EXCEPTIONS \
&& !fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \
__builtin_abort (); \
for (int i = 0; i < N; ++i) \
- { \
- if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \
- __builtin_abort (); \
- if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \
- __builtin_abort (); \
- if (dest3[i] != (CMP (a[i], 9) ? 33 : 44)) \
- __builtin_abort (); \
- } \
+ if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \
+ __builtin_abort (); \
}
#define RUN_CMP(CMP, EXPECT_INVALID) \
RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
- RUN_LOOP (int64_t, float, CMP, EXPECT_INVALID) \
- RUN_LOOP (uint64_t, float, CMP, EXPECT_INVALID) \
RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
- RUN_LOOP (int32_t, double, CMP, EXPECT_INVALID) \
- RUN_LOOP (uint32_t, double, CMP, EXPECT_INVALID) \
RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
RUN_LOOP (double, double, CMP, EXPECT_INVALID)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_sel.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_sel.c
new file mode 100644
index 0000000..b83490c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_sel.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_sel (TYPE1 *restrict dest, \
+ TYPE1 if_true, \
+ TYPE1 if_false, \
+ TYPE2 *restrict a, \
+ TYPE2 b, int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ dest[i] = CMP (a[i], b) ? if_true : if_false; \
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int32_t, float, CMP) \
+ TEST_LOOP (uint32_t, float, CMP) \
+ TEST_LOOP (float, float, CMP) \
+ TEST_LOOP (int64_t, double, CMP) \
+ TEST_LOOP (uint64_t, double, CMP) \
+ TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* 3 each for: eq, ne, ueq, nueq. */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcm(?:eq|ne)\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+
+/* 3 each for: olt, ult, nult, ogt, ugt, nugt. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 18 } } */
+
+/* 3 each for: ole, ule, nule, oge, uge, nuge. */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tfcm[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 18 } } */
+
+/* 3 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_zero.c
new file mode 100644
index 0000000..905cf9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4_zero.c
@@ -0,0 +1,91 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
+ void __attribute__ ((noinline, noclone)) \
+ test_##TYPE1##_##TYPE2##_##CMP##_zero (TYPE1 *restrict dest, \
+ TYPE1 *restrict src, \
+ TYPE1 fallback, \
+ TYPE2 *restrict a, \
+ int count) \
+ { \
+ for (int i = 0; i < count; ++i) \
+ dest[i] = CMP (a[i], 0) ? src[i] : fallback; \
+ }
+
+#define TEST_CMP(CMP) \
+ TEST_LOOP (int32_t, float, CMP) \
+ TEST_LOOP (uint32_t, float, CMP) \
+ TEST_LOOP (float, float, CMP) \
+ TEST_LOOP (int64_t, double, CMP) \
+ TEST_LOOP (uint64_t, double, CMP) \
+ TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 3 } } */
+
+/* 3 for ne, 3 for ueq and 3 for nueq. */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for olt, 3 for ult and 3 for nult. */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for ole, 3 for ule and 3 for nule. */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for ogt, 3 for ugt and 3 for nugt. */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* 3 for oge, 3 for uge and 3 for nuge. */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 9 } } */
+
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
+/* 3 invocations for all 12 unordered comparisons. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 36 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
index 2f16fbf..ecf247d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
@@ -5,54 +5,29 @@
#include "vcond_4.c"
-/* See PR 86753 for the reason behind the XFAILs. */
+/* 3 for eq, 3 for ueq. */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
-/* 5 for eqand 5 for ueq. */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 10 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 20 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 5 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 10 { xfail *-*-* } } } */
+/* 3 each for: olt, ult, nult and nueq. */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
-/* 5 for lt, 5 for ult, 5 for nueq and 5 for nult. */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
+/* 3 for ole, 3 for ule and 3 for nule. */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* 5 for le, 5 for ule and 5 for nule. */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
+/* 3 each for: ogt, ugt, nugt and nueq. */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
-/* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
+/* 3 for oge, 3 for uge and 3 for nuge. */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */
-/* 5 for ge, 5 for uge and 5 for nuge. */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
-
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
-/* 3 loops * 5 invocations for ordered, unordered amd ueq. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 45 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 14 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 28 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 7 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 14 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
-
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
-
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
-
-/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
-/* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
- for ordered, unordered and ueq. */
-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 63 { xfail *-*-* } } } */
+/* 3 for ordered, 3 for unordered and 3 for ueq. */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 9 } } */