author    Richard Sandiford <richard.sandiford@arm.com>  2019-12-02 17:39:06 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org>       2019-12-02 17:39:06 +0000
commit    0435b10db008e3019be0898270070eec4f3e318a (patch)
tree      96d50c1a58cb3a70e9d378758c766f97cb39ce6d
parent    d6098f64819e729026df0ebf0e18ac5de713e36e (diff)
[AArch64] Add a couple of SVE ACLE comparison folds
When writing vector-length specific SVE code, it's useful to be able
to store an svbool_t predicate in a GNU vector of unsigned chars.
This patch makes sure that there is no overhead when converting to
that form and then immediately reading it back again.

2019-12-02  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-sve-builtins.h
	(gimple_folder::force_vector): Declare.
	* config/aarch64/aarch64-sve-builtins.cc
	(gimple_folder::force_vector): New function.
	* config/aarch64/aarch64-sve-builtins-base.cc
	(svcmp_impl::fold): Likewise.
	(svdup_impl::fold): Handle svdup_z too.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/general/eqne_dup_1.c: New test.
	* gcc.target/aarch64/sve/acle/asm/dup_f16.c (dup_0_f16_z): Expect
	the call to be folded to zero.
	* gcc.target/aarch64/sve/acle/asm/dup_f32.c (dup_0_f32_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_f64.c (dup_0_f64_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_s8.c (dup_0_s8_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_s16.c (dup_0_s16_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_s32.c (dup_0_s32_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_s64.c (dup_0_s64_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_u8.c (dup_0_u8_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_u16.c (dup_0_u16_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_u32.c (dup_0_u32_z): Likewise.
	* gcc.target/aarch64/sve/acle/asm/dup_u64.c (dup_0_u64_z): Likewise.

From-SVN: r278907
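For illustration only (not part of the commit): a minimal sketch of the round
trip the new folds target, written with SVE ACLE types as in the testcase.
The round_trip name is made up here; in real code the intermediate 0/1 bytes
would typically be stored in and later reloaded from a GNU vector of unsigned
chars.  With these folds, a function like this should compile down to a plain
return of pg, with no mov/cmpne pair left over.

#include <arm_sve.h>

/* Illustrative sketch, not from the patch: convert a predicate to
   0/1 bytes and back again.  svdup_u8_z gives a 1 byte for each
   active lane and a 0 byte elsewhere; svcmpne under a ptrue
   predicate recovers the original predicate from those bytes.  */
svbool_t
round_trip (svbool_t pg)
{
  svuint8_t bytes = svdup_u8_z (pg, 1);       /* predicate -> bytes */
  return svcmpne (svptrue_b8 (), bytes, 0);   /* bytes -> predicate */
}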
-rw-r--r--  gcc/ChangeLog                                                   | 10
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc                 | 33
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc                      | 11
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.h                       |  1
-rw-r--r--  gcc/testsuite/ChangeLog                                         | 16
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c          |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c         |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c          |  2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c  | 40
17 files changed, 122 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 765e015..29fed4f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2019-12-02 Richard Sandiford <richard.sandiford@arm.com>
+
+ * config/aarch64/aarch64-sve-builtins.h
+ (gimple_folder::force_vector): Declare.
+ * config/aarch64/aarch64-sve-builtins.cc
+ (gimple_folder::force_vector): New function.
+ * config/aarch64/aarch64-sve-builtins-base.cc
+ (svcmp_impl::fold): Likewise.
+ (svdup_impl::fold): Handle svdup_z too.
+
2019-12-02 Martin Liska <mliska@suse.cz>
* ipa-devirt.c (warn_types_mismatch): Use get_odr_name_for_type
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 52166c4..38bd3ad 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -333,6 +333,28 @@ public:
CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
: m_code (code), m_unspec_for_fp (unspec_for_fp) {}
+ gimple *
+ fold (gimple_folder &f) const OVERRIDE
+ {
+ tree pg = gimple_call_arg (f.call, 0);
+ tree rhs1 = gimple_call_arg (f.call, 1);
+ tree rhs2 = gimple_call_arg (f.call, 2);
+
+ /* Convert a ptrue-predicated integer comparison into the corresponding
+ gimple-level operation. */
+ if (integer_all_onesp (pg)
+ && f.type_suffix (0).element_bytes == 1
+ && f.type_suffix (0).integer_p)
+ {
+ gimple_seq stmts = NULL;
+ rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
+ return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
+ }
+
+ return NULL;
+ }
+
rtx
expand (function_expander &e) const OVERRIDE
{
@@ -700,6 +722,17 @@ public:
return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
}
+ /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */
+ if (f.pred == PRED_z)
+ {
+ gimple_seq stmts = NULL;
+ tree pred = f.convert_pred (stmts, vec_type, 0);
+ rhs = f.force_vector (stmts, vec_type, rhs);
+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
+ return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
+ build_zero_cst (vec_type));
+ }
+
return NULL;
}
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5dd7ccb..3a6b470 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2234,6 +2234,17 @@ gimple_folder::gimple_folder (const function_instance &instance, tree fndecl,
{
}
+/* VALUE might be a vector of type VECTYPE or a single scalar element.
+ Duplicate it into a vector of type VECTYPE in the latter case, adding any
+ new statements to STMTS. */
+tree
+gimple_folder::force_vector (gimple_seq &stmts, tree vectype, tree value)
+{
+ if (!VECTOR_TYPE_P (TREE_TYPE (value)))
+ value = gimple_build_vector_from_val (&stmts, vectype, value);
+ return value;
+}
+
/* Convert predicate argument ARGNO so that it has the type appropriate for
an operation on VECTYPE. Add any new statements to STMTS. */
tree
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 73b07c7..0884b0f 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -488,6 +488,7 @@ public:
gimple_folder (const function_instance &, tree,
gimple_stmt_iterator *, gcall *);
+ tree force_vector (gimple_seq &, tree, tree);
tree convert_pred (gimple_seq &, tree, unsigned int);
tree fold_contiguous_base (gimple_seq &, tree);
tree load_store_cookie (tree);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 64f035f..2e6d171 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,19 @@
+2019-12-02 Richard Sandiford <richard.sandiford@arm.com>
+
+ * gcc.target/aarch64/sve/acle/general/eqne_dup_1.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/dup_f16.c (dup_0_f16_z): Expect
+ the call to be folded to zero.
+ * gcc.target/aarch64/sve/acle/asm/dup_f32.c (dup_0_f32_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_f64.c (dup_0_f64_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_s8.c (dup_0_s8_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_s16.c (dup_0_s16_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_s32.c (dup_0_s32_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_s64.c (dup_0_s64_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_u8.c (dup_0_u8_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_u16.c (dup_0_u16_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_u32.c (dup_0_u32_z): Likewise.
+ * gcc.target/aarch64/sve/acle/asm/dup_u64.c (dup_0_u64_z): Likewise.
+
2019-12-02 Sudakshina Das <sudi.das@arm.com>
* g++.dg/ext/arm-fp16/arm-fp16-ops.h: Remove volatile keyword.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c
index 2d48b9a..a90c711 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c
@@ -120,7 +120,7 @@ TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t,
/*
** dup_0_f16_z:
-** mov z0\.h, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c
index f997b7a..ba23781 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c
@@ -118,7 +118,7 @@ TEST_UNIFORM_Z (dup_1_f32_z, svfloat32_t,
/*
** dup_0_f32_z:
-** mov z0\.s, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_f32_z, svfloat32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c
index e177d91..b397da8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c
@@ -118,7 +118,7 @@ TEST_UNIFORM_Z (dup_1_f64_z, svfloat64_t,
/*
** dup_0_f64_z:
-** mov z0\.d, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_f64_z, svfloat64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c
index 876f36d..21ab6f6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c
@@ -869,7 +869,7 @@ TEST_UNIFORM_Z (dup_m8000_s16_z, svint16_t,
/*
** dup_0_s16_z:
-** mov z0\.h, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_s16_z, svint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c
index 0b396db..500ec48 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_s32_z, svint32_t,
/*
** dup_0_s32_z:
-** mov z0\.s, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_s32_z, svint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c
index 6259b7f..651bb1b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_s64_z, svint64_t,
/*
** dup_0_s64_z:
-** mov z0\.d, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_s64_z, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c
index 96fc5fa..f3c9db8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c
@@ -275,7 +275,7 @@ TEST_UNIFORM_Z (dup_m128_s8_z, svint8_t,
/*
** dup_0_s8_z:
-** mov z0\.b, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_s8_z, svint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c
index 263eafe..dba409d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c
@@ -869,7 +869,7 @@ TEST_UNIFORM_Z (dup_m8000_u16_z, svuint16_t,
/*
** dup_0_u16_z:
-** mov z0\.h, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_u16_z, svuint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c
index 667feea..7d5b462 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_u32_z, svuint32_t,
/*
** dup_0_u32_z:
-** mov z0\.s, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_u32_z, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c
index a7cca7a..0431e75 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c
@@ -849,7 +849,7 @@ TEST_UNIFORM_Z (dup_m8000_u64_z, svuint64_t,
/*
** dup_0_u64_z:
-** mov z0\.d, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_u64_z, svuint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c
index d27f4bb..1bb4cc1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c
@@ -275,7 +275,7 @@ TEST_UNIFORM_Z (dup_m128_u8_z, svuint8_t,
/*
** dup_0_u8_z:
-** mov z0\.b, p0/z, #0
+** mov z0\.[bhsd], #0
** ret
*/
TEST_UNIFORM_Z (dup_0_u8_z, svuint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c
new file mode 100644
index 0000000..651f5ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eqne_dup_1.c
@@ -0,0 +1,40 @@
+/* { dg-additional-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** cmp1:
+** ptrue (p[0-7])\.b(?:[^\n]*)
+** cmple p0\.b, \1/z, z0\.b, z1\.d
+** ret
+*/
+svbool_t
+cmp1 (svint8_t x, svint64_t y)
+{
+ svbool_t res = svcmple_wide (svptrue_b8 (), x, y);
+ svuint8_t res_u8 = svdup_u8_z (res, 1);
+ return svcmpne (svptrue_b8 (), res_u8, 0);
+}
+
+/*
+** cmp2:
+** ptrue (p[0-7])\.b(?:[^\n]*)
+** cmplt p0\.b, \1/z, z0\.b, z1\.d
+** ret
+*/
+svbool_t
+cmp2 (svint8_t x, svint64_t y)
+{
+ svbool_t res = svcmplt_wide (svptrue_b8 (), x, y);
+ svuint8_t res_u8 = svdup_u8_z (res, 42);
+ return svcmpeq (svptrue_b8 (), res_u8, 42);
+}
+
+#ifdef __cplusplus
+}
+#endif