aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2018-08-24 13:06:09 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2018-08-24 13:06:09 +0000
commit 8c2f568c9a8375927efede4bfc83d6553da7f9a2 (patch)
tree 1199c644ada18f8864e7a19909236e5647c67382
parent ab7e60cec1a6f4185b0428f3a2b3e71df0bae533 (diff)
download gcc-8c2f568c9a8375927efede4bfc83d6553da7f9a2.zip
gcc-8c2f568c9a8375927efede4bfc83d6553da7f9a2.tar.gz
gcc-8c2f568c9a8375927efede4bfc83d6553da7f9a2.tar.bz2
Avoid is_constant calls in vectorizable_bswap

The "new" VEC_PERM_EXPR handling makes it easy to support bswap for
variable-length vectors.

2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
	vectors.

gcc/testsuite/
	* gcc.target/aarch64/sve/bswap_1.c: New test.
	* gcc.target/aarch64/sve/bswap_2.c: Likewise.
	* gcc.target/aarch64/sve/bswap_3.c: Likewise.

From-SVN: r263833
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c 13
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c 13
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c 13
-rw-r--r-- gcc/tree-vect-stmts.c 11
6 files changed, 54 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ac459b6..aa8f392 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2018-08-24 Richard Sandiford <richard.sandiford@arm.com>
+ * tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
+ vectors.
+
+2018-08-24 Richard Sandiford <richard.sandiford@arm.com>
+
* tree-vect-slp.c (vect_transform_slp_perm_load): Separate out
the case in which the permute needs only a single element and
repeats for every vector of the result. Extend that case to
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index db05964..6520f46 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2018-08-24 Richard Sandiford <richard.sandiford@arm.com>
+ * gcc.target/aarch64/sve/bswap_1.c: New test.
+ * gcc.target/aarch64/sve/bswap_2.c: Likewise.
+ * gcc.target/aarch64/sve/bswap_3.c: Likewise.
+
+2018-08-24 Richard Sandiford <richard.sandiford@arm.com>
+
* gcc.target/aarch64/sve/slp_perm_1.c: New test.
* gcc.target/aarch64/sve/slp_perm_2.c: Likewise.
* gcc.target/aarch64/sve/slp_perm_3.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c b/gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c
new file mode 100644
index 0000000..aa4c033
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+void
+f (uint16_t *a, uint16_t *b)
+{
+ for (int i = 0; i < 100; ++i)
+ a[i] = __builtin_bswap16 (b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 { xfail aarch64_big_endian } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c b/gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c
new file mode 100644
index 0000000..442c65e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+void
+f (uint32_t *a, uint32_t *b)
+{
+ for (int i = 0; i < 100; ++i)
+ a[i] = __builtin_bswap32 (b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 { xfail aarch64_big_endian } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c b/gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c
new file mode 100644
index 0000000..75acc64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#include <stdint.h>
+
+void
+f (uint64_t *a, uint64_t *b)
+{
+ for (int i = 0; i < 100; ++i)
+ a[i] = __builtin_bswap64 (b[i]);
+}
+
+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 { xfail aarch64_big_endian } } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 63fb1fb..8d94fca 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2961,13 +2961,10 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vec_info *vinfo = stmt_info->vinfo;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
unsigned ncopies;
- unsigned HOST_WIDE_INT nunits, num_bytes;
op = gimple_call_arg (stmt, 0);
vectype = STMT_VINFO_VECTYPE (stmt_info);
-
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
- return false;
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
@@ -2983,11 +2980,11 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (! char_vectype)
return false;
- if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
+ poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
+ unsigned word_bytes;
+ if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
return false;
- unsigned word_bytes = num_bytes / nunits;
-
/* The encoding uses one stepped pattern for each byte in the word. */
vec_perm_builder elts (num_bytes, word_bytes, 3);
for (unsigned i = 0; i < 3; ++i)