diff options
author | Richard Sandiford <richard.sandiford@arm.com> | 2018-08-24 13:06:09 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-08-24 13:06:09 +0000 |
commit | 8c2f568c9a8375927efede4bfc83d6553da7f9a2 (patch) | |
tree | 1199c644ada18f8864e7a19909236e5647c67382 | |
parent | ab7e60cec1a6f4185b0428f3a2b3e71df0bae533 (diff) | |
download | gcc-8c2f568c9a8375927efede4bfc83d6553da7f9a2.zip gcc-8c2f568c9a8375927efede4bfc83d6553da7f9a2.tar.gz gcc-8c2f568c9a8375927efede4bfc83d6553da7f9a2.tar.bz2 |
Avoid is_constant calls in vectorizable_bswap

The "new" VEC_PERM_EXPR handling makes it easy to support bswap
for variable-length vectors.

2018-08-24  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-stmts.c (vectorizable_bswap): Handle variable-length
	vectors.

gcc/testsuite/
	* gcc.target/aarch64/sve/bswap_1.c: New test.
	* gcc.target/aarch64/sve/bswap_2.c: Likewise.
	* gcc.target/aarch64/sve/bswap_3.c: Likewise.

From-SVN: r263833
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c | 13 | ||||
-rw-r--r-- | gcc/tree-vect-stmts.c | 11 |
6 files changed, 54 insertions, 7 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ac459b6..aa8f392 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,10 @@ 2018-08-24 Richard Sandiford <richard.sandiford@arm.com> + * tree-vect-stmts.c (vectorizable_bswap): Handle variable-length + vectors. + +2018-08-24 Richard Sandiford <richard.sandiford@arm.com> + * tree-vect-slp.c (vect_transform_slp_perm_load): Separate out the case in which the permute needs only a single element and repeats for every vector of the result. Extend that case to diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index db05964..6520f46 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,11 @@ 2018-08-24 Richard Sandiford <richard.sandiford@arm.com> + * gcc.target/aarch64/sve/bswap_1.c: New test. + * gcc.target/aarch64/sve/bswap_2.c: Likewise. + * gcc.target/aarch64/sve/bswap_3.c: Likewise. + +2018-08-24 Richard Sandiford <richard.sandiford@arm.com> + * gcc.target/aarch64/sve/slp_perm_1.c: New test. * gcc.target/aarch64/sve/slp_perm_2.c: Likewise. * gcc.target/aarch64/sve/slp_perm_3.c: Likewise. 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c b/gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c new file mode 100644 index 0000000..aa4c033 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/bswap_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +void +f (uint16_t *a, uint16_t *b) +{ + for (int i = 0; i < 100; ++i) + a[i] = __builtin_bswap16 (b[i]); +} + +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 { xfail aarch64_big_endian } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c b/gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c new file mode 100644 index 0000000..442c65e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/bswap_2.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +void +f (uint32_t *a, uint32_t *b) +{ + for (int i = 0; i < 100; ++i) + a[i] = __builtin_bswap32 (b[i]); +} + +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 { xfail aarch64_big_endian } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c b/gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c new file mode 100644 index 0000000..75acc64 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/bswap_3.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <stdint.h> + +void +f (uint64_t *a, uint64_t *b) +{ + for (int i = 0; i < 100; ++i) + a[i] = __builtin_bswap64 (b[i]); +} + +/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 { xfail aarch64_big_endian } } } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 63fb1fb..8d94fca 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2961,13 +2961,10 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, vec_info *vinfo = stmt_info->vinfo; loop_vec_info loop_vinfo = 
STMT_VINFO_LOOP_VINFO (stmt_info); unsigned ncopies; - unsigned HOST_WIDE_INT nunits, num_bytes; op = gimple_call_arg (stmt, 0); vectype = STMT_VINFO_VECTYPE (stmt_info); - - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) - return false; + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in @@ -2983,11 +2980,11 @@ vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, if (! char_vectype) return false; - if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes)) + poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype); + unsigned word_bytes; + if (!constant_multiple_p (num_bytes, nunits, &word_bytes)) return false; - unsigned word_bytes = num_bytes / nunits; - /* The encoding uses one stepped pattern for each byte in the word. */ vec_perm_builder elts (num_bytes, word_bytes, 3); for (unsigned i = 0; i < 3; ++i) |