aboutsummaryrefslogtreecommitdiff
path: root/gcc/testsuite
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2020-12-16 13:08:07 +0100
committer Jakub Jelinek <jakub@redhat.com> 2020-12-16 13:08:07 +0100
commit cd676dfa57e643a4f7d8445e6ebad0f21cf3fd84 (patch)
tree 3fc863b0210fb4d33afb5f5b16063cb9c4b79386 /gcc/testsuite
parent eece52b53b75767593282d178bae12ceebb33f12 (diff)
download gcc-cd676dfa57e643a4f7d8445e6ebad0f21cf3fd84.zip
gcc-cd676dfa57e643a4f7d8445e6ebad0f21cf3fd84.tar.gz
gcc-cd676dfa57e643a4f7d8445e6ebad0f21cf3fd84.tar.bz2
bswap: Handle vector CONSTRUCTORs [PR96239]
The following patch teaches the bswap pass to handle a CONSTRUCTOR for small (2/4/8 byte long) vectors by determining whether the bytes of the constructor come from non-vector sources and form either a nop or a bswap, and in that case changing the CONSTRUCTOR to a VIEW_CONVERT_EXPR from a scalar integer to the vector type. Unfortunately, as I found after the patch was written, due to pass ordering this doesn't really fix the original testcase, just the one I wrote, because both loop and SLP vectorization are done only after the bswap pass. A possible way out of that would be to perform just this particular bswap optimization (i.e. for CONSTRUCTOR assignments with integral vector types, call find_bswap_or_nop and, if successful, bswap_replace) also during the store merging pass; it isn't really a store, but the store merging pass already performs bswapping when handling stores, so it wouldn't be that big a hack. What do you think? 2020-12-16 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/96239 * gimple-ssa-store-merging.c (find_bswap_or_nop): Handle a vector CONSTRUCTOR. (bswap_replace): Likewise. * gcc.dg/pr96239.c: New test.
Diffstat (limited to 'gcc/testsuite')
-rw-r--r-- gcc/testsuite/gcc.dg/pr96239.c 54
1 files changed, 54 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/pr96239.c b/gcc/testsuite/gcc.dg/pr96239.c
new file mode 100644
index 0000000..8af56e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr96239.c
@@ -0,0 +1,54 @@
+/* PR tree-optimization/96239 */
+/* { dg-do run { target { ilp32 || lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " r>> 8;" 1 "optimized" { target bswap } } } */
+/* { dg-final { scan-tree-dump-times " = __builtin_bswap64 " 1 "optimized" { target bswap } } } */
+/* { dg-final { scan-tree-dump-not " >> \(8\|16\|24\|32\|40\|48\|56\);" "optimized" { target bswap } } } */
+
+typedef unsigned char V __attribute__((vector_size (2))); /* 2-byte vector: two unsigned char elements.  */
+typedef unsigned char W __attribute__((vector_size (8))); /* 8-byte vector: eight unsigned char elements.  */
+
+__attribute__((noipa)) void /* noipa: keep interprocedural optimization away so foo is compiled as written.  */
+foo (unsigned short x, V *p) /* Write the two bytes of X into *P, most significant byte first.  */
+{
+ *p = (V) { x >> 8, x }; /* Each unsigned char element keeps only the low 8 bits of its initializer.  */
+}
+
+__attribute__((noipa)) void /* noipa: keep interprocedural optimization away so bar is compiled as written.  */
+bar (unsigned long long x, W *p) /* Write the eight bytes of X into *P, most significant byte first.  */
+{
+ *p = (W) { x >> 56, x >> 48, x >> 40, x >> 32, x >> 24, x >> 16, x >> 8, x }; /* Element I holds the low 8 bits of X >> (56 - 8 * I).  */
+}
+
+__attribute__((noipa)) void /* noipa: keep interprocedural optimization away so baz is compiled as written.  */
+baz (unsigned short x, V *p) /* Write the two bytes of X into *P, least significant byte first.  */
+{
+ *p = (V) { x, x >> 8 }; /* Each unsigned char element keeps only the low 8 bits of its initializer.  */
+}
+
+__attribute__((noipa)) void /* noipa: keep interprocedural optimization away so qux is compiled as written.  */
+qux (unsigned long long x, W *p) /* Write the eight bytes of X into *P, least significant byte first.  */
+{
+ *p = (W) { x, x >> 8, x >> 16, x >> 24, x >> 32, x >> 40, x >> 48, x >> 56 }; /* Element I holds the low 8 bits of X >> (8 * I).  */
+}
+
+int
+main () /* Run each helper on a fixed value; abort if any stored bytes differ from the expected ones.  */
+{
+ V a, c, e, g; /* a, c: outputs of foo and baz; e, g: their expected values.  */
+ W b, d, f, h; /* b, d: outputs of bar and qux; f, h: their expected values.  */
+ foo (0xcafe, &a);
+ bar (0xdeadbeefcafebabeULL, &b);
+ baz (0xdead, &c);
+ qux (0xfeedbac1beefdeadULL, &d);
+ e = (V) { 0xca, 0xfe }; /* Bytes of 0xcafe, high byte first.  */
+ f = (W) { 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe, 0xba, 0xbe }; /* Bytes of 0xdeadbeefcafebabe, high byte first.  */
+ g = (V) { 0xad, 0xde }; /* Bytes of 0xdead, low byte first.  */
+ h = (W) { 0xad, 0xde, 0xef, 0xbe, 0xc1, 0xba, 0xed, 0xfe }; /* Bytes of 0xfeedbac1beefdead, low byte first.  */
+ if (__builtin_memcmp (&a, &e, sizeof (V)) /* A nonzero memcmp result means a byte mismatch.  */
+ || __builtin_memcmp (&b, &f, sizeof (W))
+ || __builtin_memcmp (&c, &g, sizeof (V))
+ || __builtin_memcmp (&d, &h, sizeof (W)))
+ __builtin_abort ();
+ return 0;
+}