diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/gimple-ssa-store-merging.c | 100 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/optimize-bswapdi-1.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/optimize-bswapdi-2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/optimize-bswapsi-1.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/optimize-bswapsi-3.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/optimize-bswapsi-5.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr86723.c | 63 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr86723.c | 52 |
8 files changed, 208 insertions, 27 deletions
diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index 5d3094b..781c02d 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -792,7 +792,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) void find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, - uint64_t *cmpnop) + uint64_t *cmpnop, bool *cast64_to_32) { unsigned rsize; uint64_t tmpn, mask; @@ -802,6 +802,7 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, according to the size of the symbolic number before using it. */ *cmpxchg = CMPXCHG; *cmpnop = CMPNOP; + *cast64_to_32 = false; /* Find real size of result (highest non-zero byte). */ if (n->base_addr) @@ -814,7 +815,27 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, if (n->range < (int) sizeof (int64_t)) { mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; - *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; + if (n->base_addr == NULL + && n->range == 4 + && int_size_in_bytes (TREE_TYPE (n->src)) == 8) + { + /* If all bytes in n->n are either 0 or in [5..8] range, this + might be a candidate for (unsigned) __builtin_bswap64 (src). + It is not worth it for (unsigned short) __builtin_bswap64 (src) + or (unsigned short) __builtin_bswap32 (src). */ + *cast64_to_32 = true; + for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER) + if ((tmpn & MARKER_MASK) + && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8)) + { + *cast64_to_32 = false; + break; + } + } + if (*cast64_to_32) + *cmpxchg &= mask; + else + *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; *cmpnop &= mask; } @@ -837,6 +858,8 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, n->range = rsize; } + if (*cast64_to_32) + n->range = 8; n->range *= BITS_PER_UNIT; } @@ -849,7 +872,8 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, expression. */ gimple * -find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) +find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap, + bool *cast64_to_32, uint64_t *mask) { tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt))); if (!tree_fits_uhwi_p (type_size)) @@ -929,17 +953,30 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) } uint64_t cmpxchg, cmpnop; - find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop); + find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32); /* A complete byte swap should make the symbolic number to start with the largest digit in the highest order byte. Unchanged symbolic number indicates a read with same endianness as target architecture. */ + *mask = ~(uint64_t) 0; if (n->n == cmpnop) *bswap = false; else if (n->n == cmpxchg) *bswap = true; else - return NULL; + { + int set = 0; + for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER) + if ((n->n & msk) == 0) + *mask &= ~msk; + else if ((n->n & msk) == (cmpxchg & msk)) + set++; + else + return NULL; + if (set < 2) + return NULL; + *bswap = true; + } /* Useless bit manipulation performed by code. */ if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) @@ -1024,10 +1061,10 @@ bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val) tree bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, tree bswap_type, tree load_type, struct symbolic_number *n, - bool bswap) + bool bswap, uint64_t mask) { tree src, tmp, tgt = NULL_TREE; - gimple *bswap_stmt; + gimple *bswap_stmt, *mask_stmt = NULL; tree_code conv_code = NOP_EXPR; gimple *cur_stmt = gsi_stmt (gsi); @@ -1247,6 +1284,15 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, tgt = make_ssa_name (bswap_type); tmp = tgt; + if (mask != ~(uint64_t) 0) + { + tree m = build_int_cst (bswap_type, mask); + tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); + gimple_set_lhs (bswap_stmt, tmp); + mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m); + tmp = tgt; + } + /* Convert the result if necessary. */ if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) { @@ -1260,7 +1306,7 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); } - gimple_set_lhs (bswap_stmt, tmp); + gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp); if (dump_file) { @@ -1277,11 +1323,17 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, if (cur_stmt) { + if (mask_stmt) + gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT); gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); gsi_remove (&gsi, true); } else - gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT); + { + gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT); + if (mask_stmt) + gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT); + } return tgt; } @@ -1341,8 +1393,14 @@ maybe_optimize_vector_constructor (gimple *cur_stmt) return false; } - gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); - if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz) + bool cast64_to_32; + uint64_t mask; + gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap, + &cast64_to_32, &mask); + if (!ins_stmt + || n.range != (unsigned HOST_WIDE_INT) sz + || cast64_to_32 + || mask != ~(uint64_t) 0) return false; if (bswap && !fndecl && n.range != 16) @@ -1351,7 +1409,7 @@ maybe_optimize_vector_constructor (gimple *cur_stmt) memset (&nop_stats, 0, sizeof (nop_stats)); memset (&bswap_stats, 0, sizeof (bswap_stats)); return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl, - bswap_type, load_type, &n, bswap) != NULL_TREE; + bswap_type, load_type, &n, bswap, mask) != NULL_TREE; } /* Find manual byte swap implementations as well as load in a given @@ -1405,7 +1463,8 @@ pass_optimize_bswap::execute (function *fun) tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; enum tree_code code; struct symbolic_number n; - bool bswap; + bool bswap, cast64_to_32; + uint64_t mask; /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt might be moved to a different basic block by bswap_replace and gsi @@ -1442,7 +1501,8 @@ pass_optimize_bswap::execute (function *fun) continue; } - ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); + ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap, + &cast64_to_32, &mask); if (!ins_stmt) continue; @@ -1479,7 +1539,7 @@ pass_optimize_bswap::execute (function *fun) continue; if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl, - bswap_type, load_type, &n, bswap)) + bswap_type, load_type, &n, bswap, mask)) changed = true; } } @@ -2820,7 +2880,8 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store, } uint64_t cmpxchg, cmpnop; - find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop); + bool cast64_to_32; + find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32); /* A complete byte swap should make the symbolic number to start with the largest digit in the highest order byte. Unchanged symbolic @@ -2828,6 +2889,10 @@ imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store, if (n.n != cmpnop && n.n != cmpxchg) return false; + /* For now. */ + if (cast64_to_32) + return false; + if (n.base_addr == NULL_TREE && !is_gimple_val (n.src)) return false; @@ -4161,7 +4226,8 @@ imm_store_chain_info::output_merged_store (merged_store_group *group) n->vuse = gimple_vuse (ins_stmt); } bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl, - bswap_type, load_type, n, bswap); + bswap_type, load_type, n, bswap, + ~(uint64_t) 0); gcc_assert (bswap_res); } diff --git a/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c index a4a3a79..56a2071b 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapdi-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target bswap } */ /* { dg-require-effective-target stdint_types } */ -/* { dg-options "-O2 -fdump-tree-bswap" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ /* { dg-additional-options "-mzarch" { target s390*-*-* } } */ #include <stdint.h> @@ -58,4 +58,4 @@ swap64_c (uint64_t x) } -/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 3 "bswap" } } */ +/* { dg-final { scan-tree-dump-times "= __builtin_bswap64 \\\(" 3 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/optimize-bswapdi-2.c b/gcc/testsuite/gcc.dg/optimize-bswapdi-2.c index 89b251f..c6d9604 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapdi-2.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapdi-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target bswap } */ /* { dg-require-effective-target stdint_types } */ -/* { dg-options "-O2 -fdump-tree-bswap" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ /* { dg-additional-options "-mzarch" { target s390*-*-* } } */ #include <stdint.h> @@ -23,4 +23,4 @@ swap64_c (uint64_t x) } -/* { dg-final { scan-tree-dump-times "64 bit bswap implementation found at" 1 "bswap" } } */ +/* { dg-final { scan-tree-dump-times "= __builtin_bswap64 \\\(" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c index c403d04..2d24f43 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-1.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target bswap } */ /* { dg-require-effective-target stdint_types } */ -/* { dg-options "-O2 -fdump-tree-bswap" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ /* { dg-additional-options "-march=z900" { target s390*-*-* } } */ #include <stdint.h> @@ -89,4 +89,4 @@ swap32_f (unsigned in) return in; } -/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 6 "bswap" } } */ +/* { dg-final { scan-tree-dump-times "= __builtin_bswap32 \\\(" 6 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c index 9418a83..7cd1b40 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-3.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target bswap } */ /* { dg-require-effective-target stdint_types } */ -/* { dg-options "-O2 -fdump-tree-bswap" } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ /* { dg-additional-options "-march=z900" { target s390-*-* } } */ typedef int SItype __attribute__ ((mode (SI))); @@ -20,4 +20,4 @@ swap32 (SItype in) | (((in >> 24) & 0xFF) << 0); } -/* { dg-final { scan-tree-dump-not "32 bit bswap implementation found at" "bswap" } } */ +/* { dg-final { scan-tree-dump-times "= __builtin_bswap32 \\\(" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c index b4d8b9a..91a5284 100644 --- a/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c +++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-require-effective-target bswap } */ -/* { dg-options "-O2 -fdump-tree-bswap -fno-inline-functions" } */ +/* { dg-options "-O2 -fdump-tree-optimized -fno-inline-functions" } */ /* { dg-additional-options "-march=z900" { target s390-*-* } } */ struct L { unsigned int l[2]; }; @@ -28,4 +28,4 @@ bar (double a, struct L *p) foo (a, p); } -/* { dg-final { scan-tree-dump-times "32 bit bswap implementation found at" 2 "bswap" } } */ +/* { dg-final { scan-tree-dump-times "= __builtin_bswap32 \\\(" 2 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/pr86723.c b/gcc/testsuite/gcc.dg/pr86723.c new file mode 100644 index 0000000..e3fd6b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr86723.c @@ -0,0 +1,63 @@ +/* PR tree-optimization/86723 */ +/* { dg-do run { target { ilp32 || lp64 } } } */ +/* { dg-options "-O2" } */ + +__attribute__((noipa)) int +foo (unsigned long long value) +{ + return (((value & 0x00000000000000ffull) << 56) + | ((value & 0x000000000000ff00ull) << 40) + | ((value & 0x0000000000ff0000ull) << 24) + | ((value & 0x00000000ff000000ull) << 8) + | ((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0x00ff000000000000ull) >> 40) + | ((value & 0xff00000000000000ull) >> 56)); +} + +__attribute__((noipa)) int +bar (unsigned long long value) +{ + return (((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0x00ff000000000000ull) >> 40) + | ((value & 0xff00000000000000ull) >> 56)); +} + +__attribute__((noipa)) unsigned long long +baz (unsigned long long value) +{ + return (((value & 0x00000000000000ffull) << 56) + | ((value & 0x000000000000ff00ull) << 40) + | ((value & 0x00000000ff000000ull) << 8) + | ((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0xff00000000000000ull) >> 56)); +} + +__attribute__((noipa)) unsigned int +qux (unsigned int value) +{ + return (((value & 0x000000ff) << 24) + | ((value & 0x00ff0000) >> 8) + | ((value & 0xff000000) >> 24)); +} + +__attribute__((noipa)) unsigned int +corge (unsigned int value) +{ + return (((value & 0x000000ff) << 24) + | ((value & 0xff000000) >> 24)); +} + +int +main () +{ + if (foo (0x0102030405060708ull) != 0x04030201 + || bar (0x0102030405060708ull) != 0x04030201 + || baz (0x0102030405060708ull) != 0x0807000504030001ull + || qux (0x01020304) != 0x04000201 + || corge (0x01020304) != 0x04000001) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr86723.c b/gcc/testsuite/gcc.target/i386/pr86723.c new file mode 100644 index 0000000..b61d750c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr86723.c @@ -0,0 +1,52 @@ +/* PR tree-optimization/86723 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "\tbswap\t" 5 } } */ + +int +foo (unsigned long long value) +{ + return (((value & 0x00000000000000ffull) << 56) + | ((value & 0x000000000000ff00ull) << 40) + | ((value & 0x0000000000ff0000ull) << 24) + | ((value & 0x00000000ff000000ull) << 8) + | ((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0x00ff000000000000ull) >> 40) + | ((value & 0xff00000000000000ull) >> 56)); +} + +int +bar (unsigned long long value) +{ + return (((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0x00ff000000000000ull) >> 40) + | ((value & 0xff00000000000000ull) >> 56)); +} + +unsigned long long +baz (unsigned long long value) +{ + return (((value & 0x00000000000000ffull) << 56) + | ((value & 0x000000000000ff00ull) << 40) + | ((value & 0x00000000ff000000ull) << 8) + | ((value & 0x000000ff00000000ull) >> 8) + | ((value & 0x0000ff0000000000ull) >> 24) + | ((value & 0xff00000000000000ull) >> 56)); +} + +unsigned int +qux (unsigned int value) +{ + return (((value & 0x000000ff) << 24) + | ((value & 0x00ff0000) >> 8) + | ((value & 0xff000000) >> 24)); +} + +unsigned int +corge (unsigned int value) +{ + return (((value & 0x000000ff) << 24) + | ((value & 0xff000000) >> 24)); +} |