diff options
Diffstat (limited to 'gcc/gimple-ssa-store-merging.cc')
-rw-r--r-- | gcc/gimple-ssa-store-merging.cc | 130 |
1 files changed, 103 insertions, 27 deletions
diff --git a/gcc/gimple-ssa-store-merging.cc b/gcc/gimple-ssa-store-merging.cc index df7afd2..9cb574f 100644 --- a/gcc/gimple-ssa-store-merging.cc +++ b/gcc/gimple-ssa-store-merging.cc @@ -893,6 +893,37 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, n->range *= BITS_PER_UNIT; } +/* Helper function for find_bswap_or_nop. + Return true if N is CMPNOP (*BSWAP is set to false), or if N is CMPXCHG, possibly with some markers zeroed but at least two kept (*BSWAP is set to true and the zeroed markers are cleared from *MASK); return false otherwise. */ +static bool +is_bswap_or_nop_p (uint64_t n, uint64_t cmpxchg, + uint64_t cmpnop, uint64_t* mask, + bool* bswap) +{ + *mask = ~(uint64_t) 0; + if (n == cmpnop) + *bswap = false; + else if (n == cmpxchg) + *bswap = true; + else + { + int set = 0; + for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER) + if ((n & msk) == 0) + *mask &= ~msk; + else if ((n & msk) == (cmpxchg & msk)) + set++; + else + return false; + + if (set < 2) + return false; + *bswap = true; + } + return true; +} + + /* Check if STMT completes a bswap implementation or a read in a given endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP accordingly. It also sets N to represent the kind of operations @@ -903,7 +934,7 @@ find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg, gimple * find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap, - bool *cast64_to_32, uint64_t *mask) + bool *cast64_to_32, uint64_t *mask, uint64_t* l_rotate) { tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt))); if (!tree_fits_uhwi_p (type_size)) @@ -984,29 +1015,57 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap, } uint64_t cmpxchg, cmpnop; + uint64_t orig_range = n->range * BITS_PER_UNIT; find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32); /* A complete byte swap should make the symbolic number to start with the largest digit in the highest order byte. Unchanged symbolic number indicates a read with same endianness as target architecture. 
*/ - *mask = ~(uint64_t) 0; - if (n->n == cmpnop) - *bswap = false; - else if (n->n == cmpxchg) - *bswap = true; - else + *l_rotate = 0; + uint64_t tmp_n = n->n; + if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap)) { - int set = 0; - for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER) - if ((n->n & msk) == 0) - *mask &= ~msk; - else if ((n->n & msk) == (cmpxchg & msk)) - set++; - else - return NULL; - if (set < 2) + /* Try bswap + lrotate. */ + /* TODO: handle cast64_to_32 and big/little_endian memory + source when rsize < range. */ + if (n->range == orig_range + && ((orig_range == 32 + && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing) + || (orig_range == 64 + && optab_handler (rotl_optab, DImode) != CODE_FOR_nothing)) + && (tmp_n & MARKER_MASK) < orig_range / BITS_PER_UNIT) + { + uint64_t range = (orig_range / BITS_PER_UNIT) * BITS_PER_MARKER; + uint64_t count = (tmp_n & MARKER_MASK) * BITS_PER_MARKER; + /* I.e., handle 0x203040506070800 when lower byte is zero. */ + if (!count) + { + for (uint64_t i = 1; i != range / BITS_PER_MARKER; i++) + { + count = (tmp_n >> i * BITS_PER_MARKER) & MARKER_MASK; + if (count) + { + /* Count should be meaningful, not 0xff. */ + if (count <= range / BITS_PER_MARKER) + { + count = (count + i) * BITS_PER_MARKER % range; + break; + } + else + return NULL; + } + } + } + tmp_n = tmp_n >> count | tmp_n << (range - count); /* NOTE(review): COUNT can be 0 here when (count + i) * BITS_PER_MARKER is a multiple of RANGE, making the left shift amount equal RANGE; confirm this cannot reach the width of tmp_n. */ + if (orig_range == 32) + tmp_n &= (1ULL << 32) - 1; + if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap)) + return NULL; + *l_rotate = count / BITS_PER_MARKER * BITS_PER_UNIT; + gcc_assert (*bswap); + } + else return NULL; - *bswap = true; } /* Useless bit manipulation performed by code. 
*/ @@ -1099,10 +1158,10 @@ bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val, tree bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, tree bswap_type, tree load_type, struct symbolic_number *n, - bool bswap, uint64_t mask) + bool bswap, uint64_t mask, uint64_t l_rotate) { tree src, tmp, tgt = NULL_TREE; - gimple *bswap_stmt, *mask_stmt = NULL; + gimple *bswap_stmt, *mask_stmt = NULL, *rotl_stmt = NULL; tree_code conv_code = NOP_EXPR; gimple *cur_stmt = gsi_stmt (gsi); @@ -1332,6 +1391,16 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, tmp = tgt; } + if (l_rotate) + { + tree m = build_int_cst (bswap_type, l_rotate); + tmp = make_temp_ssa_name (bswap_type, NULL, + mask_stmt ? "bswapmaskdst" : "bswapdst"); + gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp); + rotl_stmt = gimple_build_assign (tgt, LROTATE_EXPR, tmp, m); + tmp = tgt; + } + /* Convert the result if necessary. */ if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) { @@ -1344,7 +1413,8 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT); } - gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp); + gimple_set_lhs (rotl_stmt ? rotl_stmt + : mask_stmt ? 
mask_stmt : bswap_stmt, tmp); if (dump_file) { @@ -1361,6 +1431,8 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, if (cur_stmt) { + if (rotl_stmt) + gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT); if (mask_stmt) gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT); gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); @@ -1371,6 +1443,8 @@ bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl, gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT); if (mask_stmt) gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT); + if (rotl_stmt) + gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT); } return tgt; } @@ -1432,9 +1506,9 @@ maybe_optimize_vector_constructor (gimple *cur_stmt) } bool cast64_to_32; - uint64_t mask; + uint64_t mask, l_rotate; gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap, - &cast64_to_32, &mask); + &cast64_to_32, &mask, &l_rotate); if (!ins_stmt || n.range != (unsigned HOST_WIDE_INT) sz || cast64_to_32 @@ -1447,7 +1521,8 @@ maybe_optimize_vector_constructor (gimple *cur_stmt) memset (&nop_stats, 0, sizeof (nop_stats)); memset (&bswap_stats, 0, sizeof (bswap_stats)); return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl, - bswap_type, load_type, &n, bswap, mask) != NULL_TREE; + bswap_type, load_type, &n, bswap, mask, + l_rotate) != NULL_TREE; } /* Find manual byte swap implementations as well as load in a given @@ -1502,7 +1577,7 @@ pass_optimize_bswap::execute (function *fun) enum tree_code code; struct symbolic_number n; bool bswap, cast64_to_32; - uint64_t mask; + uint64_t mask, l_rotate; /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt might be moved to a different basic block by bswap_replace and gsi @@ -1542,7 +1617,7 @@ pass_optimize_bswap::execute (function *fun) } ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap, - &cast64_to_32, &mask); + &cast64_to_32, &mask, &l_rotate); if (!ins_stmt) continue; @@ -1579,7 +1654,8 @@ pass_optimize_bswap::execute (function *fun) 
continue; if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl, - bswap_type, load_type, &n, bswap, mask)) + bswap_type, load_type, &n, bswap, mask, + l_rotate)) changed = true; } } @@ -4271,7 +4347,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group) } bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl, bswap_type, load_type, n, bswap, - ~(uint64_t) 0); + ~(uint64_t) 0, 0); gcc_assert (bswap_res); } |