diff options
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/builtins.c | 182
-rw-r--r--  gcc/expr.c     |   9
-rw-r--r--  gcc/expr.h     |  13
-rw-r--r--  gcc/passes.def |   5
4 files changed, 197 insertions(+), 12 deletions(-)
diff --git a/gcc/builtins.c b/gcc/builtins.c index 4613aec..b047128 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -6667,6 +6667,166 @@ expand_builtin_memset (tree exp, rtx target, machine_mode mode) return expand_builtin_memset_args (dest, val, len, target, mode, exp); } +/* Try to store VAL (or, if NULL_RTX, VALC) in LEN bytes starting at TO. + Return TRUE if successful, FALSE otherwise. TO is assumed to be + aligned at an ALIGN-bits boundary. LEN must be a multiple of + 1<<CTZ_LEN between MIN_LEN and MAX_LEN. + + The strategy is to issue one store_by_pieces for each power of two, + from most to least significant, guarded by a test on whether there + are at least that many bytes left to copy in LEN. + + ??? Should we skip some powers of two in favor of loops? Maybe start + at the max of TO/LEN/word alignment, at least when optimizing for + size, instead of ensuring O(log len) dynamic compares? */ + +bool +try_store_by_multiple_pieces (rtx to, rtx len, unsigned int ctz_len, + unsigned HOST_WIDE_INT min_len, + unsigned HOST_WIDE_INT max_len, + rtx val, char valc, unsigned int align) +{ + int max_bits = floor_log2 (max_len); + int min_bits = floor_log2 (min_len); + int sctz_len = ctz_len; + + gcc_checking_assert (sctz_len >= 0); + + if (val) + valc = 1; + + /* Bits more significant than TST_BITS are part of the shared prefix + in the binary representation of both min_len and max_len. Since + they're identical, we don't need to test them in the loop. */ + int tst_bits = (max_bits != min_bits ? max_bits + : floor_log2 (max_len ^ min_len)); + + /* Check whether it's profitable to start by storing a fixed BLKSIZE + bytes, to lower max_bits. In the unlikely case of a constant LEN + (implied by identical MAX_LEN and MIN_LEN), we want to issue a + single store_by_pieces, but otherwise, select the minimum multiple + of the ALIGN (in bytes) and of the MCD of the possible LENs, that + brings MAX_LEN below TST_BITS, if that's lower than min_len. 
*/ + unsigned HOST_WIDE_INT blksize; + if (max_len > min_len) + { + unsigned HOST_WIDE_INT alrng = MAX (HOST_WIDE_INT_1U << ctz_len, + align / BITS_PER_UNIT); + blksize = max_len - (HOST_WIDE_INT_1U << tst_bits) + alrng; + blksize &= ~(alrng - 1); + } + else if (max_len == min_len) + blksize = max_len; + else + gcc_unreachable (); + if (min_len >= blksize) + { + min_len -= blksize; + min_bits = floor_log2 (min_len); + max_len -= blksize; + max_bits = floor_log2 (max_len); + + tst_bits = (max_bits != min_bits ? max_bits + : floor_log2 (max_len ^ min_len)); + } + else + blksize = 0; + + /* Check that we can use store by pieces for the maximum store count + we may issue (initial fixed-size block, plus conditional + power-of-two-sized from max_bits to ctz_len. */ + unsigned HOST_WIDE_INT xlenest = blksize; + if (max_bits >= 0) + xlenest += ((HOST_WIDE_INT_1U << max_bits) * 2 + - (HOST_WIDE_INT_1U << ctz_len)); + if (!can_store_by_pieces (xlenest, builtin_memset_read_str, + &valc, align, true)) + return false; + + rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode); + void *constfundata; + if (val) + { + constfun = builtin_memset_gen_str; + constfundata = val = force_reg (TYPE_MODE (unsigned_char_type_node), + val); + } + else + { + constfun = builtin_memset_read_str; + constfundata = &valc; + } + + rtx ptr = copy_addr_to_reg (convert_to_mode (ptr_mode, XEXP (to, 0), 0)); + rtx rem = copy_to_mode_reg (ptr_mode, convert_to_mode (ptr_mode, len, 0)); + to = replace_equiv_address (to, ptr); + set_mem_align (to, align); + + if (blksize) + { + to = store_by_pieces (to, blksize, + constfun, constfundata, + align, true, + max_len != 0 ? RETURN_END : RETURN_BEGIN); + if (max_len == 0) + return true; + + /* Adjust PTR, TO and REM. Since TO's address is likely + PTR+offset, we have to replace it. 
*/ + emit_move_insn (ptr, XEXP (to, 0)); + to = replace_equiv_address (to, ptr); + emit_move_insn (rem, plus_constant (ptr_mode, rem, -blksize)); + } + + /* Iterate over power-of-two block sizes from the maximum length to + the least significant bit possibly set in the length. */ + for (int i = max_bits; i >= sctz_len; i--) + { + rtx_code_label *label = NULL; + blksize = HOST_WIDE_INT_1U << i; + + /* If we're past the bits shared between min_ and max_len, expand + a test on the dynamic length, comparing it with the + BLKSIZE. */ + if (i <= tst_bits) + { + label = gen_label_rtx (); + emit_cmp_and_jump_insns (rem, GEN_INT (blksize), LT, NULL, + ptr_mode, 1, label, + profile_probability::even ()); + } + /* If we are at a bit that is in the prefix shared by min_ and + max_len, skip this BLKSIZE if the bit is clear. */ + else if ((max_len & blksize) == 0) + continue; + + /* Issue a store of BLKSIZE bytes. */ + to = store_by_pieces (to, blksize, + constfun, constfundata, + align, true, + i != sctz_len ? RETURN_END : RETURN_BEGIN); + + /* Adjust REM and PTR, unless this is the last iteration. */ + if (i != sctz_len) + { + emit_move_insn (ptr, XEXP (to, 0)); + to = replace_equiv_address (to, ptr); + emit_move_insn (rem, plus_constant (ptr_mode, rem, -blksize)); + } + + if (label) + { + emit_label (label); + + /* Given conditional stores, the offset can no longer be + known, so clear it. */ + clear_mem_offset (to); + } + } + + return true; +} + /* Helper function to do the actual work for expand_builtin_memset. The arguments to the builtin_memset call DEST, VAL, and LEN are broken out so that this can also be called without constructing an actual CALL_EXPR. 
@@ -6721,7 +6881,8 @@ expand_builtin_memset_args (tree dest, tree val, tree len, dest_mem = get_memory_rtx (dest, len); val_mode = TYPE_MODE (unsigned_char_type_node); - if (TREE_CODE (val) != INTEGER_CST) + if (TREE_CODE (val) != INTEGER_CST + || target_char_cast (val, &c)) { rtx val_rtx; @@ -6745,7 +6906,12 @@ expand_builtin_memset_args (tree dest, tree val, tree len, else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx, dest_align, expected_align, expected_size, min_size, max_size, - probable_max_size)) + probable_max_size) + && !try_store_by_multiple_pieces (dest_mem, len_rtx, + tree_ctz (len), + min_size, max_size, + val_rtx, 0, + dest_align)) goto do_libcall; dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); @@ -6753,9 +6919,6 @@ expand_builtin_memset_args (tree dest, tree val, tree len, return dest_mem; } - if (target_char_cast (val, &c)) - goto do_libcall; - if (c) { if (tree_fits_uhwi_p (len) @@ -6769,7 +6932,12 @@ expand_builtin_memset_args (tree dest, tree val, tree len, gen_int_mode (c, val_mode), dest_align, expected_align, expected_size, min_size, max_size, - probable_max_size)) + probable_max_size) + && !try_store_by_multiple_pieces (dest_mem, len_rtx, + tree_ctz (len), + min_size, max_size, + NULL_RTX, c, + dest_align)) goto do_libcall; dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); @@ -6783,7 +6951,7 @@ expand_builtin_memset_args (tree dest, tree val, tree len, ? 
BLOCK_OP_TAILCALL : BLOCK_OP_NORMAL, expected_align, expected_size, min_size, max_size, - probable_max_size); + probable_max_size, tree_ctz (len)); if (dest_addr == 0) { @@ -3150,7 +3150,8 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method, unsigned int expected_align, HOST_WIDE_INT expected_size, unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, - unsigned HOST_WIDE_INT probable_max_size) + unsigned HOST_WIDE_INT probable_max_size, + unsigned ctz_size) { machine_mode mode = GET_MODE (object); unsigned int align; @@ -3197,6 +3198,10 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method, expected_align, expected_size, min_size, max_size, probable_max_size)) ; + else if (try_store_by_multiple_pieces (object, size, ctz_size, + min_size, max_size, + NULL_RTX, 0, align)) + ; else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object))) return set_storage_via_libcall (object, size, const0_rtx, method == BLOCK_OP_TAILCALL); @@ -3214,7 +3219,7 @@ clear_storage (rtx object, rtx size, enum block_op_methods method) min = max = UINTVAL (size); else max = GET_MODE_MASK (GET_MODE (size)); - return clear_storage_hints (object, size, method, 0, -1, min, max, max); + return clear_storage_hints (object, size, method, 0, -1, min, max, max, 0); } @@ -201,7 +201,8 @@ extern rtx clear_storage_hints (rtx, rtx, enum block_op_methods, unsigned int, HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT); + unsigned HOST_WIDE_INT, + unsigned); /* The same, but always output an library call. 
*/ extern rtx set_storage_via_libcall (rtx, rtx, rtx, bool = false); @@ -232,6 +233,16 @@ extern int can_store_by_pieces (unsigned HOST_WIDE_INT, extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT, by_pieces_constfn, void *, unsigned int, bool, memop_ret); +/* If can_store_by_pieces passes for worst-case values near MAX_LEN, call + store_by_pieces within conditionals so as to handle variable LEN efficiently, + storing VAL, if non-NULL_RTX, or valc instead. */ +extern bool try_store_by_multiple_pieces (rtx to, rtx len, + unsigned int ctz_len, + unsigned HOST_WIDE_INT min_len, + unsigned HOST_WIDE_INT max_len, + rtx val, char valc, + unsigned int align); + /* Emit insns to set X from Y. */ extern rtx_insn *emit_move_insn (rtx, rtx); extern rtx_insn *gen_move_insn (rtx, rtx); diff --git a/gcc/passes.def b/gcc/passes.def index f7c4289..55e8164 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -336,8 +336,9 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_thread_jumps); NEXT_PASS (pass_vrp, false /* warn_array_bounds_p */); /* Threading can leave many const/copy propagations in the IL. - Clean them up. */ - NEXT_PASS (pass_copy_prop); + Clean them up. Instead of just copy_prop, we use ccp to + compute alignment and nonzero bits. */ + NEXT_PASS (pass_ccp, true /* nonzero_p */); NEXT_PASS (pass_warn_restrict); NEXT_PASS (pass_dse); NEXT_PASS (pass_cd_dce, true /* update_address_taken_p */); |