diff options
author | Sandra Loosemore <sandra@codesourcery.com> | 2007-08-24 19:54:05 -0400 |
---|---|---|
committer | Sandra Loosemore <sandra@gcc.gnu.org> | 2007-08-24 19:54:05 -0400 |
commit | cfa311506c8dbc85b11fd7e52f3d177d850e1c11 (patch) | |
tree | 6895f19a8975ec8b4b0758ed56b3ed5a18bd56e7 /gcc | |
parent | f9837879d58101f62cd52cc7d17b5dd76b975978 (diff) | |
download | gcc-cfa311506c8dbc85b11fd7e52f3d177d850e1c11.zip gcc-cfa311506c8dbc85b11fd7e52f3d177d850e1c11.tar.gz gcc-cfa311506c8dbc85b11fd7e52f3d177d850e1c11.tar.bz2 |
re PR target/11787 (always call memcpy for block move in mips16)
2007-08-24 Sandra Loosemore <sandra@codesourcery.com>
Nigel Stephens <nigel@mips.com>
PR target/11787
gcc/
* doc/tm.texi (SET_RATIO, SET_BY_PIECES_P): Document new macros.
(STORE_BY_PIECES_P): No longer applies to __builtin_memset.
* expr.c (SET_BY_PIECES_P): Define.
(can_store_by_pieces, store_by_pieces): Add MEMSETP argument; use
it to decide whether to use SET_BY_PIECES_P or STORE_BY_PIECES_P.
(store_expr): Pass MEMSETP argument to can_store_by_pieces and
store_by_pieces.
* expr.h (SET_RATIO): Define.
(can_store_by_pieces, store_by_pieces): Update prototypes.
* builtins.c (expand_builtin_memcpy): Pass MEMSETP argument to
can_store_by_pieces/store_by_pieces.
(expand_builtin_mempcpy_args): Likewise.
(expand_builtin_strncpy): Likewise.
(expand_builtin_memset_args): Likewise. Also remove special case
for optimize_size so that can_store_by_pieces/SET_BY_PIECES_P can
decide what to do instead.
* value-prof.c (tree_stringops_transform): Pass MEMSETP argument
to can_store_by_pieces.
* config/sh/sh.h (SET_BY_PIECES_P): Clone from STORE_BY_PIECES_P.
* config/s390/s390.h (SET_BY_PIECES_P): Likewise.
* config/mips/mips.opt (mmemcpy): Change from Var to Mask.
* config/mips/mips.c (override_options): Make -Os default to -mmemcpy.
* config/mips/mips.h (MIPS_CALL_RATIO): Define.
(MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Define.
(STORE_BY_PIECES_P): Define.
Co-Authored-By: Nigel Stephens <nigel@mips.com>
From-SVN: r127790
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 33 | ||||
-rw-r--r-- | gcc/builtins.c | 26 | ||||
-rw-r--r-- | gcc/config/mips/mips.c | 5 | ||||
-rw-r--r-- | gcc/config/mips/mips.h | 51 | ||||
-rw-r--r-- | gcc/config/mips/mips.opt | 2 | ||||
-rw-r--r-- | gcc/config/s390/s390.h | 7 | ||||
-rw-r--r-- | gcc/config/sh/sh.h | 2 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 26 | ||||
-rw-r--r-- | gcc/expr.c | 36 | ||||
-rw-r--r-- | gcc/expr.h | 16 | ||||
-rw-r--r-- | gcc/value-prof.c | 4 |
11 files changed, 173 insertions, 35 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 450c481..cd92965 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,36 @@ +2007-08-24 Sandra Loosemore <sandra@codesourcery.com> + Nigel Stephens <nigel@mips.com> + + PR target/11787 + + * doc/tm.texi (SET_RATIO, SET_BY_PIECES_P): Document new macros. + (STORE_BY_PIECES_P): No longer applies to __builtin_memset. + * expr.c (SET_BY_PIECES_P): Define. + (can_store_by_pieces, store_by_pieces): Add MEMSETP argument; use + it to decide whether to use SET_BY_PIECES_P or STORE_BY_PIECES_P. + (store_expr): Pass MEMSETP argument to can_store_by_pieces and + store_by_pieces. + * expr.h (SET_RATIO): Define. + (can_store_by_pieces, store_by_pieces): Update prototypes. + * builtins.c (expand_builtin_memcpy): Pass MEMSETP argument to + can_store_by_pieces/store_by_pieces. + (expand_builtin_memcpy_args): Likewise. + (expand_builtin_strncpy): Likewise. + (expand_builtin_memset_args): Likewise. Also remove special case + for optimize_size so that can_store_by_pieces/SET_BY_PIECES_P can + decide what to do instead. + * value-prof.c (tree_stringops_transform): Pass MEMSETP argument + to can_store_by_pieces. + + * config/sh/sh.h (SET_BY_PIECES_P): Clone from STORE_BY_PIECES_P. + * config/s390/s390.h (SET_BY_PIECES_P): Likewise. + + * config/mips/mips.opt (mmemcpy): Change from Var to Mask. + * config/mips/mips.c (override_options): Make -Os default to -mmemcpy. + * config/mips/mips.h (MIPS_CALL_RATIO): Define. + (MOVE_RATIO, CLEAR_RATIO, SET_RATIO): Define. + (STORE_BY_PIECES_P): Define. + 2007-08-24 Tom Tromey <tromey@redhat.com> * varpool.c (varpool_last_needed_node): Fix comment typo. 
diff --git a/gcc/builtins.c b/gcc/builtins.c index 8d2657b..e353e4d 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3331,11 +3331,11 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode) && GET_CODE (len_rtx) == CONST_INT && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1 && can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str, - (void *) src_str, dest_align)) + (void *) src_str, dest_align, false)) { dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx), builtin_memcpy_read_str, - (void *) src_str, dest_align, 0); + (void *) src_str, dest_align, false, 0); dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); dest_mem = convert_memory_address (ptr_mode, dest_mem); return dest_mem; @@ -3444,13 +3444,14 @@ expand_builtin_mempcpy_args (tree dest, tree src, tree len, tree type, && GET_CODE (len_rtx) == CONST_INT && (unsigned HOST_WIDE_INT) INTVAL (len_rtx) <= strlen (src_str) + 1 && can_store_by_pieces (INTVAL (len_rtx), builtin_memcpy_read_str, - (void *) src_str, dest_align)) + (void *) src_str, dest_align, false)) { dest_mem = get_memory_rtx (dest, len); set_mem_align (dest_mem, dest_align); dest_mem = store_by_pieces (dest_mem, INTVAL (len_rtx), builtin_memcpy_read_str, - (void *) src_str, dest_align, endp); + (void *) src_str, dest_align, + false, endp); dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); dest_mem = convert_memory_address (ptr_mode, dest_mem); return dest_mem; @@ -3792,13 +3793,13 @@ expand_builtin_strncpy (tree exp, rtx target, enum machine_mode mode) if (!p || dest_align == 0 || !host_integerp (len, 1) || !can_store_by_pieces (tree_low_cst (len, 1), builtin_strncpy_read_str, - (void *) p, dest_align)) + (void *) p, dest_align, false)) return NULL_RTX; dest_mem = get_memory_rtx (dest, len); store_by_pieces (dest_mem, tree_low_cst (len, 1), builtin_strncpy_read_str, - (void *) p, dest_align, 0); + (void *) p, dest_align, false, 0); dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX); 
dest_mem = convert_memory_address (ptr_mode, dest_mem); return dest_mem; @@ -3926,14 +3927,15 @@ expand_builtin_memset_args (tree dest, tree val, tree len, * We can't pass builtin_memset_gen_str as that emits RTL. */ c = 1; if (host_integerp (len, 1) - && !(optimize_size && tree_low_cst (len, 1) > 1) && can_store_by_pieces (tree_low_cst (len, 1), - builtin_memset_read_str, &c, dest_align)) + builtin_memset_read_str, &c, dest_align, + true)) { val_rtx = force_reg (TYPE_MODE (unsigned_char_type_node), val_rtx); store_by_pieces (dest_mem, tree_low_cst (len, 1), - builtin_memset_gen_str, val_rtx, dest_align, 0); + builtin_memset_gen_str, val_rtx, dest_align, + true, 0); } else if (!set_storage_via_setmem (dest_mem, len_rtx, val_rtx, dest_align, expected_align, @@ -3951,11 +3953,11 @@ expand_builtin_memset_args (tree dest, tree val, tree len, if (c) { if (host_integerp (len, 1) - && !(optimize_size && tree_low_cst (len, 1) > 1) && can_store_by_pieces (tree_low_cst (len, 1), - builtin_memset_read_str, &c, dest_align)) + builtin_memset_read_str, &c, dest_align, + true)) store_by_pieces (dest_mem, tree_low_cst (len, 1), - builtin_memset_read_str, &c, dest_align, 0); + builtin_memset_read_str, &c, dest_align, true, 0); else if (!set_storage_via_setmem (dest_mem, len_rtx, GEN_INT (c), dest_align, expected_align, expected_size)) diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c index 0dd25fc..2c2f11e 100644 --- a/gcc/config/mips/mips.c +++ b/gcc/config/mips/mips.c @@ -5323,6 +5323,11 @@ override_options (void) flag_delayed_branch = 0; } + /* Prefer a call to memcpy over inline code when optimizing for size, + though see MOVE_RATIO in mips.h. 
*/ + if (optimize_size && (target_flags_explicit & MASK_MEMCPY) == 0) + target_flags |= MASK_MEMCPY; + #ifdef MIPS_TFMODE_FORMAT REAL_MODE_FORMAT (TFmode) = &MIPS_TFMODE_FORMAT; #endif diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 71809c2..c3797e5 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -2785,6 +2785,57 @@ while (0) #undef PTRDIFF_TYPE #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") + +/* The base cost of a memcpy call, for MOVE_RATIO and friends. These + values were determined experimentally by benchmarking with CSiBE. + In theory, the call overhead is higher for TARGET_ABICALLS (especially + for o32 where we have to restore $gp afterwards as well as make an + indirect call), but in practice, bumping this up higher for + TARGET_ABICALLS doesn't make much difference to code size. */ + +#define MIPS_CALL_RATIO 8 + +/* Define MOVE_RATIO to encourage use of movmemsi when enabled, + since it should always generate code at least as good as + move_by_pieces(). But when inline movmemsi pattern is disabled + (i.e., with -mips16 or -mmemcpy), instead use a value approximating + the length of a memcpy call sequence, so that move_by_pieces will + generate inline code if it is shorter than a function call. + Since move_by_pieces_ninsns() counts memory-to-memory moves, but + we'll have to generate a load/store pair for each, halve the value of + MIPS_CALL_RATIO to take that into account. + The default value for MOVE_RATIO when HAVE_movmemsi is true is 2. + There is no point to setting it to less than this to try to disable + move_by_pieces entirely, because that also disables some desirable + tree-level optimizations, specifically related to optimizing a + one-byte string copy into a simple move byte operation. */ + +#define MOVE_RATIO \ + ((TARGET_MIPS16 || TARGET_MEMCPY) ? 
MIPS_CALL_RATIO / 2 : 2) + +/* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ + +#define CLEAR_RATIO \ + (optimize_size ? MIPS_CALL_RATIO : 15) + +/* This is similar to CLEAR_RATIO, but for a non-zero constant, so when + optimizing for size adjust the ratio to account for the overhead of + loading the constant and replicating it across the word. */ + +#define SET_RATIO \ + (optimize_size ? MIPS_CALL_RATIO - 2 : 15) + +/* STORE_BY_PIECES_P can be used when copying a constant string, but + in that case each word takes 3 insns (lui, ori, sw), or more in + 64-bit mode, instead of 2 (lw, sw). For now we always fail this + and let the move_by_pieces code copy the string from read-only + memory. In the future, this could be tuned further for multi-issue + CPUs that can issue stores down one pipe and arithmetic instructions + down another; in that case, the lui/ori/sw combination would be a + win for long enough strings. */ + +#define STORE_BY_PIECES_P(SIZE, ALIGN) 0 #ifndef __mips16 /* Since the bits of the _init and _fini function is spread across diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index 4a752ec..6f6c109 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -173,7 +173,7 @@ Target Report RejectNegative Mask(LONG64) Use a 64-bit long type mmemcpy -Target Report Var(TARGET_MEMCPY) +Target Report Mask(MEMCPY) Don't optimize block moves mmips-tfile diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 4fb5505..8cc8edf 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -803,10 +803,13 @@ extern struct rtx_def *s390_compare_op0, *s390_compare_op1, *s390_compare_emitte || (TARGET_64BIT && (SIZE) == 8) ) /* This macro is used to determine whether store_by_pieces should be - called to "memset" storage with byte values other than zero, or - to "memcpy" storage when the source is a constant string. 
*/ + called to "memcpy" storage when the source is a constant string. */ #define STORE_BY_PIECES_P(SIZE, ALIGN) MOVE_BY_PIECES_P (SIZE, ALIGN) +/* Likewise to decide whether to "memset" storage with byte values + other than zero. */ +#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P (SIZE, ALIGN) + /* Don't perform CSE on function addresses. */ #define NO_FUNCTION_CSE diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h index 71f5b6f..8f299b7 100644 --- a/gcc/config/sh/sh.h +++ b/gcc/config/sh/sh.h @@ -2184,6 +2184,8 @@ struct sh_args { (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ < (TARGET_SMALLCODE ? 2 : ((ALIGN >= 32) ? 16 : 2))) +#define SET_BY_PIECES_P(SIZE, ALIGN) STORE_BY_PIECES_P(SIZE, ALIGN) + /* Macros to check register numbers against specific register classes. */ /* These assume that REGNO is a hard or pseudo reg number. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 0ec10c5..a913b80 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5897,12 +5897,30 @@ will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less than @code{CLEAR_RATIO}. @end defmac +@defmac SET_RATIO +The threshold of number of scalar move insns, @emph{below} which a sequence +of insns should be generated to set memory to a constant value, instead of +a block set insn or a library call. +Increasing the value will always make code faster, but +eventually incurs high cost in increased code size. + +If you don't define this, it defaults to the value of @code{MOVE_RATIO}. +@end defmac + +@defmac SET_BY_PIECES_P (@var{size}, @var{alignment}) +A C expression used to determine whether @code{store_by_pieces} will be +used to set a chunk of memory to a constant value, or whether some +other mechanism will be used. Used by @code{__builtin_memset} when +storing values other than constant zero. +Defaults to 1 if @code{move_by_pieces_ninsns} returns less +than @code{SET_RATIO}. 
+@end defmac + @defmac STORE_BY_PIECES_P (@var{size}, @var{alignment}) A C expression used to determine whether @code{store_by_pieces} will be -used to set a chunk of memory to a constant value, or whether some other -mechanism will be used. Used by @code{__builtin_memset} when storing -values other than constant zero and by @code{__builtin_strcpy} when -when called with a constant source string. +used to set a chunk of memory to a constant string value, or whether some +other mechanism will be used. Used by @code{__builtin_strcpy} when +called with a constant source string. Defaults to 1 if @code{move_by_pieces_ninsns} returns less than @code{MOVE_RATIO}. @end defmac @@ -186,8 +186,15 @@ static bool float_extend_from_mem[NUM_MACHINE_MODES][NUM_MACHINE_MODES]; #endif /* This macro is used to determine whether store_by_pieces should be - called to "memset" storage with byte values other than zero, or - to "memcpy" storage when the source is a constant string. */ + called to "memset" storage with byte values other than zero. */ +#ifndef SET_BY_PIECES_P +#define SET_BY_PIECES_P(SIZE, ALIGN) \ + (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ + < (unsigned int) SET_RATIO) +#endif + +/* This macro is used to determine whether store_by_pieces should be + called to "memcpy" storage when the source is a constant string. */ #ifndef STORE_BY_PIECES_P #define STORE_BY_PIECES_P(SIZE, ALIGN) \ (move_by_pieces_ninsns (SIZE, ALIGN, STORE_MAX_PIECES + 1) \ @@ -2191,13 +2198,14 @@ use_group_regs (rtx *call_fusage, rtx regs) /* Determine whether the LEN bytes generated by CONSTFUN can be stored to memory using several move instructions. CONSTFUNDATA is a pointer which will be passed as argument in every CONSTFUN call. - ALIGN is maximum alignment we can assume. Return nonzero if a - call to store_by_pieces should succeed. */ + ALIGN is maximum alignment we can assume. MEMSETP is true if this is + a memset operation and false if it's a copy of a constant string. 
+ Return nonzero if a call to store_by_pieces should succeed. */ int can_store_by_pieces (unsigned HOST_WIDE_INT len, rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode), - void *constfundata, unsigned int align) + void *constfundata, unsigned int align, bool memsetp) { unsigned HOST_WIDE_INT l; unsigned int max_size; @@ -2210,7 +2218,9 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, if (len == 0) return 1; - if (! STORE_BY_PIECES_P (len, align)) + if (! (memsetp + ? SET_BY_PIECES_P (len, align) + : STORE_BY_PIECES_P (len, align))) return 0; tmode = mode_for_size (STORE_MAX_PIECES * BITS_PER_UNIT, MODE_INT, 1); @@ -2285,7 +2295,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, /* Generate several move instructions to store LEN bytes generated by CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a pointer which will be passed as argument in every CONSTFUN call. - ALIGN is maximum alignment we can assume. + ALIGN is maximum alignment we can assume. MEMSETP is true if this is + a memset operation and false if it's a copy of a constant string. If ENDP is 0 return to, if ENDP is 1 return memory at the end ala mempcpy, and if ENDP is 2 return memory the end minus one byte ala stpcpy. */ @@ -2293,7 +2304,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len, rtx store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, rtx (*constfun) (void *, HOST_WIDE_INT, enum machine_mode), - void *constfundata, unsigned int align, int endp) + void *constfundata, unsigned int align, bool memsetp, int endp) { struct store_by_pieces data; @@ -2303,7 +2314,9 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, return to; } - gcc_assert (STORE_BY_PIECES_P (len, align)); + gcc_assert (memsetp + ? 
SET_BY_PIECES_P (len, align) + : STORE_BY_PIECES_P (len, align)); data.constfun = constfun; data.constfundata = constfundata; data.len = len; @@ -4498,7 +4511,7 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal) str_copy_len = MIN (str_copy_len, exp_len); if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str, (void *) TREE_STRING_POINTER (exp), - MEM_ALIGN (target))) + MEM_ALIGN (target), false)) goto normal_expr; dest_mem = target; @@ -4507,7 +4520,8 @@ store_expr (tree exp, rtx target, int call_param_p, bool nontemporal) str_copy_len, builtin_strncpy_read_str, (void *) TREE_STRING_POINTER (exp), MEM_ALIGN (target), - exp_len > str_copy_len ? 1 : 0); + exp_len > str_copy_len ? 1 : 0, + false); if (exp_len > str_copy_len) clear_storage (dest_mem, GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL); @@ -84,6 +84,13 @@ enum expand_modifier {EXPAND_NORMAL = 0, EXPAND_STACK_PARM, EXPAND_SUM, #define CLEAR_RATIO (optimize_size ? 3 : 15) #endif #endif + +/* If a memory set (to value other than zero) operation would take + SET_RATIO or more simple move-instruction sequences, we will do a movmem + or libcall instead. */ +#ifndef SET_RATIO +#define SET_RATIO MOVE_RATIO +#endif enum direction {none, upward, downward}; @@ -444,20 +451,23 @@ extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int); CONSTFUN with several move instructions by store_by_pieces function. CONSTFUNDATA is a pointer which will be passed as argument in every CONSTFUN call. - ALIGN is maximum alignment we can assume. */ + ALIGN is maximum alignment we can assume. + MEMSETP is true if this is a real memset/bzero, not a copy + of a const string. */ extern int can_store_by_pieces (unsigned HOST_WIDE_INT, rtx (*) (void *, HOST_WIDE_INT, enum machine_mode), - void *, unsigned int); + void *, unsigned int, bool); /* Generate several move instructions to store LEN bytes generated by CONSTFUN to block TO. (A MEM rtx with BLKmode). 
CONSTFUNDATA is a pointer which will be passed as argument in every CONSTFUN call. ALIGN is maximum alignment we can assume. + MEMSETP is true if this is a real memset/bzero, not a copy. Returns TO + LEN. */ extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT, rtx (*) (void *, HOST_WIDE_INT, enum machine_mode), - void *, unsigned int, int); + void *, unsigned int, bool, int); /* Emit insns to set X from Y. */ extern rtx emit_move_insn (rtx, rtx); diff --git a/gcc/value-prof.c b/gcc/value-prof.c index 937688f..124a3c8 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -1392,13 +1392,13 @@ tree_stringops_transform (block_stmt_iterator *bsi) case BUILT_IN_MEMSET: if (!can_store_by_pieces (val, builtin_memset_read_str, CALL_EXPR_ARG (call, 1), - dest_align)) + dest_align, true)) return false; break; case BUILT_IN_BZERO: if (!can_store_by_pieces (val, builtin_memset_read_str, integer_zero_node, - dest_align)) + dest_align, true)) return false; break; default: |