diff options
author | Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> | 2005-05-06 19:31:13 +0000 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2005-05-06 21:31:13 +0200 |
commit | 95935e2db5c45bef5631f51538d1e10d8b5b7524 (patch) | |
tree | a8c4fe03058504b928f111c7c07e133341661d9e /gcc | |
parent | 2b41c040165ff3e5fe42a491f17b9e8a5475afff (diff) | |
download | gcc-95935e2db5c45bef5631f51538d1e10d8b5b7524.zip gcc-95935e2db5c45bef5631f51538d1e10d8b5b7524.tar.gz gcc-95935e2db5c45bef5631f51538d1e10d8b5b7524.tar.bz2 |
re PR target/21329 (optimize i386 block copy)
2005-05-06 Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua>
Jakub Jelinek <jakub@redhat.com>
PR target/21329
* config/i386/i386.c (ix86_expand_movmem): Don't use rep; movsb
for -Os if (movsl;)*(movsw;)?(movsb;)? sequence is shorter.
Don't use rep; movs{l,q} if the repetition count is really small,
instead use a sequence of movs{l,q} instructions.
Co-Authored-By: Jakub Jelinek <jakub@redhat.com>
From-SVN: r99330
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 54 |
2 files changed, 50 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 846b95f..1db6106 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2005-05-06 Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> + Jakub Jelinek <jakub@redhat.com> + + PR target/21329 + * config/i386/i386.c (ix86_expand_movmem): Don't use rep; movsb + for -Os if (movsl;)*(movsw;)?(movsb;)? sequence is shorter. + Don't use rep; movs{l,q} if the repetition count is really small, + instead use a sequence of movs{l,q} instructions. + 2005-05-06 Jeff Law <law@redhat.com> PR tree-optimization/21380 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 49d22fd..1e8326d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -11285,9 +11285,20 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) src = replace_equiv_address_nv (src, srcreg); /* When optimizing for size emit simple rep ; movsb instruction for - counts not divisible by 4. */ + counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)? + sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb. + Size of (movsl;)*(movsw;)?(movsb;)? sequence is + count / 4 + (count & 3), the other sequence is either 4 or 7 bytes, + but we don't know whether upper 24 (resp. 56) bits of %ecx will be + known to be zero or not. The rep; movsb sequence causes higher + register pressure though, so take that into account. */ - if ((!optimize || optimize_size) && (count == 0 || (count & 0x03))) + if ((!optimize || optimize_size) + && (count == 0 + || ((count & 0x03) + && (!optimize_size + || count > 5 * 4 + || (count & 3) + count / 4 > 6)))) { emit_insn (gen_cld ()); countreg = ix86_zero_extend_to_Pmode (count_exp); @@ -11313,19 +11324,36 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) emit_insn (gen_cld ()); if (count & ~(size - 1)) { - countreg = copy_to_mode_reg (counter_mode, - GEN_INT ((count >> (size == 4 ? 2 : 3)) - & (TARGET_64BIT ? 
-1 : 0x3fffffff))); - countreg = ix86_zero_extend_to_Pmode (countreg); + if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4) + { + enum machine_mode movs_mode = size == 4 ? SImode : DImode; + + while (offset < (count & ~(size - 1))) + { + srcmem = adjust_automodify_address_nv (src, movs_mode, + srcreg, offset); + dstmem = adjust_automodify_address_nv (dst, movs_mode, + destreg, offset); + emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem)); + offset += size; + } + } + else + { + countreg = GEN_INT ((count >> (size == 4 ? 2 : 3)) + & (TARGET_64BIT ? -1 : 0x3fffffff)); + countreg = copy_to_mode_reg (counter_mode, countreg); + countreg = ix86_zero_extend_to_Pmode (countreg); - destexp = gen_rtx_ASHIFT (Pmode, countreg, - GEN_INT (size == 4 ? 2 : 3)); - srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); - destexp = gen_rtx_PLUS (Pmode, destexp, destreg); + destexp = gen_rtx_ASHIFT (Pmode, countreg, + GEN_INT (size == 4 ? 2 : 3)); + srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg); + destexp = gen_rtx_PLUS (Pmode, destexp, destreg); - emit_insn (gen_rep_mov (destreg, dst, srcreg, src, - countreg, destexp, srcexp)); - offset = count & ~(size - 1); + emit_insn (gen_rep_mov (destreg, dst, srcreg, src, + countreg, destexp, srcexp)); + offset = count & ~(size - 1); + } } if (size == 8 && (count & 0x04)) { |