aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua>2005-05-06 19:31:13 +0000
committerJakub Jelinek <jakub@gcc.gnu.org>2005-05-06 21:31:13 +0200
commit95935e2db5c45bef5631f51538d1e10d8b5b7524 (patch)
treea8c4fe03058504b928f111c7c07e133341661d9e
parent2b41c040165ff3e5fe42a491f17b9e8a5475afff (diff)
downloadgcc-95935e2db5c45bef5631f51538d1e10d8b5b7524.zip
gcc-95935e2db5c45bef5631f51538d1e10d8b5b7524.tar.gz
gcc-95935e2db5c45bef5631f51538d1e10d8b5b7524.tar.bz2
re PR target/21329 (optimize i386 block copy)
2005-05-06 Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua> Jakub Jelinek <jakub@redhat.com> PR target/21329 * config/i386/i386.c (ix86_expand_movmem): Don't use rep; movsb for -Os if (movsl;)*(movsw;)?(movsb;)? sequence is shorter. Don't use rep; movs{l,q} if the repetition count is really small, instead use a sequence of movs{l,q} instructions. Co-Authored-By: Jakub Jelinek <jakub@redhat.com> From-SVN: r99330
-rw-r--r--gcc/ChangeLog9
-rw-r--r--gcc/config/i386/i386.c54
2 files changed, 50 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 846b95f..1db6106 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2005-05-06 Denis Vlasenko <vda@port.imtp.ilyichevsk.odessa.ua>
+ Jakub Jelinek <jakub@redhat.com>
+
+ PR target/21329
+ * config/i386/i386.c (ix86_expand_movmem): Don't use rep; movsb
+ for -Os if (movsl;)*(movsw;)?(movsb;)? sequence is shorter.
+ Don't use rep; movs{l,q} if the repetition count is really small,
+ instead use a sequence of movs{l,q} instructions.
+
2005-05-06 Jeff Law <law@redhat.com>
PR tree-optimization/21380
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 49d22fd..1e8326d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -11285,9 +11285,20 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
src = replace_equiv_address_nv (src, srcreg);
/* When optimizing for size emit simple rep ; movsb instruction for
- counts not divisible by 4. */
+ counts not divisible by 4, except when (movsl;)*(movsw;)?(movsb;)?
+ sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
+ Size of (movsl;)*(movsw;)?(movsb;)? sequence is
+ count / 4 + (count & 3), the other sequence is either 4 or 7 bytes,
+ but we don't know whether upper 24 (resp. 56) bits of %ecx will be
+ known to be zero or not. The rep; movsb sequence causes higher
+ register pressure though, so take that into account. */
- if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
+ if ((!optimize || optimize_size)
+ && (count == 0
+ || ((count & 0x03)
+ && (!optimize_size
+ || count > 5 * 4
+ || (count & 3) + count / 4 > 6))))
{
emit_insn (gen_cld ());
countreg = ix86_zero_extend_to_Pmode (count_exp);
@@ -11313,19 +11324,36 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
emit_insn (gen_cld ());
if (count & ~(size - 1))
{
- countreg = copy_to_mode_reg (counter_mode,
- GEN_INT ((count >> (size == 4 ? 2 : 3))
- & (TARGET_64BIT ? -1 : 0x3fffffff)));
- countreg = ix86_zero_extend_to_Pmode (countreg);
+ if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
+ {
+ enum machine_mode movs_mode = size == 4 ? SImode : DImode;
+
+ while (offset < (count & ~(size - 1)))
+ {
+ srcmem = adjust_automodify_address_nv (src, movs_mode,
+ srcreg, offset);
+ dstmem = adjust_automodify_address_nv (dst, movs_mode,
+ destreg, offset);
+ emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
+ offset += size;
+ }
+ }
+ else
+ {
+ countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
+ & (TARGET_64BIT ? -1 : 0x3fffffff));
+ countreg = copy_to_mode_reg (counter_mode, countreg);
+ countreg = ix86_zero_extend_to_Pmode (countreg);
- destexp = gen_rtx_ASHIFT (Pmode, countreg,
- GEN_INT (size == 4 ? 2 : 3));
- srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
- destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
+ destexp = gen_rtx_ASHIFT (Pmode, countreg,
+ GEN_INT (size == 4 ? 2 : 3));
+ srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
+ destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
- emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
- countreg, destexp, srcexp));
- offset = count & ~(size - 1);
+ emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
+ countreg, destexp, srcexp));
+ offset = count & ~(size - 1);
+ }
}
if (size == 8 && (count & 0x04))
{