author    Geoffrey Keating <geoffk@apple.com>    2004-09-02 22:38:18 +0000
committer Geoffrey Keating <geoffk@gcc.gnu.org>  2004-09-02 22:38:18 +0000
commit    5514620adbc7b2f9d5e7714d5e910da07fe6c800 (patch)
tree      799a0795138fb79e938ceb80a461d68bfb419ccc
parent    02f14c3856c17b3e65c2c791b3830386daabc2e7 (diff)
rs6000.c (expand_block_clear): Use vector instructions if available.

	* config/rs6000/rs6000.c (expand_block_clear): Use vector
	instructions if available.
	(expand_block_move): Likewise.

From-SVN: r86996
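Illustrative sketch, not part of the commit: with this change, a small fixed-size clear of a 16-byte-aligned object can be expanded inline with V4SImode (stvx) stores under -maltivec instead of word-at-a-time stores or a library call. The function and variable names below are hypothetical and only mirror the new tests further down.

/* Hypothetical example (same pattern as gcc.dg/ppc-vector-memset.c below).
   With -O -maltivec, this 32-byte aligned clear is a candidate for the
   new V4SImode path in expand_block_clear: two vector stores rather
   than eight word stores.  */
extern void use_buffer (int *);   /* hypothetical consumer, keeps buf live */

void clear_buffer (void)
{
  int buf[8] __attribute__ ((aligned (16)));
  __builtin_memset (buf, 0, sizeof (buf));
  use_buffer (buf);
}

Compiling such a function with -O -maltivec and scanning the assembly for stvx, as the new test does, is one way to confirm the vector path was taken.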
-rw-r--r--  gcc/ChangeLog                              6
-rw-r--r--  gcc/config/rs6000/rs6000.c                57
-rw-r--r--  gcc/testsuite/ChangeLog                    5
-rw-r--r--  gcc/testsuite/gcc.dg/ppc-vector-memcpy.c   9
-rw-r--r--  gcc/testsuite/gcc.dg/ppc-vector-memset.c  12
5 files changed, 70 insertions(+), 19 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8c2573f..49855d6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2004-09-02 Geoffrey Keating <geoffk@apple.com>
+
+ * config/rs6000/rs6000.c (expand_block_clear): Use vector
+ instructions if available.
+ (expand_block_move): Likewise.
+
2004-09-03 Jan Hubicka <jh@suse.cz>
* rtl.def (RANGE_INFO, RANGE_REG, RANGE_VAR, RANGE_LIVE): Kill.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0ec9c57..335dbe3 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -8319,11 +8319,12 @@ expand_block_clear (rtx operands[])
rtx orig_dest = operands[0];
rtx bytes_rtx = operands[1];
rtx align_rtx = operands[2];
- int constp = (GET_CODE (bytes_rtx) == CONST_INT);
- int align;
- int bytes;
+ bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
+ HOST_WIDE_INT align;
+ HOST_WIDE_INT bytes;
int offset;
int clear_bytes;
+ int clear_step;
/* If this is not a fixed size move, just call memcpy */
if (! constp)
@@ -8339,49 +8340,59 @@ expand_block_clear (rtx operands[])
if (bytes <= 0)
return 1;
- if (bytes > (TARGET_POWERPC64 && align >= 32 ? 64 : 32))
- return 0;
+ /* Use the builtin memset after a point, to avoid huge code bloat.
+ When optimize_size, avoid any significant code bloat; calling
+ memset is about 4 instructions, so allow for one instruction to
+ load zero and three to do clearing. */
+ if (TARGET_ALTIVEC && align >= 128)
+ clear_step = 16;
+ else if (TARGET_POWERPC64 && align >= 32)
+ clear_step = 8;
+ else
+ clear_step = 4;
- if (optimize_size && bytes > 16)
+ if (optimize_size && bytes > 3 * clear_step)
+ return 0;
+ if (! optimize_size && bytes > 8 * clear_step)
return 0;
for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
{
- rtx (*mov) (rtx, rtx);
enum machine_mode mode = BLKmode;
rtx dest;
- if (bytes >= 8 && TARGET_POWERPC64
- /* 64-bit loads and stores require word-aligned
- displacements. */
- && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
+ if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
+ {
+ clear_bytes = 16;
+ mode = V4SImode;
+ }
+ else if (bytes >= 8 && TARGET_POWERPC64
+ /* 64-bit loads and stores require word-aligned
+ displacements. */
+ && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
{
clear_bytes = 8;
mode = DImode;
- mov = gen_movdi;
}
- else if (bytes >= 4 && !STRICT_ALIGNMENT)
+ else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
{ /* move 4 bytes */
clear_bytes = 4;
mode = SImode;
- mov = gen_movsi;
}
- else if (bytes == 2 && !STRICT_ALIGNMENT)
+ else if (bytes == 2 && (align >= 16 || !STRICT_ALIGNMENT))
{ /* move 2 bytes */
clear_bytes = 2;
mode = HImode;
- mov = gen_movhi;
}
else /* move 1 byte at a time */
{
clear_bytes = 1;
mode = QImode;
- mov = gen_movqi;
}
dest = adjust_address (orig_dest, mode, offset);
- emit_insn ((*mov) (dest, const0_rtx));
+ emit_move_insn (dest, CONST0_RTX (mode));
}
return 1;
@@ -8441,7 +8452,15 @@ expand_block_move (rtx operands[])
enum machine_mode mode = BLKmode;
rtx src, dest;
- if (TARGET_STRING
+ /* Altivec first, since it will be faster than a string move
+ when it applies, and usually not significantly larger. */
+ if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
+ {
+ move_bytes = 16;
+ mode = V4SImode;
+ gen_func.mov = gen_movv4si;
+ }
+ else if (TARGET_STRING
&& bytes > 24 /* move up to 32 bytes at a time */
&& ! fixed_regs[5]
&& ! fixed_regs[6]
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 011aeff..df9b806 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2004-09-02 Geoffrey Keating <geoffk@apple.com>
+
+ * gcc.dg/ppc-vector-memcpy.c: New.
+ * gcc.dg/ppc-vector-memset.c: New.
+
2004-09-02 Chao-ying Fu <fu@mips.com>
* gcc.target/mips/mips-3d-1.c: New test.
diff --git a/gcc/testsuite/gcc.dg/ppc-vector-memcpy.c b/gcc/testsuite/gcc.dg/ppc-vector-memcpy.c
new file mode 100644
index 0000000..b6b9e00
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ppc-vector-memcpy.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O -maltivec" } */
+/* { dg-final { scan-assembler "lvx" } } */
+
+void foo(void)
+{
+ int x[8] __attribute__((aligned(128))) = { 1 };
+ bar (x);
+}
diff --git a/gcc/testsuite/gcc.dg/ppc-vector-memset.c b/gcc/testsuite/gcc.dg/ppc-vector-memset.c
new file mode 100644
index 0000000..615a025
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ppc-vector-memset.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-O -maltivec" } */
+/* { dg-final { scan-assembler "stvx" } } */
+
+#include <string.h>
+
+void foo(void)
+{
+ int x[8] __attribute__((aligned(128)));
+ memset (x, 0, sizeof (x));
+ bar (x);
+}
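Illustrative sketch, not part of the patch: the block-move side can be exercised the same way. A fixed-size copy of a 16-byte-aligned aggregate is a candidate for the new lvx/stvx expansion in expand_block_move; the struct and function names here are hypothetical.

struct aligned_block { int words[8]; } __attribute__ ((aligned (16)));

/* 32-byte aggregate copy: with -O -maltivec this can be expanded as
   two V4SImode load/store pairs instead of word loads and stores.  */
void copy_block (struct aligned_block *dst, const struct aligned_block *src)
{
  *dst = *src;
}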