about | summary | refs | log | tree | commit | diff
path: root/sysdeps
diff options
context:
space:
mode:
author Will Schmidt <will_schmidt@vnet.ibm.com> 2012-04-18 14:52:25 -0500
committer Ryan S. Arnold <rsa@linux.vnet.ibm.com> 2012-04-18 14:52:25 -0500
commit b282631e367f474bbd9e5d92e2aa54b3ce44efe8 (patch)
tree 8baaa0d7b178a0679aa1c1f2a37cc8508c20cd7d /sysdeps
parent 95aa737cf9fd7f0c31de62950b253861b863dc50 (diff)
download glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.zip
glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.tar.gz
glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.tar.bz2
Simplify power6 wordcopy by adding [fwd|bwd]_align_merge macros.
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/powerpc/powerpc32/power6/wordcopy.c 117
-rw-r--r-- sysdeps/powerpc/powerpc64/power6/wordcopy.c 234
2 files changed, 79 insertions, 272 deletions
diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
index 2594b1d..d0df56f 100644
--- a/sysdeps/powerpc/powerpc32/power6/wordcopy.c
+++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
@@ -1,5 +1,5 @@
/* _memcopy.c -- subroutines for memory copy functions.
- Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1991-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Torbjorn Granlund (tege@sics.se).
Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
@@ -65,6 +65,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
DSTP should be aligned for memory operations on `op_t's, but SRCP must
*not* be aligned. */
+#define fwd_align_merge(align) \
+ do \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
+ a0 = a2; \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ len -= 2; \
+ } \
+ while (len != 0)
+
void
_wordcopy_fwd_dest_aligned (dstp, srcp, len)
long int dstp;
@@ -104,49 +118,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (1);
break;
case 2:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (2);
break;
case 3:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (3);
break;
}
@@ -191,6 +169,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
while (len != 0);
}
+#define bwd_align_merge(align) \
+ do \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \
+ a2 = a0; \
+ len -= 2; \
+ } \
+ while (len != 0)
+
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
words (not LEN bytes!). DSTP should be aligned for memory
@@ -235,52 +227,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (1);
break;
case 2:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (2);
break;
case 3:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (3);
break;
}
}
diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
index 184904d..b18b97f 100644
--- a/sysdeps/powerpc/powerpc64/power6/wordcopy.c
+++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
@@ -1,5 +1,5 @@
/* _memcopy.c -- subroutines for memory copy functions.
- Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+ Copyright (C) 1991-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Torbjorn Granlund (tege@sics.se).
@@ -59,6 +59,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
while (len != 0);
}
+#define fwd_align_merge(align) \
+ do \
+ { \
+ a1 = ((op_t *) srcp)[1]; \
+ a2 = ((op_t *) srcp)[2]; \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ a0 = a2; \
+ srcp += 2 * OPSIZ; \
+ dstp += 2 * OPSIZ; \
+ len -= 2; \
+ } \
+ while (len != 0)
+
+
/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
DSTP should be aligned for memory operations on `op_t's, but SRCP must
@@ -103,109 +118,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (1);
break;
case 2:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (2);
break;
case 3:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (3);
break;
case 4:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
- ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (4);
break;
case 5:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
- ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (5);
break;
case 6:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
- ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (6);
break;
case 7:
- do
- {
- a1 = ((op_t *) srcp)[1];
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
- ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
- a0 = a2;
-
- srcp += 2 * OPSIZ;
- dstp += 2 * OPSIZ;
- len -= 2;
- }
- while (len != 0);
+ fwd_align_merge (7);
break;
}
@@ -250,6 +181,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
while (len != 0);
}
+#define bwd_align_merge(align) \
+ do \
+ { \
+ srcp -= 2 * OPSIZ; \
+ dstp -= 2 * OPSIZ; \
+ a1 = ((op_t *) srcp)[1]; \
+ a0 = ((op_t *) srcp)[0]; \
+ ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \
+ ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \
+ a2 = a0; \
+ len -= 2; \
+ } \
+ while (len != 0)
+
/* _wordcopy_bwd_dest_aligned -- Copy block finishing right
before SRCP to block finishing right before DSTP with LEN `op_t'
words (not LEN bytes!). DSTP should be aligned for memory
@@ -294,116 +239,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
switch (align)
{
case 1:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
- ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (1);
break;
case 2:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
- ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (2);
break;
case 3:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
- ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (3);
break;
case 4:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
- ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (4);
break;
case 5:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
- ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (5);
break;
case 6:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
- ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (6);
break;
case 7:
- do
- {
- srcp -= 2 * OPSIZ;
- dstp -= 2 * OPSIZ;
-
- a1 = ((op_t *) srcp)[1];
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
- ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
- a2 = a0;
-
- len -= 2;
- }
- while (len != 0);
+ bwd_align_merge (7);
break;
}
}