aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/powerpc/powerpc32/memset.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/powerpc/powerpc32/memset.S')
-rw-r--r--sysdeps/powerpc/powerpc32/memset.S76
1 files changed, 36 insertions, 40 deletions
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index bee87af..1a8eae5 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -1,5 +1,5 @@
/* Optimized memset implementation for PowerPC.
- Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1999, 2000, 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -21,27 +21,23 @@
#include <bp-sym.h>
#include <bp-asm.h>
-/* Define a global static that can hold the cache line size. The
+/* Define a global static that can hold the cache line size. The
assumption is that startup code will access the "aux vector" to
- to obtain the value set by the kernel and store it into this
+ obtain the value set by the kernel and store it into this
variable. */
-
+
.globl __cache_line_size
- .section ".data","aw"
- .align 2
- .type __cache_line_size,@object
- .size __cache_line_size,4
-__cache_line_size:
- .long 0
- .section ".text"
+ .lcomm __cache_line_size,4,4
+
/* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5]));
Returns 's'.
The memset is done in four sizes: byte (8 bits), word (32 bits),
32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits).
- There is a special case for setting whole cache lines to 0, which
+ There is a special case for setting whole cache lines to 0, which
takes advantage of the dcbz instruction. */
+ .section ".text"
EALIGN (BP_SYM (memset), 5, 1)
#define rTMP r0
@@ -123,14 +119,14 @@ L(caligned):
cmplwi cr1, rCHR, 0
clrrwi. rALIGN, rLEN, 5
mtcrf 0x01, rLEN /* 40th instruction from .align */
-
+
/* Check if we can use the special case for clearing memory using dcbz.
- This requires that we know the correct cache line size for this
+ This requires that we know the correct cache line size for this
processor. Getting the __cache_line_size may require establishing GOT
addressability, so branch out of line to set this up. */
- beq cr1, L(checklinesize)
-
-/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary.
+ beq cr1, L(checklinesize)
+
+/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary.
Can't assume that rCHR is zero or that the cache line size is either
32-bytes or even known. */
L(nondcbz):
@@ -172,7 +168,7 @@ L(cloopdone):
.align 5
nop
-/* Clear cache lines of memory in 128-byte chunks.
+/* Clear cache lines of memory in 128-byte chunks.
This code is optimized for processors with 32-byte cache lines.
It is further optimized for the 601 processor, which requires
some care in how the code is aligned in the i-cache. */
@@ -259,22 +255,22 @@ L(medium_28t):
stw rCHR, -4(rMEMP)
stw rCHR, -8(rMEMP)
blr
-
+
L(checklinesize):
#ifdef SHARED
mflr rTMP
/* If the remaining length is less the 32 bytes then don't bother getting
the cache line size. */
- beq L(medium)
-/* Establishes GOT addressability so we can load __cache_line_size
+ beq L(medium)
+/* Establishes GOT addressability so we can load __cache_line_size
from static. This value was set from the aux vector during startup. */
bl _GLOBAL_OFFSET_TABLE_@local-4
mflr rGOT
lwz rGOT,__cache_line_size@got(rGOT)
lwz rCLS,0(rGOT)
mtlr rTMP
-#else
-/* Load __cache_line_size from static. This value was set from the
+#else
+/* Load __cache_line_size from static. This value was set from the
aux vector during startup. */
lis rCLS,__cache_line_size@ha
/* If the remaining length is less the 32 bytes then don't bother getting
@@ -282,22 +278,22 @@ L(checklinesize):
beq L(medium)
lwz rCLS,__cache_line_size@l(rCLS)
#endif
-
+
/*If the cache line size was not set then goto to L(nondcbz), which is
- safe for any cache line size. */
+ safe for any cache line size. */
cmplwi cr1,rCLS,0
beq cr1,L(nondcbz)
-
+
/* If the cache line size is 32 bytes then goto to L(zloopstart),
- which is coded specificly for 32-byte lines (and 601). */
+ which is coded specifically for 32-byte lines (and 601). */
cmplwi cr1,rCLS,32
beq cr1,L(zloopstart)
-
-/* Now we know the cache line size and it is not 32-bytes. However
- we may not yet be aligned to the cache line and may have a partial
- line to fill. Touch it 1st to fetch the cache line. */
- dcbtst 0,rMEMP
-
+
+/* Now we know the cache line size and it is not 32-bytes. However
+ we may not yet be aligned to the cache line and may have a partial
+ line to fill. Touch it 1st to fetch the cache line. */
+ dcbtst 0,rMEMP
+
addi rCLM,rCLS,-1
L(getCacheAligned):
cmplwi cr1,rLEN,32
@@ -317,8 +313,8 @@ L(getCacheAligned):
stw rCHR,-8(rMEMP)
stw rCHR,-4(rMEMP)
b L(getCacheAligned)
-
-/* Now we are aligned to the cache line and can use dcbz. */
+
+/* Now we are aligned to the cache line and can use dcbz. */
L(cacheAligned):
cmplw cr1,rLEN,rCLS
blt cr1,L(handletail32)
@@ -327,12 +323,12 @@ L(cacheAligned):
add rMEMP,rMEMP,rCLS
b L(cacheAligned)
-/* We are here because; the cache line size was set, it was not
- 32-bytes, and the remainder (rLEN) is now less than the actual cache
- line size. Set up the preconditions for L(nondcbz) and go there to
- store the remaining bytes. */
+/* We are here because the cache line size was set, it was not
+ 32-bytes, and the remainder (rLEN) is now less than the actual cache
+ line size. Set up the preconditions for L(nondcbz) and go there to
+ store the remaining bytes. */
L(handletail32):
clrrwi. rALIGN, rLEN, 5
b L(nondcbz)
-
+
END (BP_SYM (memset))