diff options
Diffstat (limited to 'sysdeps/powerpc/powerpc32/memset.S')
-rw-r--r-- | sysdeps/powerpc/powerpc32/memset.S | 76 |
1 files changed, 36 insertions, 40 deletions
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S index bee87af..1a8eae5 100644 --- a/sysdeps/powerpc/powerpc32/memset.S +++ b/sysdeps/powerpc/powerpc32/memset.S @@ -1,5 +1,5 @@ /* Optimized memset implementation for PowerPC. - Copyright (C) 1997, 1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1997, 1999, 2000, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -21,27 +21,23 @@ #include <bp-sym.h> #include <bp-asm.h> -/* Define a global static that can hold the cache line size. The +/* Define a global static that can hold the cache line size. The assumption is that startup code will access the "aux vector" to - to obtain the value set by the kernel and store it into this + to obtain the value set by the kernel and store it into this variable. */ - + .globl __cache_line_size - .section ".data","aw" - .align 2 - .type __cache_line_size,@object - .size __cache_line_size,4 -__cache_line_size: - .long 0 - .section ".text" + .lcomm __cache_line_size,4,4 + /* __ptr_t [r3] memset (__ptr_t s [r3], int c [r4], size_t n [r5])); Returns 's'. The memset is done in four sizes: byte (8 bits), word (32 bits), 32-byte blocks (256 bits) and __cache_line_size (128, 256, 1024 bits). - There is a special case for setting whole cache lines to 0, which + There is a special case for setting whole cache lines to 0, which takes advantage of the dcbz instruction. */ + .section ".text" EALIGN (BP_SYM (memset), 5, 1) #define rTMP r0 @@ -123,14 +119,14 @@ L(caligned): cmplwi cr1, rCHR, 0 clrrwi. rALIGN, rLEN, 5 mtcrf 0x01, rLEN /* 40th instruction from .align */ - + /* Check if we can use the special case for clearing memory using dcbz. - This requires that we know the correct cache line size for this + This requires that we know the correct cache line size for this processor. Getting the __cache_line_size may require establishing GOT addressability, so branch out of line to set this up. */ - beq cr1, L(checklinesize) - -/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary. + beq cr1, L(checklinesize) + +/* Store blocks of 32-bytes (256-bits) starting on a 32-byte boundary. Can't assume that rCHR is zero or that the cache line size is either 32-bytes or even known. */ L(nondcbz): @@ -172,7 +168,7 @@ L(cloopdone): .align 5 nop -/* Clear cache lines of memory in 128-byte chunks. +/* Clear cache lines of memory in 128-byte chunks. This code is optimized for processors with 32-byte cache lines. It is further optimized for the 601 processor, which requires some care in how the code is aligned in the i-cache. */ @@ -259,22 +255,22 @@ L(medium_28t): stw rCHR, -4(rMEMP) stw rCHR, -8(rMEMP) blr - + L(checklinesize): #ifdef SHARED mflr rTMP /* If the remaining length is less the 32 bytes then don't bother getting the cache line size. */ - beq L(medium) -/* Establishes GOT addressability so we can load __cache_line_size + beq L(medium) +/* Establishes GOT addressability so we can load __cache_line_size from static. This value was set from the aux vector during startup. */ bl _GLOBAL_OFFSET_TABLE_@local-4 mflr rGOT lwz rGOT,__cache_line_size@got(rGOT) lwz rCLS,0(rGOT) mtlr rTMP -#else -/* Load __cache_line_size from static. This value was set from the +#else +/* Load __cache_line_size from static. This value was set from the aux vector during startup. */ lis rCLS,__cache_line_size@ha /* If the remaining length is less the 32 bytes then don't bother getting @@ -282,22 +278,22 @@ L(checklinesize): beq L(medium) lwz rCLS,__cache_line_size@l(rCLS) #endif - + /*If the cache line size was not set then goto to L(nondcbz), which is - safe for any cache line size. */ + safe for any cache line size. */ cmplwi cr1,rCLS,0 beq cr1,L(nondcbz) - + /* If the cache line size is 32 bytes then goto to L(zloopstart), - which is coded specificly for 32-byte lines (and 601). */ + which is coded specificly for 32-byte lines (and 601). */ cmplwi cr1,rCLS,32 beq cr1,L(zloopstart) - -/* Now we know the cache line size and it is not 32-bytes. However - we may not yet be aligned to the cache line and may have a partial - line to fill. Touch it 1st to fetch the cache line. */ - dcbtst 0,rMEMP - + +/* Now we know the cache line size and it is not 32-bytes. However + we may not yet be aligned to the cache line and may have a partial + line to fill. Touch it 1st to fetch the cache line. */ + dcbtst 0,rMEMP + addi rCLM,rCLS,-1 L(getCacheAligned): cmplwi cr1,rLEN,32 @@ -317,8 +313,8 @@ L(getCacheAligned): stw rCHR,-8(rMEMP) stw rCHR,-4(rMEMP) b L(getCacheAligned) - -/* Now we are aligned to the cache line and can use dcbz. */ + +/* Now we are aligned to the cache line and can use dcbz. */ L(cacheAligned): cmplw cr1,rLEN,rCLS blt cr1,L(handletail32) @@ -327,12 +323,12 @@ L(cacheAligned): add rMEMP,rMEMP,rCLS b L(cacheAligned) -/* We are here because; the cache line size was set, it was not - 32-bytes, and the remainder (rLEN) is now less than the actual cache - line size. Set up the preconditions for L(nondcbz) and go there to - store the remaining bytes. */ +/* We are here because; the cache line size was set, it was not + 32-bytes, and the remainder (rLEN) is now less than the actual cache + line size. Set up the preconditions for L(nondcbz) and go there to + store the remaining bytes. */ L(handletail32): clrrwi. rALIGN, rLEN, 5 b L(nondcbz) - + END (BP_SYM (memset)) |