diff options
author | Ulrich Drepper <drepper@redhat.com> | 2001-07-16 06:29:42 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2001-07-16 06:29:42 +0000 |
commit | 3c204435e91ba20fbfef438e52fdc147e679ca58 (patch) | |
tree | f3cdbfd9405e6844b14d51b05974284ed902099d | |
parent | a6d214835f18ce3232d44ed6f0aba679d090693d (diff) | |
download | glibc-3c204435e91ba20fbfef438e52fdc147e679ca58.zip glibc-3c204435e91ba20fbfef438e52fdc147e679ca58.tar.gz glibc-3c204435e91ba20fbfef438e52fdc147e679ca58.tar.bz2 |
Use mvcle for big blocks (> 64K) and a mvc loop for small blocks.
-rw-r--r-- | sysdeps/s390/s390-32/bcopy.S | 27 | ||||
-rw-r--r-- | sysdeps/s390/s390-32/memcpy.S | 15 | ||||
-rw-r--r-- | sysdeps/s390/s390-64/bcopy.S | 18 | ||||
-rw-r--r-- | sysdeps/s390/s390-64/memcpy.S | 15 |
4 files changed, 61 insertions, 14 deletions
diff --git a/sysdeps/s390/s390-32/bcopy.S b/sysdeps/s390/s390-32/bcopy.S index d3ecf1d..ae90dc1 100644 --- a/sysdeps/s390/s390-32/bcopy.S +++ b/sysdeps/s390/s390-32/bcopy.S @@ -34,12 +34,14 @@ ENTRY(__bcopy) jnl .L0 alr %r1,%r2 clr %r1,%r3 - jh .L5 + jh .L7 .L0: ahi %r4,-1 # length - 1 lr %r1,%r4 srl %r1,8 - ltr %r1,%r1 + ltr %r1,%r1 # < 256 bytes to move ? jz .L2 + chi %r1,255 # > 1MB to move ? + jh .L5 .L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks la %r2,256(%r2) la %r3,256(%r3) @@ -49,22 +51,31 @@ ENTRY(__bcopy) .L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1 .L4: br %r14 -.L5: # destructive overlay, can not use mvcle + # data copies > 1MB are faster with mvcle. +.L5: ahi %r4,1 # length + 1 + lr %r5,%r4 # source length + lr %r4,%r2 # source address + lr %r2,%r3 # set destination + lr %r3,%r5 # destination length = source length +.L6: mvcle %r2,%r4,0 # thats it, MVCLE is your friend + jo .L6 + br %r14 +.L7: # destructive overlay, can not use mvcle lr %r1,%r2 # bcopy is called with source,dest lr %r2,%r3 # memmove with dest,source! Oh, well... lr %r3,%r1 basr %r1,0 -.L6: +.L8: #ifdef PIC - al %r1,.L7-.L6(%r1) # get address of global offset table + al %r1,.L9-.L8(%r1) # get address of global offset table # load address of memmove l %r1,memmove@GOT12(%r1) br %r1 -.L7: .long _GLOBAL_OFFSET_TABLE_-.L6 +.L9: .long _GLOBAL_OFFSET_TABLE_-.L8 #else - al %r1,.L7-.L6(%r1) # load address of memmove + al %r1,.L9-.L8(%r1) # load address of memmove br %r1 # jump to memmove -.L7: .long memmove-.L6 +.L9: .long memmove-.L8 #endif END(__bcopy) diff --git a/sysdeps/s390/s390-32/memcpy.S b/sysdeps/s390/s390-32/memcpy.S index c7045f2..ad82f8a 100644 --- a/sysdeps/s390/s390-32/memcpy.S +++ b/sysdeps/s390/s390-32/memcpy.S @@ -33,8 +33,11 @@ ENTRY(memcpy) ahi %r4,-1 # length - 1 lr %r1,%r2 # copy destination address lr %r5,%r4 - sra %r5,8 + srl %r5,8 + ltr %r5,%r5 # < 256 bytes to move ? jz .L1 + chi %r5,255 # > 1MB to move ? + jh .L4 .L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks la %r1,256(%r1) la %r3,256(%r3) @@ -43,5 +46,15 @@ ENTRY(memcpy) mvc 0(1,%r1),0(%r3) # instruction for execute .L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1 .L3: br %r14 + # data copies > 1MB are faster with mvcle. +.L4: ahi %r4,1 # length + 1 + lr %r5,%r4 # source length + lr %r4,%r2 # source address + lr %r2,%r3 # set destination + lr %r3,%r5 # destination length = source length +.L5: mvcle %r2,%r4,0 # thats it, MVCLE is your friend + jo .L5 + lr %r2,%r1 # return destination address + br %r14 END(memcpy) diff --git a/sysdeps/s390/s390-64/bcopy.S b/sysdeps/s390/s390-64/bcopy.S index bb3acdb..ff79667 100644 --- a/sysdeps/s390/s390-64/bcopy.S +++ b/sysdeps/s390/s390-64/bcopy.S @@ -34,11 +34,13 @@ ENTRY(__bcopy) jnl .L0 algr %r1,%r2 clgr %r1,%r3 - jh .L5 + jh .L7 .L0: aghi %r4,-1 # length - 1 srlg %r1,%r4,8 - ltgr %r1,%r1 + ltgr %r1,%r1 # < 256 bytes to move ? jz .L2 + cghi %r1,255 # > 1MB to move ? + jh .L5 .L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks la %r2,256(%r2) la %r3,256(%r3) @@ -47,8 +49,16 @@ ENTRY(__bcopy) mvc 0(1,%r3),0(%r2) # instruction for execute .L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1 .L4: br %r14 - -.L5: # destructive overlay, can not use mvcle + # data copies > 1MB are faster with mvcle. +.L5: aghi %r4,1 # length + 1 + lgr %r5,%r4 # source length + lgr %r4,%r2 # source address + lgr %r2,%r3 # set destination + lgr %r3,%r5 # destination length = source length +.L6: mvcle %r2,%r4,0 # thats it, MVCLE is your friend + jo .L6 + br %r14 +.L7: # destructive overlay, can not use mvcle lgr %r1,%r2 # bcopy is called with source,dest lgr %r2,%r3 # memmove with dest,source! Oh, well... lgr %r3,%r1 diff --git a/sysdeps/s390/s390-64/memcpy.S b/sysdeps/s390/s390-64/memcpy.S index 8212bd3..d13c0ad 100644 --- a/sysdeps/s390/s390-64/memcpy.S +++ b/sysdeps/s390/s390-64/memcpy.S @@ -32,8 +32,11 @@ ENTRY(memcpy) jz .L3 aghi %r4,-1 # length - 1 lgr %r1,%r2 # copy destination address - srag %r5,%r4,8 + srlg %r5,%r4,8 + ltgr %r5,%r5 # < 256 bytes to mvoe ? jz .L1 + chi %r6,255 # > 1 MB to move ? + jh .L4 .L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks la %r1,256(%r1) la %r3,256(%r3) @@ -42,5 +45,15 @@ ENTRY(memcpy) mvc 0(1,%r1),0(%r3) # instruction for execute .L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1 .L3: br %r14 + # data copies > 1MB are faster with mvcle. +.L4: aghi %r4,1 # length + 1 + lgr %r5,%r4 # source length + lgr %r4,%r2 # source address + lgr %r2,%r3 # set destination + lgr %r3,%r5 # destination length = source length +.L5: mvcle %r2,%r4,0 # thats it, MVCLE is your friend + jo .L5 + lgr %r2,%r1 # return destination address + br %r14 END(memcpy) |