aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/s390
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.ibm.com>2020-06-26 09:45:11 +0200
committerStefan Liebler <stli@linux.ibm.com>2020-06-26 09:45:11 +0200
commit0792c8ae1aebf538de45ff9a0e2e401a60525de2 (patch)
tree7928bae9286828c8c9d69f7418299ff9143abb51 /sysdeps/s390
parent2034c70e64b31e48140c8e31c5ae839af5ccb6eb (diff)
downloadglibc-0792c8ae1aebf538de45ff9a0e2e401a60525de2.zip
glibc-0792c8ae1aebf538de45ff9a0e2e401a60525de2.tar.gz
glibc-0792c8ae1aebf538de45ff9a0e2e401a60525de2.tar.bz2
S390: Optimize __memcpy_z196.
This patch introduces an extra loop without pfd instructions as it turned out that the pfd instructions are usefull for copies >=64KB but are counterproductive for smaller copies.
Diffstat (limited to 'sysdeps/s390')
-rw-r--r--sysdeps/s390/memcpy-z900.S21
1 files changed, 15 insertions, 6 deletions
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
index f2e9aae..dc2f491 100644
--- a/sysdeps/s390/memcpy-z900.S
+++ b/sysdeps/s390/memcpy-z900.S
@@ -184,25 +184,34 @@ ENTRY(MEMCPY_Z196)
je .L_Z196_4
.L_Z196_start2:
aghi %r4,-1
- srlg %r5,%r4,8
- ltgr %r5,%r5
+ risbg %r5,%r4,8,128+63,56 # r0 = r5 / 256
jne .L_Z196_5
.L_Z196_3:
exrl %r4,.L_Z196_14
.L_Z196_4:
br %r14
.L_Z196_5:
- cgfi %r5,262144 # Switch to mvcle for copies >64MB
- jh __memcpy_mvcle
+ cgfi %r5,255 # Switch to loop with pfd for copies >=64kB
+ jh .L_Z196_6
.L_Z196_2:
- pfd 1,768(%r3)
- pfd 2,768(%r1)
mvc 0(256,%r1),0(%r3)
aghi %r5,-1
la %r1,256(%r1)
la %r3,256(%r3)
jne .L_Z196_2
j .L_Z196_3
+.L_Z196_6:
+ cgfi %r5,262144 # Switch to mvcle for copies >64MB
+ jh __memcpy_mvcle
+.L_Z196_7:
+ pfd 1,1024(%r3)
+ pfd 2,1024(%r1)
+ mvc 0(256,%r1),0(%r3)
+ aghi %r5,-1
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ jne .L_Z196_7
+ j .L_Z196_3
.L_Z196_14:
mvc 0(1,%r1),0(%r3)
END(MEMCPY_Z196)