aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/ia64/memccpy.S
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2003-09-09 20:18:53 +0000
committer Ulrich Drepper <drepper@redhat.com> 2003-09-09 20:18:53 +0000
commit e352f0d83289322fe9706ee45c449170653f85e8 (patch)
tree 7715fce2fc7f8924d72931a7ef4febd5860669f7 /sysdeps/ia64/memccpy.S
parent 2036b767dd60813df266647ab1e23159f5043706 (diff)
download glibc-e352f0d83289322fe9706ee45c449170653f85e8.zip
glibc-e352f0d83289322fe9706ee45c449170653f85e8.tar.gz
glibc-e352f0d83289322fe9706ee45c449170653f85e8.tar.bz2
Update.
2003-09-09 Ulrich Drepper <drepper@redhat.com> * string/stratcliff.c (main): Check memccpy. 2003-04-11 Jes Sorensen <jes@wildopensource.com> * sysdeps/ia64/memccpy.S: When recovering for src_aligned and the character is found during recovery, use the correct register when determining the position of the found character. 2003-04-01 Jes Sorensen <jes@wildopensource.com> * sysdeps/ia64/memccpy.S: Use speculative loads for readahead to avoid segfaults when reading from unmapped pages. For aligned sources, reload and continue; for misaligned sources, roll back and use byte copy. Save ar.ec on entry and restore on exit.
Diffstat (limited to 'sysdeps/ia64/memccpy.S')
-rw-r--r-- sysdeps/ia64/memccpy.S 52
1 files changed, 45 insertions, 7 deletions
diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S
index 7ffe9b9..6bccb96 100644
--- a/sysdeps/ia64/memccpy.S
+++ b/sysdeps/ia64/memccpy.S
@@ -1,6 +1,6 @@
/* Optimized version of the memccpy() function.
This file is part of the GNU C Library.
- Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
Contributed by Dan Pop <Dan.Pop@cern.ch>.
The GNU C Library is free software; you can redistribute it and/or
@@ -46,6 +46,7 @@
#define tmp r23
#define char r24
#define charx8 r25
+#define saved_ec r26
#define sh2 r28
#define sh1 r29
#define loopcnt r30
@@ -56,25 +57,27 @@ ENTRY(memccpy)
alloc r2 = ar.pfs, 4, 40 - 4, 0, 40
#include "softpipe.h"
- .rotr r[MEMLAT + 3], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
+ .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2]
.rotp p[MEMLAT + 6 + 1]
mov ret0 = r0 // return NULL if no match
.save pr, saved_pr
mov saved_pr = pr // save the predicate registers
+ mov dest = in0 // dest
.save ar.lc, saved_lc
mov saved_lc = ar.lc // save the loop counter
+ mov saved_ec = ar.ec // save the epilog counter
.body
- mov dest = in0 // dest
mov src = in1 // src
extr.u char = in2, 0, 8 // char
mov len = in3 // len
sub tmp = r0, in0 // tmp = -dest
cmp.ne p7, p0 = r0, r0 // clear p7
;;
- and loopcnt = 7, tmp // loopcnt = -dest % 8
+ and loopcnt = 7, tmp // loopcnt = -dest % 8
cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES
-(p6) br.cond.spnt .cpyfew // copy byte by byte
+ mov ar.ec = 0 // ec not guaranteed zero on entry
+(p6) br.cond.spnt .cpyfew // copy byte by byte
;;
cmp.eq p6, p0 = loopcnt, r0
mux1 charx8 = char, @brcst
@@ -109,26 +112,31 @@ ENTRY(memccpy)
cmp.ne p6, p0 = r0, r0 ;; // clear p6
.align 32
.l2:
-(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1
+(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1
(p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1
(p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2
(p[MEMLAT+4]) xor tmp3[0] = val[1], charx8
(p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1]
+(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't
+ // valid - rollback!
(p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1]
(p6) br.cond.spnt .gotit
(p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest
(p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2
br.ctop.sptk .l2
br.cond.sptk .cpyfew
+
.src_aligned:
cmp.ne p6, p0 = r0, r0 // clear p6
mov ar.ec = MEMLAT + 2 + 1 ;; // set EC
.l3:
-(p[0]) ld8 r[0] = [src], 8
+(p[0]) ld8.s r[0] = [src], 8
(p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8
(p[MEMLAT+1]) czx1.r pos0[0] = tmp3[1]
(p[MEMLAT+2]) cmp.ne p7, p0 = 8, pos0[1]
+(p[MEMLAT+2]) chk.s r[MEMLAT+2], .recovery2
(p7) br.cond.spnt .gotit
+.back2:
(p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8
br.ctop.dptk .l3
.cpyfew:
@@ -148,6 +156,7 @@ ENTRY(memccpy)
.restore_and_exit:
mov pr = saved_pr, -1 // restore the predicate registers
mov ar.lc = saved_lc // restore the loop counter
+ mov ar.ec = saved_ec ;; // restore the epilog counter
br.ret.sptk.many b0
.gotit:
.pred.rel "mutex" p6, p7
@@ -163,4 +172,33 @@ ENTRY(memccpy)
mov pr = saved_pr, -1
mov ar.lc = saved_lc
br.ret.sptk.many b0
+
+.recovery1:
+ adds src = -(MEMLAT + 6 + 1) * 8, asrc
+ mov loopcnt = ar.lc
+ mov tmp = ar.ec ;;
+ sub sh1 = (MEMLAT + 6 + 1), tmp
+ shr.u sh2 = sh2, 3
+ ;;
+ shl loopcnt = loopcnt, 3
+ sub src = src, sh2
+ shl sh1 = sh1, 3
+ shl tmp = tmp, 3
+ ;;
+ add len = len, loopcnt
+ add src = sh1, src ;;
+ add len = tmp, len
+.back1:
+ br.cond.sptk .cpyfew
+
+.recovery2:
+ add tmp = -(MEMLAT + 3) * 8, src
+(p7) br.cond.spnt .gotit
+ ;;
+ ld8 r[MEMLAT+2] = [tmp] ;;
+ xor pos0[1] = r[MEMLAT+2], charx8 ;;
+ czx1.r pos0[1] = pos0[1] ;;
+ cmp.ne p7, p6 = 8, pos0[1]
+(p7) br.cond.spnt .gotit
+ br.cond.sptk .back2
END(memccpy)