diff options
author | Ulrich Drepper <drepper@redhat.com> | 2003-09-09 20:18:53 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2003-09-09 20:18:53 +0000 |
commit | e352f0d83289322fe9706ee45c449170653f85e8 (patch) | |
tree | 7715fce2fc7f8924d72931a7ef4febd5860669f7 /sysdeps/ia64/memccpy.S | |
parent | 2036b767dd60813df266647ab1e23159f5043706 (diff) | |
download | glibc-e352f0d83289322fe9706ee45c449170653f85e8.zip glibc-e352f0d83289322fe9706ee45c449170653f85e8.tar.gz glibc-e352f0d83289322fe9706ee45c449170653f85e8.tar.bz2 |
Update.
2003-09-09 Ulrich Drepper <drepper@redhat.com>
* string/stratcliff.c (main): Check memccpy.
2003-04-11 Jes Sorensen <jes@wildopensource.com>
* sysdeps/ia64/memccpy.S: When recovering for src_aligned and the
character is found during recovery, use correct register when
determining the position of the found character.
2003-04-01 Jes Sorensen <jes@wildopensource.com>
* sysdeps/ia64/memccpy.S: Use speculative loads for readahead to
avoid segfaults when reading from unmapped pages. For the aligned case,
reload and continue; for the misaligned case, roll back and use byte copy.
Save ar.ec on entry and restore on exit.
Diffstat (limited to 'sysdeps/ia64/memccpy.S')
-rw-r--r-- | sysdeps/ia64/memccpy.S | 52 |
1 file changed, 45 insertions, 7 deletions
diff --git a/sysdeps/ia64/memccpy.S b/sysdeps/ia64/memccpy.S index 7ffe9b9..6bccb96 100644 --- a/sysdeps/ia64/memccpy.S +++ b/sysdeps/ia64/memccpy.S @@ -1,6 +1,6 @@ /* Optimized version of the memccpy() function. This file is part of the GNU C Library. - Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc. Contributed by Dan Pop <Dan.Pop@cern.ch>. The GNU C Library is free software; you can redistribute it and/or @@ -46,6 +46,7 @@ #define tmp r23 #define char r24 #define charx8 r25 +#define saved_ec r26 #define sh2 r28 #define sh1 r29 #define loopcnt r30 @@ -56,25 +57,27 @@ ENTRY(memccpy) alloc r2 = ar.pfs, 4, 40 - 4, 0, 40 #include "softpipe.h" - .rotr r[MEMLAT + 3], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] + .rotr r[MEMLAT + 7], tmp1[4], tmp2[4], val[4], tmp3[2], pos0[2] .rotp p[MEMLAT + 6 + 1] mov ret0 = r0 // return NULL if no match .save pr, saved_pr mov saved_pr = pr // save the predicate registers + mov dest = in0 // dest .save ar.lc, saved_lc mov saved_lc = ar.lc // save the loop counter + mov saved_ec = ar.ec // save the loop counter .body - mov dest = in0 // dest mov src = in1 // src extr.u char = in2, 0, 8 // char mov len = in3 // len sub tmp = r0, in0 // tmp = -dest cmp.ne p7, p0 = r0, r0 // clear p7 ;; - and loopcnt = 7, tmp // loopcnt = -dest % 8 + and loopcnt = 7, tmp // loopcnt = -dest % 8 cmp.ge p6, p0 = OP_T_THRES, len // is len <= OP_T_THRES -(p6) br.cond.spnt .cpyfew // copy byte by byte + mov ar.ec = 0 // ec not guaranteed zero on entry +(p6) br.cond.spnt .cpyfew // copy byte by byte ;; cmp.eq p6, p0 = loopcnt, r0 mux1 charx8 = char, @brcst @@ -109,26 +112,31 @@ ENTRY(memccpy) cmp.ne p6, p0 = r0, r0 ;; // clear p6 .align 32 .l2: -(p[0]) ld8 r[0] = [asrc], 8 // r[0] = w1 +(p[0]) ld8.s r[0] = [asrc], 8 // r[0] = w1 (p[MEMLAT]) shr.u tmp1[0] = r[1 + MEMLAT], sh1 // tmp1 = w0 >> sh1 (p[MEMLAT]) shl tmp2[0] = r[0 + MEMLAT], sh2 // tmp2 = w1 << sh2 (p[MEMLAT+4]) xor tmp3[0] = 
val[1], charx8 (p[MEMLAT+5]) czx1.r pos0[0] = tmp3[1] +(p[MEMLAT+6]) chk.s r[6 + MEMLAT], .recovery1 // our data isn't + // valid - rollback! (p[MEMLAT+6]) cmp.ne p6, p0 = 8, pos0[1] (p6) br.cond.spnt .gotit (p[MEMLAT+6]) st8 [dest] = val[3], 8 // store val to dest (p[MEMLAT+3]) or val[0] = tmp1[3], tmp2[3] // val = tmp1 | tmp2 br.ctop.sptk .l2 br.cond.sptk .cpyfew + .src_aligned: cmp.ne p6, p0 = r0, r0 // clear p6 mov ar.ec = MEMLAT + 2 + 1 ;; // set EC .l3: -(p[0]) ld8 r[0] = [src], 8 +(p[0]) ld8.s r[0] = [src], 8 (p[MEMLAT]) xor tmp3[0] = r[MEMLAT], charx8 (p[MEMLAT+1]) czx1.r pos0[0] = tmp3[1] (p[MEMLAT+2]) cmp.ne p7, p0 = 8, pos0[1] +(p[MEMLAT+2]) chk.s r[MEMLAT+2], .recovery2 (p7) br.cond.spnt .gotit +.back2: (p[MEMLAT+2]) st8 [dest] = r[MEMLAT+2], 8 br.ctop.dptk .l3 .cpyfew: @@ -148,6 +156,7 @@ ENTRY(memccpy) .restore_and_exit: mov pr = saved_pr, -1 // restore the predicate registers mov ar.lc = saved_lc // restore the loop counter + mov ar.ec = saved_ec ;; // restore the epilog counter br.ret.sptk.many b0 .gotit: .pred.rel "mutex" p6, p7 @@ -163,4 +172,33 @@ ENTRY(memccpy) mov pr = saved_pr, -1 mov ar.lc = saved_lc br.ret.sptk.many b0 + +.recovery1: + adds src = -(MEMLAT + 6 + 1) * 8, asrc + mov loopcnt = ar.lc + mov tmp = ar.ec ;; + sub sh1 = (MEMLAT + 6 + 1), tmp + shr.u sh2 = sh2, 3 + ;; + shl loopcnt = loopcnt, 3 + sub src = src, sh2 + shl sh1 = sh1, 3 + shl tmp = tmp, 3 + ;; + add len = len, loopcnt + add src = sh1, src ;; + add len = tmp, len +.back1: + br.cond.sptk .cpyfew + +.recovery2: + add tmp = -(MEMLAT + 3) * 8, src +(p7) br.cond.spnt .gotit + ;; + ld8 r[MEMLAT+2] = [tmp] ;; + xor pos0[1] = r[MEMLAT+2], charx8 ;; + czx1.r pos0[1] = pos0[1] ;; + cmp.ne p7, p6 = 8, pos0[1] +(p7) br.cond.spnt .gotit + br.cond.sptk .back2 END(memccpy) |