author		Zack Weinberg <zackw@panix.com>	2017-05-11 20:36:15 -0400
committer	Zack Weinberg <zackw@panix.com>	2017-05-20 08:12:11 -0400
commit		81cb7a0b2b6b905a504b8b56fe3c1634adf8fb71 (patch)
tree		ed0a4b6153ee727aca95aadfb8b9061cd873ca7e /sysdeps/arm/armv7
parent		42a844c6a213f9219a4baa013c7305679d5dcaaa (diff)
Remove sfi_* annotations from ARM assembly files.
This semi-mechanical patch removes all uses and definitions of the
sfi_breg, sfi_pld, and sfi_sp macros from various ARM-specific assembly
files.  These were only used by NaCl.

	* sysdeps/arm/sysdep.h (ARM_SFI_MACROS, sfi_breg, sfi_pld, sfi_sp):
	Delete definitions.
	* sysdeps/arm/__longjmp.S, sysdeps/arm/add_n.S
	* sysdeps/arm/addmul_1.S, sysdeps/arm/arm-mcount.S
	* sysdeps/arm/armv6/rawmemchr.S, sysdeps/arm/armv6/strchr.S
	* sysdeps/arm/armv6/strcpy.S, sysdeps/arm/armv6/strlen.S
	* sysdeps/arm/armv6/strrchr.S, sysdeps/arm/armv6t2/memchr.S
	* sysdeps/arm/armv6t2/strlen.S
	* sysdeps/arm/armv7/multiarch/memcpy_impl.S
	* sysdeps/arm/armv7/strcmp.S, sysdeps/arm/dl-tlsdesc.S
	* sysdeps/arm/memcpy.S, sysdeps/arm/memmove.S
	* sysdeps/arm/memset.S, sysdeps/arm/setjmp.S
	* sysdeps/arm/strlen.S, sysdeps/arm/submul_1.S:
	Remove all uses of sfi_breg, sfi_pld, and sfi_sp.
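The macro definitions themselves come out of sysdeps/arm/sysdep.h, which lies outside the subtree shown on this page. For orientation, here is a minimal sketch of the default (non-NaCl) definitions, reconstructed from the usage visible in the hunks below rather than quoted from sysdep.h; in the default configuration each wrapper simply expanded back to the bare instruction:

	/* Sketch only; the exact glibc text may differ.  sfi_breg hoists
	   the base register out of the addressing mode so that an SFI
	   scheme such as NaCl can emit a masking instruction before the
	   access.  The default expansion re-emits the instruction with
	   \B bound to the original base register.  */
	.macro sfi_breg basereg, insn, operands:vararg
		.macro _sfi_breg_doit B
		\insn	\operands
		.endm
		_sfi_breg_doit \basereg
		.purgem	_sfi_breg_doit
	.endm

	/* sfi_pld REG, #OFS is equivalent to pld [REG, #OFS].  */
	.macro sfi_pld basereg, offset=#0
		pld	[\basereg, \offset]
	.endm

	/* sfi_sp marked instructions that modify sp directly; the
	   default definition expanded to nothing.  */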
Diffstat (limited to 'sysdeps/arm/armv7')
-rw-r--r--	sysdeps/arm/armv7/multiarch/memcpy_impl.S	581
-rw-r--r--	sysdeps/arm/armv7/strcmp.S	93
2 files changed, 227 insertions, 447 deletions
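Every hunk below applies the same mechanical rewrite: the wrapper is dropped and the base register moves back into the addressing mode. Taking one load from memcpy_impl.S as the pattern,

	sfi_breg src, \
	ldr	tmp1, [\B, #-(\i * 4)]

becomes the plain

	ldr	tmp1, [src, #-(\i * 4)]

and, likewise, sfi_pld src, #8 becomes pld [src, #8].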
diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
index 5d5a3ce..c1b9fb0 100644
--- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
@@ -226,71 +226,40 @@
#ifdef USE_VFP
.macro cpy_line_vfp vreg, base
- sfi_breg dst, \
- vstr \vreg, [\B, #\base]
- sfi_breg src, \
- vldr \vreg, [\B, #\base]
- sfi_breg dst, \
- vstr d0, [\B, #\base + 8]
- sfi_breg src, \
- vldr d0, [\B, #\base + 8]
- sfi_breg dst, \
- vstr d1, [\B, #\base + 16]
- sfi_breg src, \
- vldr d1, [\B, #\base + 16]
- sfi_breg dst, \
- vstr d2, [\B, #\base + 24]
- sfi_breg src, \
- vldr d2, [\B, #\base + 24]
- sfi_breg dst, \
- vstr \vreg, [\B, #\base + 32]
- sfi_breg src, \
- vldr \vreg, [\B, #\base + prefetch_lines * 64 - 32]
- sfi_breg dst, \
- vstr d0, [\B, #\base + 40]
- sfi_breg src, \
- vldr d0, [\B, #\base + 40]
- sfi_breg dst, \
- vstr d1, [\B, #\base + 48]
- sfi_breg src, \
- vldr d1, [\B, #\base + 48]
- sfi_breg dst, \
- vstr d2, [\B, #\base + 56]
- sfi_breg src, \
- vldr d2, [\B, #\base + 56]
+ vstr \vreg, [dst, #\base]
+ vldr \vreg, [src, #\base]
+ vstr d0, [dst, #\base + 8]
+ vldr d0, [src, #\base + 8]
+ vstr d1, [dst, #\base + 16]
+ vldr d1, [src, #\base + 16]
+ vstr d2, [dst, #\base + 24]
+ vldr d2, [src, #\base + 24]
+ vstr \vreg, [dst, #\base + 32]
+ vldr \vreg, [src, #\base + prefetch_lines * 64 - 32]
+ vstr d0, [dst, #\base + 40]
+ vldr d0, [src, #\base + 40]
+ vstr d1, [dst, #\base + 48]
+ vldr d1, [src, #\base + 48]
+ vstr d2, [dst, #\base + 56]
+ vldr d2, [src, #\base + 56]
.endm
.macro cpy_tail_vfp vreg, base
- sfi_breg dst, \
- vstr \vreg, [\B, #\base]
- sfi_breg src, \
- vldr \vreg, [\B, #\base]
- sfi_breg dst, \
- vstr d0, [\B, #\base + 8]
- sfi_breg src, \
- vldr d0, [\B, #\base + 8]
- sfi_breg dst, \
- vstr d1, [\B, #\base + 16]
- sfi_breg src, \
- vldr d1, [\B, #\base + 16]
- sfi_breg dst, \
- vstr d2, [\B, #\base + 24]
- sfi_breg src, \
- vldr d2, [\B, #\base + 24]
- sfi_breg dst, \
- vstr \vreg, [\B, #\base + 32]
- sfi_breg dst, \
- vstr d0, [\B, #\base + 40]
- sfi_breg src, \
- vldr d0, [\B, #\base + 40]
- sfi_breg dst, \
- vstr d1, [\B, #\base + 48]
- sfi_breg src, \
- vldr d1, [\B, #\base + 48]
- sfi_breg dst, \
- vstr d2, [\B, #\base + 56]
- sfi_breg src, \
- vldr d2, [\B, #\base + 56]
+ vstr \vreg, [dst, #\base]
+ vldr \vreg, [src, #\base]
+ vstr d0, [dst, #\base + 8]
+ vldr d0, [src, #\base + 8]
+ vstr d1, [dst, #\base + 16]
+ vldr d1, [src, #\base + 16]
+ vstr d2, [dst, #\base + 24]
+ vldr d2, [src, #\base + 24]
+ vstr \vreg, [dst, #\base + 32]
+ vstr d0, [dst, #\base + 40]
+ vldr d0, [src, #\base + 40]
+ vstr d1, [dst, #\base + 48]
+ vldr d1, [src, #\base + 48]
+ vstr d2, [dst, #\base + 56]
+ vldr d2, [src, #\base + 56]
.endm
#endif
@@ -316,26 +285,16 @@ ENTRY(memcpy)
vst1.8 {d0}, [\reg]!
.endm
- /* These are used by the NaCl sfi_breg macro. */
- .macro _sfi_breg_dmask_neon_load_d0 reg
- _sfi_dmask \reg
- .endm
- .macro _sfi_breg_dmask_neon_store_d0 reg
- _sfi_dmask \reg
- .endm
-
and tmp1, count, #0x38
.macro dispatch_step i
- sfi_breg src, neon_load_d0 \B
- sfi_breg dst, neon_store_d0 \B
+ neon_load_d0 src
+ neon_store_d0 dst
.endm
dispatch_7_dword
tst count, #4
- sfi_breg src, \
- ldrne tmp1, [\B], #4
- sfi_breg dst, \
- strne tmp1, [\B], #4
+ ldrne tmp1, [src], #4
+ strne tmp1, [dst], #4
#else
/* Copy up to 15 full words of data. May not be aligned. */
/* Cannot use VFP for unaligned data. */
@@ -344,23 +303,17 @@ ENTRY(memcpy)
add src, src, tmp1
/* Jump directly into the sequence below at the correct offset. */
.macro dispatch_step i
- sfi_breg src, \
- ldr tmp1, [\B, #-(\i * 4)]
- sfi_breg dst, \
- str tmp1, [\B, #-(\i * 4)]
+ ldr tmp1, [src, #-(\i * 4)]
+ str tmp1, [dst, #-(\i * 4)]
.endm
dispatch_15_word
#endif
lsls count, count, #31
- sfi_breg src, \
- ldrhcs tmp1, [\B], #2
- sfi_breg src, \
- ldrbne src, [\B] /* Src is dead, use as a scratch. */
- sfi_breg dst, \
- strhcs tmp1, [\B], #2
- sfi_breg dst, \
- strbne src, [\B]
+ ldrhcs tmp1, [src], #2
+ ldrbne src, [src] /* Src is dead, use as a scratch. */
+ strhcs tmp1, [dst], #2
+ strbne src, [dst]
bx lr
.Lcpy_not_short:
@@ -388,19 +341,13 @@ ENTRY(memcpy)
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
- sfi_breg src, \
- ldrmi tmp1, [\B], #4
- sfi_breg dst, \
- strmi tmp1, [\B], #4
+ ldrmi tmp1, [src], #4
+ strmi tmp1, [dst], #4
lsls tmp2, tmp2, #2
- sfi_breg src, \
- ldrhcs tmp1, [\B], #2
- sfi_breg src, \
- ldrbne tmp2, [\B], #1
- sfi_breg dst, \
- strhcs tmp1, [\B], #2
- sfi_breg dst, \
- strbne tmp2, [\B], #1
+ ldrhcs tmp1, [src], #2
+ ldrbne tmp2, [src], #1
+ strhcs tmp1, [dst], #2
+ strbne tmp2, [dst], #1
1:
subs tmp2, count, #64 /* Use tmp2 for count. */
@@ -412,40 +359,24 @@ ENTRY(memcpy)
.Lcpy_body_medium: /* Count in tmp2. */
#ifdef USE_VFP
1:
- sfi_breg src, \
- vldr d0, [\B, #0]
+ vldr d0, [src, #0]
subs tmp2, tmp2, #64
- sfi_breg src, \
- vldr d1, [\B, #8]
- sfi_breg dst, \
- vstr d0, [\B, #0]
- sfi_breg src, \
- vldr d0, [\B, #16]
- sfi_breg dst, \
- vstr d1, [\B, #8]
- sfi_breg src, \
- vldr d1, [\B, #24]
- sfi_breg dst, \
- vstr d0, [\B, #16]
- sfi_breg src, \
- vldr d0, [\B, #32]
- sfi_breg dst, \
- vstr d1, [\B, #24]
- sfi_breg src, \
- vldr d1, [\B, #40]
- sfi_breg dst, \
- vstr d0, [\B, #32]
- sfi_breg src, \
- vldr d0, [\B, #48]
- sfi_breg dst, \
- vstr d1, [\B, #40]
- sfi_breg src, \
- vldr d1, [\B, #56]
- sfi_breg dst, \
- vstr d0, [\B, #48]
+ vldr d1, [src, #8]
+ vstr d0, [dst, #0]
+ vldr d0, [src, #16]
+ vstr d1, [dst, #8]
+ vldr d1, [src, #24]
+ vstr d0, [dst, #16]
+ vldr d0, [src, #32]
+ vstr d1, [dst, #24]
+ vldr d1, [src, #40]
+ vstr d0, [dst, #32]
+ vldr d0, [src, #48]
+ vstr d1, [dst, #40]
+ vldr d1, [src, #56]
+ vstr d0, [dst, #48]
add src, src, #64
- sfi_breg dst, \
- vstr d1, [\B, #56]
+ vstr d1, [dst, #56]
add dst, dst, #64
bge 1b
tst tmp2, #0x3f
@@ -456,48 +387,30 @@ ENTRY(memcpy)
add dst, dst, tmp1
add src, src, tmp1
.macro dispatch_step i
- sfi_breg src, \
- vldr d0, [\B, #-(\i * 8)]
- sfi_breg dst, \
- vstr d0, [\B, #-(\i * 8)]
+ vldr d0, [src, #-(\i * 8)]
+ vstr d0, [dst, #-(\i * 8)]
.endm
dispatch_7_dword
#else
sub src, src, #8
sub dst, dst, #8
1:
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #8]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #8]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #16]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #16]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #24]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #24]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #32]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #32]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #40]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #40]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #48]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #48]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #56]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #56]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #64]!
- sfi_breg dst, \
- strd A_l, A_h, [\B, #64]!
+ ldrd A_l, A_h, [src, #8]
+ strd A_l, A_h, [dst, #8]
+ ldrd A_l, A_h, [src, #16]
+ strd A_l, A_h, [dst, #16]
+ ldrd A_l, A_h, [src, #24]
+ strd A_l, A_h, [dst, #24]
+ ldrd A_l, A_h, [src, #32]
+ strd A_l, A_h, [dst, #32]
+ ldrd A_l, A_h, [src, #40]
+ strd A_l, A_h, [dst, #40]
+ ldrd A_l, A_h, [src, #48]
+ strd A_l, A_h, [dst, #48]
+ ldrd A_l, A_h, [src, #56]
+ strd A_l, A_h, [dst, #56]
+ ldrd A_l, A_h, [src, #64]!
+ strd A_l, A_h, [dst, #64]!
subs tmp2, tmp2, #64
bge 1b
tst tmp2, #0x3f
@@ -524,28 +437,20 @@ ENTRY(memcpy)
add dst, dst, tmp1
add src, src, tmp1
.macro dispatch_step i
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #-(\i * 8)]
- sfi_breg dst, \
- strd A_l, A_h, [\B, #-(\i * 8)]
+ ldrd A_l, A_h, [src, #-(\i * 8)]
+ strd A_l, A_h, [dst, #-(\i * 8)]
.endm
dispatch_7_dword
#endif
tst tmp2, #4
- sfi_breg src, \
- ldrne tmp1, [\B], #4
- sfi_breg dst, \
- strne tmp1, [\B], #4
+ ldrne tmp1, [src], #4
+ strne tmp1, [dst], #4
lsls tmp2, tmp2, #31 /* Count (tmp2) now dead. */
- sfi_breg src, \
- ldrhcs tmp1, [\B], #2
- sfi_breg src, \
- ldrbne tmp2, [\B]
- sfi_breg dst, \
- strhcs tmp1, [\B], #2
- sfi_breg dst, \
- strbne tmp2, [\B]
+ ldrhcs tmp1, [src], #2
+ ldrbne tmp2, [src]
+ strhcs tmp1, [dst], #2
+ strbne tmp2, [dst]
.Ldone:
ldr tmp2, [sp], #FRAME_SIZE
@@ -565,23 +470,15 @@ ENTRY(memcpy)
copy position into a register. This should act like a PLD
operation but we won't have to repeat the transfer. */
- sfi_breg src, \
- vldr d3, [\B, #0]
- sfi_breg src, \
- vldr d4, [\B, #64]
- sfi_breg src, \
- vldr d5, [\B, #128]
- sfi_breg src, \
- vldr d6, [\B, #192]
- sfi_breg src, \
- vldr d7, [\B, #256]
-
- sfi_breg src, \
- vldr d0, [\B, #8]
- sfi_breg src, \
- vldr d1, [\B, #16]
- sfi_breg src, \
- vldr d2, [\B, #24]
+ vldr d3, [src, #0]
+ vldr d4, [src, #64]
+ vldr d5, [src, #128]
+ vldr d6, [src, #192]
+ vldr d7, [src, #256]
+
+ vldr d0, [src, #8]
+ vldr d1, [src, #16]
+ vldr d2, [src, #24]
add src, src, #32
subs tmp2, tmp2, #prefetch_lines * 64 * 2
@@ -606,31 +503,19 @@ ENTRY(memcpy)
add src, src, #3 * 64
add dst, dst, #3 * 64
cpy_tail_vfp d6, 0
- sfi_breg dst, \
- vstr d7, [\B, #64]
- sfi_breg src, \
- vldr d7, [\B, #64]
- sfi_breg dst, \
- vstr d0, [\B, #64 + 8]
- sfi_breg src, \
- vldr d0, [\B, #64 + 8]
- sfi_breg dst, \
- vstr d1, [\B, #64 + 16]
- sfi_breg src, \
- vldr d1, [\B, #64 + 16]
- sfi_breg dst, \
- vstr d2, [\B, #64 + 24]
- sfi_breg src, \
- vldr d2, [\B, #64 + 24]
- sfi_breg dst, \
- vstr d7, [\B, #64 + 32]
+ vstr d7, [dst, #64]
+ vldr d7, [src, #64]
+ vstr d0, [dst, #64 + 8]
+ vldr d0, [src, #64 + 8]
+ vstr d1, [dst, #64 + 16]
+ vldr d1, [src, #64 + 16]
+ vstr d2, [dst, #64 + 24]
+ vldr d2, [src, #64 + 24]
+ vstr d7, [dst, #64 + 32]
add src, src, #96
- sfi_breg dst, \
- vstr d0, [\B, #64 + 40]
- sfi_breg dst, \
- vstr d1, [\B, #64 + 48]
- sfi_breg dst, \
- vstr d2, [\B, #64 + 56]
+ vstr d0, [dst, #64 + 40]
+ vstr d1, [dst, #64 + 48]
+ vstr d2, [dst, #64 + 56]
add dst, dst, #128
add tmp2, tmp2, #prefetch_lines * 64
b .Lcpy_body_medium
@@ -641,83 +526,59 @@ ENTRY(memcpy)
/* Pre-bias src and dst. */
sub src, src, #8
sub dst, dst, #8
- sfi_pld src, #8
- sfi_pld src, #72
+ pld [src, #8]
+ pld [src, #72]
subs tmp2, tmp2, #64
- sfi_pld src, #136
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #8]
+ pld [src, #136]
+ ldrd A_l, A_h, [src, #8]
strd B_l, B_h, [sp, #8]
cfi_rel_offset (B_l, 8)
cfi_rel_offset (B_h, 12)
- sfi_breg src, \
- ldrd B_l, B_h, [\B, #16]
+ ldrd B_l, B_h, [src, #16]
strd C_l, C_h, [sp, #16]
cfi_rel_offset (C_l, 16)
cfi_rel_offset (C_h, 20)
- sfi_breg src, \
- ldrd C_l, C_h, [\B, #24]
+ ldrd C_l, C_h, [src, #24]
strd D_l, D_h, [sp, #24]
cfi_rel_offset (D_l, 24)
cfi_rel_offset (D_h, 28)
- sfi_pld src, #200
- sfi_breg src, \
- ldrd D_l, D_h, [\B, #32]!
+ pld [src, #200]
+ ldrd D_l, D_h, [src, #32]!
b 1f
.p2align 6
2:
- sfi_pld src, #232
- sfi_breg dst, \
- strd A_l, A_h, [\B, #40]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #40]
- sfi_breg dst, \
- strd B_l, B_h, [\B, #48]
- sfi_breg src, \
- ldrd B_l, B_h, [\B, #48]
- sfi_breg dst, \
- strd C_l, C_h, [\B, #56]
- sfi_breg src, \
- ldrd C_l, C_h, [\B, #56]
- sfi_breg dst, \
- strd D_l, D_h, [\B, #64]!
- sfi_breg src, \
- ldrd D_l, D_h, [\B, #64]!
+ pld [src, #232]
+ strd A_l, A_h, [dst, #40]
+ ldrd A_l, A_h, [src, #40]
+ strd B_l, B_h, [dst, #48]
+ ldrd B_l, B_h, [src, #48]
+ strd C_l, C_h, [dst, #56]
+ ldrd C_l, C_h, [src, #56]
+ strd D_l, D_h, [dst, #64]!
+ ldrd D_l, D_h, [src, #64]!
subs tmp2, tmp2, #64
1:
- sfi_breg dst, \
- strd A_l, A_h, [\B, #8]
- sfi_breg src, \
- ldrd A_l, A_h, [\B, #8]
- sfi_breg dst, \
- strd B_l, B_h, [\B, #16]
- sfi_breg src, \
- ldrd B_l, B_h, [\B, #16]
- sfi_breg dst, \
- strd C_l, C_h, [\B, #24]
- sfi_breg src, \
- ldrd C_l, C_h, [\B, #24]
- sfi_breg dst, \
- strd D_l, D_h, [\B, #32]
- sfi_breg src, \
- ldrd D_l, D_h, [\B, #32]
+ strd A_l, A_h, [dst, #8]
+ ldrd A_l, A_h, [src, #8]
+ strd B_l, B_h, [dst, #16]
+ ldrd B_l, B_h, [src, #16]
+ strd C_l, C_h, [dst, #24]
+ ldrd C_l, C_h, [src, #24]
+ strd D_l, D_h, [dst, #32]
+ ldrd D_l, D_h, [src, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
- sfi_breg dst, \
- strd A_l, A_h, [\B, #40]
+ strd A_l, A_h, [dst, #40]
add src, src, #40
- sfi_breg dst, \
- strd B_l, B_h, [\B, #48]
+ strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [sp, #8]
cfi_restore (B_l)
cfi_restore (B_h)
- sfi_breg dst, \
- strd C_l, C_h, [\B, #56]
+ strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [sp, #16]
cfi_restore (C_l)
cfi_restore (C_h)
- sfi_breg dst, \
- strd D_l, D_h, [\B, #64]
+ strd D_l, D_h, [dst, #64]
ldrd D_l, D_h, [sp, #24]
cfi_restore (D_l)
cfi_restore (D_h)
@@ -734,35 +595,29 @@ ENTRY(memcpy)
cfi_remember_state
.Lcpy_notaligned:
- sfi_pld src
- sfi_pld src, #64
+ pld [src, #0]
+ pld [src, #64]
/* There's at least 64 bytes to copy, but there is no mutual
alignment. */
/* Bring DST to 64-bit alignment. */
lsls tmp2, dst, #29
- sfi_pld src, #(2 * 64)
+ pld [src, #(2 * 64)]
beq 1f
rsbs tmp2, tmp2, #0
sub count, count, tmp2, lsr #29
- sfi_breg src, \
- ldrmi tmp1, [\B], #4
- sfi_breg dst, \
- strmi tmp1, [\B], #4
+ ldrmi tmp1, [src], #4
+ strmi tmp1, [dst], #4
lsls tmp2, tmp2, #2
- sfi_breg src, \
- ldrbne tmp1, [\B], #1
- sfi_breg src, \
- ldrhcs tmp2, [\B], #2
- sfi_breg dst, \
- strbne tmp1, [\B], #1
- sfi_breg dst, \
- strhcs tmp2, [\B], #2
+ ldrbne tmp1, [src], #1
+ ldrhcs tmp2, [src], #2
+ strbne tmp1, [dst], #1
+ strhcs tmp2, [dst], #2
1:
- sfi_pld src, #(3 * 64)
+ pld [src, #(3 * 64)]
subs count, count, #64
ldrmi tmp2, [sp], #FRAME_SIZE
bmi .Ltail63unaligned
- sfi_pld src, #(4 * 64)
+ pld [src, #(4 * 64)]
#ifdef USE_NEON
/* These need an extra layer of macro just to work around a
@@ -775,132 +630,88 @@ ENTRY(memcpy)
vst1.8 {\reglist}, [ALIGN (\basereg, 64)]!
.endm
- /* These are used by the NaCl sfi_breg macro. */
- .macro _sfi_breg_dmask_neon_load_multi reg
- _sfi_dmask \reg
- .endm
- .macro _sfi_breg_dmask_neon_store_multi reg
- _sfi_dmask \reg
- .endm
-
- sfi_breg src, neon_load_multi d0-d3, \B
- sfi_breg src, neon_load_multi d4-d7, \B
+ neon_load_multi d0-d3, src
+ neon_load_multi d4-d7, src
subs count, count, #64
bmi 2f
1:
- sfi_pld src, #(4 * 64)
- sfi_breg dst, neon_store_multi d0-d3, \B
- sfi_breg src, neon_load_multi d0-d3, \B
- sfi_breg dst, neon_store_multi d4-d7, \B
- sfi_breg src, neon_load_multi d4-d7, \B
+ pld [src, #(4 * 64)]
+ neon_store_multi d0-d3, dst
+ neon_load_multi d0-d3, src
+ neon_store_multi d4-d7, dst
+ neon_load_multi d4-d7, src
subs count, count, #64
bpl 1b
2:
- sfi_breg dst, neon_store_multi d0-d3, \B
- sfi_breg dst, neon_store_multi d4-d7, \B
+ neon_store_multi d0-d3, dst
+ neon_store_multi d4-d7, dst
ands count, count, #0x3f
#else
/* Use an SMS style loop to maximize the I/O bandwidth. */
sub src, src, #4
sub dst, dst, #8
subs tmp2, count, #64 /* Use tmp2 for count. */
- sfi_breg src, \
- ldr A_l, [\B, #4]
- sfi_breg src, \
- ldr A_h, [\B, #8]
+ ldr A_l, [src, #4]
+ ldr A_h, [src, #8]
strd B_l, B_h, [sp, #8]
cfi_rel_offset (B_l, 8)
cfi_rel_offset (B_h, 12)
- sfi_breg src, \
- ldr B_l, [\B, #12]
- sfi_breg src, \
- ldr B_h, [\B, #16]
+ ldr B_l, [src, #12]
+ ldr B_h, [src, #16]
strd C_l, C_h, [sp, #16]
cfi_rel_offset (C_l, 16)
cfi_rel_offset (C_h, 20)
- sfi_breg src, \
- ldr C_l, [\B, #20]
- sfi_breg src, \
- ldr C_h, [\B, #24]
+ ldr C_l, [src, #20]
+ ldr C_h, [src, #24]
strd D_l, D_h, [sp, #24]
cfi_rel_offset (D_l, 24)
cfi_rel_offset (D_h, 28)
- sfi_breg src, \
- ldr D_l, [\B, #28]
- sfi_breg src, \
- ldr D_h, [\B, #32]!
+ ldr D_l, [src, #28]
+ ldr D_h, [src, #32]!
b 1f
.p2align 6
2:
- sfi_pld src, #(5 * 64) - (32 - 4)
- sfi_breg dst, \
- strd A_l, A_h, [\B, #40]
- sfi_breg src, \
- ldr A_l, [\B, #36]
- sfi_breg src, \
- ldr A_h, [\B, #40]
- sfi_breg dst, \
- strd B_l, B_h, [\B, #48]
- sfi_breg src, \
- ldr B_l, [\B, #44]
- sfi_breg src, \
- ldr B_h, [\B, #48]
- sfi_breg dst, \
- strd C_l, C_h, [\B, #56]
- sfi_breg src, \
- ldr C_l, [\B, #52]
- sfi_breg src, \
- ldr C_h, [\B, #56]
- sfi_breg dst, \
- strd D_l, D_h, [\B, #64]!
- sfi_breg src, \
- ldr D_l, [\B, #60]
- sfi_breg src, \
- ldr D_h, [\B, #64]!
+ pld [src, #(5 * 64) - (32 - 4)]
+ strd A_l, A_h, [dst, #40]
+ ldr A_l, [src, #36]
+ ldr A_h, [src, #40]
+ strd B_l, B_h, [dst, #48]
+ ldr B_l, [src, #44]
+ ldr B_h, [src, #48]
+ strd C_l, C_h, [dst, #56]
+ ldr C_l, [src, #52]
+ ldr C_h, [src, #56]
+ strd D_l, D_h, [dst, #64]!
+ ldr D_l, [src, #60]
+ ldr D_h, [src, #64]!
subs tmp2, tmp2, #64
1:
- sfi_breg dst, \
- strd A_l, A_h, [\B, #8]
- sfi_breg src, \
- ldr A_l, [\B, #4]
- sfi_breg src, \
- ldr A_h, [\B, #8]
- sfi_breg dst, \
- strd B_l, B_h, [\B, #16]
- sfi_breg src, \
- ldr B_l, [\B, #12]
- sfi_breg src, \
- ldr B_h, [\B, #16]
- sfi_breg dst, \
- strd C_l, C_h, [\B, #24]
- sfi_breg src, \
- ldr C_l, [\B, #20]
- sfi_breg src, \
- ldr C_h, [\B, #24]
- sfi_breg dst, \
- strd D_l, D_h, [\B, #32]
- sfi_breg src, \
- ldr D_l, [\B, #28]
- sfi_breg src, \
- ldr D_h, [\B, #32]
+ strd A_l, A_h, [dst, #8]
+ ldr A_l, [src, #4]
+ ldr A_h, [src, #8]
+ strd B_l, B_h, [dst, #16]
+ ldr B_l, [src, #12]
+ ldr B_h, [src, #16]
+ strd C_l, C_h, [dst, #24]
+ ldr C_l, [src, #20]
+ ldr C_h, [src, #24]
+ strd D_l, D_h, [dst, #32]
+ ldr D_l, [src, #28]
+ ldr D_h, [src, #32]
bcs 2b
/* Save the remaining bytes and restore the callee-saved regs. */
- sfi_breg dst, \
- strd A_l, A_h, [\B, #40]
+ strd A_l, A_h, [dst, #40]
add src, src, #36
- sfi_breg dst, \
- strd B_l, B_h, [\B, #48]
+ strd B_l, B_h, [dst, #48]
ldrd B_l, B_h, [sp, #8]
cfi_restore (B_l)
cfi_restore (B_h)
- sfi_breg dst, \
- strd C_l, C_h, [\B, #56]
+ strd C_l, C_h, [dst, #56]
ldrd C_l, C_h, [sp, #16]
cfi_restore (C_l)
cfi_restore (C_h)
- sfi_breg dst, \
- strd D_l, D_h, [\B, #64]
+ strd D_l, D_h, [dst, #64]
ldrd D_l, D_h, [sp, #24]
cfi_restore (D_l)
cfi_restore (D_h)
diff --git a/sysdeps/arm/armv7/strcmp.S b/sysdeps/arm/armv7/strcmp.S
index c8fab4b..25d0557 100644
--- a/sysdeps/arm/armv7/strcmp.S
+++ b/sysdeps/arm/armv7/strcmp.S
@@ -178,10 +178,8 @@
#endif
ENTRY (strcmp)
#if STRCMP_PRECHECK == 1
- sfi_breg src1, \
- ldrb r2, [\B]
- sfi_breg src2, \
- ldrb r3, [\B]
+ ldrb r2, [src1]
+ ldrb r3, [src2]
cmp r2, #1
it cs
cmpcs r2, r3
@@ -211,11 +209,9 @@ ENTRY (strcmp)
and tmp2, tmp1, #3
bic src2, src2, #7
lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- sfi_breg src1, \
- ldrd data1a, data1b, [\B], #16
+ ldrd data1a, data1b, [src1], #16
tst tmp1, #4
- sfi_breg src2, \
- ldrd data2a, data2b, [\B], #16
+ ldrd data2a, data2b, [src2], #16
prepare_mask tmp1, tmp2
apply_mask data1a, tmp1
apply_mask data2a, tmp1
@@ -231,10 +227,8 @@ ENTRY (strcmp)
.p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
.p2align 2 /* Always word aligned. */
.Lloop_aligned8:
- sfi_breg src1, \
- ldrd data1a, data1b, [\B], #16
- sfi_breg src2, \
- ldrd data2a, data2b, [\B], #16
+ ldrd data1a, data1b, [src1], #16
+ ldrd data2a, data2b, [src2], #16
.Lstart_realigned8:
uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
eor syndrome_a, data1a, data2a
@@ -245,10 +239,8 @@ ENTRY (strcmp)
sel syndrome_b, syndrome_b, const_m1
cbnz syndrome_b, .Ldiff_in_b
- sfi_breg src1, \
- ldrd data1a, data1b, [\B, #-8]
- sfi_breg src2, \
- ldrd data2a, data2b, [\B, #-8]
+ ldrd data1a, data1b, [src1, #-8]
+ ldrd data2a, data2b, [src2, #-8]
uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
eor syndrome_a, data1a, data2a
sel syndrome_a, syndrome_a, const_m1
@@ -279,19 +271,15 @@ ENTRY (strcmp)
/* Unrolled by a factor of 2, to reduce the number of post-increment
operations. */
.Lloop_aligned4:
- sfi_breg src1, \
- ldr data1, [\B], #8
- sfi_breg src2, \
- ldr data2, [\B], #8
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
.Lstart_realigned4:
uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
eor syndrome, data1, data2
sel syndrome, syndrome, const_m1
cbnz syndrome, .Laligned4_done
- sfi_breg src1, \
- ldr data1, [\B, #-4]
- sfi_breg src2, \
- ldr data2, [\B, #-4]
+ ldr data1, [src1, #-4]
+ ldr data2, [src2, #-4]
uadd8 syndrome, data1, const_m1
eor syndrome, data1, data2
sel syndrome, syndrome, const_m1
@@ -307,11 +295,9 @@ ENTRY (strcmp)
masking off the unwanted loaded data to prevent a difference. */
lsl tmp1, tmp1, #3 /* Bytes -> bits. */
bic src1, src1, #3
- sfi_breg src1, \
- ldr data1, [\B], #8
+ ldr data1, [src1], #8
bic src2, src2, #3
- sfi_breg src2, \
- ldr data2, [\B], #8
+ ldr data2, [src2], #8
prepare_mask tmp1, tmp1
apply_mask data1, tmp1
@@ -324,30 +310,26 @@ ENTRY (strcmp)
sub src2, src2, tmp1
bic src1, src1, #3
lsls tmp1, tmp1, #31
- sfi_breg src1, \
- ldr data1, [\B], #4
+ ldr data1, [src1], #4
beq .Laligned_m2
bcs .Laligned_m1
#if STRCMP_PRECHECK == 0
- sfi_breg src2, \
- ldrb data2, [\B, #1]
+ ldrb data2, [src2, #1]
uxtb tmp1, data1, ror #BYTE1_OFFSET
subs tmp1, tmp1, data2
bne .Lmisaligned_exit
cbz data2, .Lmisaligned_exit
.Laligned_m2:
- sfi_breg src2, \
- ldrb data2, [\B, #2]
+ ldrb data2, [src2, #2]
uxtb tmp1, data1, ror #BYTE2_OFFSET
subs tmp1, tmp1, data2
bne .Lmisaligned_exit
cbz data2, .Lmisaligned_exit
.Laligned_m1:
- sfi_breg src2, \
- ldrb data2, [\B, #3]
+ ldrb data2, [src2, #3]
uxtb tmp1, data1, ror #BYTE3_OFFSET
subs tmp1, tmp1, data2
bne .Lmisaligned_exit
@@ -356,16 +338,14 @@ ENTRY (strcmp)
#else /* STRCMP_PRECHECK */
/* If we've done the pre-check, then we don't need to check the
first byte again here. */
- sfi_breg src2, \
- ldrb data2, [\B, #2]
+ ldrb data2, [src2, #2]
uxtb tmp1, data1, ror #BYTE2_OFFSET
subs tmp1, tmp1, data2
bne .Lmisaligned_exit
cbz data2, .Lmisaligned_exit
.Laligned_m2:
- sfi_breg src2, \
- ldrb data2, [\B, #3]
+ ldrb data2, [src2, #3]
uxtb tmp1, data1, ror #BYTE3_OFFSET
subs tmp1, tmp1, data2
bne .Lmisaligned_exit
@@ -391,13 +371,11 @@ ENTRY (strcmp)
cfi_restore_state
/* src1 is word aligned, but src2 has no common alignment
with it. */
- sfi_breg src1, \
- ldr data1, [\B], #4
+ ldr data1, [src1], #4
lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */
bic src2, src2, #3
- sfi_breg src2, \
- ldr data2, [\B], #4
+ ldr data2, [src2], #4
bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
@@ -409,13 +387,11 @@ ENTRY (strcmp)
sel syndrome, syndrome, const_m1
bne 4f
cbnz syndrome, 5f
- sfi_breg src2, \
- ldr data2, [\B], #4
+ ldr data2, [src2], #4
eor tmp1, tmp1, data1
cmp tmp1, data2, S2HI #24
bne 6f
- sfi_breg src1, \
- ldr data1, [\B], #4
+ ldr data1, [src1], #4
b .Loverlap3
4:
S2LO data2, data2, #8
@@ -427,8 +403,7 @@ ENTRY (strcmp)
/* We can only get here if the MSB of data1 contains 0, so
fast-path the exit. */
- sfi_breg src2, \
- ldrb result, [\B]
+ ldrb result, [src2]
ldrd r4, r5, [sp], #16
cfi_remember_state
cfi_def_cfa_offset (0)
@@ -454,13 +429,11 @@ ENTRY (strcmp)
sel syndrome, syndrome, const_m1
bne 4f
cbnz syndrome, 5f
- sfi_breg src2, \
- ldr data2, [\B], #4
+ ldr data2, [src2], #4
eor tmp1, tmp1, data1
cmp tmp1, data2, S2HI #16
bne 6f
- sfi_breg src1, \
- ldr data1, [\B], #4
+ ldr data1, [src1], #4
b .Loverlap2
4:
S2LO data2, data2, #16
@@ -469,8 +442,7 @@ ENTRY (strcmp)
ands syndrome, syndrome, const_m1, S2LO #16
bne .Lstrcmp_done_equal
- sfi_breg src2, \
- ldrh data2, [\B]
+ ldrh data2, [src2]
S2LO data1, data1, #16
#ifdef __ARM_BIG_ENDIAN
lsl data2, data2, #16
@@ -490,13 +462,11 @@ ENTRY (strcmp)
sel syndrome, syndrome, const_m1
bne 4f
cbnz syndrome, 5f
- sfi_breg src2, \
- ldr data2, [\B], #4
+ ldr data2, [src2], #4
eor tmp1, tmp1, data1
cmp tmp1, data2, S2HI #8
bne 6f
- sfi_breg src1, \
- ldr data1, [\B], #4
+ ldr data1, [src1], #4
b .Loverlap1
4:
S2LO data2, data2, #24
@@ -504,8 +474,7 @@ ENTRY (strcmp)
5:
tst syndrome, #LSB
bne .Lstrcmp_done_equal
- sfi_breg src2, \
- ldr data2, [\B]
+ ldr data2, [src2]
6:
S2LO data1, data1, #8
bic data2, data2, #MSB