author		Zack Weinberg <zackw@panix.com>		2017-05-11 20:36:15 -0400
committer	Zack Weinberg <zackw@panix.com>		2017-05-20 08:12:11 -0400
commit		81cb7a0b2b6b905a504b8b56fe3c1634adf8fb71 (patch)
tree		ed0a4b6153ee727aca95aadfb8b9061cd873ca7e /sysdeps/arm/armv7
parent		42a844c6a213f9219a4baa013c7305679d5dcaaa (diff)
Remove sfi_* annotations from ARM assembly files.
This semi-mechanical patch removes all uses and definitions of the
sfi_breg, sfi_pld, and sfi_sp macros from various ARM-specific
assembly files. These were only used by NaCl.
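
The pattern, visible throughout the diff below, is mechanical.  A wrapped
access such as

	sfi_breg src, \
	ldr	tmp1, [\B], #4

becomes the ordinary post-indexed load

	ldr	tmp1, [src], #4

and a wrapped prefetch such as "sfi_pld src, #64" becomes "pld [src, #64]".
On non-NaCl targets the macros expanded to exactly these plain
instructions, so the cleanup should leave the generated code unchanged.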
* sysdeps/arm/sysdep.h
(ARM_SFI_MACROS, sfi_breg, sfi_pld, sfi_sp): Delete definitions.
* sysdeps/arm/__longjmp.S, sysdeps/arm/add_n.S
* sysdeps/arm/addmul_1.S, sysdeps/arm/arm-mcount.S
* sysdeps/arm/armv6/rawmemchr.S, sysdeps/arm/armv6/strchr.S
* sysdeps/arm/armv6/strcpy.S, sysdeps/arm/armv6/strlen.S
* sysdeps/arm/armv6/strrchr.S, sysdeps/arm/armv6t2/memchr.S
* sysdeps/arm/armv6t2/strlen.S
* sysdeps/arm/armv7/multiarch/memcpy_impl.S
* sysdeps/arm/armv7/strcmp.S, sysdeps/arm/dl-tlsdesc.S
* sysdeps/arm/memcpy.S, sysdeps/arm/memmove.S
* sysdeps/arm/memset.S, sysdeps/arm/setjmp.S
* sysdeps/arm/strlen.S, sysdeps/arm/submul_1.S:
Remove all uses of sfi_breg, sfi_pld, and sfi_sp.
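
For reference, the deleted generic (non-NaCl) definitions in
sysdeps/arm/sysdep.h were approximately the identity wrappers sketched
below.  This sketch is quoted from memory rather than from the patch; it
shows why the removed lines address memory through \B: sfi_breg bound the
call site's base register to the inner macro argument B.

	/* Approximate sketch of the deleted non-NaCl wrappers.  The NaCl
	   variants instead masked the base register before the access.  */
	.macro sfi_breg basereg, insn, operands:vararg
	.macro _sfi_breg_doit B
	\insn \operands
	.endm
	_sfi_breg_doit \basereg
	.purgem _sfi_breg_doit
	.endm

	.macro sfi_pld basereg, offset=#0
	pld	[\basereg, \offset]
	.endm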
Diffstat (limited to 'sysdeps/arm/armv7')
-rw-r--r--	sysdeps/arm/armv7/multiarch/memcpy_impl.S	581
-rw-r--r--	sysdeps/arm/armv7/strcmp.S			 93
2 files changed, 227 insertions, 447 deletions
diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
index 5d5a3ce..c1b9fb0 100644
--- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
@@ -226,71 +226,40 @@
 #ifdef USE_VFP
 	.macro	cpy_line_vfp vreg, base
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base]
-	sfi_breg src, \
-	vldr	\vreg, [\B, #\base]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 8]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 8]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 16]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 16]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 24]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 24]
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base + 32]
-	sfi_breg src, \
-	vldr	\vreg, [\B, #\base + prefetch_lines * 64 - 32]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 40]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 40]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 48]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 48]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 56]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 56]
+	vstr	\vreg, [dst, #\base]
+	vldr	\vreg, [src, #\base]
+	vstr	d0, [dst, #\base + 8]
+	vldr	d0, [src, #\base + 8]
+	vstr	d1, [dst, #\base + 16]
+	vldr	d1, [src, #\base + 16]
+	vstr	d2, [dst, #\base + 24]
+	vldr	d2, [src, #\base + 24]
+	vstr	\vreg, [dst, #\base + 32]
+	vldr	\vreg, [src, #\base + prefetch_lines * 64 - 32]
+	vstr	d0, [dst, #\base + 40]
+	vldr	d0, [src, #\base + 40]
+	vstr	d1, [dst, #\base + 48]
+	vldr	d1, [src, #\base + 48]
+	vstr	d2, [dst, #\base + 56]
+	vldr	d2, [src, #\base + 56]
 	.endm

 	.macro	cpy_tail_vfp vreg, base
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base]
-	sfi_breg src, \
-	vldr	\vreg, [\B, #\base]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 8]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 8]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 16]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 16]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 24]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 24]
-	sfi_breg dst, \
-	vstr	\vreg, [\B, #\base + 32]
-	sfi_breg dst, \
-	vstr	d0, [\B, #\base + 40]
-	sfi_breg src, \
-	vldr	d0, [\B, #\base + 40]
-	sfi_breg dst, \
-	vstr	d1, [\B, #\base + 48]
-	sfi_breg src, \
-	vldr	d1, [\B, #\base + 48]
-	sfi_breg dst, \
-	vstr	d2, [\B, #\base + 56]
-	sfi_breg src, \
-	vldr	d2, [\B, #\base + 56]
+	vstr	\vreg, [dst, #\base]
+	vldr	\vreg, [src, #\base]
+	vstr	d0, [dst, #\base + 8]
+	vldr	d0, [src, #\base + 8]
+	vstr	d1, [dst, #\base + 16]
+	vldr	d1, [src, #\base + 16]
+	vstr	d2, [dst, #\base + 24]
+	vldr	d2, [src, #\base + 24]
+	vstr	\vreg, [dst, #\base + 32]
+	vstr	d0, [dst, #\base + 40]
+	vldr	d0, [src, #\base + 40]
+	vstr	d1, [dst, #\base + 48]
+	vldr	d1, [src, #\base + 48]
+	vstr	d2, [dst, #\base + 56]
+	vldr	d2, [src, #\base + 56]
 	.endm
 #endif
@@ -316,26 +285,16 @@ ENTRY(memcpy)
 	vst1.8	{d0}, [\reg]!
 	.endm

-	/* These are used by the NaCl sfi_breg macro.  */
-	.macro _sfi_breg_dmask_neon_load_d0 reg
-	_sfi_dmask \reg
-	.endm
-	.macro _sfi_breg_dmask_neon_store_d0 reg
-	_sfi_dmask \reg
-	.endm

 	and	tmp1, count, #0x38
 	.macro dispatch_step i
-	sfi_breg src, neon_load_d0 \B
-	sfi_breg dst, neon_store_d0 \B
+	neon_load_d0 src
+	neon_store_d0 dst
 	.endm
 	dispatch_7_dword

 	tst	count, #4
-	sfi_breg src, \
-	ldrne	tmp1, [\B], #4
-	sfi_breg dst, \
-	strne	tmp1, [\B], #4
+	ldrne	tmp1, [src], #4
+	strne	tmp1, [dst], #4
 #else
 	/* Copy up to 15 full words of data.  May not be aligned.  */
 	/* Cannot use VFP for unaligned data.  */
@@ -344,23 +303,17 @@ ENTRY(memcpy)
 	add	src, src, tmp1
 	/* Jump directly into the sequence below at the correct offset.  */
 	.macro dispatch_step i
-	sfi_breg src, \
-	ldr	tmp1, [\B, #-(\i * 4)]
-	sfi_breg dst, \
-	str	tmp1, [\B, #-(\i * 4)]
+	ldr	tmp1, [src, #-(\i * 4)]
+	str	tmp1, [dst, #-(\i * 4)]
 	.endm
 	dispatch_15_word
 #endif

 	lsls	count, count, #31
-	sfi_breg src, \
-	ldrhcs	tmp1, [\B], #2
-	sfi_breg src, \
-	ldrbne	src, [\B]	/* Src is dead, use as a scratch.  */
-	sfi_breg dst, \
-	strhcs	tmp1, [\B], #2
-	sfi_breg dst, \
-	strbne	src, [\B]
+	ldrhcs	tmp1, [src], #2
+	ldrbne	src, [src]	/* Src is dead, use as a scratch.  */
+	strhcs	tmp1, [dst], #2
+	strbne	src, [dst]
 	bx	lr

 .Lcpy_not_short:
@@ -388,19 +341,13 @@ ENTRY(memcpy)
 	beq	1f
 	rsbs	tmp2, tmp2, #0
 	sub	count, count, tmp2, lsr #29
-	sfi_breg src, \
-	ldrmi	tmp1, [\B], #4
-	sfi_breg dst, \
-	strmi	tmp1, [\B], #4
+	ldrmi	tmp1, [src], #4
+	strmi	tmp1, [dst], #4
 	lsls	tmp2, tmp2, #2
-	sfi_breg src, \
-	ldrhcs	tmp1, [\B], #2
-	sfi_breg src, \
-	ldrbne	tmp2, [\B], #1
-	sfi_breg dst, \
-	strhcs	tmp1, [\B], #2
-	sfi_breg dst, \
-	strbne	tmp2, [\B], #1
+	ldrhcs	tmp1, [src], #2
+	ldrbne	tmp2, [src], #1
+	strhcs	tmp1, [dst], #2
+	strbne	tmp2, [dst], #1

 1:
 	subs	tmp2, count, #64	/* Use tmp2 for count.  */
@@ -412,40 +359,24 @@
 .Lcpy_body_medium:			/* Count in tmp2.  */
 #ifdef USE_VFP
 1:
-	sfi_breg src, \
-	vldr	d0, [\B, #0]
+	vldr	d0, [src, #0]
 	subs	tmp2, tmp2, #64
-	sfi_breg src, \
-	vldr	d1, [\B, #8]
-	sfi_breg dst, \
-	vstr	d0, [\B, #0]
-	sfi_breg src, \
-	vldr	d0, [\B, #16]
-	sfi_breg dst, \
-	vstr	d1, [\B, #8]
-	sfi_breg src, \
-	vldr	d1, [\B, #24]
-	sfi_breg dst, \
-	vstr	d0, [\B, #16]
-	sfi_breg src, \
-	vldr	d0, [\B, #32]
-	sfi_breg dst, \
-	vstr	d1, [\B, #24]
-	sfi_breg src, \
-	vldr	d1, [\B, #40]
-	sfi_breg dst, \
-	vstr	d0, [\B, #32]
-	sfi_breg src, \
-	vldr	d0, [\B, #48]
-	sfi_breg dst, \
-	vstr	d1, [\B, #40]
-	sfi_breg src, \
-	vldr	d1, [\B, #56]
-	sfi_breg dst, \
-	vstr	d0, [\B, #48]
+	vldr	d1, [src, #8]
+	vstr	d0, [dst, #0]
+	vldr	d0, [src, #16]
+	vstr	d1, [dst, #8]
+	vldr	d1, [src, #24]
+	vstr	d0, [dst, #16]
+	vldr	d0, [src, #32]
+	vstr	d1, [dst, #24]
+	vldr	d1, [src, #40]
+	vstr	d0, [dst, #32]
+	vldr	d0, [src, #48]
+	vstr	d1, [dst, #40]
+	vldr	d1, [src, #56]
+	vstr	d0, [dst, #48]
 	add	src, src, #64
-	sfi_breg dst, \
-	vstr	d1, [\B, #56]
+	vstr	d1, [dst, #56]
 	add	dst, dst, #64
 	bge	1b
 	tst	tmp2, #0x3f
@@ -456,48 +387,30 @@ ENTRY(memcpy)
 	add	dst, dst, tmp1
 	add	src, src, tmp1
 	.macro dispatch_step i
-	sfi_breg src, \
-	vldr	d0, [\B, #-(\i * 8)]
-	sfi_breg dst, \
-	vstr	d0, [\B, #-(\i * 8)]
+	vldr	d0, [src, #-(\i * 8)]
+	vstr	d0, [dst, #-(\i * 8)]
 	.endm
 	dispatch_7_dword
 #else
 	sub	src, src, #8
 	sub	dst, dst, #8
 1:
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #8]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #8]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #16]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #16]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #24]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #24]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #32]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #32]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #40]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #48]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #48]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #56]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #56]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #64]!
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #64]!
+	ldrd	A_l, A_h, [src, #8]
+	strd	A_l, A_h, [dst, #8]
+	ldrd	A_l, A_h, [src, #16]
+	strd	A_l, A_h, [dst, #16]
+	ldrd	A_l, A_h, [src, #24]
+	strd	A_l, A_h, [dst, #24]
+	ldrd	A_l, A_h, [src, #32]
+	strd	A_l, A_h, [dst, #32]
+	ldrd	A_l, A_h, [src, #40]
+	strd	A_l, A_h, [dst, #40]
+	ldrd	A_l, A_h, [src, #48]
+	strd	A_l, A_h, [dst, #48]
+	ldrd	A_l, A_h, [src, #56]
+	strd	A_l, A_h, [dst, #56]
+	ldrd	A_l, A_h, [src, #64]!
+	strd	A_l, A_h, [dst, #64]!
 	subs	tmp2, tmp2, #64
 	bge	1b
 	tst	tmp2, #0x3f
 	bne	.Lcpy_body_medium
@@ -524,28 +437,20 @@ ENTRY(memcpy)
 	add	dst, dst, tmp1
 	add	src, src, tmp1
 	.macro dispatch_step i
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #-(\i * 8)]
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #-(\i * 8)]
+	ldrd	A_l, A_h, [src, #-(\i * 8)]
+	strd	A_l, A_h, [dst, #-(\i * 8)]
 	.endm
 	dispatch_7_dword
 #endif

 	tst	tmp2, #4
-	sfi_breg src, \
-	ldrne	tmp1, [\B], #4
-	sfi_breg dst, \
-	strne	tmp1, [\B], #4
+	ldrne	tmp1, [src], #4
+	strne	tmp1, [dst], #4
 	lsls	tmp2, tmp2, #31		/* Count (tmp2) now dead.  */
-	sfi_breg src, \
-	ldrhcs	tmp1, [\B], #2
-	sfi_breg src, \
-	ldrbne	tmp2, [\B]
-	sfi_breg dst, \
-	strhcs	tmp1, [\B], #2
-	sfi_breg dst, \
-	strbne	tmp2, [\B]
+	ldrhcs	tmp1, [src], #2
+	ldrbne	tmp2, [src]
+	strhcs	tmp1, [dst], #2
+	strbne	tmp2, [dst]

 .Ldone:
 	ldr	tmp2, [sp], #FRAME_SIZE
@@ -565,23 +470,15 @@ ENTRY(memcpy)
 	   copy position into a register.  This should act like a PLD
 	   operation but we won't have to repeat the transfer.  */

-	sfi_breg src, \
-	vldr	d3, [\B, #0]
-	sfi_breg src, \
-	vldr	d4, [\B, #64]
-	sfi_breg src, \
-	vldr	d5, [\B, #128]
-	sfi_breg src, \
-	vldr	d6, [\B, #192]
-	sfi_breg src, \
-	vldr	d7, [\B, #256]
-
-	sfi_breg src, \
-	vldr	d0, [\B, #8]
-	sfi_breg src, \
-	vldr	d1, [\B, #16]
-	sfi_breg src, \
-	vldr	d2, [\B, #24]
+	vldr	d3, [src, #0]
+	vldr	d4, [src, #64]
+	vldr	d5, [src, #128]
+	vldr	d6, [src, #192]
+	vldr	d7, [src, #256]
+
+	vldr	d0, [src, #8]
+	vldr	d1, [src, #16]
+	vldr	d2, [src, #24]
 	add	src, src, #32

 	subs	tmp2, tmp2, #prefetch_lines * 64 * 2
@@ -606,31 +503,19 @@ ENTRY(memcpy)
 	add	src, src, #3 * 64
 	add	dst, dst, #3 * 64
 	cpy_tail_vfp	d6, 0
-	sfi_breg dst, \
-	vstr	d7, [\B, #64]
-	sfi_breg src, \
-	vldr	d7, [\B, #64]
-	sfi_breg dst, \
-	vstr	d0, [\B, #64 + 8]
-	sfi_breg src, \
-	vldr	d0, [\B, #64 + 8]
-	sfi_breg dst, \
-	vstr	d1, [\B, #64 + 16]
-	sfi_breg src, \
-	vldr	d1, [\B, #64 + 16]
-	sfi_breg dst, \
-	vstr	d2, [\B, #64 + 24]
-	sfi_breg src, \
-	vldr	d2, [\B, #64 + 24]
-	sfi_breg dst, \
-	vstr	d7, [\B, #64 + 32]
+	vstr	d7, [dst, #64]
+	vldr	d7, [src, #64]
+	vstr	d0, [dst, #64 + 8]
+	vldr	d0, [src, #64 + 8]
+	vstr	d1, [dst, #64 + 16]
+	vldr	d1, [src, #64 + 16]
+	vstr	d2, [dst, #64 + 24]
+	vldr	d2, [src, #64 + 24]
+	vstr	d7, [dst, #64 + 32]
 	add	src, src, #96
-	sfi_breg dst, \
-	vstr	d0, [\B, #64 + 40]
-	sfi_breg dst, \
-	vstr	d1, [\B, #64 + 48]
-	sfi_breg dst, \
-	vstr	d2, [\B, #64 + 56]
+	vstr	d0, [dst, #64 + 40]
+	vstr	d1, [dst, #64 + 48]
+	vstr	d2, [dst, #64 + 56]
 	add	dst, dst, #128
 	add	tmp2, tmp2, #prefetch_lines * 64
 	b	.Lcpy_body_medium
@@ -641,83 +526,59 @@ ENTRY(memcpy)
 	/* Pre-bias src and dst.  */
 	sub	src, src, #8
 	sub	dst, dst, #8
-	sfi_pld src, #8
-	sfi_pld src, #72
+	pld	[src, #8]
+	pld	[src, #72]
 	subs	tmp2, tmp2, #64
-	sfi_pld src, #136
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #8]
+	pld	[src, #136]
+	ldrd	A_l, A_h, [src, #8]
 	strd	B_l, B_h, [sp, #8]
 	cfi_rel_offset (B_l, 8)
 	cfi_rel_offset (B_h, 12)
-	sfi_breg src, \
-	ldrd	B_l, B_h, [\B, #16]
+	ldrd	B_l, B_h, [src, #16]
 	strd	C_l, C_h, [sp, #16]
 	cfi_rel_offset (C_l, 16)
 	cfi_rel_offset (C_h, 20)
-	sfi_breg src, \
-	ldrd	C_l, C_h, [\B, #24]
+	ldrd	C_l, C_h, [src, #24]
 	strd	D_l, D_h, [sp, #24]
 	cfi_rel_offset (D_l, 24)
 	cfi_rel_offset (D_h, 28)
-	sfi_pld src, #200
-	sfi_breg src, \
-	ldrd	D_l, D_h, [\B, #32]!
+	pld	[src, #200]
+	ldrd	D_l, D_h, [src, #32]!
 	b	1f
 	.p2align	6
 2:
-	sfi_pld src, #232
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #40]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
-	sfi_breg src, \
-	ldrd	B_l, B_h, [\B, #48]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
-	sfi_breg src, \
-	ldrd	C_l, C_h, [\B, #56]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]!
-	sfi_breg src, \
-	ldrd	D_l, D_h, [\B, #64]!
+	pld	[src, #232]
+	strd	A_l, A_h, [dst, #40]
+	ldrd	A_l, A_h, [src, #40]
+	strd	B_l, B_h, [dst, #48]
+	ldrd	B_l, B_h, [src, #48]
+	strd	C_l, C_h, [dst, #56]
+	ldrd	C_l, C_h, [src, #56]
+	strd	D_l, D_h, [dst, #64]!
+	ldrd	D_l, D_h, [src, #64]!
 	subs	tmp2, tmp2, #64
 1:
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #8]
-	sfi_breg src, \
-	ldrd	A_l, A_h, [\B, #8]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #16]
-	sfi_breg src, \
-	ldrd	B_l, B_h, [\B, #16]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #24]
-	sfi_breg src, \
-	ldrd	C_l, C_h, [\B, #24]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #32]
-	sfi_breg src, \
-	ldrd	D_l, D_h, [\B, #32]
+	strd	A_l, A_h, [dst, #8]
+	ldrd	A_l, A_h, [src, #8]
+	strd	B_l, B_h, [dst, #16]
+	ldrd	B_l, B_h, [src, #16]
+	strd	C_l, C_h, [dst, #24]
+	ldrd	C_l, C_h, [src, #24]
+	strd	D_l, D_h, [dst, #32]
+	ldrd	D_l, D_h, [src, #32]
 	bcs	2b

 	/* Save the remaining bytes and restore the callee-saved regs.  */
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
+	strd	A_l, A_h, [dst, #40]
 	add	src, src, #40
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
+	strd	B_l, B_h, [dst, #48]
 	ldrd	B_l, B_h, [sp, #8]
 	cfi_restore (B_l)
 	cfi_restore (B_h)
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
+	strd	C_l, C_h, [dst, #56]
 	ldrd	C_l, C_h, [sp, #16]
 	cfi_restore (C_l)
 	cfi_restore (C_h)
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]
+	strd	D_l, D_h, [dst, #64]
 	ldrd	D_l, D_h, [sp, #24]
 	cfi_restore (D_l)
 	cfi_restore (D_h)
@@ -734,35 +595,29 @@ ENTRY(memcpy)
 	cfi_remember_state

 .Lcpy_notaligned:
-	sfi_pld src
-	sfi_pld src, #64
+	pld	[src, #0]
+	pld	[src, #64]
 	/* There's at least 64 bytes to copy, but there is no mutual
 	   alignment.  */
 	/* Bring DST to 64-bit alignment.  */
 	lsls	tmp2, dst, #29
-	sfi_pld src, #(2 * 64)
+	pld	[src, #(2 * 64)]
 	beq	1f
 	rsbs	tmp2, tmp2, #0
 	sub	count, count, tmp2, lsr #29
-	sfi_breg src, \
-	ldrmi	tmp1, [\B], #4
-	sfi_breg dst, \
-	strmi	tmp1, [\B], #4
+	ldrmi	tmp1, [src], #4
+	strmi	tmp1, [dst], #4
 	lsls	tmp2, tmp2, #2
-	sfi_breg src, \
-	ldrbne	tmp1, [\B], #1
-	sfi_breg src, \
-	ldrhcs	tmp2, [\B], #2
-	sfi_breg dst, \
-	strbne	tmp1, [\B], #1
-	sfi_breg dst, \
-	strhcs	tmp2, [\B], #2
+	ldrbne	tmp1, [src], #1
+	ldrhcs	tmp2, [src], #2
+	strbne	tmp1, [dst], #1
+	strhcs	tmp2, [dst], #2
 1:
-	sfi_pld src, #(3 * 64)
+	pld	[src, #(3 * 64)]
 	subs	count, count, #64
 	ldrmi	tmp2, [sp], #FRAME_SIZE
 	bmi	.Ltail63unaligned
-	sfi_pld src, #(4 * 64)
+	pld	[src, #(4 * 64)]

 #ifdef USE_NEON
 	/* These need an extra layer of macro just to work around a
@@ -775,132 +630,88 @@ ENTRY(memcpy)
 	vst1.8	{\reglist}, [ALIGN (\basereg, 64)]!
 	.endm

-	/* These are used by the NaCl sfi_breg macro.  */
-	.macro _sfi_breg_dmask_neon_load_multi reg
-	_sfi_dmask \reg
-	.endm
-	.macro _sfi_breg_dmask_neon_store_multi reg
-	_sfi_dmask \reg
-	.endm
-
-	sfi_breg src, neon_load_multi d0-d3, \B
-	sfi_breg src, neon_load_multi d4-d7, \B
+	neon_load_multi d0-d3, src
+	neon_load_multi d4-d7, src
 	subs	count, count, #64
 	bmi	2f
 1:
-	sfi_pld src, #(4 * 64)
-	sfi_breg dst, neon_store_multi d0-d3, \B
-	sfi_breg src, neon_load_multi d0-d3, \B
-	sfi_breg dst, neon_store_multi d4-d7, \B
-	sfi_breg src, neon_load_multi d4-d7, \B
+	pld	[src, #(4 * 64)]
+	neon_store_multi d0-d3, dst
+	neon_load_multi d0-d3, src
+	neon_store_multi d4-d7, dst
+	neon_load_multi d4-d7, src
 	subs	count, count, #64
 	bpl	1b
 2:
-	sfi_breg dst, neon_store_multi d0-d3, \B
-	sfi_breg dst, neon_store_multi d4-d7, \B
+	neon_store_multi d0-d3, dst
+	neon_store_multi d4-d7, dst
 	ands	count, count, #0x3f
 #else
 	/* Use an SMS style loop to maximize the I/O bandwidth.  */
 	sub	src, src, #4
 	sub	dst, dst, #8
 	subs	tmp2, count, #64	/* Use tmp2 for count.  */
-	sfi_breg src, \
-	ldr	A_l, [\B, #4]
-	sfi_breg src, \
-	ldr	A_h, [\B, #8]
+	ldr	A_l, [src, #4]
+	ldr	A_h, [src, #8]
 	strd	B_l, B_h, [sp, #8]
 	cfi_rel_offset (B_l, 8)
 	cfi_rel_offset (B_h, 12)
-	sfi_breg src, \
-	ldr	B_l, [\B, #12]
-	sfi_breg src, \
-	ldr	B_h, [\B, #16]
+	ldr	B_l, [src, #12]
+	ldr	B_h, [src, #16]
 	strd	C_l, C_h, [sp, #16]
 	cfi_rel_offset (C_l, 16)
 	cfi_rel_offset (C_h, 20)
-	sfi_breg src, \
-	ldr	C_l, [\B, #20]
-	sfi_breg src, \
-	ldr	C_h, [\B, #24]
+	ldr	C_l, [src, #20]
+	ldr	C_h, [src, #24]
 	strd	D_l, D_h, [sp, #24]
 	cfi_rel_offset (D_l, 24)
 	cfi_rel_offset (D_h, 28)
-	sfi_breg src, \
-	ldr	D_l, [\B, #28]
-	sfi_breg src, \
-	ldr	D_h, [\B, #32]!
+	ldr	D_l, [src, #28]
+	ldr	D_h, [src, #32]!
 	b	1f
 	.p2align	6
 2:
-	sfi_pld src, #(5 * 64) - (32 - 4)
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
-	sfi_breg src, \
-	ldr	A_l, [\B, #36]
-	sfi_breg src, \
-	ldr	A_h, [\B, #40]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
-	sfi_breg src, \
-	ldr	B_l, [\B, #44]
-	sfi_breg src, \
-	ldr	B_h, [\B, #48]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
-	sfi_breg src, \
-	ldr	C_l, [\B, #52]
-	sfi_breg src, \
-	ldr	C_h, [\B, #56]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]!
-	sfi_breg src, \
-	ldr	D_l, [\B, #60]
-	sfi_breg src, \
-	ldr	D_h, [\B, #64]!
+	pld	[src, #(5 * 64) - (32 - 4)]
+	strd	A_l, A_h, [dst, #40]
+	ldr	A_l, [src, #36]
+	ldr	A_h, [src, #40]
+	strd	B_l, B_h, [dst, #48]
+	ldr	B_l, [src, #44]
+	ldr	B_h, [src, #48]
+	strd	C_l, C_h, [dst, #56]
+	ldr	C_l, [src, #52]
+	ldr	C_h, [src, #56]
+	strd	D_l, D_h, [dst, #64]!
+	ldr	D_l, [src, #60]
+	ldr	D_h, [src, #64]!
 	subs	tmp2, tmp2, #64
 1:
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #8]
-	sfi_breg src, \
-	ldr	A_l, [\B, #4]
-	sfi_breg src, \
-	ldr	A_h, [\B, #8]
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #16]
-	sfi_breg src, \
-	ldr	B_l, [\B, #12]
-	sfi_breg src, \
-	ldr	B_h, [\B, #16]
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #24]
-	sfi_breg src, \
-	ldr	C_l, [\B, #20]
-	sfi_breg src, \
-	ldr	C_h, [\B, #24]
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #32]
-	sfi_breg src, \
-	ldr	D_l, [\B, #28]
-	sfi_breg src, \
-	ldr	D_h, [\B, #32]
+	strd	A_l, A_h, [dst, #8]
+	ldr	A_l, [src, #4]
+	ldr	A_h, [src, #8]
+	strd	B_l, B_h, [dst, #16]
+	ldr	B_l, [src, #12]
+	ldr	B_h, [src, #16]
+	strd	C_l, C_h, [dst, #24]
+	ldr	C_l, [src, #20]
+	ldr	C_h, [src, #24]
+	strd	D_l, D_h, [dst, #32]
+	ldr	D_l, [src, #28]
+	ldr	D_h, [src, #32]
 	bcs	2b

 	/* Save the remaining bytes and restore the callee-saved regs.  */
-	sfi_breg dst, \
-	strd	A_l, A_h, [\B, #40]
+	strd	A_l, A_h, [dst, #40]
 	add	src, src, #36
-	sfi_breg dst, \
-	strd	B_l, B_h, [\B, #48]
+	strd	B_l, B_h, [dst, #48]
 	ldrd	B_l, B_h, [sp, #8]
 	cfi_restore (B_l)
 	cfi_restore (B_h)
-	sfi_breg dst, \
-	strd	C_l, C_h, [\B, #56]
+	strd	C_l, C_h, [dst, #56]
 	ldrd	C_l, C_h, [sp, #16]
 	cfi_restore (C_l)
 	cfi_restore (C_h)
-	sfi_breg dst, \
-	strd	D_l, D_h, [\B, #64]
+	strd	D_l, D_h, [dst, #64]
 	ldrd	D_l, D_h, [sp, #24]
 	cfi_restore (D_l)
 	cfi_restore (D_h)
diff --git a/sysdeps/arm/armv7/strcmp.S b/sysdeps/arm/armv7/strcmp.S
index c8fab4b..25d0557 100644
--- a/sysdeps/arm/armv7/strcmp.S
+++ b/sysdeps/arm/armv7/strcmp.S
@@ -178,10 +178,8 @@
 #endif
 ENTRY (strcmp)
 #if STRCMP_PRECHECK == 1
-	sfi_breg src1, \
-	ldrb	r2, [\B]
-	sfi_breg src2, \
-	ldrb	r3, [\B]
+	ldrb	r2, [src1]
+	ldrb	r3, [src2]
 	cmp	r2, #1
 	it	cs
 	cmpcs	r2, r3
@@ -211,11 +209,9 @@ ENTRY (strcmp)
 	and	tmp2, tmp1, #3
 	bic	src2, src2, #7
 	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
-	sfi_breg src1, \
-	ldrd	data1a, data1b, [\B], #16
+	ldrd	data1a, data1b, [src1], #16
 	tst	tmp1, #4
-	sfi_breg src2, \
-	ldrd	data2a, data2b, [\B], #16
+	ldrd	data2a, data2b, [src2], #16
 	prepare_mask tmp1, tmp2
 	apply_mask data1a, tmp1
 	apply_mask data2a, tmp1
@@ -231,10 +227,8 @@ ENTRY (strcmp)
 	.p2align 5,,12	/* Don't start in the tail bytes of a cache line.  */
 	.p2align 2	/* Always word aligned.  */
 .Lloop_aligned8:
-	sfi_breg src1, \
-	ldrd	data1a, data1b, [\B], #16
-	sfi_breg src2, \
-	ldrd	data2a, data2b, [\B], #16
+	ldrd	data1a, data1b, [src1], #16
+	ldrd	data2a, data2b, [src2], #16
 .Lstart_realigned8:
 	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
 	eor	syndrome_a, data1a, data2a
@@ -245,10 +239,8 @@
 	sel	syndrome_b, syndrome_b, const_m1
 	cbnz	syndrome_b, .Ldiff_in_b

-	sfi_breg src1, \
-	ldrd	data1a, data1b, [\B, #-8]
-	sfi_breg src2, \
-	ldrd	data2a, data2b, [\B, #-8]
+	ldrd	data1a, data1b, [src1, #-8]
+	ldrd	data2a, data2b, [src2, #-8]
 	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits,  */
 	eor	syndrome_a, data1a, data2a
 	sel	syndrome_a, syndrome_a, const_m1
@@ -279,19 +271,15 @@
 	/* Unrolled by a factor of 2, to reduce the number of
 	   post-increment operations.  */
 .Lloop_aligned4:
-	sfi_breg src1, \
-	ldr	data1, [\B], #8
-	sfi_breg src2, \
-	ldr	data2, [\B], #8
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
 .Lstart_realigned4:
 	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
 	eor	syndrome, data1, data2
 	sel	syndrome, syndrome, const_m1
 	cbnz	syndrome, .Laligned4_done
-	sfi_breg src1, \
-	ldr	data1, [\B, #-4]
-	sfi_breg src2, \
-	ldr	data2, [\B, #-4]
+	ldr	data1, [src1, #-4]
+	ldr	data2, [src2, #-4]
 	uadd8	syndrome, data1, const_m1
 	eor	syndrome, data1, data2
 	sel	syndrome, syndrome, const_m1
@@ -307,11 +295,9 @@
 	   masking off the unwanted loaded data to prevent a difference.  */
 	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
 	bic	src1, src1, #3
-	sfi_breg src1, \
-	ldr	data1, [\B], #8
+	ldr	data1, [src1], #8
 	bic	src2, src2, #3
-	sfi_breg src2, \
-	ldr	data2, [\B], #8
+	ldr	data2, [src2], #8

 	prepare_mask tmp1, tmp1
 	apply_mask data1, tmp1
@@ -324,30 +310,26 @@
 	sub	src2, src2, tmp1
 	bic	src1, src1, #3
 	lsls	tmp1, tmp1, #31
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	beq	.Laligned_m2
 	bcs	.Laligned_m1

 #if STRCMP_PRECHECK == 0
-	sfi_breg src2, \
-	ldrb	data2, [\B, #1]
+	ldrb	data2, [src2, #1]
 	uxtb	tmp1, data1, ror #BYTE1_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
 	cbz	data2, .Lmisaligned_exit

 .Laligned_m2:
-	sfi_breg src2, \
-	ldrb	data2, [\B, #2]
+	ldrb	data2, [src2, #2]
 	uxtb	tmp1, data1, ror #BYTE2_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
 	cbz	data2, .Lmisaligned_exit

 .Laligned_m1:
-	sfi_breg src2, \
-	ldrb	data2, [\B, #3]
+	ldrb	data2, [src2, #3]
 	uxtb	tmp1, data1, ror #BYTE3_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
@@ -356,16 +338,14 @@
 #else  /* STRCMP_PRECHECK */
 	/* If we've done the pre-check, then we don't need to check the
 	   first byte again here.  */
-	sfi_breg src2, \
-	ldrb	data2, [\B, #2]
+	ldrb	data2, [src2, #2]
 	uxtb	tmp1, data1, ror #BYTE2_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
 	cbz	data2, .Lmisaligned_exit

 .Laligned_m2:
-	sfi_breg src2, \
-	ldrb	data2, [\B, #3]
+	ldrb	data2, [src2, #3]
 	uxtb	tmp1, data1, ror #BYTE3_OFFSET
 	subs	tmp1, tmp1, data2
 	bne	.Lmisaligned_exit
@@ -391,13 +371,11 @@
 	cfi_restore_state
 	/* src1 is word aligned, but src2 has no common alignment
 	   with it.  */
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */

 	bic	src2, src2, #3
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	bhi	.Loverlap1	/* C=1, Z=0 => src2[1:0] = 0b11.  */
 	bcs	.Loverlap2	/* C=1, Z=1 => src2[1:0] = 0b10.  */
@@ -409,13 +387,11 @@
 	sel	syndrome, syndrome, const_m1
 	bne	4f
 	cbnz	syndrome, 5f
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	eor	tmp1, tmp1, data1
 	cmp	tmp1, data2, S2HI #24
 	bne	6f
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	b	.Loverlap3
 4:
 	S2LO	data2, data2, #8
@@ -427,8 +403,7 @@
 5:
 	/* We can only get here if the MSB of data1 contains 0, so
 	   fast-path the exit.  */
-	sfi_breg src2, \
-	ldrb	result, [\B]
+	ldrb	result, [src2]
 	ldrd	r4, r5, [sp], #16
 	cfi_remember_state
 	cfi_def_cfa_offset (0)
@@ -454,13 +429,11 @@ ENTRY (strcmp)
 	sel	syndrome, syndrome, const_m1
 	bne	4f
 	cbnz	syndrome, 5f
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	eor	tmp1, tmp1, data1
 	cmp	tmp1, data2, S2HI #16
 	bne	6f
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	b	.Loverlap2
 4:
 	S2LO	data2, data2, #16
@@ -469,8 +442,7 @@
 	ands	syndrome, syndrome, const_m1, S2LO #16
 	bne	.Lstrcmp_done_equal
-	sfi_breg src2, \
-	ldrh	data2, [\B]
+	ldrh	data2, [src2]
 	S2LO	data1, data1, #16
 #ifdef __ARM_BIG_ENDIAN
 	lsl	data2, data2, #16
 #endif
@@ -490,13 +462,11 @@
 	sel	syndrome, syndrome, const_m1
 	bne	4f
 	cbnz	syndrome, 5f
-	sfi_breg src2, \
-	ldr	data2, [\B], #4
+	ldr	data2, [src2], #4
 	eor	tmp1, tmp1, data1
 	cmp	tmp1, data2, S2HI #8
 	bne	6f
-	sfi_breg src1, \
-	ldr	data1, [\B], #4
+	ldr	data1, [src1], #4
 	b	.Loverlap1
 4:
 	S2LO	data2, data2, #24
@@ -504,8 +474,7 @@
 5:
 	tst	syndrome, #LSB
 	bne	.Lstrcmp_done_equal
-	sfi_breg src2, \
-	ldr	data2, [\B]
+	ldr	data2, [src2]
 6:
 	S2LO	data1, data1, #8
 	bic	data2, data2, #MSB