From 1e9d5987fd94b88bdf4ebfb9f13d4a472d529cdd Mon Sep 17 00:00:00 2001 From: Paul Pluzhnikov Date: Tue, 23 May 2023 03:57:01 +0000 Subject: Fix misspellings in sysdeps/x86_64 -- BZ 25337. Applying this commit results in bit-identical rebuild of libc.so.6 math/libm.so.6 elf/ld-linux-x86-64.so.2 mathvec/libmvec.so.1 Reviewed-by: Florian Weimer --- sysdeps/x86_64/dl-trampoline.h | 12 ++++++------ sysdeps/x86_64/fpu/feupdateenv.c | 2 +- sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 2 +- sysdeps/x86_64/multiarch/memchr-evex.S | 10 +++++----- sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 4 ++-- sysdeps/x86_64/multiarch/memcmp-evex-movbe.S | 8 ++++---- sysdeps/x86_64/multiarch/memcmp-sse2.S | 4 ++-- sysdeps/x86_64/multiarch/memcmpeq-avx2.S | 4 ++-- sysdeps/x86_64/multiarch/memcmpeq-evex.S | 6 +++--- sysdeps/x86_64/multiarch/memmove-ssse3.S | 4 ++-- .../x86_64/multiarch/memmove-vec-unaligned-erms.S | 8 ++++---- sysdeps/x86_64/multiarch/memrchr-avx2.S | 4 ++-- sysdeps/x86_64/multiarch/memrchr-evex.S | 4 ++-- sysdeps/x86_64/multiarch/memrchr-sse2.S | 2 +- sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 2 +- sysdeps/x86_64/multiarch/rawmemchr-evex.S | 6 +++--- sysdeps/x86_64/multiarch/strcat-sse2.S | 2 +- sysdeps/x86_64/multiarch/strcat-strlen-avx2.h.S | 2 +- sysdeps/x86_64/multiarch/strcat-strlen-evex.h.S | 2 +- sysdeps/x86_64/multiarch/strchr-evex.S | 12 ++++++------ sysdeps/x86_64/multiarch/strchr-sse2.S | 2 +- sysdeps/x86_64/multiarch/strcmp-avx2.S | 14 +++++++------- sysdeps/x86_64/multiarch/strcmp-evex.S | 20 ++++++++++---------- sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S | 2 +- sysdeps/x86_64/multiarch/strcmp-sse2.S | 8 ++++---- sysdeps/x86_64/multiarch/strcmp-sse4_2.S | 4 ++-- sysdeps/x86_64/multiarch/strcpy-sse2.S | 2 +- sysdeps/x86_64/multiarch/strlen-avx2.S | 2 +- sysdeps/x86_64/multiarch/strncat-evex.S | 2 +- sysdeps/x86_64/multiarch/strncpy-avx2.S | 2 +- sysdeps/x86_64/multiarch/strncpy-evex.S | 12 ++++++------ sysdeps/x86_64/multiarch/strnlen-evex.S | 2 +- sysdeps/x86_64/multiarch/strrchr-avx2.S | 6 +++--- sysdeps/x86_64/multiarch/strrchr-evex-base.S | 4 ++-- sysdeps/x86_64/multiarch/strrchr-evex.S | 14 +++++++------- sysdeps/x86_64/multiarch/strrchr-sse2.S | 12 ++++++------ sysdeps/x86_64/multiarch/strstr-avx512.c | 2 +- 37 files changed, 105 insertions(+), 105 deletions(-) diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h index f8ad31a..70ba6ec 100644 --- a/sysdeps/x86_64/dl-trampoline.h +++ b/sysdeps/x86_64/dl-trampoline.h @@ -28,11 +28,11 @@ # undef BASE # if (STATE_SAVE_ALIGNMENT % 16) != 0 -# error STATE_SAVE_ALIGNMENT must be multples of 16 +# error STATE_SAVE_ALIGNMENT must be multiple of 16 # endif # if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 -# error STATE_SAVE_OFFSET must be multples of STATE_SAVE_ALIGNMENT +# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT # endif # if DL_RUNTIME_RESOLVE_REALIGN_STACK @@ -43,7 +43,7 @@ /* Use fxsave to save XMM registers. */ # define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) # if (REGISTER_SAVE_AREA % 16) != 0 -# error REGISTER_SAVE_AREA must be multples of 16 +# error REGISTER_SAVE_AREA must be multiple of 16 # endif # endif # else @@ -57,7 +57,7 @@ # define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA # define BASE rsp # if (REGISTER_SAVE_AREA % 16) != 8 -# error REGISTER_SAVE_AREA must be odd multples of 8 +# error REGISTER_SAVE_AREA must be odd multiple of 8 # endif # endif @@ -161,7 +161,7 @@ _dl_runtime_resolve: #if !defined PROF && defined _dl_runtime_profile # if (LR_VECTOR_OFFSET % VEC_SIZE) != 0 -# error LR_VECTOR_OFFSET must be multples of VEC_SIZE +# error LR_VECTOR_OFFSET must be multiple of VEC_SIZE # endif .globl _dl_runtime_profile @@ -173,7 +173,7 @@ _dl_runtime_profile: cfi_adjust_cfa_offset(16) # Incorporate PLT _CET_ENDBR /* The La_x86_64_regs data structure pointed to by the - fourth paramater must be VEC_SIZE-byte aligned. This must + fourth parameter must be VEC_SIZE-byte aligned. This must be explicitly enforced. We have the set up a dynamically sized stack frame. %rbx points to the top half which has a fixed size and preserves the original stack pointer. */ diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c index 129445e..96977a1 100644 --- a/sysdeps/x86_64/fpu/feupdateenv.c +++ b/sysdeps/x86_64/fpu/feupdateenv.c @@ -31,7 +31,7 @@ __feupdateenv (const fenv_t *envp) /* Install new environment. */ __fesetenv (envp); - /* Raise the saved exception. Incidently for us the implementation + /* Raise the saved exception. Incidentally for us the implementation defined format of the values in objects of type fexcept_t is the same as the ones specified using the FE_* constants. */ __feraiseexcept ((int) temp); diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h index 10ae5d6..89d366b 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h @@ -1,4 +1,4 @@ -/* Common definition for strcasecmp famly ifunc selections. +/* Common definition for strcasecmp family ifunc selections. All versions must be listed in ifunc-impl-list.c. Copyright (C) 2017-2023 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S index 764a419..35347da 100644 --- a/sysdeps/x86_64/multiarch/memchr-evex.S +++ b/sysdeps/x86_64/multiarch/memchr-evex.S @@ -440,13 +440,13 @@ L(loop_4x_vec): ymm0-15 is used at all is because there is no EVEX encoding vpcmpeq and with vpcmpeq this loop can be performed more efficiently. The non-vzeroupper version is safe for RTM - while the vzeroupper version should be prefered if RTM are + while the vzeroupper version should be preferred if RTM are not supported. Which loop version we use is determined by USE_TERN_IN_LOOP. */ # if USE_TERN_IN_LOOP /* Since vptern can only take 3x vectors fastest to do 1 vec - seperately with EVEX vpcmp. */ + separately with EVEX vpcmp. */ # ifdef USE_AS_WMEMCHR /* vptern can only accept masks for epi32/epi64 so can only save instruction using not equals mask on vptern with wmemchr. @@ -539,7 +539,7 @@ L(last_vec_x1_novzero): # if CHAR_PER_VEC == 64 /* Since we can't combine the last 2x VEC when CHAR_PER_VEC == - 64 it needs a seperate return label. */ + 64 it needs a separate return label. */ .p2align 4,, 4 L(last_vec_x2): L(last_vec_x2_novzero): @@ -579,8 +579,8 @@ L(loop_vec_ret): (only if used VEX encoded loop). */ COND_VZEROUPPER - /* Seperate logic for CHAR_PER_VEC == 64 vs the rest. For - CHAR_PER_VEC we test the last 2x VEC seperately, for + /* Separate logic for CHAR_PER_VEC == 64 vs the rest. For + CHAR_PER_VEC we test the last 2x VEC separately, for CHAR_PER_VEC <= 32 we can combine the results from the 2x VEC in a single GPR. */ # if CHAR_PER_VEC == 64 diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S index b81d9f7..61dbfe7 100644 --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S @@ -29,7 +29,7 @@ 3. Use xmm vector compare when size >= 4 bytes for memcmp or size >= 8 bytes for wmemcmp. 4. Optimistically compare up to first 4 * VEC_SIZE one at a - to check for early mismatches. Only do this if its guranteed the + to check for early mismatches. Only do this if its guaranteed the work is not wasted. 5. If size is 8 * VEC_SIZE or less, unroll the loop. 6. Compare 4 * VEC_SIZE at a time with the aligned first memory @@ -66,7 +66,7 @@ /* Warning! wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. + memcmp has to use UNSIGNED comparison for elements. */ .section SECTION(.text),"ax",@progbits diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S index a63db75..7e6fed9 100644 --- a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S +++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S @@ -30,7 +30,7 @@ 3. Use xmm vector compare when size >= 4 bytes for memcmp or size >= 8 bytes for wmemcmp. 4. Optimistically compare up to first 4 * CHAR_PER_VEC one at a - to check for early mismatches. Only do this if its guranteed the + to check for early mismatches. Only do this if its guaranteed the work is not wasted. 5. If size is 8 * VEC_SIZE or less, unroll the loop. 6. Compare 4 * VEC_SIZE at a time with the aligned first memory @@ -90,7 +90,7 @@ Latency: /* Warning! wmemcmp has to use SIGNED comparison for elements. - memcmp has to use UNSIGNED comparison for elemnts. + memcmp has to use UNSIGNED comparison for elements. */ .section SECTION(.text), "ax", @progbits @@ -105,7 +105,7 @@ ENTRY_P2ALIGN (MEMCMP, 6) /* Fall through for [0, VEC_SIZE] as its the hottest. */ ja L(more_1x_vec) - /* Create mask of bytes that are guranteed to be valid because + /* Create mask of bytes that are guaranteed to be valid because of length (edx). Using masked movs allows us to skip checks for page crosses/zero size. */ mov $-1, %VRAX @@ -365,7 +365,7 @@ L(loop_4x_vec): /* Load regardless of branch. */ VMOVU (VEC_SIZE * 2)(%rsi, %rdx), %VMM(3) - /* Seperate logic as we can only use testb for VEC_SIZE == 64. + /* Separate logic as we can only use testb for VEC_SIZE == 64. */ # if VEC_SIZE == 64 testb %dil, %dil diff --git a/sysdeps/x86_64/multiarch/memcmp-sse2.S b/sysdeps/x86_64/multiarch/memcmp-sse2.S index 305bd02..77174e7 100644 --- a/sysdeps/x86_64/multiarch/memcmp-sse2.S +++ b/sysdeps/x86_64/multiarch/memcmp-sse2.S @@ -410,7 +410,7 @@ L(ret_nonzero_vec_start_4_5): .p2align 4,, 8 L(ret_nonzero_vec_end_1): pmovmskb %xmm1, %ecx - /* High 16 bits of eax guranteed to be all ones. Rotate them in + /* High 16 bits of eax guaranteed to be all ones. Rotate them in to we can do `or + not` with just `xor`. */ rorl $16, %eax xorl %ecx, %eax @@ -562,7 +562,7 @@ L(ret_nonzero_loop): sall $(VEC_SIZE * 1), %edx leal 1(%rcx, %rdx), %edx pmovmskb %xmm2, %ecx - /* High 16 bits of eax guranteed to be all ones. Rotate them in + /* High 16 bits of eax guaranteed to be all ones. Rotate them in to we can do `or + not` with just `xor`. */ rorl $16, %eax xorl %ecx, %eax diff --git a/sysdeps/x86_64/multiarch/memcmpeq-avx2.S b/sysdeps/x86_64/multiarch/memcmpeq-avx2.S index 4b013c5..f6e39ca 100644 --- a/sysdeps/x86_64/multiarch/memcmpeq-avx2.S +++ b/sysdeps/x86_64/multiarch/memcmpeq-avx2.S @@ -26,7 +26,7 @@ and loading from either s1 or s2 would cause a page cross. 2. Use xmm vector compare when size >= 8 bytes. 3. Optimistically compare up to first 4 * VEC_SIZE one at a - to check for early mismatches. Only do this if its guranteed the + to check for early mismatches. Only do this if its guaranteed the work is not wasted. 4. If size is 8 * VEC_SIZE or less, unroll the loop. 5. Compare 4 * VEC_SIZE at a time with the aligned first memory @@ -302,7 +302,7 @@ L(between_9_15): movq -8(%rsi, %rdx), %rdi subq %rdi, %rcx orq %rcx, %rax - /* edx is guranteed to be a non-zero int. */ + /* edx is guaranteed to be a non-zero int. */ cmovnz %edx, %eax ret diff --git a/sysdeps/x86_64/multiarch/memcmpeq-evex.S b/sysdeps/x86_64/multiarch/memcmpeq-evex.S index 7ae3e3c..3666f64 100644 --- a/sysdeps/x86_64/multiarch/memcmpeq-evex.S +++ b/sysdeps/x86_64/multiarch/memcmpeq-evex.S @@ -26,7 +26,7 @@ and loading from either s1 or s2 would cause a page cross. 2. Use xmm vector compare when size >= 8 bytes. 3. Optimistically compare up to first 4 * VEC_SIZE one at a - to check for early mismatches. Only do this if its guranteed the + to check for early mismatches. Only do this if its guaranteed the work is not wasted. 4. If size is 8 * VEC_SIZE or less, unroll the loop. 5. Compare 4 * VEC_SIZE at a time with the aligned first memory @@ -97,7 +97,7 @@ ENTRY_P2ALIGN (MEMCMPEQ, 6) /* Fall through for [0, VEC_SIZE] as its the hottest. */ ja L(more_1x_vec) - /* Create mask of bytes that are guranteed to be valid because + /* Create mask of bytes that are guaranteed to be valid because of length (edx). Using masked movs allows us to skip checks for page crosses/zero size. */ mov $-1, %VRAX @@ -253,7 +253,7 @@ L(loop_4x_vec): oring with VEC(4). Result is stored in VEC(4). */ vpternlogd $0xf6, (VEC_SIZE * 2)(%rdx), %VMM(3), %VMM(4) - /* Seperate logic as we can only use testb for VEC_SIZE == 64. + /* Separate logic as we can only use testb for VEC_SIZE == 64. */ # if VEC_SIZE == 64 testb %dil, %dil diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S index ded86bd..460b0ec 100644 --- a/sysdeps/x86_64/multiarch/memmove-ssse3.S +++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S @@ -231,7 +231,7 @@ L(end_loop_fwd): movups %xmm7, 48(%r8) ret - /* Extactly 64 bytes if `jmp L(end_loop_fwd)` is long encoding. + /* Exactly 64 bytes if `jmp L(end_loop_fwd)` is long encoding. 60 bytes otherwise. */ # define ALIGNED_LOOP_FWD(align_by); \ .p2align 6; \ @@ -368,7 +368,7 @@ L(end_loop_bkwd): ret - /* Extactly 64 bytes if `jmp L(end_loop_bkwd)` is long encoding. + /* Exactly 64 bytes if `jmp L(end_loop_bkwd)` is long encoding. 60 bytes otherwise. */ # define ALIGNED_LOOP_BKWD(align_by); \ .p2align 6; \ diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S index d1b9278..51eb622 100644 --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S @@ -445,7 +445,7 @@ L(more_8x_vec_check): shrq $63, %r8 /* Get 4k difference dst - src. */ andl $(PAGE_SIZE - 256), %ecx - /* If r8 is non-zero must do foward for correctness. Otherwise + /* If r8 is non-zero must do forward for correctness. Otherwise if ecx is non-zero there is 4k False Alaising so do backward copy. */ addl %r8d, %ecx @@ -460,7 +460,7 @@ L(more_8x_vec_forward): /* First vec was already loaded into VEC(0). */ VMOVU -VEC_SIZE(%rsi, %rdx), %VMM(5) VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VMM(6) - /* Save begining of dst. */ + /* Save beginning of dst. */ movq %rdi, %rcx /* Align dst to VEC_SIZE - 1. */ orq $(VEC_SIZE - 1), %rdi @@ -517,7 +517,7 @@ L(more_8x_vec_backward): /* First vec was also loaded into VEC(0). */ VMOVU VEC_SIZE(%rsi), %VMM(5) VMOVU (VEC_SIZE * 2)(%rsi), %VMM(6) - /* Begining of region for 4x backward copy stored in rcx. */ + /* Beginning of region for 4x backward copy stored in rcx. */ leaq (VEC_SIZE * -4 + -1)(%rdi, %rdx), %rcx VMOVU (VEC_SIZE * 3)(%rsi), %VMM(7) VMOVU -VEC_SIZE(%rsi, %rdx), %VMM(8) @@ -611,7 +611,7 @@ L(movsb): movq %rdi, %r8 # endif /* If above __x86_rep_movsb_stop_threshold most likely is - candidate for NT moves aswell. */ + candidate for NT moves as well. */ cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP jae L(large_memcpy_2x_check) # if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S index 15c83f6..409706f 100644 --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S @@ -65,7 +65,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6) L(ret_vec_x0_test): /* If ecx is zero (no matches) lzcnt will set it 32 (VEC_SIZE) which - will gurantee edx (len) is less than it. */ + will guarantee edx (len) is less than it. */ lzcntl %ecx, %ecx /* Hoist vzeroupper (not great for RTM) to save code size. This allows @@ -233,7 +233,7 @@ L(more_4x_vec): jnz L(ret_vec_x3) /* Check if near end before re-aligning (otherwise might do an - unnecissary loop iteration). */ + unnecessary loop iteration). */ addq $-(VEC_SIZE * 4), %rax cmpq $(VEC_SIZE * 4), %rdx jbe L(last_4x_vec) diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S index 3d3ef06..f7a1178 100644 --- a/sysdeps/x86_64/multiarch/memrchr-evex.S +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S @@ -119,7 +119,7 @@ L(last_2x_vec): # endif jle L(zero_2) - /* We adjusted rax (length) for VEC_SIZE == 64 so need seperate + /* We adjusted rax (length) for VEC_SIZE == 64 so need separate offsets. */ # if VEC_SIZE == 64 vpcmpeqb (VEC_SIZE * -1)(%rdi, %rax), %VMATCH, %k0 @@ -354,7 +354,7 @@ L(loop_4x_vec): jnz L(first_vec_x1_end) KMOV %k2, %VRCX - /* Seperate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for + /* Separate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for returning last 2x VEC. For VEC_SIZE == 64 we test each VEC individually, for VEC_SIZE == 32 we combine them in a single 64-bit GPR. */ diff --git a/sysdeps/x86_64/multiarch/memrchr-sse2.S b/sysdeps/x86_64/multiarch/memrchr-sse2.S index 8fdad16..0ac707b 100644 --- a/sysdeps/x86_64/multiarch/memrchr-sse2.S +++ b/sysdeps/x86_64/multiarch/memrchr-sse2.S @@ -50,7 +50,7 @@ ENTRY_P2ALIGN(MEMRCHR, 6) jz L(page_cross) /* NB: This load happens regardless of whether rdx (len) is zero. Since - it doesn't cross a page and the standard gurantees any pointer have + it doesn't cross a page and the standard guarantees any pointer have at least one-valid byte this load must be safe. For the entire history of the x86 memrchr implementation this has been possible so no code "should" be relying on a zero-length check before this load. diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index f37be62..3d9ad49 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -199,7 +199,7 @@ L(less_vec_from_wmemset): MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out. */ andl $(PAGE_SIZE - 1), %edi /* Check if VEC_SIZE store cross page. Mask stores suffer - serious performance degradation when it has to fault supress. + serious performance degradation when it has to fault suppress. */ cmpl $(PAGE_SIZE - VEC_SIZE), %edi /* This is generally considered a cold target. */ diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S index 52e6b18..0175a5e 100644 --- a/sysdeps/x86_64/multiarch/rawmemchr-evex.S +++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S @@ -187,13 +187,13 @@ L(loop_4x_vec): ymm0-15 is used at all is because there is no EVEX encoding vpcmpeq and with vpcmpeq this loop can be performed more efficiently. The non-vzeroupper version is safe for RTM - while the vzeroupper version should be prefered if RTM are + while the vzeroupper version should be preferred if RTM are not supported. Which loop version we use is determined by USE_TERN_IN_LOOP. */ # if USE_TERN_IN_LOOP /* Since vptern can only take 3x vectors fastest to do 1 vec - seperately with EVEX vpcmp. */ + separately with EVEX vpcmp. */ VPCMPEQ (VEC_SIZE * 4)(%rdi), %VMATCH, %k1 /* Compare 3x with vpcmpeq and or them all together with vptern. */ @@ -256,7 +256,7 @@ L(loop_4x_vec): (only if used VEX encoded loop). */ COND_VZEROUPPER - /* Seperate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for + /* Separate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for returning last 2x VEC. For VEC_SIZE == 64 we test each VEC individually, for VEC_SIZE == 32 we combine them in a single 64-bit GPR. */ diff --git a/sysdeps/x86_64/multiarch/strcat-sse2.S b/sysdeps/x86_64/multiarch/strcat-sse2.S index e35790c..459bd8e 100644 --- a/sysdeps/x86_64/multiarch/strcat-sse2.S +++ b/sysdeps/x86_64/multiarch/strcat-sse2.S @@ -163,7 +163,7 @@ ENTRY (STRCAT) decl %ecx jnz 21b - /* Now the sources is aligned. Unfortunatly we cannot force + /* Now the sources is aligned. Unfortunately we cannot force to have both source and destination aligned, so ignore the alignment of the destination. */ .p2align 4 diff --git a/sysdeps/x86_64/multiarch/strcat-strlen-avx2.h.S b/sysdeps/x86_64/multiarch/strcat-strlen-avx2.h.S index 4633aa9..0564f11 100644 --- a/sysdeps/x86_64/multiarch/strcat-strlen-avx2.h.S +++ b/sysdeps/x86_64/multiarch/strcat-strlen-avx2.h.S @@ -1,4 +1,4 @@ -/* strlen used for begining of str{n}cat using AVX2. +/* strlen used for beginning of str{n}cat using AVX2. Copyright (C) 2011-2023 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strcat-strlen-evex.h.S b/sysdeps/x86_64/multiarch/strcat-strlen-evex.h.S index 9ce667d..37b773d 100644 --- a/sysdeps/x86_64/multiarch/strcat-strlen-evex.h.S +++ b/sysdeps/x86_64/multiarch/strcat-strlen-evex.h.S @@ -1,4 +1,4 @@ -/* strlen used for begining of str{n}cat using EVEX 256/512. +/* strlen used for beginning of str{n}cat using EVEX 256/512. Copyright (C) 2011-2023 Free Software Foundation, Inc. This file is part of the GNU C Library. diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S index 3efa1b3..f5236cf 100644 --- a/sysdeps/x86_64/multiarch/strchr-evex.S +++ b/sysdeps/x86_64/multiarch/strchr-evex.S @@ -160,7 +160,7 @@ L(last_vec_x2): # endif L(first_vec_x1): /* Use bsf here to save 1-byte keeping keeping the block in 1x - fetch block. eax guranteed non-zero. */ + fetch block. eax guaranteed non-zero. */ bsf %VRCX, %VRCX # ifndef USE_AS_STRCHRNUL /* Found CHAR or the null byte. */ @@ -294,7 +294,7 @@ L(loop_4x_vec): /* Two methods for loop depending on VEC_SIZE. This is because with zmm registers VPMINU can only run on p0 (as opposed to - p0/p1 for ymm) so it is less prefered. */ + p0/p1 for ymm) so it is less preferred. */ # if VEC_SIZE == 32 /* For VEC_2 and VEC_3 use xor to set the CHARs matching esi to zero. */ @@ -340,7 +340,7 @@ L(loop_4x_vec): esi, the corresponding bit in %k3 is zero so the VPMINU_MASKZ will have a zero in the result). NB: This make the VPMINU 3c latency. The only way to avoid it is to - createa a 12c dependency chain on all the `VPCMP $4, ...` + create a 12c dependency chain on all the `VPCMP $4, ...` which has higher total latency. */ VPMINU %VMM(2), %VMM(4), %VMM(4){%k3}{z} # endif @@ -366,7 +366,7 @@ L(loop_4x_vec): # endif - /* COND_MASK integates the esi matches for VEC_SIZE == 64. For + /* COND_MASK integrates the esi matches for VEC_SIZE == 64. For VEC_SIZE == 32 they are already integrated. */ VPTEST %VMM(2), %VMM(2), %k0 COND_MASK(k2) KMOV %k0, %VRCX @@ -403,7 +403,7 @@ L(zero_end): # endif - /* Seperate return label for last VEC1 because for VEC_SIZE == + /* Separate return label for last VEC1 because for VEC_SIZE == 32 we can reuse return code in L(page_cross) but VEC_SIZE == 64 has mismatched registers. */ # if VEC_SIZE == 64 @@ -480,7 +480,7 @@ L(cross_page_boundary_real): */ xorl $((1 << CHAR_PER_VEC)- 1), %eax # endif - /* Use arithmatic shift so that leading 1s are filled in. */ + /* Use arithmetic shift so that leading 1s are filled in. */ sarx %VGPR(SHIFT_REG), %VRAX, %VRAX /* If eax is all ones then no matches for esi or NULL. */ diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S index 6036016..192cd13 100644 --- a/sysdeps/x86_64/multiarch/strchr-sse2.S +++ b/sysdeps/x86_64/multiarch/strchr-sse2.S @@ -86,7 +86,7 @@ L(next_48_bytes): jne L(return) L(loop_start): /* We use this alignment to force loop be aligned to 8 but not - 16 bytes. This gives better sheduling on AMD processors. */ + 16 bytes. This gives better scheduling on AMD processors. */ .p2align 4 pxor %xmm6, %xmm6 andq $-64, %rdi diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S index 07f8ec5..8804338 100644 --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S @@ -194,7 +194,7 @@ ENTRY (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip), %rax mov %fs:(%rax), %LOCALE_REG_LP - /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + /* Either 1 or 5 bytes (depending if CET is enabled). */ .p2align 4 END (STRCASECMP) /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ @@ -501,7 +501,7 @@ L(more_3x_vec): L(prepare_loop): # ifdef USE_AS_STRNCMP - /* Store N + (VEC_SIZE * 4) and place check at the begining of + /* Store N + (VEC_SIZE * 4) and place check at the beginning of the loop. */ leaq (VEC_SIZE * 2)(%rdi, %rdx), %rdx # endif @@ -762,7 +762,7 @@ L(page_cross_during_loop): .p2align 4,, 4 L(less_1x_vec_till_page_cross): subl $-(VEC_SIZE * 4), %eax - /* Guranteed safe to read from rdi - VEC_SIZE here. The only + /* Guaranteed safe to read from rdi - VEC_SIZE here. The only concerning case is first iteration if incoming s1 was near start of a page and s2 near end. If s1 was near the start of the page we already aligned up to nearest VEC_SIZE * 4 so gurnateed safe @@ -948,7 +948,7 @@ L(ret9): L(page_cross): # ifndef USE_AS_STRNCMP /* If both are VEC aligned we don't need any special logic here. - Only valid for strcmp where stop condition is guranteed to be + Only valid for strcmp where stop condition is guaranteed to be reachable by just reading memory. */ testl $((VEC_SIZE - 1) << 20), %eax jz L(no_page_cross) @@ -984,7 +984,7 @@ L(page_cross): subl $(VEC_SIZE * 3), %eax jg L(less_1x_vec_till_page) - /* If more than 1x VEC till page cross, loop throuh safely + /* If more than 1x VEC till page cross, loop through safely loadable memory until within 1x VEC of page cross. */ .p2align 4,, 10 @@ -1007,9 +1007,9 @@ L(page_cross_loop): jl L(page_cross_loop) subl %eax, %OFFSET_REG - /* OFFSET_REG has distance to page cross - VEC_SIZE. Guranteed + /* OFFSET_REG has distance to page cross - VEC_SIZE. Guaranteed to not cross page so is safe to load. Since we have already - loaded at least 1 VEC from rsi it is also guranteed to be + loaded at least 1 VEC from rsi it is also guaranteed to be safe. */ VMOVU (%rdi, %OFFSET_REG64), %ymm0 diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S index a8bd5cd..ae39cdf 100644 --- a/sysdeps/x86_64/multiarch/strcmp-evex.S +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -217,7 +217,7 @@ ENTRY (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip), %rax mov %fs:(%rax), %LOCALE_REG_LP - /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + /* Either 1 or 5 bytes (depending if CET is enabled). */ .p2align 4 END (STRCASECMP) /* FALLTHROUGH to strcasecmp/strncasecmp_l. */ @@ -455,7 +455,7 @@ L(return_vec_3): # endif /* If CHAR_PER_VEC == 64 we can't combine matches from the last - 2x VEC so need seperate return label. */ + 2x VEC so need separate return label. */ L(return_vec_2): # if (CHAR_PER_VEC <= 16) || !(defined USE_AS_STRNCMP) bsf %VRCX, %VRCX @@ -567,7 +567,7 @@ L(prepare_loop_no_len): shrl $2, %ecx leaq (CHAR_PER_VEC * 2)(%rdx, %rcx), %rdx # else - /* Store N + (VEC_SIZE * 4) and place check at the begining of + /* Store N + (VEC_SIZE * 4) and place check at the beginning of the loop. */ leaq (VEC_SIZE * 2)(%rdi, %rdx), %rdx L(prepare_loop_no_len): @@ -840,7 +840,7 @@ L(ret7): /* If CHAR_PER_VEC == 64 we can't combine matches from the last - 2x VEC so need seperate return label. */ + 2x VEC so need separate return label. */ # if CHAR_PER_VEC == 64 L(return_vec_2_end): bsf %VRCX, %VRCX @@ -906,7 +906,7 @@ L(page_cross_during_loop): .p2align 4,, 4 L(less_1x_vec_till_page_cross): subl $-(VEC_SIZE * 4), %eax - /* Guranteed safe to read from rdi - VEC_SIZE here. The only + /* Guaranteed safe to read from rdi - VEC_SIZE here. The only concerning case is first iteration if incoming s1 was near start of a page and s2 near end. If s1 was near the start of the page we already aligned up to nearest VEC_SIZE * 4 so gurnateed safe @@ -997,7 +997,7 @@ L(return_page_cross_end_check): and %VR10, %VRCX /* Need to use tzcnt here as VRCX may be zero. If VRCX is zero tzcnt(VRCX) will be CHAR_PER and remaining length (edx) is - guranteed to be <= CHAR_PER_VEC so we will only use the return + guaranteed to be <= CHAR_PER_VEC so we will only use the return idx if VRCX was non-zero. */ tzcnt %VRCX, %VRCX leal -VEC_SIZE(%rax, %rcx, SIZE_OF_CHAR), %ecx @@ -1147,7 +1147,7 @@ L(ret9): L(page_cross): # ifndef USE_AS_STRNCMP /* If both are VEC aligned we don't need any special logic here. - Only valid for strcmp where stop condition is guranteed to + Only valid for strcmp where stop condition is guaranteed to be reachable by just reading memory. */ testl $((VEC_SIZE - 1) << 20), %eax jz L(no_page_cross) @@ -1185,7 +1185,7 @@ L(page_cross): jg L(less_1x_vec_till_page) - /* If more than 1x VEC till page cross, loop throuh safely + /* If more than 1x VEC till page cross, loop through safely loadable memory until within 1x VEC of page cross. */ .p2align 4,, 8 L(page_cross_loop): @@ -1209,9 +1209,9 @@ L(page_cross_loop): subl %eax, %OFFSET_REG - /* OFFSET_REG has distance to page cross - VEC_SIZE. Guranteed + /* OFFSET_REG has distance to page cross - VEC_SIZE. Guaranteed to not cross page so is safe to load. Since we have already - loaded at least 1 VEC from rsi it is also guranteed to be + loaded at least 1 VEC from rsi it is also guaranteed to be safe. */ VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %VMM(0) VPTESTM %VMM(0), %VMM(0), %k2 diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S index 2cf3c6f..516fe7a 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S @@ -20,7 +20,7 @@ /* Continue building as ISA level 2. We use this as ISA V2 default because strcmp-sse42 uses pcmpstri (slow on some SSE4.2 - processors) and this implementation is potenially faster than + processors) and this implementation is potentially faster than strcmp-sse42 (aside from the slower page cross case). */ #if ISA_SHOULD_BUILD (2) diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S index 4a96f2d..562c021 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse2.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S @@ -75,7 +75,7 @@ ENTRY2 (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax mov %fs:(%rax),%RDX_LP - /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + /* Either 1 or 5 bytes (depending if CET is enabled). */ .p2align 4 END2 (STRCASECMP) /* FALLTHROUGH to strcasecmp_l. */ @@ -89,7 +89,7 @@ ENTRY2 (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax mov %fs:(%rax),%RCX_LP - /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + /* Either 1 or 5 bytes (depending if CET is enabled). */ .p2align 4 END2 (STRCASECMP) /* FALLTHROUGH to strncasecmp_l. */ @@ -186,7 +186,7 @@ ENTRY (STRCMP) jnz LABEL(less16bytes) /* If not, find different value or null char */ # if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L sub $16, %r11 - jbe LABEL(strcmp_exitz) /* finish comparision */ + jbe LABEL(strcmp_exitz) /* finish comparison */ # endif add $16, %rsi /* prepare to search next 16 bytes */ add $16, %rdi /* prepare to search next 16 bytes */ @@ -400,7 +400,7 @@ LABEL(nibble_ashr_1): # endif pxor %xmm0, %xmm0 - sub $0x1000, %r10 /* substract 4K from %r10 */ + sub $0x1000, %r10 /* subtract 4K from %r10 */ jmp LABEL(gobble_ashr_1) /* diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S index f93c344..cbb2288 100644 --- a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S +++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S @@ -84,7 +84,7 @@ ENTRY (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax mov %fs:(%rax),%RDX_LP - /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + /* Either 1 or 5 bytes (depending if CET is enabled). */ .p2align 4 END (STRCASECMP) /* FALLTHROUGH to strcasecmp_l. */ @@ -94,7 +94,7 @@ ENTRY (STRCASECMP) movq __libc_tsd_LOCALE@gottpoff(%rip),%rax mov %fs:(%rax),%RCX_LP - /* Either 1 or 5 bytes (dependeing if CET is enabled). */ + /* Either 1 or 5 bytes (depending if CET is enabled). */ .p2align 4 END (STRCASECMP) /* FALLTHROUGH to strncasecmp_l. */ diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2.S b/sysdeps/x86_64/multiarch/strcpy-sse2.S index a2f7047..443c62b 100644 --- a/sysdeps/x86_64/multiarch/strcpy-sse2.S +++ b/sysdeps/x86_64/multiarch/strcpy-sse2.S @@ -50,7 +50,7 @@ ENTRY (STRCPY) 5: movq $0xfefefefefefefeff,%r8 - /* Now the sources is aligned. Unfortunatly we cannot force + /* Now the sources is aligned. Unfortunately we cannot force to have both source and destination aligned, so ignore the alignment of the destination. */ .p2align 4 diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S index a2cd2d8..ef47a45 100644 --- a/sysdeps/x86_64/multiarch/strlen-avx2.S +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S @@ -224,7 +224,7 @@ L(cross_page_continue): since data is only aligned to VEC_SIZE. */ # ifdef USE_AS_STRNLEN /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE - because it simplies the logic in last_4x_vec_or_less. */ + because it simplifies the logic in last_4x_vec_or_less. */ leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx subq %rdx, %rcx # ifdef USE_AS_WCSLEN diff --git a/sysdeps/x86_64/multiarch/strncat-evex.S b/sysdeps/x86_64/multiarch/strncat-evex.S index fedad6c..7d17089 100644 --- a/sysdeps/x86_64/multiarch/strncat-evex.S +++ b/sysdeps/x86_64/multiarch/strncat-evex.S @@ -236,7 +236,7 @@ L(more_1x_vec): VMOVU %VMM(0), (%rdi) /* We are going to align rsi here so will need to be able to re- - adjust rdi/rdx afterwords. NB: We filtered out huge lengths + adjust rdi/rdx afterwards. NB: We filtered out huge lengths so rsi + rdx * CHAR_SIZE cannot overflow. */ leaq (VEC_SIZE * -1)(%rsi, %rdx, CHAR_SIZE), %rdx diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2.S b/sysdeps/x86_64/multiarch/strncpy-avx2.S index 815b33d..3a54be1 100644 --- a/sysdeps/x86_64/multiarch/strncpy-avx2.S +++ b/sysdeps/x86_64/multiarch/strncpy-avx2.S @@ -99,7 +99,7 @@ L(page_cross_continue): /* `jb` because length rdx is now length - CHAR_SIZE. */ jbe L(less_1x_vec) - /* This may overset but thats fine because we still need to zero + /* This may overset but that's fine because we still need to zero fill. */ VMOVU %VMM(0), (%rdi) diff --git a/sysdeps/x86_64/multiarch/strncpy-evex.S b/sysdeps/x86_64/multiarch/strncpy-evex.S index 44715f2..2a3b6d1 100644 --- a/sysdeps/x86_64/multiarch/strncpy-evex.S +++ b/sysdeps/x86_64/multiarch/strncpy-evex.S @@ -130,7 +130,7 @@ L(page_cross_continue): jae L(more_1x_vec) /* If there where multiple zero-CHAR matches in the first VEC, - VRCX will be overset but thats fine since any oversets where + VRCX will be overset but that's fine since any oversets where at zero-positions anyways. */ # ifdef USE_AS_STPCPY @@ -177,7 +177,7 @@ L(more_1x_vec): # endif - /* This may overset but thats fine because we still need to zero + /* This may overset but that's fine because we still need to zero fill. */ VMOVU %VMM(0), (%rdi) @@ -189,7 +189,7 @@ L(more_1x_vec): /* We are going to align rsi here so will need to be able to re- - adjust rdi/rdx afterwords. NB: We filtered out huge lengths + adjust rdi/rdx afterwards. NB: We filtered out huge lengths so rsi + rdx * CHAR_SIZE cannot overflow. */ leaq (VEC_SIZE * -1)(%rsi, %rdx, CHAR_SIZE), %rdx subq %rsi, %rdi @@ -221,7 +221,7 @@ L(last_2x_vec): cmpl $(CHAR_PER_VEC), %edx jb L(ret_vec_x1_len) - /* Seperate logic for CHAR_PER_VEC == 64 because we already did + /* Separate logic for CHAR_PER_VEC == 64 because we already did `tzcnt` on VRCX. */ # if CHAR_PER_VEC == 64 /* cl == CHAR_PER_VEC iff it was zero before the `tzcnt`. */ @@ -296,7 +296,7 @@ L(ret_vec_x1_no_bsf): .p2align 4,, 8 L(last_4x_vec): - /* Seperate logic for CHAR_PER_VEC == 64 because we can do `andl + /* Separate logic for CHAR_PER_VEC == 64 because we can do `andl $(CHAR_PER_VEC * 4 - 1), %edx` with less code size just using `movzbl`. */ # if CHAR_PER_VEC == 64 @@ -677,7 +677,7 @@ L(copy_16_31): vmovdqu %xmm1, -(16 - CHAR_SIZE)(%rdi, %rdx, CHAR_SIZE) cmpl %ecx, %edx - /* Seperate logic depending on VEC_SIZE. If VEC_SIZE == 64 then + /* Separate logic depending on VEC_SIZE. If VEC_SIZE == 64 then we have a larger copy block for 32-63 so this is just falls through to zfill 16-31. If VEC_SIZE == 32 then we check for full zfill of less 1x VEC. */ diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S index 28caa86..ae082d3 100644 --- a/sysdeps/x86_64/multiarch/strnlen-evex.S +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S @@ -336,7 +336,7 @@ L(loop_last_4x_vec): VPTESTN %VMM(3), %VMM(3), %k0 - /* Seperate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for + /* Separate logic for VEC_SIZE == 64 and VEC_SIZE == 32 for returning last 2x VEC. For VEC_SIZE == 64 we test each VEC individually, for VEC_SIZE == 32 we combine them in a single 64-bit GPR. */ diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S index 9db2d9c..ebbd84b 100644 --- a/sysdeps/x86_64/multiarch/strrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S @@ -176,7 +176,7 @@ L(aligned_more): .p2align 4 L(first_aligned_loop): /* Do 2x VEC at a time. Any more and the cost of finding the - match outweights loop benefit. */ + match outweighs loop benefit. */ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4 vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5 @@ -324,7 +324,7 @@ L(cross_page): vmovdqu (%rsi), %ymm1 VPCMPEQ %ymm1, %ymm0, %ymm6 vpmovmskb %ymm6, %ecx - /* Shift out zero CHAR matches that are before the begining of + /* Shift out zero CHAR matches that are before the beginning of src (rdi). */ shrxl %edi, %ecx, %ecx testl %ecx, %ecx @@ -332,7 +332,7 @@ L(cross_page): VPCMPEQ %ymm1, %ymm7, %ymm1 vpmovmskb %ymm1, %eax - /* Shift out search CHAR matches that are before the begining of + /* Shift out search CHAR matches that are before the beginning of src (rdi). */ shrxl %edi, %eax, %eax blsmskl %ecx, %ecx diff --git a/sysdeps/x86_64/multiarch/strrchr-evex-base.S b/sysdeps/x86_64/multiarch/strrchr-evex-base.S index 7783074..58b2853 100644 --- a/sysdeps/x86_64/multiarch/strrchr-evex-base.S +++ b/sysdeps/x86_64/multiarch/strrchr-evex-base.S @@ -152,7 +152,7 @@ L(loop): jnz L(loop_vec_x2_match) KMOV %k1, %VRDX - /* Match is in first vector, rdi offset need to be substracted + /* Match is in first vector, rdi offset need to be subtracted by VEC_SIZE. */ sub $VEC_SIZE, %r8 @@ -216,7 +216,7 @@ L(check_last_match): ret /* No match recorded in r8. Check the second saved vector - in begining. */ + in beginning. */ L(vector_x2_ret): VPCMPEQ %VMM(2), %VMM(0), %k2 KMOV %k2, %VRAX diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S index 0d1bf07..85e3b01 100644 --- a/sysdeps/x86_64/multiarch/strrchr-evex.S +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S @@ -139,7 +139,7 @@ L(first_vec_x1_or_x2): KORTEST %k2, %k3 jz L(first_vec_x0_test) - /* Guranteed that VEC(2) and VEC(3) are within range so merge + /* Guaranteed that VEC(2) and VEC(3) are within range so merge the two bitmasks then get last result. */ kunpck_2x %k2, %k3, %k3 kmov_2x %k3, %maskm_2x @@ -192,7 +192,7 @@ L(first_vec_x2): .p2align 4,, 12 L(aligned_more): - /* Need to keep original pointer incase VEC(1) has last match. + /* Need to keep original pointer in case VEC(1) has last match. */ movq %rdi, %rsi andq $-VEC_SIZE, %rdi @@ -222,7 +222,7 @@ L(aligned_more): .p2align 4,, 10 L(first_aligned_loop): /* Preserve VEC(1), VEC(2), VEC(3), and VEC(4) until we can - gurantee they don't store a match. */ + guarantee they don't store a match. */ VMOVA (VEC_SIZE * 4)(%rdi), %VMM(5) VMOVA (VEC_SIZE * 5)(%rdi), %VMM(6) @@ -285,7 +285,7 @@ L(second_aligned_loop_prep): L(second_aligned_loop_set_furthest_match): movq %rdi, %rsi /* Ideally we would safe k2/k3 but `kmov/kunpck` take uops on - port0 and have noticable overhead in the loop. */ + port0 and have noticeable overhead in the loop. */ VMOVA %VMM(5), %VMM(7) VMOVA %VMM(6), %VMM(8) .p2align 4 @@ -351,7 +351,7 @@ L(cross_page_boundary): /* eax contains all the page offset bits of src (rdi). `xor rdi, rax` sets pointer will all page offset bits cleared so offset of (PAGE_SIZE - VEC_SIZE) will get last aligned VEC - before page cross (guranteed to be safe to read). Doing this + before page cross (guaranteed to be safe to read). Doing this as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax` saves a bit of code size. */ xorq %rdi, %rax @@ -359,7 +359,7 @@ L(cross_page_boundary): VPTESTN %VMM(1), %VMM(1), %k0 KMOV %k0, %VRCX - /* Shift out zero CHAR matches that are before the begining of + /* Shift out zero CHAR matches that are before the beginning of src (rdi). */ # ifdef USE_AS_WCSRCHR movl %edi, %esi @@ -374,7 +374,7 @@ L(cross_page_boundary): /* Found zero CHAR so need to test for search CHAR. */ VPCMP $0, %VMATCH, %VMM(1), %k1 KMOV %k1, %VRAX - /* Shift out search CHAR matches that are before the begining of + /* Shift out search CHAR matches that are before the beginning of src (rdi). */ shrx %VGPR(SHIFT_REG), %VRAX, %VRAX diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S index 9bc4fc3..c9749ee 100644 --- a/sysdeps/x86_64/multiarch/strrchr-sse2.S +++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S @@ -166,7 +166,7 @@ L(first_loop): /* Do 2x VEC at a time. */ movaps (VEC_SIZE * 2)(%rdi), %xmm4 movaps (VEC_SIZE * 3)(%rdi), %xmm5 - /* Since SSE2 no pminud so wcsrchr needs seperate logic for + /* Since SSE2 no pminud so wcsrchr needs separate logic for detecting zero. Note if this is found to be a bottleneck it may be worth adding an SSE4.1 wcsrchr implementation. */ # ifdef USE_AS_WCSRCHR @@ -238,7 +238,7 @@ L(new_match): /* We can't reuse either of the old comparisons as since we mask of zeros after first zero (instead of using the full - comparison) we can't gurantee no interference between match + comparison) we can't guarantee no interference between match after end of string and valid match. */ pmovmskb %xmm4, %eax pmovmskb %xmm7, %edx @@ -268,7 +268,7 @@ L(second_loop_match): L(second_loop): movaps (VEC_SIZE * 2)(%rdi), %xmm4 movaps (VEC_SIZE * 3)(%rdi), %xmm5 - /* Since SSE2 no pminud so wcsrchr needs seperate logic for + /* Since SSE2 no pminud so wcsrchr needs separate logic for detecting zero. Note if this is found to be a bottleneck it may be worth adding an SSE4.1 wcsrchr implementation. */ # ifdef USE_AS_WCSRCHR @@ -297,11 +297,11 @@ L(second_loop): pmovmskb %xmm6, %eax addq $(VEC_SIZE * 2), %rdi - /* Either null term or new occurence of CHAR. */ + /* Either null term or new occurrence of CHAR. */ addl %ecx, %eax jz L(second_loop) - /* No null term so much be new occurence of CHAR. */ + /* No null term so much be new occurrence of CHAR. */ testl %ecx, %ecx jz L(second_loop_match) @@ -331,7 +331,7 @@ L(second_loop_new_match): /* We can't reuse either of the old comparisons as since we mask of zeros after first zero (instead of using the full - comparison) we can't gurantee no interference between match + comparison) we can't guarantee no interference between match after end of string and valid match. */ pmovmskb %xmm4, %eax pmovmskb %xmm7, %edx diff --git a/sysdeps/x86_64/multiarch/strstr-avx512.c b/sysdeps/x86_64/multiarch/strstr-avx512.c index ba7d8f5..0b85393 100644 --- a/sysdeps/x86_64/multiarch/strstr-avx512.c +++ b/sysdeps/x86_64/multiarch/strstr-avx512.c @@ -140,7 +140,7 @@ __strstr_avx512 (const char *haystack, const char *ned) = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0)); uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT); cmpmask = cmpmask & cvtmask64_u64 (loadmask); - /* Search for the 2 charaters of needle */ + /* Search for the 2 characters of needle */ __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0); __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1); k1 = kshiftri_mask64 (k1, 1); -- cgit v1.1