author    Florian Weimer <fweimer@redhat.com>    2017-06-23 17:23:44 +0200
committer Florian Weimer <fweimer@redhat.com>    2017-06-23 17:24:40 +0200
commit    3ec7c02cc3e922b9364dc8cfd1d4546671b91003 (patch)
tree      283d54448fe89359272093156316884e61992c9a /sysdeps
parent    7fa1d9462baabc5a1058efc13a48444af4678acf (diff)
x86-64: memcmp-avx2-movbe.S needs saturating subtraction [BZ #21662]
This code:

L(between_2_3):
        /* Load as big endian with overlapping loads and bswap to avoid
           branches.  */
        movzwl  -2(%rdi, %rdx), %eax
        movzwl  -2(%rsi, %rdx), %ecx
        shll    $16, %eax
        shll    $16, %ecx
        movzwl  (%rdi), %edi
        movzwl  (%rsi), %esi
        orl     %edi, %eax
        orl     %esi, %ecx
        bswap   %eax
        bswap   %ecx
        subl    %ecx, %eax
        ret

needs a saturating subtract because the full register is used.
With this commit, only the lower 24 bits of the register are
used, so a regular subtraction suffices.

The test case change adds coverage for these kinds of bugs.
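To see the failure concretely, consider what the old sequence computes for
two 3-byte buffers.  The C sketch below is a hypothetical model of the
removed assembly, not glibc code; the function name old_between_2_3 and the
buffer values are illustrative.  The two overlapping 16-bit loads place
byte 1 in the value twice and byte 0 in the top byte, so the full 32-bit
register is significant and the plain subtraction can return the wrong sign:

    #include <stdint.h>
    #include <stdio.h>

    /* Model of the removed code path for a 3-byte compare: all 32 bits
       of a and b are in use.  */
    static int
    old_between_2_3 (const unsigned char *s1, const unsigned char *s2)
    {
      uint32_t a = ((uint32_t) s1[0] << 24 | (uint32_t) s1[1] << 16
                    | (uint32_t) s1[1] << 8 | s1[2]);
      uint32_t b = ((uint32_t) s2[0] << 24 | (uint32_t) s2[1] << 16
                    | (uint32_t) s2[1] << 8 | s2[2]);
      /* Plain subtraction: wraps when the true difference does not fit
         in an int, flipping the sign of the result.  */
      return (int) (a - b);
    }

    int
    main (void)
    {
      unsigned char s1[3] = { 0xff, 0x00, 0x00 };
      unsigned char s2[3] = { 0x01, 0x00, 0x00 };
      /* s1 is byte-wise greater than s2, so memcmp must return > 0,
         yet 0xff000000 - 0x01000000 = 0xfe000000 is negative as int.  */
      printf ("old result: %d\n", old_between_2_3 (s1, s2));
      return 0;
    }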
Diffstat (limited to 'sysdeps')
-rw-r--r--  sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S  |  20
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
index 47630dd..9d19210 100644
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -137,18 +137,18 @@ L(exit):
 	.p2align 4
 L(between_2_3):
-	/* Load as big endian with overlapping loads and bswap to avoid
-	   branches.  */
-	movzwl	-2(%rdi, %rdx), %eax
-	movzwl	-2(%rsi, %rdx), %ecx
-	shll	$16, %eax
-	shll	$16, %ecx
-	movzwl	(%rdi), %edi
-	movzwl	(%rsi), %esi
-	orl	%edi, %eax
-	orl	%esi, %ecx
+	/* Load as big endian to avoid branches.  */
+	movzwl	(%rdi), %eax
+	movzwl	(%rsi), %ecx
+	shll	$8, %eax
+	shll	$8, %ecx
 	bswap	%eax
 	bswap	%ecx
+	movzbl	-1(%rdi, %rdx), %edi
+	movzbl	-1(%rsi, %rdx), %esi
+	orl	%edi, %eax
+	orl	%esi, %ecx
+	/* Subtraction is okay because the upper 8 bits are zero.  */
 	subl	%ecx, %eax
 	ret
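For contrast, an equivalent model of the new sequence (again a hypothetical
C sketch, not glibc code; new_between_2_3 is an illustrative name).  Byte 0
now lands in bits 16-23 and the final byte in bits 0-7, so both values are
at most 0xffffff and the plain subl can never wrap:

    #include <stddef.h>
    #include <stdint.h>

    /* Model of the new code path, valid for n = 2 or 3.  a and b occupy
       only the low 24 bits, so a - b lies in [-0xffffff, 0xffffff],
       safely inside the int range.  */
    static int
    new_between_2_3 (const unsigned char *s1, const unsigned char *s2,
                     size_t n)
    {
      uint32_t a = (uint32_t) s1[0] << 16 | (uint32_t) s1[1] << 8 | s1[n - 1];
      uint32_t b = (uint32_t) s2[0] << 16 | (uint32_t) s2[1] << 8 | s2[n - 1];
      return (int) (a - b);
    }

On the 0xff/0x01 example above this yields 0xff0000 - 0x010000 = 0xfe0000,
a positive int, matching the sign memcmp requires.  For n = 2 the last byte
is the same as byte 1, mirroring how the assembly's movzbl -1(%rdi, %rdx)
re-reads a byte already covered by the word load.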