aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLiubov Dmitrieva <ldmitrie@sourceware.org>2013-08-30 18:37:28 +0400
committerLiubov Dmitrieva <ldmitrie@sourceware.org>2013-10-23 23:51:44 +0400
commit029183a4ca3f765f63e7b64bc260622f02b04539 (patch)
treec7f6912d9575e27f691f219ad9437e9af39e016e
parent01d5454d13d2c21b9a08b28441d37a7ddce089a6 (diff)
downloadglibc-ldmitrie/intel_mpx.zip
glibc-ldmitrie/intel_mpx.tar.gz
glibc-ldmitrie/intel_mpx.tar.bz2
Implemented bound check support for string/memory routines for x86_64.ldmitrie/intel_mpx
TODO: Fix bound check support in strcmp-sse2 and implement in strspn, strstr and strcspn.
-rw-r--r--sysdeps/x86_64/Makefile3
-rw-r--r--sysdeps/x86_64/Versions7
-rw-r--r--sysdeps/x86_64/memchr.S78
-rw-r--r--sysdeps/x86_64/memcmp.S85
-rw-r--r--sysdeps/x86_64/memrchr.S13
-rw-r--r--sysdeps/x86_64/memset.S30
-rw-r--r--sysdeps/x86_64/multiarch/Makefile23
-rw-r--r--sysdeps/x86_64/multiarch/Versions13
-rw-r--r--sysdeps/x86_64/multiarch/bcopy.S5
-rw-r--r--sysdeps/x86_64/multiarch/ifunc-impl-list.c6
-rw-r--r--sysdeps/x86_64/multiarch/memcmp-sse4.S339
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-c.c80
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S5
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3-back.S26
-rw-r--r--sysdeps/x86_64/multiarch/memcpy.S35
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/multiarch/memcpy_chk.S14
-rw-r--r--sysdeps/x86_64/multiarch/memmove-c.c118
-rw-r--r--sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S6
-rw-r--r--sysdeps/x86_64/multiarch/memmove-ssse3-back.S8
-rw-r--r--sysdeps/x86_64/multiarch/memmove.c34
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk-c.c1
-rw-r--r--sysdeps/x86_64/multiarch/memmove_chk.c10
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-c.c36
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S6
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S12
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy.S37
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/multiarch/mempcpy_chk.S14
-rw-r--r--sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S76
-rw-r--r--sysdeps/x86_64/multiarch/strchr.S22
-rw-r--r--sysdeps/x86_64/multiarch/strcmp-sse42.S238
-rw-r--r--sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S174
-rw-r--r--sysdeps/x86_64/multiarch/strrchr.S16
-rw-r--r--sysdeps/x86_64/multiarch/wcscpy-ssse3.S171
-rw-r--r--sysdeps/x86_64/rawmemchr.S53
-rw-r--r--sysdeps/x86_64/stpcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/strcat.S53
-rw-r--r--sysdeps/x86_64/strchr.S13
-rw-r--r--sysdeps/x86_64/strchrnul.S13
-rw-r--r--sysdeps/x86_64/strcmp.S238
-rw-r--r--sysdeps/x86_64/strcpy.S40
-rw-r--r--sysdeps/x86_64/strcpy_chk-c.c1
-rw-r--r--sysdeps/x86_64/strcpy_chk.S2
-rw-r--r--sysdeps/x86_64/strcspn.S33
-rw-r--r--sysdeps/x86_64/strlen.S19
-rw-r--r--sysdeps/x86_64/strrchr.S13
-rw-r--r--sysdeps/x86_64/strspn.S33
-rw-r--r--sysdeps/x86_64/strtok.S3
-rw-r--r--sysdeps/x86_64/wcschr.S26
-rw-r--r--sysdeps/x86_64/wcscmp.S8
-rw-r--r--sysdeps/x86_64/wcslen.S73
-rw-r--r--sysdeps/x86_64/wcsrchr.S51
53 files changed, 2308 insertions, 108 deletions
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 08db331..db6838d 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -18,6 +18,9 @@ endif
ifeq ($(subdir),string)
sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
gen-as-const-headers += locale-defines.sym
+ifeq ($(enable-mpx), yes)
+sysdep_routines += strcpy_chk-c stpcpy_chk-c
+endif
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions
index a437f85..1de589c 100644
--- a/sysdeps/x86_64/Versions
+++ b/sysdeps/x86_64/Versions
@@ -2,6 +2,13 @@ libc {
GLIBC_2.14 {
memcpy;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ chkp_memset_nobnd;
+ chkp_memset_nochk;
+ chkp_memset_nobnd_nochk;
+ }
+%endif
}
libm {
GLIBC_2.1 {
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 891ee70..205345b 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -20,8 +20,17 @@
/* fast SSE2 version with using pmaxub and 64 byte loop */
+# ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
.text
ENTRY(memchr)
+
movd %rsi, %xmm1
mov %rdi, %rcx
@@ -33,6 +42,10 @@ ENTRY(memchr)
and $63, %rcx
pshufd $0, %xmm1, %xmm1
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
cmp $48, %rcx
ja L(crosscache)
@@ -72,7 +85,7 @@ L(crosscache):
jbe L(return_null)
add %rdi, %rax
add %rcx, %rax
- ret
+ RETURN
.p2align 4
L(unaligned_no_match):
@@ -85,24 +98,36 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
@@ -116,24 +141,36 @@ L(loop_prolog):
sub $64, %rdx
jbe L(exit_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -151,6 +188,9 @@ L(loop_prolog):
L(align64_loop):
sub $64, %rdx
jbe L(exit_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
@@ -192,25 +232,34 @@ L(align64_loop):
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(exit_loop):
add $32, %rdx
jle L(exit_loop_32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -219,6 +268,9 @@ L(exit_loop):
sub $16, %rdx
jle L(return_null)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
pcmpeqb 48(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
@@ -229,6 +281,9 @@ L(exit_loop):
.p2align 4
L(exit_loop_32):
add $32, %rdx
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
@@ -237,6 +292,9 @@ L(exit_loop_32):
sub $16, %rdx
jbe L(return_null)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
pcmpeqb 16(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
@@ -248,25 +306,25 @@ L(exit_loop_32):
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches_1):
@@ -274,7 +332,7 @@ L(matches_1):
sub %rax, %rdx
jbe L(return_null)
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16_1):
@@ -282,7 +340,7 @@ L(matches16_1):
sub %rax, %rdx
jbe L(return_null)
lea 16(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches32_1):
@@ -290,7 +348,7 @@ L(matches32_1):
sub %rax, %rdx
jbe L(return_null)
lea 32(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches48_1):
@@ -298,7 +356,7 @@ L(matches48_1):
sub %rax, %rdx
jbe L(return_null)
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index d5c072c..77a7bca 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -23,6 +23,11 @@
ENTRY (memcmp)
test %rdx, %rdx
jz L(finz)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+ pxor %xmm0, %xmm0
cmpq $1, %rdx
jle L(finr1b)
subq %rdi, %rsi
@@ -86,6 +91,10 @@ L(s16b):
.p2align 4,, 4
L(finr1b):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
movzbl (%rdi), %eax
movzbl (%rsi), %edx
L(finz1):
@@ -132,6 +141,10 @@ L(gt32):
andq $15, %r8
jz L(16am)
/* Both pointers may be misaligned. */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi), %xmm1
movdqu (%rdi, %rsi), %xmm0
pcmpeqb %xmm0, %xmm1
@@ -146,6 +159,10 @@ L(16am):
jz L(ATR)
testq $16, %rdi
jz L(A32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi, %rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -160,6 +177,10 @@ L(A32):
/* Pre-unroll to be ready for unrolled 64B loop. */
testq $32, %rdi
jz L(A64)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -167,6 +188,10 @@ L(A32):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -181,6 +206,10 @@ L(A64):
jge L(mt32)
L(A64main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -188,6 +217,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -195,6 +228,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -202,6 +239,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -219,6 +260,10 @@ L(mt32):
jge L(mt16)
L(A32main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -226,6 +271,10 @@ L(A32main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -258,6 +307,10 @@ L(ATR):
testq $16, %rdi
jz L(ATR32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -273,6 +326,10 @@ L(ATR32):
testq $32, %rdi
jz L(ATR64)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -280,6 +337,10 @@ L(ATR32):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -292,6 +353,10 @@ L(ATR64):
je L(mt32)
L(ATR64main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -299,6 +364,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -306,6 +375,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -313,6 +386,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -328,6 +405,10 @@ L(ATR64main):
jge L(mt16)
L(ATR32res):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -335,6 +416,10 @@ L(ATR32res):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 5a659fe..3afa97c 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -27,6 +27,11 @@ ENTRY (memrchr)
sub $16, %rdx
jbe L(length_less16)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -284,6 +289,10 @@ L(length_less16_offset0):
test %edx, %edx
jz L(return_null)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
mov %dl, %cl
pcmpeqb (%rdi), %xmm1
@@ -314,6 +323,10 @@ L(length_less16):
and $15, %rcx
jz L(length_less16_offset0)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
mov %rdi, %rcx
and $15, %rcx
mov %cl, %dh
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 6c69f4b..ccb2aae 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -26,6 +26,15 @@
.text
#if !defined NOT_IN_libc
ENTRY(__bzero)
+ testq %rsi, %rsi
+ jz L(only_return)
+
+#if defined __CHKP__ && defined __CHKWR__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rsi), %bnd0
+# endif
+
+ mov %rdi, %rax
movq %rdi, %rax /* Set return value. */
movq %rsi, %rdx /* Set n. */
pxor %xmm8, %xmm8
@@ -53,7 +62,20 @@ ENTRY_CHK (__memset_chk)
END_CHK (__memset_chk)
#endif
+#if defined __CHKP__ && defined __CHKWR__
+ENTRY (chkp_memset_nochk)
+ jmp L(entry_from_chkp_memset_nochk)
+END (chkp_memset_nochk)
+#endif
+
ENTRY (memset)
+ testq %rdx, %rdx
+ jz L(only_return)
+#if defined __CHKP__ && defined __CHKWR__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+L(entry_from_chkp_memset_nochk):
+#endif
movd %esi, %xmm8
movq %rdi, %rax
punpcklbw %xmm8, %xmm8
@@ -71,6 +93,9 @@ L(entry_from_bzero):
L(return):
rep
ret
+L(only_return):
+ movq %rdi, %rax
+ ret
ALIGN (4)
L(between_32_64_bytes):
movdqu %xmm8, 16(%rdi)
@@ -129,6 +154,11 @@ L(between8_16bytes):
END (memset)
libc_hidden_builtin_def (memset)
+#if defined __CHKP__ && defined __CHKWR__
+weak_alias (memset, chkp_memset_nobnd)
+weak_alias (chkp_memset_nochk, chkp_memset_nobnd_nochk)
+#endif
+
#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 203d16e..bdf7964 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -26,6 +26,29 @@ CFLAGS-strstr.c += -msse4
CFLAGS-strcasestr.c += -msse4
CFLAGS-strcasestr-nonascii.c += -msse4
endif
+
+ifeq ($(enable-mpx), yes)
+sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \
+ memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c
+# These are C versions written with intrinsics.  We need to add checks as intrinsics manually.
+CFLAGS-varshift.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcspn-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strpbrk-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strspn-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strstr.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcasestr.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-strcasestr-nonascii.c += -fno-chkp-check-read -fno-chkp-check-write
+# Checks are added manually in these routines.
+CFLAGS-memcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-mempcpy-c.c += -fno-chkp-check-read -fno-chkp-check-write
+CFLAGS-memmove-c.c += -fno-chkp-check-read -fno-chkp-check-write
+endif
+
+ifeq ($(enable-mpx-write-only), yes)
+CFLAGS-memcpy-c.c += -D__CHKWR__
+CFLAGS-memmove-c.c += -D__CHKWR__
+endif
+
endif
ifeq ($(subdir),wcsmbs)
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
index 59b185a..5325bde 100644
--- a/sysdeps/x86_64/multiarch/Versions
+++ b/sysdeps/x86_64/multiarch/Versions
@@ -2,4 +2,17 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ chkp_memcpy_nobnd;
+ chkp_memmove_nobnd;
+ chkp_mempcpy_nobnd;
+ chkp_memcpy_nobnd_nochk;
+ chkp_memmove_nobnd_nochk;
+ chkp_mempcpy_nobnd_nochk;
+ chkp_memcpy_nochk;
+ chkp_memmove_nochk;
+ chkp_mempcpy_nochk;
+ }
+%endif
}
diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S
index 639f02b..9809d47 100644
--- a/sysdeps/x86_64/multiarch/bcopy.S
+++ b/sysdeps/x86_64/multiarch/bcopy.S
@@ -3,5 +3,10 @@
.text
ENTRY(bcopy)
xchg %rdi, %rsi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
jmp __libc_memmove /* Branch to IFUNC memmove. */
END(bcopy)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d0992e1..e3a4163 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
+#ifndef __CHKP__
+ /* We use a specific version for MPX glibc.  */
/* Support sysdeps/x86_64/multiarch/memmove_chk.S. */
IFUNC_IMPL (i, name, __memmove_chk,
IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
@@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
#ifdef SHARED
+#ifndef __CHKP__
+ /* We use a specific version of memcpy, memcpy_chk and mempcpy if Intel MPX is enabled.  */
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
IFUNC_IMPL (i, name, __memcpy_chk,
IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
@@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
__mempcpy_ssse3)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200..b5c6675 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -48,6 +48,13 @@ ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
shl $2, %rdx
# endif
+# ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+L(NoEntryCheck):
+# endif
pxor %xmm0, %xmm0
cmp $79, %rdx
ja L(79bytesormore)
@@ -70,6 +77,10 @@ L(firstbyte):
ALIGN (4)
L(79bytesormore):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
pxor %xmm1, %xmm2
@@ -90,21 +101,37 @@ L(79bytesormore):
L(less128bytes):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -112,11 +139,19 @@ L(less128bytes):
cmp $32, %rdx
jb L(less32bytesin64)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -139,41 +174,73 @@ L(128bytesormore):
L(less256bytes):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -188,11 +255,19 @@ L(less256bytes):
cmp $32, %rdx
jb L(less32bytesin128)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -207,81 +282,145 @@ L(less32bytesin128):
L(less512bytes):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqu 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqu 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqu 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqu 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqu 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqu 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqu 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqu 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -299,11 +438,19 @@ L(less512bytes):
cmp $32, %rdx
jb L(less32bytesin256)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -331,18 +478,34 @@ L(512bytesormore):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loop):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined):
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -403,21 +582,37 @@ L(2aligned):
L(less128bytesin2aligned):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -425,11 +620,19 @@ L(less128bytesin2aligned):
cmp $32, %rdx
jb L(less32bytesin64in2alinged)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -453,41 +656,73 @@ L(128bytesormorein2aligned):
L(less256bytesin2alinged):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -502,11 +737,19 @@ L(less256bytesin2alinged):
cmp $32, %rdx
jb L(less32bytesin128in2aligned)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -523,81 +766,145 @@ L(less32bytesin128in2aligned):
L(256bytesormorein2aligned):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqa 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqa 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqa 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqa 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqa 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqa 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqa 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqa 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -648,18 +955,34 @@ L(512bytesormorein2aligned):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loopin2aligned):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined):
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c
new file mode 100644
index 0000000..6fa50ea
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-c.c
@@ -0,0 +1,80 @@
+/* C-version of memcpy for use when Intel MPX is enabled
+   in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ if (!n) return dst;
+
+ __bnd_chk_ptr_lbounds(dst);
+ __bnd_chk_ptr_ubounds(dst+n-1);
+#ifndef __CHKWR__
+ __bnd_chk_ptr_lbounds(src);
+ __bnd_chk_ptr_ubounds(src+n-1);
+#endif
+
+ return chkp_memcpy_nochk(dst, src, n);
+}
+
+void *
+chkp_memcpy_nochk (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+
+# if defined SHARED && !defined NOT_IN_libc && !defined IA32
+# include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
+# else
+weak_alias (__memcpy, memcpy)
+# endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
new file mode 100644
index 0000000..7fedbee
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
@@ -0,0 +1,5 @@
+/* optimized version of memcpy without any checks or copying bounds. */
+#define MEMCPY chkp_memcpy_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef..16b4e68 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -27,7 +27,11 @@
#include "asm-syntax.h"
#ifndef MEMCPY
-# define MEMCPY __memcpy_ssse3_back
+# if defined __CHKP__ || defined __CHKWR__
+# define MEMCPY chkp_memcpy_nobnd
+# else
+# define MEMCPY __memcpy_ssse3_back
+# endif
# define MEMCPY_CHK __memcpy_chk_ssse3_back
#endif
@@ -48,7 +52,7 @@
ud2
.section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && defined MEMCPY_CHK
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
@@ -56,6 +60,15 @@ END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
+#ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
+
mov %rdi, %rax
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
@@ -87,6 +100,15 @@ L(bk_write):
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
+#ifdef __CHKP__
+L(NoEntryCheck):
+ mov %rdi, %rax
+# ifdef USE_AS_MEMPCPY
+ add %rdx, %rax
+# endif
+ ret
+#endif
+
ALIGN (4)
L(144bytesormore):
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index a1e5031..fc5ab2d 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <shlib-compat.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need memcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
.text
ENTRY(__new_memcpy)
.type __new_memcpy, @gnu_indirect_function
@@ -43,37 +44,39 @@ ENTRY(__new_memcpy)
3: ret
END(__new_memcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __memcpy_sse2, @function; \
.globl __memcpy_sse2; \
.hidden __memcpy_sse2; \
.p2align 4; \
__memcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __memcpy_chk_sse2, @function; \
.globl __memcpy_chk_sse2; \
.p2align 4; \
__memcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
-# undef libc_hidden_builtin_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
+# endif
+
+# include "../memcpy.S"
-#include "../memcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
new file mode 100644
index 0000000..1eee86c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memcpy_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index ad01d8c..6f87f26 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch memcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__memcpy_chk)
leaq __memcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__memcpy_chk)
-# else
-# include "../memcpy_chk.S"
+# else
+# include "../memcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c
new file mode 100644
index 0000000..7111128
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-c.c
@@ -0,0 +1,118 @@
+/* C-version of memmove for use when Intel MPX is enabled
+   in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ if (n == 0) return dst;
+
+ __bnd_chk_ptr_lbounds(dst);
+ __bnd_chk_ptr_ubounds(dst+n-1);
+#ifndef __CHKWR__
+ __bnd_chk_ptr_lbounds(src);
+ __bnd_chk_ptr_ubounds(src+n-1);
+#endif
+ return chkp_memmove_nochk(dst, src, n);
+}
+
+
+void *
+chkp_memmove_nochk (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ /* forward copying */
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+ offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1);
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ /* forward copying */
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __libc_memmove)
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
+
+# if defined SHARED && !defined NOT_IN_libc
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
+# endif
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
new file mode 100644
index 0000000..2a1f3e6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
@@ -0,0 +1,6 @@
+/* optimized version of memmove without any checks or copying bounds. */
+#define USE_AS_MEMMOVE
+#define MEMCPY chkp_memmove_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
index f9a4e9a..478141b 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
@@ -1,4 +1,10 @@
#define USE_AS_MEMMOVE
-#define MEMCPY __memmove_ssse3_back
+#if defined __CHKP__ || defined __CHKWR__
+/* version of memmove with no copying of bounds support
+ if there are pointers in the source buffer. */
+# define MEMCPY chkp_memmove_nobnd
+# else
+# define MEMCPY __memmove_ssse3_back
+#endif
#define MEMCPY_CHK __memmove_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8149c48..0d2c6f0 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -17,31 +17,32 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifndef NOT_IN_libc
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
+#ifndef __CHKP__
+# ifndef NOT_IN_libc
+# define MEMMOVE __memmove_sse2
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
__hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
+# endif
/* Redefine memmove so that the compiler won't complain about the type
mismatch with the IFUNC selector in strong_alias, below. */
-# undef memmove
-# define memmove __redirect_memmove
-# include <string.h>
-# undef memmove
+# undef memmove
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-#endif
+# endif
-#include "string/memmove.c"
+# include "string/memmove.c"
-#ifndef NOT_IN_libc
-# include <shlib-compat.h>
-# include "init-arch.h"
+# ifndef NOT_IN_libc
+# include <shlib-compat.h>
+# include "init-arch.h"
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
@@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove,
strong_alias (__libc_memmove, memmove)
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c
new file mode 100644
index 0000000..bbf53d0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 17ed460..c1b0b93 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -17,19 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <string.h>
-#include "init-arch.h"
+#ifndef __CHKP__
+# include <string.h>
+# include "init-arch.h"
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define MEMMOVE_CHK __memmove_chk_sse2
extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
-#include "debug/memmove_chk.c"
+# include "debug/memmove_chk.c"
libc_ifunc (__memmove_chk,
HAS_SSSE3
? (HAS_FAST_COPY_BACKWARD
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
: __memmove_chk_sse2);
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c
new file mode 100644
index 0000000..522fb86
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-c.c
@@ -0,0 +1,36 @@
+/* C-version of mempcpy for use when Intel MPX is enabled
+   in order to process an array of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ return memcpy(dst, src, n) + n;
+}
+
+void *
+chkp_mempcpy_nochk (void *dst, const void *src, size_t n)
+{
+ return chkp_memcpy_nochk(dst, src, n) + n;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __GI___mempcpy)
+weak_alias (mempcpy, __mempcpy)
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
new file mode 100644
index 0000000..eb929f4
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
@@ -0,0 +1,6 @@
+/* optimized version of mempcpy without any checks or copying bounds. */
+#define USE_AS_MEMPCPY
+#define MEMCPY chkp_mempcpy_nobnd_nochk
+#undef __CHKP__
+#undef __CHKWR__
+#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
index 82ffacb..f32ecfc 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
@@ -1,4 +1,12 @@
#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_ssse3_back
-#define MEMCPY_CHK __mempcpy_chk_ssse3_back
+
+#if defined __CHKP__ || defined __CHKWR__
+/* version of mempcpy with no copying of bounds support
+ if there are pointers in the source buffer. */
+# define MEMCPY chkp_mempcpy_nobnd
+#else
+# define MEMCPY __mempcpy_ssse3_back
+#endif
+
+#define MEMCPY_CHK __mempcpy_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index b8b7fcd..4ec5825 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -18,13 +18,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need mempcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
@@ -40,38 +41,40 @@ ENTRY(__mempcpy)
2: ret
END(__mempcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __mempcpy_sse2, @function; \
.p2align 4; \
.globl __mempcpy_sse2; \
.hidden __mempcpy_sse2; \
__mempcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __mempcpy_chk_sse2, @function; \
.globl __mempcpy_chk_sse2; \
.p2align 4; \
__mempcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
+# undef libc_hidden_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_def(name) \
+# define libc_hidden_def(name) \
.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
+# endif
+
+# include "../mempcpy.S"
-#include "../mempcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
new file mode 100644
index 0000000..ba17078
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/mempcpy_chk.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 3801db3..98acf96 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#if !defined __CHKP__ && !defined __CHKWR__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch mempcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk)
leaq __mempcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__mempcpy_chk)
-# else
-# include "../mempcpy_chk.S"
+# else
+# include "../mempcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 028c6d3..a3535ad 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -25,6 +25,14 @@
# define STRCAT __strcat_sse2_unaligned
# endif
+# ifdef __CHKP__
+# define RETURN \
+ bndcu -1(%rdi, %rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
# define USE_AS_STRCAT
.text
@@ -37,6 +45,10 @@ ENTRY (STRCAT)
/* Inline corresponding strlen file, temporary until new strcpy
implementation gets merged. */
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %rax, %rax
mov %edi, %ecx
and $0x3f, %ecx
@@ -67,84 +79,132 @@ L(align16_start):
pxor %xmm1, %xmm1
pxor %xmm2, %xmm2
pxor %xmm3, %xmm3
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -153,6 +213,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $80, %rax
pmovmskb %xmm0, %edx
@@ -162,6 +225,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm1
add $16, %rax
pmovmskb %xmm1, %edx
@@ -171,6 +237,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm2
add $16, %rax
pmovmskb %xmm2, %edx
@@ -180,6 +249,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm3
add $16, %rax
pmovmskb %xmm3, %edx
@@ -187,8 +259,12 @@ L(align16_start):
jnz L(exit)
add $16, %rax
+
.p2align 4
L(align64_loop):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movaps (%rax), %xmm4
pminub 16(%rax), %xmm4
movaps 32(%rax), %xmm5
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index f170238..4311e86 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -91,6 +91,10 @@ __strchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
pxor %xmm2, %xmm2
movd %esi, %xmm1
movl %edi, %ecx
@@ -124,6 +128,9 @@ __strchr_sse42:
ja L(return_null)
L(unaligned_match):
addq %rdi, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
.p2align 4
@@ -135,15 +142,27 @@ L(unaligned_no_match):
L(loop):
addq $16, %r8
L(aligned_start):
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
jmp L(loop)
@@ -159,6 +178,9 @@ L(return_null):
.p2align 4
L(loop_exit):
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
cfi_endproc
.size __strchr_sse42, .-__strchr_sse42
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c84f1c2..edfa915 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -127,6 +127,14 @@ STRCMP_SSE42:
je LABEL(Byte0)
mov %rdx, %r11
#endif
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -210,6 +218,10 @@ LABEL(touppermask):
#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte
@@ -231,6 +243,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -280,6 +297,10 @@ LABEL(ashr_0):
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
.p2align 4
LABEL(ashr_0_use):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -295,6 +316,10 @@ LABEL(ashr_0_use):
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use):
jbe LABEL(strcmp_exitz)
#endif
lea -16(%rdx, %rcx), %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movzbl (%rdi, %rcx), %eax
movzbl (%rsi, %rcx), %edx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -362,6 +391,15 @@ LABEL(ashr_1):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_1_use)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_1_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_1_use):
@@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $14, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_1_check)
+#else
ja LABEL(nibble_ashr_1_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -450,6 +492,15 @@ LABEL(ashr_2):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_2_use)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_2_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_2_use):
@@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $13, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_2_check)
+#else
ja LABEL(nibble_ashr_2_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -539,6 +594,15 @@ LABEL(ashr_3):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_3_use)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_3_restart_use)
+#endif
LABEL(loop_ashr_3_use):
add $16, %r10
@@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $12, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_3_check)
+#else
ja LABEL(nibble_ashr_3_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -627,6 +695,15 @@ LABEL(ashr_4):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_4_use)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_4_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_4_use):
@@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $11, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_4_check)
+#else
ja LABEL(nibble_ashr_4_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -716,6 +797,15 @@ LABEL(ashr_5):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_5_use)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_5_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_5_use):
@@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $10, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_5_check)
+#else
ja LABEL(nibble_ashr_5_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -806,6 +900,15 @@ LABEL(ashr_6):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_6_use)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_6_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_6_use):
@@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $9, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_6_check)
+#else
ja LABEL(nibble_ashr_6_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -895,6 +1002,15 @@ LABEL(ashr_7):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_7_use)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_7_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_7_use):
@@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $8, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_7_check)
+#else
ja LABEL(nibble_ashr_7_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -984,6 +1104,15 @@ LABEL(ashr_8):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_8_use)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_8_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_8_use):
@@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $7, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_8_check)
+#else
ja LABEL(nibble_ashr_8_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1073,6 +1206,15 @@ LABEL(ashr_9):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_9_use)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_9_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_9_use):
@@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $6, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_9_check)
+#else
ja LABEL(nibble_ashr_9_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1163,6 +1309,15 @@ LABEL(ashr_10):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_10_use)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_10_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_10_use):
@@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $5, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_10_check)
+#else
ja LABEL(nibble_ashr_10_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1252,6 +1411,15 @@ LABEL(ashr_11):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_11_use)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_11_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_11_use):
@@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $4, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_11_check)
+#else
ja LABEL(nibble_ashr_11_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1341,6 +1513,15 @@ LABEL(ashr_12):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_12_use)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_12_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_12_use):
@@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $3, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_12_check)
+#else
ja LABEL(nibble_ashr_12_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1431,6 +1616,15 @@ LABEL(ashr_13):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_13_use)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_13_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_13_use):
@@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $2, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_13_check)
+#else
ja LABEL(nibble_ashr_13_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1521,6 +1719,15 @@ LABEL(ashr_14):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_14_use)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_14_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_14_use):
@@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $1, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_14_check)
+#else
ja LABEL(nibble_ashr_14_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1613,6 +1824,15 @@ LABEL(ashr_15):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_15_use)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_15_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_15_use):
@@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $0, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_15_check)
+#else
ja LABEL(nibble_ashr_15_restart_use)
+#endif
LABEL(nibble_ashr_exit_use):
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1691,6 +1915,11 @@ LABEL(exit_use):
test %r8d, %r8d
jz LABEL(ret_use)
xchg %eax, %edx
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(ret_use):
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
@@ -1707,6 +1936,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -1717,6 +1951,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 7710173..e6baee9 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -33,7 +33,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
lea (%r11, %rcx), %rcx; \
- jmp *%rcx
+ jmp *%rcx
# ifndef USE_AS_STRCAT
@@ -51,6 +51,16 @@ ENTRY (STRCPY)
# endif
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+ bndcu -1(%rdi, %rdx), %bnd0
+# endif
+# endif
+
and $63, %rcx
cmp $32, %rcx
jbe L(SourceStringAlignmentLess32)
@@ -79,6 +89,9 @@ ENTRY (STRCPY)
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+# endif
pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx
@@ -91,6 +104,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm1, (%rdi)
/* If source address alignment != destination address alignment */
@@ -101,6 +117,10 @@ L(Unalign16Both):
add %rcx, %r8
# endif
mov $16, %rcx
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqa (%rsi, %rcx), %xmm1
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
@@ -118,6 +138,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -134,6 +158,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm4
movdqu %xmm3, (%rdi, %rcx)
pcmpeqb %xmm4, %xmm0
@@ -150,6 +178,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm1
movdqu %xmm4, (%rdi, %rcx)
pcmpeqb %xmm1, %xmm0
@@ -166,6 +198,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
pcmpeqb %xmm2, %xmm0
@@ -182,6 +218,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -198,6 +238,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqu %xmm3, (%rdi, %rcx)
mov %rsi, %rdx
lea 16(%rsi, %rcx), %rsi
@@ -208,6 +252,9 @@ L(Unalign16Both):
lea 128(%r8, %rdx), %r8
# endif
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%rsi), %bnd1
+# endif
movaps (%rsi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rsi), %xmm5
@@ -229,6 +276,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %rdi
add $64, %rsi
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+ bndcu (%rdi), %bnd0
+# endif
movdqu %xmm4, -64(%rdi)
movaps (%rsi), %xmm2
movdqa %xmm2, %xmm4
@@ -271,16 +322,28 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%rdi, %rdx), %bnd0
+# endif
lea 48(%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 63(%rdi), %bnd0
+# endif
movdqu %xmm7, 48(%rdi)
add $15, %r8
sub %rdx, %r8
+# ifdef __CHKP__
+ bndcu 49(%rdi, %rdx), %bnd0
+# endif
lea 49(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
# else
@@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+ bndcu 15(%rdi), %bnd0
+# endif
pcmpeqb %xmm2, %xmm0
movdqu %xmm1, (%rdi)
pmovmskb %xmm0, %rdx
@@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
add $63, %r8
sub %rdx, %r8
@@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0):
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
@@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16):
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
@@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
mov %dh, (%rdi)
# ifdef USE_AS_STPCPY
lea (%rdi), %rax
@@ -543,6 +622,9 @@ L(Exit1):
.p2align 4
L(Exit2):
mov (%rsi), %dx
+# ifdef __CHKP__
+ bndcu 1(%rdi), %bnd0
+# endif
mov %dx, (%rdi)
# ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
@@ -557,6 +639,9 @@ L(Exit2):
.p2align 4
L(Exit3):
mov (%rsi), %cx
+# ifdef __CHKP__
+ bndcu 2(%rdi), %bnd0
+# endif
mov %cx, (%rdi)
mov %dh, 2(%rdi)
# ifdef USE_AS_STPCPY
@@ -572,6 +657,9 @@ L(Exit3):
.p2align 4
L(Exit4):
mov (%rsi), %edx
+# ifdef __CHKP__
+ bndcu 3(%rdi), %bnd0
+# endif
mov %edx, (%rdi)
# ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
@@ -586,6 +674,9 @@ L(Exit4):
.p2align 4
L(Exit5):
mov (%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+# endif
mov %dh, 4(%rdi)
mov %ecx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -602,6 +693,9 @@ L(Exit5):
L(Exit6):
mov (%rsi), %ecx
mov 4(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 5(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
# ifdef USE_AS_STPCPY
@@ -618,6 +712,9 @@ L(Exit6):
L(Exit7):
mov (%rsi), %ecx
mov 3(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 6(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
# ifdef USE_AS_STPCPY
@@ -633,6 +730,9 @@ L(Exit7):
.p2align 4
L(Exit8):
mov (%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 7(%rdi), %bnd0
+# endif
mov %rdx, (%rdi)
# ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
@@ -647,6 +747,9 @@ L(Exit8):
.p2align 4
L(Exit9):
mov (%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+# endif
mov %dh, 8(%rdi)
mov %rcx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -663,6 +766,9 @@ L(Exit9):
L(Exit10):
mov (%rsi), %rcx
mov 8(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 9(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -679,6 +785,9 @@ L(Exit10):
L(Exit11):
mov (%rsi), %rcx
mov 7(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 10(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -695,6 +804,9 @@ L(Exit11):
L(Exit12):
mov (%rsi), %rcx
mov 8(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 11(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -711,6 +823,9 @@ L(Exit12):
L(Exit13):
mov (%rsi), %rcx
mov 5(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
# ifdef USE_AS_STPCPY
@@ -727,6 +842,9 @@ L(Exit13):
L(Exit14):
mov (%rsi), %rcx
mov 6(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 13(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
# ifdef USE_AS_STPCPY
@@ -743,6 +861,9 @@ L(Exit14):
L(Exit15):
mov (%rsi), %rcx
mov 7(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 14(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -758,6 +879,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
# ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
@@ -772,6 +896,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %dh, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -788,6 +915,9 @@ L(Exit17):
L(Exit18):
movdqu (%rsi), %xmm0
mov 16(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 17(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -804,6 +934,9 @@ L(Exit18):
L(Exit19):
movdqu (%rsi), %xmm0
mov 15(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -820,6 +953,9 @@ L(Exit19):
L(Exit20):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -836,6 +972,9 @@ L(Exit20):
L(Exit21):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dh, 20(%rdi)
@@ -853,6 +992,9 @@ L(Exit21):
L(Exit22):
movdqu (%rsi), %xmm0
mov 14(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 21(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -869,6 +1011,9 @@ L(Exit22):
L(Exit23):
movdqu (%rsi), %xmm0
mov 15(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 22(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -885,6 +1030,9 @@ L(Exit23):
L(Exit24):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 23(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -901,6 +1049,9 @@ L(Exit24):
L(Exit25):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
mov %dh, 24(%rdi)
@@ -919,6 +1070,9 @@ L(Exit26):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 25(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
@@ -937,6 +1091,9 @@ L(Exit27):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 23(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
@@ -955,6 +1112,9 @@ L(Exit28):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
@@ -972,6 +1132,9 @@ L(Exit28):
L(Exit29):
movdqu (%rsi), %xmm0
movdqu 13(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY
@@ -988,6 +1151,9 @@ L(Exit29):
L(Exit30):
movdqu (%rsi), %xmm0
movdqu 14(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -1004,6 +1170,9 @@ L(Exit30):
L(Exit31):
movdqu (%rsi), %xmm0
movdqu 15(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -1020,6 +1189,9 @@ L(Exit31):
L(Exit32):
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 3f92a41..1fed105 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -97,6 +97,10 @@ __strrchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %eax,%eax /* RAX has the last occurrence of s. */
movd %esi, %xmm1
punpcklbw %xmm1, %xmm1
@@ -135,6 +139,9 @@ L(unaligned_no_byte):
contain the NULL terminator. */
jg L(exit)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
/* Loop start on aligned string. */
.p2align 4
@@ -142,6 +149,9 @@ L(loop):
pcmpistri $0x4a, (%r8), %xmm1
jbe L(match_or_eos)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(match_or_eos):
@@ -149,11 +159,17 @@ L(match_or_eos):
L(match_no_eos):
leaq (%r8,%rcx), %rax
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(had_eos):
jnc L(exit)
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
.p2align 4
L(exit):
ret
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index b7de092..77889dd 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3)
mov %rsi, %rcx
mov %rdi, %rdx
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
cmpl $0, (%rcx)
jz L(Exit4)
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
cmpl $0, 4(%rcx)
jz L(Exit8)
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
cmpl $0, 8(%rcx)
jz L(Exit12)
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
cmpl $0, 12(%rcx)
jz L(Exit16)
@@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
mov (%rcx), %r9
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %r9, (%rdx)
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+# endif
pcmpeqd (%rsi), %xmm0
mov 8(%rcx), %r9
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %r9, 8(%rdx)
pmovmskb %xmm0, %rax
@@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 16(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm1
movaps 16(%rcx), %xmm2
movaps %xmm1, (%rdx)
@@ -82,6 +109,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -91,6 +122,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm4
movaps %xmm3, (%rdx, %rsi)
pcmpeqd %xmm4, %xmm0
@@ -100,6 +135,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm1
movaps %xmm4, (%rdx, %rsi)
pcmpeqd %xmm1, %xmm0
@@ -109,6 +148,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm2
movaps %xmm1, (%rdx, %rsi)
pcmpeqd %xmm2, %xmm0
@@ -118,6 +161,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -127,6 +174,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps %xmm3, (%rdx, %rsi)
mov %rcx, %rax
lea 16(%rcx, %rsi), %rcx
@@ -138,6 +189,10 @@ L(Align16Both):
.p2align 4
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 63(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rcx), %xmm5
@@ -168,6 +223,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -49(%rdx), %bnd0
+# endif
movaps %xmm4, -64(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
@@ -176,11 +234,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -33(%rdx), %bnd0
+# endif
movaps %xmm5, -48(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%rdx), %bnd0
+# endif
movaps %xmm6, -32(%rdx)
pcmpeqd %xmm7, %xmm0
@@ -190,11 +254,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %rsi
+# ifdef __CHKP__
+ bndcu -1(%rdx), %bnd0
+# endif
movaps %xmm7, -16(%rdx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps -4(%rcx), %xmm1
movaps 12(%rcx), %xmm2
L(Shl4Start):
@@ -206,6 +276,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -219,6 +293,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -232,6 +310,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -244,6 +326,9 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -258,6 +343,9 @@ L(Shl4Start):
.p2align 4
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -279,6 +367,9 @@ L(Shl4LoopStart):
lea 64(%rcx), %rcx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -287,6 +378,10 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
+# ifdef __CHKP__
+ bndcu -4(%rcx), %bnd1
+ bndcu 11(%rdx), %bnd0
+# endif
movdqu -4(%rcx), %xmm1
mov $12, %rsi
movdqu %xmm1, -4(%rdx)
@@ -294,6 +389,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps -8(%rcx), %xmm1
movaps 8(%rcx), %xmm2
L(Shl8Start):
@@ -305,6 +403,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -318,6 +420,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -331,6 +437,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -343,6 +453,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -357,6 +471,9 @@ L(Shl8Start):
.p2align 4
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -378,6 +495,9 @@ L(Shl8LoopStart):
lea 64(%rcx), %rcx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -386,6 +506,10 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 7(%rdx), %bnd0
+# endif
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
@@ -393,6 +517,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps -12(%rcx), %xmm1
movaps 4(%rcx), %xmm2
L(Shl12Start):
@@ -404,6 +531,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -417,6 +548,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -430,6 +565,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -442,6 +581,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -456,6 +599,9 @@ L(Shl12Start):
.p2align 4
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -476,6 +622,9 @@ L(Shl12LoopStart):
lea 64(%rcx), %rcx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -484,6 +633,10 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 3(%rdx), %bnd0
+# endif
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
@@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes):
jnz L(Exit4)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -510,6 +666,9 @@ L(ExitHigh):
jnz L(Exit12)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
@@ -519,6 +678,9 @@ L(ExitHigh):
.p2align 4
L(Exit4):
movl (%rcx), %eax
+# ifdef __CHKP__
+ bndcu 3(%rdx), %bnd0
+# endif
movl %eax, (%rdx)
mov %rdi, %rax
ret
@@ -526,6 +688,9 @@ L(Exit4):
.p2align 4
L(Exit8):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -533,6 +698,9 @@ L(Exit8):
.p2align 4
L(Exit12):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 11(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
@@ -542,6 +710,9 @@ L(Exit12):
.p2align 4
L(Exit16):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index f4d5591..2f4cb25 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -20,11 +20,23 @@
#include <sysdep.h>
+#ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+#else
+# define RETURN ret
+#endif
+
.text
ENTRY (rawmemchr)
movd %rsi, %xmm1
mov %rdi, %rcx
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+#endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -63,7 +75,7 @@ L(crosscache):
add %rdi, %rax
add %rcx, %rax
- ret
+ RETURN
.p2align 4
L(unaligned_no_match):
@@ -71,24 +83,36 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
@@ -99,24 +123,36 @@ L(loop_prolog):
test $0x3f, %rdi
jz L(align64_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -129,6 +165,9 @@ L(loop_prolog):
.p2align 4
L(align64_loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
@@ -170,36 +209,36 @@ L(align64_loop):
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
xor %rax, %rax
- ret
+ RETURN
END (rawmemchr)
diff --git a/sysdeps/x86_64/stpcpy_chk-c.c b/sysdeps/x86_64/stpcpy_chk-c.c
new file mode 100644
index 0000000..900fa76
--- /dev/null
+++ b/sysdeps/x86_64/stpcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/stpcpy_chk.c>
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index 8bea6fb..7832379 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -25,6 +25,11 @@
.text
ENTRY (strcat)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+
movq %rdi, %rcx /* Dest. register. */
andl $7, %ecx /* mask alignment bits */
movq %rdi, %rax /* Duplicate destination pointer. */
@@ -36,7 +41,11 @@ ENTRY (strcat)
neg %ecx /* We need to align to 8 bytes. */
addl $8,%ecx
/* Search the first bytes directly. */
-0: cmpb $0x0,(%rax) /* is byte NUL? */
+0:
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
+ cmpb $0x0,(%rax) /* is byte NUL? */
je 2f /* yes => start copy */
incq %rax /* increment pointer */
decl %ecx
@@ -48,6 +57,9 @@ ENTRY (strcat)
.p2align 4
4:
/* First unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -62,6 +74,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Second unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -76,6 +91,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Third unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -90,6 +108,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Fourth unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -163,6 +184,9 @@ ENTRY (strcat)
.p2align 4
22:
/* 1st unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -177,10 +201,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 2nd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -195,10 +225,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 3rd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -213,10 +249,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 4th unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -231,6 +273,9 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
jmp 22b /* Next iteration. */
@@ -239,10 +284,16 @@ ENTRY (strcat)
The loop is unrolled twice. */
.p2align 4
23:
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */
jz 24f /* yes, finish. */
incq %rdx /* Increment destination. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %ah, (%rdx) /* 2nd byte. */
testb %ah, %ah /* Is it NUL?. */
jz 24f /* yes, finish. */
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index d89f1eb..8519a81 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -22,6 +22,10 @@
.text
ENTRY (strchr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -29,6 +33,9 @@ ENTRY (strchr)
pxor %xmm2, %xmm2
punpcklbw %xmm1, %xmm1
orl $0xffffffff, %esi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pshufd $0, %xmm1, %xmm1
subq %rdi, %rcx
@@ -44,7 +51,11 @@ ENTRY (strchr)
orl %edx, %ecx
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index d8c345b..3e4abfa 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -23,6 +23,10 @@
.text
ENTRY (__strchrnul)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -44,7 +48,11 @@ ENTRY (__strchrnul)
andl %esi, %ecx
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
@@ -56,6 +64,9 @@ ENTRY (__strchrnul)
1: bsfl %ecx, %edx
leaq -16(%rdi,%rdx), %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
END (__strchrnul)
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 7680937..8959784 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -128,7 +128,16 @@ libc_hidden_def (__strncasecmp)
ENTRY (STRCMP)
#ifdef NOT_IN_libc
/* Simple version since we can't use SSE registers in ld.so. */
-L(oop): movb (%rdi), %al
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+L(oop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
+ movb (%rdi), %al
cmpb (%rsi), %al
jne L(neq)
incq %rdi
@@ -177,6 +186,12 @@ END (STRCMP)
je LABEL(Byte0)
mov %rdx, %r11
# endif
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -243,6 +258,10 @@ END (STRCMP)
# endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte alignment.
@@ -263,6 +282,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
lea 15(%rax), %r9
sub %rcx, %r9
@@ -310,6 +334,10 @@ LABEL(ashr_0):
*/
.p2align 4
LABEL(loop_ashr_0):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
TOLOWER (%xmm1, %xmm2)
@@ -326,6 +354,10 @@ LABEL(loop_ashr_0):
jbe LABEL(strcmp_exitz)
# endif
add $16, %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
TOLOWER (%xmm1, %xmm2)
@@ -377,6 +409,15 @@ LABEL(ashr_1):
lea 1(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_1)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_1)
+# endif
.p2align 4
LABEL(loop_ashr_1):
@@ -460,7 +501,11 @@ LABEL(nibble_ashr_1):
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* substract 4K from %r10 */
+# ifdef __CHKP__
+ jmp LABEL(ashr_1_check)
+# else
jmp LABEL(gobble_ashr_1)
+# endif
/*
* Once find null char, determine if there is a string mismatch
@@ -507,6 +552,15 @@ LABEL(ashr_2):
lea 2(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_2)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_2)
+# endif
.p2align 4
LABEL(loop_ashr_2):
@@ -588,7 +642,11 @@ LABEL(nibble_ashr_2):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_2_check)
+# else
jmp LABEL(gobble_ashr_2)
+# endif
.p2align 4
LABEL(ashr_2_exittail):
@@ -632,6 +690,15 @@ LABEL(ashr_3):
lea 3(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_3)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_3)
+# endif
.p2align 4
LABEL(loop_ashr_3):
@@ -713,7 +780,11 @@ LABEL(nibble_ashr_3):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_3_check)
+# else
jmp LABEL(gobble_ashr_3)
+# endif
.p2align 4
LABEL(ashr_3_exittail):
@@ -757,6 +828,15 @@ LABEL(ashr_4):
lea 4(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_4)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_4)
+# endif
.p2align 4
LABEL(loop_ashr_4):
@@ -838,7 +918,11 @@ LABEL(nibble_ashr_4):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_4_check)
+# else
jmp LABEL(gobble_ashr_4)
+# endif
.p2align 4
LABEL(ashr_4_exittail):
@@ -882,6 +966,15 @@ LABEL(ashr_5):
lea 5(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_5)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_5)
+# endif
.p2align 4
LABEL(loop_ashr_5):
@@ -963,7 +1056,11 @@ LABEL(nibble_ashr_5):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_5_check)
+# else
jmp LABEL(gobble_ashr_5)
+# endif
.p2align 4
LABEL(ashr_5_exittail):
@@ -1007,6 +1104,15 @@ LABEL(ashr_6):
lea 6(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_6)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_6)
+# endif
.p2align 4
LABEL(loop_ashr_6):
@@ -1088,7 +1194,11 @@ LABEL(nibble_ashr_6):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_6_check)
+# else
jmp LABEL(gobble_ashr_6)
+# endif
.p2align 4
LABEL(ashr_6_exittail):
@@ -1132,6 +1242,15 @@ LABEL(ashr_7):
lea 7(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_7)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_7)
+# endif
.p2align 4
LABEL(loop_ashr_7):
@@ -1213,7 +1332,11 @@ LABEL(nibble_ashr_7):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_7_check)
+# else
jmp LABEL(gobble_ashr_7)
+# endif
.p2align 4
LABEL(ashr_7_exittail):
@@ -1257,6 +1380,15 @@ LABEL(ashr_8):
lea 8(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_8)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_8)
+# endif
.p2align 4
LABEL(loop_ashr_8):
@@ -1338,7 +1470,11 @@ LABEL(nibble_ashr_8):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_8_check)
+# else
jmp LABEL(gobble_ashr_8)
+# endif
.p2align 4
LABEL(ashr_8_exittail):
@@ -1382,6 +1518,15 @@ LABEL(ashr_9):
lea 9(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_9)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_9)
+# endif
.p2align 4
LABEL(loop_ashr_9):
@@ -1463,7 +1608,11 @@ LABEL(nibble_ashr_9):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_9_check)
+# else
jmp LABEL(gobble_ashr_9)
+# endif
.p2align 4
LABEL(ashr_9_exittail):
@@ -1507,6 +1656,15 @@ LABEL(ashr_10):
lea 10(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_10)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_10)
+# endif
.p2align 4
LABEL(loop_ashr_10):
@@ -1588,7 +1746,11 @@ LABEL(nibble_ashr_10):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_10_check)
+# else
jmp LABEL(gobble_ashr_10)
+# endif
.p2align 4
LABEL(ashr_10_exittail):
@@ -1632,6 +1794,15 @@ LABEL(ashr_11):
lea 11(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_11)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_11)
+# endif
.p2align 4
LABEL(loop_ashr_11):
@@ -1713,7 +1884,11 @@ LABEL(nibble_ashr_11):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_11_check)
+# else
jmp LABEL(gobble_ashr_11)
+# endif
.p2align 4
LABEL(ashr_11_exittail):
@@ -1757,6 +1932,15 @@ LABEL(ashr_12):
lea 12(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_12)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_12)
+# endif
.p2align 4
LABEL(loop_ashr_12):
@@ -1838,7 +2022,11 @@ LABEL(nibble_ashr_12):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_12_check)
+# else
jmp LABEL(gobble_ashr_12)
+# endif
.p2align 4
LABEL(ashr_12_exittail):
@@ -1882,6 +2070,15 @@ LABEL(ashr_13):
lea 13(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_13)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_13)
+# endif
.p2align 4
LABEL(loop_ashr_13):
@@ -1963,7 +2160,11 @@ LABEL(nibble_ashr_13):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_13_check)
+# else
jmp LABEL(gobble_ashr_13)
+# endif
.p2align 4
LABEL(ashr_13_exittail):
@@ -2007,6 +2208,15 @@ LABEL(ashr_14):
lea 14(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_14)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_14)
+# endif
.p2align 4
LABEL(loop_ashr_14):
@@ -2088,7 +2298,11 @@ LABEL(nibble_ashr_14):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_14_check)
+# else
jmp LABEL(gobble_ashr_14)
+# endif
.p2align 4
LABEL(ashr_14_exittail):
@@ -2134,6 +2348,15 @@ LABEL(ashr_15):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_15)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_15)
+# endif
.p2align 4
LABEL(loop_ashr_15):
@@ -2215,7 +2438,11 @@ LABEL(nibble_ashr_15):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ jmp LABEL(ashr_15_check)
+# else
jmp LABEL(gobble_ashr_15)
+# endif
.p2align 4
LABEL(ashr_15_exittail):
@@ -2240,6 +2467,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -2250,6 +2482,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
# endif
+/*#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif*/
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 6128247..2b78e95 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -26,6 +26,10 @@
.text
ENTRY (STRCPY)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
movq %rsi, %rcx /* Source register. */
andl $7, %ecx /* mask alignment bits */
movq %rdi, %rdx /* Duplicate destination pointer. */
@@ -36,8 +40,14 @@ ENTRY (STRCPY)
addl $8,%ecx
/* Search the first bytes directly. */
0:
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movb (%rsi), %al /* Fetch a byte */
testb %al, %al /* Is it NUL? */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* Store it */
jz 4f /* If it was NUL, done! */
incq %rsi
@@ -54,6 +64,9 @@ ENTRY (STRCPY)
.p2align 4
1:
/* 1st unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -68,10 +81,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 2nd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -86,10 +105,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 3rd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -104,10 +129,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 4th unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -122,6 +153,9 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
jmp 1b /* Next iteration. */
@@ -132,10 +166,16 @@ ENTRY (STRCPY)
3:
/* Note that stpcpy needs to return with the value of the NUL
byte. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */
jz 4f /* yes, finish. */
incq %rdx /* Increment destination. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %ah, (%rdx) /* 2nd byte. */
testb %ah, %ah /* Is it NUL?. */
jz 4f /* yes, finish. */
diff --git a/sysdeps/x86_64/strcpy_chk-c.c b/sysdeps/x86_64/strcpy_chk-c.c
new file mode 100644
index 0000000..b48704c
--- /dev/null
+++ b/sysdeps/x86_64/strcpy_chk-c.c
@@ -0,0 +1 @@
+#include <debug/strcpy_chk.c>
diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
index 7e171de..a102820 100644
--- a/sysdeps/x86_64/strcpy_chk.S
+++ b/sysdeps/x86_64/strcpy_chk.S
@@ -18,6 +18,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#if !defined __CHKP__ && !defined __CHKWR__
#include <sysdep.h>
#include "asm-syntax.h"
@@ -206,3 +207,4 @@ ENTRY (STRCPY_CHK)
jmp HIDDEN_JUMPTARGET (__chk_fail)
END (STRCPY_CHK)
+#endif
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index 65f8a9e..0acca21 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -29,6 +29,12 @@
.text
ENTRY (strcspn)
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
movq %rdi, %rdx /* Save SRC. */
@@ -54,21 +60,34 @@ ENTRY (strcspn)
have a correct zero-extended 64-bit value in %rcx. */
.p2align 4
-L(2): movb (%rax), %cl /* get byte from skipset */
+L(2):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd1
+# endif
+ movb (%rax), %cl /* get byte from skipset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 1(%rax), %bnd1
+# endif
movb 1(%rax), %cl /* get byte from skipset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 2(%rax), %bnd1
+# endif
movb 2(%rax), %cl /* get byte from skipset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 3(%rax), %bnd1
+# endif
movb 3(%rax), %cl /* get byte from skipset */
addq $4, %rax /* increment skipset pointer */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
@@ -89,18 +108,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
.p2align 4
L(3): addq $4, %rax /* adjust pointer for full loop round */
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movb (%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
je L(4) /* yes => return */
+# ifdef __CHKP__
+ bndcu 1(%rax), %bnd0
+# endif
movb 1(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
je L(5) /* yes => return */
+# ifdef __CHKP__
+ bndcu 2(%rax), %bnd0
+# endif
movb 2(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(6) /* yes => return */
+# ifdef __CHKP__
+ bndcu 3(%rax), %bnd0
+# endif
movb 3(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jne L(3) /* no => start loop again */
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index eeb1092..065f0e6 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -63,6 +63,10 @@ L(n_nonzero):
mov %rsi, %r11
#endif
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
pxor %xmm8, %xmm8
pxor %xmm9, %xmm9
pxor %xmm10, %xmm10
@@ -157,6 +161,9 @@ L(loop_init):
L(loop):
addq $64, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
cmpq %rax, %r10
je L(exit_end)
@@ -182,6 +189,9 @@ L(first):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
.p2align 4
@@ -192,6 +202,9 @@ L(exit):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
#else
@@ -199,6 +212,9 @@ L(exit):
/* Main loop. Unrolled twice to improve L2 cache performance on core2. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
movdqa 64(%rax), %xmm8
pminub 80(%rax), %xmm8
@@ -231,6 +247,9 @@ L(exit0):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
#endif
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index e413b07..0bd3405 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -22,6 +22,10 @@
.text
ENTRY (strrchr)
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -46,7 +50,11 @@ ENTRY (strrchr)
orl %ecx, %esi
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
@@ -73,6 +81,9 @@ ENTRY (strrchr)
bsrl %edx, %edx
jz 4f
leaq -16(%rdi,%rdx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
4: ret
END (strrchr)
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 2911da2..bd3be8a 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -25,6 +25,12 @@
.text
ENTRY (strspn)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
movq %rdi, %rdx /* Save SRC. */
@@ -50,21 +56,34 @@ ENTRY (strspn)
have a correct zero-extended 64-bit value in %rcx. */
.p2align 4
-L(2): movb (%rax), %cl /* get byte from stopset */
+L(2):
+#ifdef __CHKP__
+ bndcu (%rax), %bnd1
+#endif
+ movb (%rax), %cl /* get byte from stopset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 1(%rax), %bnd1
+#endif
movb 1(%rax), %cl /* get byte from stopset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 2(%rax), %bnd1
+#endif
movb 2(%rax), %cl /* get byte from stopset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 3(%rax), %bnd1
+#endif
movb 3(%rax), %cl /* get byte from stopset */
addq $4, %rax /* increment stopset pointer */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
@@ -85,18 +104,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
.p2align 4
L(3): addq $4, %rax /* adjust pointer for full loop round */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movb (%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(4) /* no => return */
+#ifdef __CHKP__
+ bndcu 1(%rax), %bnd0
+#endif
movb 1(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(5) /* no => return */
+#ifdef __CHKP__
+ bndcu 2(%rax), %bnd0
+#endif
movb 2(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(6) /* no => return */
+#ifdef __CHKP__
+ bndcu 3(%rax), %bnd0
+#endif
movb 3(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jnz L(3) /* yes => start loop again */
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
index 5636d9a..17e2521 100644
--- a/sysdeps/x86_64/strtok.S
+++ b/sysdeps/x86_64/strtok.S
@@ -90,6 +90,9 @@ ENTRY (FUNCTION)
the last run. */
cmpq $0, %rdx
cmove %rax, %rdx
+#ifdef __CHKP__
+ bndldx (,%rax,1),%bnd0
+#endif
testq %rdx, %rdx
jz L(returnNULL)
movq %rsi, %rax /* Get start of delimiter set. */
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index 3f098dc..3ab1e47 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -22,6 +22,11 @@
.text
ENTRY (wcschr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
+
movd %rsi, %xmm1
pxor %xmm2, %xmm2
mov %rdi, %rcx
@@ -43,6 +48,9 @@ ENTRY (wcschr)
and $-16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -78,6 +86,9 @@ L(cross_cache):
L(unaligned_match):
add %rdi, %rax
add %rcx, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
.p2align 4
@@ -91,6 +102,9 @@ L(unaligned_no_match):
.p2align 4
/* Loop start on aligned string. */
L(loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -100,6 +114,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -109,6 +126,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -118,6 +138,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -142,6 +165,9 @@ L(matches):
L(match):
sub $16, %rdi
add %rdi, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
.p2align 4
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index d6b516b..38e2849 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -28,6 +28,14 @@ ENTRY (wcscmp)
*/
mov %esi, %eax
mov %edi, %edx
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
mov %al, %ch
mov %dl, %cl
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 5927352..a7d944f 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -21,20 +21,45 @@
.text
ENTRY (__wcslen)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
cmpl $0, (%rdi)
jz L(exit_tail0)
+#ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+#endif
cmpl $0, 4(%rdi)
jz L(exit_tail1)
+#ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+#endif
cmpl $0, 8(%rdi)
jz L(exit_tail2)
+#ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+#endif
cmpl $0, 12(%rdi)
jz L(exit_tail3)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
cmpl $0, 16(%rdi)
jz L(exit_tail4)
+#ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+#endif
cmpl $0, 20(%rdi)
jz L(exit_tail5)
+#ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+#endif
cmpl $0, 24(%rdi)
jz L(exit_tail6)
+#ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+#endif
cmpl $0, 28(%rdi)
jz L(exit_tail7)
@@ -44,6 +69,9 @@ ENTRY (__wcslen)
lea 16(%rdi), %rcx
and $-16, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -51,6 +79,9 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -58,6 +89,9 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -65,54 +99,81 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -123,6 +184,9 @@ ENTRY (__wcslen)
.p2align 4
L(aligned_64_loop):
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movaps (%rax), %xmm0
movaps 16(%rax), %xmm1
movaps 32(%rax), %xmm2
@@ -173,6 +237,9 @@ L(exit):
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
@@ -181,11 +248,17 @@ L(exit_high):
and $15, %ch
jz L(exit_3)
add $2, %rax
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
L(exit_1):
add $1, %rax
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index ea1e2e5..8edfc46 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -19,9 +19,22 @@
#include <sysdep.h>
+#ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+#else
+# define RETURN ret
+#endif
+
+
.text
ENTRY (wcsrchr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %rsi, %xmm1
mov %rdi, %rcx
punpckldq %xmm1, %xmm1
@@ -92,6 +105,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -101,6 +117,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %rdi
@@ -110,6 +129,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %rdi
@@ -119,6 +141,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %rdi
@@ -145,7 +170,7 @@ L(return_value):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match):
@@ -175,14 +200,14 @@ L(find_zero):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_first_wchar):
test $1, %rax
jz L(return_value)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_second_wchar):
@@ -192,7 +217,7 @@ L(find_zero_in_second_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_third_wchar):
@@ -204,12 +229,12 @@ L(find_zero_in_third_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero):
add %rcx, %rdi
- mov %rdx, %rcx
+ mov %rdx, %rcx
L(prolog_find_zero_1):
test $15, %cl
jnz L(prolog_find_zero_in_first_wchar)
@@ -228,14 +253,14 @@ L(prolog_find_zero_1):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_first_wchar):
test $1, %rax
jz L(return_null)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_second_wchar):
@@ -245,7 +270,7 @@ L(prolog_find_zero_in_second_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_third_wchar):
@@ -257,22 +282,22 @@ L(prolog_find_zero_in_third_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_second_wchar):
lea -12(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_third_wchar):
lea -8(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_fourth_wchar):
lea -4(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(return_null):