aboutsummaryrefslogtreecommitdiff
path: root/sysdeps/i386/stpncpy.S
blob: b49b757794f2e230ad84c5e4e64cd48c4f44d84e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/* Copy no more than N bytes from SRC to DEST, returning the address of
   the terminating '\0' in DEST, or DEST + N if no '\0' was copied.
   For Intel 80x86, x>=3.
   Copyright (C) 1994-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Byte offsets, relative to %esp, of the stack arguments as seen after
   the function has pushed one register (%esi).  The first 4 is
   presumably the return address slot; the second 4 is the saved
   register.  Each argument slot is 4 bytes wide.  */
#define PARMS	4+4	/* space for 1 saved reg */
#define RTN	PARMS	/* same offset as DEST */
#define DEST	RTN	/* first argument: destination buffer */
#define SRC	DEST+4	/* second argument: source string */
#define LEN	SRC+4	/* third argument: maximum byte count */

	.text
/* __stpncpy: copy at most LEN bytes from SRC to DEST.  If a NUL byte is
   copied before LEN bytes have been written, the remainder of the
   LEN-byte destination is filled with NUL bytes.  The result in %eax is
   the address of the first NUL stored in DEST, or DEST + LEN if no NUL
   was copied.

   The three arguments are read from the stack at DEST(%esp), SRC(%esp)
   and LEN(%esp) after one register has been pushed.

   Register use:
     %eax  current destination pointer; also the return value
     %esi  SRC - DEST, so that (%eax,%esi) addresses the source byte
	   belonging at (%eax); saved on entry, restored before return
     %ecx  number of bytes still allowed (biased by -4 while inside
	   the unrolled loop)
     %dl   the byte currently being copied
   %ecx, %dl and the flags are left modified on return.  */
ENTRY (__stpncpy)

	pushl %esi		/* %esi is used as scratch; restored at L(9) */
	cfi_adjust_cfa_offset (4)

	movl DEST(%esp), %eax	/* %eax = destination pointer */
	movl SRC(%esp), %esi	/* %esi = source pointer (for now) */
	cfi_rel_offset (esi, 0)
	movl LEN(%esp), %ecx	/* %ecx = maximum number of bytes */

	subl %eax, %esi		/* %esi = SRC - DEST: a single moving
				   pointer (%eax) now addresses both
				   buffers via the base+index mode */
	jmp L(1)		/* enter the loop at its length check */

	ALIGN(4)

	/* Four times unrolled loop with two loop counters (%eax and
	   %ecx).  The third value (the source address) is obtained by
	   using the index+base addressing mode.  %eax is only advanced
	   once per round, so on an early exit it still points at the
	   start of the current 4-byte group.  */
L(2):	movb (%eax,%esi), %dl	/* load byte 0 of the group */
	movb %dl, (%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(7)			/* yes, %eax already points at it */

	movb 1(%eax,%esi), %dl	/* load byte 1 of the group */
	movb %dl, 1(%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(6)			/* yes, exit with 1 step of adjustment */

	movb 2(%eax,%esi), %dl	/* load byte 2 of the group */
	movb %dl, 2(%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(5)			/* yes, exit with 2 steps of adjustment */

	movb 3(%eax,%esi), %dl	/* load byte 3 of the group */
	movb %dl, 3(%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(4)			/* yes, exit with 3 steps of adjustment */

	addl $4, %eax		/* advance the pointer past the full group */

L(1):	subl $4, %ecx		/* at least 4 bytes still allowed? */
	jae L(2)		/* yes (no borrow), then copy another group */

	/* Fewer than 4 bytes are left (0 to 3); they are processed
	   without a loop.  */

	addl $4, %ecx		/* undo the subtraction above */
	jz L(9)			/* nothing left => go to end */

	movb (%eax,%esi), %dl	/* load current char */
	movb %dl, (%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(3)			/* yes, then pad the rest with NUL */

	incl %eax		/* increment pointer */
	decl %ecx		/* decrement length counter */
	jz L(9)			/* no more allowed => exit */

	movb (%eax,%esi), %dl	/* load current char */
	movb %dl, (%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(3)			/* yes, then pad the rest with NUL */

	incl %eax		/* increment pointer */
	decl %ecx		/* decrement length counter */
	jz L(9)			/* no more allowed => exit */

	movb (%eax,%esi), %dl	/* load current char */
	movb %dl, (%eax)	/* and store it */
	testb %dl, %dl		/* was it NUL? */
	jz L(3)			/* yes, then pad the rest with NUL */

	incl %eax		/* increment pointer */
	jmp L(9)		/* we don't have to test for counter underflow
				   because we know we had at most 3 bytes
				   remaining => exit */

	/* When coming from the main loop we have to adjust the pointer
	   and the counter: each step below accounts for one byte of the
	   group that was copied before the NUL, so that at L(7) %eax
	   points at the NUL that was just stored.  */
L(4):	decl %ecx		/* decrement counter */
	incl %eax		/* increment pointer */

L(5):	decl %ecx		/* decrement counter */
	incl %eax		/* increment pointer */

L(6):	decl %ecx		/* decrement counter */
	incl %eax		/* increment pointer */
L(7):

	addl $3, %ecx		/* correct pre-decrementation of counter
				   at the beginning of the loop; but why 3
				   and not 4?  Very simple, we have to count
				   the NUL char we already wrote.  */
	jz L(9)			/* counter is also 0 => exit */

	/* We now have to fill the rest of the buffer with NUL.  This
	   is done in a tricky way.  Please note that the addressing mode
	   used below is not the same we used above.  Here we use the
	   %ecx register as the index: %eax stays on the first NUL (it is
	   the return value) while %ecx counts down, so the bytes are
	   cleared from the end of the buffer back towards %eax + 1.  */
L(8):
	movb $0, (%ecx,%eax)	/* store NUL char */
L(3):	decl %ecx		/* all bytes written?  (When entered from
				   the tail code this first discounts the
				   NUL that was just copied.)  */
	jnz L(8)		/* no, then again */

L(9):	popl %esi		/* restore saved register content */
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)

	ret			/* result is in %eax */
END (__stpncpy)

/* NOTE(review): libc_hidden_def and weak_alias are macros defined outside
   this file.  Going by their names and arguments they presumably provide
   the library-internal hidden definition of __stpncpy and make stpncpy a
   weak alias for it -- confirm against their definitions.  */
libc_hidden_def (__stpncpy)
weak_alias (__stpncpy, stpncpy)