1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
/*
* ====================================================
* Copyright (C) 1998, 2002, 2008 by Red Hat Inc. All rights reserved.
*
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include "i386mach.h"
/*
 * memset -- store one byte value into a run of consecutive bytes and
 * return the original destination pointer in eax.
 *
 * Operands are written in AT&T order (source, destination).  Register
 * names appear without a prefix; presumably "i386mach.h" supplies them
 * as macros, along with SYM and SOTYPE_FUNCTION -- confirm there.
 *
 * Two variants are selected at preprocessing time:
 *
 *   __iamcu__ defined:
 *     In:   eax = destination, dl = fill byte, ecx = byte count
 *           (ecx is consumed by 'rep stosb' without being loaded, so it
 *           must already hold the count on entry).
 *     Uses: edx as a temporary holding the destination.
 *
 *   otherwise (stack arguments, addressed after the frame is set up):
 *     In:   8(ebp) = destination, 12(ebp) = fill byte, 16(ebp) = count.
 *     Uses: ecx, edx as scratch; ebp is saved and restored by the frame.
 *
 * Both variants save and restore edi and modify the flags.
 */
.global SYM (memset)
SOTYPE_FUNCTION(memset)
SYM (memset):
#ifdef __iamcu__
/* Register-argument variant.  No 'cld' is issued here, so the direction
   flag is assumed to be clear on entry -- TODO confirm against the ABI. */
/* edi is used as the store pointer; save the caller's value. */
pushl edi
/* edi = destination. */
movl eax,edi
/* eax = fill byte, zero-extended (must read dl before edx is reused). */
movzbl dl,eax
/* Remember the destination in edx; 'rep stosb' advances edi. */
mov edi,edx
/* Store al into ecx consecutive bytes starting at (edi). */
rep stosb
/* Return value: the original destination. */
mov edx,eax
popl edi
#else
/* Stack-argument variant: establish a frame so the arguments can be
   addressed relative to ebp. */
pushl ebp
movl esp,ebp
/* edi is used as the store pointer; save the caller's value. */
pushl edi
/* edi = destination. */
movl 8(ebp),edi
/* eax = low byte of the fill argument, zero-extended. */
movzbl 12(ebp),eax
/* ecx = byte count. */
movl 16(ebp),ecx
/* Clear the direction flag so the string stores move towards higher
   addresses. */
cld
#ifndef __OPTIMIZE_SIZE__
/* 16 bytes or fewer won't benefit from the 'rep stosl' loop; go
   straight to the byte-wise store. */
cmpl $16,ecx
jbe .L19
/* Already 8-byte aligned?  Then skip the alignment stores. */
testl $7,edi
je .L10
/* It turns out that 8-byte aligned 'rep stosl' outperforms
4-byte aligned on some x86 platforms. */
/* Store single bytes until edi is a multiple of 8.  At most 7 stores
   are needed, so the step is unrolled 7 times; each of the first 6 is
   followed by an alignment test that exits to .L10 early. */
/* Alignment byte 1. */
movb al,(edi)
incl edi
decl ecx
testl $7,edi
je .L10
/* Alignment byte 2. */
movb al,(edi)
incl edi
decl ecx
testl $7,edi
je .L10
/* Alignment byte 3. */
movb al,(edi)
incl edi
decl ecx
testl $7,edi
je .L10
/* Alignment byte 4. */
movb al,(edi)
incl edi
decl ecx
testl $7,edi
je .L10
/* Alignment byte 5. */
movb al,(edi)
incl edi
decl ecx
testl $7,edi
je .L10
/* Alignment byte 6. */
movb al,(edi)
incl edi
decl ecx
testl $7,edi
je .L10
/* Alignment byte 7: after this store edi must be aligned, so no test
   is needed and control falls through to .L10. */
movb al,(edi)
incl edi
decl ecx
/* At this point, ecx>8 and edi%8==0.  (More precisely ecx >= 10: the
   count was at least 17 and at most 7 bytes have been stored.) */
.L10:
/* Replicate the fill byte into all four bytes of eax.  The upper 24
   bits of eax are zero from the movzbl, so copying al to ah yields the
   byte twice in the low 16 bits ... */
movb al,ah
/* ... and OR-ing in a copy shifted left by 16 fills the upper half. */
movl eax,edx
sall $16,edx
orl edx,eax
/* Split the remaining count: ecx = number of whole dwords,
   edx = leftover bytes (count modulo 4). */
movl ecx,edx
shrl $2,ecx
andl $3,edx
/* Store eax into ecx consecutive dwords starting at (edi). */
rep
stosl
/* ecx = leftover byte count for the byte-wise store below. */
movl edx,ecx
#endif /* not __OPTIMIZE_SIZE__ */
/* Byte-wise store of the remaining ecx bytes.  Reached with the full
   count when it was <= 16 or when __OPTIMIZE_SIZE__ is defined, and
   with the 0..3 leftover bytes after the dword store otherwise. */
.L19:
rep
stosb
/* Return value: reload the original destination from the stack
   argument (edi has been advanced by the stores). */
movl 8(ebp),eax
/* Point esp at the saved edi, which is the only value pushed after the
   frame was established, then restore it. */
leal -4(ebp),esp
popl edi
/* Tear down the frame: esp = ebp, then pop the caller's ebp. */
leave
#endif
/* Common return for both variants. */
ret
|