1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
|
! SH5 code Copyright 2002 SuperH Ltd.
#include "asm.h"
ENTRY(strcmp)
#if __SHMEDIA__
/* SHmedia implementation of strcmp.
   In:   r2 = first string, r3 = second string, r18 = return address
         (turned into branch target tr2 by the ptabs below).
   Out:  r2 = result; every exit path ends in "blink tr2,r63".
         The byte-wise exits return the difference of the two bytes, the
         quadword exits return the difference of two 0/1 flags (-1, 0 or 1).
   The first eight bytes are compared one at a time.  After that the code
   works on 8-byte quadwords, with one loop for strings that have the same
   alignment relative to each other (al_loop) and one for strings that do
   not (loop).  */

/* Bytes 0..7: unrolled byte-wise compare.  Each load is issued ahead of
   the branches that test the previous pair.  Even offsets use r4/r5 and
   leave through quickret0 (tr0); odd offsets use r6/r7 and leave through
   quickret1 (tr1).  A branch is taken when the byte from the first string
   is zero or when the two bytes differ.  */
ld.ub r2,0,r4
pt/l quickret0,tr0
ld.ub r3,0,r5
ptabs r18,tr2
beqi/u r4,0,tr0
ld.ub r2,1,r6
bne/u r4,r5,tr0
pt/l quickret1,tr1
ld.ub r3,1,r7
beqi/u r6,0,tr1
ld.ub r2,2,r4
bne/u r6,r7,tr1
ld.ub r3,2,r5
beqi/u r4,0,tr0
ld.ub r2,3,r6
bne/u r4,r5,tr0
ld.ub r3,3,r7
beqi/u r6,0,tr1
ld.ub r2,4,r4
bne/u r6,r7,tr1
ld.ub r3,4,r5
beqi/u r4,0,tr0
ld.ub r2,5,r6
bne/u r4,r5,tr0
ld.ub r3,5,r7
beqi/u r6,0,tr1
ld.ub r2,6,r4
bne/u r6,r7,tr1
ld.ub r3,6,r5
beqi/u r4,0,tr0
ld.ub r2,7,r6
bne/u r4,r5,tr0
ld.ub r3,7,r7
beqi/u r6,0,tr1
/* Set-up for the quadword loops, interleaved with the last byte test.
   r3 = second pointer - first pointer.  */
sub r3,r2,r3
bne/u r6,r7,tr1
/* Round the first pointer down to an 8-byte boundary; r3 then becomes the
   address in the second string that corresponds to the rounded r2.  */
andi r2,-8,r2
add r3,r2,r3
/* Partial load from the second string at r3+8 into r23 (ldlo.q; see the
   SH-5 manual for which bytes of r23 are filled).  */
ldlo.q r3,8,r23
pt r23_zero,tr0
/* r22 = r3 * 8 and r20 = -r22: the two shift counts used with the
   SHLO / SHHI macros.  Those macros are not defined in this file;
   presumably they are endian-dependent shifts from asm.h - confirm.  */
shlli r3,3,r22
sub r63,r22,r20
/* r6 = 0x01 in every byte (the 0x0101 word replicated by mperm.w).  */
movi 0x101,r6
mperm.w r6,r63,r6
/* r7 = r6 shifted by the misalignment.  The saturating byte subtract
   r7 - r23 leaves 0x01 exactly where r7 has 0x01 and r23 has a zero byte,
   so r8 != 0 means the partial quadword contains a zero byte.  */
SHLO r6,r22,r7
msubs.ub r7,r23,r8
pt loop,tr1
bnei/u r8,0,tr0 // r23_zero
pt found_zero,tr0
/* r3 = ((r3 + 15) & -8) - r2: the index that the ldx.q loads in both
   loops add to r2.  */
addi r3,15,r3
andi r3,-8,r3
sub r3,r2,r3
/* r7 differs from r6 only if the shift above changed the mask; in that
   case take the misaligned loop.  */
bne/l r7,r6,tr1 // loop
/* The strings are aligned to each other. */
/* It is possible to have a loop with six cycles / iteration
by re-ordering the exit conditions, but then it needs extra
time and/or code to sort out the r4 != r5 case. */
pt al_loop,tr1
pt al_found_zero,tr0
al_loop:
/* r4 = next quadword of the first string (r2+8), r5 = quadword of the
   second string at r2+r3; then advance r2.  */
ld.q r2,8,r4
ldx.q r2,r3,r5
addi r2,8,r2
/* r8 = 0xff in every byte position where r4 holds a zero byte.  */
mcmpeq.b r63,r4,r8
pt cmp_quad,tr3
bnei/u r8,0,tr0 // al_found_zero
beq/l r4,r5,tr1 // al_loop
/* The quadwords differ and r4 has no zero byte: order them.  */
blink tr3,r63 // cmp_quad
.balign 8
/* Byte-wise exits: result = byte of first string - byte of second string.  */
quickret0:
sub r4,r5,r2
blink tr2,r63
quickret1:
sub r6,r7,r2
blink tr2,r63
/* Loop for strings that are not aligned to each other.  r23 carries the
   bytes of the second string left over from the previous load.  */
loop:
ld.q r2,8,r4
ldx.q r2,r3,r19
addi r2,8,r2
/* r8 = 0x01 per zero byte in r4; r9 = 0xff per zero byte in r19.  */
msubs.ub r6,r4,r8
mcmpeq.b r63,r19,r9
/* r5 = left-over bytes (r23) merged with the shifted part of r19; the
   other part of r19 becomes the new left-over in r23.  */
SHHI r19,r20,r21
or r21,r23,r5
SHLO r19,r22,r23
/* r8 bytes are 0x00/0x01 and r9 bytes are 0x00/0xff, so the two are equal
   only when neither quadword contains a zero byte.  */
bne/u r8,r9,tr0 // found_zero
beq/l r4,r5,tr1 // loop
/* Order two quadwords that are known to differ (or are to be compared in
   full).  On little-endian targets the bytes are reversed first so that
   the unsigned compare sees the first string byte as most significant.
   Result = (r4 > r5) - (r5 > r4).  */
cmp_quad:
#ifdef __LITTLE_ENDIAN__
byterev r4,r4
byterev r5,r5
#endif
cmpgtu r4,r5,r6
cmpgtu r5,r4,r7
sub r6,r7,r2
blink tr2,r63
/* Misaligned loop found a zero byte in r4 and/or r19.  */
found_zero:
pt zero_now,tr0
pt cmp_quad,tr1
/* r7 = zero mask of r19 shifted the same way as the part of r19 that went
   into r5.  If it differs from r8, go to zero_now with r4/r5 as they are.
   NOTE(review): presumably this separates "zero within the current r4/r5"
   from "zero only in the left-over part" - confirm against SHHI/SHLO.  */
SHHI r9,r20,r7
bne/u r8,r7,tr0 // zero_now
bne/u r4,r5,tr1 // cmp_quad
/* r8 = zero mask shifted like the left-over bytes in r23.  */
SHLO r9,r22,r8
/* Compare the left-over bytes in r23 against the next quadword of the
   first string; r8 already holds the matching zero mask.  */
r23_zero:
ld.q r2,8,r4
add r23,r63,r5
zero_now:
al_found_zero:
/* We know that one of the values has at least one zero, and r8 holds
a 0x01 or 0xff mask for every zero found in one of the operands.
If both operands have the first zero in the same place, this mask
allows us to truncate the comparison to the valid bytes in the
strings. If the first zero is in different places, it doesn't
matter if some invalid bytes are included, since the comparison
of the zero with the non-zero will determine the outcome. */
#ifdef __LITTLE_ENDIAN__
/* Move each mark up one byte, then (x - 1) & ~x yields a mask of all bits
   below the lowest mark, i.e. the bytes up to and including the first
   marked byte.  Apply it to both operands.  */
shlli r8,8,r8
addi r8,-1,r9
andc r9,r8,r8
and r8,r4,r4
and r8,r5,r5
#else
/* Derive a shift count (a multiple of 8 bits) from the position of the
   first mark via nsb, negate it, and shift both operands right by it so
   that the bytes after the first marked byte are dropped.  */
shlri r8,1,r8
nsb r8,r8
addi r8,8,r8
andi r8,56,r8
sub r63,r8,r8
shlrd r4,r8,r4
shlrd r5,r8,r5
#endif
/* Same ordering step as cmp_quad, applied to the truncated operands.  */
#ifdef __LITTLE_ENDIAN__
byterev r4,r4
byterev r5,r5
#endif
cmpgtu r4,r5,r6
cmpgtu r5,r4,r7
sub r6,r7,r2
blink tr2,r63
#else /* ! __SHMEDIA__, i.e. SH 1..4 / SHcompact */
/* SHcompact implementation of strcmp.
   If both pointers are 4-byte aligned, longwords are compared until the
   one from the first string contains a zero byte or the two longwords
   differ; the pointers are then moved back and the byte loop finishes
   the job.  Otherwise the byte loop is used from the start.
   The result is (byte of first string) - (byte of second string), both
   zero-extended, at the position where the byte loop stopped.
   All pointer and scratch registers are named through STR1 / STR2 / TMP /
   RESULT in both loop variants, so either register assignment below
   works with either variant.  */
#ifdef __SH5__
/* Register assignment used when __SH5__ is defined.  */
#define STR1 r2
#define STR2 r3
#define RESULT r2
#define TMP r4
#else
! Entry: r4: string1
! r5: string2
! Exit: r0: result
! r1-r2,r4-r5: clobbered
#define STR1 r4
#define STR2 r5
#define RESULT r0
#define TMP r2
#endif /* __SH5__ */
/* Use the byte loop unless the low two bits of both pointers are clear.  */
mov STR1,r0
or STR2,r0
tst #3,r0
bf L_setup_char_loop
/* r0 = 0, so that cmp/str r0,r1 sets T when r1 contains a zero byte.  */
mov #0,r0
#ifdef DELAYED_BRANCHES
/* Variant with bt.s: the load of the next STR1 element sits in the delay
   slot and has already been executed when the loop is left, so STR1 runs
   one element ahead and is corrected on exit.  */
mov.l @STR1+,r1
.align 2
Longword_loop:
mov.l @STR2+,TMP
cmp/str r0,r1
bt Longword_loop_end
cmp/eq r1,TMP
bt.s Longword_loop
mov.l @STR1+,r1
/* Longwords differ: undo the extra load done in the delay slot.  */
add #-4, STR1
Longword_loop_end:
/* Step both pointers back to the longword that ended the loop.  */
add #-4, STR1
add #-4, STR2
L_setup_char_loop:
mov.b @STR1+,r0
.align 2
L_char_loop:
mov.b @STR2+,r1
tst r0,r0
bt L_return
cmp/eq r0,r1
bt.s L_char_loop
mov.b @STR1+,r0
/* Bytes differ: r0 was overwritten in the delay slot, so reload the byte
   of the first string that was just compared.  */
add #-2,STR1
mov.b @STR1,r0
#else /* ! DELAYED_BRANCHES */
/* Variant without delayed branches: both loads happen at the top of each
   iteration, so no pointer runs ahead.  */
.align 2
Longword_loop:
mov.l @STR1+,r1
mov.l @STR2+,TMP
cmp/str r0,r1
bt Longword_loop_end
cmp/eq r1,TMP
bt Longword_loop
Longword_loop_end:
/* Step both pointers back to the longword that ended the loop.  */
add #-4, STR1
add #-4, STR2
.align 2
L_setup_char_loop:
L_char_loop:
mov.b @STR1+,r0
mov.b @STR2+,r1
tst r0,r0
bt L_return
cmp/eq r0,r1
bt L_char_loop
#endif
/* r0 = byte from the first string, r1 = byte from the second string.
   RESULT = zero-extended r0 - zero-extended r1; the subtraction is in
   the delay slot of rts.  */
L_return:
extu.b r0,RESULT
extu.b r1,r1
rts
sub r1,RESULT
#endif /* ! __SHMEDIA__ */
|