1/* Optimized strcmp for Xtensa.
2   Copyright (C) 2001, 2007 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <http://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include <bits/xtensa-config.h>
21#include <features.h>
22
/* Byte-lane masks.  MASKn selects byte n of a word loaded with l32i,
   where byte 0 is the byte at the lowest address (the first character
   in string order).  The constants are mirrored between the big-endian
   (__XTENSA_EB__) and little-endian layouts.  */
23#ifdef __XTENSA_EB__
24#define	MASK0 0xff000000
25#define	MASK1 0x00ff0000
26#define	MASK2 0x0000ff00
27#define	MASK3 0x000000ff
28#else
29#define	MASK0 0x000000ff
30#define	MASK1 0x0000ff00
31#define	MASK2 0x00ff0000
32#define	MASK3 0xff000000
33#endif
34
/* Bit 6 (0x40) of each of the four byte lanes.  The word loops test it
   against (w | w << 1) as a quick "no zero byte here" filter.  */
35#define MASK4 0x40404040
36
/* int strcmp (const char *s1, const char *s2)

   In:   a2 = s1, a3 = s2.
   Out:  a2 = 0 if the strings are equal, otherwise a value whose sign
	 gives the ordering (a difference of zero-extended bytes, or
	 +1/-1 on the big-endian whole-word path).
   Uses: a4-a11 as scratch.

   This entry sequence compares byte 0 and then dispatches on pointer
   alignment: both word-aligned -> .Laligned; low two address bits
   differ -> .Lunaligned (byte loop); same misalignment -> compare up
   to three bytes one at a time until both pointers are word-aligned.  */
37	.text
38	.align 4
39	.literal_position
40ENTRY (strcmp)
41	/* a2 = s1, a3 = s2 */
42
43	l8ui	a8, a2, 0	/* byte 0 from s1 */
44	l8ui	a9, a3, 0	/* byte 0 from s2 */
45	movi	a10, 3		/* mask for the low two address bits */
46	bne	a8, a9, .Lretdiff	/* first bytes differ: return a8 - a9 */
47
48	or	a11, a2, a3	/* low bits set in either pointer */
49	bnone	a11, a10, .Laligned	/* both pointers already word-aligned */
50
51	xor	a11, a2, a3	/* compare low two bits of s1 and s2 */
52	bany	a11, a10, .Lunaligned	/* if they have different alignment */
53
54	/* s1/s2 are not word-aligned, but share the same misalignment.
	   a8 still holds byte 0 (already known equal to a9).  */
55	addi	a2, a2, 1	/* advance s1 */
56	beqz	a8, .Leq	/* bytes equal, if zero, strings are equal */
57	addi	a3, a3, 1	/* advance s2 */
58	bnone	a2, a10, .Laligned /* if s1/s2 now aligned */
59	l8ui	a8, a2, 0	/* byte 1 from s1 */
60	l8ui	a9, a3, 0	/* byte 1 from s2 */
61	addi	a2, a2, 1	/* advance s1 */
62	bne	a8, a9, .Lretdiff /* if different, return difference */
63	beqz	a8, .Leq	/* bytes equal, if zero, strings are equal */
64	addi	a3, a3, 1	/* advance s2 */
65	bnone	a2, a10, .Laligned /* if s1/s2 now aligned */
66	l8ui	a8, a2, 0	/* byte 2 from s1 */
67	l8ui	a9, a3, 0	/* byte 2 from s2 */
68	addi	a2, a2, 1	/* advance s1 */
69	bne	a8, a9, .Lretdiff /* if different, return difference */
70	beqz	a8, .Leq	/* bytes equal, if zero, strings are equal */
71	addi	a3, a3, 1	/* advance s2 */
72	j	.Laligned	/* third step from a misaligned start: must be aligned now */
73
74/* s1 and s2 have different alignment.
75
76   If the zero-overhead loop option is available, use an (almost)
77   infinite zero-overhead loop with conditional exits so we only pay
78   for taken branches when exiting the loop.
79
80   Note: It is important for this unaligned case to come before the
81   code for aligned strings, because otherwise some of the branches
82   above cannot reach and have to be transformed to branches around
83   jumps.  The unaligned code is smaller and the branches can reach
84   over it.  */
85
/* Byte-at-a-time comparison for pointers whose alignments differ.
   a2/a3 have not been advanced on this path, so byte 0 is loaded and
   compared again.  Both loop exits go to .Lretdiff: on a mismatch
   a8 - a9 is nonzero; on a NUL both bytes are zero and the result is 0.  */
86	.align	4
87	/* (2 mod 4) alignment for loop instruction */
	/* NOTE(review): the mod-4 remark depends on the encoded size of the
	   movi placed ahead of the loop -- confirm it still holds.  */
88.Lunaligned:
89#if XCHAL_HAVE_LOOPS
90	movi	a11, 0		/* set up for the maximum loop count */
91	loop	a11, .Lretdiff	/* loop forever (almost anyway) */
92#endif
93.Lnextbyte:
94	l8ui	a8, a2, 0	/* next byte from s1 */
95	l8ui	a9, a3, 0	/* next byte from s2 */
96	addi	a2, a2, 1	/* advance s1 */
97	bne	a8, a9, .Lretdiff	/* bytes differ: return the difference */
98	addi	a3, a3, 1	/* advance s2 */
99#if XCHAL_HAVE_LOOPS
100	beqz	a8, .Lretdiff	/* equal NUL bytes: leave the loop, result is 0 */
101#else
102	bnez	a8, .Lnextbyte	/* equal and nonzero: keep going */
103#endif
104.Lretdiff:
105	sub	a2, a8, a9	/* result = s1 byte - s2 byte (both zero-extended) */
106	abi_ret
107
108/* s1 is word-aligned; s2 is word-aligned.
109
110   If the zero-overhead loop option is available, use an (almost)
111   infinite zero-overhead loop with conditional exits so we only pay
112   for taken branches when exiting the loop.  */
113
114/* New algorithm, relying on the fact that all normal ASCII is between
115   32 and 127.
116
117   Rather than check all bytes for zero:
118   Take one word (4 bytes).  Call it w1.
119   Shift w1 left by one into w1'.
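120   Or w1 and w1'.  Bit 6 of each byte of the result is then 1 for all
   normal ASCII (a byte in 32..127 has bit 5 or bit 6 set, and the shift
   moves bit 5 into bit 6); for a zero byte it is always 0.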
121   Check that all 4 bit 6's (one for each byte) are one:
122   If they are, we are definitely not done.
123   If they are not, we are probably done, but need to check for zero.  */
124
/* First-stage word loop for word-aligned s1/s2.

   Register roles: a4 = MASK0, a7 = MASK4, a8/a9 = current words from
   s1/s2, a5 = a8 << 1.  After the equality test a9 is reused for
   a8 | a5, the value checked against MASK4.

   Exits:
     .Lwne2    - the two words differ (a5 and a7 are still valid there).
     .Lprobeq  - words are equal but some byte failed the bit-6 test;
		 a2/a3 point at that word and a8 holds it.
     .Lprobeq2 - same, for the second unrolled word; a2/a3 are bumped by
		 4 so they point at that word before reaching .Lprobeq.  */
125	.align	4
126#if XCHAL_HAVE_LOOPS
127.Laligned:
128	movi	a11, 0		/* loop count operand; a11 is reused by the later "loop a11, .Leq" */
129	movi	a4, MASK0	/* mask for byte 0 */
130	movi	a7, MASK4	/* bit 6 of every byte lane */
131	loop	a11, .Laligned_done /* Loop forever. */
132
133	/* First unrolled loop body.  */
134	l32i	a8, a2, 0	/* get word from s1 */
135	l32i	a9, a3, 0	/* get word from s2 */
136	slli	a5, a8, 1	/* a5 = w1 << 1 (computed before the branch; .Lwne2 uses it) */
137	bne	a8, a9, .Lwne2	/* words differ */
138	or	a9, a8, a5	/* a9 = w1 | (w1 << 1) */
139	bnall	a9, a7, .Lprobeq	/* some byte lacks bit 6: may be a NUL */
140
141	/* Second unrolled loop body.  */
142	l32i	a8, a2, 4	/* get word from s1+4 */
143	l32i	a9, a3, 4	/* get word from s2+4 */
144	slli	a5, a8, 1	/* a5 = w1 << 1 */
145	bne	a8, a9, .Lwne2	/* words differ */
146	or	a9, a8, a5	/* a9 = w1 | (w1 << 1) */
147	bnall	a9, a7, .Lprobeq2	/* some byte lacks bit 6: may be a NUL */
148
149	addi	a2, a2, 8	/* advance s1 pointer */
150	addi	a3, a3, 8	/* advance s2 pointer */
	/* NOTE(review): .Laligned_done (the loop end) and .Lprobeq2 are the
	   same address.  If the loop count ever expired, execution would
	   fall into the +4 adjustment below with the pointers already
	   advanced by 8 -- confirm this is unreachable in practice.  */
151.Laligned_done:
152.Lprobeq2:
153	/* Adjust pointers to account for the loop unrolling.  */
154	addi	a2, a2, 4
155	addi	a3, a3, 4
156
157#else /* !XCHAL_HAVE_LOOPS */
158
159.Laligned:
160	movi	a4, MASK0	/* mask for byte 0 */
161	movi	a7, MASK4	/* bit 6 of every byte lane */
162	j	.Lfirstword	/* first word: do not advance the pointers yet */
163.Lnextword:
164	addi	a2, a2, 4	/* advance s1 pointer */
165	addi	a3, a3, 4	/* advance s2 pointer */
166.Lfirstword:
167	l32i	a8, a2, 0	/* get word from s1 */
168	l32i	a9, a3, 0	/* get word from s2 */
169	slli	a5, a8, 1	/* a5 = w1 << 1 (computed before the branch; .Lwne2 uses it) */
170	bne	a8, a9, .Lwne2	/* words differ */
171	or	a9, a8, a5	/* a9 = w1 | (w1 << 1) */
172	ball	a9, a7, .Lnextword	/* bit 6 set in all four bytes: no NUL, keep going */
173#endif /* !XCHAL_HAVE_LOOPS */
174
/* .Lprobeq: the word in a8 is identical in both strings but failed the
   bit-6 filter.  Test each of its bytes for zero exactly; if one is
   zero the strings are equal.  Otherwise continue with a second word
   loop that tests every byte of every word with MASK0-MASK3
   (a4 = MASK0 from .Laligned; a5/a6/a7 are loaded here, so a7 no
   longer holds MASK4 from this point on).

   In the second loop a2 is advanced before the compare and a3 only
   after the zero checks; neither pointer is read again by .Lwne or .Leq.  */
175	/* align (0 mod 4) */
176.Lprobeq:
177	/* Words are probably equal, but check for sure.
178	   If not, loop over the rest of string using normal algorithm.  */
179
180	bnone	a8, a4, .Leq	/* if byte 0 is zero */
181	movi	a5, MASK1	/* mask for byte 1 */
182	movi	a6, MASK2	/* mask for byte 2 */
183	bnone	a8, a5, .Leq	/* if byte 1 is zero */
184	movi	a7, MASK3	/* mask for byte 3 */
185	bnone	a8, a6, .Leq	/* if byte 2 is zero */
186	bnone	a8, a7, .Leq	/* if byte 3 is zero */
187	addi.n	a2, a2, 4	/* advance s1 pointer */
188	addi.n	a3, a3, 4	/* advance s2 pointer */
189#if XCHAL_HAVE_LOOPS
190
191	/* align (1 mod 4) */
192	loop	a11, .Leq	/* loop forever (a11 is still 0 from .Laligned) */
193
194	l32i	a8, a2, 0	/* get word from s1 */
195	l32i	a9, a3, 0	/* get word from s2 */
196	addi	a2, a2, 4	/* advance s1 pointer */
197	bne	a8, a9, .Lwne	/* words differ: find the deciding byte */
198	bnone	a8, a4, .Leq	/* if byte 0 is zero */
199	bnone	a8, a5, .Leq	/* if byte 1 is zero */
200	bnone	a8, a6, .Leq	/* if byte 2 is zero */
201	bnone	a8, a7, .Leq	/* if byte 3 is zero */
202	addi	a3, a3, 4	/* advance s2 pointer */
203
204#else /* !XCHAL_HAVE_LOOPS */
205
206	j	.Lfirstword2	/* a3 was already advanced above */
207.Lnextword2:
208	addi	a3, a3, 4	/* advance s2 pointer */
209.Lfirstword2:
210	l32i	a8, a2, 0	/* get word from s1 */
211	l32i	a9, a3, 0	/* get word from s2 */
212	addi	a2, a2, 4	/* advance s1 pointer */
213	bne	a8, a9, .Lwne	/* words differ: find the deciding byte */
214	bnone	a8, a4, .Leq	/* if byte 0 is zero */
215	bnone	a8, a5, .Leq	/* if byte 1 is zero */
216	bnone	a8, a6, .Leq	/* if byte 2 is zero */
217	bany	a8, a7, .Lnextword2	/* byte 3 nonzero: next word; if zero, fall through to .Leq */
218#endif /* !XCHAL_HAVE_LOOPS */
219
220	/* Words are equal; some byte is zero.  */
221.Leq:	movi	a2, 0		/* return equal */
222	abi_ret
223
/* Unequal-word handling.  a8 = word from s1, a9 = word from s2.

   .Lwne2 is entered only from the first-stage loops, where
   a5 = a8 << 1 and a7 = MASK4 are still valid; .Lwne is entered from
   the second-stage loop (and by fall-through) and needs only a4 = MASK0.

   .Lwne walks the bytes in string order: if a byte differs, the matching
   .LdiffN computes the result; if it is equal and zero in a8, the strings
   ended before any difference and .Leq returns 0.  If bytes 0-2 are equal
   and nonzero, byte 3 must be the difference, so control falls into
   .Ldiff3.  */
224.Lwne2:	/* Words are not equal.  On big-endian processors, if none of the
225	   bytes are zero, the return value can be determined by a simple
226	   comparison.  */
227#ifdef __XTENSA_EB__
228	or	a10, a8, a5	/* a10 = w1 | (w1 << 1) */
229	bnall	a10, a7, .Lsomezero	/* a byte of a8 lacks bit 6: take the byte-wise path */
230	bgeu	a8, a9, .Lposreturn	/* unsigned word compare; words are known unequal */
231	movi	a2, -1		/* s1 word < s2 word */
232	abi_ret
233.Lposreturn:
234	movi	a2, 1		/* s1 word > s2 word */
235	abi_ret
236.Lsomezero:	/* There is probably some zero byte. */
237#endif /* __XTENSA_EB__ */
238.Lwne:	/* Words are not equal.  */
239	xor	a2, a8, a9	/* get word with nonzero in byte that differs */
240	bany	a2, a4, .Ldiff0	/* if byte 0 differs */
241	movi	a5, MASK1	/* mask for byte 1 */
242	bnone	a8, a4, .Leq	/* if byte 0 is zero */
243	bany	a2, a5, .Ldiff1	/* if byte 1 differs */
244	movi	a6, MASK2	/* mask for byte 2 */
245	bnone	a8, a5, .Leq	/* if byte 1 is zero */
246	bany	a2, a6, .Ldiff2	/* if byte 2 differs */
247	bnone	a8, a6, .Leq	/* if byte 2 is zero */
248#ifdef __XTENSA_EB__
249.Ldiff3:
250.Ldiff2:
251.Ldiff1:
252	/* Byte 0 is equal (at least) and there is a difference before a zero
253	   byte.  Just subtract words to get the return value.
254	   The high order equal bytes cancel, leaving room for the sign.  */
255	sub	a2, a8, a9
256	abi_ret
257
258.Ldiff0:
259	/* Need to make room for the sign, so can't subtract whole words.  */
260	extui	a10, a8, 24, 8	/* byte 0 of s1 word (top byte on big-endian) */
261	extui	a11, a9, 24, 8	/* byte 0 of s2 word */
262	sub	a2, a10, a11
263	abi_ret
264
265#else /* !__XTENSA_EB__ */
266	/* Little-endian is a little more difficult because can't subtract
267	   whole words.  */
268.Ldiff3:
269	/* Bytes 0-2 are equal; byte 3 is different.
270	   For little-endian need to have a sign bit for the difference.  */
271	extui	a10, a8, 24, 8	/* byte 3 of s1 word (bits 31..24) */
272	extui	a11, a9, 24, 8	/* byte 3 of s2 word */
273	sub	a2, a10, a11
274	abi_ret
275
276.Ldiff0:
277	/* Byte 0 is different.  */
278	extui	a10, a8, 0, 8	/* byte 0 of s1 word (bits 7..0) */
279	extui	a11, a9, 0, 8	/* byte 0 of s2 word */
280	sub	a2, a10, a11
281	abi_ret
282
283.Ldiff1:
284	/* Byte 0 is equal; byte 1 is different.  */
285	extui	a10, a8, 8, 8	/* byte 1 of s1 word (bits 15..8) */
286	extui	a11, a9, 8, 8	/* byte 1 of s2 word */
287	sub	a2, a10, a11
288	abi_ret
289
290.Ldiff2:
291	/* Bytes 0-1 are equal; byte 2 is different.  */
292	extui	a10, a8, 16, 8	/* byte 2 of s1 word (bits 23..16) */
293	extui	a11, a9, 16, 8	/* byte 2 of s2 word */
294	sub	a2, a10, a11
295	abi_ret
296
297#endif /* !__XTENSA_EB__ */
298
/* Symbol bookkeeping.  libc_hidden_def and strong_alias are macros
   defined outside this file.  */
299libc_hidden_def (strcmp)
300
301#ifndef __UCLIBC_HAS_LOCALE__
/* NOTE(review): presumably this makes strcoll the same code as strcmp
   when locale support is not configured -- confirm against the
   strong_alias definition.  */
302strong_alias (strcmp, strcoll)
303libc_hidden_def (strcoll)
304#endif
305