/*
 * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
 * Copyright (C) 2007 ARC International (UK) LTD
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
 */
7
8#include <features.h>
9#include <sysdep.h>
10#include <asm.h>
11
/* int strcmp (const char *s1, const char *s2)

   In:	r0 = s1, r1 = s2.
   Out:	r0 = 0 when the strings are equal, a negative value when the
	first differing byte of s1 is the smaller one, a positive value
	otherwise.  Bytes are compared as unsigned values.
   The routine is a leaf and returns through blink.  */
ENTRY(strcmp)

#if defined(__ARC700__) || defined(__ARC64_ARCH32__)
/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   respective one cycle / byte by forcing double source 1 alignment, unrolling
   by a factor of two, and speculatively loading the second word / byte of
   source 1; however, that would increase the overhead for loop setup / finish,
   and strcmp might often terminate early.  */

/* Register use in this variant:
     r2, r3	current word / byte of s1, s2
     r4		zero-byte indicator computed from the s1 word
     r5		0x80808080
     r12	0x01010101  */

	or	r2,r0,r1
	bmsk_s	r2,r2,1		; low two bits of (s1 | s2)
	brne	r2,0,.Lcharloop	; a pointer is not word aligned: go bytewise
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 != 0 iff the s1 word holds a zero byte
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
	/* The words differ and the s1 word holds no terminator.  */
#ifdef	__LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	SUBR_S	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1		; result for s1 > s2
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: s1 < s2, make the result negative

	.balign	4
#ifdef __LITTLE_ENDIAN__
	/* The s1 word holds a terminator: compare only the first byte that
	   either differs or is the terminator.  */
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	SUBR_S	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3	; 0 when the selected bytes are equal
	mov.hi	r0,1		; s1 > s2
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: s1 < s2, make the result negative
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry-propagation from a lower significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31	; delay slot: s1 < s2, make the result negative
#endif /* ENDIAN */

	/* Bytewise comparison, used when s1 or s2 is not word aligned.  */
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend	; end of s1
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3	; delay slot: result = byte difference

#elif defined(__ARCHS__)
/* ARC HS variant.
   Register use:
     r2, r3	current word / byte of s1, s2
     r4		zero-byte indicator computed from the s1 word
     r11	0x80808080
     r12	0x01010101  */
	or	r2, r0, r1
	bmsk_s	r2, r2, 1
	brne	r2, 0, @.Lcharloop	; a pointer is not word aligned

;;; s1 and s2 are word aligned

	mov_s	r12, 0x01010101
	ror	r11, r12		; r11 = 0x80808080
	.align  4
.LwordLoop:
	ld.ab	r2, [r0, 4]
	sub	r4, r2, r12
	ld.ab	r3, [r1, 4]
	;; Detect NULL char in str1
	bic	r4, r4, r2
	and	r4, r4, r11
	brne.d.nt	r4, 0, .LfoundNULL
	;; Delay slot: compare the two words that were just loaded
	cmp	r2, r3
	beq	.LwordLoop

	;; A mismatch is found and the str1 word holds no NULL char.
	;; Order the bytes so that the first string byte is the most
	;; significant one, then compare the words as unsigned values.
#ifdef __LITTLE_ENDIAN__
	swape	r3, r3
	mov_s	r0, 1
	swape	r2, r2
#else
	mov_s	r0, 1
#endif
	cmp_s	r2, r3
	j_s.d	[blink]
	bset.lo	r0, r0, 31	; delay slot: str1 < str2, negative result

	.align 4
.LfoundNULL:
#ifdef __BIG_ENDIAN__
	;; The borrow of the zero test travels towards the more significant
	;; bytes, which on big-endian are the earlier string bytes.  Any 0x01
	;; bytes immediately preceding a NULL char are therefore flagged as
	;; well.  Such a byte has bit 0 set while a real NULL char has it
	;; clear, so drop every flag whose byte has bit 0 set; otherwise the
	;; search below would stop in front of the real terminator.
	asl	r0, r2, 7		; bit 0 of each byte -> its flag position
	bic	r4, r4, r0
	swape	r4, r4
	swape	r2, r2
	swape	r3, r3
#endif
	;; Find null byte
	ffs	r0, r4
	;; Keep the bytes up to and including the NULL char
	bmsk	r2, r2, r0
	bmsk	r3, r3, r0
	swape	r2, r2
	swape	r3, r3
	;; make the return value
	sub.f	r0, r2, r3
	mov.hi	r0, 1
	j_s.d	[blink]
	bset.lo	r0, r0, 31	; delay slot: str1 < str2, negative result

	;; Bytewise comparison, used when s1 or s2 is not word aligned
	.align 4
.Lcharloop:
	ldb.ab	r2, [r0, 1]
	ldb.ab	r3, [r1, 1]
	nop
	breq	r2, 0, .Lcmpend	; end of str1
	breq	r2, r3, .Lcharloop

	.align 4
.Lcmpend:
	j_s.d	[blink]
	sub	r0, r2, r3	; delay slot: result = byte difference

#else
#error "Unsupported ARC CPU type"
#endif

END(strcmp)
libc_hidden_def(strcmp)

/* When locale support is not configured, strcoll is emitted as a strong
   alias of strcmp.  */
#ifndef __UCLIBC_HAS_LOCALE__
strong_alias(strcmp,strcoll)
libc_hidden_def(strcoll)
#endif
177