1/*
2 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
3 *
4 * Author: Nicolas Pitre <nico@fluxnic.net>
5 *   - contributed to gcc-3.4 on Sep 30, 2003
6 *   - adapted for the Linux kernel on Oct 2, 2003
7 */
8
9/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
10
11This file is free software; you can redistribute it and/or modify it
12under the terms of the GNU General Public License as published by the
13Free Software Foundation; either version 2, or (at your option) any
14later version.
15
16In addition to the permissions in the GNU General Public License, the
17Free Software Foundation gives you unlimited permission to link the
18compiled version of this file into combinations with other programs,
19and to distribute those combinations without any restriction coming
20from the use of this file.  (The General Public License restrictions
21do apply in other respects; for example, they cover modification of
22the file, and distribution when not linked into a combine
23executable.)
24
25This file is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of
27MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
28General Public License for more details.
29
30You should have received a copy of the GNU General Public License
31along with this program; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>.  */
32
33
34#include "assembler.h"
35
/*
 * ARM_DIV_BODY - unsigned 32-bit division core (shift and subtract).
 *
 *   dividend: in: numerator.  Clobbered; on exit it is not guaranteed to
 *             hold the remainder, because the trailing steps of the
 *             unrolled loop may still subtract while the matching
 *             quotient bit has already been shifted out to zero.
 *   divisor:  in: denominator.  Clobbered.
 *   result:   out: quotient.
 *   curbit:   scratch: quotient bit that matches the current divisor
 *             shift.
 *
 * Both users visible here (__udivsi3 and __divsi3) branch away before
 * expanding this macro when the divisor is 0, 1 or a power of two, or
 * when dividend <= divisor.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the divisor with the dividend: shift it left by the
	@ difference of the two leading-zero counts and start curbit
	@ at the matching quotient bit position.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop.  Each pass tries four quotient bits: the
	@ dividend is compared against divisor, divisor/2, divisor/4
	@ and divisor/8, and on success the divisor multiple is
	@ subtracted and the matching bit of the result is set.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4	@ Yes, move down one nibble
	bne	1b

.endm
100
101
/*
 * ARM_DIV2_ORDER - compute the bit index of a power-of-two divisor.
 *
 *   divisor: in: value to inspect.  Modified by the
 *            __LINUX_ARM_ARCH__ < 5 path.
 *   order:   out: index of the set bit of divisor.
 *
 * The users visible here expand this only after checking that the
 * divisor is a power of two, and then shift the dividend right by
 * order.  The last step of the __LINUX_ARM_ARCH__ < 5 path is only
 * exact when the remaining value is 1, 2, 4 or 8.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - leading zeros

#else

	@ Binary search: peel off 16, then 8, then 4 bit positions,
	@ accumulating the amount shifted in order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ Here divisor is below 16.  A value above 4 adds 3; otherwise
	@ divisor >> 1 maps 1, 2, 4 to 0, 1, 2.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
131
132
/*
 * ARM_MOD_BODY - unsigned 32-bit remainder core (shift and subtract).
 *
 *   dividend: in: numerator; out: dividend modulo divisor.
 *   divisor:  in: denominator.  Clobbered.
 *   order:    scratch: first the number of bit positions the divisor
 *             was shifted left, then a countdown of remaining steps.
 *   spare:    scratch, used only when __LINUX_ARM_ARCH__ >= 5.
 *
 * Both users visible here (__umodsi3 and __modsi3) branch away before
 * expanding this macro when the divisor is 0, 1 or a power of two, or
 * when dividend <= divisor.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the divisor with the dividend; order is the shift used.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ One compare/subtract step is needed per shift position, that
	@ is order + 1 steps in total.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f				@ fewer than 4 steps in total

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1			@ anything left to reduce?
	mov	\divisor, \divisor, lsr #4	@ (mov leaves the flags alone)
	subges	\order, \order, #4		@ yes, account for this batch
	bge	1b				@ loop while 4 more steps remain

	tst	\order, #3			@ no partial batch pending,
	teqne	\dividend, #0			@ or nothing left to reduce?
	beq	5f				@ then we are done

	@ Either 1, 2 or 3 comparison/subtractions are left
	@ (order is -3, -2 or -1 respectively).
2:	cmn	\order, #2
	blt	4f				@ order == -3: one step
	beq	3f				@ order == -2: two steps
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
202
203
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   May modify r1, r2, r3 and the flags.
 *
 * A zero divisor is handed to Ldiv0.  Divisors of 1, powers of two and
 * dividend <= divisor are answered without entering the division loop.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	moveq	pc, lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: 0 or 1
	tst	r1, r2				@ divisor & (divisor - 1)
	beq	12f				@ power of two: plain shift

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2				@ quotient was built in r2
	mov	pc, lr

	@ Flags are still those of cmp r0, r1 above.
11:	moveq	r0, #1				@ dividend == divisor
	movne	r0, #0				@ dividend <  divisor
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2			@ quotient = dividend >> order
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
233
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = dividend modulo divisor
 *   May modify r1, r2, r3 and the flags.
 *
 * A zero divisor is handed to Ldiv0.  The run of conditional
 * instructions after the bcc shares one set of flags and settles every
 * easy case before the final movls:
 *   - divisor == 1 or dividend == divisor: result 0
 *   - dividend < divisor:                  result is the dividend
 *   - divisor is a power of two:           result is r0 & (divisor - 1)
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	movls	pc, lr				@ any easy case matched: return

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
251
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient
 *   May modify r1, r2, r3, ip and the flags.
 *
 * The division itself is done on absolute values.  Bit 31 of ip, which
 * holds r0 ^ r1, records whether the operand signs differ and therefore
 * whether the unsigned quotient has to be negated at the end.  A zero
 * divisor is handed to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ magnitude not above divisor
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ signs differed: negate
	mov	pc, lr

	@ Divisor was 1 or -1.  ip ^ r0 has the sign of the original
	@ divisor, so the dividend is negated only for -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags are still those of cmp r3, r1 above.
11:	movlo	r0, #0				@ smaller magnitude: 0
	moveq	r0, ip, asr #31			@ equal magnitude: 0 or -1 ...
	orreq	r0, r0, #1			@ ... turned into +1 or -1
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ magnitude >> order
	rsbmi	r0, r0, #0			@ signs differed: negate
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
294
/*
 * __modsi3 - signed 32-bit remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = remainder, carrying the sign of the dividend
 *   May modify r1, r2, r3, ip and the flags.
 *
 * The remainder is computed on absolute values and negated at label 10
 * when the original dividend, kept in ip, was negative.  A zero divisor
 * is handed to Ldiv0.  The run of conditional instructions before the
 * bls shares one set of flags and settles the easy cases: divisor of
 * magnitude 1 or equal magnitudes give 0, a smaller dividend magnitude
 * is returned as is, and a power-of-two divisor is handled by masking.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ any easy case matched

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ dividend was negative: negate
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__modsi3)
318
319#ifdef CONFIG_AEABI
320
/*
 * __aeabi_uidivmod - unsigned 32-bit division with remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient (as returned by __aeabi_uidiv),
 *        r1 = dividend - quotient * divisor
 *   Modifies r2, r3 and the flags; ip and lr are restored.
 *
 * NOTE(review): ip is saved although it is not used here, presumably so
 * that four words are pushed and the stack stays 8-byte aligned across
 * the call - confirm.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands for later
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
334
/*
 * __aeabi_idivmod - signed 32-bit division with remainder.
 *
 *   In:  r0 = dividend, r1 = divisor
 *   Out: r0 = quotient (as returned by __aeabi_idiv),
 *        r1 = dividend - quotient * divisor
 *   Modifies r2, r3 and the flags; ip and lr are restored.
 *
 * NOTE(review): ip is saved although it is not used here, presumably so
 * that four words are pushed and the stack stays 8-byte aligned across
 * the call - confirm.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}		@ keep both operands for later
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2			@ r3 = quotient * divisor
	sub	r1, r1, r3			@ r1 = remainder
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
347
/*
 * __aeabi_uldivmod - unsigned 64-bit division with remainder.
 *
 * Thin wrapper around __qdivrem.  r0-r3 are passed through untouched.
 * An 8-byte result slot is reserved on the stack and its address is
 * left at the top of the stack for the call.  Afterwards r2/r3 are
 * loaded from that slot, r0/r1 are left exactly as __qdivrem returned
 * them, and sp is back at its entry value.
 *
 * NOTE(review): presumably __qdivrem takes (u64 dividend, u64 divisor,
 * u64 *remainder) with the pointer as its first stack argument and
 * returns the quotient in r0/r1 - confirm against its C prototype.
 * NOTE(review): UNWIND(.save {lr}) does not describe the 16 bytes that
 * are actually pushed here - confirm the unwinder copes with that.
 */
ENTRY(__aeabi_uldivmod)
UNWIND(.fnstart)
UNWIND(.save {lr}	)
	sub sp, sp, #8			@ reserve the 8-byte result slot
	stmfd   sp!, {sp, lr}		@ push slot address (pre-push sp), lr
	bl __qdivrem
	ldr lr, [sp, #4]		@ recover the return address
	add sp, sp, #8			@ drop the two pushed words
	ldmfd sp!, {r2, r3}		@ r2/r3 = slot contents, sp restored
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_uldivmod)
361
/*
 * __aeabi_ldivmod - signed 64-bit division with remainder.
 *
 * Thin wrapper around __ldivmod_helper, laid out the same way as
 * __aeabi_uldivmod.  r0-r3 are passed through untouched.  An 8-byte
 * remainder slot is reserved on the stack and its address is left at
 * the top of the stack for the call.  Afterwards r2/r3 are loaded from
 * that slot, r0/r1 are left exactly as the helper returned them, and sp
 * is back at its entry value.
 *
 * The slot must be exactly 8 bytes.  The address handed to the helper
 * is the lowest address of the reserved area, while the ldmfd below
 * reads the 8 bytes directly above the two pushed words.  Reserving
 * more than 8 bytes makes the two disagree, so r2/r3 would be loaded
 * from stack memory the helper never wrote.
 *
 * NOTE(review): assumes __ldivmod_helper takes (s64 dividend,
 * s64 divisor, s64 *remainder) with the pointer as its first stack
 * argument and returns the quotient in r0/r1 - confirm against its C
 * prototype.
 * NOTE(review): UNWIND(.save {lr}) does not describe the 16 bytes that
 * are actually pushed here - confirm the unwinder copes with that.
 */
ENTRY(__aeabi_ldivmod)
UNWIND(.fnstart)
UNWIND(.save {lr}	)
	sub sp, sp, #8			@ reserve the 8-byte remainder slot
	stmfd   sp!, {sp, lr}		@ push slot address (pre-push sp), lr
	bl __ldivmod_helper
	ldr lr, [sp, #4]		@ recover the return address
	add sp, sp, #8			@ drop the two pushed words
	ldmfd	sp!, {r2, r3}		@ r2/r3 = remainder, sp restored
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_ldivmod)
375#endif
376
/*
 * Ldiv0 - common division-by-zero path.
 *
 * Reached by a plain branch from the division routines, so lr still
 * holds the return address of their caller.  Calls __div0, then
 * returns 0 in r0 directly to that caller.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!		@ push lr, moving sp down by 8 bytes
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ pop the saved lr straight into pc
UNWIND(.fnend)
ENDPROC(Ldiv0)
387