/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>.  */


#include "assembler.h"

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Shift-and-subtract unsigned division core.
 *
 *   dividend  in: numerator.  It is reduced in place each time a shifted
 *             copy of the divisor is subtracted from it.
 *   divisor   in: denominator.  Clobbered (shifted left, then right).
 *   result    out: quotient bits, accumulated with orr.
 *   curbit    scratch: the quotient bit matching the current divisor shift.
 *
 * Every use in this file is preceded by checks that branch away when the
 * divisor is zero, when dividend <= divisor, and when the divisor is a
 * power of two.  The condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Align the top set bit of the divisor with the top set bit of
	@ the dividend in one step, using count-leading-zeros.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibble in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled four times: try the divisor at shifts
	@ 0, 1, 2 and 3 below the current position, then move down 4 bits.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute in order the bit index of the highest set bit of divisor
 * (31 - clz on ARMv5 and later).  The callers in this file only reach
 * it when the divisor is a power of two, so order is its log2 and can
 * be used directly as a right-shift count.
 *
 * The pre-ARMv5 path shifts divisor right while searching, so divisor
 * is clobbered there.  The condition flags are clobbered on that path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search for the set bit: 16, 8, 4, then the last 2 bits.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Shift-and-subtract unsigned remainder core.
 *
 *   dividend  in: numerator; out: what is left after all subtractions,
 *             i.e. the remainder.
 *   divisor   in: denominator.  Clobbered.
 *   order     scratch: number of bit positions the divisor was shifted up.
 *   spare     scratch (only written on the ARMv5+ path).
 *
 * As with ARM_DIV_BODY, the callers in this file filter out a zero
 * divisor, dividend <= divisor and power-of-two divisors beforehand.
 * The condition flags are clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparisons/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Clobb: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	moveq	pc, lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

	@ dividend <= divisor: quotient is 1 when equal, 0 otherwise.
11:	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr

	@ Power-of-two divisor: the quotient is a plain right shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder
 * Clobb: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	movls	pc, lr

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient
 * Clobb: r1, r2, r3, ip, flags
 *
 * The operands are made positive, divided as unsigned values, and the
 * quotient is negated when the sign bits of the inputs differ (ip holds
 * dividend EOR divisor).  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ |divisor| == 1: negate the dividend when the divisor was -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ |dividend| <= |divisor|: 0 when smaller, otherwise +1 or -1
	@ according to the saved result sign.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr

	@ Power-of-two divisor: shift |dividend|, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = remainder, negated when the dividend was negative
 * Clobb: r1, r2, r3, ip, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit divide returning quotient and remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 *
 * The inputs are saved across the call to __aeabi_uidiv and the
 * remainder is rebuilt as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit divide returning quotient and remainder.
 *
 * In:    r0 = dividend, r1 = divisor
 * Out:   r0 = quotient, r1 = remainder
 *
 * Same scheme as __aeabi_uidivmod, built on __aeabi_idiv.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

/*
 * __aeabi_uldivmod: 64-bit unsigned divide returning quotient and remainder.
 *
 * r0-r3 are passed through untouched to __qdivrem.  An 8-byte scratch
 * slot is reserved on the stack and its address is pushed so that it
 * sits at [sp] on entry to the callee; the slot is then popped into
 * r2:r3 on return.
 *
 * NOTE(review): presumably __qdivrem(u64 num, u64 den, u64 *rem) returns
 * the quotient in r0:r1 and stores the remainder through the pointer;
 * it is defined elsewhere, confirm against its definition.
 *
 * NOTE(review): "stmfd sp!, {sp, lr}" relies on the pre-writeback value
 * of sp being the one stored (ARM state, base register lowest in the
 * list) -- confirm for the targeted cores.
 */
ENTRY(__aeabi_uldivmod)
UNWIND(.fnstart)
UNWIND(.save {lr}	)
	sub	sp, sp, #8			@ scratch slot for the remainder
	stmfd	sp!, {sp, lr}			@ push &slot and lr
	bl	__qdivrem
	ldr	lr, [sp, #4]
	add	sp, sp, #8			@ sp -> remainder slot
	ldmfd	sp!, {r2, r3}			@ r2:r3 = slot contents
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_uldivmod)

/*
 * __aeabi_ldivmod: 64-bit signed divide returning quotient and remainder.
 *
 * Same stack protocol as __aeabi_uldivmod, calling __ldivmod_helper.
 *
 * The scratch slot is 8 bytes and, after the call, sp is advanced by
 * exactly the 8 bytes pushed by stmfd so that ldmfd reads r2:r3 from
 * the very slot whose address was handed to the helper.  (Reserving 16
 * bytes and then skipping 16 made the load read 8 bytes above the slot
 * that was passed.)
 *
 * NOTE(review): assumes __ldivmod_helper(s64 a, s64 b, s64 *rem) stores
 * one 64-bit remainder through the pointer; it is defined elsewhere,
 * confirm against its definition.
 */
ENTRY(__aeabi_ldivmod)
UNWIND(.fnstart)
UNWIND(.save {lr}	)
	sub	sp, sp, #8			@ scratch slot for the remainder
	stmfd	sp!, {sp, lr}			@ push &slot and lr
	bl	__ldivmod_helper
	ldr	lr, [sp, #4]
	add	sp, sp, #8			@ sp -> remainder slot
	ldmfd	sp!, {r2, r3}			@ r2:r3 = slot contents
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_ldivmod)
#endif

/*
 * Ldiv0: common division-by-zero path.
 *
 * Saves lr (moving sp down by 8 bytes), calls __div0, then returns 0 in
 * r0 to the original caller.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)