1 /* This file is generated from divrem.m4; DO NOT EDIT! */ 2/* 3 * Division and remainder, from Appendix E of the Sparc Version 8 4 * Architecture Manual, with fixes from Gordon Irlam. 5 */ 6 7/* 8 * Input: dividend and divisor in %o0 and %o1 respectively. 9 * 10 * m4 parameters: 11 * .urem name of function to generate 12 * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 13 * false false=true => signed; false=false => unsigned 14 * 15 * Algorithm parameters: 16 * N how many bits per iteration we try to get (4) 17 * WORDSIZE total number of bits (32) 18 * 19 * Derived constants: 20 * TOPBITS number of bits in the top decade of a number 21 * 22 * Important variables: 23 * Q the partial quotient under development (initially 0) 24 * R the remainder so far, initially the dividend 25 * ITER number of main division loop iterations required; 26 * equal to ceil(log2(quotient) / N). Note that this 27 * is the log base (2^N) of the quotient. 28 * V the current comparand, initially divisor*2^(ITER*N-1) 29 * 30 * Cost: 31 * Current estimate for non-large dividend is 32 * ceil(log2(quotient) / N) * (10 + 7N/2) + C 33 * A large dividend is one greater than 2^(31-TOPBITS) and takes a 34 * different path, as the upper bits of the quotient must be developed 35 * one bit at a time. 36 */ 37 38 39 40ENTRY(.urem) 41 42 ! Ready to divide. Compute size of quotient; scale comparand. 43 orcc %o1, %g0, %o5 44 bne 1f 45 mov %o0, %o3 46 47 ! Divide by zero trap. If it returns, return 0 (about as 48 ! wrong as possible, but that is what SunOS does...). 49 ta ST_DIV0 50 retl 51 clr %o0 52 531: 54 cmp %o3, %o5 ! if %o1 exceeds %o0, done 55 blu LOC(got_result) ! (and algorithm fails otherwise) 56 clr %o2 57 sethi %hi(1 << (32 - 4 - 1)), %g1 58 cmp %o3, %g1 59 blu LOC(not_really_big) 60 clr %o4 61 62 ! Here the dividend is >= 2**(31-N) or so. We must be careful here, 63 ! as our usual N-at-a-shot divide step will cause overflow and havoc. 64 ! The number of bits in the result here is N*ITER+SC, where SC <= N. 65 ! Compute ITER in an unorthodox manner: know we need to shift V into 66 ! the top decade: so do not even bother to compare to R. 67 1: 68 cmp %o5, %g1 69 bgeu 3f 70 mov 1, %g2 71 sll %o5, 4, %o5 72 b 1b 73 add %o4, 1, %o4 74 75 ! Now compute %g2. 76 2: addcc %o5, %o5, %o5 77 bcc LOC(not_too_big) 78 add %g2, 1, %g2 79 80 ! We get here if the %o1 overflowed while shifting. 81 ! This means that %o3 has the high-order bit set. 82 ! Restore %o5 and subtract from %o3. 83 sll %g1, 4, %g1 ! high order bit 84 srl %o5, 1, %o5 ! rest of %o5 85 add %o5, %g1, %o5 86 b LOC(do_single_div) 87 sub %g2, 1, %g2 88 89 LOC(not_too_big): 90 3: cmp %o5, %o3 91 blu 2b 92 nop 93 be LOC(do_single_div) 94 nop 95 /* NB: these are commented out in the V8-Sparc manual as well */ 96 /* (I do not understand this) */ 97 ! %o5 > %o3: went too far: back up 1 step 98 ! srl %o5, 1, %o5 99 ! dec %g2 100 ! do single-bit divide steps 101 ! 102 ! We have to be careful here. We know that %o3 >= %o5, so we can do the 103 ! first divide step without thinking. BUT, the others are conditional, 104 ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- 105 ! order bit set in the first step, just falling into the regular 106 ! division loop will mess up the first time around. 107 ! So we unroll slightly... 108 LOC(do_single_div): 109 subcc %g2, 1, %g2 110 bl LOC(end_regular_divide) 111 nop 112 sub %o3, %o5, %o3 113 mov 1, %o2 114 b LOC(end_single_divloop) 115 nop 116 LOC(single_divloop): 117 sll %o2, 1, %o2 118 bl 1f 119 srl %o5, 1, %o5 120 ! %o3 >= 0 121 sub %o3, %o5, %o3 122 b 2f 123 add %o2, 1, %o2 124 1: ! %o3 < 0 125 add %o3, %o5, %o3 126 sub %o2, 1, %o2 127 2: 128 LOC(end_single_divloop): 129 subcc %g2, 1, %g2 130 bge LOC(single_divloop) 131 tst %o3 132 b,a LOC(end_regular_divide) 133 134LOC(not_really_big): 1351: 136 sll %o5, 4, %o5 137 cmp %o5, %o3 138 bleu 1b 139 addcc %o4, 1, %o4 140 be LOC(got_result) 141 sub %o4, 1, %o4 142 143 tst %o3 ! set up for initial iteration 144LOC(divloop): 145 sll %o2, 4, %o2 146 ! depth 1, accumulated bits 0 147 bl LOC(1.16) 148 srl %o5,1,%o5 149 ! remainder is positive 150 subcc %o3,%o5,%o3 151 ! depth 2, accumulated bits 1 152 bl LOC(2.17) 153 srl %o5,1,%o5 154 ! remainder is positive 155 subcc %o3,%o5,%o3 156 ! depth 3, accumulated bits 3 157 bl LOC(3.19) 158 srl %o5,1,%o5 159 ! remainder is positive 160 subcc %o3,%o5,%o3 161 ! depth 4, accumulated bits 7 162 bl LOC(4.23) 163 srl %o5,1,%o5 164 ! remainder is positive 165 subcc %o3,%o5,%o3 166 b 9f 167 add %o2, (7*2+1), %o2 168 169LOC(4.23): 170 ! remainder is negative 171 addcc %o3,%o5,%o3 172 b 9f 173 add %o2, (7*2-1), %o2 174 175 176LOC(3.19): 177 ! remainder is negative 178 addcc %o3,%o5,%o3 179 ! depth 4, accumulated bits 5 180 bl LOC(4.21) 181 srl %o5,1,%o5 182 ! remainder is positive 183 subcc %o3,%o5,%o3 184 b 9f 185 add %o2, (5*2+1), %o2 186 187LOC(4.21): 188 ! remainder is negative 189 addcc %o3,%o5,%o3 190 b 9f 191 add %o2, (5*2-1), %o2 192 193 194 195LOC(2.17): 196 ! remainder is negative 197 addcc %o3,%o5,%o3 198 ! depth 3, accumulated bits 1 199 bl LOC(3.17) 200 srl %o5,1,%o5 201 ! remainder is positive 202 subcc %o3,%o5,%o3 203 ! depth 4, accumulated bits 3 204 bl LOC(4.19) 205 srl %o5,1,%o5 206 ! remainder is positive 207 subcc %o3,%o5,%o3 208 b 9f 209 add %o2, (3*2+1), %o2 210 211LOC(4.19): 212 ! remainder is negative 213 addcc %o3,%o5,%o3 214 b 9f 215 add %o2, (3*2-1), %o2 216 217 218LOC(3.17): 219 ! remainder is negative 220 addcc %o3,%o5,%o3 221 ! depth 4, accumulated bits 1 222 bl LOC(4.17) 223 srl %o5,1,%o5 224 ! remainder is positive 225 subcc %o3,%o5,%o3 226 b 9f 227 add %o2, (1*2+1), %o2 228 229LOC(4.17): 230 ! remainder is negative 231 addcc %o3,%o5,%o3 232 b 9f 233 add %o2, (1*2-1), %o2 234 235 236 237 238LOC(1.16): 239 ! remainder is negative 240 addcc %o3,%o5,%o3 241 ! depth 2, accumulated bits -1 242 bl LOC(2.15) 243 srl %o5,1,%o5 244 ! remainder is positive 245 subcc %o3,%o5,%o3 246 ! depth 3, accumulated bits -1 247 bl LOC(3.15) 248 srl %o5,1,%o5 249 ! remainder is positive 250 subcc %o3,%o5,%o3 251 ! depth 4, accumulated bits -1 252 bl LOC(4.15) 253 srl %o5,1,%o5 254 ! remainder is positive 255 subcc %o3,%o5,%o3 256 b 9f 257 add %o2, (-1*2+1), %o2 258 259LOC(4.15): 260 ! remainder is negative 261 addcc %o3,%o5,%o3 262 b 9f 263 add %o2, (-1*2-1), %o2 264 265 266LOC(3.15): 267 ! remainder is negative 268 addcc %o3,%o5,%o3 269 ! depth 4, accumulated bits -3 270 bl LOC(4.13) 271 srl %o5,1,%o5 272 ! remainder is positive 273 subcc %o3,%o5,%o3 274 b 9f 275 add %o2, (-3*2+1), %o2 276 277LOC(4.13): 278 ! remainder is negative 279 addcc %o3,%o5,%o3 280 b 9f 281 add %o2, (-3*2-1), %o2 282 283 284 285LOC(2.15): 286 ! remainder is negative 287 addcc %o3,%o5,%o3 288 ! depth 3, accumulated bits -3 289 bl LOC(3.13) 290 srl %o5,1,%o5 291 ! remainder is positive 292 subcc %o3,%o5,%o3 293 ! depth 4, accumulated bits -5 294 bl LOC(4.11) 295 srl %o5,1,%o5 296 ! remainder is positive 297 subcc %o3,%o5,%o3 298 b 9f 299 add %o2, (-5*2+1), %o2 300 301LOC(4.11): 302 ! remainder is negative 303 addcc %o3,%o5,%o3 304 b 9f 305 add %o2, (-5*2-1), %o2 306 307 308LOC(3.13): 309 ! remainder is negative 310 addcc %o3,%o5,%o3 311 ! depth 4, accumulated bits -7 312 bl LOC(4.9) 313 srl %o5,1,%o5 314 ! remainder is positive 315 subcc %o3,%o5,%o3 316 b 9f 317 add %o2, (-7*2+1), %o2 318 319LOC(4.9): 320 ! remainder is negative 321 addcc %o3,%o5,%o3 322 b 9f 323 add %o2, (-7*2-1), %o2 324 325 326 327 328 9: 329LOC(end_regular_divide): 330 subcc %o4, 1, %o4 331 bge LOC(divloop) 332 tst %o3 333 bl,a LOC(got_result) 334 ! non-restoring fixup here (one instruction only!) 335 add %o3, %o1, %o3 336 337 338LOC(got_result): 339 340 retl 341 mov %o3, %o0 342 343END(.urem) 344