1 // SPDX-License-Identifier: BSD-3-Clause
2 
3 /*============================================================================
4 
5 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
6 Package, Release 3a, by John R. Hauser.
7 
8 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
9 All rights reserved.
10 
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13 
14  1. Redistributions of source code must retain the above copyright notice,
15     this list of conditions, and the following disclaimer.
16 
17  2. Redistributions in binary form must reproduce the above copyright notice,
18     this list of conditions, and the following disclaimer in the documentation
19     and/or other materials provided with the distribution.
20 
21  3. Neither the name of the University nor the names of its contributors may
22     be used to endorse or promote products derived from this software without
23     specific prior written permission.
24 
25 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
26 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
27 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
28 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
29 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
32 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 
36 =============================================================================*/
37 
38 #include <stdbool.h>
39 #include <stdint.h>
40 #include "platform.h"
41 #include "internals.h"
42 #include "specialize.h"
43 #include "softfloat.h"
44 
45 #ifdef SOFTFLOAT_FAST_INT64
46 
f128M_sqrt(const float128_t * aPtr,float128_t * zPtr)47 void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
48 {
49 
50     *zPtr = f128_sqrt( *aPtr );
51 
52 }
53 
54 #else
55 
f128M_sqrt(const float128_t * aPtr,float128_t * zPtr)56 void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
57 {
58     const uint32_t *aWPtr;
59     uint32_t *zWPtr;
60     uint32_t uiA96;
61     bool signA;
62     int32_t rawExpA;
63     uint32_t rem[6];
64     int32_t expA, expZ;
65     uint64_t rem64;
66     uint32_t sig32A, recipSqrt32, sig32Z, qs[3], q;
67     uint64_t sig64Z, x64;
68     uint32_t term[5], y[5], rem32;
69 
70     /*------------------------------------------------------------------------
71     *------------------------------------------------------------------------*/
72     aWPtr = (const uint32_t *) aPtr;
73     zWPtr = (uint32_t *) zPtr;
74     /*------------------------------------------------------------------------
75     *------------------------------------------------------------------------*/
76     uiA96 = aWPtr[indexWordHi( 4 )];
77     signA = signF128UI96( uiA96 );
78     rawExpA  = expF128UI96( uiA96 );
79     /*------------------------------------------------------------------------
80     *------------------------------------------------------------------------*/
81     if ( rawExpA == 0x7FFF ) {
82         if (
83             fracF128UI96( uiA96 )
84                 || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
85                         | aWPtr[indexWord( 4, 0 )])
86         ) {
87             softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
88             return;
89         }
90         if ( ! signA ) goto copyA;
91         goto invalid;
92     }
93     /*------------------------------------------------------------------------
94     *------------------------------------------------------------------------*/
95     expA = softfloat_shiftNormSigF128M( aWPtr, 13 - (rawExpA & 1), rem );
96     if ( expA == -128 ) goto copyA;
97     if ( signA ) goto invalid;
98     /*------------------------------------------------------------------------
99     | (`sig32Z' is guaranteed to be a lower bound on the square root of
100     | `sig32A', which makes `sig32Z' also a lower bound on the square root of
101     | `sigA'.)
102     *------------------------------------------------------------------------*/
103     expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
104     expA &= 1;
105     rem64 = (uint64_t) rem[indexWord( 4, 3 )]<<32 | rem[indexWord( 4, 2 )];
106     if ( expA ) {
107         if ( ! rawExpA ) {
108             softfloat_shortShiftRight128M( rem, 1, rem );
109             rem64 >>= 1;
110         }
111         sig32A = rem64>>29;
112     } else {
113         sig32A = rem64>>30;
114     }
115     recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
116     sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
117     if ( expA ) sig32Z >>= 1;
118     qs[2] = sig32Z;
119     rem64 -= (uint64_t) sig32Z * sig32Z;
120     rem[indexWord( 4, 3 )] = rem64>>32;
121     rem[indexWord( 4, 2 )] = rem64;
122     /*------------------------------------------------------------------------
123     *------------------------------------------------------------------------*/
124     q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
125     qs[1] = q;
126     sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
127     x64 = ((uint64_t) sig32Z<<32) + sig64Z;
128     term[indexWord( 4, 3 )] = 0;
129     term[indexWord( 4, 2 )] = x64>>32;
130     term[indexWord( 4, 1 )] = x64;
131     term[indexWord( 4, 0 )] = 0;
132     softfloat_remStep128MBy32( rem, 29, term, q, y );
133     rem64 = (uint64_t) y[indexWord( 4, 3 )]<<32 | y[indexWord( 4, 2 )];
134     /*------------------------------------------------------------------------
135     *------------------------------------------------------------------------*/
136     q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
137     sig64Z <<= 1;
138     /*------------------------------------------------------------------------
139     | (Repeating this loop is a rare occurrence.)
140     *------------------------------------------------------------------------*/
141     for (;;) {
142         x64 = sig64Z + (q>>26);
143         term[indexWord( 4, 2 )] = x64>>32;
144         term[indexWord( 4, 1 )] = x64;
145         term[indexWord( 4, 0 )] = q<<6;
146         term[indexWord( 4, 3 )] = 0;
147         softfloat_remStep128MBy32(
148             y, 29, term, q, &rem[indexMultiwordHi( 6, 4 )] );
149         rem32 = rem[indexWordHi( 6 )];
150         if ( ! (rem32 & 0x80000000) ) break;
151         --q;
152     }
153     qs[0] = q;
154     rem64 = (uint64_t) rem32<<32 | rem[indexWord( 6, 4 )];
155     /*------------------------------------------------------------------------
156     *------------------------------------------------------------------------*/
157     q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
158     x64 = (uint64_t) q<<27;
159     y[indexWord( 5, 0 )] = x64;
160     x64 = ((uint64_t) qs[0]<<24) + (x64>>32);
161     y[indexWord( 5, 1 )] = x64;
162     x64 = ((uint64_t) qs[1]<<21) + (x64>>32);
163     y[indexWord( 5, 2 )] = x64;
164     x64 = ((uint64_t) qs[2]<<18) + (x64>>32);
165     y[indexWord( 5, 3 )] = x64;
166     y[indexWord( 5, 4 )] = x64>>32;
167     /*------------------------------------------------------------------------
168     *------------------------------------------------------------------------*/
169     if ( (q & 0xF) <= 2 ) {
170         q &= ~3;
171         y[indexWordLo( 5 )] = q<<27;
172         term[indexWord( 5, 4 )] = 0;
173         term[indexWord( 5, 3 )] = 0;
174         term[indexWord( 5, 2 )] = 0;
175         term[indexWord( 5, 1 )] = q>>6;
176         term[indexWord( 5, 0 )] = q<<26;
177         softfloat_sub160M( y, term, term );
178         rem[indexWord( 6, 1 )] = 0;
179         rem[indexWord( 6, 0 )] = 0;
180         softfloat_remStep160MBy32(
181             &rem[indexMultiwordLo( 6, 5 )],
182             14,
183             term,
184             q,
185             &rem[indexMultiwordLo( 6, 5 )]
186         );
187         rem32 = rem[indexWord( 6, 4 )];
188         if ( rem32 & 0x80000000 ) {
189             softfloat_sub1X160M( y );
190         } else {
191             if (
192                 rem32 || rem[indexWord( 6, 0 )] || rem[indexWord( 6, 1 )]
193                     || (rem[indexWord( 6, 3 )] | rem[indexWord( 6, 2 )])
194             ) {
195                 y[indexWordLo( 5 )] |= 1;
196             }
197         }
198     }
199     softfloat_roundPackMToF128M( 0, expZ, y, zWPtr );
200     return;
201     /*------------------------------------------------------------------------
202     *------------------------------------------------------------------------*/
203  invalid:
204     softfloat_invalidF128M( zWPtr );
205     return;
206     /*------------------------------------------------------------------------
207     *------------------------------------------------------------------------*/
208  copyA:
209     zWPtr[indexWordHi( 4 )] = uiA96;
210     zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
211     zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
212     zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
213 
214 }
215 
216 #endif
217 
218