1 // Copyright 2018 Ulf Adams
2 //
3 // The contents of this file may be used under the terms of the Apache License,
4 // Version 2.0.
5 //
6 //    (See accompanying file LICENSE-Apache or copy at
7 //     http://www.apache.org/licenses/LICENSE-2.0)
8 //
9 // Alternatively, the contents of this file may be used under the terms of
10 // the Boost Software License, Version 1.0.
11 //    (See accompanying file LICENSE-Boost or copy at
12 //     https://www.boost.org/LICENSE_1_0.txt)
13 //
14 // Unless required by applicable law or agreed to in writing, this software
15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 // KIND, either express or implied.
17 #ifndef RYU_F2S_INTRINSICS_H
18 #define RYU_F2S_INTRINSICS_H
19 
20 // Defines RYU_32_BIT_PLATFORM if applicable.
21 
// Table configuration. When RYU_FLOAT_FULL_TABLE is defined, nothing further
// is defined in this section.
#if defined(RYU_FLOAT_FULL_TABLE)


#else

// NOTE(review): both arms of this conditional are empty in this file --
// presumably they once selected a table header to include; confirm.
#if defined(RYU_OPTIMIZE_SIZE)
#else
#endif
// Without the full float table, the float bit counts are derived from the
// double ones, reduced by 64. In this configuration mulPow5InvDivPow2 and
// mulPow5divPow2 pass only element [1] of a double table entry to mulShift32.
#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64)
#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64)

#endif
34 
// Returns the exponent of the largest power of 5 that divides value.
// value must be non-zero (checked by assert): zero stays divisible by 5
// after every division, so the loop would never terminate on it.
static inline uint32_t pow5factor_32(uint32_t value) {
  uint32_t exponent = 0;
  assert(value != 0);
  while (value % 5 == 0) {
    value /= 5;
    ++exponent;
    // Re-check before the next divisibility test, once per iteration.
    assert(value != 0);
  }
  return exponent;
}
49 
// Returns true if value is divisible by 5^p.
// The check compares p against the number of factors of 5 reported by
// pow5factor_32, so value is subject to that function's non-zero assertion.
static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
  const uint32_t fives = pow5factor_32(value);
  return p <= fives;
}
54 
// Returns true if value is divisible by 2^p.
// p must be less than 32: shifting the 32-bit constant 1u by 32 or more is
// undefined behavior, so the precondition is checked in debug builds.
static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
  assert(p < 32);
  // __builtin_ctz doesn't appear to be faster here.
  // (1u << p) - 1 selects the low p bits; all of them must be clear.
  return (value & ((1u << p) - 1)) == 0;
}
60 
// Returns (m * factor) >> shift as a 32-bit value. shift must be greater
// than 32 (checked by assert).
// It seems to be slightly faster to avoid uint128_t here, although the
// generated code for uint128_t looks slightly nicer.
static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
  assert(shift > 32);

  // Form the product from two 32x32->64 partial products. The casts here
  // help MSVC to avoid calls to the __allmul library function.
  const uint32_t lowWord = (uint32_t)(factor);
  const uint32_t highWord = (uint32_t)(factor >> 32);
  const uint64_t lowProduct = (uint64_t)m * lowWord;
  const uint64_t highProduct = (uint64_t)m * highWord;

#if !defined(RYU_32_BIT_PLATFORM)
  // upper holds (m * factor) >> 32; the remaining shift is applied to it.
  const uint64_t upper = (lowProduct >> 32) + highProduct;
  const uint64_t result = upper >> (shift - 32);
  assert(result <= UINT32_MAX);
  return (uint32_t) result;
#else // RYU_32_BIT_PLATFORM
  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
  // need the upper 32 bits of the result and the shift value is > 32.
  // topWord:midWord is the same (m * factor) >> 32 value, kept as two
  // 32-bit halves with the carry propagated by hand.
  const uint32_t carryIn = (uint32_t)(lowProduct >> 32);
  uint32_t midWord = (uint32_t)(highProduct);
  uint32_t topWord = (uint32_t)(highProduct >> 32);
  midWord += carryIn;
  topWord += (midWord < carryIn);
  if (shift < 64) {
    const int32_t s = shift - 32;
    return (topWord << (32 - s)) | (midWord >> s);
  }
  // s2f can call this with a shift value >= 64, which we have to handle.
  // This could now be slower than the !defined(RYU_32_BIT_PLATFORM) case.
  return (uint32_t)(topWord >> (shift - 64));
#endif // RYU_32_BIT_PLATFORM
}
96 
mulPow5InvDivPow2(const uint32_t m,const uint32_t q,const int32_t j)97 static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) {
98 #if defined(RYU_FLOAT_FULL_TABLE)
99   return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j);
100 #elif defined(RYU_OPTIMIZE_SIZE)
101   // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
102   // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
103   // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
104   uint64_t pow5[2];
105   double_computeInvPow5(q, pow5);
106   return mulShift32(m, pow5[1] + 1, j);
107 #else
108   return mulShift32(m, DOUBLE_POW5_INV_SPLIT[q][1] + 1, j);
109 #endif
110 }
111 
mulPow5divPow2(const uint32_t m,const uint32_t i,const int32_t j)112 static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) {
113 #if defined(RYU_FLOAT_FULL_TABLE)
114   return mulShift32(m, FLOAT_POW5_SPLIT[i], j);
115 #elif defined(RYU_OPTIMIZE_SIZE)
116   uint64_t pow5[2];
117   double_computePow5(i, pow5);
118   return mulShift32(m, pow5[1], j);
119 #else
120   return mulShift32(m, DOUBLE_POW5_SPLIT[i][1], j);
121 #endif
122 }
123 
124 #endif // RYU_F2S_INTRINSICS_H
125