qcbor/src/ieee754.c

// SPDX-License-Identifier: BSD-3-Clause
/* ==========================================================================
 * ieee754.c -- floating-point conversion between half, double & single-precision
 *
 * Copyright (c) 2018-2024, Laurence Lundblade. All rights reserved.
 * Copyright (c) 2021, Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * See BSD-3-Clause license in README.md
 *
 * Created on 7/23/18
 * ========================================================================== */

/*
 * Include before QCBOR_DISABLE_PREFERRED_FLOAT is checked as
 * QCBOR_DISABLE_PREFERRED_FLOAT might be defined in qcbor/qcbor_common.h
 */
#include "qcbor/qcbor_common.h"

#ifndef QCBOR_DISABLE_PREFERRED_FLOAT

#include "ieee754.h"
#include <string.h> /* For memcpy() */


/*
 * This code has long lines and is easier to read because of
 * them. Some coding guidelines prefer 80 column lines (can they not
 * afford big displays?).
 *
 * This code works solely using shifts and masks and thus has no
 * dependency on any math libraries. It can even work if the CPU
 * doesn't have any floating-point support, though that isn't the most
 * useful thing to do.
 *
 * The memcpy() dependency is only for CopyFloatToUint32() and friends
 * which only is needed to avoid type punning when converting the
 * actual float bits to an unsigned value so the bit shifts and masks
 * can work.
 *
 * The references used to write this code:
 *
 *  IEEE 754-2008, particularly section 3.6 and 6.2.1
 *
 *  https://en.wikipedia.org/wiki/IEEE_754 and subordinate pages
 *
 *  https://stackoverflow.com/questions/19800415/why-does-ieee-754-reserve-so-many-nan-values
 *
 *  https://stackoverflow.com/questions/46073295/implicit-type-promotion-rules
 *
 *  https://stackoverflow.com/questions/589575/what-does-the-c-standard-state-the-size-of-int-long-type-to-be
 *
 * IEEE754_FloatToDouble(uint32_t uFloat) was created but is not
 * needed. It can be retrieved from github history if needed.
 */


/* ----- Half Precsion ----------- */
#define HALF_NUM_SIGNIFICAND_BITS (10)
#define HALF_NUM_EXPONENT_BITS    (5)
#define HALF_NUM_SIGN_BITS        (1)

#define HALF_SIGNIFICAND_SHIFT    (0)
#define HALF_EXPONENT_SHIFT       (HALF_NUM_SIGNIFICAND_BITS)
#define HALF_SIGN_SHIFT           (HALF_NUM_SIGNIFICAND_BITS + HALF_NUM_EXPONENT_BITS)

#define HALF_SIGNIFICAND_MASK     (0x3ffU) // The lower 10 bits
#define HALF_EXPONENT_MASK        (0x1fU << HALF_EXPONENT_SHIFT) // 0x7c00 5 bits of exponent
#define HALF_SIGN_MASK            (0x01U << HALF_SIGN_SHIFT) // 0x8000 1 bit of sign
#define HALF_QUIET_NAN_BIT        (0x01U << (HALF_NUM_SIGNIFICAND_BITS-1)) // 0x0200

/* Biased    Biased    Unbiased   Use
 *  0x00       0        -15       0 and subnormal
 *  0x01       1        -14       Smallest normal exponent
 *  0x1e      30         15       Largest normal exponent
 *  0x1F      31         16       NaN and Infinity  */
#define HALF_EXPONENT_BIAS        (15)
#define HALF_EXPONENT_MAX         (HALF_EXPONENT_BIAS)    //  15 Unbiased
#define HALF_EXPONENT_MIN         (-HALF_EXPONENT_BIAS+1) // -14 Unbiased
#define HALF_EXPONENT_ZERO        (-HALF_EXPONENT_BIAS)   // -15 Unbiased
#define HALF_EXPONENT_INF_OR_NAN  (HALF_EXPONENT_BIAS+1)  //  16 Unbiased


/* ------ Single-Precision -------- */
#define SINGLE_NUM_SIGNIFICAND_BITS (23)
#define SINGLE_NUM_EXPONENT_BITS    (8)
#define SINGLE_NUM_SIGN_BITS        (1)

#define SINGLE_SIGNIFICAND_SHIFT    (0)
#define SINGLE_EXPONENT_SHIFT       (SINGLE_NUM_SIGNIFICAND_BITS)
#define SINGLE_SIGN_SHIFT           (SINGLE_NUM_SIGNIFICAND_BITS + SINGLE_NUM_EXPONENT_BITS)

#define SINGLE_SIGNIFICAND_MASK     (0x7fffffU) // The lower 23 bits
#define SINGLE_EXPONENT_MASK        (0xffU << SINGLE_EXPONENT_SHIFT) // 8 bits of exponent
#define SINGLE_SIGN_MASK            (0x01U << SINGLE_SIGN_SHIFT) // 1 bit of sign
#define SINGLE_QUIET_NAN_BIT        (0x01U << (SINGLE_NUM_SIGNIFICAND_BITS-1))

/* Biased  Biased   Unbiased  Use
 *  0x0000     0     -127      0 and subnormal
 *  0x0001     1     -126      Smallest normal exponent
 *  0x7f     127        0      1
 *  0xfe     254      127      Largest normal exponent
 *  0xff     255      128      NaN and Infinity  */
#define SINGLE_EXPONENT_BIAS        (127)
#define SINGLE_EXPONENT_MAX         (SINGLE_EXPONENT_BIAS)
#define SINGLE_EXPONENT_MIN         (-SINGLE_EXPONENT_BIAS+1)
#define SINGLE_EXPONENT_ZERO        (-SINGLE_EXPONENT_BIAS)
#define SINGLE_EXPONENT_INF_OR_NAN  (SINGLE_EXPONENT_BIAS+1)


/* --------- Double-Precision ---------- */
#define DOUBLE_NUM_SIGNIFICAND_BITS (52)
#define DOUBLE_NUM_EXPONENT_BITS    (11)
#define DOUBLE_NUM_SIGN_BITS        (1)

#define DOUBLE_SIGNIFICAND_SHIFT    (0)
#define DOUBLE_EXPONENT_SHIFT       (DOUBLE_NUM_SIGNIFICAND_BITS)
#define DOUBLE_SIGN_SHIFT           (DOUBLE_NUM_SIGNIFICAND_BITS + DOUBLE_NUM_EXPONENT_BITS)

#define DOUBLE_SIGNIFICAND_MASK     (0xfffffffffffffULL) // The lower 52 bits
#define DOUBLE_EXPONENT_MASK        (0x7ffULL << DOUBLE_EXPONENT_SHIFT) // 11 bits of exponent
#define DOUBLE_SIGN_MASK            (0x01ULL << DOUBLE_SIGN_SHIFT) // 1 bit of sign
#define DOUBLE_QUIET_NAN_BIT        (0x01ULL << (DOUBLE_NUM_SIGNIFICAND_BITS-1))


/* Biased      Biased   Unbiased  Use
 * 0x00000000     0     -1023     0 and subnormal
 * 0x00000001     1     -1022     Smallest normal exponent
 * 0x000007fe  2046      1023     Largest normal exponent
 * 0x000007ff  2047      1024     NaN and Infinity  */
#define DOUBLE_EXPONENT_BIAS        (1023)
#define DOUBLE_EXPONENT_MAX         (DOUBLE_EXPONENT_BIAS)
#define DOUBLE_EXPONENT_MIN         (-DOUBLE_EXPONENT_BIAS+1)
#define DOUBLE_EXPONENT_ZERO        (-DOUBLE_EXPONENT_BIAS)
#define DOUBLE_EXPONENT_INF_OR_NAN  (DOUBLE_EXPONENT_BIAS+1)


/*
 * Convenient functions to avoid type punning, compiler warnings and
 * such. The optimizer reduces them to a simple assignment. This is a
 * crusty corner of C. It shouldn't be this hard.
 *
 * These are also in UsefulBuf.h under a different name. They are copied
 * here to avoid a dependency on UsefulBuf.h. There is no object code
 * size impact because these always optimze down to a simple assignment.
 */
static inline uint32_t
CopyFloatToUint32(float f)
{
   uint32_t u32;
   memcpy(&u32, &f, sizeof(uint32_t));
   return u32;
}

static inline uint64_t
CopyDoubleToUint64(double d)
{
   uint64_t u64;
   memcpy(&u64, &d, sizeof(uint64_t));
   return u64;
}

static inline double
CopyUint64ToDouble(uint64_t u64)
{
   double d;
   memcpy(&d, &u64, sizeof(uint64_t));
   return d;
}

static inline float
CopyUint32ToSingle(uint32_t u32)
{
   float f;
   memcpy(&f, &u32, sizeof(uint32_t));
   return f;
}


/**
 * @brief Assemble sign, significand and exponent into single precision float.
 *
 * @param[in] uDoubleSign              0 if positive, 1 if negative
 * @pararm[in] uDoubleSignificand      Bits of the significand
 * @param[in] nDoubleUnBiasedExponent  Exponent
 *
 * This returns the bits for a single-precision float, a binary64
 * as specified in IEEE754.
 */
static double
IEEE754_AssembleDouble(uint64_t uDoubleSign,
                       uint64_t uDoubleSignificand,
                       int64_t  nDoubleUnBiasedExponent)
{
   uint64_t uDoubleBiasedExponent;

   uDoubleBiasedExponent = (uint64_t)(nDoubleUnBiasedExponent + DOUBLE_EXPONENT_BIAS);

   return CopyUint64ToDouble(uDoubleSignificand |
                             (uDoubleBiasedExponent << DOUBLE_EXPONENT_SHIFT) |
                             (uDoubleSign << DOUBLE_SIGN_SHIFT));
}


double
IEEE754_HalfToDouble(uint16_t uHalfPrecision)
{
   uint64_t uDoubleSignificand;
   int64_t  nDoubleUnBiasedExponent;
   double   dResult;

   /* Pull out the three parts of the half-precision float.  Do all
    * the work in 64 bits because that is what the end result is.  It
    * may give smaller code size and will keep static analyzers
    * happier.
    */
   const uint64_t uHalfSignificand      = uHalfPrecision & HALF_SIGNIFICAND_MASK;
   const uint64_t uHalfBiasedExponent   = (uHalfPrecision & HALF_EXPONENT_MASK) >> HALF_EXPONENT_SHIFT;
   const int64_t  nHalfUnBiasedExponent = (int64_t)uHalfBiasedExponent - HALF_EXPONENT_BIAS;
   const uint64_t uHalfSign             = (uHalfPrecision & HALF_SIGN_MASK) >> HALF_SIGN_SHIFT;

   if(nHalfUnBiasedExponent == HALF_EXPONENT_ZERO) {
      /* 0 or subnormal */
      if(uHalfSignificand) {
         /* --- SUBNORMAL --- */
         /* A half-precision subnormal can always be converted to a
          * normal double-precision float because the ranges line up.
          * The exponent of a subnormal starts out at the min exponent
          * for a normal. As the sub normal significand bits are
          * shifted, left to normalize, the exponent is
          * decremented. Shifting continues until fully normalized.
          */
          nDoubleUnBiasedExponent = HALF_EXPONENT_MIN;
          uDoubleSignificand      = uHalfSignificand;
          do {
             uDoubleSignificand <<= 1;
             nDoubleUnBiasedExponent--;
          } while ((uDoubleSignificand & (1ULL << HALF_NUM_SIGNIFICAND_BITS)) == 0);
          /* A normal has an implied 1 in the most significant
           * position that a subnormal doesn't. */
          uDoubleSignificand -= 1ULL << HALF_NUM_SIGNIFICAND_BITS;
          /* Must shift into place for a double significand */
          uDoubleSignificand <<= DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS;

          dResult = IEEE754_AssembleDouble(uHalfSign,
                                           uDoubleSignificand,
                                           nDoubleUnBiasedExponent);
      } else {
         /* --- ZERO --- */
         dResult = IEEE754_AssembleDouble(uHalfSign,
                                          0,
                                          DOUBLE_EXPONENT_ZERO);
      }
   } else if(nHalfUnBiasedExponent == HALF_EXPONENT_INF_OR_NAN) {
      /* NaN or Inifinity */
      if(uHalfSignificand) {
         /* --- NaN --- */
         /* Half-precision payloads always fit into double precision
          * payloads. They are shifted left the same as a normal
          * number significand.
          */
         uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
         dResult = IEEE754_AssembleDouble(uHalfSign,
                                          uDoubleSignificand,
                                          DOUBLE_EXPONENT_INF_OR_NAN);
      } else {
         /* --- INFINITY --- */
         dResult = IEEE754_AssembleDouble(uHalfSign,
                                          0,
                                          DOUBLE_EXPONENT_INF_OR_NAN);
      }
   } else {
      /* --- NORMAL NUMBER --- */
      uDoubleSignificand = uHalfSignificand << (DOUBLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
      dResult = IEEE754_AssembleDouble(uHalfSign,
                                       uDoubleSignificand,
                                       nHalfUnBiasedExponent);
   }

   return dResult;
}


/**
 * @brief Assemble sign, significand and exponent into single precision float.
 *
 * @param[in] uHalfSign              0 if positive, 1 if negative
 * @pararm[in] uHalfSignificand      Bits of the significand
 * @param[in] nHalfUnBiasedExponent  Exponent
 *
 * This returns the bits for a single-precision float, a binary32 as
 * specified in IEEE754. It is returned as a uint64_t rather than a
 * uint32_t or a float for convenience of usage.
 */
static uint32_t
IEEE754_AssembleHalf(uint32_t uHalfSign,
                     uint32_t uHalfSignificand,
                     int32_t nHalfUnBiasedExponent)
{
   uint32_t uHalfUnbiasedExponent;

   uHalfUnbiasedExponent = (uint32_t)(nHalfUnBiasedExponent + HALF_EXPONENT_BIAS);

   return uHalfSignificand |
          (uHalfUnbiasedExponent << HALF_EXPONENT_SHIFT) |
          (uHalfSign << HALF_SIGN_SHIFT);
}


/*  Public function; see ieee754.h */
IEEE754_union
IEEE754_SingleToHalf(float f)
{
   IEEE754_union result;
   uint32_t      uDroppedBits;
   int32_t       nExponentDifference;
   int32_t       nShiftAmount;
   uint32_t      uHalfSignificand;

   /* Pull the three parts out of the double-precision float Most work
    * is done with uint32_t which helps avoid integer promotions and
    * static analyzer complaints.
    */
   const uint32_t uSingle                 = CopyFloatToUint32(f);
   const uint32_t uSingleBiasedExponent   = (uSingle & SINGLE_EXPONENT_MASK) >> SINGLE_EXPONENT_SHIFT;
   const int32_t  nSingleUnbiasedExponent = (int32_t)uSingleBiasedExponent - SINGLE_EXPONENT_BIAS;
   const uint32_t uSingleSignificand      = uSingle & SINGLE_SIGNIFICAND_MASK;
   const uint32_t uSingleSign             = (uSingle & SINGLE_SIGN_MASK) >> SINGLE_SIGN_SHIFT;

   if(nSingleUnbiasedExponent == SINGLE_EXPONENT_ZERO) {
      if(uSingleSignificand == 0) {
         /* --- IS ZERO --- */
         result.uSize  = IEEE754_UNION_IS_HALF;
         result.uValue = IEEE754_AssembleHalf(uSingleSign,
                                              0,
                                              HALF_EXPONENT_ZERO);
      } else {
         /* --- IS SINGLE SUBNORMAL --- */
         /* The largest single subnormal is slightly less than the
          * largest single normal which is 2^-149 or
          * 2.2040517676619426e-38.  The smallest half subnormal is
          * 2^-14 or 5.9604644775390625E-8.  There is no overlap so
          * single subnormals can't be converted to halfs of any sort.
          */
         result.uSize   = IEEE754_UNION_IS_SINGLE;
         result.uValue  = uSingle;
      }
   } else if(nSingleUnbiasedExponent == SINGLE_EXPONENT_INF_OR_NAN) {
      if(uSingleSignificand == 0) {
         /* ---- IS INFINITY ---- */
         result.uSize  = IEEE754_UNION_IS_HALF;
         result.uValue = IEEE754_AssembleHalf(uSingleSign, 0, HALF_EXPONENT_INF_OR_NAN);
      } else {
         /* The NaN can only be converted if no payload bits are lost
          * per RFC 8949 section 4.1 that defines Preferred
          * Serializaton. Note that Deterministically Encode CBOR in
          * section 4.2 allows for some variation of this rule, but at
          * the moment this implementation is of Preferred
          * Serialization, not CDE. As of December 2023, we are also
          * expecting an update to CDE. This code may need to be
          * updated for CDE.
          */
         uDroppedBits = uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS);
         if(uDroppedBits == 0) {
            /* --- IS CONVERTABLE NAN --- */
            uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);
            result.uSize  = IEEE754_UNION_IS_HALF;
            result.uValue = IEEE754_AssembleHalf(uSingleSign,
                                                 uHalfSignificand,
                                                 HALF_EXPONENT_INF_OR_NAN);

         } else {
            /* --- IS UNCONVERTABLE NAN --- */
            result.uSize   = IEEE754_UNION_IS_SINGLE;
            result.uValue  = uSingle;
         }
      }
   } else {
      /* ---- REGULAR NUMBER ---- */
      /* A regular single can be converted to a regular half if the
       * single's exponent is in the smaller range of a half and if no
       * precision is lost in the significand.
       */
      if(nSingleUnbiasedExponent >= HALF_EXPONENT_MIN &&
         nSingleUnbiasedExponent <= HALF_EXPONENT_MAX &&
        (uSingleSignificand & (SINGLE_SIGNIFICAND_MASK >> HALF_NUM_SIGNIFICAND_BITS)) == 0) {
         uHalfSignificand = uSingleSignificand >> (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

         /* --- CONVERT TO HALF NORMAL --- */
         result.uSize  = IEEE754_UNION_IS_HALF;
         result.uValue = IEEE754_AssembleHalf(uSingleSign,
                                              uHalfSignificand,
                                              nSingleUnbiasedExponent);
      } else {
         /* Unable to convert to a half normal. See if it can be
          * converted to a half subnormal. To do that, the exponent
          * must be in range and no precision can be lost in the
          * signficand.
          *
          * This is more complicated because the number is not
          * normalized.  The signficand must be shifted proprotionally
          * to the exponent and 1 must be added in.  See
          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
          *
          * Exponents -14 to -24 map to a shift of 0 to 10 of the
          * significand.  The largest value of a half subnormal has an
          * exponent of -14. Subnormals are not normalized like
          * normals meaning they lose precision as the numbers get
          * smaller. Normals don't lose precision because the exponent
          * allows all the bits of the significand to be significant.
          */
         /* The exponent of the largest possible half-precision
          * subnormal is HALF_EXPONENT_MIN (-14).  Exponents larger
          * than this are normal and handled above. We're going to
          * shift the significand right by at least this amount.
          */
         nExponentDifference = -(nSingleUnbiasedExponent - HALF_EXPONENT_MIN);

         /* In addition to the shift based on the exponent's value,
          * the single significand has to be shifted right to fit into
          * a half-precision significand */
         nShiftAmount = nExponentDifference + (SINGLE_NUM_SIGNIFICAND_BITS - HALF_NUM_SIGNIFICAND_BITS);

         /* Must add 1 in to the possible significand because there is
          * an implied 1 for normal values and not for subnormal
          * values. See equations here:
          * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
          */
         uHalfSignificand = (uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

         /* If only zero bits get shifted out, this can be converted
          * to subnormal */
         if(nSingleUnbiasedExponent < HALF_EXPONENT_MIN &&
            nSingleUnbiasedExponent >= HALF_EXPONENT_MIN - HALF_NUM_SIGNIFICAND_BITS &&
            uHalfSignificand << nShiftAmount == uSingleSignificand + (1 << SINGLE_NUM_SIGNIFICAND_BITS)) {
            /* --- CONVERTABLE TO HALF SUBNORMAL --- */
            result.uSize  = IEEE754_UNION_IS_HALF;
            result.uValue = IEEE754_AssembleHalf(uSingleSign,
                                                 uHalfSignificand,
                                                 HALF_EXPONENT_ZERO);
         } else {
            /* --- DO NOT CONVERT --- */
            result.uSize   = IEEE754_UNION_IS_SINGLE;
            result.uValue  = uSingle;
         }
      }
   }

   return result;
}


/**
 * @brief Assemble sign, significand and exponent into single precision float.
 *
 * @param[in] uSingleSign              0 if positive, 1 if negative
 * @pararm[in] uSingleSignificand      Bits of the significand
 * @param[in] nSingleUnBiasedExponent  Exponent
 *
 * This returns the bits for a single-precision float, a binary32 as
 * specified in IEEE754. It is returned as a uint64_t rather than a
 * uint32_t or a float for convenience of usage.
 */
static uint64_t
IEEE754_AssembleSingle(uint64_t uSingleSign,
                       uint64_t uSingleSignificand,
                       int64_t  nSingleUnBiasedExponent)
{
   uint64_t uSingleBiasedExponent;

   uSingleBiasedExponent = (uint64_t)(nSingleUnBiasedExponent + SINGLE_EXPONENT_BIAS);

   return uSingleSignificand |
          (uSingleBiasedExponent << SINGLE_EXPONENT_SHIFT) |
          (uSingleSign << SINGLE_SIGN_SHIFT);
}


/**
 * @brief Convert a double-precision float to single-precision.
 *
 * @param[in] d  The value to convert.
 *
 * @returns Either unconverted value or value converted to single-precision.
 *
 * This always succeeds. If the value cannot be converted without the
 * loss of precision, it is not converted.
 *
 * This handles all subnormals and NaN payloads.
 */
static IEEE754_union
IEEE754_DoubleToSingle(double d)
{
   IEEE754_union Result;
   int64_t       nExponentDifference;
   int64_t       nShiftAmount;
   uint64_t      uSingleSignificand;
   uint64_t      uDroppedBits;


   /* Pull the three parts out of the double-precision float. Most
    * work is done with uint64_t which helps avoid integer promotions
    * and static analyzer complaints.
    */
   const uint64_t uDouble                 = CopyDoubleToUint64(d);
   const uint64_t uDoubleBiasedExponent   = (uDouble & DOUBLE_EXPONENT_MASK) >> DOUBLE_EXPONENT_SHIFT;
   const int64_t  nDoubleUnbiasedExponent = (int64_t)uDoubleBiasedExponent - DOUBLE_EXPONENT_BIAS;
   const uint64_t uDoubleSign             = (uDouble & DOUBLE_SIGN_MASK) >> DOUBLE_SIGN_SHIFT;
   const uint64_t uDoubleSignificand      = uDouble & DOUBLE_SIGNIFICAND_MASK;


    if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_ZERO) {
        if(uDoubleSignificand == 0) {
            /* --- IS ZERO --- */
            Result.uSize  = IEEE754_UNION_IS_SINGLE;
            Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
                                                   0,
                                                   SINGLE_EXPONENT_ZERO);
        } else {
            /* --- IS DOUBLE SUBNORMAL --- */
            /* The largest double subnormal is slightly less than the
             * largest double normal which is 2^-1022 or
             * 2.2250738585072014e-308.  The smallest single subnormal
             * is 2^-149 or 1.401298464324817e-45.  There is no
             * overlap so double subnormals can't be converted to
             * singles of any sort.
             */
            Result.uSize   = IEEE754_UNION_IS_DOUBLE;
            Result.uValue  = uDouble;
         }
    } else if(nDoubleUnbiasedExponent == DOUBLE_EXPONENT_INF_OR_NAN) {
         if(uDoubleSignificand == 0) {
             /* ---- IS INFINITY ---- */
             Result.uSize  = IEEE754_UNION_IS_SINGLE;
             Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
                                                    0,
                                                    SINGLE_EXPONENT_INF_OR_NAN);
         } else {
             /* The NaN can only be converted if no payload bits are
              * lost per RFC 8949 section 4.1 that defines Preferred
              * Serializaton. Note that Deterministically Encode CBOR
              * in section 4.2 allows for some variation of this rule,
              * but at the moment this implementation is of Preferred
              * Serialization, not CDE. As of December 2023, we are
              * also expecting an update to CDE. This code may need to
              * be updated for CDE.
              */
             uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
             if(uDroppedBits == 0) {
                /* --- IS CONVERTABLE NAN --- */
                uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
                Result.uSize  = IEEE754_UNION_IS_SINGLE;
                Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
                                                       uSingleSignificand,
                                                       SINGLE_EXPONENT_INF_OR_NAN);
            } else {
               /* --- IS UNCONVERTABLE NAN --- */
               Result.uSize   = IEEE754_UNION_IS_DOUBLE;
               Result.uValue  = uDouble;
            }
         }
    } else {
        /* ---- REGULAR NUMBER ---- */
        /* A regular double can be converted to a regular single if
         * the double's exponent is in the smaller range of a single
         * and if no precision is lost in the significand.
         */
        uDroppedBits = uDoubleSignificand & (DOUBLE_SIGNIFICAND_MASK >> SINGLE_NUM_SIGNIFICAND_BITS);
        if(nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN &&
           nDoubleUnbiasedExponent <= SINGLE_EXPONENT_MAX &&
           uDroppedBits == 0) {
            /* --- IS CONVERTABLE TO SINGLE --- */
            uSingleSignificand = uDoubleSignificand >> (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
            Result.uSize  = IEEE754_UNION_IS_SINGLE;
            Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
                                                   uSingleSignificand,
                                                   nDoubleUnbiasedExponent);
        } else {
            /* Unable to convert to a single normal. See if it can be
             * converted to a single subnormal. To do that, the
             * exponent must be in range and no precision can be lost
             * in the signficand.
             *
             * This is more complicated because the number is not
             * normalized.  The signficand must be shifted
             * proprotionally to the exponent and 1 must be added
             * in. See
             * https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Exponent_encoding
             */
            nExponentDifference = -(nDoubleUnbiasedExponent - SINGLE_EXPONENT_MIN);
            nShiftAmount        = nExponentDifference + (DOUBLE_NUM_SIGNIFICAND_BITS - SINGLE_NUM_SIGNIFICAND_BITS);
            uSingleSignificand  = (uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) >> nShiftAmount;

            if(nDoubleUnbiasedExponent < SINGLE_EXPONENT_MIN &&
               nDoubleUnbiasedExponent >= SINGLE_EXPONENT_MIN - SINGLE_NUM_SIGNIFICAND_BITS &&
               uSingleSignificand << nShiftAmount == uDoubleSignificand + (1ULL << DOUBLE_NUM_SIGNIFICAND_BITS)) {
               /* --- IS CONVERTABLE TO SINGLE SUBNORMAL --- */
               Result.uSize  = IEEE754_UNION_IS_SINGLE;
               Result.uValue = IEEE754_AssembleSingle(uDoubleSign,
                                                      uSingleSignificand,
                                                      SINGLE_EXPONENT_ZERO);
            } else {
               /* --- CAN NOT BE CONVERTED --- */
               Result.uSize   = IEEE754_UNION_IS_DOUBLE;
               Result.uValue  = uDouble;
            }
        }
    }

    return Result;
}


/* Public function; see ieee754.h */
IEEE754_union
IEEE754_DoubleToSmaller(double d, int bAllowHalfPrecision)
{
   IEEE754_union result;

   result = IEEE754_DoubleToSingle(d);

   if(result.uSize == IEEE754_UNION_IS_SINGLE && bAllowHalfPrecision) {
      /* Cast to uint32_t is OK, because value was just successfully
       * converted to single. */
      float uSingle = CopyUint32ToSingle((uint32_t)result.uValue);
      result = IEEE754_SingleToHalf(uSingle);
   }

   return result;
}


#else /* QCBOR_DISABLE_PREFERRED_FLOAT */

int ieee754_dummy_place_holder;

#endif /* QCBOR_DISABLE_PREFERRED_FLOAT */