Subversion Repositories pentevo

Rev

Blame | Last modification | View Log | Download | RSS feed | ?url?

  1. /* ieeefloat.c */
  2. /*****************************************************************************/
  3. /* SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only                     */
  4. /*                                                                           */
  5. /* AS                                                                        */
  6. /*                                                                           */
  7. /* IEEE Floating Point Handling                                              */
  8. /*                                                                           */
  9. /*****************************************************************************/
  10.  
  11. #include "stdinc.h"
  12. #include <math.h>
  13. #include <string.h>
  14.  
  15. #include "be_le.h"
  16. #ifdef HOST_DECFLOAT
  17. # include "decfloat.h"
  18. #endif
  19. #include "ieeefloat.h"
  20.  
  21. #define DBG_FLOAT 0
  22.  
  23. /*!------------------------------------------------------------------------
  24.  * \fn     as_fpclassify(Double inp)
  25.  * \brief  classify floating point number
  26.  * \param  inp number to classify
  27.  * \return type of number
  28.  * ------------------------------------------------------------------------ */
  29.  
  30. int as_fpclassify(Double inp)
  31. {
  32. #ifdef IEEEFLOAT
  33.   Byte Buf[8];
  34.   Word Exponent;
  35.  
  36.   /* extract exponent */
  37.  
  38.   Double_2_ieee8(inp, Buf, True);
  39.   Exponent = (Buf[0] & 0x7f);
  40.   Exponent = (Exponent << 4) | ((Buf[1] >> 4) & 15);
  41.   switch (Exponent)
  42.   {
  43.     case 0:
  44.       return AS_FP_SUBNORMAL;
  45.     case 2047:
  46.     {
  47.       Byte Acc = Buf[1] & 0x0f;
  48.       int z;
  49.  
  50.       for (z = 2; z < 8; Acc |= Buf[z++]);
  51.       return Acc ? AS_FP_NAN : AS_FP_INFINITE;
  52.     }
  53.     default:
  54.       return AS_FP_NORMAL;
  55.   }
  56. #else
  57.   (void)inp; return AS_FP_NORMAL;
  58. #endif
  59. }
  60.  
  61. /*!------------------------------------------------------------------------
  62.  * \fn     ieee8_dissect(Word *p_sign, Integer *p_exponent, LongWord *p_mantissa, LongWord *p_fraction, Double num)
  63.  * \brief  dissect IEEE 64 bit float into components
  64.  * \param  p_sign extracted sign (1 for negative)
  65.  * \param  p_exponent extracted power-of-2s exponent, without bias
  66.  * \param  p_mantissa upper 29 bits of mantissa, including leading 1 made explicit
  67.  * \param  p_fraction lower 24 bits of mantissa
  68.  * \param  num number to dissect
  69.  * ------------------------------------------------------------------------ */
  70.  
  71. void ieee8_dissect(Word *p_sign, Integer *p_exponent, LongWord *p_mantissa, LongWord *p_fraction, Double num)
  72. {
  73.   Byte buf[8];
  74.  
  75.   /* binary representation, big endian: */
  76.  
  77.   Double_2_ieee8(num, buf, True);
  78.  
  79.   /* (a) Sign is MSB of first byte: */
  80.  
  81.   *p_sign = !!(buf[0] & 0x80);
  82.  
  83.   /* (b) Exponent is stored in the following 11 bits, with a bias of 1023:  */
  84.  
  85.   *p_exponent = (buf[0] & 0x7f);
  86.   *p_exponent = (*p_exponent << 4) | ((buf[1] >> 4) & 15);
  87.   *p_exponent -= 1023;
  88.  
  89.   /* (c) Extract 28 bits of mantissa: */
  90.  
  91.   *p_mantissa = buf[1] & 15;
  92.   *p_mantissa = (*p_mantissa << 8) | buf[2];
  93.   *p_mantissa = (*p_mantissa << 8) | buf[3];
  94.   *p_mantissa = (*p_mantissa << 8) | buf[4];
  95.  
  96.   /* (d) remaining 24 bits of mantissa: */
  97.  
  98.   *p_fraction = buf[5];
  99.   *p_fraction = (*p_fraction << 8) | buf[6];
  100.   *p_fraction = (*p_fraction << 8) | buf[7];
  101.  
  102.   /* (e) if not denormal, make leading one of mantissa explicit: */
  103.  
  104.   if (*p_exponent != -1023)
  105.     *p_mantissa |= 0x10000000ul;
  106. #if DBG_FLOAT
  107.   fprintf(stderr, "(cnvrt) %2d * 0x%08x * 2^%d Fraction 0x%08x\n", Sign ? -1 : 1, *p_mantissa, *p_exponent, *p_fraction);
  108. #endif
  109. }
  110.  
  111. /*!------------------------------------------------------------------------
  112.  * \fn     Double_2_ieee4(Double inp, Byte *pDest, Boolean NeedsBig)
  113.  * \brief  convert float to IEEE single (32 bit) float binary representation
  114.  * \param  inp input number
  115.  * \param  pDest where to write binary data
  116.  * \param  NeedsBig req's big endian?
  117.  * ------------------------------------------------------------------------ */
  118.  
  119. void Double_2_ieee4(Double inp, Byte *pDest, Boolean NeedsBig)
  120. {
  121. #ifdef IEEEFLOAT
  122.   Single tmp = inp;
  123.   memcpy(pDest, &tmp, 4);
  124. #elif defined HOST_DECFLOAT
  125.   VAXF_2_Single(pDest, inp);
  126. #else
  127. # error define host floating point format
  128. #endif
  129.   if (HostBigEndian != NeedsBig)
  130.     DSwap(pDest, 4);
  131. }
  132.  
  133. /*!------------------------------------------------------------------------
  134.  * \fn     Double_2_ieee8(Double inp, Byte *pDest, Boolean NeedsBig)
  135.  * \brief  convert float to IEEE double (64 bit) float binary representation
  136.  * \param  inp input number
  137.  * \param  pDest where to write binary data
  138.  * \param  NeedsBig req's big endian?
  139.  * ------------------------------------------------------------------------ */
  140.  
  141. void Double_2_ieee8(Double inp, Byte *pDest, Boolean NeedsBig)
  142. {
  143. #ifdef IEEEFLOAT
  144.   memcpy(pDest, &inp, 8);
  145. #elif defined HOST_VDECFLOAT
  146.   VAXD_2_Double(pDest, inp);
  147. #else
  148. # error define host floating point format
  149. #endif
  150.   if (HostBigEndian != NeedsBig)
  151.     QSwap(pDest, 8);
  152. }
  153.  
  154. /*!------------------------------------------------------------------------
  155.  * \fn     Double_2_ieee10(Double inp, Byte *pDest, Boolean NeedsBig)
  156.  * \brief  convert float to non-IEEE extended (80 bit) float binary representation
  157.  * \param  inp input number
  158.  * \param  pDest where to write binary data
  159.  * \param  NeedsBig req's big endian?
  160.  * ------------------------------------------------------------------------ */
  161.  
  162. void Double_2_ieee10(Double inp, Byte *pDest, Boolean NeedsBig)
  163. {
  164.   Byte Buffer[8];
  165.   Byte Sign;
  166.   Word Exponent;
  167.   int z;
  168.  
  169. #ifdef IEEEFLOAT
  170.   Boolean Denormal;
  171.  
  172.   memcpy(Buffer, &inp, 8);
  173.   if (HostBigEndian)
  174.     QSwap(Buffer, 8);
  175.   Sign = (Buffer[7] & 0x80);
  176.   Exponent = (Buffer[6] >> 4) + (((Word) Buffer[7] & 0x7f) << 4);
  177.   Denormal = (Exponent == 0);
  178.   if (Exponent == 2047)
  179.     Exponent = 32767;
  180.   else Exponent += (16383 - 1023);
  181.   Buffer[6] &= 0x0f;
  182.   if (!Denormal)
  183.     Buffer[6] |= 0x10;
  184.   for (z = 7; z >= 2; z--)
  185.     pDest[z] = ((Buffer[z - 1] & 0x1f) << 3) | ((Buffer[z - 2] & 0xe0) >> 5);
  186.   pDest[1] = (Buffer[0] & 0x1f) << 3;
  187.   pDest[0] = 0;
  188.   pDest[9] = Sign | ((Exponent >> 8) & 0x7f);
  189.   pDest[8] = Exponent & 0xff;
  190. #elif defined HOST_DECFLOAT
  191.   VAXD_2_LongDouble(pDest, inp);
  192. #else
  193. # error define host floating point format
  194. #endif
  195.   if (NeedsBig)
  196.     TSwap(pDest, 10);
  197. }
  198.  
  199. /*!------------------------------------------------------------------------
  200.  * \fn     Double_2_ieee2(Double inp, Byte *pDest, Boolean NeedsBig)
  201.  * \brief  convert floating point number to IEEE half size (16 bit) format
  202.  * \param  inp floating point number to store
  203.  * \param  pDest where to write result (2 bytes)
  204.  * \param  NeedsBig req's big endian?
  205.  * \return True if conversion was successful
  206.  * ------------------------------------------------------------------------ */
  207.  
  208. Boolean Double_2_ieee2(Double inp, Byte *pDest, Boolean NeedsBig)
  209. {
  210.   Word Sign;
  211.   Integer Exponent;
  212.   LongWord Mantissa, Fraction;
  213.   Boolean RoundUp;
  214.  
  215. #if DBG_FLOAT
  216.   fprintf(stderr, "(0) %g\n", inp);
  217. #endif
  218.  
  219.   /* (1) Dissect IEEE number */
  220.  
  221.   ieee8_dissect(&Sign, &Exponent, &Mantissa, &Fraction, inp);
  222.  
  223.   /* (1b) All-ones Exponent (2047-1023=1024) is reserved for NaN and infinity: */
  224.  
  225.   if (Exponent == 1024)
  226.   {
  227.     pDest[1 ^ !!NeedsBig] = (Sign << 7) | 0x7c;
  228.     pDest[0 ^ !!NeedsBig] = 0;
  229.  
  230.     /* clone all-ones mantissa for NaN: */
  231.  
  232.     if ((Mantissa == 0x0ffffffful) && (Fraction == 0x00ffffff))
  233.     {
  234.       pDest[1 ^ !!NeedsBig] |= 0x03;
  235.       pDest[0 ^ !!NeedsBig] = 0xff;
  236.     }
  237.  
  238.     /* otherwise clone MSB+LSB of mantissa: */
  239.  
  240.     else
  241.     {
  242.       if (Mantissa & 0x08000000ul)
  243.         pDest[1 ^ !!NeedsBig] |= 0x02;
  244.       if (Mantissa & 0x00000001ul)
  245.         pDest[0 ^ !!NeedsBig] |= 0x01;
  246.     }
  247.     return True;
  248.   }
  249.  
  250.   /* (1c) if not denormal, make leading one of mantissa explicit: */
  251.  
  252.   if (Exponent != -1023)
  253.     Mantissa |= 0x10000000ul;
  254. #if DBG_FLOAT
  255.   fprintf(stderr, "(cnvrt) %2d * 0x%08x * 2^%d Fraction 0x%08x\n", Sign ? -1 : 1, Mantissa, Exponent, Fraction);
  256. #endif
  257.  
  258.   /* (2) Round-to-the-nearest for FP16: */
  259.  
  260.   /* Bits 27..18 of fractional part of mantissa will make it into dest, so the decision bit is bit 17: */
  261.  
  262.   if (Mantissa & 0x20000ul) /* fraction is >= 0.5 */
  263.   {
  264.     if ((Mantissa & 0x1fffful) || Fraction) /* fraction is > 0.5 -> round up */
  265.       RoundUp = True;
  266.     else /* fraction is 0.5 -> round towards even, i.e. round up if mantissa is odd */
  267.       RoundUp = !!(Mantissa & 0x40000ul);
  268.   }
  269.   else /* fraction is < 0.5 -> round down */
  270.     RoundUp = False;
  271. #if DBG_FLOAT
  272.   fprintf(stderr, "RoundUp %u\n", RoundUp);
  273. #endif
  274.   if (RoundUp)
  275.   {
  276.     Mantissa += 0x40000ul - (Mantissa & 0x3fffful);
  277.     Fraction = 0;
  278.     if (Mantissa & 0x20000000ul)
  279.     {
  280.       Mantissa >>= 1;
  281.       Exponent++;
  282.     }
  283.   }
  284. #if DBG_FLOAT
  285.   fprintf(stderr, "(round) %2d * 0x%08x * 2^%d Fraction 0x%08x\n", Sign ? -1 : 1, Mantissa, Exponent, Fraction);
  286. #endif
  287.  
  288.   /* (3a) Overrange? */
  289.  
  290.   if (Exponent > 15)
  291.     return False;
  292.   else
  293.   {
  294.     /* (3b) number that is too small may degenerate to 0: */
  295.  
  296.     while ((Exponent < -15) && Mantissa)
  297.     {
  298.       Exponent++; Mantissa >>= 1;
  299.     }
  300. #if DBG_FLOAT
  301.     fprintf(stderr, "(after denormchk) %2d * 0x%08x * 2^%d Fraction 0x%08x\n", Sign ? -1 : 1, Mantissa, Exponent, Fraction);
  302. #endif
  303.  
  304.     /* numbers too small to represent degenerate to 0 (mantissa was shifted out) */
  305.  
  306.     if (Exponent < -15)
  307.       Exponent = -15;
  308.  
  309.     /* For denormal numbers, exponent is 2^(-14) and not 2^(-15)!
  310.        So if we end up with an exponent of 2^(-15), convert
  311.        mantissa so it corresponds to 2^(-14): */
  312.  
  313.     else if (Exponent == -15)
  314.       Mantissa >>= 1;
  315.  
  316.     /* (3c) add bias to exponent */
  317.  
  318.     Exponent += 15;
  319.  
  320.     /* (3d) store result */
  321.  
  322.     pDest[1 ^ !!NeedsBig] = (Sign << 7) | ((Exponent << 2) & 0x7c) | ((Mantissa >> 26) & 3);
  323.     pDest[0 ^ !!NeedsBig] = (Mantissa >> 18) & 0xff;
  324.  
  325.     return True;
  326.   }
  327. }
  328.