Bytes of long double - trouble . . .

Dennis Clarke dclarke@blastwave.org
Wed Feb 15 12:50:38 GMT 2023


On 2/8/23 03:48, Sergey Smith wrote:
> 
> To:     gcc-help@gcc.gnu.org
> Dear GCC,  [ 8:2:23  ]
>                    Like Woooew ! What is THIS!?  I installed Visual Studio Code, & your 32 bit C, Version 9.2.0. I ran :
>     printf("\nOn THIS particular computer, long double is given %d bytes\n", sizeof(long double)); /* The Answer was: 12 bytes.
>     THEN, I updated to C Version 12.2.0 and ran the same code, - ON THE SAME COMPUTER, - but NOW the answer is:  16 bytes !
>   
>      HOW can this happen if, as I understand it, - this function is supposed to assess a computer’s  *hardware* ?  I am on Windows 10 btw.
>                                                                                         — Sergey.
>   

There is no x86 hardware ever that can do true 128-bit long double
floating point. The best you can hope for is the strange 10-byte
format that Intel made up as a way to extend precision a little bit.
Works pretty well for things like fused multiply add and such.

However there are various ways you can emulate the IEEE-754 floating
point stuff on x86 hardware and perhaps you really want the libquadmath
here. Regardless you *may* see the data type take a full 16 bytes and
yes that means six of those bytes are padding. They mean nothing, unless
you do the software emulation goodness. Try :

/*
  * fp128_q.c  mess around with the libquadmath to see IEEE-754 2008
  *            floating point stuff sort of work in an emulated way
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 3 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
  *
  * https://www.gnu.org/licenses/gpl-3.0.txt
  */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <quadmath.h>
#include <float.h>
#include <fenv.h>

#define BUFFERSIZE 128

/*
 * Format one __float128 value with quadmath_snprintf, report how many
 * characters were produced and then print the text after a label.
 *
 *   buffer, buffer_size : destination storage and its capacity in bytes
 *   format              : libquadmath format string with one Q conversion
 *   value               : the number to format
 *   label               : text printed immediately before the number
 *   fail_message        : complete line written to stderr when
 *                         quadmath_snprintf formats nothing
 *
 * Returns 0 on success and -1 when nothing was formatted or the
 * formatted text did not fit into buffer.
 */
static int show_quad(char *buffer, size_t buffer_size, const char *format,
                     __float128 value, const char *label,
                     const char *fail_message)
{
     int num_chars = quadmath_snprintf(buffer, buffer_size, format, value);

     if ( num_chars <= 0 ) {
         fputs(fail_message, stderr);
         return -1;
     }

     /* as with snprintf the return value is the length the complete
      * output needs, so anything that does not leave room for the
      * terminating nul means the text in buffer was cut short */
     if ( (size_t)num_chars >= buffer_size ) {
         fprintf(stderr, "FAIL : quadmath_snprintf needs %i chars but "
                         "the buffer holds %zu.\n", num_chars, buffer_size);
         return -1;
     }

     printf ("INFO : quadmath_snprintf formatted %i chars.\n", num_chars);
     printf ("%s%s\n", label, buffer);

     return 0;
}

/*
 * Print the floating point characteristics that float.h advertises,
 * then format a few __float128 values through libquadmath.
 *
 * The command line arguments are not used.
 *
 * Returns EXIT_SUCCESS when every value was formatted and
 * EXIT_FAILURE if the buffer could not be allocated or any
 * formatting step failed.
 */
int main(int argc, char *argv[]){

     __float128 fp0, fp1, fp2, pi;
     const size_t buffer_size = BUFFERSIZE;
     char *buffer;
     int status = EXIT_FAILURE;

     (void)argc;
     (void)argv;

     buffer = calloc(buffer_size, sizeof *buffer);
     if ( buffer == NULL ) {
         fprintf(stderr, "FAIL : calloc of %zu bytes failed.\n",
                          buffer_size);
         return EXIT_FAILURE;
     }

#ifdef FLT_EVAL_METHOD
     printf ( "INFO : FLT_EVAL_METHOD == %d\n", FLT_EVAL_METHOD);
#endif

#ifdef DECIMAL_DIG
     printf ( "INFO : DECIMAL_DIG == %d\n", DECIMAL_DIG);
#endif

/* LDBL_DIG, FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG */
#ifdef FLT_DECIMAL_DIG
     printf ( "INFO : FLT_DECIMAL_DIG == %d\n", FLT_DECIMAL_DIG);
#endif

#ifdef DBL_DECIMAL_DIG
     printf ( "INFO : DBL_DECIMAL_DIG == %d\n", DBL_DECIMAL_DIG);
#endif

#ifdef LDBL_DECIMAL_DIG
     printf ( "INFO : LDBL_DECIMAL_DIG == %d\n", LDBL_DECIMAL_DIG);
#endif

#ifdef LDBL_DIG
     printf ( "INFO : LDBL_DIG == %d\n", LDBL_DIG);
#endif


     /* NOTE : floating point can NOT precisely represent the
      *        test values being used here. Such is life in
      *        the real world of floating point. Good luck.
      */
     fp0 = 36.584Q;

     /* sizeof yields a size_t and that needs the z length modifier */
     printf ( "the sizeof(fp0) is %zu\n", sizeof(fp0) );

     if ( show_quad(buffer, buffer_size, "%40.36Qg", fp0,
                    "the value of fp0 is ",
                    "FAIL : quadmath_snprintf failed.\n") != 0 ) {
         goto done;
     }

     /* NOTE : there is no Q suffix on this constant so it is a plain
      *        double that gets converted to __float128. The digits
      *        printed past double precision are the rounding error
      *        of the double and not of the 128-bit type.
      */
     fp1 =  7.812;

     if ( show_quad(buffer, buffer_size, "%40.36Qg", fp1,
                    "the value of fp1 is ",
                    "FAIL : wtf quadmath_snprintf failed.\n") != 0 ) {
         goto done;
     }

     fp2 = fp0 + fp1;

     if ( show_quad(buffer, buffer_size, "%40.36Qg", fp2,
                    "fp2 = fp0 + fp1 = ",
                    "FAIL : wat? quadmath_snprintf failed.\n") != 0 ) {
         goto done;
     }

     /* more than reasonable value for pi which is a few more
      * decimal digits past the stuff in math.h */
     pi = 3.1415926535897932384626433832795028841971693993751Q;

     if ( show_quad(buffer, buffer_size, "%46.40Qe", pi,
                    "libquadmath says pi = ",
                    "FAIL : wat? quadmath_snprintf failed.\n") != 0 ) {
         goto done;
     }

     printf("the real thing is  ~= ");
     printf("3.1415926535897932384626433832795028841971693993...\n");

     status = EXIT_SUCCESS;  /* or 42 if you prefer */

done:
     /* single exit path so the buffer is released on failure as well */
     free(buffer);
     return status;

}


Be sure to link with -lquadmath and you should see :

$ gcc12 -g -O0 -Wl,-rpath=/usr/local/lib/gcc12,-enable-new-dtags \
            -o fp128_q fp128_q.c -lquadmath

$ ./fp128_q
INFO : FLT_EVAL_METHOD == 0
INFO : DECIMAL_DIG == 21
INFO : FLT_DECIMAL_DIG == 9
INFO : DBL_DECIMAL_DIG == 17
INFO : LDBL_DECIMAL_DIG == 21
INFO : LDBL_DIG == 18
the sizeof(fp0) is 16
INFO : quadmath_snprintf formatted 40 chars.
the value of fp0 is    36.5840000000000000000000000000000015
INFO : quadmath_snprintf formatted 40 chars.
the value of fp1 is    7.81200000000000027711166694643907249
INFO : quadmath_snprintf formatted 40 chars.
fp2 = fp0 + fp1 =     44.396000000000000277111666946439074
INFO : quadmath_snprintf formatted 46 chars.
libquadmath says pi = 3.1415926535897932384626433832795027974791e+00
the real thing is  ~= 3.1415926535897932384626433832795028841971693993...


Works pretty well.  The only other option is to get an IBM POWER9
server which has real 128-bit goodness in hardware. An IBM System Z
type mainframe can do all that also. The RISC-V processor
specification also has the Q-extension for true 128-bit floating
point but no one seems to have fabricated that. Yet. :)

-- 
Dennis Clarke
RISC-V/SPARC/PPC/ARM/CISC
UNIX and Linux spoken
GreyBeard and suspenders optional



More information about the Gcc-help mailing list