48823 – gcc-4.6.0 floating-point optimization regression on ia64-Linux

Bug 48823 - gcc-4.6.0 floating-point optimization regression on ia64-Linux

Summary: gcc-4.6.0 floating-point optimization regression on ia64-Linux

Status:	RESOLVED INVALID

Alias:	None

Product:	gcc
Classification:	Unclassified
Component:	rtl-optimization (show other bugs)
Version:	4.6.0

Importance:	P3 normal
Target Milestone:	---
Assignee:	Not yet assigned to anyone

URL:
Keywords:

Depends on:
Blocks:

Reported:	2011-04-29 14:12 UTC by Mariah Lenox
Modified:	2011-10-13 20:16 UTC (History)
CC List:	2 users (show)

See Also:
Host:	ia64-unknown-linux-gnu
Target:	ia64-unknown-linux-gnu
Build:	ia64-unknown-linux-gnu
Known to work:	4.5.1
Known to fail:	4.6.0
Last reconfirmed:

Attachments
Testcase (219 bytes, application/octet-stream) 2011-05-02 09:21 UTC, Jeroen Demeyer	Details
View All Add an attachment (proposed patch, testcase, etc.)

Note You need to log in before you can comment on or make changes to this bug.

Description Mariah Lenox 2011-04-29 14:12:27 UTC

/* gcc-4.6.0 floating-point optimization regression on ia64-Linux

% gcc -O1 -o foo foo.c
% foo
g3  = 35f2349dc34f3df6
i[1]= 3ca1a62633145c07
i[2]= b92f1976b7ed8fbc
j[1]= 3ca4d57ee2b1013a
j[2]= b92618713a31d3e2
val = 35bfd0f99254c580
8.53973422267356774285
402114580b45d475
% 
%
% gcc -O2 -o foo foo.c
% foo
g3  = 35f2349dc34f3df6
i[1]= 3ca1a62633145c07
i[2]= b92f1976b7ed8fbc
j[1]= 3ca4d57ee2b1013a
j[2]= b92618713a31d3e2
val = 35bfd0f99254c590
8.53973422267356596649
402114580b45d474
**ERROR** QD_check failed at x[0]
%
%
% gcc-4.5.1 -O2 -o foo foo.c
% foo
g3  = 35f2349dc34f3df6
i[1]= 3ca1a62633145c07
i[2]= b92f1976b7ed8fbc
j[1]= 3ca4d57ee2b1013a
j[2]= b92618713a31d3e2
val = 35bfd0f99254c590
8.53973422267356774285
402114580b45d475
%

# note that this code compiles and runs successfully 
# with gcc-4.5.1 -03


% gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/local/gcc-4.6.0/ia64-Linux-gmp-5.0.1/libexec/gcc/ia64-unknown-linux-gnu/4.6.0/lto-wrapper
Target: ia64-unknown-linux-gnu
Configured with: /usr/local/gcc-4.6.0/src/gcc-4.6.0/configure
--enable-languages=c,c++,fortran --with-gnu-as
--with-as=/usr/local/binutils-2.21/ia64-Linux-rhel-gcc-4.5.1/bin/as
--with-gnu-ld
--with-ld=/usr/local/binutils-2.21/ia64-Linux-rhel-gcc-4.5.1/bin/ld
--with-gmp=/usr/local/gmp-5.0.1/ia64-Linux-rhel-gcc-4.5.0
--with-mpfr=/usr/local/mpfr-3.0.1/ia64-Linux-rhel-gmp-5.0.1-gcc-4.6.0
--with-mpc=/usr/local/mpc-0.9/ia64-Linux-rhel-gmp-5.0.1-mpfr-3.0.1-gcc-4.6.0
--prefix=/usr/local/gcc-4.6.0/ia64-Linux-gmp-5.0.1
Thread model: posix
gcc version 4.6.0 (GCC) 
%

*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fpu_control.h>

double QD_e[4]={2.718281828459045091e+00,1.445646891729250158e-16,
                -2.127717108038176765e-33,1.515630159841218954e-49};
double QD_pi[4]=
{3.141592653589793116e+00,1.224646799147353207e-16,
 -2.994769809718339666e-33,1.112454220863365282e-49};

#define QD_qsum(a,b,s,e) {double z1; s=a+b; z1=s-a; e=b-z1;}
#define QD_sum(a,b,s,e)\
 {double bb,z1,z2; s=a+b; bb=s-a; z1=s-bb; z2=a-z1; z1=b-bb; e=z1+z2;}
#define QD_split(a,hi,lo)\
{double xt=134217729.0*a,z1; z1=xt-a; hi=xt-z1; lo=a-hi;}
#define QD_prod(a,b,p,e)\
{double ah,al,bh,bl,z1,z2,z3,z4; p=a*b; QD_split(a,ah,al); QD_split(b,bh,bl);\
 z1=ah*bh; z2=z1-p; z1=ah*bl; z3=al*bh; z4=z1+z3; z1=z2+z4; z3=al*bl; e=z1+z3;}


double QD_mul(double *i,double *j,double *o)
{
  double a0,a1,b1,b2,c1,c2,d2,d3,e2,e3,f2,f3;
  double g3,h1,h2,h3,k2,k3,l2,l3,m2,m3,n2,n3,p2,p3;
  double t1,t2,t3;
  double a2, a3,o0;
  double val;

  QD_prod(i[0],j[0],a0,a1); 
  QD_prod(i[0],j[1],b1,b2);
  QD_prod(i[1],j[0],c1,c2); 
  QD_prod(i[0],j[2],d2,d3);
  QD_prod(i[2],j[0],e2,e3); 
  QD_prod(i[1],j[1],f2,f3);
  g3=i[0]*j[3]+i[3]*j[0]; 
  QD_sum(a1,b1,t1,t2); 
  QD_sum(c1,t1,h1,t3); 
  QD_sum(t2,t3,h2,h3);
  QD_sum(b2,c2,k2,k3); 
  QD_sum(d2,e2,l2,l3); 
  QD_sum(f2,h2,m2,m3);
printf("g3  = %lx\n", g3); 
printf("i[1]= %lx\n", i[1]); 
printf("i[2]= %lx\n", i[2]); 
printf("j[1]= %lx\n", j[1]); 
printf("j[2]= %lx\n", j[2]); 
  g3+=(i[1]*j[2]+i[2]*j[1]); 
val = g3;  /* printing g3 here makes the bug disappear, so use return */
  QD_sum(k2,l2,n2,n3); 
  QD_sum(m2,n2,p2,p3);
  g3+=d3+e3+f3+h3+k3+l3+m3+n3+p3; 
  QD_qsum(p2,g3,a2,a3); 
  QD_qsum(h1,a2,b1,b2); 
  QD_qsum(a0,b1,o0,c1);
  o[0]=o0; 
  QD_qsum(c1,b2,o[1],d2); 
  QD_qsum(d2,a3,o[2],o[3]);
  return val;
}
 
void errorit(S) char *S; {printf("**ERROR** %s\n",S); exit(-1);}

int main()
{
  double x[4]; 
  double val;
  /* This fpu_control code should set the control
   word of the floating point processor to round to an 
   IEEE-754 double (53-bit mantissa, 64-bits in total), rather
   than the default extended precision

   However the bug also occurs if this fpu_control code is
   removed or commented out.
  */
  fpu_control_t fpu_control=0x027f; _FPU_SETCW(fpu_control);

  val = QD_mul(QD_pi,QD_e,x);
printf("val = %lx\n", val);
printf("%1.20f\n", x[0]);
printf("%lx\n", x[0]);
  if (x[0]!=8.53973422267356774285) errorit("QD_check failed at x[0]");
  if (x[1]!=-6.7738152905024242803e-16) errorit("QD_check failed at x[1]");
  if (x[2]!=1.6082340642907151632e-32) errorit("QD_check failed at x[2]");
  return 0;
}

Comment 1 Jeroen Demeyer 2011-05-02 09:20:23 UTC

Minimal code to demonstrate the problem in qd.i.  The problematic flag seems to be -fexpensive-optimizations.

$ gcc qd.i -O1 -o qd && ./qd
0x1.114580b45d474p+3
$ gcc qd.i -O1 -fexpensive-optimizations -o qd && ./qd
0x1.114580b45d475p+3

Comment 2 Jeroen Demeyer 2011-05-02 09:21:07 UTC

Created attachment 24161 [details]
Testcase

Comment 3 Steve Ellcey 2011-10-13 17:57:50 UTC

The difference in results is due to the use of the fma instructions (fused multiply add).  If you don't want the fma instructions to be used you should use the -ffp-contract=on flag.  Otherwise GCC will use them and accept the slightly different floating point results due to no rounding being done between the multiply and the add in the fma instruction.  GCC will also do other optimizations that may change the results of inexact floating point instructions.

I am resolving this as invalid since this change in the least significant bit of an inexact floating point value is reasonable without fp-contract being on.

Comment 4 Jeroen Demeyer 2011-10-13 20:05:21 UTC

(In reply to comment #3)
> The difference in results is due to the use of the fma instructions (fused
> multiply add).  If you don't want the fma instructions to be used you should
> use the -ffp-contract=on flag.

Thanks for the reply, I understand your reasoning.  However, will this option guarantee strict IEEE floating-point behaviour or are there other flags to be specified?  I have always been under the impression that *by default* gcc produced code respecting the floating-point standard unless one specifies something like -ffast-math.  It seems this has changed in recent GCC versions.

Comment 5 Steve Ellcey 2011-10-13 20:16:46 UTC

I am not sure any set of flags will guarantee strict IEEE floating point behavior.  See PR37845 for some more information.