Optimizer handles double incorrectly

Mon Nov 29 12:05:00 GMT 1999

 Platform:
     Pentium II PC, Red Hat Linux 6.0, X11R6.4 @ Patch Level 3, compiled -m486 -g2 -ansi -pedantic
     gcc version egcs-2.91.66 19990314/Linux (egcs-1.1.2 release)

 Description:
     There appears to be a bug in the gcc optimizer's handling of "double".
     It also occurs with gcc version 2.95.1 19990816 (release). Unfortunately,
     it depends on the surrounding code that the routine is compiled and
     linked with. That is, while I can reproduce it by compiling the whole
     X Server, I cannot extract the routine and reproduce it as a separate
     program.

I have encountered two such bugs, one in our hardware initialization
code and one in the font libraries. Neither is extractable, i.e. the routines
below, compiled with the same options, run fine as separate programs but not
as parts of the X Server (see below).

--John

Timing Bug:

When compiled as part of the X Server, af is corrupted by the assignment
ef = df that immediately follows its initialization.

/*
gcc -c -m486 -g2 -O2 -ansi -pedantic -Dlinux -D__i386__ -D_POSIX_SOURCE
    -D_BSD_SOURCE -D_SVID_SOURCE -DX_LOCALE -DSHAPE -DXCSECURITY -DTOGCUP
    -DDPMSExtension -DGCCUSESGAS -DSTATIC_COLOR -DAVOID_GLYPHBLT -DPIXPRIV
    -DXV -DFORCE_SEPARATE_PRIVATE -DNDEBUG -DFUNCPROTO=15 -DNARROWPROTO optbug.c
gcc -o optbug -m486 -g2 -O2 -ansi -pedantic optbug.c
*/

/*
 * Clock register set for one clock setting.
 *
 * calculate_int_vclks() reads DesiredFrequency and writes ActualFrequency,
 * Numerator, Denominator and PostScale.  PixelClk is not touched by it.
 */
typedef struct {
    unsigned int        PixelClk;
    unsigned int        DesiredFrequency;   /* input: requested frequency      */
    unsigned int        ActualFrequency;    /* output: best match, truncated   */
    unsigned char       Numerator;          /* output: chosen n (1..127)       */
    unsigned char       Denominator;        /* output: chosen d (1..127)       */
    unsigned char       PostScale;          /* output: chosen s (0 or 1)       */
} ClkRegSet, *ClkRegSetPtr;

/*
 * calculate_int_vclks() - pick the clock divider settings closest to
 * CP->DesiredFrequency.
 *
 * Every combination of numerator n (1..127), denominator d (1..127) and
 * post-scale s (1, then 0) is tried.  Each one yields the candidate
 * frequency
 *
 *      f = rf * n / (d * (s + 1))
 *
 * where rf is the reference frequency 14318.18 (same unit as
 * DesiredFrequency; presumably kHz -- confirm against the hardware docs).
 * The candidate with the smallest absolute error |f - DesiredFrequency| is
 * kept; on a tie the first one encountered wins.  n and d are visited
 * starting at 64 and alternating outwards (64, 65, 63, 66, 62, ...).
 *
 * A candidate is only accepted when its error is strictly smaller than
 * DesiredFrequency itself.  If none qualifies (e.g. DesiredFrequency is 0),
 * ActualFrequency, Numerator, Denominator and PostScale are all set to 0.
 *
 * CP must point to a valid ClkRegSet.
 */
void
calculate_int_vclks(ClkRegSet *CP)
{
    double f, af, ef, df, e;
    double rf = 14318.18;
    int i, j, n, d, s, p;
    /* Best candidate so far; zeroed so the "nothing accepted" case is
     * well defined instead of reading indeterminate values. */
    int on = 0, od = 0, os = 0;

    df = (double) CP->DesiredFrequency;
    af = 0.0;
    ef = df;
    for (s = 1; s >= 0; s--)
    {
        p = s + 1;
        for (i = 1; i < 128; i++)
        {
            /* Odd i walks down from 64, even i walks up from 65. */
            if (i & 1)
                n = 64 - ((i - 1) / 2);
            else
                n = 64 + (i / 2);
            for (j = 1; j < 128; j++)
            {
                if (j & 1)
                    d = 64 - ((j - 1) / 2);
                else
                    d = 64 + (j / 2);

                f = (rf * (double) n) / ((double) (d * p));

                /*
                 * Absolute error, kept in floating point.  The integer
                 * abs() would truncate it to whole units and make every
                 * candidate within 1.0 of the target look equally good.
                 */
                e = f - df;
                if (e < 0.0)
                    e = -e;

                if (e < ef)
                {
                    af = f;
                    on = n;
                    od = d;
                    os = s;
                    ef = e;
                }
            }
        }
    }
    /* af is never negative, so the truncating conversion is well defined. */
    CP->ActualFrequency = (unsigned int) af;
    CP->Numerator = (unsigned char) on;
    CP->Denominator = (unsigned char) od;
    CP->PostScale = (unsigned char) os;
}
==================================================================================================
Font Bug:

Font:
     courb.pfa, -adobe-courier-bold-r-normal--14-*-*-*-*-*-*-*.

     This did not seem to be a library bug, since the font server, compiled in the same tree
     with identical defines and libraries, including libfont.a, opens the font without problems.
     It was thought to be an initialization error, since the Jupiter X Server is quite complex,
     supporting multiple screens, Virtual Screen and multiple depths. Several instances of memory
     being allocated in one place, freed in another and reallocated in yet another had been
     encountered during implementation.
     With the aid of Electric Fence, it was found that the Bresenham algorithm was writing outside
     its allocated memory (t1_Bresenham at lines.c:182) due to out-of-bounds (x,y) coordinates.
     (By setting EF_PROTECT_FREE, efence also eliminated the possibility of memory allocation
     errors as initially suspected.) The origin of the problem was traced to the MatrixMultiply
     procedure (t1_MMultiply at spaces.c:860).

 Workaround:
     The anomalous calculations were eliminated by removing the "register" declarations from the
     input parameters and working variables.
 Comments:
     Why this should correct anything is not understood. MatrixMultiply is a very simple procedure,
     (see below). The same code in the same file and same library running on the same machine at
     the same time with the same input parameters yields correct computations when linked with the
     Font Server and incorrect computations when linked with the X Server. It may be a compiler bug
 or perhaps there is some register initialization done by the font server that is not done by the 
 X Server.
     /*
      * :h3.MatrixMultiply() - Implements Multiplication of Two Matrices
      *
      * Implements 2x2 matrix multiplication, A * B = C: each element of C is
      * a row of A times a column of B.
      *
      * A, B - input matrices (2x2 doubles)
      * C    - output matrix (2x2 doubles)
      *
      * All four results are computed into temporaries before anything is
      * stored, so the output matrix may be the same as one of the input
      * matrices.
      *
      * This is the version that misbehaved when linked into the X Server:
      * the parameters and the temporaries are all declared "register".
      */
     void MatrixMultiply(A, B, C)
            register double A[2][2],B[2][2]; /* input matrices   */
            register double C[2][2];             /* output matrix    */
     {
            /* txx -> C[0][0], txy -> C[1][0], tyx -> C[0][1], tyy -> C[1][1] */
            register double txx,txy,tyx,tyy;
            txx = A[0][0] * B[0][0] + A[0][1] * B[1][0];
            txy = A[1][0] * B[0][0] + A[1][1] * B[1][0];
            tyx = A[0][0] * B[0][1] + A[0][1] * B[1][1];
            tyy = A[1][0] * B[0][1] + A[1][1] * B[1][1];
            /* Store only after every input element has been read. */
            C[0][0] = txx;
            C[1][0] = txy;
            C[0][1] = tyx;
            C[1][1] = tyy;
     }
     Dump of assembler code for function t1_MMultiply:
     0x808a260 <t1_MMultiply>:   pushl  %ebp
     0x808a261 <t1_MMultiply+1>: movl   %esp,%ebp
     0x808a263 <t1_MMultiply+3>: movl   0x8(%ebp),%edx
     0x808a266 <t1_MMultiply+6>: movl   0xc(%ebp),%ecx
     0x808a269 <t1_MMultiply+9>: movl   0x10(%ebp),%eax
     0x808a26c <t1_MMultiply+12>:        fldl   (%edx)
     0x808a26e <t1_MMultiply+14>:        fmull  (%ecx)
     0x808a270 <t1_MMultiply+16>:        fldl   0x8(%edx)
     0x808a273 <t1_MMultiply+19>:        fmull  0x10(%ecx)
     0x808a276 <t1_MMultiply+22>:        faddp  %st,%st(1)
     0x808a278 <t1_MMultiply+24>:        fldl   0x10(%edx)
     0x808a27b <t1_MMultiply+27>:        fmull  (%ecx)
     0x808a27d <t1_MMultiply+29>:        fldl   0x18(%edx)
     0x808a280 <t1_MMultiply+32>:        fmull  0x10(%ecx)
     0x808a283 <t1_MMultiply+35>:        faddp  %st,%st(1)
     0x808a285 <t1_MMultiply+37>:        fldl   (%edx)
     0x808a287 <t1_MMultiply+39>:        fmull  0x8(%ecx)
     0x808a28a <t1_MMultiply+42>:        fldl   0x8(%edx)
     0x808a28d <t1_MMultiply+45>:        fmull  0x18(%ecx)
     0x808a290 <t1_MMultiply+48>:        faddp  %st,%st(1)
     0x808a292 <t1_MMultiply+50>:        fldl   0x10(%edx)
     0x808a295 <t1_MMultiply+53>:        fmull  0x8(%ecx)
     0x808a298 <t1_MMultiply+56>:        fldl   0x18(%edx)
     0x808a29b <t1_MMultiply+59>:        fmull  0x18(%ecx)
     0x808a29e <t1_MMultiply+62>:        faddp  %st,%st(1)
     0x808a2a0 <t1_MMultiply+64>:        fxch   %st(3)
     0x808a2a2 <t1_MMultiply+66>:        fstpl  (%eax)
     0x808a2a4 <t1_MMultiply+68>:        fxch   %st(1)
     0x808a2a6 <t1_MMultiply+70>:        fstpl  0x10(%eax)
     0x808a2a9 <t1_MMultiply+73>:        fstpl  0x8(%eax)
     0x808a2ac <t1_MMultiply+76>:        fstpl  0x18(%eax)
     0x808a2af <t1_MMultiply+79>:        movl   %ebp,%esp
     0x808a2b1 <t1_MMultiply+81>:        popl   %ebp
     0x808a2b2 <t1_MMultiply+82>:        ret
     End of assembler dump.
 Fix:
    /*
     * MatrixMultiply() - 2x2 matrix product, C = A * B.
     *
     * A, B - input matrices
     * C    - output matrix; may be the same object as A or B, because every
     *        element of the product is computed before any element of C is
     *        written.
     *
     * No "register" storage class is used on the parameters or locals.
     */
    void MatrixMultiply(A, B, C)
        double A[2][2];
        double B[2][2];
        double C[2][2];
    {
        double r0c0, r0c1;      /* first row of the product  */
        double r1c0, r1c1;      /* second row of the product */

        /* Row 0 of A against both columns of B. */
        r0c0 = A[0][0] * B[0][0] + A[0][1] * B[1][0];
        r0c1 = A[0][0] * B[0][1] + A[0][1] * B[1][1];

        /* Row 1 of A against both columns of B. */
        r1c0 = A[1][0] * B[0][0] + A[1][1] * B[1][0];
        r1c1 = A[1][0] * B[0][1] + A[1][1] * B[1][1];

        /* All inputs have been read; now it is safe to overwrite C. */
        C[0][0] = r0c0;
        C[0][1] = r0c1;
        C[1][0] = r1c0;
        C[1][1] = r1c1;
    }
======================================================================