Bug 19547 - floating point registers not preserved during function call
Summary: floating point registers not preserved during function call
Status: RESOLVED INVALID
Alias: None
Product: gcc
Classification: Unclassified
Component: c (show other bugs)
Version: 2.95.3
Importance: P2 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2005-01-20 16:39 UTC by Jack Whitham
Modified: 2005-07-23 22:49 UTC (History)
1 user (show)

See Also:
Host: i686-unknown-linux
Target: arm-unknown-linux
Build: i686-unknown-linux
Known to work:
Known to fail:
Last reconfirmed:


Attachments
Preprocessor output for the code fragment (2.03 KB, application/octet-stream)
2005-01-20 16:40 UTC, Jack Whitham
Details
glibc function in which floating point registers are not preserved (1.71 KB, text/plain)
2005-01-20 17:37 UTC, Jack Whitham
Details

Note You need to log in before you can comment on or make changes to this bug.
Description Jack Whitham 2005-01-20 16:39:25 UTC
This is a bug report for GCC 2.95.3. The bug also exists in 2.95.2.
I am cross-compiling for arm-linux on an i386 Linux system, using
the command line:
    arm-linux-gcc -static -O -g -c au1.c

The code fragment below is taken from the mpeg2enc package. When
compiled with -O, it does not work correctly: the value of s changes
between the line marked XXX and the line marked YYY. This is because
s is stored in register f5, which is clobbered by __kernel_cos, a
glibc function called by cos. I don't know whether f5 should be
preserved by the calling function or the callee, but nothing is preserving
it at present, and this results in the bug.

The code works correctly when compiled without optimisation, because the
value of s is stored on the stack.

I don't think that this problem is confined to register f5 or the
functions involved here, but I don't have any evidence for that.
It occurs with any level of optimisation, not just -O.

// Code fragment: (note: based on init_fdct() in fdctref.c, in mpeg2enc)

#include <math.h>

#define PI M_PI

// Test: fill the 8x8 table c so that
//     c[i][j] = s * cos ( (PI / 8.0) * i * (j + 0.5) )
// where s is sqrt(0.125) for row 0 and 0.5 for every other row. Each
// intermediate value of p is printed as it is computed.
//
// c: 8x8 output table; every entry is overwritten.
//
// s is assigned once per outer iteration and is not modified afterwards,
// so the values printed at XXX and YYY are expected to be identical.
void Test ( double c [ 8 ][ 8 ] )
{
  int i, j;
  double s;

  for (i=0; i<8; i++)
  {
    // Row scale factor: sqrt(0.125) for row 0, 0.5 for all other rows.
    s = (i==0) ? sqrt(0.125) : 0.5;

    printf ( "Begin: s = %1.4f\n" , s ) ; // XXX
    for (j=0; j<8; j++)
    {
        double p = PI / 8.0 ;
        
        printf ( "     p: %1.4f " , p ) ;
        
        p *= (double) i ; 
        printf ( "%1.4f " , p ) ;
        
        p *= ( (double) j ) + 0.5 ;
        printf ( "%1.4f " , p ) ;
        
        p = cos ( p ) ; 
        printf ( "%1.4f " , p ) ;
        
        // s is read here, after the printf and cos calls above, so its
        // value has to survive those calls.
        p *= s ; 
        printf ( "%1.4f\n" , p ) ;
        
        c [ i ][ j ] = p ;
    }
    // s has not been assigned since XXX; a different value printed here
    // means it was altered during the inner loop.
    printf ( "Now: s = %1.4f\n" , s ) ; // YYY
  }
}

// End code fragment

Here is a typical output produced by one iteration (i = 1) of the outer
for loop, when compiled with -O.

Begin: s = 0.5000
     p: 0.3927 0.3927 0.1963 0.9808 0.0000
     p: 0.3927 0.3927 0.5890 0.8315 0.0000
     p: 0.3927 0.3927 0.9817 0.5556 0.0000
     p: 0.3927 0.3927 1.3744 0.1951 0.0000
     p: 0.3927 0.3927 1.7671 -0.1951 -0.0000
     p: 0.3927 0.3927 2.1598 -0.5556 -0.0000
     p: 0.3927 0.3927 2.5525 -0.8315 -1.7958
     p: 0.3927 0.3927 2.9452 -0.9808 -2.1183
Now: s = 2.1598

As you can see, s has changed.



Here is a disassembly of the Test() function when optimised with -O:

// Begin disassembly

au1.o:     file format elf32-littlearm

Disassembly of section .text:

00000000 <Test>:

#define PI M_PI

void Test ( double c [ 8 ][ 8 ] )
{  
   0:   e1a0c00d    mov ip, sp
   4:   e92dd8f0    stmdb   sp!, {r4, r5, r6, r7, fp, ip, lr, pc}
   8:   ed6d4206    sfm f4, 2, [sp, #-24]!
   c:   e24cb004    sub fp, ip, #4  ; 0x4
  10:   e1a07000    mov r7, r0
  int i, j;
  double s;
  
  for (i=0; i<8; i++)
  14:   e3a06000    mov r6, #0  ; 0x0
  { 
    s = (i==0) ? sqrt(0.125) : 0.5;
  18:   e3560000    cmp r6, #0  ; 0x0
  1c:   1a000011    bne 68 <.text+0x68>
  20:   ed9f9107    ldfd    f1, [pc, #28]
  24:   ee408181    sqtd    f0, f1
  28:   ee90f110    cmf f0, f0
  2c:   0a00000d    beq 68 <.text+0x68>
  30:   ed2d9102    stfd    f1, [sp, #-8]!
  34:   e8bd0003    ldmia   sp!, {r0, r1}
  38:   ebfffffe    bl  38 <Test+0x38>
  3c:   ee00d180    mvfd    f5, f0
  40:   ea000012    b   90 <.text+0x90>
  44:   3fc00000    swicc   0x00c00000
  48:   00000000    andeq   r0, r0, r0
  4c:   ee00d18e    mvfd    f5, #0.5

    printf ( "Begin: s = %1.4f\n" , s ) ; // XXX
  50:   e59f00c0    ldr r0, [pc, #192]  ; 118 <.text+0x118>
  54:   ed2dd102    stfd    f5, [sp, #-8]!
  58:   e8bd0006    ldmia   sp!, {r1, r2}
  5c:   ebfffffe    bl  5c <.text+0x5c>
    for (j=0; j<8; j++)
  60:   e3a04000    mov r4, #0  ; 0x0
  64:   e59f50b0    ldr r5, [pc, #176]  ; 11c <.text+0x11c>
    {
        double p = PI / 8.0 ;
  68:   ed9fc12c    ldfd    f4, [pc, #176]

        printf ( "     p: %1.4f " , p ) ;
  6c:   e59f00b4    ldr r0, [pc, #180]  ; 128 <.text+0x128>
  70:   ed2dc102    stfd    f4, [sp, #-8]!
  74:   e8bd0006    ldmia   sp!, {r1, r2}
  78:   ebfffffe    bl  78 <.text+0x78>

        p *= (double) i ;
  7c:   ee006190    fltd    f0, r6
  80:   ee104184    mufd    f4, f0, f4
        printf ( "%1.4f " , p ) ;
  84:   e1a00005    mov r0, r5
  88:   ed2dc102    stfd    f4, [sp, #-8]!
  8c:   e8bd0006    ldmia   sp!, {r1, r2}
  90:   ebfffffe    bl  90 <.text+0x90>

        p *= ( (double) j ) + 0.5 ;
  94:   ee004190    fltd    f0, r4
  98:   ee00018e    adfd    f0, f0, #0.5
  9c:   ee144180    mufd    f4, f4, f0
        printf ( "%1.4f " , p ) ; 
  a0:   e1a00005    mov r0, r5
  a4:   ed2dc102    stfd    f4, [sp, #-8]!
  a8:   e8bd0006    ldmia   sp!, {r1, r2}
  ac:   ebfffffe    bl  ac <.text+0xac>
  
        p = cos ( p ) ;
  b0:   ed2dc102    stfd    f4, [sp, #-8]!
  b4:   e8bd0003    ldmia   sp!, {r0, r1}
  b8:   ebfffffe    bl  b8 <.text+0xb8>
  bc:   ee00c180    mvfd    f4, f0
        printf ( "%1.4f " , p ) ;
  c0:   e1a00005    mov r0, r5
  c4:   ed2dc102    stfd    f4, [sp, #-8]!
  c8:   e8bd0006    ldmia   sp!, {r1, r2}
  cc:   ebfffffe    bl  cc <.text+0xcc> 
  
        p *= s ;    
  d0:   ee144185    mufd    f4, f4, f5
        printf ( "%1.4f\n" , p ) ;
  d4:   e59f0050    ldr r0, [pc, #80]   ; 12c <.text+0x12c>
  d8:   ed2dc102    stfd    f4, [sp, #-8]!
  dc:   e8bd0006    ldmia   sp!, {r1, r2} 
  e0:   ebfffffe    bl  e0 <.text+0xe0>
        
        c [ i ][ j ] = p ;  
  e4:   e0873306    add r3, r7, r6, lsl #6
  e8:   e0833184    add r3, r3, r4, lsl #3
  ec:   ed83c100    stfd    f4, [r3]
  f0:   e2844001    add r4, r4, #1  ; 0x1
  f4:   e3540007    cmp r4, #7  ; 0x7 
  f8:   da000018    ble 160 <.text+0x160>

    }   
    printf ( "Now: s = %1.4f\n" , s ) ; // YYY
  fc:   e59f002c    ldr r0, [pc, #44]   ; 130 <.text+0x130>
 100:   ed2dd102    stfd    f5, [sp, #-8]!
 104:   e8bd0006    ldmia   sp!, {r1, r2}
 108:   ebfffffe    bl  108 <.text+0x108>
 10c:   e2866001    add r6, r6, #1  ; 0x1
 110:   e3560007    cmp r6, #7  ; 0x7
 114:   ea00004b    b   248 <.text+0x248>
 118:   00000000    andeq   r0, r0, r0
 11c:   00000024    andeq   r0, r0, r4, lsr #32
 120:   3fd921fb    swicc   0x00d921fb
 124:   54442d18    strplb  r2, [r4], #-3352
 128:   00000014    andeq   r0, r0, r4, lsl r0
 12c:   0000002c    andeq   r0, r0, ip, lsr #32
 130:   00000034    andeq   r0, r0, r4, lsr r0
 134:   da000004    ble 14c <.text+0x14c>
  }
}
 138:   ed5b420d    lfm f4, 2, [fp, #-52]
 13c:   e91ba8f0    ldmdb   fp, {r4, r5, r6, r7, fp, sp, pc}


// End disassembly



And here is the output of the command:
    arm-linux-gcc -v -save-temps -static -O -g -c au1.c

Reading specs from /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/specs
gcc version 2.95.3 20010315 (release)
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/cpp0 -lang-c -v
-D__GNUC__=2 -D__GNUC_MINOR__=95 -Dunix -D__arm__ -Dlinux -D__ELF__ -D__unix__
-D__arm__ -D__linux__ -D__ELF__ -D__unix -D__linux -Asystem(unix)
-Asystem(posix) -Acpu(arm) -Amachine(arm) -D__CHAR_UNSIGNED__ -D__OPTIMIZE__ -g
-D__ARM_ARCH_3__ -D__APCS_32__ au1.c au1.i
GNU CPP version 2.95.3 20010315 (release) (ARM GNU/Linux with ELF)
#include "..." search starts here:
#include <...> search starts here:
 /usr/jack/proj/arm/include
 /usr/jack/proj/arm/include
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/include
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/../../../../arm-linux/include
End of search list.
The following default directories have been omitted from the search path:
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/../../../../include/g++-
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/../../../../arm-linux/sys-include
End of omitted list.
 /usr/jack/local/arm_gcc_2.95.3/lib/gcc-lib/arm-linux/2.95.3/cc1 au1.i -quiet
-dumpbase au1.c -g -O -version -o au1.s
GNU C version 2.95.3 20010315 (release) (arm-linux) compiled by GNU C version 3.3.4.
 /usr/jack/local/arm_gcc_2.95.3/arm-linux/bin/as -o au1.o au1.s
Comment 1 Jack Whitham 2005-01-20 16:40:42 UTC
Created attachment 8020 [details]
Preprocessor output for the code fragment
Comment 2 Richard Earnshaw 2005-01-20 17:18:37 UTC
It's the callee's responsibility to save f4...f7 if it wants to use them.  So
the problem is in glibc.

Note that gcc 2.95 isn't supported any more, so the most we'd likely do in this
sort of case is fix the latest release if it could be shown to have the same
problem.
Comment 3 Jack Whitham 2005-01-20 17:35:58 UTC
> It's the callee's responsibility to save f4...f7 if it wants to use them.  So
> the problem is in glibc.

Ah, I see.

However, when I recompile the glibc function in question (__kernel_cos), f4...f7
are not saved. As the function is written entirely in C, surely this must be the
fault of the compiler?

This is glibc-2.1.3, and the file I am recompiling is
sysdeps/libm-ieee754/k_cos.c (which I will attach).
Comment 4 Jack Whitham 2005-01-20 17:37:00 UTC
Created attachment 8021 [details]
glibc function in which floating point registers are not preserved
Comment 5 Andrew Pinski 2005-01-20 17:37:43 UTC
(In reply to comment #3)
> This is glibc-2.1.3, and the file I am recompiling is
> sysdeps/libm-ieee754/k_cos.c (which I will attach).

Well, considering that 2.95.3 is old, nothing matters unless you try a newer compiler.