[Bug c/26653] New: Wrong assembly generated with -O2, -O OK for cinelerra source item

mtodorov at alu dot hr gcc-bugzilla@gcc.gnu.org
Sun Mar 12 17:23:00 GMT 2006


Exact version of GCC:   gcc-4.2-20060304
Problem since:          unknown (gcc-4.0.3-20060212 compiles OK both under -O2  
                                 and -O3)
Built with:             ./configure --prefix=/usr/local; make bootstrap
Hardware:               Athlon Thunderbird 1.33 GHz; 256 MB RAM
OS:                     KANOTIX Linux (Knoppix)
uname -a:               Linux zion 2.6.11 #3 SMP Mon May 2 00:19:56 CEST 2005
                                   i686 GNU/Linux
Package:                Cinelerra-2.0, jpeg-mmx lib
Description of problem: Breaks with -O2, successful with -O

mtodorovac@zion:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ gcc -O2  -I.   -c -o
jquant_x86simd.o -save-temps jquant_x86simd.c
jquant_x86simd.s: Assembler messages:
jquant_x86simd.s:59: Error: suffix or operands invalid for `ldmxcsr'
mtodorovac@zion:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ gcc -O  -I.   -c -o
jquant_x86simd.o -save-temps jquant_x86simd.c
mtodorovac@zion:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ (successful)

The preprocessed .i file that triggered this erroneous compilation is:

# 1 "jquant_x86simd.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "jquant_x86simd.c"
# 31 "jquant_x86simd.c"
# 1 "jconfig.h" 1
# 32 "jquant_x86simd.c" 2
# 1 "jmorecfg.h" 1
# 58 "jmorecfg.h"
typedef unsigned char JSAMPLE; /* alias of unsigned char; not referenced by the functions in this reproducer */
# 98 "jmorecfg.h"
typedef short JCOEF; /* alias of short; not referenced by the functions in this reproducer */
# 124 "jmorecfg.h"
typedef unsigned char JOCTET; /* alias of unsigned char; not referenced by the functions in this reproducer */
# 149 "jmorecfg.h"
typedef unsigned char UINT8; /* alias of unsigned char; not referenced by the functions in this reproducer */
# 161 "jmorecfg.h"
typedef unsigned short UINT16; /* alias of unsigned short; not referenced by the functions in this reproducer */







typedef short INT16; /* alias of short; element type of the psrc/pdst buffers of jcquant_3dnow and jcquant_sse */





typedef int INT32; /* alias of int; not referenced by the functions in this reproducer */
# 189 "jmorecfg.h"
typedef unsigned int JDIMENSION; /* alias of unsigned int; not referenced by the functions in this reproducer */
# 245 "jmorecfg.h"
typedef int boolean; /* alias of int; presumably a truth value, judging by the name only */
# 33 "jquant_x86simd.c" 2
# 1 "mmx.h" 1
# 22 "mmx.h"
# 1 "attributes.h" 1
# 23 "mmx.h" 2







/*
 * Overlay type from mmx.h: all members share the same storage, so one
 * value can be viewed as a single long long or as arrays of narrower
 * elements.  In this reproducer it is only used as a cast target
 * (*(mmx_t *)ptr) to name a block of memory as an asm operand.
 * Assumes the i686 sizes of the reported target (8-byte long long,
 * 4-byte int/float, 2-byte short), which make every member 8 bytes.
 */
typedef union {
 long long q;            /* whole value, signed */
 unsigned long long uq;  /* whole value, unsigned */
 int d[2];               /* two signed ints */
 unsigned int ud[2];     /* two unsigned ints */
 short w[4];             /* four signed shorts */
 unsigned short uw[4];   /* four unsigned shorts */
 char b[8];              /* eight chars (plain char; signedness is implementation-defined) */
 unsigned char ub[8];    /* eight unsigned chars */
 float s[2];             /* two floats */
} mmx_t;
# 34 "jquant_x86simd.c" 2







/*
 * jcquant_3dnow - scale INT16 values by float factors, four at a time,
 * using MMX / 3DNow! inline assembly.
 *
 * psrc: source INT16 buffer; each of the 16 passes loads one mmx_t from it
 *       and then advances the pointer by 4 elements.
 * pdst: destination INT16 buffer; each pass stores one mmx_t and advances
 *       the pointer by 4 elements.
 * piqf: float factors; each pass loads mmx_t-sized chunks at piqf[0] and
 *       piqf[2] and advances the pointer by 4 elements.  Presumably
 *       quantisation multipliers, judging by the name only.
 *
 * Returns nothing; the result is written through pdst.
 *
 * NOTE(review): every instruction is its own asm statement and none
 * declares the MMX registers it clobbers, so correctness relies on the
 * compiler emitting the volatile asms in source order and leaving
 * mm2-mm7 untouched in between.
 * NOTE(review): memory operands use the "X" constraint, which accepts any
 * operand kind; "m" is presumably what movq needs here - confirm.
 */
void jcquant_3dnow( INT16 *psrc, INT16 *pdst, float *piqf )

{
 int i;
 for (i=0; i < 64 ; i+=4)
 {
  /* Load four source words into mm2, build a per-word sign mask in mm7
     (arithmetic shift by 16), and interleave words with the mask so that
     mm2 / mm3 hold the low / high pairs widened to 32 bits. */
# 55 "jquant_x86simd.c"
  __asm__ __volatile__ ("movq" " %0, %%" "mm2" : : "X" (*(mmx_t *)&psrc[0]));
  __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm7");
  __asm__ __volatile__ ("psraw" " %0, %%" "mm7" : : "J" (16) );
  __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm3");
  __asm__ __volatile__ ("punpcklwd" " %" "mm7" ", %" "mm2");
  __asm__ __volatile__ ("punpckhwd" " %" "mm7" ", %" "mm3");







  /* Fetch the four factors into mm4 / mm5 and convert the widened
     integers in mm2 / mm3 to packed floats (pi2fd). */
  __asm__ __volatile__ ("movq" " %0, %%" "mm4" : : "X" (*(mmx_t*)&piqf[0]));
  __asm__ __volatile__ ("pi2fd" " %" "mm2" ", %" "mm2");
  __asm__ __volatile__ ("movq" " %0, %%" "mm5" : : "X" (*(mmx_t*)&piqf[2]));
  __asm__ __volatile__ ("pi2fd" " %" "mm3" ", %" "mm3");



  /* Multiply by the factors, then convert back to packed 32-bit
     integers (pf2id). */
  __asm__ __volatile__ ("pfmul" " %" "mm4" ", %" "mm2");
  __asm__ __volatile__ ("pfmul" " %" "mm5" ", %" "mm3");
  __asm__ __volatile__ ("pf2id" " %" "mm2" ", %" "mm2");
  __asm__ __volatile__ ("pf2id" " %" "mm3" ", %" "mm3");



  /* Narrow the four 32-bit results to four words in mm2 (packssdw is the
     signed-saturating pack). */
  __asm__ __volatile__ ("packssdw" " %" "mm3" ", %" "mm2");

  /* Advance the inputs before the store; the store still targets the
     not-yet-advanced pdst. */
  piqf += 4;
  psrc += 4;
  __asm__ __volatile__ ("movq" " %%" "mm2" ", %0" : "=X" (*(mmx_t*)pdst) : );
  pdst += 4;
 }
 /* Leave MMX state (femms is the 3DNow! counterpart of emms) once the
    loop is finished. */
 __asm__ __volatile__ ("femms");

}







/*
 * Value loaded into the SSE control/status register by the ldmxcsr
 * statement at the top of jcquant_sse.  Nothing in this file writes to it
 * after initialisation.
 * NOTE(review): 0x1f80 appears to be the architectural default MXCSR
 * value (round-to-nearest, exceptions masked), whereas the name suggests
 * a truncating rounding mode was intended - confirm against the Intel SDM.
 */
static int trunc_mxcsr = 0x1f80;

/*
 * jcquant_sse - scale INT16 values by float factors, four at a time,
 * using MMX / SSE inline assembly.
 *
 * psrc: source INT16 buffer, indexed as psrc[i] for i = 0, 4, ..., 60.
 * pdst: destination INT16 buffer, indexed as pdst[i] for the same i.
 * piqf: float factors, indexed as piqf[i] for the same i.  Presumably
 *       quantisation multipliers, judging by the name only.
 *
 * Returns nothing; the result is written through pdst.  The function
 * loads MXCSR from trunc_mxcsr on entry and does not save or restore the
 * previous MXCSR value.
 *
 * NOTE(review): every instruction is its own asm statement and none
 * declares the MMX/XMM registers it clobbers, so correctness relies on
 * the compiler emitting the asms in source order and leaving those
 * registers untouched in between.
 * NOTE(review): the operands psrc[i], piqf[i] and pdst[i] each name a
 * single element, while movq moves 8 bytes and mulps reads 16 bytes, so
 * the compiler is not told about the full extent of memory accessed.
 */
void jcquant_sse( INT16 *psrc, INT16 *pdst, float *piqf )
{
 int i;



 /* NOTE(review): this is the only ldmxcsr in the file and therefore the
    statement behind the assembler error quoted in the report.  The "X"
    constraint accepts any operand kind, while ldmxcsr takes a memory
    operand; with optimisation the compiler may presumably substitute
    something other than a memory reference for trunc_mxcsr.  An "m"
    constraint looks like what is needed - confirm against the GCC
    constraint documentation. */
 __asm__ ( "ldmxcsr %0\n" : : "X" (trunc_mxcsr) );

 for (i=0; i < 64 ; i+=4)
 {



  /* Load four source words into mm2, build a per-word sign mask in mm7
     (arithmetic shift by 16), and interleave words with the mask so that
     mm2 / mm3 hold the low / high pairs widened to 32 bits. */
  __asm__ __volatile__ ("movq" " %0, %%" "mm2" : : "X" (psrc[i]));
  __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm7");
  __asm__ __volatile__ ("psraw" " %0, %%" "mm7" : : "J" (16) );
  __asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm3");
  __asm__ __volatile__ ("punpcklwd" " %" "mm7" ", %" "mm2");
  __asm__ __volatile__ ("punpckhwd" " %" "mm7" ", %" "mm3");





  /* Convert both integer pairs to floats in the low halves of xmm2 and
     xmm3, then merge them into xmm2 with shufps; the selector expression
     evaluates to 68 (0x44). */
  __asm__ __volatile__ ("cvtpi2ps" " %" "mm2" ", %" "xmm2");
  __asm__ __volatile__ ("cvtpi2ps" " %" "mm3" ", %" "xmm3");
  __asm__ __volatile__ ("shufps" " %0, %%" "xmm3" ", %%" "xmm2" : : "X" (0*1 +
1*4 + 0 * 16 + 1 * 64) );



  /* Multiply by the factors starting at piqf[i], convert the low two
     floats to integers in mm2, shuffle xmm2 with itself (selector
     evaluates to 78, 0x4E) to bring the upper two floats down, and
     convert those into mm3. */
  __asm__ __volatile__ ("mulps" " %0, %%" "xmm2" : : "X" (piqf[i]));
  __asm__ __volatile__ ("cvtps2pi" " %" "xmm2" ", %" "mm2");
  __asm__ __volatile__ ("shufps" " %0, %%" "xmm2" ", %%" "xmm2" : : "X" (2*1 +
3*4 + 0 * 16 + 1 * 64) );
  __asm__ __volatile__ ("cvtps2pi" " %" "xmm2" ", %" "mm3");


  /* Narrow the four 32-bit results to four words in mm2 (packssdw is the
     signed-saturating pack). */
  __asm__ __volatile__ ("packssdw" " %" "mm3" ", %" "mm2");




  /* Store the packed result at pdst[i]. */
  __asm__ __volatile__ ("movq" " %%" "mm2" ", %0" : "=X" (pdst[i]) : );



 }
 /* Leave MMX state once the loop is finished. */
 __asm__ __volatile__ ("emms");
}


-- 
           Summary: Wrong assembly generated with -O2, -O OK for cinelerra
                    source item
           Product: gcc
           Version: 4.2.0
            Status: UNCONFIRMED
          Severity: major
          Priority: P3
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: mtodorov at alu dot hr
 GCC build triplet: i686-pc-linux-gnu
  GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26653



More information about the Gcc-bugs mailing list