[Bug c/26653] New: Wrong assembly generated with -O2, -O OK for cinelerra source item
mtodorov at alu dot hr
gcc-bugzilla@gcc.gnu.org
Sun Mar 12 17:23:00 GMT 2006
Exact version of GCC: gcc-4.2-20060304
Problem since: unknown (gcc-4.0.3-20060212 compiles OK under both -O2
and -O3)
Built with: ./configure --prefix=/usr/local; make bootstrap
Hardware: Athlon Thunderbird 1.33 GHz; 256 MB RAM
OS: KANOTIX Linux (Knoppix)
uname -a: Linux zion 2.6.11 #3 SMP Mon May 2 00:19:56 CEST 2005
i686 GNU/Linux
Package: Cinelerra-2.0, jpeg-mmx lib
Description of problem: The assembler rejects the code generated with -O2; compiling with -O succeeds
mtodorovac@zion:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ gcc -O2 -I. -c -o
jquant_x86simd.o -save-temps jquant_x86simd.c
jquant_x86simd.s: Assembler messages:
jquant_x86simd.s:59: Error: suffix or operands invalid for `ldmxcsr'
mtodorovac@zion:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ gcc -O -I. -c -o
jquant_x86simd.o -save-temps jquant_x86simd.c
mtodorovac@zion:~/cinelerra-2.0/quicktime/jpeg-mmx.0.1.6$ (successful)
The .i file that triggered this erroneous compilation is:
# 1 "jquant_x86simd.c"
# 1 "<built-in>"
# 1 "<command line>"
# 1 "jquant_x86simd.c"
# 31 "jquant_x86simd.c"
# 1 "jconfig.h" 1
# 32 "jquant_x86simd.c" 2
# 1 "jmorecfg.h" 1
# 58 "jmorecfg.h"
typedef unsigned char JSAMPLE; /* not referenced by the functions in this excerpt */
# 98 "jmorecfg.h"
typedef short JCOEF; /* not referenced by the functions in this excerpt */
# 124 "jmorecfg.h"
typedef unsigned char JOCTET; /* not referenced by the functions in this excerpt */
# 149 "jmorecfg.h"
typedef unsigned char UINT8; /* not referenced by the functions in this excerpt */
# 161 "jmorecfg.h"
typedef unsigned short UINT16; /* not referenced by the functions in this excerpt */
typedef short INT16; /* element type of the psrc/pdst arrays in both jcquant_* functions */
typedef int INT32; /* not referenced by the functions in this excerpt */
# 189 "jmorecfg.h"
typedef unsigned int JDIMENSION; /* not referenced by the functions in this excerpt */
# 245 "jmorecfg.h"
typedef int boolean; /* not referenced by the functions in this excerpt */
# 33 "jquant_x86simd.c" 2
# 1 "mmx.h" 1
# 22 "mmx.h"
# 1 "attributes.h" 1
# 23 "mmx.h" 2
/*
 * mmx_t: union giving several typed views (signed/unsigned 64-, 32-, 16-,
 * 8-bit integers and float pairs) of the same storage.  jcquant_3dnow casts
 * pointers to mmx_t so that its asm operands have this union type.
 * NOTE(review): presumably 8 bytes wide on the reported i686 target, matching
 * one MMX register - confirm for other ABIs.
 */
typedef union {
long long q; /* one signed 64-bit value */
unsigned long long uq; /* one unsigned 64-bit value */
int d[2]; /* two signed ints */
unsigned int ud[2]; /* two unsigned ints */
short w[4]; /* four signed shorts */
unsigned short uw[4]; /* four unsigned shorts */
char b[8]; /* eight chars (signedness of plain char is implementation-defined) */
unsigned char ub[8]; /* eight unsigned chars */
float s[2]; /* two floats */
} mmx_t;
# 34 "jquant_x86simd.c" 2
/*
 * jcquant_3dnow: run 16 passes (i = 0, 4, ..., 60); each pass reads 4 INT16
 * values at psrc and 4 floats at piqf, multiplies them, and stores 4 INT16
 * results at pdst.  All three pointers are advanced by 4 elements per pass;
 * i serves only as the pass counter.
 *
 * psrc: input, 64 INT16 values are read.
 * pdst: output, 64 INT16 values are written.
 * piqf: input, 64 floats are read (multipliers).
 *
 * Each step is its own asm statement and the steps communicate through fixed
 * registers (mm2, mm3, mm4, mm5, mm7) that are neither operands nor clobbers,
 * so the code depends on the compiler keeping the statements in this order
 * and leaving those registers alone in between.
 * NOTE(review): the memory operands use the "X" constraint, which the GCC
 * documentation describes as "any operand whatsoever" - confirm whether "m"
 * was intended.
 */
void jcquant_3dnow( INT16 *psrc, INT16 *pdst, float *piqf )
{
int i;
for (i=0; i < 64 ; i+=4)
{
# 55 "jquant_x86simd.c"
__asm__ __volatile__ ("movq" " %0, %%" "mm2" : : "X" (*(mmx_t *)&psrc[0])); /* mm2 = 4 source words */
__asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm7"); /* mm7 = copy of the source words */
__asm__ __volatile__ ("psraw" " %0, %%" "mm7" : : "J" (16) ); /* arithmetic shift by 16: each word of mm7 becomes its sign fill */
__asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm3"); /* mm3 = second copy, used for the high pair */
__asm__ __volatile__ ("punpcklwd" " %" "mm7" ", %" "mm2"); /* interleave low words with sign words: 2 sign-extended dwords in mm2 */
__asm__ __volatile__ ("punpckhwd" " %" "mm7" ", %" "mm3"); /* same for the high words, into mm3 */
__asm__ __volatile__ ("movq" " %0, %%" "mm4" : : "X" (*(mmx_t*)&piqf[0])); /* mm4 = piqf[0], piqf[1] */
__asm__ __volatile__ ("pi2fd" " %" "mm2" ", %" "mm2"); /* 3DNow!: low pair int -> float */
__asm__ __volatile__ ("movq" " %0, %%" "mm5" : : "X" (*(mmx_t*)&piqf[2])); /* mm5 = piqf[2], piqf[3] */
__asm__ __volatile__ ("pi2fd" " %" "mm3" ", %" "mm3"); /* 3DNow!: high pair int -> float */
__asm__ __volatile__ ("pfmul" " %" "mm4" ", %" "mm2"); /* low pair *= piqf[0..1] */
__asm__ __volatile__ ("pfmul" " %" "mm5" ", %" "mm3"); /* high pair *= piqf[2..3] */
__asm__ __volatile__ ("pf2id" " %" "mm2" ", %" "mm2"); /* 3DNow!: low pair float -> int */
__asm__ __volatile__ ("pf2id" " %" "mm3" ", %" "mm3"); /* 3DNow!: high pair float -> int */
__asm__ __volatile__ ("packssdw" " %" "mm3" ", %" "mm2"); /* pack 4 dwords into 4 words with signed saturation */
piqf += 4;
psrc += 4;
__asm__ __volatile__ ("movq" " %%" "mm2" ", %0" : "=X" (*(mmx_t*)pdst) : ); /* store the 4 result words at pdst */
pdst += 4;
}
__asm__ __volatile__ ("femms"); /* 3DNow! instruction that leaves MMX state so x87 code can run again */
}
/*
 * Value loaded into the SSE control/status register (MXCSR) at the start of
 * jcquant_sse.
 * NOTE(review): 0x1f80 looks like the documented MXCSR reset value (all
 * exceptions masked, rounding-control bits clear = round to nearest), which
 * does not match the "trunc" in the name - confirm against the Intel SDM.
 */
static int trunc_mxcsr = 0x1f80;
/*
 * jcquant_sse: SSE counterpart of jcquant_3dnow.  Runs 16 passes
 * (i = 0, 4, ..., 60); each pass takes its input at psrc[i] and piqf[i] and
 * stores its result at pdst[i].  Unlike jcquant_3dnow the pointers are not
 * advanced; i is used as the index.
 *
 * psrc: input INT16 array, indexed 0..63.
 * pdst: output INT16 array, indexed 0..63.
 * piqf: input float multipliers, indexed 0..63.
 *
 * As in jcquant_3dnow, the asm statements pass data through fixed registers
 * (mm2, mm3, mm7, xmm2, xmm3) that are neither operands nor clobbers, so the
 * statement order is significant.
 *
 * NOTE(review): every operand here uses the "X" constraint ("any operand
 * whatsoever" per the GCC docs).  For the ldmxcsr statement this lets an
 * optimizing compiler substitute the known constant value of trunc_mxcsr as
 * an immediate, while ldmxcsr only accepts a memory operand.  That would
 * explain the reported "suffix or operands invalid for `ldmxcsr'" at -O2 but
 * not at -O; an "m" constraint would force a memory operand - confirm.
 * NOTE(review): the memory operands are typed as a single INT16 / float
 * (psrc[i], piqf[i], pdst[i]) although movq transfers 8 bytes and mulps reads
 * 16 bytes (and, as a memory operand, normally requires 16-byte alignment),
 * so the compiler is told about less memory than the instructions touch -
 * confirm.
 */
void jcquant_sse( INT16 *psrc, INT16 *pdst, float *piqf )
{
int i;
__asm__ ( "ldmxcsr %0\n" : : "X" (trunc_mxcsr) ); /* load MXCSR from trunc_mxcsr; this is the statement the assembler rejects at -O2 */
for (i=0; i < 64 ; i+=4)
{
__asm__ __volatile__ ("movq" " %0, %%" "mm2" : : "X" (psrc[i])); /* mm2 = 4 source words starting at psrc[i] */
__asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm7"); /* mm7 = copy of the source words */
__asm__ __volatile__ ("psraw" " %0, %%" "mm7" : : "J" (16) ); /* arithmetic shift by 16: each word of mm7 becomes its sign fill */
__asm__ __volatile__ ("movq" " %" "mm2" ", %" "mm3"); /* mm3 = second copy, used for the high pair */
__asm__ __volatile__ ("punpcklwd" " %" "mm7" ", %" "mm2"); /* low words -> 2 sign-extended dwords in mm2 */
__asm__ __volatile__ ("punpckhwd" " %" "mm7" ", %" "mm3"); /* high words -> 2 sign-extended dwords in mm3 */
__asm__ __volatile__ ("cvtpi2ps" " %" "mm2" ", %" "xmm2"); /* low 2 lanes of xmm2 = floats from mm2 */
__asm__ __volatile__ ("cvtpi2ps" " %" "mm3" ", %" "xmm3"); /* low 2 lanes of xmm3 = floats from mm3 */
__asm__ __volatile__ ("shufps" " %0, %%" "xmm3" ", %%" "xmm2" : : "X" (0*1 +
1*4 + 0 * 16 + 1 * 64) ); /* selector 0x44: xmm2 = { xmm2[0], xmm2[1], xmm3[0], xmm3[1] } */
__asm__ __volatile__ ("mulps" " %0, %%" "xmm2" : : "X" (piqf[i])); /* multiply the 4 floats by the floats starting at piqf[i] */
__asm__ __volatile__ ("cvtps2pi" " %" "xmm2" ", %" "mm2"); /* low 2 products -> 2 dwords in mm2 (rounding per MXCSR) */
__asm__ __volatile__ ("shufps" " %0, %%" "xmm2" ", %%" "xmm2" : : "X" (2*1 +
3*4 + 0 * 16 + 1 * 64) ); /* selector 0x4e: move lanes 2,3 down into lanes 0,1 */
__asm__ __volatile__ ("cvtps2pi" " %" "xmm2" ", %" "mm3"); /* former high 2 products -> 2 dwords in mm3 */
__asm__ __volatile__ ("packssdw" " %" "mm3" ", %" "mm2"); /* pack 4 dwords into 4 words with signed saturation */
__asm__ __volatile__ ("movq" " %%" "mm2" ", %0" : "=X" (pdst[i]) : ); /* store the 4 result words starting at pdst[i] */
}
__asm__ __volatile__ ("emms"); /* leave MMX state so x87 code can run again */
}
--
Summary: Wrong assembly generated with -O2, -O OK for cinelerra
source item
Product: gcc
Version: 4.2.0
Status: UNCONFIRMED
Severity: major
Priority: P3
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: mtodorov at alu dot hr
GCC build triplet: i686-pc-linux-gnu
GCC host triplet: i686-pc-linux-gnu
GCC target triplet: i686-pc-linux-gnu
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26653
More information about the Gcc-bugs
mailing list