This is the mail archive of the
gcc-bugs@gcc.gnu.org
mailing list for the GCC project.
[Bug c++/15857] New: Wrong code with optimization >= -O1
- From: "marco at technoboredom dot net" <gcc-bugzilla at gcc dot gnu dot org>
- To: gcc-bugs at gcc dot gnu dot org
- Date: 7 Jun 2004 11:31:07 -0000
- Subject: [Bug c++/15857] New: Wrong code with optimization >= -O1
- Reply-to: gcc-bugzilla at gcc dot gnu dot org
The included source file "fail_alpha.cc" implements alpha blending on i386/mmx
platforms using the mmx intrinsics from <mmintrin.h>. If compiled without
optimizations (but with -march=pentium2 or -march=athlon to enable the mmx
intrinsics), the program terminates; however, if compiled with any optimization
level (-O1, -O2, -O3), the program does not terminate.
Example:
g++ -march=athlon fail_alpha.cc -o fail_alpha
./fail_alpha
=> terminates
g++ -O1 -march=athlon fail_alpha.cc -o fail_alpha
./fail_alpha
=> hangs
I've also checked the 3.3.2 compiler and found that it shows the same behaviour.
Here is my config line:
% /opt/gcc-3.4.0/bin/g++ -v
Reading specs from /opt/gcc-3.4.0/lib/gcc/i386-slackware-linux/3.4.0/specs
Configured with: ../source/gcc-3.4.0/configure --prefix=/opt/gcc-3.4.0
--enable-shared --enable-threads=posix --enable-__cxa_atexit --disable-checking
--with-gnu-ld --verbose --target=i386-slackware-linux --host=i386-slackware-linux
Thread model: posix
gcc version 3.4.0
/*--- begin fail_alpha.cc ---*/
#include <mmintrin.h>
/* 8-bit unsigned unit used in this file for pixel data and for the alpha value. */
typedef unsigned char byte;
/*
 * Describes a pixel buffer that blt() reads from and writes to.
 * Field order matters: main() fills this struct with an aggregate initializer.
 */
struct Surface
{
    byte *pixels;                 /* first byte of the pixel data */
    unsigned int pitch;           /* byte offset blt() adds to step from one row to the next */
    unsigned int w;               /* width as supplied by the creator (not read by blt()) */
    unsigned int h;               /* height as supplied by the creator (not read by blt()) */
    unsigned int bytes_per_pixel; /* blt() multiplies x offsets and widths by this */
};
/*
 * Rectangle given by its x/y position and its w/h extent.
 * Field order (x, y, w, h) matters: main() uses an aggregate initializer.
 */
struct Rect
{
    int x;
    int y;
    int w;
    int h;
};
/*
 * Blends source bytes onto destination bytes with a fixed alpha value.
 *
 * Two overloads are provided: one that processes eight bytes packed in an
 * __m64, and a scalar one for a single byte.  Both evaluate
 *
 *     c      = alpha * (s - d) + 1
 *     result = d + ((c + (c >> 8)) >> 8)
 *
 * which appears to be the usual shift-based approximation of
 * d + alpha * (s - d) / 255.
 *
 * The MMX overload leaves the MMX state active; blt() calls _mm_empty()
 * once after all blending is done.
 */
struct calpha_blender
{
    __m64 valpha; /* alpha replicated into four 16-bit lanes */
    __m64 one;    /* the constant 1 replicated into four 16-bit lanes */
    byte alpha;   /* alpha value used by the scalar overload */

    /* Members are initialized in declaration order, hence the use of the
       constructor argument rather than the `alpha` member below. */
    calpha_blender(byte _alpha)
        : valpha(_mm_set1_pi16(_alpha)),
          one(_mm_set1_pi16(1)),
          alpha(_alpha)
    {
    }

    /*
     * Blend eight source bytes `s` onto eight destination bytes `d`.
     *
     * NOTE(review): the arithmetic here is done in 16-bit lanes with logical
     * shifts, whereas the scalar overload works on a 32-bit unsigned value.
     * For s < d the two can differ by one (e.g. alpha=128, s=0, d=255 works
     * out to 128 here but 127 in the scalar overload) -- confirm whether
     * that difference matters to callers.
     */
    virtual __m64 blend(__m64 s, __m64 d)
    {
        const __m64 zero = _mm_setzero_si64();

        /* Widen each byte to a 16-bit lane and form (s - d) per half. */
        __m64 low  = _mm_sub_pi16(_mm_unpacklo_pi8(s, zero), _mm_unpacklo_pi8(d, zero));
        __m64 high = _mm_sub_pi16(_mm_unpackhi_pi8(s, zero), _mm_unpackhi_pi8(d, zero));

        /* c = alpha * (s - d) + 1, keeping the low 16 bits of each product. */
        low  = _mm_add_pi16(_mm_mullo_pi16(low,  valpha), one);
        high = _mm_add_pi16(_mm_mullo_pi16(high, valpha), one);

        /* (c + (c >> 8)) >> 8 */
        low  = _mm_srli_pi16(_mm_add_pi16(low,  _mm_srli_pi16(low,  8)), 8);
        high = _mm_srli_pi16(_mm_add_pi16(high, _mm_srli_pi16(high, 8)), 8);

        /* Narrow both halves back to eight bytes and add them onto d. */
        return _mm_add_pi8(d, _mm_packs_pu16(low, high));
    }

    /* Blend a single source byte `s` onto a single destination byte `d`. */
    inline byte blend(byte s, byte d)
    {
        /* (s - d) is computed as int and may be negative; the conversion to
           unsigned wraps, and only the low byte of the sum is returned. */
        const unsigned int c = alpha * (s - d) + 1;
        const unsigned int scaled = (c + (c >> 8)) >> 8;
        return scaled + d;
    }
};
/*
 * Alpha-blend a rectangle of surface `s` onto surface `d`.
 *
 * s, sr  - source surface and the rectangle to read; sr.x/sr.y select the
 *          start position, sr.w/sr.h the size of the blended area.
 * d, dr  - destination surface and rectangle; only dr.x/dr.y are used
 *          (the size always comes from `sr`).
 * alpha  - blend factor handed to calpha_blender.
 *
 * The bytes-per-pixel value of the source surface is used for both surfaces.
 * No clipping is performed: the caller must make sure the rectangle lies
 * inside both surfaces.
 *
 * Each row is processed in groups of eight bytes with the MMX blender; the
 * remaining (< 8) bytes of the row are handled one at a time by the scalar
 * blender.  The vector loop only runs while a full group of eight bytes is
 * left, so it never reads or writes beyond the end of the row.
 */
void blt(Surface &s,const Rect &sr,Surface &d,const Rect &dr, byte alpha)
{
    const int bpp = s.bytes_per_pixel;
    byte *sp = s.pixels + (sr.x*bpp + sr.y*s.pitch);
    byte *dp = d.pixels + (dr.x*bpp + dr.y*d.pitch);
    const int lw = sr.w*bpp; /* row width in bytes */

    calpha_blender blender(alpha);

    for (int y = 0; y < sr.h; y++)
    {
        int x = 0;

        /* Full 8-byte groups only; a partial group is left to the tail loop. */
        for (; lw - x >= 8; x += 8)
        {
            *(__m64*)(dp+x) = blender.blend(*(__m64*)(sp+x), *(__m64*)(dp+x));
        }

        /* Remaining bytes of the row, one at a time. */
        for (; x < lw; x++)
        {
            dp[x] = blender.blend(sp[x], dp[x]);
        }

        sp += s.pitch;
        dp += d.pitch;
    }

    /* Leave MMX state so that later floating-point code works. */
    _mm_empty();
}
/*
 * Test driver: blends an 800x600, 3-bytes-per-pixel surface onto itself
 * with alpha 128.
 */
int main()
{
    const int W=800;
    const int H=600;
    const int bpp=3;

    /* Zero-initialized so that blt() does not read indeterminate values. */
    byte data[W*H*bpp] = {0};

    Surface x={data,W*bpp,W,H,bpp};
    Rect r={0,0,W,H};

    /* Source and destination are the same surface and rectangle. */
    blt(x,r,x,r,128);
    return 0;
}
/*--- end fail_alpha.cc ---*/
--
Summary: Wrong code with optimization >= -O1
Product: gcc
Version: 3.4.0
Status: UNCONFIRMED
Severity: critical
Priority: P2
Component: c++
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: marco at technoboredom dot net
CC: gcc-bugs at gcc dot gnu dot org
GCC build triplet: i386-slackware-linux
GCC host triplet: i386-slackware-linux
GCC target triplet: i386-slackware-linux
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=15857