[Bug c/20494] New: gcc -O causes segfault in code using SSE2 intrinsics

isaac at nada dot kth dot se gcc-bugzilla@gcc.gnu.org
Wed Mar 16 13:16:00 GMT 2005

When the code below is compiled using the "-O" flag, it segfaults. If
the code is compiled without optimization, it works fine. The code also works
fine when compiled with Intel's compiler. Note that there are no pointers in the
code. Thus it seems very likely that there is an SSE2-intrinsics-related bug in
gcc.

One explanation could be that the __m128i types in SSE2 need to be 16-byte
aligned and that gcc misaligns something (maybe the stack) when optimizing.
However, I am far from an expert, so I leave it up to you to figure that out.


1) gcc output
2) /proc/cpuinfo
3) the file "bug_creator.c"
(the file "bug_creator.i" is too large to be added)

> gcc -v -save-temps -march=pentium4 -msse2 -O -I/usr/include bug_creator.c
Reading specs from /usr/lib/gcc/i386-redhat-linux/3.4.2/specs
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man
--infodir=/usr/share/info --enable-shared --enable-threads=posix
--disable-checking --with-system-zlib --enable-__cxa_atexit
--disable-libunwind-exceptions --enable-java-awt=gtk --host=i386-redhat-linux
Thread model: posix
gcc version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3)
 /usr/libexec/gcc/i386-redhat-linux/3.4.2/cc1 -E -quiet -v -I/usr/include
bug_creator.c -march=pentium4 -msse2 -O -o bug_creator.i
ignoring nonexistent directory
ignoring duplicate directory "/usr/include"
  as it is a non-system directory that duplicates a system directory
#include "..." search starts here:
#include <...> search starts here:
End of search list.
 /usr/libexec/gcc/i386-redhat-linux/3.4.2/cc1 -fpreprocessed bug_creator.i
-quiet -dumpbase bug_creator.c -march=pentium4 -msse2 -auxbase bug_creator -O
-version -o bug_creator.s
GNU C version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3) (i386-redhat-linux)
        compiled by GNU C version 3.4.2 20041017 (Red Hat 3.4.2-6.fc3).
GGC heuristics: --param ggc-min-expand=81 --param ggc-min-heapsize=96982
 as -V -Qy -o bug_creator.o bug_creator.s
GNU assembler version (i386-redhat-linux) using BFD version 20040927
 /usr/libexec/gcc/i386-redhat-linux/3.4.2/collect2 --eh-frame-hdr -m elf_i386
-dynamic-linker /lib/ld-linux.so.2
-L/usr/lib/gcc/i386-redhat-linux/3.4.2 -L/usr/lib/gcc/i386-redhat-linux/3.4.2
-L/usr/lib/gcc/i386-redhat-linux/3.4.2/../../.. bug_creator.o -lgcc --as-needed
-lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed

//******* /proc/cpuinfo
processor       : 0
vendor_id       : GenuineIntel
cpu family      : 15
model           : 2
model name      : Intel(R) Pentium(R) 4 CPU 2.00GHz
stepping        : 4
cpu MHz         : 2020.823
cache size      : 512 KB
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca cmov pat
pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm
bogomips        : 3997.69

// ********  bug_creator.c *********************

#include <stdio.h>
#include <emmintrin.h>

// Compiling with the "-O" flag causes the program to segfault. If it is
// compiled without "-O" it runs perfectly.
// gcc -v -save-temps -march=pentium4 -msse2 -I/usr/include  bug_creator.c

__m128i A1;
__m128i B1;
__m128i BITMASK;

__m128i A2;
__m128i B2;

  B1 = _mm_add_epi32(B1,A1);
  A1 = _mm_add_epi32(A1,_mm_add_epi32(_mm_setzero_si128(),
  A2 = _mm_add_epi32(_mm_add_epi32(A1,A1),_mm_add_epi32(_mm_add_epi32(A1,A1),A1));
  B2 = _mm_add_epi32(_mm_add_epi32(B1,A1),_mm_add_epi32(_mm_add_epi32(B1,A1),A1));

  B1 = _mm_add_epi32(B1,A1);
  A1 = _mm_add_epi32(A1,_mm_add_epi32(_mm_setzero_si128(),
  A2 =


main(int argc,
  char **argv){

  A1 = _mm_setzero_si128();
  B1 = _mm_setzero_si128();
  A2 = _mm_setzero_si128();
  B2 = _mm_setzero_si128();
  BITMASK = _mm_setzero_si128();


  //Everything is initialized to zero and "_mm_add_epi32" is the only
  //operation used so everything should be zero.
  printf("A1: %d\n",_mm_extract_epi16(A1,0));
  printf("B1: %d\n",_mm_extract_epi16(B1,0));
  printf("A2: %d\n",_mm_extract_epi16(A2,0));
  printf("B2: %d\n",_mm_extract_epi16(A2,0));
  printf("BITMASK: %d\n",_mm_extract_epi16(BITMASK,0));
  return 1;

           Summary: gcc -O causes segfault in code using SSE2 intrinsics
           Product: gcc
           Version: 3.4.2
            Status: UNCONFIRMED
          Severity: normal
          Priority: P2
         Component: c
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: isaac at nada dot kth dot se
                CC: gcc-bugs at gcc dot gnu dot org
 GCC build triplet: i386-redhat-linux
  GCC host triplet: i386-redhat-linux
GCC target triplet: i386-redhat-linux


More information about the Gcc-bugs mailing list