[Bug c++/56127] New: Incorrect code with -O2
trosenband at gmail dot com
gcc-bugzilla@gcc.gnu.org
Mon Jan 28 10:08:00 GMT 2013
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=56127
Bug #: 56127
Summary: Incorrect code with -O2
Classification: Unclassified
Product: gcc
Version: 4.6.3
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
AssignedTo: unassigned@gcc.gnu.org
ReportedBy: trosenband@gmail.com
Created attachment 29290
--> http://gcc.gnu.org/bugzilla/attachment.cgi?id=29290
main.ii
It seems that GCC 4.6.3 generates incorrect assembly for ARM Cortex-A9 from the
following C++ code when invoked with -O1, -O2, or -O3.
The code just increments two variables 100 times and divides one by the other.
Correct output is:
average = 1.000
PMTsum = 100 nValues = 100
but -O1 gives this:
average = 0.000
PMTsum = 100 nValues = 100
and -O2 and -O3 yield this:
average = inf
PMTsum = 100 nValues = 100
This 3-file C++ project is the minimal configuration that shows this behavior.
With everything in one file the problem vanishes.
Below the C++ code is the assembly language output, marked with *** where I
think the error may be (but I'm unfamiliar with assembly language).
Thanks to anyone who looks into this, and to everyone who has been developing
GCC!
//exp_results.h
// Holds two unsigned counters, PMTsum and nValues, and reports their ratio.
class exp_results
{
public:
// Start with both counters at zero.
exp_results() :
PMTsum(0),
nValues(0)
{
}
// Declared here only; the definition lives in exp_results.cpp.
unsigned increment();
// Returns PMTsum / nValues as a double, or 0 when nValues is zero
// (the check avoids a division by zero).
// Defined inline in the header, so it is compiled as part of main.cc.
double get_average()
{
if (nValues)
return ((double)PMTsum) / ((double)nValues);
else
return 0;
}
// Numerator of the average.
unsigned PMTsum;
// Denominator of the average.
unsigned nValues;
};
//exp_results.cpp
#include "exp_results.h"
// Adds one to both PMTsum and nValues and always returns 1.
// Kept in its own translation unit, so main.cc sees only the declaration.
unsigned exp_results::increment()
{
PMTsum++;
nValues++;
return 1;
}
//main.cc
#include <stdio.h>
#include "exp_results.h"
// Reproducer driver: increments a fresh exp_results 100 times, then prints
// the average followed by the two raw counters.
int main()
{
exp_results r;
// The return value of increment() is deliberately ignored.
for(unsigned i=0; i< 100; i++)
r.increment();
// Both counters are 100 at this point, so the average should print as 1.000.
printf("average = %9.3f\n", r.get_average());
printf("PMTsum = %u nValues = %u\n", r.PMTsum, r.nValues);
return 0;
}
// main.s
.cpu cortex-a9
.eabi_attribute 27, 3
.fpu neon-fp16
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "main.cc"
@ GNU C++ (Sourcery CodeBench Lite 2012.03-83) version 4.6.3 (arm-xilinx-eabi)
@ compiled by GNU C version 4.3.2, GMP version 4.3.2, MPFR version 3.0.1-p4,
@ MPC version 0.9
@ GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
@ options passed: -fpreprocessed main.ii -mcpu=cortex-a9
@ -mfloat-abi=softfp -mfpu=neon-fp16 -auxbase-strip src/main.o -O2 -Wall
@ -fmessage-length=0 -fverbose-asm -fremove-local-statics
@ options enabled: -fauto-inc-dec -fbranch-count-reg -fcaller-saves
@ -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
@ -fcrossjumping -fcse-follow-jumps -fdefer-pop
@ -fdelete-null-pointer-checks -fdevirtualize -fdwarf2-cfi-asm
@ -fearly-inlining -feliminate-unused-debug-types -fexceptions
@ -fexpensive-optimizations -fextension-elimination -fforward-propagate
@ -ffunction-cse -fgcse -fgcse-lm -fguess-branch-probability -fident
@ -fif-conversion -fif-conversion2 -findirect-inlining -finline
@ -finline-functions-called-once -finline-small-functions -fipa-cp
@ -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra
@ -fira-share-save-slots -fira-share-spill-slots -fivopts
@ -fkeep-static-consts -fleading-underscore -fmath-errno -fmerge-constants
@ -fmerge-debug-strings -fmove-loop-invariants -fomit-frame-pointer
@ -foptimize-register-move -foptimize-sibling-calls -fpartial-inlining
@ -fpeephole -fpeephole2 -fprefetch-loop-arrays -fpromote-loop-indices
@ -freg-struct-return -fregmove -frename-registers -freorder-blocks
@ -freorder-functions -frerun-cse-after-loop
@ -fsched-critical-path-heuristic -fsched-dep-count-heuristic
@ -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic
@ -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic
@ -fsched-stalled-insns-dep -fschedule-insns -fschedule-insns2
@ -fsection-anchors -fshow-column -fsigned-zeros -fsplit-ivs-in-unroller
@ -fsplit-wide-types -fstrict-aliasing -fstrict-overflow
@ -fstrict-volatile-bitfields -fthread-jumps -ftoplevel-reorder
@ -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp
@ -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim -ftree-dce
@ -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre
@ -ftree-if-to-switch-conversion -ftree-loop-if-convert -ftree-loop-im
@ -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
@ -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
@ -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
@ -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time
@ -funroll-loops -fverbose-asm -fweb -fzero-initialized-in-bss
@ -mlittle-endian -msched-prolog -munaligned-access
@ Compiler executable checksum: af2616fad9f2abb21c14f2e52d2eaee7
@ main, as generated from main.cc at -O2: calls exp_results::increment 100
@ times on an object held in the stack frame, then prints the inlined
@ get_average() result and the two raw counters.
.section .text.startup,"ax",%progbits
.align 2
.global main
.type main, %function
main:
.fnstart
.LFB4:
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, lr} @,
.save {r4, lr}
@ r4 is the loop counter, starting at 100.
mov r4, #100 @ ivtmp.3,
.pad #8
@ Reserve the 8-byte frame for r: PMTsum at [sp, #0], nValues at [sp, #4].
sub sp, sp, #8 @,,
@ Zero both counters (the inlined constructor).
mov r3, #0 @ tmp144,
str r3, [sp, #0] @ tmp144, r.PMTsum
str r3, [sp, #4] @ tmp144, r.nValues
@ Loop body: each pass calls increment twice with r0 = &r and subtracts 2
@ from r4 in total, so 50 passes perform the 100 calls.
.L3:
mov r0, sp @,
sub r4, r4, #1 @ tmp156, ivtmp.3,
bl _ZN11exp_results9incrementEv @
mov r0, sp @,
bl _ZN11exp_results9incrementEv @
subs r4, r4, #1 @ ivtmp.3, tmp156,
bne .L3 @,
@ Inlined get_average(): r3 = nValues, r0 = address of the .LC0 format string.
@ The instructions with an "ne" suffix below run only when nValues != 0.
ldr r3, [sp, #4] @ D.6224, r.nValues
movw r0, #:lower16:.LC0 @,
movt r0, #:upper16:.LC0 @,
cmp r3, #0 @ D.6224,
fmsrne s15, r3 @ int @, D.6224
@ d16 = (double)nValues when nValues != 0.
fuitodne d16, s15 @ tmp149,
fldsne s15, [sp, #0] @ int @, r.PMTsum
@ NOTE(review): the vmov below has no condition suffix, so it executes on
@ both paths. When nValues != 0 it overwrites the (double)nValues that
@ fuitodne just placed in d16, and the fdivdne further down then computes
@ (double)PMTsum / 0.0. That matches the "average = inf" output reported for
@ -O2. Presumably the zero was meant only for the nValues == 0 path
@ ("return 0" in the C++ source) -- confirm against the compiler's
@ if-conversion of this branch.
vmov.i32 d16, #0 @ D.6219
//*** load 0 into d16 (why? is this the bug?) ***
@ d17 = (double)PMTsum when nValues != 0.
fuitodne d17, s15 @ tmp147,
@ d16 = d17 / d16 when nValues != 0.
fdivdne d16, d17, d16 @ D.6219, tmp147, tmp149
@ Copy the double in d16 into r2:r3 as the value printed by the first printf.
fmrrd r2, r3, d16 @, D.6219
bl printf @
@ Second printf: r0 = address of .LC1, r1 = PMTsum, r2 = nValues.
movw r0, #:lower16:.LC1 @,
ldmia sp, {r1, r2} @,,
movt r0, #:upper16:.LC1 @,
bl printf @
@ Return 0: release the 8-byte frame, then restore r4 and load pc to return.
mov r0, #0 @,
add sp, sp, #8 @,,
ldmfd sp!, {r4, pc}
.fnend
.size main, .-main
@ printf format strings referenced by main through movw/movt.
.section .rodata.str1.4,"aMS",%progbits,1
.align 2
@ Format string for the average line.
.LC0:
.ascii "average = %9.3f\012\000"
@ 3 bytes of padding after the 17-byte string, bringing it to 20 bytes
@ (a multiple of 4) before the next string.
.space 3
@ Format string for the raw-counter line.
.LC1:
.ascii "PMTsum = %u nValues = %u\012\000"
.ident "GCC: (Sourcery CodeBench Lite 2012.03-83) 4.6.3"
// exp_results.s
.cpu cortex-a9
.eabi_attribute 27, 3
.fpu neon-fp16
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.file "exp_results.cpp"
@ GNU C++ (Sourcery CodeBench Lite 2012.03-83) version 4.6.3 (arm-xilinx-eabi)
@ compiled by GNU C version 4.3.2, GMP version 4.3.2, MPFR version 3.0.1-p4,
@ MPC version 0.9
@ GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
@ options passed: -fpreprocessed exp_results.ii -mcpu=cortex-a9
@ -mfloat-abi=softfp -mfpu=neon-fp16 -auxbase-strip src/exp_results.o -O2
@ -Wall -fmessage-length=0 -fverbose-asm -fremove-local-statics
@ options enabled: -fauto-inc-dec -fbranch-count-reg -fcaller-saves
@ -fcombine-stack-adjustments -fcommon -fcompare-elim -fcprop-registers
@ -fcrossjumping -fcse-follow-jumps -fdefer-pop
@ -fdelete-null-pointer-checks -fdevirtualize -fdwarf2-cfi-asm
@ -fearly-inlining -feliminate-unused-debug-types -fexceptions
@ -fexpensive-optimizations -fextension-elimination -fforward-propagate
@ -ffunction-cse -fgcse -fgcse-lm -fguess-branch-probability -fident
@ -fif-conversion -fif-conversion2 -findirect-inlining -finline
@ -finline-functions-called-once -finline-small-functions -fipa-cp
@ -fipa-profile -fipa-pure-const -fipa-reference -fipa-sra
@ -fira-share-save-slots -fira-share-spill-slots -fivopts
@ -fkeep-static-consts -fleading-underscore -fmath-errno -fmerge-constants
@ -fmerge-debug-strings -fmove-loop-invariants -fomit-frame-pointer
@ -foptimize-register-move -foptimize-sibling-calls -fpartial-inlining
@ -fpeephole -fpeephole2 -fprefetch-loop-arrays -fpromote-loop-indices
@ -freg-struct-return -fregmove -frename-registers -freorder-blocks
@ -freorder-functions -frerun-cse-after-loop
@ -fsched-critical-path-heuristic -fsched-dep-count-heuristic
@ -fsched-group-heuristic -fsched-interblock -fsched-last-insn-heuristic
@ -fsched-rank-heuristic -fsched-spec -fsched-spec-insn-heuristic
@ -fsched-stalled-insns-dep -fschedule-insns -fschedule-insns2
@ -fsection-anchors -fshow-column -fsigned-zeros -fsplit-ivs-in-unroller
@ -fsplit-wide-types -fstrict-aliasing -fstrict-overflow
@ -fstrict-volatile-bitfields -fthread-jumps -ftoplevel-reorder
@ -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp
@ -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim -ftree-dce
@ -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre
@ -ftree-if-to-switch-conversion -ftree-loop-if-convert -ftree-loop-im
@ -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
@ -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
@ -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
@ -ftree-ter -ftree-vect-loop-version -ftree-vrp -funit-at-a-time
@ -funroll-loops -fverbose-asm -fweb -fzero-initialized-in-bss
@ -mlittle-endian -msched-prolog -munaligned-access
@ Compiler executable checksum: af2616fad9f2abb21c14f2e52d2eaee7
@ exp_results::increment(), as generated from exp_results.cpp at -O2.
@ In:  r0 = this (PMTsum at offset 0, nValues at offset 4)
@ Out: r0 = 1
@ Writes r1, r2 and r3; makes no calls and does not touch the stack.
.text
.align 2
.global _ZN11exp_results9incrementEv
.type _ZN11exp_results9incrementEv, %function
_ZN11exp_results9incrementEv:
.fnstart
.LFB4:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@ Keep "this" in r3 so that r0 can hold the constant 1.
mov r3, r0 @ this, this
@ r0 = 1 serves as both the return value and the amount added below.
mov r0, #1 @,
@ Load both counters in one instruction: r1 = PMTsum, r2 = nValues.
ldmia r3, {r1, r2} @ this,,
add r1, r1, r0 @ tmp141, this_1(D)->PMTsum,
add r2, r2, r0 @ tmp143, this_1(D)->nValues,
@ Store both updated counters back in one instruction.
stmia r3, {r1, r2} @ this,,
bx lr @
.cantunwind
.fnend
.size _ZN11exp_results9incrementEv, .-_ZN11exp_results9incrementEv
.ident "GCC: (Sourcery CodeBench Lite 2012.03-83) 4.6.3"
.ident "GCC: (Sourcery CodeBench Lite 2012.03-83) 4.6.3"
More information about the Gcc-bugs
mailing list