Strangely suboptimal assembly output
Josh Chia (谢任中)
joshchia@gmail.com
Thu Jul 25 03:40:00 GMT 2013
I have the following C++ code that compiles to the following x86-64
assembly. While the assembly for bar() looks fine, the assembly for
foo() looks strangely suboptimal. The movb to and the movl from -16(%rsp)
seem redundant, as we could move the literal 1 directly into eax. Am I right
about the alternative way? Is there some language rule that prevents
the compiler from doing it that way, or is this just a limitation of GCC?
// Pair of a double and a bool. The constructor copies its two arguments
// straight into the members of the same name.
struct MyPair1 {
MyPair1(double a, bool b) : a(a), b(b) { }
double a;
// The generated assembly for foo() writes this member at offset 8 of the
// object (see the "+ 8B" in its MEM[...] annotation).
bool b;
};
// Returns a MyPair1 built from the constants 1.2345 and true.
// This is the function whose generated code (_Z3foov) the post questions.
MyPair1 foo() {
return MyPair1(1.2345, true);
}
// Pair of a double and an int; otherwise identical in shape to MyPair1.
// The constructor copies its two arguments into the members of the same name.
struct MyPair2 {
MyPair2(double a, int b) : a(a), b(b) { }
double a;
int b;
};
// Returns a MyPair2 built from the constants 1.2345 and 678.
// Serves as the comparison case: its generated code (_Z3barv) loads the
// integer as an immediate.
MyPair2 bar() {
return MyPair2(1.2345, 678);
}
.file "pair.cpp"
# GNU C++ (GCC) version 4.7.2 20120921 (Red Hat 4.7.2-2) (x86_64-redhat-linux)
# compiled by GNU C version 4.7.2 20120921 (Red Hat 4.7.2-2), GMP
# version 5.0.2, MPFR version 3.1.0, MPC version 0.9
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed: -D_GNU_SOURCE pair.cpp -mtune=generic -march=x86-64 -O3
# -fverbose-asm
# options enabled: -fasynchronous-unwind-tables -fauto-inc-dec
# -fbranch-count-reg -fcaller-saves -fcombine-stack-adjustments -fcommon
# -fcompare-elim -fcprop-registers -fcrossjumping -fcse-follow-jumps
# -fdefer-pop -fdelete-null-pointer-checks -fdevirtualize -fdwarf2-cfi-asm
# -fearly-inlining -feliminate-unused-debug-types -fexceptions
# -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
# -fgcse-after-reload -fgcse-lm -fgnu-runtime -fguess-branch-probability
# -fident -fif-conversion -fif-conversion2 -findirect-inlining -finline
# -finline-atomics -finline-functions -finline-functions-called-once
# -finline-small-functions -fipa-cp -fipa-cp-clone -fipa-profile
# -fipa-pure-const -fipa-reference -fipa-sra -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-constants -fmerge-debug-strings
# -fmove-loop-invariants -fomit-frame-pointer -foptimize-register-move
# -foptimize-sibling-calls -foptimize-strlen -fpartial-inlining -fpeephole
# -fpeephole2 -fpredictive-commoning -fprefetch-loop-arrays -free
# -freg-struct-return -fregmove -freorder-blocks -freorder-functions
# -frerun-cse-after-loop -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-insns2
# -fshow-column -fshrink-wrap -fsigned-zeros -fsplit-ivs-in-unroller
# -fsplit-wide-types -fstrict-aliasing -fstrict-overflow
# -fstrict-volatile-bitfields -fthread-jumps -ftoplevel-reorder
# -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp
# -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim -ftree-dce
# -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre
# -ftree-loop-distribute-patterns -ftree-loop-if-convert -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
# -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
# -ftree-tail-merge -ftree-ter -ftree-vect-loop-version -ftree-vectorize
# -ftree-vrp -funit-at-a-time -funswitch-loops -funwind-tables
# -fvect-cost-model -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args
# -malign-stringops -mfancy-math-387 -mfp-ret-in-387 -mglibc -mieee-fp
# -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2 -mtls-direct-seg-refs
.text
.p2align 4,,15
.globl _Z3foov
.type _Z3foov, @function
# _Z3foov: GCC 4.7.2 -O3 output for `MyPair1 foo()`, which returns
# MyPair1(1.2345, true). AT&T operand order (source, destination).
# On exit %xmm0 holds the double loaded from .LC0 and %eax holds the 4 bytes
# read back from the stack slot at -16(%rsp).
_Z3foov:
.LFB3:
.cfi_startproc
# Store the single byte 1 (member b, at offset 8 of the temporary D.2256)
# below %rsp without adjusting %rsp; -mred-zone is among the enabled options.
movb $1, -16(%rsp) #, MEM[(struct MyPair1 *)&D.2256 + 8B]
# Load the 8-byte constant at .LC0 (member a) into %xmm0.
movsd .LC0(%rip), %xmm0 #,
# Reload 4 bytes from the slot just written. Only the lowest byte was stored
# by the movb above, so bits 8-31 of %eax are whatever the slot held before.
# NOTE(review): this store/reload pair is what the post asks about, i.e.
# whether a plain `movl $1, %eax` would be an acceptable replacement.
# Presumably bytes 1-3 are padding of MyPair1 with no required value, but
# confirm against the ABI before relying on that.
movl -16(%rsp), %eax # D.2256,
ret
.cfi_endproc
.LFE3:
.size _Z3foov, .-_Z3foov
.p2align 4,,15
.globl _Z3barv
.type _Z3barv, @function
# _Z3barv: GCC 4.7.2 -O3 output for `MyPair2 bar()`, which returns
# MyPair2(1.2345, 678). AT&T operand order (source, destination).
# On exit %eax holds 678 and %xmm0 holds the double loaded from .LC0.
# Unlike _Z3foov, no stack slot is touched: the integer member is materialised
# directly as an immediate.
_Z3barv:
.LFB7:
.cfi_startproc
# Member b: the full 32-bit int is written to %eax in one instruction.
movl $678, %eax #,
# Member a: load the 8-byte constant at .LC0 into %xmm0.
movsd .LC0(%rip), %xmm0 #,
ret
.cfi_endproc
.LFE7:
.size _Z3barv, .-_Z3barv
# .LC0: the double constant 1.2345 shared by _Z3foov and _Z3barv, placed in
# a progbits section with flags "aM" and an entity size of 8, aligned to
# 8 bytes.
.section .rodata.cst8,"aM",@progbits,8
.align 8
.LC0:
# IEEE-754 binary64 bit pattern 0x3FF3C083126E978D, emitted as two 32-bit
# words with the low word first (little-endian layout).
.long 309237645
.long 1072939139
.ident "GCC: (GNU) 4.7.2 20120921 (Red Hat 4.7.2-2)"
.section .note.GNU-stack,"",@progbits
More information about the Gcc-help
mailing list