Strangely suboptimal assembly output

Josh Chia (谢任中) joshchia@gmail.com
Thu Jul 25 03:40:00 GMT 2013


I have the following C++ code, which GCC compiles to the x86-64
assembly shown below it. While the assembly for bar() looks fine, the
assembly for foo() looks strangely suboptimal: the movb to and the movl
from -16(%rsp) seem redundant, since the literal 1 could be moved
directly into %eax. Am I right that this alternative would be valid? Is
there some language rule that prevents the compiler from doing it that
way, or is this just a limitation of GCC?

// A double paired with a bool.
// The user-provided constructor copies each argument into the member of the
// same name.
struct MyPair1 {
    // In the initializer list, `a(a)` initializes member `a` from parameter
    // `a` (the parameter shadows the member inside the parentheses); likewise
    // for `b`.
    MyPair1(double a, bool b) : a(a), b(b) { }
    double a;  // first element
    bool b;    // second element
};

// Returns, by value, a MyPair1 constructed from the constants 1.2345 and true.
// This is the function whose generated assembly the post asks about.
MyPair1 foo() {
    return MyPair1(1.2345, true);
}

// A double paired with an int. Identical in shape to MyPair1 except that the
// second member is an int rather than a bool.
struct MyPair2 {
    // In the initializer list, `a(a)` initializes member `a` from parameter
    // `a` (the parameter shadows the member inside the parentheses); likewise
    // for `b`.
    MyPair2(double a, int b) : a(a), b(b) { }
    double a;  // first element
    int b;     // second element
};

// Returns, by value, a MyPair2 constructed from the constants 1.2345 and 678.
// Serves as the comparison case for foo().
MyPair2 bar() {
    return MyPair2(1.2345, 678);
}


    .file   "pair.cpp"
# GNU C++ (GCC) version 4.7.2 20120921 (Red Hat 4.7.2-2) (x86_64-redhat-linux)
#   compiled by GNU C version 4.7.2 20120921 (Red Hat 4.7.2-2), GMP
# version 5.0.2, MPFR version 3.1.0, MPC version 0.9
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  -D_GNU_SOURCE pair.cpp -mtune=generic -march=x86-64 -O3
# -fverbose-asm
# options enabled:  -fasynchronous-unwind-tables -fauto-inc-dec
# -fbranch-count-reg -fcaller-saves -fcombine-stack-adjustments -fcommon
# -fcompare-elim -fcprop-registers -fcrossjumping -fcse-follow-jumps
# -fdefer-pop -fdelete-null-pointer-checks -fdevirtualize -fdwarf2-cfi-asm
# -fearly-inlining -feliminate-unused-debug-types -fexceptions
# -fexpensive-optimizations -fforward-propagate -ffunction-cse -fgcse
# -fgcse-after-reload -fgcse-lm -fgnu-runtime -fguess-branch-probability
# -fident -fif-conversion -fif-conversion2 -findirect-inlining -finline
# -finline-atomics -finline-functions -finline-functions-called-once
# -finline-small-functions -fipa-cp -fipa-cp-clone -fipa-profile
# -fipa-pure-const -fipa-reference -fipa-sra -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-constants -fmerge-debug-strings
# -fmove-loop-invariants -fomit-frame-pointer -foptimize-register-move
# -foptimize-sibling-calls -foptimize-strlen -fpartial-inlining -fpeephole
# -fpeephole2 -fpredictive-commoning -fprefetch-loop-arrays -free
# -freg-struct-return -fregmove -freorder-blocks -freorder-functions
# -frerun-cse-after-loop -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fschedule-insns2
# -fshow-column -fshrink-wrap -fsigned-zeros -fsplit-ivs-in-unroller
# -fsplit-wide-types -fstrict-aliasing -fstrict-overflow
# -fstrict-volatile-bitfields -fthread-jumps -ftoplevel-reorder
# -ftrapping-math -ftree-bit-ccp -ftree-builtin-call-dce -ftree-ccp
# -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-cselim -ftree-dce
# -ftree-dominator-opts -ftree-dse -ftree-forwprop -ftree-fre
# -ftree-loop-distribute-patterns -ftree-loop-if-convert -ftree-loop-im
# -ftree-loop-ivcanon -ftree-loop-optimize -ftree-parallelize-loops=
# -ftree-phiprop -ftree-pre -ftree-pta -ftree-reassoc -ftree-scev-cprop
# -ftree-sink -ftree-slp-vectorize -ftree-sra -ftree-switch-conversion
# -ftree-tail-merge -ftree-ter -ftree-vect-loop-version -ftree-vectorize
# -ftree-vrp -funit-at-a-time -funswitch-loops -funwind-tables
# -fvect-cost-model -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args
# -malign-stringops -mfancy-math-387 -mfp-ret-in-387 -mglibc -mieee-fp
# -mmmx -mno-sse4 -mpush-args -mred-zone -msse -msse2 -mtls-direct-seg-refs

    .text
    # _Z3foov: the code GCC emitted for `MyPair1 foo()` from the C++ listing.
    # Syntax: GAS / AT&T (source operand first, destination last).
    # Out:   %xmm0 = the 8-byte constant at .LC0 (the double member `a`)
    #        %eax  = 4 bytes read from offset 8 of the temporary struct, i.e.
    #                starting at member `b` (see the compiler annotation on
    #                the movb below)
    # Stack: %rsp is never adjusted; the temporary lives below %rsp.
    .p2align 4,,15
    .globl  _Z3foov
    .type   _Z3foov, @function
_Z3foov:
.LFB3:
    .cfi_startproc
    # Store b = true (one byte) into a temporary 16 bytes below %rsp.
    # Because %rsp is not moved, this slot is in the red zone (-mred-zone
    # appears in the enabled-options list of this listing).
    movb    $1, -16(%rsp)   #, MEM[(struct MyPair1 *)&D.2256 + 8B]
    # %xmm0 = 1.2345, loaded RIP-relative from the constant pool.
    movsd   .LC0(%rip), %xmm0   #,
    # Reload 4 bytes from the same slot. Byte 0 is the 1 just stored; the
    # remaining 3 bytes were not written by this function (presumably they
    # correspond to padding after `b` -- confirm against the struct layout).
    # This store/reload pair is the sequence the post asks about; bar()
    # loads its int with a single immediate move instead.
    movl    -16(%rsp), %eax # D.2256,
    ret
    .cfi_endproc
.LFE3:
    .size   _Z3foov, .-_Z3foov
    # _Z3barv: the code GCC emitted for `MyPair2 bar()` from the C++ listing.
    # Syntax: GAS / AT&T (source operand first, destination last).
    # Out:   %eax  = 678 (the int member `b`)
    #        %xmm0 = the 8-byte constant at .LC0 (the double member `a`)
    # Stack: no memory below or above %rsp is touched.
    .p2align 4,,15
    .globl  _Z3barv
    .type   _Z3barv, @function
_Z3barv:
.LFB7:
    .cfi_startproc
    # The int member is materialised directly as an immediate -- no stack
    # temporary, unlike the bool member in _Z3foov.
    movl    $678, %eax  #,
    # %xmm0 = 1.2345, loaded RIP-relative from the constant pool.
    movsd   .LC0(%rip), %xmm0   #,
    ret
    .cfi_endproc
.LFE7:
    .size   _Z3barv, .-_Z3barv
    # Read-only constant pool: one 8-byte, 8-byte-aligned entry referenced by
    # both _Z3foov and _Z3barv via `movsd .LC0(%rip), %xmm0`.
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC0:
    # The two 32-bit words form the IEEE-754 double 1.2345
    # (bit pattern 0x3FF3C083126E978D), low word first (little-endian).
    .long   309237645       # low  32 bits: 0x126E978D
    .long   1072939139      # high 32 bits: 0x3FF3C083
    .ident  "GCC: (GNU) 4.7.2 20120921 (Red Hat 4.7.2-2)"
    .section    .note.GNU-stack,"",@progbits



More information about the Gcc-help mailing list