problems with optimisation
Andrew Haley
aph@redhat.com
Fri Dec 28 15:19:00 GMT 2012
On 12/28/2012 10:25 AM, Kicer wrote:
> Hi all
>
>
> Over the last few days I've found a problem with certain code optimisations:
>
>
> namespace
> {
>
> struct Base;
>
> struct Bit
> {
> const Base &m_p;
> const int m_pos;
>
> constexpr Bit(const Base &p, const int pos): m_p(p), m_pos(pos)
> {
> }
>
> operator bool() const;
> };
>
> struct Base
> {
> const int m_port;
> constexpr Base(int p): m_port(p)
> {
> }
>
> operator char () const
> {
> char result;
>
> asm(
> "in %%dx, %%al\n"
> :"=a"(result)
> :"d"(m_port)
> );
>
> //result = *(reinterpret_cast<char *>(m_port+32));
>
> return result;
> }
>
> Bit operator[] (int p) const
> {
> Bit r(*this, p);
> return r;
> }
>
> };
>
>
> Bit::operator bool() const
> {
> const char v = m_p;
> const bool r = (v & (1 << m_pos)) > 0;
>
> return r;
> }
>
> struct Anc: public Base
> {
> const Base m_in;
> constexpr Anc(int o): Base(o), m_in(o - 1)
> {
> }
>
> const Base& getIn() const
> {
> return m_in;
> }
>
> };
>
> }
>
> template<int v>
> char foo()
> {
> Anc p(v), p2(v+2);
> char r = p.getIn() + p2.getIn();
>
> //r += p[0]? 1: 0; //commented out at first step
> r += p2[4]? 1 : 0;
>
> return r;
> }
>
>
> char bar()
> {
> char r = foo<4>();
>
> r-= foo<6>();
>
> return r;
> }
>
> There are 3 structs which look more complex than the code they generate.
> foo() and bar() are just using those structs.
> For the code above, the output is short and clear, as expected:
>
> But when I uncomment "//r += p[0]? 1: 0; " in foo(), the code becomes
> unexpectedly large and unclear:
>
>
> compilation flags:
> g++ -Os test.cpp -c -o test.o -std=c++11
>
>
> This may seem to be a less important problem for x86 archs, but I'm affected
> by this problem on the AVR arch, where memory is very limited. Can I somehow
> figure out why gcc gives up on generating clean code in the second example?
With -O2 there's much less difference:
bar(): bar():
.LFB14: .LFB14:
.cfi_startproc .cfi_startproc
movl $3, %edx movl $3, %edx
in %dx, %al in %dx, %al
movb $6, %dl | movb $4, %dl
movl %eax, %ecx movl %eax, %ecx
in %dx, %al in %dx, %al
> movb $6, %dl
> movl %eax, %edi
> in %dx, %al
>
movb $7, %dl movb $7, %dl
movl %eax, %esi movl %eax, %esi
> andl $1, %edi
in %dx, %al in %dx, %al
movl %eax, %edi | movl %eax, %r8d
> movsbl %sil, %esi
movb $8, %dl movb $8, %dl
subb %dil, %cl | subb %r8b, %cl
in %dx, %al in %dx, %al
andl $16, %esi | addl %edi, %ecx
> testb $16, %sil
setne %dl setne %dl
> andl $1, %esi
addl %edx, %ecx addl %edx, %ecx
> subb %sil, %cl
testb $16, %al testb $16, %al
setne %al setne %al
subb %al, %cl subb %al, %cl
movl %ecx, %eax movl %ecx, %eax
ret ret
Without inlining GCC can't tell what your program is doing, and by using
-Os you're preventing GCC from inlining.
Andrew.
More information about the Gcc-help
mailing list