problems with optimisation

Andrew Haley aph@redhat.com
Fri Dec 28 15:19:00 GMT 2012


On 12/28/2012 10:25 AM, Kicer wrote:
> Hi all
> 
> 
> In the last few days I've found a problem with certain code optimisations:
> 
> 
> namespace 
> {
>   
>   struct Base;
>   
>   struct Bit
>   {
> 	  const Base &m_p;
> 	  const int m_pos;
> 	  
> 	  constexpr Bit(const Base &p, const int pos): m_p(p), m_pos(pos)
> 	  {
> 	  }
> 	
> 	  operator bool() const;  
>   };
>   
>   struct Base
>   {  
> 	  const int m_port;
> 	  constexpr Base(int p): m_port(p)
> 	  {
> 	  }
> 	  
> 	  operator char () const
> 	  {
> 		  char result;
> 		    
> 		  asm(
> 			"in %%dx, %%al\n"
> 			:"=a"(result)
> 			:"d"(m_port)
> 		  );
> 		  
> 		  //result = *(reinterpret_cast<char *>(m_port+32));
> 		  
> 		  return result;
> 	  }
> 	  
> 	  Bit operator[] (int p) const
> 	  {
> 		  Bit r(*this, p);
> 		  return r;
> 	  }
> 	
>   };
> 
> 
>   Bit::operator bool() const
>   {
> 	  const char v = m_p;
> 	  const bool r = (v & (1 << m_pos)) > 0;
> 	  
> 	  return r;
>   }
>   
>   struct Anc: public Base
>   {
> 	  const Base m_in;
> 	  constexpr Anc(int o): Base(o), m_in(o - 1)
> 	  {
> 	  }
> 	  
> 	  const Base& getIn() const
> 	  {
> 		  return m_in;
> 	  }
> 	
>   };
> 
> }
> 
> template<int v>
> char foo()
> {
> 	Anc p(v), p2(v+2);
> 	char r = p.getIn() + p2.getIn();
> 	
> 	//r += p[0]? 1: 0;                   //commented out at first step
> 	r += p2[4]? 1 : 0;
> 	
> 	return r;
> }
> 
> 
> char bar()
> {
>   char r = foo<4>();
>   
>   r-= foo<6>();
>   
>   return r;
> }
> 
> There are 3 structs which look more complex than the code they generate.
> foo() and bar() are just using those structs.
> For the code above, the output is short and clear, as expected: 
> 
> but when I uncomment "//r += p[0]? 1: 0; " in foo(), the code becomes 
> unexpectedly large and unclear:
> 

> 
> compilation flags:
> g++ -Os test.cpp -c -o test.o -std=c++11
> 
> 
> This may seem to be a less important problem for x86 archs, but I'm affected 
> by this problem on the avr arch, where memory is very limited. Can I somehow 
> figure out why gcc gives up on generating clean code in the second example?

With -O2 there's much less difference:

bar():								bar():
.LFB14:								.LFB14:
	.cfi_startproc							.cfi_startproc
	movl	$3, %edx						movl	$3, %edx
	in %dx, %al							in %dx, %al

	movb	$6, %dl					      |		movb	$4, %dl
	movl	%eax, %ecx						movl	%eax, %ecx
	in %dx, %al							in %dx, %al

							      >		movb	$6, %dl
							      >		movl	%eax, %edi
							      >		in %dx, %al
							      >
	movb	$7, %dl							movb	$7, %dl
	movl	%eax, %esi						movl	%eax, %esi
							      >		andl	$1, %edi
	in %dx, %al							in %dx, %al

	movl	%eax, %edi				      |		movl	%eax, %r8d
							      >		movsbl	%sil, %esi
	movb	$8, %dl							movb	$8, %dl
	subb	%dil, %cl				      |		subb	%r8b, %cl
	in %dx, %al							in %dx, %al

	andl	$16, %esi				      |		addl	%edi, %ecx
							      >		testb	$16, %sil
	setne	%dl							setne	%dl
							      >		andl	$1, %esi
	addl	%edx, %ecx						addl	%edx, %ecx
							      >		subb	%sil, %cl
	testb	$16, %al						testb	$16, %al
	setne	%al							setne	%al
	subb	%al, %cl						subb	%al, %cl
	movl	%ecx, %eax						movl	%ecx, %eax
	ret								ret


Without inlining GCC can't tell what your program is doing, and by using
-Os you're preventing GCC from inlining.

Andrew.



More information about the Gcc-help mailing list