This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

-fno-crossjumping not working + weird asm behavior


Hello,

When I compile the inner loop of a virtual machine (see the source
below) with gcc-3.4 on x86, using computed gotos and -O3, I get the
following output:


...
.L49:
        movl    (%esi), %eax
        addl    $4, %esi
        jmp     *%eax
.L33:
        incl    -156(%ebp)
        movl    %ebx, %eax
        sall    $5, %eax
        movl    %ebx, (%edi)
        subl    %ebx, %eax
        incl    %eax
        cmpl    $3124999, -156(%ebp)
        jle     .L34
        movl    %eax, (%edi)
        leal    -12(%ebp), %esp
        xorl    %eax, %eax
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
        .p2align 4,,7
.L32:
        movl    %ebx, (%edi)
        movl    %ebx, %edx
        sall    $4, %edx
        subl    %ebx, %edx
        leal    2(%edx,%edx), %ebx
        jmp     .L49           
.L31:
        movl    %ebx, (%edi)
        leal    0(,%ebx,8), %eax
        subl    %ebx, %eax
        leal    1(%ebx,%eax,4), %ebx
        jmp     .L49		
.L30:
        movl    %ebx, (%edi)
        leal    0(,%ebx,8), %eax
        subl    %ebx, %eax
.L46:
        leal    1(,%eax,4), %ebx
        jmp     .L49            
...

Every goto **pc++ first jumps to the shared dispatch sequence at .L49
instead of performing the indirect jump on the spot. This costs cycles,
but is likely the result of -fcrossjumping. However, when I switch it
off using -fno-crossjumping, I get the following output:

.L43:
        jmp     *%eax
.L33:
        incl    -156(%ebp)
        movl    %ebx, %eax
        sall    $5, %eax
        movl    %ebx, (%edi)
        subl    %ebx, %eax
        incl    %eax
        cmpl    $3124999, -156(%ebp)
        jle     .L34
        movl    %eax, (%edi)
        leal    -12(%ebp), %esp
        xorl    %eax, %eax
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
        .p2align 4,,7
.L32:
        movl    %ebx, (%edi)
        movl    %ebx, %eax
        sall    $4, %eax
        subl    %ebx, %eax
        leal    2(%eax,%eax), %ebx
        movl    (%esi), %eax
        addl    $4, %esi
        jmp     .L43			// WASTING CYCLES (expected jmp *%eax)
.L31:
        movl    %ebx, (%edi)
        leal    0(,%ebx,8), %ecx
        movl    (%esi), %eax
        subl    %ebx, %ecx
        addl    $4, %esi
        leal    1(%ebx,%ecx,4), %ebx
        jmp     .L43			// WASTING CYCLES (expected jmp *%eax)

...


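The jump is still there, but part of the dispatch code (the load of the
next address and the increment of the pc register) is now kept in each
case body. Funny. Why is that happening? It is wasting cycles, and the
final indirect jump is still shared between the cases. I would expect
the jmp .Lxx to go away when using -fno-crossjumping.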


Next thing I tried was "helping" gcc by generating jmp *%eax on the
spot using inline assembly (see the JMP macro below). Now I am getting
the following code which appears to be wrong because no code is
actually generated for the cases except for the indirect jump.

        .p2align 4,,15
.L34:
.L2:
.L3:
.L4:
.L5:
.L6:
.L7:
.L8:
.L9:
.L10:
.L11:
.L12:
.L13:
.L14:
.L15:
.L16:
.L17:
.L18:
.L19:
.L20:
.L21:
.L22:
.L23:
.L24:
.L25:
.L26:
.L27:
.L28:
.L29:
.L30:
.L31:
.L32:
.L33:
        movl    -196(%ebp), %eax
#APP
        jmp *%eax
#NO_APP
        movl    -208(%ebp), %eax
#APP
        jmp *%eax
#NO_APP
        movl    -220(%ebp), %eax
#APP
        jmp *%eax
#NO_APP
        movl    -232(%ebp), %eax
#APP
        jmp *%eax
#NO_APP
        movl    -240(%ebp), %eax
#APP
        jmp *%eax
#NO_APP
        movl    -248(%ebp), %eax
#APP
        jmp *%eax
#NO_APP
....


Perhaps I am missing something crucial w.r.t. inline assembly. I
compiled the same code using icc and the code there looks OK. So my
questions are: how do I force gcc to generate jmp *%eax on the spot
and why is code missing when using asm() ?

Thanks.

/Michael Buro - University of Alberta




////////////////////////// test.c

#include <iostream>

using namespace std;

// Operand record that every handler in main() updates through a pointer.
//
// foo is unsigned on purpose: within a single pass the handlers in main()
// multiply it far beyond INT_MAX (the running value is 1302061381 when
// handler 13 multiplies it by 13), which would be signed overflow, i.e.
// undefined behaviour, if foo were a plain int of 32 bits. Unsigned
// arithmetic wraps modulo 2^N, so the benchmark stays well defined.
struct X {
  unsigned foo;  // running value rewritten by each handler
};

// The single object the benchmark operates on; main() sets context.foo to 1
// before entering the dispatch loop.
X context;
// Global pointer to context; main() copies it into its local x and performs
// every update through that pointer.
X *y = &context;

// JMP: load the next handler address from *pc into addr, advance pc, and
// branch to that address. Both variants expect `pc` and `addr` to be in
// scope at the point of use.
//
//   #if 1 : the branch is emitted by hand as "jmp *<reg>" via inline asm.
//   #else : the branch is a GCC computed goto ("goto *addr").
//
// NOTE(review): the asm statement lists no outputs, clobbers or labels, so
// nothing in it tells the compiler that control does not fall through to the
// next statement. GCC's extended-asm documentation states that the optimizers
// do not know about jumps out of an asm; this is presumably why the handler
// bodies disappear in the listing discussed above. Confirm against the GCC
// manual for the compiler version in use.
#if 1
#define JMP \
{ addr = *pc++; asm volatile("jmp *%0" : : "r" (addr) ) ; } 

#else

#define JMP \
{ addr = *pc++; goto *addr; }
#endif


// Benchmark driver comparing ways of dispatching 32 tiny "opcode" handlers.
//
// Handler k performs  x->foo *= k;  x->foo += c;  where c is 1 except for
// handlers 11 (+4), 21 (+3) and 30 (+2).
//
// The preprocessor selects one of three dispatch variants:
//   outer "#if 0"          : switch-based dispatch (disabled as posted)
//   outer "#else", "#if 1" : computed goto, "goto **pc++" (enabled as posted)
//   outer "#else", "#else" : the JMP macro (change the inner "#if 1" to
//                            "#if 0" to build this variant)
//
// Always returns 0.
int main()
{
  context.foo = 1;
  register X *x = y;
  //register X *x __asm__("esi") = y;  
  
#if 0

  // vanilla code: switch dispatch
  // 100000000 iterations; k & 31 cycles through handlers 0..31 in order.
  
  for (int k=0; k < 100000000; k++) {  

    switch(k & 31) {

    case 0: x->foo *= 0; x->foo += 1;  break;
    case 1: x->foo *= 1; x->foo += 1;  break;
    case 2: x->foo *= 2; x->foo += 1;  break;
    case 3: x->foo *= 3; x->foo += 1;  break;      
    case 4: x->foo *= 4; x->foo += 1;  break;
    case 5: x->foo *= 5; x->foo += 1;  break;
    case 6: x->foo *= 6; x->foo += 1;  break;
    case 7: x->foo *= 7; x->foo += 1;  break;      
    case 8: x->foo *= 8; x->foo += 1;  break;
    case 9: x->foo *= 9; x->foo += 1;  break;
    case 10:x->foo *= 10; x->foo += 1; break;
    case 11:x->foo *= 11; x->foo += 4; break;      
    case 12:x->foo *= 12; x->foo += 1; break;
    case 13:x->foo *= 13; x->foo += 1; break;
    case 14:x->foo *= 14; x->foo += 1; break;
    case 15:x->foo *= 15; x->foo += 1; break;      
    case 16:x->foo *= 16; x->foo += 1; break;
    case 17:x->foo *= 17; x->foo += 1; break;
    case 18:x->foo *= 18; x->foo += 1; break;
    case 19:x->foo *= 19; x->foo += 1; break;      
    case 20:x->foo *= 20; x->foo += 1; break;
    case 21:x->foo *= 21; x->foo += 3; break;
    case 22:x->foo *= 22; x->foo += 1; break;
    case 23:x->foo *= 23; x->foo += 1; break;      
    case 24:x->foo *= 24; x->foo += 1; break;
    case 25:x->foo *= 25; x->foo += 1; break;
    case 26:x->foo *= 26; x->foo += 1; break;
    case 27:x->foo *= 27; x->foo += 1; break;      
    case 28:x->foo *= 28; x->foo += 1; break;
    case 29:x->foo *= 29; x->foo += 1; break;
    case 30:x->foo *= 30; x->foo += 2; break;
    case 31:x->foo *= 31; x->foo += 1; break;


    // Never taken: k & 31 is always one of the 32 values handled above.
    default:;
    }
  }

#else

  // Number of completed passes over the handler table.
  int j = 0;
  // Handler addresses in execution order, taken with GCC's &&label
  // (labels-as-values) extension. This table is the "program" that pc walks.
  const void *a[] = {
    &&case_0,
    &&case_1,
    &&case_2,
    &&case_3,
    &&case_4,
    &&case_5,
    &&case_6,
    &&case_7,
    &&case_8,
    &&case_9,
    &&case_10,
    &&case_11,
    &&case_12,
    &&case_13,
    &&case_14,
    &&case_15,
    &&case_16,
    &&case_17,
    &&case_18,
    &&case_19,
    &&case_20,
    &&case_21,
    &&case_22,
    &&case_23,
    &&case_24,
    &&case_25,
    &&case_26,
    &&case_27,
    &&case_28,
    &&case_29,
    &&case_30,
    &&case_31
  };

  //register void **pc __asm__("esi") = &a[0];

  // Restart point for each pass. The declarations below are executed again
  // after every "goto loop", so pc is reset to the start of the table.
 loop:;
  
  // pc: pointer to the next table entry to dispatch to.
  register const void **pc = &a[0];    
  // addr: scratch variable written by the JMP macro; the "goto **pc++"
  // variant below does not use it.
  register const void *addr;
    
#if 1    

  // computed goto dispatch
  //
  // Control falls into case_0 from above with pc == &a[0]. Its goto reads
  // a[0], which is &&case_0, so case_0 runs a second time before the walk
  // continues with case_1 ... case_31. case_30 reads a[31], the last entry.

 case_0: x->foo *= 0; x->foo += 1; goto **pc++;
 case_1: x->foo *= 1; x->foo += 1; goto **pc++;
 case_2: x->foo *= 2; x->foo += 1; goto **pc++;
 case_3: x->foo *= 3; x->foo += 1; goto **pc++;      
 case_4: x->foo *= 4; x->foo += 1; goto **pc++;
 case_5: x->foo *= 5; x->foo += 1; goto **pc++;
 case_6: x->foo *= 6; x->foo += 1; goto **pc++;
 case_7: x->foo *= 7; x->foo += 1; goto **pc++;      
 case_8: x->foo *= 8; x->foo += 1; goto **pc++;
 case_9: x->foo *= 9; x->foo += 1; goto **pc++;
 case_10:x->foo *= 10; x->foo += 1; goto **pc++;
 case_11:x->foo *= 11; x->foo += 4; goto **pc++;      
 case_12:x->foo *= 12; x->foo += 1; goto **pc++;
 case_13:x->foo *= 13; x->foo += 1; goto **pc++;
 case_14:x->foo *= 14; x->foo += 1; goto **pc++;
 case_15:x->foo *= 15; x->foo += 1; goto **pc++;      
 case_16:x->foo *= 16; x->foo += 1; goto **pc++;
 case_17:x->foo *= 17; x->foo += 1; goto **pc++;
 case_18:x->foo *= 18; x->foo += 1; goto **pc++;
 case_19:x->foo *= 19; x->foo += 1; goto **pc++;      
 case_20:x->foo *= 20; x->foo += 1; goto **pc++;
 case_21:x->foo *= 21; x->foo += 3; goto **pc++;
 case_22:x->foo *= 22; x->foo += 1; goto **pc++;
 case_23:x->foo *= 23; x->foo += 1; goto **pc++;      
 case_24:x->foo *= 24; x->foo += 1; goto **pc++;
 case_25:x->foo *= 25; x->foo += 1; goto **pc++;
 case_26:x->foo *= 26; x->foo += 1; goto **pc++;
 case_27:x->foo *= 27; x->foo += 1; goto **pc++;      
 case_28:x->foo *= 28; x->foo += 1; goto **pc++;
 case_29:x->foo *= 29; x->foo += 1; goto **pc++;
 case_30:x->foo *= 30; x->foo += 2; goto **pc++;
 // Last handler: no dispatch. Counts the pass and restarts at loop until
 // 3125000 passes are done, then falls through to the return below.
 case_31:x->foo *= 31; x->foo += 1; j++; if (j < 3125000) goto loop;

#else

 // inline assembly computed goto
 //
 // Same handlers as above, but each one dispatches through the JMP macro
 // instead of "goto **pc++".
  
 case_0: x->foo *= 0; x->foo += 1;  JMP;
 case_1: x->foo *= 1; x->foo += 1;  JMP;
 case_2: x->foo *= 2; x->foo += 1;  JMP;
 case_3: x->foo *= 3; x->foo += 1;  JMP;      
 case_4: x->foo *= 4; x->foo += 1;  JMP;
 case_5: x->foo *= 5; x->foo += 1;  JMP;
 case_6: x->foo *= 6; x->foo += 1;  JMP;
 case_7: x->foo *= 7; x->foo += 1;  JMP;      
 case_8: x->foo *= 8; x->foo += 1;  JMP;
 case_9: x->foo *= 9; x->foo += 1;  JMP;
 case_10:x->foo *= 10; x->foo += 1; JMP;
 case_11:x->foo *= 11; x->foo += 4; JMP;      
 case_12:x->foo *= 12; x->foo += 1; JMP;
 case_13:x->foo *= 13; x->foo += 1; JMP;
 case_14:x->foo *= 14; x->foo += 1; JMP;
 case_15:x->foo *= 15; x->foo += 1; JMP;      
 case_16:x->foo *= 16; x->foo += 1; JMP;
 case_17:x->foo *= 17; x->foo += 1; JMP;
 case_18:x->foo *= 18; x->foo += 1; JMP;
 case_19:x->foo *= 19; x->foo += 1; JMP;      
 case_20:x->foo *= 20; x->foo += 1; JMP;
 case_21:x->foo *= 21; x->foo += 3; JMP;
 case_22:x->foo *= 22; x->foo += 1; JMP;
 case_23:x->foo *= 23; x->foo += 1; JMP;      
 case_24:x->foo *= 24; x->foo += 1; JMP;
 case_25:x->foo *= 25; x->foo += 1; JMP;
 case_26:x->foo *= 26; x->foo += 1; JMP;
 case_27:x->foo *= 27; x->foo += 1; JMP;      
 case_28:x->foo *= 28; x->foo += 1; JMP;
 case_29:x->foo *= 29; x->foo += 1; JMP;
 case_30:x->foo *= 30; x->foo += 2; JMP;
 // Last handler: no dispatch. Counts the pass and restarts at loop until
 // 3125000 passes are done.
 case_31:x->foo *= 31; x->foo += 1; j++; if (j < 3125000) goto loop;

 // Not the target of any goto in this function.
 end:;
  
#endif
  
#endif
    
 return 0;
}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]