Bug 54364

Summary: Tail call jumps not threaded
Product: gcc Reporter: Steven Bosscher <steven>
Component: middle-endAssignee: Not yet assigned to anyone <unassigned>
Status: RESOLVED FIXED    
Severity: normal CC: gabravier, jakub
Priority: P3    
Version: 4.8.0   
Target Milestone: ---   
See Also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=41004
Host: Target:
Build: Known to work:
Known to fail: Last reconfirmed:

Description Steven Bosscher 2012-08-24 11:39:27 UTC
Consider:

#undef ONE
#undef TEN
#undef HUN
#define ONE(X)                                          \
  void use##X (int);
#define TEN(X)                                          \
  ONE(X##0) ONE(X##1) ONE(X##2) ONE(X##3) ONE(X##4)     \
  ONE(X##5) ONE(X##6) ONE(X##7) ONE(X##8) ONE(X##9)
#define HUN                                             \
  TEN(0) TEN(1) TEN(2) TEN(3) TEN(4)                    \
  TEN(5) TEN(6) TEN(7) TEN(8) TEN(9)

TEN(1)

#undef ONE
#undef TEN
#undef HUN
#define ONE(X)                                          \
  case 1##X##1 ... 1##X##2: use##X (c); break;
#define TEN(X)                                          \
  ONE(X##0) ONE(X##1) ONE(X##2) ONE(X##3) ONE(X##4)     \
  ONE(X##5) ONE(X##6) ONE(X##7) ONE(X##8) ONE(X##9)
#define HUN                                             \
  TEN(0) TEN(1) TEN(2) TEN(3) TEN(4)                    \
  TEN(5) TEN(6) TEN(7) TEN(8) TEN(9)

void
foo (int c)
{
  switch (c)
    {
    TEN(1)
    default:
      break;
    }
}


With r190601, "cc1 -Os -fno-jump-tables", this compiles to:

        .file   "t.c"
        .text
        .globl  foo
        .type   foo, @function
foo:
.LFB0:
        cmpl    $1142, %edi
        jg      .L13
        cmpl    $1141, %edi
        jge     .L8
        cmpl    $1112, %edi
        jg      .L14
        cmpl    $1111, %edi
        jge     .L11
        leal    -1101(%rdi), %eax
        cmpl    $1, %eax
        ja      .L1
        jmp     .L16
.L14:
        cmpl    $1121, %edi
        jl      .L1
        cmpl    $1122, %edi
        jle     .L10
        leal    -1131(%rdi), %eax
        cmpl    $1, %eax
        ja      .L1
        jmp     .L17
.L13:
        cmpl    $1172, %edi
        jg      .L15
        cmpl    $1171, %edi
        jge     .L5
        cmpl    $1151, %edi
        jl      .L1
        cmpl    $1152, %edi
        jle     .L7
        leal    -1161(%rdi), %eax
        cmpl    $1, %eax
        ja      .L1
        jmp     .L18
.L15:
        cmpl    $1181, %edi
        jl      .L1
        cmpl    $1182, %edi
        jle     .L4
        leal    -1191(%rdi), %eax
        cmpl    $1, %eax
        ja      .L1
        jmp     .L19
.L16:
        jmp     use10
.L11:
        jmp     use11
.L10:
        jmp     use12
.L17:
        jmp     use13
.L8:
        jmp     use14
.L7:
        jmp     use15
.L18:
        jmp     use16
.L5:
        jmp     use17
.L4:
        jmp     use18
.L19:
        jmp     use19
.L1:
        ret
.LFE0:
        .size   foo, .-foo
        .section        .eh_frame,"a",@progbits
.Lframe1:
        .long   .LECIE1-.LSCIE1
.LSCIE1:
        .long   0
        .byte   0x3
        .string "zR"
        .uleb128 0x1
        .sleb128 -8
        .uleb128 0x10
        .uleb128 0x1
        .byte   0x3
        .byte   0xc
        .uleb128 0x7
        .uleb128 0x8
        .byte   0x90
        .uleb128 0x1
        .align 8
.LECIE1:
.LSFDE1:
        .long   .LEFDE1-.LASFDE1
.LASFDE1:
        .long   .LASFDE1-.Lframe1
        .long   .LFB0
        .long   .LFE0-.LFB0
        .uleb128 0
        .align 8
.LEFDE1:
        .ident  "GCC: (GNU) 4.8.0 20120822 (experimental) [trunk \
revision 190601]"
        .section        .note.GNU-stack,"",@progbits


Note the missed "jump threading" opportunities from the case decision tree to the tail calls.

The RTL for one of the missed tail calls looks like this:
...
# BLOCK 11 freq:157 seq:9
# PRED: 10 [50.0%]  (FALLTHRU)
# SUCC: 25 [100.0%]
#  145 pc=L144
        jmp     .L17    # 145   jump    [length = 2]
...
# BLOCK 25 freq:909 seq:23
# PRED: 11 [100.0%]
.L17:
# SUCC: EXIT [100.0%]  (ABNORMAL,SIBCALL)
#   79 call <...>
#      REG_DEAD: di:SI
        jmp     use13   # 79    *sibcall        [length = 5]

I am guessing that jumps to calls are not handled as jump threading opportunities because the call_insn isn't recognized as just a jump.
Comment 1 Jakub Jelinek 2012-08-24 11:44:53 UTC
It often isn't just a jump, in many cases it is a lot of insns, to restore stack, etc.  You don't know that until prologues/epilogues are expanded.
Comment 2 stevenb.gcc@gmail.com 2012-08-24 12:01:40 UTC
With -O2 this is cleaned up by bb-reorder.
Comment 3 bin cheng 2012-09-19 07:40:21 UTC
Author: amker
Date: Wed Sep 19 07:40:15 2012
New Revision: 191462

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=191462
Log:
	PR middle-end/54364
	* bb-reorder.c (connect_better_edge_p): New added.
	(find_traces_1_round): When optimizing for size, ignore edge frequency
	and probability, and handle all in one round.
	(bb_to_key): Use bb->index as key when optimizing for size.
	(better_edge_p): The bb with smaller index is better when optimizing
	for size.
	(connect_traces): When optimizing for size, connect block n with
	block n + 1; connect trace m with trace m + 1 if falling through.
	(gate_handle_reorder_blocks): Enable bbro when optimizing for -Os.

Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/bb-reorder.c
Comment 4 xuepeng guo 2012-09-26 03:17:59 UTC
Author: xguo
Date: Wed Sep 26 03:17:54 2012
New Revision: 191751

URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=191751
Log:
2012-09-26  Zhenqiang Chen  <zhenqiang.chen@arm.com>

        Backport from 2012-09-19 mainline r191462

        PR middle-end/54364
        * bb-reorder.c (connect_better_edge_p): New added.
        (find_traces_1_round): When optimizing for size, ignore edge frequency
        and probability, and handle all in one round.
        (bb_to_key): Use bb->index as key when optimizing for size.
        (better_edge_p): The bb with smaller index is better when optimizing
        for size.
        (connect_traces): When optimizing for size, connect block n with
        block n + 1; connect trace m with trace m + 1 if falling through.
        (gate_handle_reorder_blocks): Enable bbro when optimizing for -Os.


Modified:
    branches/ARM/embedded-4_7-branch/gcc/ChangeLog.arm
    branches/ARM/embedded-4_7-branch/gcc/bb-reorder.c
Comment 5 Steven Bosscher 2014-06-11 09:53:15 UTC
http://gcc.gnu.org/r191462