This is the mail archive of the gcc-bugs@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Bug tree-optimization/55906] New: suboptimal code generated for post-inc on Thumb1


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55906

             Bug #: 55906
           Summary: suboptimal code generated for post-inc on Thumb1
    Classification: Unclassified
           Product: gcc
           Version: 4.8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassigned@gcc.gnu.org
        ReportedBy: amker.cheng@gmail.com


For the program below:

int
ffs(int word)

{
  int i;

  if (!word)
    return 0;

  i = 0;
  for (;;)
    {
      if (((1 << i++) & word) != 0)
 return i;
    }
}

The 164t.optimized dump looks like this:
ffs (int word)
{
  int i;
  int _6;
  int _7;

  <bb 2>:
  if (word_3(D) == 0)
    goto <bb 6>;
  else
    goto <bb 3>;

  <bb 3>:

  <bb 4>:
  # i_1 = PHI <0(3), i_5(5)>
  i_5 = i_1 + 1;
  _6 = word_3(D) >> i_1;
  _7 = _6 & 1;
  if (_7 != 0)
    goto <bb 6>;
  else
    goto <bb 5>;

  <bb 5>:
  goto <bb 4>;

  <bb 6>:
  # i_2 = PHI <0(2), i_5(4)>
  return i_2;

}
GCC increments i (defining i_5) before i_1 is used, so the live ranges of i_5
and i_1 overlap and they are assigned to different partitions, as shown in the
expanded RTL:
    2: r115:SI=r0:SI
    3: NOTE_INSN_FUNCTION_BEG
    9: pc={(r115:SI==0)?L33:pc}
      REG_BR_PROB 0xf3c
   10: NOTE_INSN_BASIC_BLOCK 4
    4: r110:SI=0
   18: L18:
   11: NOTE_INSN_BASIC_BLOCK 5
   12: r111:SI=r110:SI+0x1        <-----i_5/i_1 in different pseudos
   13: r116:SI=r115:SI>>r110:SI
   14: r118:SI=0x1
   15: r117:SI=r116:SI&r118:SI
      REG_EQUAL r116:SI&0x1
   16: pc={(r117:SI!=0)?L21:pc}
      REG_BR_PROB 0x384
   17: NOTE_INSN_BASIC_BLOCK 6
    5: r110:SI=r111:SI
   19: pc=L18
   20: barrier
   33: L33:
   32: NOTE_INSN_BASIC_BLOCK 7
    6: r111:SI=0
   21: L21:
   22: NOTE_INSN_BASIC_BLOCK 8
   23: r114:SI=r111:SI
   27: r0:SI=r114:SI
   30: use r0:SI

As a result, suboptimal code is generated:
ffs:
    mov    r3, #0
    push    {r4, lr}
    cmp    r0, r3
    beq    .L2
    mov    r2, r3
    mov    r1, #1
.L3:
    mov    r4, r0
    asr    r4, r4, r2
    add    r3, r2, #1
    tst    r4, r1
    bne    .L2
    mov    r2, r3
    b    .L3
.L2:
    mov    r0, r3
    @ sp needed
    pop    {r4, pc}

By contrast, GCC 4.6 generates better code:
ffs:
    push    {lr}
    sub    r3, r0, #0
    beq    .L2
    mov    r3, #0
    mov    r2, #1
.L3:
    mov    r1, r0
    asr    r1, r1, r3
    add    r3, r3, #1
    tst    r1, r2
    beq    .L3
.L2:
    mov    r0, r3
    @ sp needed for prologue
    pop    {pc}


The command line is:
arm-none-eabi-gcc -mthumb -mcpu=cortex-m0 -Os -S ffs.c -o ffs.S

The same problem exists when optimizing with "-O2".


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]