Bug 24209 - strange instruction selected for an annulled delay slot
Summary: strange instruction selected for an annulled delay slot
Status: NEW
Alias: None
Product: gcc
Classification: Unclassified
Component: rtl-optimization (show other bugs)
Version: 4.1.0
Importance: P2 normal
Target Milestone: ---
Assignee: Not yet assigned to anyone
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2005-10-05 05:12 UTC by Dan Nicolaescu
Modified: 2013-04-18 21:06 UTC (History)
3 users (show)

See Also:
Host:
Target: sparc-*-*
Build:
Known to work:
Known to fail:
Last reconfirmed: 2013-04-18 11:06:49


Attachments
preprocessed code for this bug (41.60 KB, text/plain)
2005-10-05 05:13 UTC, Dan Nicolaescu
Details

Note You need to log in before you can comment on or make changes to this bug.
Description Dan Nicolaescu 2005-10-05 05:12:36 UTC
GCC 4.1 selects a strange instruction for the delay slot of a bl,a (annulled) branch: the same instruction is executed on the non-taken path anyway.


-O2 code for 4.1

;; PointToRowCol as emitted by GCC 4.1 at -O2 for SPARC.
;; A line indented by one extra space sits in the delay slot of the
;; call/branch/jmp directly above it.
PointToRowCol:
        save    %sp, -112, %sp
        sethi   %hi(term), %g1
        ld      [%g1+%lo(term)], %l2     ;; %l2 = value of the global `term`
        add     %l2, 136, %l1
        ld      [%l1+572], %l3
        ld      [%l1+772], %l0
        sub     %i0, %l3, %o0            ;; dividend = first argument - %l3
        call    .div, 0
         ld     [%l0+20], %o1            ;; delay slot: divisor for .div
        sethi   %hi(firstValidRow), %g1
        ld      [%g1+%lo(firstValidRow)], %i0
        cmp     %o0, %i0                 ;; value in %o0 after .div vs. firstValidRow
        bl,a    .LL118                   ;; ",a": delay slot runs only when the branch is taken
         ldsb   [%l2+1823], %g1          ;; this instruction
        sethi   %hi(lastValidRow), %g1
        ld      [%g1+%lo(lastValidRow)], %g1
        cmp     %o0, %g1
        bg      .LL116
         mov    %o0, %i0                 ;; not annulled: runs whether or not bg is taken
.LL105:
        ldsb    [%l2+1823], %g1          ;; this will be executed on the
                                         ;; non-taken path
                                         ;; (same load as in the bl,a delay slot above)
.LL118:
        ;; Both paths arrive here with %g1 = sign-extended byte at [%l2+1823].
        cmp     %g1, 0
        bne     .LL110
         mov    0, %o0                   ;; delay slot: %o0 = 0 on both paths
        ld      [%l0+32], %o0            ;; only reached when %g1 == 0
.LL110:
        add     %o0, %l3, %o0
        ld      [%l0+16], %o1            ;; divisor for the second .div
        call    .div, 0
         sub    %i1, %o0, %o0            ;; delay slot: dividend = second argument - %o0
        cmp     %o0, 0
        bl      .LL113
         mov    0, %g2                   ;; delay slot: %g2 = 0 (kept if the result is negative)
        ld      [%l1+888], %g1
        add     %g1, 1, %g1              ;; %g1 = [%l1+888] + 1, the upper bound
        cmp     %o0, %g1
        bg      .LL117
         mov    %o0, %g2                 ;; delay slot: %g2 = %o0 (overwritten at .LL117 if too big)
.LL113:
        ;; Store %i0 through the third argument and %g2 through the fourth, then return.
        st      %i0, [%i2]
        st      %g2, [%i3]
        jmp     %i7+8
         restore
.LL117:
        ;; %o0 exceeded the upper bound: store the bound (%g1) instead.
        st      %i0, [%i2]
        mov     %g1, %g2
        st      %g2, [%i3]
        jmp     %i7+8
         restore
.LL116:
        ;; Reached when %o0 > lastValidRow; %g1 still holds lastValidRow.
        b       .LL105
         mov    %g1, %i0                 ;; delay slot: %i0 = lastValidRow


The 4.0 code is:
;; PointToRowCol as emitted by GCC 4.0 for SPARC, quoted for comparison.
;; A line indented by one extra space sits in the delay slot of the
;; call/branch/jmp directly above it.
PointToRowCol:
        save    %sp, -112, %sp
        sethi   %hi(term), %g1
        ld      [%g1+%lo(term)], %l2     ;; %l2 = value of the global `term`
        add     %l2, 136, %l1
        ld      [%l1+572], %l3
        sub     %i0, %l3, %o0            ;; dividend = first argument - %l3
        ld      [%l1+772], %i0           ;; %i0 is reused as a pointer from here on
        call    .div, 0
         ld     [%i0+20], %o1            ;; delay slot: divisor for .div
        sethi   %hi(firstValidRow), %g1
        ld      [%g1+%lo(firstValidRow)], %g1
        cmp     %o0, %g1                 ;; value in %o0 after .div vs. firstValidRow
        bl      .LL42
         mov    %o0, %l0                 ;; delay slot (not annulled): %l0 = %o0 on both paths
        sethi   %hi(lastValidRow), %g1
        ld      [%g1+%lo(lastValidRow)], %g1
        cmp     %o0, %g1
        ;; The branch target is the very next instruction, so the annulled
        ;; delay slot behaves as a conditional move: %l0 = lastValidRow only
        ;; when %o0 > lastValidRow.
        bg,a    .LL32
         mov    %g1, %l0
.LL32:
        ldsb    [%l2+1823], %g1          ;; single copy of the load here
        cmp     %g1, 0
        bne     .LL36
         mov    0, %o0                   ;; delay slot: %o0 = 0 on both paths
        ld      [%i0+32], %o0            ;; only reached when %g1 == 0
.LL36:
        add     %o0, %l3, %o0
        ld      [%i0+16], %o1            ;; divisor for the second .div
        call    .div, 0
         sub    %i1, %o0, %o0            ;; delay slot: dividend = second argument - %o0
        cmp     %o0, 0
        bl,a    .LL43
         st     %l0, [%i2]               ;; annulled slot: store happens only when branching to .LL43
        ld      [%l1+888], %g1
        add     %g1, 1, %g1              ;; %g1 = [%l1+888] + 1, the upper bound
        cmp     %o0, %g1
        ;; Same conditional-move idiom as above: %o0 = %g1 only when %o0 > %g1.
        bg,a    .LL39
         mov    %g1, %o0
.LL39:
        st      %l0, [%i2]
        st      %o0, [%i3]
        jmp     %i7+8
         restore
.LL42:
        ;; Reached when %o0 < firstValidRow; %g1 still holds firstValidRow.
        b       .LL32
         mov    %g1, %l0                 ;; delay slot: %l0 = firstValidRow
.LL43:
        ;; Negative result: [%i2] was already stored in the bl,a delay slot.
        mov     0, %o0
        st      %o0, [%i3]
        jmp     %i7+8
        ;; NOTE(review): the quoted listing stops here; the delay-slot
        ;; instruction of this jmp is not shown (presumably `restore`, as
        ;; at .LL39 -- confirm against the compiler output).

(The 4.0 code is a few bytes smaller.)

I'll attach the preprocessed code.
Comment 1 Dan Nicolaescu 2005-10-05 05:13:18 UTC
Created attachment 9889 [details]
preprocessed code for this bug
Comment 2 Eric Botcazou 2005-10-05 11:06:49 UTC
Looks indeed weird.
Comment 3 Steven Bosscher 2013-04-18 21:06:17 UTC
Current trunk still picks that ldsb insn for the delay slot.
Here's what it produces:

;; PointToRowCol as emitted by GCC trunk r198052 (see the .ident line)
;; for the reduced test case quoted in this comment.
;; Annotations use the ';;' marker already used earlier in this report;
;; it is not SPARC assembler comment syntax, so strip these lines before
;; assembling the listing.
;; A line indented by one extra space sits in the delay slot of the
;; branch/jmp directly above it.
        .file   "t.c"
        .section        ".text"
        .align 4
        .global PointToRowCol
        .type   PointToRowCol, #function
        .proc   020
PointToRowCol:
        ;; No `save`: the function stays in the caller's register window,
        ;; reads its arguments from %o0-%o3 and returns with jmp %o7+8.
        sethi   %hi(term+4), %g1
        sethi   %hi(firstValidRow), %g2
        ld      [%g1+%lo(term+4)], %g1
        ld      [%g2+%lo(firstValidRow)], %g2
        ld      [%g1], %g4
        ld      [%g1+12], %g3
        sub     %o0, %g4, %o0            ;; dividend = first argument - %g4
        sra     %o0, 31, %o5             ;; %o5 = sign bits of the dividend
        wr      %o5, 0, %y               ;; %y = high word of the 64-bit dividend for sdiv
        ld      [%g3+4], %o5             ;; divisor
        ;; NOTE(review): the nops presumably pad out the delayed write to
        ;; %y before sdiv reads it -- confirm against the SPARC V8 manual.
        nop
        nop
        sdiv    %o0, %o5, %o0
        cmp     %o0, %g2                 ;; quotient vs. firstValidRow
        bl,a    .L12                     ;; ",a": delay slot runs only when the branch is taken
         ldsb   [%g1+16], %o4            ;; the load this report is about
        sethi   %hi(lastValidRow), %o5
        ld      [%o5+%lo(lastValidRow)], %o5
        cmp     %o0, %o5
        bg      .L10
         mov    %o0, %g2                 ;; not annulled: runs whether or not bg is taken
.L2:
        ldsb    [%g1+16], %o4            ;; same load again on the fall-through path
.L12:
        cmp     %o4, 0
        bne     .L4
         mov    0, %o5                   ;; delay slot: %o5 = 0 on both paths
        ld      [%g3+8], %o5             ;; only reached when %o4 == 0
.L4:
        add     %o5, %g4, %g4
        sub     %o1, %g4, %o1            ;; dividend = second argument - %g4
        sra     %o1, 31, %g4
        wr      %g4, 0, %y               ;; sign-extend the dividend into %y, as above
        ld      [%g3], %g4               ;; divisor
        nop
        nop
        sdiv    %o1, %g4, %o1
        cmp     %o1, 0
        bl,a    .L8
         st     %g2, [%o2]               ;; annulled slot: store happens only when branching to .L8
        ld      [%g1+4], %g1
        add     %g1, 1, %g1              ;; %g1 = [%g1+4] + 1, the upper bound
        cmp     %o1, %g1
        bg      .L11
         st     %g2, [%o2]               ;; not annulled: store happens on both paths
        jmp     %o7+8
         st     %o1, [%o3]
.L11:
        ;; Quotient exceeded the upper bound: store the bound instead.
        mov     %g1, %o1
        jmp     %o7+8
         st     %o1, [%o3]
.L10:
        ;; Reached when the quotient > lastValidRow; %o5 still holds lastValidRow.
        b       .L2
         mov    %o5, %g2
.L8:
        ;; Negative quotient: store 0 through the fourth argument.
        mov     0, %o1
        jmp     %o7+8
         st     %o1, [%o3]
        .size   PointToRowCol, .-PointToRowCol
        .ident  "GCC: (GNU) 4.9.0 20130418 (experimental) [trunk revision 198052]"
        .section        .note.GNU-stack,"",@progbits


for this test case:

/* Declarations for the reduced PointToRowCol test case. */

/* Boolean is a plain char; PointToRowCol tests misc.useRight for non-zero. */
typedef char Boolean;

/* Holds the width that PointToRowCol reads via whichVwin->sb_info.width. */
typedef struct {
  int width;
} SbInfo;

/* f_height and f_width are the divisors used by PointToRowCol. */
struct _vtwin {
  int f_width;
  int f_height;
  SbInfo sb_info;
};

/* border is subtracted from both coordinates; max_col + 1 caps the column. */
typedef struct {
  int border;
  int max_col;
  struct _vtwin *whichVwin;
} TScreen;

/* useRight selects whether sb_info.width is added to the border offset. */
typedef struct _Misc {
  Boolean useRight;
} Misc;

/* Object reached through the global `term`. */
typedef struct _XtermWidgetRec {
  TScreen screen;
  Misc misc;
} XtermWidgetRec, *XtermWidget;

/* Inclusive bounds used to clamp the computed row. */
extern int firstValidRow, lastValidRow;

/* Global read by PointToRowCol; defined elsewhere. */
extern XtermWidget term;

/*
 * Convert the point (x, y) into a row and column and store them
 * through r and c.
 *
 *   y, x  coordinates; screen->border is subtracted before dividing
 *   r     receives the row, clamped to [firstValidRow, lastValidRow]
 *   c     receives the column, clamped to [0, screen->max_col + 1]
 *
 * Reads the globals term, firstValidRow and lastValidRow.
 */
void
PointToRowCol(int y, int x, int *r, int *c)
{
  TScreen *screen = &term->screen;
  int row, col;

  /* Row = vertical offset inside the border, in units of f_height. */
  row = (y - screen->border) / screen->whichVwin->f_height;
  /* Clamp the row; this if/else chain is what produces the branches the report discusses. */
  if (row < firstValidRow)
    row = firstValidRow;
  else if (row > lastValidRow)
    row = lastValidRow;
  /* Column: when useRight is zero, sb_info.width is subtracted in addition to the border. */
  col = (x - (((term->misc.useRight) ? 0 : screen->whichVwin->sb_info.width) + screen->border) ) / screen->whichVwin->f_width;
  /* Clamp the column to [0, max_col + 1]. */
  if (col < 0)
    col = 0;
  else if (col > screen->max_col + 1)
    col = screen->max_col + 1;

  *r = row;
  *c = col;
}

with options: "-mcpu=v8 -m32 -O2".