This is the mail archive of the mailing list for the GCC project.

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

RELOAD: Spill in to the index register

I have a case where GCC emits sub-optimal code for SuperH, and I am
struggling to get it to produce optimal code.

On SuperH, an indexed memory access is valid when the index register
is "r0" (r0 is the only register usable as an index).  So GCC
generates memory access expressions like the following:
	(mem (plus (reg X) (reg Y)))

In the reload phase (with REG_OK_STRICT), register X or Y in the
expression will be spilled into "r0" to make the address legitimate.
To do that, GCC generates a register copy r0 := rX.

Say, when we have:
	(set (reg 1) (mem (plus (reg 0) (reg 2))))
	(set (reg 5) (mem (plus (reg 1) (reg 8))))
it becomes:
	(set (reg 1) (mem (plus (reg 0) (reg 2))))
	(set (reg 0) (reg 1))
	(set (reg 5) (mem (plus (reg 0) (reg 8))))
instead of:
	(set (reg 0) (mem (plus (reg 0) (reg 2))))
	(set (reg 5) (mem (plus (reg 0) (reg 8))))

That is, the copy instruction (set (reg 0) (reg 1)) is not combined
into the load that precedes it.

We could use define_peephole to (only) combine the instructions; I
have confirmed that this works.  But it does not improve register
allocation (worse, it is not good for instruction scheduling).

I've also tried define_peephole2, but it results in an ICE: it is too
late, because the peephole2 phase comes after register allocation.

Is there any way to get good code in this case?
Any suggestions are welcome.

Following is my test.

Input is a cut-down version of rtlanal.i:
struct rtx_def;
typedef struct rtx_def *rtx;

struct rtvec_def {
  int num_elem;		/* number of elements */
  rtx elem[1];

typedef struct rtvec_def *rtvec;

enum machine_mode { VOIDmode, SImode, MAX_MACHINE_MODE };

typedef union rtunion_def
  int rtwint;
  int rtint;
  unsigned int rtuint;
  const char *rtstr;
  rtx rtx;
  rtvec rtvec;
} rtunion;

struct rtx_def
  enum rtx_code code: 16;
  enum machine_mode mode : 8;
  unsigned int jump : 1;
  unsigned int call : 1;
  unsigned int unchanging : 1;
  unsigned int volatil : 1;
  unsigned int in_struct : 1;
  unsigned int used : 1;
  unsigned integrated : 1;
  unsigned frame_related : 1;
  rtunion fld[1];

extern const int rtx_length[];
extern const char * const rtx_format[];

loc_mentioned_in_p (loc, in)
     rtx *loc, in;
  enum rtx_code code = ((enum rtx_code) (in)->code);
  const char *fmt = (rtx_format[(int) (code)]);
  int i, j;

  for (i = (rtx_length[(int) (code)]) - 1; i >= 0; i--)
      if (loc == &in->fld[i].rtx)
        return 1;
      if (fmt[i] == 'e')
          if (loc_mentioned_in_p (loc, (((in)->fld[i]).rtx)))
            return 1;
      else if (fmt[i] == 'E')
        for (j = (((((in)->fld[i]).rtvec))->num_elem) - 1; j >= 0; j--)
          if (loc_mentioned_in_p (loc, (((((in)->fld[i]).rtvec))->elem[j])))
            return 1;
  return 0;

Output with -ml -m4 -O2 (sh-unknown-linux-gnu) is:
	; Compiler output for loc_mentioned_in_p, quoted as evidence.
	; NOTE(review): the label lines are not present in this excerpt.
	; Branch targets (.L1, .L4, .L5, .L15, .L20, .L22, .L23, .L29),
	; the literal-pool labels (.L24, .L25, .L27), .Lfe1 and the
	; function's own entry label are referenced but never defined
	; here; other lines may be missing as well.  Treat the listing
	; as illustrative, not as assemblable input.
	.file	"rtlanal.i"
	.align 5
	.global	loc_mentioned_in_p
	.type	loc_mentioned_in_p,@function
	; Prologue: push r8-r14 and pr onto the stack (r15).
	mov.l	r8,@-r15
	mov.l	r9,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15
	mov.l	r12,@-r15
	mov.l	r13,@-r15
	mov.l	r14,@-r15
	sts.l	pr,@-r15
	mov.w	@r5,r0			; 16-bit load from @r5 -- presumably in->code; confirm
	add	#-16,r15	; 		Could be 12!
	mov.l	.L24,r1
	mov	r15,r14			; frame accesses below go through r14
	extu.w	r0,r0
	mov.l	r4,@r14			; spill r4 to frame slot 0
	shll2	r0			; r0 *= 4: byte offset into the tables loaded below
	mov.l	r5,@(4,r14)		; spill r5 to frame slot 4
	mov.l	@(r0,r1),r1
	mov.l	r1,@(8,r14)		; spill the loaded table entry to frame slot 8
	mov.l	.L25,r1
	mov.l	@(r0,r1),r10
	add	#-1,r10
	cmp/pz	r10
	bf	.L20
	mov	r10,r1
	shll2	r1
	mov	r1,r11
	mov	r1,r13
	add	r5,r11
	add	#4,r11
	add	#4,r13
	mov	r11,r12
	mov.l	@r14,r0
	cmp/eq	r12,r0
	bt/s	.L1
	mov	#1,r0
	mov.l	@(8,r14),r0
	; First instance of the problem described in the message: the
	; indexed load targets r1 and is then copied into r0.
	mov.b	@(r0,r10),r1		; Could be:
	mov	r1,r0			;     mov.b	@(r0,r10),r0
	cmp/eq	#101,r0			; 101 == 'e'
	bt/s	.L22
	cmp/eq	#69,r0			; 69 == 'E'
	bt	.L23
	add	#-1,r10
	add	#-4,r11
	cmp/pz	r10
	add	#-4,r12
	bt/s	.L5
	add	#-4,r13
	mov	#0,r0
	; Epilogue: release the 16-byte frame, then pop pr and r14-r8.
	add	#16,r14
	mov	r14,r15
	lds.l	@r15+,pr
	mov.l	@r15+,r14
	mov.l	@r15+,r13
	mov.l	@r15+,r12
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r9
	mov.l	@r15+,r8
	.align 5
	mov.l	@r11,r1
	mov.l	@r1,r9
	add	#-1,r9
	cmp/pz	r9
	bf/s	.L4
	mov	r9,r8
	mov.l	r13,@(12,r14)		; spill r13 to frame slot 12 (reloaded into r2 just below)
	shll2	r8
	add	#4,r8
	mov.l	@(4,r14),r0
	mov.l	@(12,r14),r2
	mov.l	@r14,r4
	; Second instance: indexed load into r1 followed by a copy to r0.
	mov.l	@(r0,r2),r1		; Could be:
	mov	r1,r0			;     mov.l	@(r0,r2),r0
	mov	r8,r2
	mov.l	.L27,r1
	jsr	@r1
	mov.l	@(r0,r2),r5
	tst	r0,r0
	bf/s	.L1
	mov	#1,r0
	add	#-1,r9
	cmp/pz	r9
	bt/s	.L15
	add	#-4,r8
	bra	.L29
	add	#-1,r10
	.align 5
	mov.l	.L27,r0
	mov.l	@r14,r4
	jsr	@r0
	mov.l	@r11,r5
	tst	r0,r0
	bf/s	.L1
	mov	#1,r0
	bra	.L29
	add	#-1,r10
	.align 2
	; Address constants -- presumably the entries labelled .L24,
	; .L25 and .L27 (the label lines are missing); confirm.
	.long	rtx_format
	.long	rtx_length
	.long	loc_mentioned_in_p
	.size	loc_mentioned_in_p,.Lfe1-loc_mentioned_in_p
	.ident	"GCC: (GNU) 3.1 20011105 (experimental)"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]