This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

NON_OPTIMAL CODE GENERATED FOR SH4



>Submitter-Id:	net
>Originator:	Naveen Sharma
>Organization:  HCLT	
>Confidential:	no
>Synopsis:      NON_OPTIMAL CODE GENERATED FOR SH4	
>Severity:	serious 
>Priority:	medium
>Category:	
>Class:		pessimizes-code
>Release:	3.0.3
>Environment:
System: Linux  2.4.2-2 #1 Sun Apr 8 20:41:30 EDT 2001 i686 unknown
Architecture: i686
host: i686-pc-linux-gnu
build: i686-pc-linux-gnu
target: sh-unknown-linux-gnu
configured with: ../gcc/configure --target=sh-linux --prefix=/home/naveens/local --with-as=/home/gcc/gnu_env/tools/sh4/bin/sh-linux-as --with-ld=/home/gcc/gnu_env/tools/sh4/bin/sh-linux-ld --with-headers=/usr/sh4-linux/include --with-libs=/usr/sh4-linux/lib
>Description:
	
For the following piece of code:
        	
/* Test case for the report (old-style K&R definition).
   For every row j in [non0, np) it subtracts tmp1 * coeff[j][k] from the
   diagonal element coeff[j][j], for each k in [0, j).
     np    - exclusive upper bound for the row index j
     non0  - first row index processed
     coeff - matrix whose rows are 2048 doubles wide; updated in place
   Nothing is returned.  */
void
loop_p (np, non0, coeff)
int		np,non0;	
double	coeff[][2048];
{
	/* i is declared but never used in this function.  */
	int	i, j, k;
	/* tmp1 is read in the loop below without ever being assigned here.  */
	double	tmp1;		

	for (j = non0;j < np;j++)
		for (k = 0;k < j;k++) {
		  /* Statement whose generated code is the subject of the report:
		     a DFmode load and a DFmode store of coeff[j][j] on every
		     iteration of the inner loop.  */
		  coeff[j][j] -= tmp1 * coeff[j][k]; <-- HERE
		}
	

}

The code generated for the innermost loop is:

.L9:
        fmov.s  @r2+,fr5
        dt      r3
        fmov.s  @r7+,fr3
        fmov.s  @r2+,fr4
        fmov.s  @r7,fr2  ! Note this
        fmul    dr6,dr4
        add     #-4,r7   <-- Subtract 4 ??
        add     #4,r7    <-- Add 4 ??
        fsub    dr4,dr2
        fmov.s  fr2,@r7  ! Note this
        bf/s    .L9
        fmov.s  fr3,@-r7

There are redundant statements in the assembler output (marked with arrows).
These are generated during insn splitting after the reload pass (along with
the marked fmov.s insns).

These affect performance since they are in the innermost loop.

>How-To-Repeat:

 Repeat with the code fragment given in the description.

>Fix:

Investigation shows that the following pieces of code in sh.md cause the
problem. Could somebody comment on why they are written this way,
so that a fix can be worked out?

Line 2869 and after:
--------- 
 .....
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (match_operand:DF 1 "memory_operand" ""))
   (use (match_operand:PSI 2 "fpscr_operand" "c"))
   (clobber (match_scratch:SI 3 "X"))]
  "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
   && FP_OR_XD_REGISTER_P (true_regnum (operands[0]))"
  [(const_int 0)]
  "
{
  int regno = true_regnum (operands[0]);
  rtx addr, insn, adjust = NULL_RTX;
  rtx mem2 = copy_rtx (operands[1]);
  rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN);
  rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN);
 
  PUT_MODE (mem2, SFmode);
  operands[1] = copy_rtx (mem2);
  addr = XEXP (mem2, 0);
  if (GET_CODE (addr) != POST_INC)
    {
      /* If we have to modify the stack pointer, the value that we have
         read with post-increment might be modified by an interrupt,
         so write it back.  */
      if (REGNO (addr) == STACK_POINTER_REGNUM)
        adjust = gen_push_e (reg0);
      else
        adjust = gen_addsi3 (addr, addr, GEN_INT (-4)); <-- This is it !!
      XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr);
    }
   .............

Line 2910 and after:

;; Split of a DFmode store (register -> memory) into two SFmode stores.
;;
;; Applies when TARGET_SH4 is set, TARGET_FMOVD is not, reload has completed
;; and the source (operand 1) is an FP or XD register.  Operand 2 is the
;; fpscr use that is passed on to both generated movsf_ie insns; operand 3
;; is a scratch clobber.
;;
;; Sequence built by the preparation code:
;;   1. The memory operand is copied and retyped to SFmode, and a first
;;      movsf_ie stores register (regno + ! TARGET_LITTLE_ENDIAN) through it.
;;   2. If the address is not already a PRE_DEC, an addsi3 adding 4 to the
;;      address is emitted *before* that first store, and the address
;;      is rewritten as a PRE_DEC of the same expression for the second store.
;;      This is the "add #4,rN" the report marks as redundant when it directly
;;      follows the "add #-4,rN" produced by the matching load split.
;;   3. A second movsf_ie stores register (regno + !! TARGET_LITTLE_ENDIAN)
;;      through the PRE_DEC address.
;;   4. REG_INC notes for the address register are attached to the second
;;      store always, and to the first store only when no adjustment insn
;;      was needed (i.e. the original address was already a PRE_DEC).
(define_split
  [(set (match_operand:DF 0 "memory_operand" "")
        (match_operand:DF 1 "register_operand" ""))
   (use (match_operand:PSI 2 "fpscr_operand" "c"))
   (clobber (match_scratch:SI 3 "X"))]
  "TARGET_SH4 && ! TARGET_FMOVD && reload_completed
   && FP_OR_XD_REGISTER_P (true_regnum (operands[1]))"
  [(const_int 0)]
  "
{
  int regno = true_regnum (operands[1]);
  rtx insn, addr, adjust = NULL_RTX;
 
  operands[0] = copy_rtx (operands[0]);
  PUT_MODE (operands[0], SFmode);
  insn = emit_insn (gen_movsf_ie (operands[0],
                                  gen_rtx (REG, SFmode,
                                           regno + ! TARGET_LITTLE_ENDIAN),
                                  operands[2]));
  operands[0] = copy_rtx (operands[0]);
  addr = XEXP (operands[0], 0);
  if (GET_CODE (addr) != PRE_DEC)
    {
      adjust = gen_addsi3 (addr, addr, GEN_INT (4)); <-- This is it.
      emit_insn_before (adjust, insn);
      XEXP (operands[0], 0) = addr = gen_rtx (PRE_DEC, SImode, addr);
    }
  addr = XEXP (addr, 0);
  if (! adjust)
    REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX);
  insn = emit_insn (gen_movsf_ie (operands[0],
                                  gen_rtx (REG, SFmode,
                                           regno + !! TARGET_LITTLE_ENDIAN),
                                  operands[2]));
  REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX);
  DONE;
}")

The complete assembler output is pasted below for reference.


	! Compiler output for loop_p() (see the .ident line: GCC 3.0.3; .little
	! selects little-endian).  Register roles as they can be read off the
	! code below, matched against the C source's argument order
	! (np, non0, coeff) -- NOTE(review): argument registers assumed r4/r5/r6:
	!   r8 = np (copied from r4)      r4 = coeff base (copied from r6)
	!   r5 = j (starts as non0)       r6 = j * 2048
	!   r0 = &coeff[j][j]             r9 = 16392, the per-j byte step of r0
	!   r1 = 2048, the per-j step of r6
	!   r2 = walking pointer over coeff[j][k]   r7 = &coeff[j][j] in the inner loop
	!   r3 = inner-loop trip count (j)
	!   dr6 is used as the multiplier (tmp1) and is never written here.
	.file	"tmp1.c"
	.little
	.text
	.align 5
	.global	loop_p
	.type	loop_p,@function
loop_p:
	mov.l	r8,@-r15	! save r8
	mov.l	r9,@-r15	! save r9
	mov	r4,r8		! r8 = np
	mov.l	r14,@-r15	! save r14
	mov	r6,r4		! r4 = coeff
	cmp/ge	r8,r5		! T = (non0 >= np)
	sts.l	pr,@-r15	! save pr
	bt/s	.L12		! outer loop runs zero times -> epilogue
	mov	r15,r14		! (delay slot) r14 = stack pointer
	mov	#11,r1
	mov	r5,r6
	shld	r1,r6		! r6 = j << 11 = j * 2048
	mov.w	.L14,r1		! r1 = 16392
	mul.l	r1,r5		! macl = j * 16392
	mov	r1,r9		! r9 = 16392
	mov.w	.L15,r1		! r1 = 2048
	sts	macl,r0
	add	r4,r0		! r0 = coeff + j * 16392 = &coeff[j][j]
.L5:
	! Outer loop body: one pass per value of j.
	mov	#0,r3
	cmp/ge	r5,r3		! T = (0 >= j)
	bt	.L13		! inner loop runs zero times -> next j
	mov	#3,r3
	mov	r6,r2
	mov	r0,r7		! r7 = &coeff[j][j]
	shld	r3,r2		! r2 = (j * 2048) << 3 = byte offset of row j
	mov	r5,r3		! r3 = j = inner-loop trip count
	add	r4,r2		! r2 = &coeff[j][0]
.L9:
	! Inner loop: coeff[j][j] -= tmp1 * coeff[j][k], one k per pass.
	fmov.s	@r2+,fr5	! first half of coeff[j][k]
	dt	r3		! r3--, T = (r3 == 0)
	fmov.s	@r7+,fr3	! first half of coeff[j][j]; r7 += 4
	fmov.s	@r2+,fr4	! second half of coeff[j][k]; r2 now at next k
	fmov.s	@r7,fr2		! second half of coeff[j][j]
	fmul	dr6,dr4		! dr4 = tmp1 * coeff[j][k]
	add	#-4,r7		! undo the post-increment of the load ...
	add	#4,r7		! ... and redo it for the store: the pair cancels
	fsub	dr4,dr2		! dr2 = coeff[j][j] - dr4
	fmov.s	fr2,@r7		! store second half of the result
	bf/s	.L9		! loop while r3 != 0
	fmov.s	fr3,@-r7	! (delay slot) r7 -= 4, store first half
.L13:
	add	#1,r5		! j++
	add	r1,r6		! r6 += 2048
	cmp/ge	r8,r5		! T = (j >= np)
	bf/s	.L5		! continue while j < np
	add	r9,r0		! (delay slot) r0 += 16392 -> next &coeff[j][j]
.L12:
	mov	r14,r15		! restore stack pointer from r14
	lds.l	@r15+,pr	! restore pr
	mov.l	@r15+,r14	! restore r14
	mov.l	@r15+,r9	! restore r9
	rts	
	mov.l	@r15+,r8	! (delay slot) restore r8
	.align 1
.L14:
	.short	16392		! 2048 * 8 + 8
.L15:
	.short	2048
.Lfe1:
	.size	loop_p,.Lfe1-loop_p
	.ident	"GCC: (GNU) 3.0.3"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]