This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: Small slowdown...


> > Date: Tue, 30 Mar 1999 12:23:17 +0200
> > From: Jan Hubicka <hubicka@atrey.karlin.mff.cuni.cz>
> 
> > It is pretty easy to handle this directly in the asm output routines.
> > Is such a hack acceptable?
> 
> Yes (though I am not the best source for this answer).
OK, so here is the patch.
I've made two versions. The first, less clean one modifies
print_operand_address, and the second one changes the shift patterns. The advantage of the
first patch is that we can be sure it handles all cases. The second patch
doesn't handle, for example, the following code:

/* Example for the second patch: the a*2 multiply is done as part of the
   address computation of the load, so it does not go through the shift
   patterns. */
main(int a)
{
    return *(int *)(a*2);
}

This is because the multiply is done in the address computation and doesn't go through the shift patterns.

Another advantage of the first patch is that it adds support for AMD's
recommendation to use [ESI+0] addressing mode instead of [ESI].

Honza

First patch:
*** ChangeLog.old	Wed Mar 31 22:47:54 1999
--- ChangeLog	Wed Mar 31 23:15:15 1999
***************
*** 1,3 ****
--- 1,8 ----
+ Wed Mar 31 22:48:01 CEST 1999 Jan Hubicka  (hubicka@paru.cas.cz)
+ 
+ 	* i386.c (print_operand_address): Follow Intel and AMD recommendations
+ 	when outputting memory addresses.
+ 
  Sun Mar 28 01:15:04 PST 1999 Jeff Law  (law@cygnus.com)
  
  	* version.c: Bump for snapshot.
*** config/i386/i386.c.old	Tue Mar 30 23:01:14 1999
--- config/i386/i386.c	Wed Mar 31 23:22:17 1999
*************** print_operand_address (file, addr)
*** 3772,3777 ****
--- 3772,3783 ----
    switch (GET_CODE (addr))
      {
      case REG:
+       /* This is recommended by AMD manual. ESI addressing makes instruction
+        * vector decoded. We can avoid this by ESI+0 addressing. */
+       if (REGNO (addr) == 4 && ix86_cpu == PROCESSOR_K6 && !optimize_size)
+ 	 {
+ 	   output_addr_const (file, const0_rtx);
+ 	 } 
        ADDR_BEG (file);
        fprintf (file, "%se", RP);
        fputs (hi_reg_name[REGNO (addr)], file);
*************** print_operand_address (file, addr)
*** 3887,3894 ****
  	    ireg = XEXP (addr, 0);
  	  }
  
! 	output_addr_const (file, const0_rtx);
! 	PRINT_B_I_S (NULL_RTX, ireg, scale, file);
        }
        break;
  
--- 3893,3909 ----
  	    ireg = XEXP (addr, 0);
  	  }
  
! 	/* (reg,reg,) is shorter than (,reg,2). */
! 	if(scale == 2)
! 	  {
! 	    PRINT_B_I_S (ireg, ireg, 1, file);
! 	  } 
! 	else 
! 	  {
! 	    output_addr_const (file, const0_rtx);
! 	    PRINT_B_I_S (NULL_RTX, ireg, scale, file);
! 	  }
        }
        break;
  

Second patch:

*** ChangeLog.old	Wed Mar 31 22:47:54 1999
--- ChangeLog	Wed Mar 31 23:15:15 1999
***************
*** 1,3 ****
--- 1,8 ----
+ Wed Mar 31 22:48:01 CEST 1999 Jan Hubicka  (hubicka@paru.cas.cz)
+ 
+ 	* i386.md (shifting pattern): Use plus instead of mult in lea
+ 	instructions whenever possible.
+ 
  Sun Mar 28 01:15:04 PST 1999 Jeff Law  (law@cygnus.com)
  
  	* version.c: Bump for snapshot.
*** config/i386/i386.md.old	Wed Mar 31 23:05:47 1999
--- config/i386/i386.md	Wed Mar 31 23:12:24 1999
*************** byte_xor_operation:
*** 4849,4855 ****
  	return \"#\";
      
        /* For shifts up to and including 3 bits, use lea.  */
!       operands[1] = gen_rtx_MULT (SImode, operands[1],
  				  GEN_INT (1 << INTVAL (operands[2])));
        return AS2 (lea%L0,%a1,%0);
      }
--- 4849,4859 ----
  	return \"#\";
      
        /* For shifts up to and including 3 bits, use lea.  */
!       /* (reg,reg) addressing is shorter than 0(,reg,2) */
!       if (INTVAL (operands[2]) == 2)
!         operands[1] = gen_rtx_PLUS (SImode, operands[1], operands[1]);
!       else
!         operands[1] = gen_rtx_MULT (SImode, operands[1],
  				  GEN_INT (1 << INTVAL (operands[2])));
        return AS2 (lea%L0,%a1,%0);
      }
*************** byte_xor_operation:
*** 4878,4884 ****
        && (int)ix86_cpu == (int)PROCESSOR_PENTIUM
        && GET_MODE (insn) != TImode)
      {
!       operands[1] = gen_rtx_MULT (SImode, operands[1],
  				  GEN_INT (1 << INTVAL (operands[2])));
        return AS2 (lea%L0,%a1,%0);
      }
--- 4882,4891 ----
        && (int)ix86_cpu == (int)PROCESSOR_PENTIUM
        && GET_MODE (insn) != TImode)
      {
! 	  /* (reg,reg) addressing is shorter than 0(,reg,2) */
!       if (INTVAL (operands[2]) == 2)
!         operands[1] = gen_rtx_PLUS (SImode, operands[1], operands[1]);
!       else
!         operands[1] = gen_rtx_MULT (SImode, operands[1],
  				  GEN_INT (1 << INTVAL (operands[2])));
        return AS2 (lea%L0,%a1,%0);
      }
*************** byte_xor_operation:
*** 4941,4947 ****
  	      output_asm_insn (AS2 (mov%L0,%1,%0), operands);
  	      operands[1] = operands[0];
  	    }
!           operands[1] = gen_rtx_MULT (SImode, operands[1],
  				      GEN_INT (1 << INTVAL (operands[2])));
  	  return AS2 (lea%L0,%a1,%0);
  	}
--- 4948,4959 ----
  	      output_asm_insn (AS2 (mov%L0,%1,%0), operands);
  	      operands[1] = operands[0];
  	    }
! 
! 	  /* (reg,reg) addressing is shorter than 0(,reg,2) */
! 	  if (INTVAL (operands[2]) == 2)
!             operands[1] = gen_rtx_PLUS (SImode, operands[1], operands[1]);
! 	  else
!             operands[1] = gen_rtx_MULT (SImode, operands[1],
  				      GEN_INT (1 << INTVAL (operands[2])));
  	  return AS2 (lea%L0,%a1,%0);
  	}

-- 
                       OK. Lets make a signature file.
+-------------------------------------------------------------------------+
|        Jan Hubicka (Jan Hubi\v{c}ka in TeX) hubicka@freesoft.cz         |
|         Czech free software foundation: http://www.freesoft.cz          |
|AA project - the new way for computer graphics - http://www.ta.jcu.cz/aa |
|  homepage: http://www.paru.cas.cz/~hubicka/, games koules, Xonix, fast  |
|  fractal zoomer XaoS, index of Czech GNU/Linux/UN*X documentation etc.  | 
+-------------------------------------------------------------------------+


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]