ADDRESS_COST cleanups

Jan Hubicka hubicka@atrey.karlin.mff.cuni.cz
Sun Apr 23 13:22:00 GMT 2000


Hi
I've hit nonsential behaviour of loop optimizer caused by the ADDRESS_COST not
working as expected.  As currently defined in i386.h it is unable to parse more
complex memory references and marks them as expensive even when only one pseudo
is present or so.

I've rewrite it to function using decompose_address, but I've hit problem in
cse, that calls address_cost even for badly formed addresses.  It looks like
calling of problems here (also because currently cse will ignore correctly
formed ones when the badly formed one appears cheaper), so I've made new
function address_cost that calls ADDRESS_COST only for correct addresses and
gives high cost to the incorrect ones.

While checking looking for the incorrect addresses I've noticed that cse makes
no effor to get address into right form, so for example attempts to substitute
(plus (sybol_ref ...) (const_int ...)), that would match if it were enclosed in
the (const ....), but since I am not aware of any infrastructure that may make
memory address matching at the moment, I am ignoring this right now.

This patch also cleanups some code in find_best_addr and avoids unnecesary
re-calculation of the costs.

With this patch I am getting somewhat better code on byte benchmark, but the
changes are not very noticeable.

Honza

Mon Apr 17 19:37:51 CEST 2000  Jan Hubicka  <jh@suse.cz>
	* cse.c (CSE_ADDRESS_COST): Remove.
	(find_best_addr): Add new parameter "MODE", use address_cost instead
	of CSE_ADDRESS_COST
	(address_cost): New.
	(fold_rtx): Update call of find_best_addr.
	* rtl.h (address_cost): Declare.
	* loop.c (general_induction_var): Add new parameter "MODE", use
	address_cost instead of ADDRESS_COST
	(init_loop): Use address_cost instead of ADDRESS_COST.
	(check_insn_for_givs): Update call of general_induction_var.
	(find_mem_givs): Likewise.
	(consec_sets_giv): Likewise.
	* config/i386/i386.h (ADDRESS_COST): Call ix86_address_cost.
	* i386.c (ix86_address_cost): New.
	* i386-protos.h (ix86_address_cost): Declare.

*** cse.c.old	Mon Apr 17 09:14:03 2000
--- cse.c	Mon Apr 17 17:21:18 2000
*************** struct table_elt
*** 537,552 ****
  
  #define REGNO_QTY_VALID_P(N) (REG_QTY (N) != (int) (N))
  
- #ifdef ADDRESS_COST
- /* The ADDRESS_COST macro does not deal with ADDRESSOF nodes.  But,
-    during CSE, such nodes are present.  Using an ADDRESSOF node which
-    refers to the address of a REG is a good thing because we can then
-    turn (MEM (ADDRESSSOF (REG))) into just plain REG.  */
- #define CSE_ADDRESS_COST(RTX)					\
-   ((GET_CODE (RTX) == ADDRESSOF && REG_P (XEXP ((RTX), 0)))	\
-    ? -1 : ADDRESS_COST(RTX))
- #endif 
- 
  static struct table_elt *table[HASH_SIZE];
  
  /* Chain of `struct table_elt's made so far for this function
--- 537,542 ----
*************** static unsigned canon_hash	PARAMS ((rtx,
*** 683,689 ****
  static unsigned safe_hash	PARAMS ((rtx, enum machine_mode));
  static int exp_equiv_p		PARAMS ((rtx, rtx, int, int));
  static rtx canon_reg		PARAMS ((rtx, rtx));
! static void find_best_addr	PARAMS ((rtx, rtx *));
  static enum rtx_code find_comparison_args PARAMS ((enum rtx_code, rtx *, rtx *,
  						   enum machine_mode *,
  						   enum machine_mode *));
--- 673,679 ----
  static unsigned safe_hash	PARAMS ((rtx, enum machine_mode));
  static int exp_equiv_p		PARAMS ((rtx, rtx, int, int));
  static rtx canon_reg		PARAMS ((rtx, rtx));
! static void find_best_addr	PARAMS ((rtx, rtx *, enum machine_mode));
  static enum rtx_code find_comparison_args PARAMS ((enum rtx_code, rtx *, rtx *,
  						   enum machine_mode *,
  						   enum machine_mode *));
*************** rtx_cost (x, outer_code)
*** 849,854 ****
--- 839,872 ----
    return total;
  }
  
+ /* Return cost of address expression X.  Expect that X is propertly formed address
+    reference.  */
+ int
+ address_cost (x, mode)
+      rtx x;
+      enum machine_mode mode;
+ {
+   /* The ADDRESS_COST macro does not deal with ADDRESSOF nodes.  But,
+      during CSE, such nodes are present.  Using an ADDRESSOF node which
+      refers to the address of a REG is a good thing because we can then
+      turn (MEM (ADDRESSSOF (REG))) into just plain REG.  */
+ 
+   if (GET_CODE (x) == ADDRESSOF && REG_P (XEXP ((x), 0)))
+     return -1;
+ 
+   /* We may be asked for cost of various unusual addresses, such as operands
+      of push instruction.  It is not worthwhile to complicate writting
+      of ADDRESS_COST macro by such cases.  */
+ 
+   if (!memory_address_p (mode, x))
+     return 1000;
+ #ifdef ADDRESS_COST
+   return ADDRESS_COST (x);
+ #else
+   return rtx_cost (x, MEM);
+ #endif
+ }
+ 
  static struct cse_reg_info *
  get_cse_reg_info (regno)
       unsigned int regno;
*************** canon_reg (x, insn)
*** 2681,2689 ****
    */
  
  static void
! find_best_addr (insn, loc)
       rtx insn;
       rtx *loc;
  {
    struct table_elt *elt;
    rtx addr = *loc;
--- 2703,2712 ----
    */
  
  static void
! find_best_addr (insn, loc, mode)
       rtx insn;
       rtx *loc;
+      enum machine_mode mode;
  {
    struct table_elt *elt;
    rtx addr = *loc;
*************** find_best_addr (insn, loc)
*** 2695,2700 ****
--- 2718,2724 ----
    int save_hash_arg_in_memory = hash_arg_in_memory;
    int addr_volatile;
    int regno;
+   int folded_cost, addr_cost;
    unsigned hash;
  
    /* Do not try to replace constant addresses or addresses of local and
*************** find_best_addr (insn, loc)
*** 2728,2741 ****
      {
        rtx folded = fold_rtx (copy_rtx (addr), NULL_RTX);
  
!       if (1
! #ifdef ADDRESS_COST
! 	  && (CSE_ADDRESS_COST (folded) < CSE_ADDRESS_COST (addr)
! 	      || (CSE_ADDRESS_COST (folded) == CSE_ADDRESS_COST (addr)
! 		  && rtx_cost (folded, MEM) > rtx_cost (addr, MEM)))
! #else
  	  && rtx_cost (folded, MEM) < rtx_cost (addr, MEM)
- #endif
  	  && validate_change (insn, loc, folded, 0))
  	addr = folded;
      }
--- 2752,2764 ----
      {
        rtx folded = fold_rtx (copy_rtx (addr), NULL_RTX);
  
!       folded_cost = address_cost (folded, mode);
!       addr_cost = address_cost (addr, mode);
! 
!       if ((folded_cost < addr_cost
! 	   || (folded_cost == addr_cost
! 	       && rtx_cost (folded, MEM) > rtx_cost (addr, MEM)))
  	  && rtx_cost (folded, MEM) < rtx_cost (addr, MEM)
  	  && validate_change (insn, loc, folded, 0))
  	addr = folded;
      }
*************** find_best_addr (insn, loc)
*** 2782,2789 ****
  
        while (found_better)
  	{
! 	  int best_addr_cost = CSE_ADDRESS_COST (*loc);
  	  int best_rtx_cost = (elt->cost + 1) >> 1;
  	  struct table_elt *best_elt = elt; 
  
  	  found_better = 0;
--- 2805,2813 ----
  
        while (found_better)
  	{
! 	  int best_addr_cost = address_cost (*loc, mode);
  	  int best_rtx_cost = (elt->cost + 1) >> 1;
+ 	  int exp_cost;
  	  struct table_elt *best_elt = elt; 
  
  	  found_better = 0;
*************** find_best_addr (insn, loc)
*** 2792,2803 ****
  	      {
  		if ((GET_CODE (p->exp) == REG
  		     || exp_equiv_p (p->exp, p->exp, 1, 0))
! 		    && (CSE_ADDRESS_COST (p->exp) < best_addr_cost
! 			|| (CSE_ADDRESS_COST (p->exp) == best_addr_cost
! 			    && (p->cost + 1) >> 1 > best_rtx_cost)))
  		  {
  		    found_better = 1;
! 		    best_addr_cost = CSE_ADDRESS_COST (p->exp);
  		    best_rtx_cost = (p->cost + 1) >> 1;
  		    best_elt = p;
  		  }
--- 2816,2827 ----
  	      {
  		if ((GET_CODE (p->exp) == REG
  		     || exp_equiv_p (p->exp, p->exp, 1, 0))
! 		    && ((exp_cost = address_cost (p->exp, mode)) < best_addr_cost
! 			|| (exp_cost == best_addr_cost
! 			    && (p->cost + 1) >> 1 < best_rtx_cost)))
  		  {
  		    found_better = 1;
! 		    best_addr_cost = exp_cost;
  		    best_rtx_cost = (p->cost + 1) >> 1;
  		    best_elt = p;
  		  }
*************** find_best_addr (insn, loc)
*** 2851,2857 ****
  
        while (found_better)
  	{
! 	  int best_addr_cost = CSE_ADDRESS_COST (*loc);
  	  int best_rtx_cost = (COST (*loc) + 1) >> 1;
  	  struct table_elt *best_elt = elt; 
  	  rtx best_rtx = *loc;
--- 2875,2881 ----
  
        while (found_better)
  	{
! 	  int best_addr_cost = address_cost (*loc, mode);
  	  int best_rtx_cost = (COST (*loc) + 1) >> 1;
  	  struct table_elt *best_elt = elt; 
  	  rtx best_rtx = *loc;
*************** find_best_addr (insn, loc)
*** 2873,2885 ****
  	      {
  		rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode,
  					       p->exp, c);
  
! 		if ((CSE_ADDRESS_COST (new) < best_addr_cost
! 		    || (CSE_ADDRESS_COST (new) == best_addr_cost
! 			&& (COST (new) + 1) >> 1 > best_rtx_cost)))
  		  {
  		    found_better = 1;
! 		    best_addr_cost = CSE_ADDRESS_COST (new);
  		    best_rtx_cost = (COST (new) + 1) >> 1;
  		    best_elt = p;
  		    best_rtx = new;
--- 2897,2911 ----
  	      {
  		rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode,
  					       p->exp, c);
+ 		int new_cost;
+ 		new_cost = address_cost (new, mode);
  
! 		if (new_cost < best_addr_cost
! 		    || (new_cost == best_addr_cost
! 			&& (COST (new) + 1) >> 1 > best_rtx_cost))
  		  {
  		    found_better = 1;
! 		    best_addr_cost = new_cost;
  		    best_rtx_cost = (COST (new) + 1) >> 1;
  		    best_elt = p;
  		    best_rtx = new;
*************** fold_rtx (x, insn)
*** 3350,3356 ****
  	 best address.  Not only don't we care, but we could modify the
  	 MEM in an invalid way since we have no insn to validate against.  */
        if (insn != 0)
! 	find_best_addr (insn, &XEXP (x, 0));
  
        {
  	/* Even if we don't fold in the insn itself,
--- 3376,3382 ----
  	 best address.  Not only don't we care, but we could modify the
  	 MEM in an invalid way since we have no insn to validate against.  */
        if (insn != 0)
! 	find_best_addr (insn, &XEXP (x, 0), GET_MODE (x));
  
        {
  	/* Even if we don't fold in the insn itself,
*** rtl.h.old	Mon Apr 17 16:15:10 2000
--- rtl.h	Mon Apr 17 16:16:08 2000
*************** extern void push_obstacks		PARAMS ((stru
*** 1502,1507 ****
--- 1502,1508 ----
  /* In cse.c */
  struct cse_basic_block_data;
  extern int rtx_cost			PARAMS ((rtx, enum rtx_code));
+ extern int address_cost			PARAMS ((rtx, enum machine_mode));
  extern void delete_trivially_dead_insns	PARAMS ((rtx, int));
  #ifdef BUFSIZ
  extern int cse_main			PARAMS ((rtx, int, int, FILE *));
*** loop.c.old	Mon Apr 17 16:15:15 2000
--- loop.c	Mon Apr 17 17:03:17 2000
*************** static int basic_induction_var PARAMS ((
*** 278,284 ****
  					rtx *, rtx *, rtx **, int *));
  static rtx simplify_giv_expr PARAMS ((const struct loop *, rtx, int *));
  static int general_induction_var PARAMS ((const struct loop *loop, rtx, rtx *,
! 					  rtx *, rtx *, int, int *));
  static int consec_sets_giv PARAMS ((const struct loop *, int, rtx,
  				    rtx, rtx, rtx *, rtx *, rtx *));
  static int check_dbra_loop PARAMS ((struct loop *, int));
--- 278,284 ----
  					rtx *, rtx *, rtx **, int *));
  static rtx simplify_giv_expr PARAMS ((const struct loop *, rtx, int *));
  static int general_induction_var PARAMS ((const struct loop *loop, rtx, rtx *,
! 					  rtx *, rtx *, int, int *, enum machine_mode));
  static int consec_sets_giv PARAMS ((const struct loop *, int, rtx,
  				    rtx, rtx, rtx *, rtx *, rtx *));
  static int check_dbra_loop PARAMS ((struct loop *, int));
*************** init_loop ()
*** 367,377 ****
  
    add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
  
! #ifdef ADDRESS_COST
!   reg_address_cost = ADDRESS_COST (reg);
! #else
!   reg_address_cost = rtx_cost (reg, MEM);
! #endif
  
    /* We multiply by 2 to reconcile the difference in scale between
       these two ways of computing costs.  Otherwise the cost of a copy
--- 367,373 ----
  
    add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);
  
!   reg_address_cost = address_cost (reg, SImode);
  
    /* We multiply by 2 to reconcile the difference in scale between
       these two ways of computing costs.  Otherwise the cost of a copy
*************** check_insn_for_givs (loop, p, not_every_
*** 5134,5145 ****
  
        if (/* SET_SRC is a giv.  */
  	  (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
! 				  &mult_val, 0, &benefit)
  	   /* Equivalent expression is a giv.  */
  	   || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
  	       && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
  					 &add_val, &mult_val, 0,
! 					 &benefit)))
  	  /* Don't try to handle any regs made by loop optimization.
  	     We have nothing on them in regno_first_uid, etc.  */
  	  && REGNO (dest_reg) < max_reg_before_loop
--- 5130,5141 ----
  
        if (/* SET_SRC is a giv.  */
  	  (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
! 				  &mult_val, 0, &benefit, VOIDmode)
  	   /* Equivalent expression is a giv.  */
  	   || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
  	       && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
  					 &add_val, &mult_val, 0,
! 					 &benefit, VOIDmode)))
  	  /* Don't try to handle any regs made by loop optimization.
  	     We have nothing on them in regno_first_uid, etc.  */
  	  && REGNO (dest_reg) < max_reg_before_loop
*************** find_mem_givs (loop, x, insn, not_every_
*** 5277,5283 ****
  	   this one would not be seen.   */
  
  	if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
! 				   &mult_val, 1, &benefit))
  	  {
  	    /* Found one; record it.  */
  	    struct induction *v
--- 5273,5279 ----
  	   this one would not be seen.   */
  
  	if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
! 				   &mult_val, 1, &benefit, GET_MODE (x)))
  	  {
  	    /* Found one; record it.  */
  	    struct induction *v
*************** basic_induction_var (loop, x, mode, dest
*** 6121,6127 ****
       such that the value of X is biv * mult + add;  */
  
  static int
! general_induction_var (loop, x, src_reg, add_val, mult_val, is_addr, pbenefit)
       const struct loop *loop;
       rtx x;
       rtx *src_reg;
--- 6117,6124 ----
       such that the value of X is biv * mult + add;  */
  
  static int
! general_induction_var (loop, x, src_reg, add_val, mult_val, is_addr,
! 		       pbenefit, addr_mode)
       const struct loop *loop;
       rtx x;
       rtx *src_reg;
*************** general_induction_var (loop, x, src_reg,
*** 6129,6134 ****
--- 6126,6132 ----
       rtx *mult_val;
       int is_addr;
       int *pbenefit;
+      enum machine_mode addr_mode;
  {
    rtx orig_x = x;
    char *storage;
*************** general_induction_var (loop, x, src_reg,
*** 6202,6214 ****
      *mult_val = XEXP (*mult_val, 0);
  
    if (is_addr)
!     {
! #ifdef ADDRESS_COST
!       *pbenefit += ADDRESS_COST (orig_x) - reg_address_cost;
! #else
!       *pbenefit += rtx_cost (orig_x, MEM) - reg_address_cost;
! #endif
!     }
    else
      *pbenefit += rtx_cost (orig_x, SET);
  
--- 6200,6206 ----
      *mult_val = XEXP (*mult_val, 0);
  
    if (is_addr)
!     *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
    else
      *pbenefit += rtx_cost (orig_x, SET);
  
*************** consec_sets_giv (loop, first_benefit, p,
*** 6725,6735 ****
  	  && GET_CODE (SET_DEST (set)) == REG
  	  && SET_DEST (set) == dest_reg
  	  && (general_induction_var (loop, SET_SRC (set), &src_reg,
! 				     add_val, mult_val, 0, &benefit)
  	      /* Giv created by equivalent expression.  */
  	      || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
  		  && general_induction_var (loop, XEXP (temp, 0), &src_reg,
! 					    add_val, mult_val, 0, &benefit)))
  	  && src_reg == v->src_reg)
  	{
  	  if (find_reg_note (p, REG_RETVAL, NULL_RTX))
--- 6717,6728 ----
  	  && GET_CODE (SET_DEST (set)) == REG
  	  && SET_DEST (set) == dest_reg
  	  && (general_induction_var (loop, SET_SRC (set), &src_reg,
! 				     add_val, mult_val, 0, &benefit, VOIDmode)
  	      /* Giv created by equivalent expression.  */
  	      || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
  		  && general_induction_var (loop, XEXP (temp, 0), &src_reg,
! 					    add_val, mult_val, 0, &benefit,
! 					    VOIDmode)))
  	  && src_reg == v->src_reg)
  	{
  	  if (find_reg_note (p, REG_RETVAL, NULL_RTX))
*** config/i386/i386.h.old	Mon Apr 17 17:21:26 2000
--- config/i386/i386.h	Mon Apr 17 17:22:00 2000
*************** while (0)
*** 2040,2050 ****
     lifetimes.  */
  
  #define ADDRESS_COST(RTX) \
!   ((CONSTANT_P (RTX)						\
!     || (GET_CODE (RTX) == PLUS && CONSTANT_P (XEXP (RTX, 1))	\
! 	&& REG_P (XEXP (RTX, 0)))) ? 0				\
!    : REG_P (RTX) ? 1						\
!    : 2)
  
  /* A C expression for the cost of moving data from a register in class FROM to
     one in class TO.  The classes are expressed using the enumeration values
--- 2040,2046 ----
     lifetimes.  */
  
  #define ADDRESS_COST(RTX) \
!   ix86_address_cost (x)
  
  /* A C expression for the cost of moving data from a register in class FROM to
     one in class TO.  The classes are expressed using the enumeration values
*** config/i386/i386-protos.h.old	Mon Apr 17 17:21:34 2000
--- config/i386/i386-protos.h	Mon Apr 17 17:22:25 2000
*************** extern void ix86_split_ashldi PARAMS ((r
*** 108,113 ****
--- 108,114 ----
  extern void ix86_split_ashrdi PARAMS ((rtx *, rtx));
  extern void ix86_split_lshrdi PARAMS ((rtx *, rtx));
  extern void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx, rtx));
+ extern int ix86_address_cost PARAMS ((rtx));
  
  extern rtx assign_386_stack_local PARAMS ((enum machine_mode, int));
  extern int ix86_attr_length_default PARAMS ((rtx));
*** config/i386/i386.c.old	Mon Apr 17 17:21:29 2000
--- config/i386/i386.c	Mon Apr 17 20:07:52 2000
*************** ix86_decompose_address (addr, out)
*** 2253,2259 ****
--- 2253,2317 ----
  
    return TRUE;
  }
+ 
+ /* Return cost of the memory address x.
+    For i386, it is better to use a complex address than let gcc copy
+    the address into a reg and make a new pseudo.  But not if the address
+    requires to two regs - that would mean more pseudos with longer
+    lifetimes.  */
+ int
+ ix86_address_cost (x)
+      rtx x;
+ {
+   struct ix86_address parts;
+   int cost = 1;
  
+   if (!ix86_decompose_address (x, &parts))
+     abort ();
+ 
+   /* More complex memory references are better.  */
+   if (parts.disp && parts.disp != const0_rtx)
+     cost--;
+ 
+   /* Attempt to minimize number of registers in the address.  */
+   if ((parts.base
+        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
+       || (parts.index
+ 	  && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
+     cost++;
+ 
+   if (parts.base && REGNO (parts.base) >= FIRST_PSEUDO_REGISTER
+       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
+       && parts.index
+       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
+       && parts.base != parts.index)
+     cost++;
+ 
+   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
+      since it's predecode logic can't detect the length of instructions
+      and it degenerates to vector decoded.  Increase cost of such
+      instructions here.  The penalty is minimally 2 cycles.
+      It may be worthwhile to split such addresses
+      or even refuse such addresses at all.  
+ 
+      Following addressing mode are affected:
+       [base+scale×index]
+       [scale×index+disp]
+       [base+index]
+    
+      The first and last case  may be avoidable by explicitly coding the zero in
+      memory address, but I don't have AMD-K6 machine handy to check this
+      theory.  */
+ 
+   if (TARGET_K6
+       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
+ 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
+ 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
+     cost += 10;
+       
+   return cost;
+ }
+ 
  /* Determine if a given CONST RTX is a valid memory displacement
     in PIC mode.  */
     


More information about the Gcc-patches mailing list