[patch] for some of PR29256 problems

Zdenek Dvorak rakdver@atrey.karlin.mff.cuni.cz
Fri Sep 29 00:34:00 GMT 2006


Hello,

the change to ivopts in
http://gcc.gnu.org/ml/gcc-patches/2006-08/msg00198.html makes us to
create all induction variables in integer types.  This however disabled
the part of the ivopts that used to ensure that we do not use pointer
variables that point to one object to address memory references in
another object.  I.e., with the change, we could produce code like

delta = &a - &c;
for (p = &c; ; p += 4B)
  *p = *(p + delta);

(which is known to confuse alias analysis, as well as users :-)
This patch makes determine_base_object work even for pointers that
are casted to integer types, thus fixing this problem.

Another problem in this PR is that even with this change, the induction
variable selection will choose a suboptimal induction variables.  This
is because on powerpc, [symbol + index] addressing mode will be
considered to be significantly more expensive than [reg + index] mode
(since in the former case, we must load the address of the symbol to
register first, and than use [reg + index] addressing mode, and the
insns to load the address are considered to be quite expensive).  This
however does not make much sense, since the addresses will have to be
loaded to registers in any case (and they are loop invariant), so
penalizing the symbol + index addresses so much is missleading.  This
patch also adds the code to get_address_cost to fix up this case and to
decrease the cost of [symbol + index] addressing mode if it is
significantly mode expensive than [reg + index] addressing mode.

The main problem in this PR (unability to construct offsetted addressing
modes after unrolling) is not affected by these fixes, though.  I am
working on that.

Bootstrapped & regtested on i686 and ppc64.

Zdenek

Index: testsuite/gcc.dg/tree-ssa/loop-19.c
===================================================================
*** testsuite/gcc.dg/tree-ssa/loop-19.c	(revision 0)
--- testsuite/gcc.dg/tree-ssa/loop-19.c	(revision 0)
***************
*** 0 ****
--- 1,27 ----
+ /* This tests strength reduction and choice of induction variables.  The targets
+    for this testcase are quite limited, as with different set of available
+    addressing modes, the results may be quite different.
+  
+    The testcase comes from PR 29256 (and originally, the stream benchmark).  */
+ 
+ /* { dg-do compile { target i?86-*-* x86_64-*-* powerpc*-*-*} } */
+ /* { dg-options "-O3 -fdump-tree-final_cleanup" } */
+ 
+ # define N      2000000
+ static double   a[N],c[N];
+ void tuned_STREAM_Copy()
+ {
+   int j;
+   for (j=0; j<N; j++)
+     c[j] = a[j];
+ }
+ 
+ /* Check that the memory references are based on &a and &c, with appropriate
+    offsets.  Ideally, we would want each of them to appear once in the output.
+    However, due to a bug in jump threading, we end up peeling one iteration from
+    the loop, which creates an additional occurence.  */
+ 
+ /* { dg-final { scan-tree-dump-times "MEM.(base: &|symbol: )a," 2 "final_cleanup" } } */
+ /* { dg-final { scan-tree-dump-times "MEM.(base: &|symbol: )c," 2 "final_cleanup" } } */
+ 
+ /* { dg-final { cleanup-tree-dump "final_cleanup" } } */
Index: tree-ssa-loop-ivopts.c
===================================================================
*** tree-ssa-loop-ivopts.c	(revision 117276)
--- tree-ssa-loop-ivopts.c	(working copy)
*************** determine_base_object (tree expr)
*** 835,840 ****
--- 835,847 ----
    enum tree_code code = TREE_CODE (expr);
    tree base, obj, op0, op1;
  
+   /* If this is a pointer casted to any type, we need to determine
+      the base object for the pointer; so handle conversions before
+      throwing away non-pointer expressions.  */
+   if (TREE_CODE (expr) == NOP_EXPR
+       || TREE_CODE (expr) == CONVERT_EXPR)
+     return determine_base_object (TREE_OPERAND (expr, 0));
+ 
    if (!POINTER_TYPE_P (TREE_TYPE (expr)))
      return NULL_TREE;
  
*************** determine_base_object (tree expr)
*** 871,880 ****
  
        return fold_build2 (code, ptr_type_node, op0, op1);
  
-     case NOP_EXPR:
-     case CONVERT_EXPR:
-       return determine_base_object (TREE_OPERAND (expr, 0));
- 
      default:
        return fold_convert (ptr_type_node, expr);
      }
--- 878,883 ----
*************** get_address_cost (bool symbol_present, b
*** 3371,3379 ****
    static HOST_WIDE_INT min_offset, max_offset;
    static unsigned costs[2][2][2][2];
    unsigned cost, acost;
-   rtx seq, addr, base;
    bool offset_p, ratio_p;
-   rtx reg1;
    HOST_WIDE_INT s_offset;
    unsigned HOST_WIDE_INT mask;
    unsigned bits;
--- 3374,3380 ----
*************** get_address_cost (bool symbol_present, b
*** 3381,3386 ****
--- 3382,3392 ----
    if (!initialized)
      {
        HOST_WIDE_INT i;
+       int old_cse_not_expected;
+       unsigned sym_p, var_p, off_p, rat_p, add_c;
+       rtx seq, addr, base;
+       rtx reg0, reg1;
+ 
        initialized = true;
  
        reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
*************** get_address_cost (bool symbol_present, b
*** 3417,3422 ****
--- 3423,3536 ----
  	    rat = i;
  	    break;
  	  }
+ 
+       /* Compute the cost of various addressing modes.  */
+       acost = 0;
+       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
+       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
+ 
+       for (i = 0; i < 16; i++)
+ 	{
+ 	  sym_p = i & 1;
+ 	  var_p = (i >> 1) & 1;
+ 	  off_p = (i >> 2) & 1;
+ 	  rat_p = (i >> 3) & 1;
+ 
+ 	  addr = reg0;
+ 	  if (rat_p)
+ 	    addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
+ 
+ 	  if (var_p)
+ 	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
+ 
+ 	  if (sym_p)
+ 	    {
+ 	      base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
+ 	      if (off_p)
+ 		base = gen_rtx_fmt_e (CONST, Pmode,
+ 				      gen_rtx_fmt_ee (PLUS, Pmode,
+ 						      base,
+ 						      gen_int_mode (off, Pmode)));
+ 	    }
+ 	  else if (off_p)
+ 	    base = gen_int_mode (off, Pmode);
+ 	  else
+ 	    base = NULL_RTX;
+     
+ 	  if (base)
+ 	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
+   
+ 	  start_sequence ();
+ 	  /* To avoid splitting addressing modes, pretend that no cse will
+ 	     follow.  */
+ 	  old_cse_not_expected = cse_not_expected;
+ 	  cse_not_expected = true;
+ 	  addr = memory_address (Pmode, addr);
+ 	  cse_not_expected = old_cse_not_expected;
+ 	  seq = get_insns ();
+ 	  end_sequence ();
+ 
+ 	  acost = seq_cost (seq);
+ 	  acost += address_cost (addr, Pmode);
+ 
+ 	  if (!acost)
+ 	    acost = 1;
+ 	  costs[sym_p][var_p][off_p][rat_p] = acost;
+ 	}
+ 
+       /* On some targets, it is quite expensive to load symbol to a register,
+ 	 which makes addresses that contain symbols look much more expensive.
+ 	 However, the symbol will have to be loaded in any case before the
+ 	 loop (and quite likely we have it in register already), so it does not
+ 	 make much sense to penalize them too heavily.  So make some final
+          tweaks for the SYMBOL_PRESENT modes:
+ 
+          If VAR_PRESENT is false, and the mode obtained by changing symbol to
+ 	 var is cheaper, use this mode with small penalty.
+ 	 If VAR_PRESENT is true, try whether the mode with
+ 	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
+ 	 if this is the case, use it.  */
+       add_c = add_cost (Pmode);
+       for (i = 0; i < 8; i++)
+ 	{
+ 	  var_p = i & 1;
+ 	  off_p = (i >> 1) & 1;
+ 	  rat_p = (i >> 2) & 1;
+ 
+ 	  acost = costs[0][1][off_p][rat_p] + 1;
+ 	  if (var_p)
+ 	    acost += add_c;
+ 
+ 	  if (acost < costs[1][var_p][off_p][rat_p])
+ 	    costs[1][var_p][off_p][rat_p] = acost;
+ 	}
+   
+       if (dump_file && (dump_flags & TDF_DETAILS))
+ 	{
+ 	  fprintf (dump_file, "Address costs:\n");
+       
+ 	  for (i = 0; i < 16; i++)
+ 	    {
+ 	      sym_p = i & 1;
+ 	      var_p = (i >> 1) & 1;
+ 	      off_p = (i >> 2) & 1;
+ 	      rat_p = (i >> 3) & 1;
+ 
+ 	      fprintf (dump_file, "  ");
+ 	      if (sym_p)
+ 		fprintf (dump_file, "sym + ");
+ 	      if (var_p)
+ 		fprintf (dump_file, "var + ");
+ 	      if (off_p)
+ 		fprintf (dump_file, "cst + ");
+ 	      if (rat_p)
+ 		fprintf (dump_file, "rat * ");
+ 
+ 	      acost = costs[sym_p][var_p][off_p][rat_p];
+ 	      fprintf (dump_file, "index costs %d\n", acost);
+ 	    }
+ 	  fprintf (dump_file, "\n");
+ 	}
      }
  
    bits = GET_MODE_BITSIZE (Pmode);
*************** get_address_cost (bool symbol_present, b
*** 3442,3495 ****
      }
  
    acost = costs[symbol_present][var_present][offset_p][ratio_p];
-   if (!acost)
-     {
-       int old_cse_not_expected;
-       acost = 0;
-       
-       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
-       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
-       if (ratio_p)
- 	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
- 
-       if (var_present)
- 	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
- 
-       if (symbol_present)
- 	{
- 	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
- 	  if (offset_p)
- 	    base = gen_rtx_fmt_e (CONST, Pmode,
- 				  gen_rtx_fmt_ee (PLUS, Pmode,
- 						  base,
- 						  gen_int_mode (off, Pmode)));
- 	}
-       else if (offset_p)
- 	base = gen_int_mode (off, Pmode);
-       else
- 	base = NULL_RTX;
-     
-       if (base)
- 	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
-   
-       start_sequence ();
-       /* To avoid splitting addressing modes, pretend that no cse will
-  	 follow.  */
-       old_cse_not_expected = cse_not_expected;
-       cse_not_expected = true;
-       addr = memory_address (Pmode, addr);
-       cse_not_expected = old_cse_not_expected;
-       seq = get_insns ();
-       end_sequence ();
-   
-       acost = seq_cost (seq);
-       acost += address_cost (addr, Pmode);
- 
-       if (!acost)
- 	acost = 1;
-       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
-     }
- 
    return cost + acost;
  }
  
--- 3556,3561 ----



More information about the Gcc-patches mailing list