This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[patch] Fix behavior of TER on unrolled loops


Hello,

given the example below, TER produces code like

x.1 = p[i];
x.2 = p[i+1];
...
x.8 = p[i+7];
y.1 = q[i];
y.2 = q[i+1];
...
y.8 = q[i+7];
sum1 = really huge expression
sum2 = ditto

This causes ~20 registers to be needed instead of original ~6, which is
a complete disaster on i686 and pretty bad on some other architectures
(amd64, ppc).  This problem appears in sixtrack benchmark when
unrolling on tree level is implemented (decreasing its performance by
some 50%), and also can be reproduced on manually unrolled loops.

The patch fixes the problem by extending the existing code that prevents
TER from working on this type of chains in the special case when all the
assignments are of form sum = sum op something (i.e. without other
operations with sum as in the example below).

Bootstrapped & regtested on i686.

Zdenek

	* tree-outof-ssa.c (struct temp_expr_table_d): New field expr_vars.
	(new_temp_expr_table, free_temp_expr_table, check_replaceable,
	find_replaceable_in_bb): Stop TER when a different ssa name for the
	same variable is encountered.

	* gcc.dg/tree-ssa/ter-1.c: New test.

Index: tree-outof-ssa.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-outof-ssa.c,v
retrieving revision 2.66
diff -c -3 -p -r2.66 tree-outof-ssa.c
*** tree-outof-ssa.c	4 Aug 2005 23:36:59 -0000	2.66
--- tree-outof-ssa.c	20 Aug 2005 15:06:42 -0000
*************** typedef struct value_expr_d 
*** 1299,1305 ****
  typedef struct temp_expr_table_d 
  {
    var_map map;
!   void **version_info;		
    value_expr_p *partition_dep_list;
    bitmap replaceable;
    bool saw_replaceable;
--- 1299,1306 ----
  typedef struct temp_expr_table_d 
  {
    var_map map;
!   void **version_info;
!   bitmap *expr_vars;
    value_expr_p *partition_dep_list;
    bitmap replaceable;
    bool saw_replaceable;
*************** new_temp_expr_table (var_map map)
*** 1344,1349 ****
--- 1345,1351 ----
    t->map = map;
  
    t->version_info = xcalloc (num_ssa_names + 1, sizeof (void *));
+   t->expr_vars = xcalloc (num_ssa_names + 1, sizeof (bitmap));
    t->partition_dep_list = xcalloc (num_var_partitions (map) + 1, 
  				   sizeof (value_expr_p));
  
*************** free_temp_expr_table (temp_expr_table_p 
*** 1367,1372 ****
--- 1369,1375 ----
  {
    value_expr_p p;
    tree *ret = NULL;
+   unsigned i;
  
  #ifdef ENABLE_CHECKING
    unsigned x;
*************** free_temp_expr_table (temp_expr_table_p 
*** 1383,1388 ****
--- 1386,1396 ----
    BITMAP_FREE (t->partition_in_use);
    BITMAP_FREE (t->replaceable);
  
+   for (i = 0; i <= num_ssa_names; i++)
+     if (t->expr_vars[i])
+       BITMAP_FREE (t->expr_vars[i]);
+   free (t->expr_vars);
+ 
    free (t->partition_dep_list);
    if (t->saw_replaceable)
      ret = (tree *)t->version_info;
*************** add_dependance (temp_expr_table_p tab, i
*** 1545,1555 ****
  static bool
  check_replaceable (temp_expr_table_p tab, tree stmt)
  {
!   tree var, def;
    int version;
    var_map map = tab->map;
    ssa_op_iter iter;
    tree call_expr;
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
--- 1553,1564 ----
  static bool
  check_replaceable (temp_expr_table_p tab, tree stmt)
  {
!   tree var, def, basevar;
    int version;
    var_map map = tab->map;
    ssa_op_iter iter;
    tree call_expr;
+   bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
  
    if (TREE_CODE (stmt) != MODIFY_EXPR)
      return false;
*************** check_replaceable (temp_expr_table_p tab
*** 1580,1591 ****
--- 1589,1607 ----
      }
  
    version = SSA_NAME_VERSION (def);
+   basevar = SSA_NAME_VAR (def);
+   bitmap_set_bit (def_vars, DECL_UID (basevar));
  
    /* Add this expression to the dependency list for each use partition.  */
    FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
      {
        add_dependance (tab, version, var);
+ 
+       use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
+       if (use_vars)
+ 	bitmap_ior_into (def_vars, use_vars);
      }
+   tab->expr_vars[version] = def_vars;
  
    /* If there are VUSES, add a dependence on virtual defs.  */
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
*************** static void
*** 1704,1710 ****
  find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
  {
    block_stmt_iterator bsi;
!   tree stmt, def;
    stmt_ann_t ann;
    int partition;
    var_map map = tab->map;
--- 1720,1726 ----
  find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
  {
    block_stmt_iterator bsi;
!   tree stmt, def, use;
    stmt_ann_t ann;
    int partition;
    var_map map = tab->map;
*************** find_replaceable_in_bb (temp_expr_table_
*** 1717,1746 ****
        ann = stmt_ann (stmt);
  
        /* Determine if this stmt finishes an existing expression.  */
!       FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE)
  	{
! 	  if (tab->version_info[SSA_NAME_VERSION (def)])
  	    {
  	      bool same_root_var = false;
- 	      tree def2;
  	      ssa_op_iter iter2;
  
  	      /* See if the root variables are the same.  If they are, we
  		 do not want to do the replacement to avoid problems with
  		 code size, see PR tree-optimization/17549.  */
! 	      FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF)
! 		if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2))
! 		  {
! 		    same_root_var = true;
! 		    break;
! 		  }
  
  	      /* Mark expression as replaceable unless stmt is volatile
  		 or DEF sets the same root variable as STMT.  */
  	      if (!ann->has_volatile_ops && !same_root_var)
! 		mark_replaceable (tab, def);
  	      else
! 		finish_expr (tab, SSA_NAME_VERSION (def), false);
  	    }
  	}
        
--- 1733,1766 ----
        ann = stmt_ann (stmt);
  
        /* Determine if this stmt finishes an existing expression.  */
!       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
  	{
! 	  unsigned ver = SSA_NAME_VERSION (use);
! 
! 	  if (tab->version_info[ver])
  	    {
  	      bool same_root_var = false;
  	      ssa_op_iter iter2;
+ 	      bitmap vars = tab->expr_vars[ver];
  
  	      /* See if the root variables are the same.  If they are, we
  		 do not want to do the replacement to avoid problems with
  		 code size, see PR tree-optimization/17549.  */
! 	      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
! 		{
! 		  if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
! 		    {
! 		      same_root_var = true;
! 		      break;
! 		    }
! 		}
  
  	      /* Mark expression as replaceable unless stmt is volatile
  		 or DEF sets the same root variable as STMT.  */
  	      if (!ann->has_volatile_ops && !same_root_var)
! 		mark_replaceable (tab, use);
  	      else
! 		finish_expr (tab, ver, false);
  	    }
  	}
        
Index: testsuite/gcc.dg/tree-ssa/ter-1.c
===================================================================
RCS file: testsuite/gcc.dg/tree-ssa/ter-1.c
diff -N testsuite/gcc.dg/tree-ssa/ter-1.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- testsuite/gcc.dg/tree-ssa/ter-1.c	20 Aug 2005 15:06:50 -0000
***************
*** 0 ****
--- 1,62 ----
+ /* { dg-do compile } */
+ /* { dg-options "-O1 -fdump-tree-vars" } */
+ 
+ int a[200], b[200];
+ 
+ void bla(int, int);
+ 
+ void xxx(void)
+ {
+   int i, sum1 = 0, sum2 = 0, x, y;
+ 
+   for (i = 0; i < 200; i+=8)
+     {
+       x = a[i];
+       y = b[i];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+1];
+       y = b[i+1];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+2];
+       y = b[i+2];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+3];
+       y = b[i+3];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+4];
+       y = b[i+4];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+5];
+       y = b[i+5];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+6];
+       y = b[i+6];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+ 
+       x = a[i+7];
+       y = b[i+7];
+       sum1 = (sum1 * 5) + x * y;
+       sum2 = (sum2 * 5) + x / y;
+     }
+ 
+   bla (sum1, sum2);
+ }
+ 
+ /* TER used to merge all the increments of sum1 and sum2 counters to one huge
+    expression, and thus hopelessly run out of registers on many architectures.
+    Check that no such huge expression is created.  */
+ 
+ /* { dg-final { scan-tree-dump-times "\[^\\r\\n\]{80}" 0 "vars"} } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]