Re: [patch] Partial fix for PR 18048


Hello,

here is the patch with Zack's comment addressed.  Bootstrapped &
regtested on x86_64.

Zdenek

	PR tree-optimization/18048
	* fold-const.c (try_move_mult_to_index): New function.
	(fold): Use try_move_mult_to_index.
	* tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates.
	* tree-ssa-loop-niter.c (number_of_iterations_cond): Produce
	an all-ones unsigned constant without extra bits.
	* tree.c (build_low_bits_mask): New function.
	* tree.h (build_low_bits_mask): Declare.
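
To illustrate the fold-const.c change: the loop optimizer can leave
address arithmetic of the form &a[i] + step * j in the IL, and the new
try_move_mult_to_index folds it back to &a[i + j].  A hypothetical
source-level analogue, not taken from the PR (assumes 4-byte int):

	int a[100];

	int *
	f (int i, int j)
	{
	  /* The address is computed internally as &a[i] + 4 * j;
	     with the patch it folds back to &a[i + j].  */
	  return &a[i] + j;
	}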

Index: fold-const.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/fold-const.c,v
retrieving revision 1.468
diff -c -3 -p -r1.468 fold-const.c
*** fold-const.c	11 Oct 2004 03:42:00 -0000	1.468
--- fold-const.c	27 Oct 2004 11:36:47 -0000
*************** tree_swap_operands_p (tree arg0, tree ar
*** 5967,5972 ****
--- 5967,6050 ----
    return 0;
  }
  
+ /* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s
+    is the step of the array.  TYPE is the type of the expression.  ADDR is
+    the address, and MULT the multiplicative expression.  If the function
+    succeeds, the new address expression is returned, otherwise NULL_TREE.  */
+ 
+ static tree
+ try_move_mult_to_index (tree type, enum tree_code code, tree addr, tree mult)
+ {
+   tree s, delta, step;
+   tree arg0 = TREE_OPERAND (mult, 0), arg1 = TREE_OPERAND (mult, 1);
+   tree ref = TREE_OPERAND (addr, 0), pref;
+   tree ret, pos;
+   tree itype;
+ 
+   STRIP_NOPS (arg0);
+   STRIP_NOPS (arg1);
+   
+   if (TREE_CODE (arg0) == INTEGER_CST)
+     {
+       s = arg0;
+       delta = arg1;
+     }
+   else if (TREE_CODE (arg1) == INTEGER_CST)
+     {
+       s = arg1;
+       delta = arg0;
+     }
+   else
+     return NULL_TREE;
+ 
+   for (;; ref = TREE_OPERAND (ref, 0))
+     {
+       if (TREE_CODE (ref) == ARRAY_REF)
+ 	{
+ 	  step = array_ref_element_size (ref);
+ 
+ 	  if (TREE_CODE (step) != INTEGER_CST)
+ 	    continue;
+ 
+ 	  itype = TREE_TYPE (step);
+ 
+ 	  /* If the type sizes do not match, we might run into problems
+ 	     when one of them overflows.  */
+ 	  if (TYPE_PRECISION (itype) != TYPE_PRECISION (type))
+ 	    continue;
+ 
+ 	  if (!operand_equal_p (step, fold_convert (itype, s), 0))
+ 	    continue;
+ 
+ 	  delta = fold_convert (itype, delta);
+ 	  break;
+ 	}
+ 
+       if (!handled_component_p (ref))
+ 	return NULL_TREE;
+     }
+ 
+   /* We found a suitable array reference.  So copy everything up to it,
+      and replace the index.  */
+ 
+   pref = TREE_OPERAND (addr, 0);
+   ret = copy_node (pref);
+   pos = ret;
+ 
+   while (pref != ref)
+     {
+       pref = TREE_OPERAND (pref, 0);
+       TREE_OPERAND (pos, 0) = copy_node (pref);
+       pos = TREE_OPERAND (pos, 0);
+     }
+ 
+   TREE_OPERAND (pos, 1) = fold (build2 (code, itype,
+ 					TREE_OPERAND (pos, 1),
+ 					delta));
+ 
+   return build1 (ADDR_EXPR, type, ret);
+ }
+ 
  /* Perform constant folding and related simplification of EXPR.
     The related simplifications include x*1 => x, x*0 => 0, etc.,
     and application of the associative law.
*************** fold (tree expr)
*** 6602,6607 ****
--- 6680,6703 ----
  						   alt0, alt1)),
  				     same));
  	    }
+ 
+ 	  /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is the step
+ 	     of the array.  The loop optimizer sometimes produces this type of
+ 	     expression.  */
+ 	  if (TREE_CODE (arg0) == ADDR_EXPR
+ 	      && TREE_CODE (arg1) == MULT_EXPR)
+ 	    {
+ 	      tem = try_move_mult_to_index (type, PLUS_EXPR, arg0, arg1);
+ 	      if (tem)
+ 		return fold (tem);
+ 	    }
+ 	  else if (TREE_CODE (arg1) == ADDR_EXPR
+ 		   && TREE_CODE (arg0) == MULT_EXPR)
+ 	    {
+ 	      tem = try_move_mult_to_index (type, PLUS_EXPR, arg1, arg0);
+ 	      if (tem)
+ 		return fold (tem);
+ 	    }
  	}
        else
  	{
*************** fold (tree expr)
*** 6974,6979 ****
--- 7070,7086 ----
  				     &diff))
  	  return build_int_cst_type (type, diff);
        }
+ 
+       /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is the step
+ 	 of the array.  The loop optimizer sometimes produces this type of
+ 	 expression.  */
+       if (TREE_CODE (arg0) == ADDR_EXPR
+ 	  && TREE_CODE (arg1) == MULT_EXPR)
+ 	{
+ 	  tem = try_move_mult_to_index (type, MINUS_EXPR, arg0, arg1);
+ 	  if (tem)
+ 	    return fold (tem);
+ 	}
  
        if (TREE_CODE (arg0) == MULT_EXPR
  	  && TREE_CODE (arg1) == MULT_EXPR
Index: tree-ssa-loop-ivopts.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-ivopts.c,v
retrieving revision 2.20
diff -c -3 -p -r2.20 tree-ssa-loop-ivopts.c
*** tree-ssa-loop-ivopts.c	18 Oct 2004 18:01:09 -0000	2.20
--- tree-ssa-loop-ivopts.c	27 Oct 2004 11:36:47 -0000
*************** try_add_cand_for (struct ivopts_data *da
*** 3603,3622 ****
    bitmap act_inv = BITMAP_XMALLOC ();
    unsigned i;
    struct cost_pair *cp;
  
    bitmap_copy (best_ivs, ivs);
    bitmap_copy (best_inv, inv);
  
!   for (i = 0; i < use->n_map_members; i++)
      {
!       cp = use->cost_map + i;
!       if (cp->cost == INFTY)
  	continue;
  
        bitmap_copy (act_ivs, ivs);
!       bitmap_set_bit (act_ivs, cp->cand->id);
!       if (cp->depends_on)
! 	bitmap_a_or_b (act_inv, inv, cp->depends_on);
        else
  	bitmap_copy (act_inv, inv);
        act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
--- 3603,3634 ----
    bitmap act_inv = BITMAP_XMALLOC ();
    unsigned i;
    struct cost_pair *cp;
+   bitmap_iterator bi;
+   struct iv_cand *cand;
+   bitmap depends_on;
  
    bitmap_copy (best_ivs, ivs);
    bitmap_copy (best_inv, inv);
  
!   /* First try important candidates.  Only if that fails, try the specific
!      ones.  Rationale -- in loops with many variables the best choice often
!      is to use just one generic biv.  If we added many ivs specific to the
!      uses here, the optimization algorithm later would be likely to get
!      stuck in a local minimum, thus causing us to create too many ivs.
!      Going from few ivs to more seems more likely to be successful --
!      starting from few ivs, replacing an expensive use by a specific iv
!      should always be a win.  */
!   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
      {
!       cand = iv_cand (data, i);
! 
!       if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY)
  	continue;
  
        bitmap_copy (act_ivs, ivs);
!       bitmap_set_bit (act_ivs, cand->id);
!       if (depends_on)
! 	bitmap_a_or_b (act_inv, inv, depends_on);
        else
  	bitmap_copy (act_inv, inv);
        act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
*************** try_add_cand_for (struct ivopts_data *da
*** 3629,3634 ****
--- 3641,3675 ----
  	}
      }
  
+   if (best_cost == INFTY)
+     {
+       for (i = 0; i < use->n_map_members; i++)
+ 	{
+ 	  cp = use->cost_map + i;
+ 	  if (cp->cost == INFTY)
+ 	    continue;
+ 
+ 	  /* Already tried this.  */
+ 	  if (cp->cand->important)
+ 	    continue;
+ 
+ 	  bitmap_copy (act_ivs, ivs);
+ 	  bitmap_set_bit (act_ivs, cp->cand->id);
+ 	  if (cp->depends_on)
+ 	    bitmap_a_or_b (act_inv, inv, cp->depends_on);
+ 	  else
+ 	    bitmap_copy (act_inv, inv);
+ 	  act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
+ 
+ 	  if (act_cost < best_cost)
+ 	    {
+ 	      best_cost = act_cost;
+ 	      bitmap_copy (best_ivs, act_ivs);
+ 	      bitmap_copy (best_inv, act_inv);
+ 	    }
+ 	}
+     }
+ 
    bitmap_copy (ivs, best_ivs);
    bitmap_copy (inv, best_inv);
  
Index: tree-ssa-loop-niter.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-ssa-loop-niter.c,v
retrieving revision 2.13
diff -c -3 -p -r2.13 tree-ssa-loop-niter.c
*** tree-ssa-loop-niter.c	22 Oct 2004 17:05:08 -0000	2.13
--- tree-ssa-loop-niter.c	27 Oct 2004 11:36:47 -0000
*************** number_of_iterations_cond (tree type, tr
*** 419,427 ****
        d = EXEC_BINARY (LSHIFT_EXPR, niter_type,
  		       build_int_cst_type (niter_type, 1), bits);
        s = EXEC_BINARY (RSHIFT_EXPR, niter_type, step0, bits);
!       bound = EXEC_BINARY (RSHIFT_EXPR, niter_type,
! 			   build_int_cst (niter_type, -1),
! 			   bits);
  
        assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
        assumption = fold (build2 (EQ_EXPR, boolean_type_node,
--- 419,428 ----
        d = EXEC_BINARY (LSHIFT_EXPR, niter_type,
  		       build_int_cst_type (niter_type, 1), bits);
        s = EXEC_BINARY (RSHIFT_EXPR, niter_type, step0, bits);
! 
!       bound = build_low_bits_mask (niter_type,
! 				   (TYPE_PRECISION (niter_type)
! 				    - tree_low_cst (bits, 1)));
  
        assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
        assumption = fold (build2 (EQ_EXPR, boolean_type_node,
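
The tree-ssa-loop-niter.c hunk replaces a right shift of an all-ones
constant with an explicit low-bits mask.  As the ChangeLog entry says,
the point is to produce the all-ones unsigned constant without extra
bits: the constant built by build_int_cst (niter_type, -1) is
sign-extended in its internal double-word representation, so shifting
it right could leave stray bits set above the precision of niter_type.
A stand-alone illustration with hypothetical values (32-bit precision,
bits == 2):

	#include <stdint.h>

	/* A sign-extended all-ones value shifted right keeps bits above
	   bit 31, while a mask of 32 - 2 = 30 low bits does not.  */
	uint64_t shifted = ~UINT64_C (0) >> 2;        /* 0x3fffffffffffffff */
	uint64_t masked  = (UINT64_C (1) << 30) - 1;  /* 0x000000003fffffff */
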
Index: tree.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.c,v
retrieving revision 1.440
diff -c -3 -p -r1.440 tree.c
*** tree.c	25 Oct 2004 13:27:25 -0000	1.440
--- tree.c	27 Oct 2004 11:36:47 -0000
*************** build_int_cst_wide (tree type, unsigned 
*** 609,614 ****
--- 609,648 ----
    return t;
  }
  
+ /* Builds an integer constant in TYPE such that the lowest BITS bits are
+    ones and the rest are zeros.  */
+ 
+ tree
+ build_low_bits_mask (tree type, unsigned bits)
+ {
+   unsigned HOST_WIDE_INT low;
+   HOST_WIDE_INT high;
+   unsigned HOST_WIDE_INT all_ones = ~(unsigned HOST_WIDE_INT) 0;
+ 
+   gcc_assert (bits <= TYPE_PRECISION (type));
+ 
+   if (bits == TYPE_PRECISION (type)
+       && !TYPE_UNSIGNED (type))
+     {
+       /* Sign-extended all-ones mask.  */
+       low = all_ones;
+       high = -1;
+     }
+   else if (bits <= HOST_BITS_PER_WIDE_INT)
+     {
+       low = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+       high = 0;
+     }
+   else
+     {
+       bits -= HOST_BITS_PER_WIDE_INT;
+       low = all_ones;
+       high = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+     }
+ 
+   return build_int_cst_wide (type, low, high);
+ }
+ 
  /* Checks that X is integer constant that can be expressed in (unsigned)
     HOST_WIDE_INT without loss of precision.  */
  
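
For intuition, here is a minimal stand-alone analogue of
build_low_bits_mask on a single 64-bit word (a hypothetical helper, not
part of the patch; the real function also splits the mask across the
low and high HOST_WIDE_INT halves of the INTEGER_CST and sign-extends
the full-precision mask for signed types):

	#include <stdint.h>

	/* Return a value whose low BITS bits are ones, for BITS <= 64.
	   The full-width case is handled separately because shifting a
	   64-bit value by 64 bits is undefined behavior in C.  */
	static uint64_t
	low_bits_mask (unsigned bits)
	{
	  return bits == 64 ? ~UINT64_C (0)
			    : (UINT64_C (1) << bits) - 1;
	}

For example, low_bits_mask (30) is 0x3fffffff, the bound requested by
the tree-ssa-loop-niter.c change above for a 32-bit type with bits == 2.
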
Index: tree.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree.h,v
retrieving revision 1.640
diff -c -3 -p -r1.640 tree.h
*** tree.h	22 Oct 2004 18:52:19 -0000	1.640
--- tree.h	27 Oct 2004 11:36:47 -0000
*************** tree fold_build_cleanup_point_expr (tree
*** 3543,3548 ****
--- 3543,3549 ----
  extern tree build_fold_addr_expr_with_type (tree, tree);
  extern tree build_fold_indirect_ref (tree);
  extern tree constant_boolean_node (int, tree);
+ extern tree build_low_bits_mask (tree, unsigned);
  
  extern bool tree_swap_operands_p (tree, tree, bool);
  extern enum tree_code swap_tree_comparison (enum tree_code);

